; Copyright 2001 Pace Micro Technology plc
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;     http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;
;> fp


        GBLL    UseCLZ
UseCLZ  SETL    :LNOT: NoARMv5

FLOATY  TEQ     TYPE,#0
FLOATZ  MOVMI   PC,R14
FLOATQ  BEQ     ERTYPEINT
FLOATB  MOV     TYPE,#TFP              ;set type to floating
;float integer in FACC
IFLT
 [ FPOINT=0
        MOVS    FSIGN,FACC             ;set sign
        BEQ     IFLTZ                  ;exit if number 0
        AND     FSIGN,FSIGN,#&80000000
        RSBMI   FACC,FACC,#0           ;complement number if rqd
      [ UseCLZ
        CLZ     FACCX,FACC
        MOV     FACC,FACC,LSL FACCX
        RSB     FACCX,FACCX,#&A0
      |
        MOV     FACCX,#&A0             ;initial exponent
IFLTA   CMP     FACC,#&10000
        MOVCC   FACC,FACC,LSL #16
        SUBCC   FACCX,FACCX,#16
        CMP     FACC,#&1000000
        MOVCC   FACC,FACC,LSL #8
        SUBCC   FACCX,FACCX,#8
        CMP     FACC,#&10000000
        MOVCC   FACC,FACC,LSL #4
        SUBCC   FACCX,FACCX,#4
        CMP     FACC,#&40000000
        MOVCC   FACC,FACC,LSL #2
        SUBCC   FACCX,FACCX,#2
        CMP     FACC,#&80000000
        MOVCC   FACC,FACC,LSL #1
        SUBCC   FACCX,FACCX,#1
      ]
        MOV     PC,R14
IFLTZ   MOV     FACCX,#0
 |
        FLTD    FACC,IACC
 ]
        MOV     PC,R14
INTEGY  TEQ     TYPE,#0
INTEGZ  BEQ     ERTYPEINT
        MOVPL   PC,R14
INTEGB  MOV     TYPE,#TINTEGER         ;set type to integer
;fix fp number to integer in FACC
SFIX
 [ FPOINT=1
        FIXZ    IACC,FACC
        MOV     PC,R14
 |
        SUBS    FACCX,FACCX,#&80       ;subtract bias
        BLS     FCLR                   ;branch if too small
        RSBS    FACCX,FACCX,#32        ;decide whether possible
        BLS     SFIXONE                ;too large (but check carefully for maximum negative integer)
        MOV     FACC,FACC,LSR FACCX    ;shift by exponent
        TEQ     FSIGN,#0               ;check sign
        RSBMI   FACC,FACC,#0           ;negate
        MOV     PC,R14
SFIXONE BNE     FOVR                   ;modulus greater than 2^32
        TEQ     FSIGN,#0
        BPL     FOVR                   ;positive
        CMP     FACC,#TFP
        MOVEQ   PC,R14                 ;-2^31
        B       FOVR
INTRND  SUBS    FACCX,FACCX,#&80       ;subtract bias
        BCC     FCLR                   ;branch if too small
        MOV     FGRD,FACC,LSL FACCX    ;remaining fraction
        RSBS    FACCX,FACCX,#32        ;decide whether possible
        BLS     FOVR                   ;too large
        MOV     FACC,FACC,LSR FACCX    ;shift by exponent
        TEQ     FGRD,#0
        ADDMI   FACC,FACC,#1
        TEQ     FSIGN,#0               ;check sign
        RSBMI   FACC,FACC,#0           ;negate
        MOV     PC,R14
;ffrac sets fwgrd to integer part and facc to remaining floating fraction
FFRAC   SUBS    FWACCX,FACCX,#&80      ;subtract bias
        BCC     FFRAC2                 ;branch if too small
        MOV     FWACC,FACC
        RSBS    FWGRD,FWACCX,#32       ;decide whether possible
        BCC     FOVR                   ;too large
        MOV     FGRD,#0
        MOV     FWGRD,FACC,LSR FWGRD   ;shift by 32-exponent
        TEQ     FSIGN,#0               ;check sign
        RSBMI   FWGRD,FWGRD,#0         ;negate
        MOV     FACCX,#&80
        MOVS    FACC,FWACC,LSL FWACCX  ;remaining fraction (and test if <.5)
        BPL     FNRM2
        EORS    FSIGN,FSIGN,#&80000000 ;negate
        ADDMI   FWGRD,FWGRD,#1
        SUBPL   FWGRD,FWGRD,#1
        RSBS    FACC,FACC,#0
        B       FNRM2
FFRAC2  MOV     FWGRD,#0
        MOV     PC,R14
FSQR2   LDMFD   SP!,{AELINE,R14}       ;entry from fipow
;[FACC^2]
FSQR    TEQ     FACC,#0
        MOVEQ   PC,R14
        STR     R14,[SP,#-4]!
        MOV     FSIGN,#0
        MOV     FWACC,FACC
        ADD     FACCX,FACCX,FACCX
        BL      IFMUL2
        LDR     R14,[SP],#4
FTIDY   CMP     FGRD,#&80000000
        BCC     FTRNDZ
        BICEQ   FACC,FACC,#1           ;if eq clear bottom bit so add sets it
        ADDS    FACC,FACC,#1
        MOVCS   FACC,FACC,RRX
        ADDCS   FACCX,FACCX,#1
FTRNDZ  CMP     FACCX,#256
        MOVCC   PC,R14
        TEQ     FACCX,#0
        BMI     FCLR
        B       FOVR
;[TYPE]*FACC F1
F1MUL   TEQ     FACC,#0
        BEQ     FCLR
        LOAD5   FWACC,FWACCX,TYPE,FWGRD,FWSIGN
        AND     FWSIGN,FWACC,#&80000000
        ORRNE   FWACC,FWACC,#&80000000
        BNE     FMUL1
        B       FCLR
;[TYPE]*FACC
FMUL    TEQ     FACC,#0
        LDMNEIA TYPE,{FWACC,FWACCX}    ;skip if 0
        ANDNE   FWSIGN,FWACCX,#&80000000
        ANDNE   FWACCX,FWACCX,#255
FMUL0   TEQNE   FWACC,#0
        BEQ     FCLR                   ;*0 implies zero
FMUL1   EOR     FSIGN,FSIGN,FWSIGN     ;correct sign
        ADD     FACCX,FACCX,FWACCX     ;calculate exponent
        SUB     FACCX,FACCX,#&80       ;restore bias
        MOV     FWGRD,FACC,LSR #16
        MOV     FWACCX,FWACC,LSR #16
        EOR     FWSIGN,FACC,FWGRD,LSL #16
        EORS    FWACC,FWACC,FWACCX,LSL #16
        MUL     FGRD,FWSIGN,FWACC
        MULNE   FWACC,FWGRD,FWACC
        MUL     FWSIGN,FWACCX,FWSIGN
        MUL     FWACCX,FWGRD,FWACCX
        ADDS    FWSIGN,FWACC,FWSIGN
        ADDCS   FWACCX,FWACCX,#&10000
        ADDS    FGRD,FGRD,FWSIGN,LSL #16
        ADCS    FACC,FWACCX,FWSIGN,LSR #16
        BMI     FMULTIDY
        ADDS    FGRD,FGRD,FGRD
        ADC     FACC,FACC,FACC
        SUB     FACCX,FACCX,#1
FMULTIDY
        CMP     FGRD,#&80000000
        BCC     FMULTRNDT
        BICEQ   FACC,FACC,#1           ;if eq clear bottom bit so add sets it
        ADDS    FACC,FACC,#1
        MOVCS   FACC,FACC,RRX
        ADDCS   FACCX,FACCX,#1
FMULTRNDT
        CMP     FACCX,#256
        MOVCC   PC,R14
        TEQ     FACCX,#0
        BMI     FCLR
        B       FOVR
;1/FACC
FRECIP  TEQ     FACC,#0
        MOVNE   FWACC,#&80000000
        RSBNE   FACCX,FACCX,#&81       ;calculate final exponent
        ADDNE   FACCX,FACCX,#&81       ;restore bias
        BNE     FDIVB
        B       ZDIVOR
;[TYPE]/FACC F1
F1XDIV  TEQ     FACC,#0
        BEQ     ZDIVOR
        LOAD5   FWACC,FWACCX,TYPE,FWGRD,FWSIGN
        AND     FWSIGN,FWACC,#&80000000
        ORRNE   FWACC,FWACC,#&80000000
        BNE     FDIVA
        B       FCLR
;format 2 [TYPE]/FACC
FXDIV   TEQ     FACC,#0
        BEQ     ZDIVOR
        LDMIA   TYPE,{FWACC,FWACCX}
        AND     FWSIGN,FWACCX,#&80000000
        AND     FWACCX,FWACCX,#255
FXDIV0  TEQ     FWACC,#0
        BEQ     FCLR                   ;if w=0 clear facc and return
FDIVA   EOR     FSIGN,FSIGN,FWSIGN     ;correct sign
        SUB     FACCX,FWACCX,FACCX     ;calculate final exponent
        ADD     FACCX,FACCX,#&81       ;restore bias
FDIVB   CMP     FACC,#&80000000
        BEQ     FDIVQUIK
        MOVS    FWSIGN,#256            ;clear carry and init loop count
FDIVC   CMPCCS  FWACC,FACC
        SUBCS   FWACC,FWACC,FACC
        ADC     FWACCX,FWACCX,FWACCX   ;rotate 0/1 into fwaccx
        MOVS    FWACC,FWACC,ASL #1
        CMPCCS  FWACC,FACC
        SUBCS   FWACC,FWACC,FACC
        ADC     FWACCX,FWACCX,FWACCX   ;rotate 0/1 into fwaccx
        MOVS    FWACC,FWACC,ASL #1
        CMPCCS  FWACC,FACC
        SUBCS   FWACC,FWACC,FACC
        ADC     FWACCX,FWACCX,FWACCX   ;rotate 0/1 into fwaccx
        MOVS    FWACC,FWACC,ASL #1
        CMPCCS  FWACC,FACC
        SUBCS   FWACC,FWACC,FACC
        ADC     FWACCX,FWACCX,FWACCX   ;rotate 0/1 into fwaccx
        MOVS    FWACC,FWACC,ASL #1
        CMPCCS  FWACC,FACC
        SUBCS   FWACC,FWACC,FACC
        ADC     FWACCX,FWACCX,FWACCX   ;rotate 0/1 into fwaccx
        MOVS    FWACC,FWACC,ASL #1
        CMPCCS  FWACC,FACC
        SUBCS   FWACC,FWACC,FACC
        ADC     FWACCX,FWACCX,FWACCX   ;rotate 0/1 into fwaccx
        MOVS    FWACC,FWACC,ASL #1
        CMPCCS  FWACC,FACC
        SUBCS   FWACC,FWACC,FACC
        ADC     FWACCX,FWACCX,FWACCX   ;rotate 0/1 into fwaccx
        MOVS    FWACC,FWACC,ASL #1
        CMPCCS  FWACC,FACC
        SUBCS   FWACC,FWACC,FACC
        ADC     FWACCX,FWACCX,FWACCX   ;rotate 0/1 into fwaccx
        MOVS    FWACC,FWACC,ASL #1
        SUB     FWSIGN,FWSIGN,#64
        TEQ     FWSIGN,#0              ;done like this to preserve carry
        BNE     FDIVC
        TEQ     FWACCX,#0
        BMI     FDIVD                  ;already normalised
        CMPCCS  FWACC,FACC
        SUBCS   FWACC,FWACC,FACC
        ADC     FWACCX,FWACCX,FWACCX   ;rotate 0/1 into fwaccx, thus normalising
        MOVS    FWACC,FWACC,ASL #1
        SUB     FACCX,FACCX,#1
FDIVD   CMPCCS  FWACC,FACC
; BCC FDIVF
; TEQ FWACC,FACC
; BICEQ FWACCX,FWACCX,#1
        ADDCSS  FWACCX,FWACCX,#1
        MOVCS   FWACCX,FWACCX,RRX
        ADDCS   FACCX,FACCX,#1
FDIVF   MOV     FACC,FWACCX
        BICS    FWACCX,FACCX,#255
        MOVEQ   PC,R14
        BMI     FCLR
        B       FOVR
FDIVQUIK
        MOV     FACC,FWACC
        BICS    FWACCX,FACCX,#255
        MOVEQ   PC,R14
        BMI     FCLR
        B       FOVR
;-FACC+[TYPE] F1
F1XSUB  TEQ     FACC,#0
        EORNE   FSIGN,FSIGN,#&80000000
;FACC+[TYPE] F1
F1ADD   LOAD5   FWACC,FWACCX,TYPE,FWGRD,FWSIGN
        AND     FWSIGN,FWACC,#&80000000
        ORRNE   FWACC,FWACC,#&80000000
        BNE     FADDW
        MOV     PC,R14
;-FACC+[TYPE]
FXSUB   TEQ     FACC,#0
        EORNE   FSIGN,FSIGN,#&80000000
;FACC+[TYPE]
FADD    LDMIA   TYPE,{FWACC,FWACCX}
        AND     FWSIGN,FWACCX,#&80000000
        AND     FWACCX,FWACCX,#255
        TEQ     FWACC,#0
        MOVEQ   PC,R14
;add two accs (called from Expr fp +). Fwgrd, fgrd irrelevant
FADDW   ROUT
        TEQ     FACC,#0                ;no work needed?
        BEQ     FWTOA                  ;no round
        SUBS    FWACCX,FACCX,FWACCX    ;difference in exponents
        BEQ     %10                    ;no difference means no normalisation
        BHI     %06                    ;FACCX>FWRKX
        RSB     FWACCX,FWACCX,#0       ;negate
        ADD     FACCX,FWACCX,FACCX     ;update result exponent
        RSBS    FGRD,FWACCX,#32
        BCC     FSWTOA                 ;return if shift too large for significance: no round
        TEQ     FSIGN,FWSIGN           ;test signs
        BMI     %04                    ;signs different
        MOV     FGRD,FACC,LSL FGRD
02      ADDS    FACC,FWACC,FACC,LSR FWACCX
        BCC     %16                    ;no renorm needed
;renormalise by right shift with guard
        MOVS    FACC,FACC,RRX
        MOV     FGRD,FGRD,RRX
        ADD     FACCX,FACCX,#1
        CMP     FACCX,#256
        BCS     FOVR
        CMP     FGRD,#&80000000        ;round with no overflow pain
        MOVCC   PC,R14
        BICEQ   FACC,FACC,#1           ;if eq clear bottom bit so add sets it
        ADDS    FACC,FACC,#1
        MOVCC   PC,R14
        MOV     FACC,FACC,RRX          ;carry set
        ADD     FACCX,FACCX,#1
        CMP     FACCX,#256
        MOVCC   PC,R14
        B       FOVR
04      MOV     FWGRD,#0
        SUBS    FGRD,FWGRD,FACC,LSL FGRD
        SBCS    FACC,FWACC,FACC,LSR FWACCX
        MOV     FSIGN,FWSIGN
        BPL     %12
        MOV     PC,R14
06      RSBS    FWGRD,FWACCX,#32
        MOVCC   PC,R14                 ;return if shift too large for significance: no round
        TEQ     FSIGN,FWSIGN           ;test signs
        BMI     %08
        MOV     FGRD,FWACC,LSL FWGRD
        ADDS    FACC,FACC,FWACC,LSR FWACCX
        BCC     %16                    ;no renorm needed
;renormalise by right shift with guard
        MOVS    FACC,FACC,RRX
        MOV     FGRD,FGRD,RRX
        ADD     FACCX,FACCX,#1
        CMP     FACCX,#256
        BCS     FOVR
16      CMP     FGRD,#&80000000        ;round with no overflow pain
        MOVCC   PC,R14
        BICEQ   FACC,FACC,#1           ;if eq clear bottom bit so add sets it
        ADDS    FACC,FACC,#1
        MOVCC   PC,R14
        MOV     FACC,FACC,RRX          ;carry set
        ADD     FACCX,FACCX,#1
        CMP     FACCX,#256
        MOVCC   PC,R14
        B       FOVR
08      MOV     FGRD,#0
        SUBS    FGRD,FGRD,FWACC,LSL FWGRD
        SBCS    FACC,FACC,FWACC,LSR FWACCX
        BPL     %12
        MOV     PC,R14
10      MOV     FGRD,#0
        TEQ     FSIGN,FWSIGN
        BPL     %02                    ;signs same
        SUBS    FACC,FACC,FWACC
        MOVCC   FSIGN,FWSIGN
        RSBCCS  FACC,FACC,#0
        MOVMI   PC,R14                 ;return if normalised
;renormalise by left shift with guard: had to subtract mantissas
12      BEQ     FNRMB                  ;no answer in facc: no round possible
        ADDS    FGRD,FGRD,FGRD
        ADCS    FACC,FACC,FACC
        BMI     %18
        ADDS    FGRD,FGRD,FGRD
        ADCS    FACC,FACC,FACC
        BPL     %14
        SUB     FACCX,FACCX,#1
18      SUBS    FACCX,FACCX,#1
        BPL     %16
        CMP     FGRD,#&80000000        ;incipient underflow: see if can round up
        BCC     FCLR                   ;no, underflow to zero
        BICEQ   FACC,FACC,#1           ;if eq clear bottom bit so add sets it
        ADDS    FACC,FACC,#1
        MOVCS   FACC,FACC,RRX
        ADDCS   FACCX,FACCX,#1
        TEQ     FACCX,#0
        MOVPL   PC,R14
        B       FCLR                   ;finally underflow
14      SUB     FACCX,FACCX,#2
        ADDS    FGRD,FGRD,FGRD
        ADCS    FACC,FACC,FACC
        BMI     %18
        ADDS    FGRD,FGRD,FGRD
        ADCS    FACC,FACC,FACC
        BPL     %14
        SUBS    FACCX,FACCX,#2
        BPL     %16
        CMP     FGRD,#&80000000        ;incipient underflow: see if can round up
        BCC     FCLR                   ;no, underflow to zero
        BICEQ   FACC,FACC,#1           ;if eq clear bottom bit so add sets it
        ADDS    FACC,FACC,#1
        MOVCS   FACC,FACC,RRX
        ADDCS   FACCX,FACCX,#1
        TEQ     FACCX,#0
        MOVPL   PC,R14
        B       FCLR                   ;finally underflow
FWTOA   MOV     FACCX,FWACCX
FSWTOA  MOV     FSIGN,FWSIGN
        MOV     FACC,FWACC
        MOV     PC,R14
;high precision add. Fwgrd not needed on input. Fgrd must be 0
;no overflow detection (and precious little underflow!)
FADDW1  ROUT
        TEQ     FACC,#0                ;no work needed
        BEQ     FWTOA
        SUBS    FWACCX,FACCX,FWACCX    ;difference in exponents
        BEQ     %20                    ;no difference means no normalisation
        BCC     %10                    ;FACCX<FWRKX
        CMP     FWACCX,#&24
        MOVHI   PC,R14                 ;return if shift too large for significance
        MOV     FWGRD,FWACC
        MOV     FWACC,FWACC,LSR FWACCX ;shift mantissa high to position
        RSBS    FWACCX,FWACCX,#32
        MOVPL   FWGRD,FWGRD,LSL FWACCX ;if n<=32 shift left by 32-n
        RSBMI   FWACCX,FWACCX,#0       ;if n>32 then
        MOVMI   FWGRD,FWGRD,LSR FWACCX ;shift right by n-32
        B       %20
10      RSB     FWACCX,FWACCX,#0       ;negate
        ADD     FACCX,FWACCX,FACCX     ;update result exponent
        CMP     FWACCX,#&24
        BHI     FSWTOA                 ;return if shift too large for significance
        MOV     FGRD,FACC
        MOV     FACC,FACC,LSR FWACCX   ;shift mantissa high to position
        RSBS    FWACCX,FWACCX,#32
        MOVPL   FGRD,FGRD,LSL FWACCX   ;if n<=32 shift left by 32-n
        RSBMI   FWACCX,FWACCX,#0       ;if n>32 then
        MOVMI   FGRD,FGRD,LSR FWACCX   ;shift right by n-32
20      TEQ     FSIGN,FWSIGN           ;test signs
        BMI     %30
        ADDS    FGRD,FGRD,FWGRD        ;same sign so add
        ADCS    FACC,FACC,FWACC
        MOVCC   PC,R14                 ;no renorm needed
;renormalise by right shift with guard
        MOVS    FACC,FACC,RRX
        MOV     FGRD,FGRD,RRX
        ADD     FACCX,FACCX,#1
        MOV     PC,R14
30      SUBS    FGRD,FGRD,FWGRD
        SBCS    FACC,FACC,FWACC
        BCS     FNRM2
        MOV     FSIGN,FWSIGN
        RSBS    FGRD,FGRD,#0
        RSCS    FACC,FACC,#0
;renormalise by left shift with guard
FNRM2   MOVMI   PC,R14                 ;return if normalised
        BEQ     FNRMB
FNRMA   SUB     FACCX,FACCX,#1
        ADDS    FGRD,FGRD,FGRD
        ADCS    FACC,FACC,FACC
        MOVVS   PC,R14
        SUB     FACCX,FACCX,#1
        ADDS    FGRD,FGRD,FGRD
        ADCS    FACC,FACC,FACC
        BVC     FNRMA
        MOV     PC,R14
FNRMB   MOVS    FACC,FGRD              ;if facc zero then facc:=fgrd
        BEQ     FCLR
        SUBS    FACCX,FACCX,#32        ;exponent dec by word
      [ UseCLZ
        BMI     FCLR
        CLZ     FGRD,FACC
        MOV     FACC,FACC,LSL FGRD
        SUB     FACCX,FACCX,FGRD
        MOV     PC,LR
      |
        BPL     IFLTA
      ]
;clear facc
FCLR    MOV     FACCX,#0
        MOV     FACC,#0
        MOV     FSIGN,#0
        MOV     FGRD,#0
        MOV     PC,R14
FONE    MOV     FACC,#&80000000
        MOV     FACCX,#&81
        MOV     FSIGN,#0
        MOV     PC,R14
FTOW    MOV     FWACC,FACC
        MOV     FWACCX,FACCX
        MOV     FWSIGN,FSIGN
        MOV     PC,R14
;facc*[TYPE]
IFMUL   ROUT
        TEQ     FACC,#0
        LDMNEIA TYPE,{FWACC,FWACCX}    ;skip if 0
        ANDNE   FWSIGN,FWACCX,#&80000000
        ANDNE   FWACCX,FWACCX,#255
        TEQNE   FWACC,#0
        BEQ     FCLR                   ;*0 implies zero
        EOR     FSIGN,FSIGN,FWSIGN     ;correct sign
        ADD     FACCX,FACCX,FWACCX     ;calculate exponent
IFMUL2  SUB     FACCX,FACCX,#&80       ;restore bias
        MOV     FWGRD,FACC,LSR #16
        MOV     FWACCX,FWACC,LSR #16
        EOR     FWSIGN,FACC,FWGRD,LSL #16
        EORS    FWACC,FWACC,FWACCX,LSL #16
        MUL     FGRD,FWSIGN,FWACC
        MULNE   FWACC,FWGRD,FWACC
        MUL     FWSIGN,FWACCX,FWSIGN
        MUL     FWACCX,FWGRD,FWACCX
        ADDS    FWSIGN,FWACC,FWSIGN
        ADDCS   FWACCX,FWACCX,#&10000
        ADDS    FGRD,FGRD,FWSIGN,LSL #16
        ADCS    FACC,FWACCX,FWSIGN,LSR #16
        MOVMI   PC,R14
        ADDS    FGRD,FGRD,FGRD
        ADC     FACC,FACC,FACC
        SUB     FACCX,FACCX,#1
        MOV     PC,R14
;raise FACC to integer power of TYPE
FIPOW   ROUT
        TEQ     TYPE,#0
        BEQ     FONE
        STMFD   SP!,{R14,AELINE}
        RSBMI   TYPE,TYPE,#0           ;if negative negate TYPE
        BLMI    FRECIP
        CMP     AELINE,#2
        BCC     %99
        BEQ     FSQR2
        MOV     AELINE,#0
10      MOVS    TYPE,TYPE,LSR #1
        BEQ     %50
        ADDCS   AELINE,AELINE,#1
        BLCS    FPUSH
        BL      FSQR
        B       %10
40      MOV     TYPE,SP
        BL      FMUL
        ADD     SP,SP,#8
50      SUBS    AELINE,AELINE,#1
        BPL     %40
99      LDMFD   SP!,{AELINE,PC}
;SQRONE STR   R14,[SP,#-4]! ;SQR(1-ACC^2)
; BL FSQR
; EOR FSIGN,FSIGN,#&80000000
; MOV FWSIGN,#0
; MOV FWACC,#&80000000
; MOV FWACCX,#&81
; BL FADDW
; LDR   R14,[SP],#4
;square root of FACC
FSQRT   ROUT
        TEQ     FACC,#0
        MOVEQ   PC,R14                 ;square root of zero easy
        TEQ     FSIGN,#0
        BMI     FSQRTN
        MOV     FWACC,#&40000000
        MOVS    FACCX,FACCX,LSR #1
        ADC     FACCX,FACCX,#&40
        SUBCC   FACC,FACC,#&40000000
        SUBCS   FACC,FACC,#&80000000   ;adjust for odd exponent
        MOVCC   FWSIGN,#&10000000      ;rotating bit
        MOVCS   FWSIGN,#&08000000
;for first word's worth of work KNOW only top 32 bits used
05      EOR     FWACC,FWACC,FWSIGN     ;set bit
        CMP     FACC,FWACC             ;check if can subtract
        SUBCS   FACC,FACC,FWACC        ;can subtract
        EORCS   FWACC,FWACC,FWSIGN,ASL #1 ;subtracted, so set higher bit
        EOR     FWACC,FWACC,FWSIGN     ;clear bit
        ADD     FACC,FACC,FACC         ;double facc
        MOVS    FWSIGN,FWSIGN,ROR #1   ;rotate bit
        BPL     %05
        MOV     FGRD,#0
;repeat above for extra 3 bits worth on double precision
        CMP     FACC,FWACC
; CMPEQS FGRD,#&80000000
; BCC %10
        BLS     %10                    ;transformation of the above two instructions (!)
        SUBS    FGRD,FGRD,#&80000000
        SBC     FACC,FACC,FWACC
        EOR     FWACC,FWACC,#1
10      ADDS    FGRD,FGRD,FGRD
        ADC     FACC,FACC,FACC
        MOV     FWGRD,#0
        CMP     FACC,FWACC
        CMPEQS  FGRD,#&40000000
        BCC     %20
        SUBS    FGRD,FGRD,#&40000000
        SBC     FACC,FACC,FWACC
        EOR     FWGRD,FWGRD,#&80000000
20      ADDS    FGRD,FGRD,FGRD
        ADC     FACC,FACC,FACC
        EOR     FWGRD,FWGRD,#&20000000
        CMP     FACC,FWACC
        CMPEQS  FGRD,FWGRD
        BCC     %30
        SUBS    FGRD,FGRD,FWGRD
        SBC     FACC,FACC,FWACC
        EOR     FWGRD,FWGRD,#&40000000
30      EOR     FWGRD,FWGRD,#&20000000
; ORR FWSIGN,FGRD,FACC,LSL #1 ;zero remainder produced?
        MOVS    FGRD,FWGRD,ASL #1
        ADC     FACC,FWACC,FWACC
        MOVPL   PC,R14
; TEQ FWSIGN,#0
; BICEQ FACC,FACC,#1
        ADDS    FACC,FACC,#1
        MOVCS   FACC,FACC,RRX
        ADDCS   FACCX,FACCX,#1
        MOV     PC,R14
FEXP    CMP     FACCX,#&87
        BCC     FEXPA                  ;certainly in range, certainly not falls through
        CMPEQ   FACC,#&B3000000
        BCC     FEXPA
        TEQ     FSIGN,#0               ;not in range, under or overflow?
        BMI     FCLR                   ;underflow
        B       ERFEXP
FONEOVERLN2
        DCD     &B8AA3B29
        =       &81,0,0,0              ;1.4426950408889634074
FEXPA   CMP     FACCX,#&61
        BCC     FONE
        STR     R14,[SP,#-4]!
        FPUSH                          ;stack X, link
        ADR     TYPE,FONEOVERLN2
        BL      FMUL
        BL      INTRND
        MOVS    R10,FACC
        BEQ     FEXPB
        RSB     R1,FACC,FACC,LSL #2
        RSB     R1,FACC,R1,LSL #2
        ADD     R1,FACC,R1,LSL #3
        RSB     FACC,FACC,R1,LSL #2    ;N*355
        RSB     FACC,FACC,#0
        BL      IFLT                   ;-XN*355
        SUB     FACCX,FACCX,#9         ;/512 (no underflow possible) -XN*355/512
        MOV     TYPE,SP
        BL      FADD
        FSTA    TYPE
        RSB     FACC,R10,#0
        BL      IFLT
        ADR     TYPE,LOGC2
        BL      FMUL
        MOV     TYPE,SP
        BL      FADD
        FSTA    TYPE                   ;stack g, link
        B       FEXPC
FEXPB   FLDA    SP
FEXPC   BL      FSQR
        FPUSH                          ;stack z, g, link
        ADR     TYPE,EXPP1
        BL      FMUL
        ADR     TYPE,EXPP0
        BL      FADD
        ADD     TYPE,SP,#8
        BL      FMUL
        FPUSH                          ;stack g*P(z), z, g, link
        ADD     TYPE,SP,#8
        FLDA    TYPE
        ADR     TYPE,EXPQ2
        BL      FMUL
        ADR     TYPE,EXPQ1
        BL      FADD
        ADD     TYPE,SP,#8
        BL      FMUL
        MOV     FWACC,#&80000000
        MOV     FWSIGN,#0
        MOV     FWACCX,#&80
        BL      FADDW                  ;Q(z)
        LDMFD   SP,{FWACC,FWACCX}
        AND     FWSIGN,FWACCX,#&80000000
        AND     FWACCX,FWACCX,#255
        TEQ     FWACC,#0
        EORNE   FWSIGN,FWSIGN,#&80000000
        BL      FADDW                  ;Q(z)-g*P(z)
        MOV     TYPE,SP
        BL      FXDIV
        MOV     FWACC,#&80000000
        MOV     FWSIGN,#0
        MOV     FWACCX,#&80
        BL      FADDW
        ADD     FACCX,FACCX,R10
        ADD     FACCX,FACCX,#1
        ADD     SP,SP,#24
        LDR     R14,[SP],#4
        B       FTRNDZ
EXPP1   DCD     &C2FBC974
        =       &79,0,0,0              ;.59504254977591E-2
EXPP0   DCD     &80000000
        =       &7F,0,0,0              ;.24999999999992
EXPQ2   DCD     &9BDE1394
        =       &75,0,0,0              ;.29729363682238E-3
EXPQ1   DCD     &DB699D07
        =       &7C,0,0,0              ;.53567517645222E-1
LOGC2   DCD     &DE8082E3
        =       &74,0,0,&80            ;-2.121944400546905827679E-4
FLOG    TEQ     FACC,#0
        BEQ     ERFLOG
        TEQ     FSIGN,#0
        BMI     ERFLOG
        STR     R14,[SP,#-4]!
        SUB     R10,FACCX,#&80         ;determine N
        MOV     FACCX,#&80             ;determine f
        SUBS    R4,FACC,#&B5000000     ;f>c0=sqrt(1/2)
; SUBCSS R4,R4,#&00040000
; SUBCSS R4,R4,#&0000F300
; SUBCSS R4,R4,#&00000034
        BCC     FLOGA
        STMFD   SP!,{FACC,FACCX,FSIGN} ;stack f
        MOV     FWACC,#&80000000
        MOV     FWSIGN,#&80000000
        MOV     FWACCX,#&81            ;-1
        BL      FADDW                  ;f-1
        LDMFD   SP!,{FWACC,FWACCX,FWSIGN} ;work=pop stack=f
        FPUSH                          ;stack znum=f-1
        SUB     FWACCX,FWACCX,#1       ;f*.5
        MOV     FACC,#&80000000
        MOV     FSIGN,#0
        MOV     FACCX,#&80             ;.5
        BL      FADDW                  ;zden=f*.5+.5
        B       FLOGB
FLOGA   SUB     R10,R10,#1             ;N=N-1
        MOV     FWACC,#&80000000
        MOV     FWSIGN,#&80000000
        MOV     FWACCX,#&80            ;-.5
        BL      FADDW
        FPUSH                          ;stack znum=f-.5
        SUB     FACCX,FACCX,#1         ;znum*.5
        MOV     FWACC,#&80000000
        MOV     FWSIGN,#0
        MOV     FWACCX,#&80            ;.5
        BL      FADDW                  ;zden=znum*.5+.5
FLOGB   MOV     TYPE,SP
        BL      FXDIV                  ;z=znum/zden
        FSTA    TYPE                   ;stack z, link
        SUB     SP,SP,#8               ;gap on stack
        BL      FSQR                   ;w=z^2
        FPUSH                          ;stack w, ?, z, link
        ADR     TYPE,LOGA1
        BL      FMUL                   ;a1*w
        ADR     TYPE,LOGA0
        BL      FADD                   ;a1*w+a0
        MOV     TYPE,SP
        BL      FMUL                   ;w*(a1*w+a0)
        ADD     TYPE,SP,#8
        FSTA    TYPE                   ;stack w, w*A(w), z, link
        BL      FPULL                  ;stack w*A(w), z, link
        ADR     TYPE,LOGB0
        BL      FADD                   ;B(w)
        MOV     TYPE,SP
        BL      FXDIV                  ;r(z^2)=B(w) XDIV w*A(w)
        ADD     SP,SP,#8
        MOV     TYPE,SP                ;stack z, link
        BL      FMUL                   ;z*r(z^2)
        BL      FADD                   ;z+z*r(z^2)
        TEQ     R10,#0
        BEQ     FLOGC
        FSTA    TYPE                   ;stack R(z), link
        MOV     FACC,R10
        BL      IFLT                   ;XN
        ADR     TYPE,LOGC2
        BL      FMUL                   ;XN*C2
        MOV     TYPE,SP
        BL      FADD                   ;XN*C2+R(z)
        BL      FTOW
        RSB     R1,R10,R10,LSL #2
        RSB     R1,R10,R1,LSL #2
        ADD     R1,R10,R1,LSL #3
        RSB     FACC,R10,R1,LSL #2     ;N*355
        BL      IFLT                   ;XN*355
        SUB     FACCX,FACCX,#9         ;/512 (no underflow possible) XN*355/512
        BL      FADDW                  ;(XN*C2+R(z))+XN*C1
FLOGC   ADD     SP,SP,#8
        LDR     PC,[SP],#4
LOGA1   DCD     &DED689E5
        =       &7A,0,0,0              ;.1360095468621E-1
LOGA0   DCD     &EE08307E
        =       &7F,0,0,&80            ;-.4649062303464E0
LOGB0   DCD     &B286223E
        =       &83,0,0,&80            ;-.5578873750242E1
;f1sta stores in external form packed format. Destroys FGRD
F1STA   BIC     FGRD,FACC,#&80000000
        ORR     FGRD,FGRD,FSIGN        ;fsign only 0 or &80000000!
      [ NoARMv6 :LOR: NoUnaligned
        TST     TYPE,#2
        BNE     F1STA2
        STRB    FACCX,[TYPE,#4]
        TST     TYPE,#1
        STREQ   FGRD,[TYPE]
        MOVEQ   PC,R14
        LDRB    FSIGN,[TYPE,#-1]
        ORR     FSIGN,FSIGN,FGRD,LSL #8
        STR     FSIGN,[TYPE,#-1]
        MOV     FSIGN,FGRD,LSR #24
        STRB    FSIGN,[TYPE,#3]
        AND     FSIGN,FGRD,#&80000000  ;restore sign bit
        MOV     PC,R14
F1STA2  STRB    FGRD,[TYPE]
        MOV     FGRD,FGRD,LSR #8
        ORR     FGRD,FGRD,FACCX,LSL #24
        TST     TYPE,#1
        STRNE   FGRD,[TYPE,#1]
        MOVNE   PC,R14
        STRB    FGRD,[TYPE,#1]
        MOV     FGRD,FGRD,LSR #8
        LDRB    FSIGN,[TYPE,#5]
        ORR     FSIGN,FGRD,FSIGN,LSL #24
        STR     FSIGN,[TYPE,#2]
        MOV     FSIGN,FSIGN,LSL #16
        AND     FSIGN,FSIGN,#&80000000
      |
        STR     FGRD,[TYPE] ; unaligned store
        STRB    FACCX,[TYPE,#4]
      ]
        MOV     PC,R14
;FACC=[TYPE] F1
F1LDA   LOAD5   FACC,FACCX,TYPE,FGRD,FSIGN
        AND     FSIGN,FACC,#&80000000
        ORRNE   FACC,FACC,#&80000000
        MOV     PC,R14
 ]
ASN     STR     R14,[SP,#-4]!
        BL      FACTOR
        BLPL    FLOATQ
 [ FPOINT=0
        MOV     R10,#0
ASN1    STMFD   SP!,{FSIGN,R10}        ;stack fsign, FLAG
        MOV     FSIGN,#0
        CMP     FACCX,#&80
        CMPEQ   FACC,#&80000000        ;>1/2?
        BLS     ASN2
        CMP     FACCX,#&81
        CMPEQ   FACC,#&80000000
        BHI     FOVR1
        RSB     R10,R10,#1
        SUB     SP,SP,#8               ;space for Y
        EOR     FSIGN,FSIGN,#&80000000 ;safe since not 0-0 situation
        MOV     FWACC,#&80000000
        MOV     FWACCX,#&81
        MOV     FWSIGN,#0
        BL      FADDW                  ;1-Y
        SUB     FACCX,FACCX,#1
        FPUSH                          ;stack g, ?, fsign, FLAG
        BL      FSQRT
        ADD     FACCX,FACCX,#1         ;can't be zero
        EOR     FSIGN,FSIGN,#&80000000
        ADD     TYPE,SP,#8
        FSTA    TYPE                   ;stack g, Y, fsign, FLAG
        FLDA    SP
        B       ASN3
ASN2    CMP     FACCX,#&71
        BCC     ASN9
        FPUSH                          ;stack Y, fsign, FLAG
        BL      FSQR
        FPUSH                          ;stack g, Y, fsign, FLAG
ASN3    ADR     TYPE,ASNP3
        BL      FMUL
        ADR     TYPE,ASNP2
        BL      FADD
        MOV     TYPE,SP
        BL      FMUL
        ADR     TYPE,ASNP1
        BL      FADD
        MOV     TYPE,SP
        BL      FMUL                   ;g*P(g)
        FPUSH                          ;stack g*P(g), g, Y, fsign, FLAG
        ADD     TYPE,SP,#8
        FLDA    TYPE
        ADR     TYPE,ASNQ2
        BL      FADD
        ADD     TYPE,SP,#8
        BL      FMUL
        ADR     TYPE,ASNQ1
        BL      FADD
        ADD     TYPE,SP,#8
        BL      FMUL
        ADR     TYPE,ASNQ0
        BL      FADD
        MOV     TYPE,SP
        BL      FXDIV
        ADD     TYPE,SP,#16
        BL      FMUL
        BL      FADD
        ADD     SP,SP,#24              ;stack fsign, FLAG
ASN9    LDR     R4,[SP,#4]
        CMP     R4,#0
        BEQ     ASN9A
        LDMFD   SP!,{R4,R5}
        CMP     R4,#0
        BEQ     ASN9B
        CMP     R10,#0
        ADREQ   TYPE,FULLPI
        ADRNE   TYPE,HALFPI
        BL      FADD
        B       FSINSTK
ASN9B   TEQ     FACC,#0
        EORNE   FSIGN,FSIGN,#&80000000
        CMP     R10,#0
        ADRNE   TYPE,HALFPI
        BLNE    FADD
        B       FSINSTK
ASN9A   CMP     R10,#0
        BEQ     ASN9AA
        ADR     TYPE,HALFPI
        BL      FADD
ASN9AA  LDMFD   SP!,{R4,R5}
        CMP     R4,#0
        BEQ     FSINSTK
        TEQ     FACC,#0
        EORNE   FSIGN,FSIGN,#&80000000
        B       FSINSTK
ASNP3   DCD     &98313F1B
        =       &80,0,0,&80            ;-.59450144193246E0
ASNP2   DCD     &B9F9E054
        =       &82,0,0,0              ;.29058762374859E1
ASNP1   DCD     &B01B1FCB
        =       &82,0,0,&80            ;-.27516555290596E1
ASNQ2   DCD     &A5578500
        =       &84,0,0,&80            ;-.10333867072113E2
ASNQ1   DCD     &C6EAF706
        =       &85,0,0,0              ;.24864728969164E2
ASNQ0   DCD     &841457DC
        =       &85,0,0,&80            ;-.16509933202424E2
FULLPI  =       &A2,&DA,&0F,&C9
        =       &82,0,0,0
HALFPI  =       &A2,&DA,&0F,&C9
        =       &81,0,0,0              ;1.570796327E0
THIRDPI DCD     &860A91C1
        =       &81,0,0,0              ;1.047197551196597
SIXTHPI DCD     &860A91C1
        =       &80,0,0,0              ;.523598775598298
;HPIHI = &00,&00,&10,&C9
; = &81,0,0,&80 ;-1.570800781E0
;HPILO = &61,&7A,&77,&95
; = &6F,0,0,0 ;4.454455111E-6
;load fp acc with pi
PI      ADR     TYPE,FULLPI
        FLDA    TYPE
 |
        ASND    FACC,FACC
        MOVS    TYPE,#TFP
        LDR     PC,[SP],#4
FULLPI  DCFD    3.14159265358979323846264
PI      LDFD    FACC,FULLPI
 ]
        MOVS    TYPE,#TFP
        MOV     PC,R14

        LNK     Fp2.s