memcpset 22.7 KB
Newer Older
Kevin Bracey's avatar
Kevin Bracey committed
1
; Copyright 1997 Acorn Computers Ltd
Neil Turton's avatar
Neil Turton committed
2 3 4 5 6 7 8 9 10 11 12 13 14
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;     http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;
Kevin Bracey's avatar
Kevin Bracey committed
15 16
;;; memcpyset.s: fast implementation of memcpy, memset, memmove
;;; Copyright (C) Advanced RISC Machines Ltd., 1991
Neil Turton's avatar
Neil Turton committed
17

Kevin Bracey's avatar
Kevin Bracey committed
18
        GET objmacs.s
Neil Turton's avatar
Neil Turton committed
19

Kevin Bracey's avatar
Kevin Bracey committed
20
        CodeArea
Neil Turton's avatar
Neil Turton committed
21

Kevin Bracey's avatar
Kevin Bracey committed
22 23 24 25 26 27 28 29 30
        GBLS  SLA                       ; shift towards low address end
        GBLS  SHA                       ; shift towards high address end
 [ {ENDIAN} = "big"
SLA     SETS "LSL"
SHA     SETS "LSR"
 |                                      ; assume little-endian
SLA     SETS "LSR"
SHA     SETS "LSL"
 ]
Neil Turton's avatar
Neil Turton committed
31

Kevin Bracey's avatar
Kevin Bracey committed
32
 [ make = "memcpy" :LOR: make = "all" :LOR: make = "shared-library"
Neil Turton's avatar
Neil Turton committed
33 34 35 36 37 38
src     RN    a2
dst     RN    a1
n       RN    a3
tmp1    RN    a4
tmp3    RN    ip

Kevin Bracey's avatar
Kevin Bracey committed
39 40 41
        Function memmove, leaf
        Function memcpy, leaf

Neil Turton's avatar
Neil Turton committed
42 43
        CMP     src, dst
        BLO     CopyDown
Kevin Bracey's avatar
Kevin Bracey committed
44 45 46
        Return  , "", LinkNotStacked, EQ ; dst == src, no move required

        FunctionEntry , "dst"           ; Must return original dst.
Neil Turton's avatar
Neil Turton committed
47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
        SUBS    n, n, #4                ; need at least 4 bytes
        BLT     Up_TrailingBytes        ; < 4 bytes to go

        ; word align the dst - first find out how many bytes
        ; must be stored to do this.  If the number is 0
        ; check the src too.

        ANDS    tmp3, dst, #3           ; eq means aligned!
        BNE     Up_AlignDst
        ANDS    tmp3, src, #3
        BNE     Up_SrcUnaligned         ; more difficult!

        ; here when source and destination are both aligned.
        ; number of bytes to transfer is (n+4), n is >= 0.

Up_SrcDstAligned
        SUBS    n, n, #12-4             ; 12 bytes or more?
        BLT     Up_TrailingWords
        ; We only have three registers to play with.  It is
        ; worth gaining more only if the number of bytes to
        ; transfer is greater than 12+8*<registers stacked>
        ; We need to stack 8 (4+4) registers to gain 8 temporaries,
        ; so look for >=44 bytes.  Since we would save 8*4 = 32
        ; bytes at a time we actually compare with 64.

        SUBS    n, n, #32-12            ; n+32 to go.
        BLT     %F1

75
        STR     v1, [sp, #-4]!
Neil Turton's avatar
Neil Turton committed
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94

        ; loop loading 4 registers per time, twice (32 bytes)

0       LDMIA   src!, {tmp1, v1, tmp3, lr}
        STMIA   dst!, {tmp1, v1, tmp3, lr}
        LDMIA   src!, {tmp1, v1, tmp3, lr}
        STMIA   dst!, {tmp1, v1, tmp3, lr}
        SUBS    n, n, #32
        BGE     %B0
        ; see if we can handle another 8

        CMN     n, #16
        LDMGEIA src!, {tmp1, v1, tmp3, lr}
        STMGEIA dst!, {tmp1, v1, tmp3, lr}
        SUBGE   n, n, #16

        ; Reload the registers - note that we still have (n+32)
        ; bytes to go, and that this is <16.

95
        LDR     v1, [sp], #4
Neil Turton's avatar
Neil Turton committed
96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123

        ; Here when there are fewer than 16 bytes to go.

1       ADDS    n, n, #32-12               ; (n-12) to go

        ; Ok - do three words at a time.

2       LDMGEIA src!, {tmp1, tmp3, lr}
        STMGEIA dst!, {tmp1, tmp3, lr}
        SUBGES  n, n, #12
        BGE     %B2
        ; (n-12) bytes to go - 0, 1 or 2 words.  Check
        ; which.

Up_TrailingWords
        ADDS    n, n, #12-4             ; (n-4) to go
        BLT     Up_TrailingBytes        ; < 4 bytes to go
        SUBS    n, n, #4
        LDRLT   tmp1, [src], #4
        STRLT   tmp1, [dst], #4
        LDMGEIA src!, {tmp1, tmp3}
        STMGEIA dst!, {tmp1, tmp3}
        SUBGE   n, n, #4

        ; Here with less than 4 bytes to go

Up_TrailingBytes
        ADDS    n, n, #4
Kevin Bracey's avatar
Kevin Bracey committed
124
        Return  , "a1", , EQ            ; 0 bytes
Neil Turton's avatar
Neil Turton committed
125 126 127 128 129 130 131
        CMP     n, #2                   ; 1, 2 or 3 bytes
        LDRB    tmp1, [src], #1
        STRB    tmp1, [dst], #1
        LDRGEB  tmp1, [src], #1
        STRGEB  tmp1, [dst], #1
        LDRGTB  tmp1, [src], #1
        STRGTB  tmp1, [dst], #1
Kevin Bracey's avatar
Kevin Bracey committed
132
        Return  , "a1"                  ; recover old dst value
Neil Turton's avatar
Neil Turton committed
133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181

;------------------------------------------------------------

; word align dst - tmp3 contains current destination
; alignment.  We can store at least 4 bytes here.

Up_AlignDst
        RSB     tmp3, tmp3, #4          ; 1-3 bytes to go
        CMP     tmp3, #2
        LDRB    tmp1, [src], #1
        STRB    tmp1, [dst], #1
        LDRGEB  tmp1, [src], #1
        STRGEB  tmp1, [dst], #1
        LDRGTB  tmp1, [src], #1
        STRGTB  tmp1, [dst], #1
        SUBS    n, n, tmp3              ; check number to go
        BLT     Up_TrailingBytes        ; less than 4 bytes
        ANDS    tmp3, src, #3
        BEQ     Up_SrcDstAligned        ; coaligned case

        ; The source is not coaligned with the destination,
        ; the destination IS currently word aligned.

Up_SrcUnaligned
        BIC     src, src, #3            ; tmp3 holds extra!
        LDR     lr, [src], #4           ; 1-3 useful bytes
        CMP     tmp3, #2
        BGT     Up_OneByte              ; one byte in tmp1
        BEQ     Up_TwoBytes             ; two bytes in tmp1

; The next three source bytes are in tmp1, one byte must
; come from the next source word.  At least four bytes
; more must be stored.  Check first to see if there are a
; sufficient number of bytes to go to justify using stm/ldm
; instructions.

Up_ThreeBytes
        CMP     n, #16-4                ; at least 16 bytes?
        BLT     %F1                     ; no                    ; 1
        SUB     n, n, #16-4             ; (n+16) bytes to go    ; 1

        ; save some work registers.  The point at which this
        ; is done is based on the ldm/stm time being = (n+3)+(n/4)S

        STMFD   sp!, {v1, v2}                                  ; 14   ????

        ; loop doing 16 bytes at a time.  There are currently
        ; three useful bytes in lr.

Kevin Bracey's avatar
Kevin Bracey committed
182 183 184 185 186 187 188 189 190 191 192 193
0       MOV     tmp1, lr, $SLA #8        ; first three bytes     ; 1
        LDMIA   src!, {v1, v2, tmp3, lr}                         ; 12/13
        ORR     tmp1, tmp1, v1, $SHA #24         ; word 1        ; 1
        MOV     v1, v1, $SLA #8                                  ; ...
        ORR     v1, v1, v2, $SHA #24             ; word 2        ; 2 (1+1)
        MOV     v2, v2, $SLA #8
        ORR     v2, v2, tmp3, $SHA #24           ; word 3        ; 2
        MOV     tmp3, tmp3, $SLA #8
        ORR     tmp3, tmp3, lr, $SHA #24         ; word 4        ; 2
        STMIA   dst!, {tmp1, v1, v2, tmp3}                       ; 12/13
        SUBS    n, n, #16                                        ; 1
        BGE     %B0                                              ; 4 / 1
Neil Turton's avatar
Neil Turton committed
194 195 196 197 198 199 200 201 202 203 204 205 206 207 208

        ; loop timing (depends on alignment) for n loops:-

        ;       pre:    17
        ;               ((45/46/47)n - 3) for 32n bytes
        ;       post:   13/14
        ;       total:  (45/46/47)n+(27/28)
        ;       32 bytes:       72-75
        ;       64 bytes:       117-122
        ;       96 bytes:       162-169

        ; Reload registers

        LDMFD   sp!, {v1, v2}                                   ; 12/13 ????

Kevin Bracey's avatar
Kevin Bracey committed
209 210
        ADDS    n, n, #16-4              ; check for at least 4
        BLT     %F2                      ; < 4 bytes
Neil Turton's avatar
Neil Turton committed
211

Kevin Bracey's avatar
Kevin Bracey committed
212 213 214 215 216 217
1       MOV     tmp3, lr, $SLA #8        ; first three bytes     ; 1
        LDR     lr, [src], #4            ; next four bytes       ; 4
        ORR     tmp3, tmp3, lr, $SHA #24                         ; 1
        STR     tmp3, [dst], #4                                  ; 4
        SUBS    n, n, #4                                         ; 1
        BGE     %B1                      ; tmp1 contains three bytes 1 / 4
Neil Turton's avatar
Neil Turton committed
218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246

        ; Loop timing:

        ;               15n-3   for 4n bytes
        ;       32:     117
        ;       64:     237

        ; Less than four bytes to go - readjust the src
        ; address.

2       SUB     src, src, #3
        B       Up_TrailingBytes

; The next two source bytes are in tmp1, two bytes must
; come from the next source word.  At least four bytes
; more must be stored.

Up_TwoBytes
        CMP     n, #16-4                ; at least 16 bytes?
        BLT     %F1                     ; no
        SUB     n, n, #16-4             ; (n+16) bytes to go

        ; form a stack frame and save registers

        STMFD   sp!, {v1, v2}

        ; loop doing 32 bytes at a time.  There are currently
        ; two useful bytes in lr.

Kevin Bracey's avatar
Kevin Bracey committed
247
0       MOV     tmp1, lr, $SLA #16       ; first two bytes
Neil Turton's avatar
Neil Turton committed
248
        LDMIA   src!, {v1, v2, tmp3, lr}
Kevin Bracey's avatar
Kevin Bracey committed
249 250 251 252 253 254 255
        ORR     tmp1, tmp1, v1, $SHA #16 ; word 1
        MOV     v1, v1, $SLA #16
        ORR     v1, v1, v2, $SHA #16     ; word 2
        MOV     v2, v2, $SLA #16
        ORR     v2, v2, tmp3, $SHA #16   ; word 3
        MOV     tmp3, tmp3, $SLA #16
        ORR     tmp3, tmp3, lr, $SHA #16 ; word 4
Neil Turton's avatar
Neil Turton committed
256 257 258 259 260 261 262
        STMIA   dst!, {tmp1, v1, v2, tmp3}
        SUBS    n, n, #16
        BGE     %B0
        ; Reload registers

        LDMFD   sp!, {v1, v2}

Kevin Bracey's avatar
Kevin Bracey committed
263
        ADDS    n, n, #16-4              ; check number of bytes
Neil Turton's avatar
Neil Turton committed
264
        BLT     %F2
Kevin Bracey's avatar
Kevin Bracey committed
265 266 267
1       MOV     tmp3, lr, $SLA #16       ; first two bytes
        LDR     lr, [src], #4            ; next four bytes
        ORR     tmp3, tmp3, lr, $SHA #16
Neil Turton's avatar
Neil Turton committed
268 269
        STR     tmp3, [dst], #4
        SUBS    n, n, #4
Kevin Bracey's avatar
Kevin Bracey committed
270
        BGE     %B1                      ; tmp1 contains two bytes
Neil Turton's avatar
Neil Turton committed
271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293

        ; Less than four bytes to go - readjust the src
        ; address.

2       SUB     src, src, #2
        B       Up_TrailingBytes

; The next source byte is in tmp1, three bytes must
; come from the next source word.  At least four bytes
; more must be stored.

Up_OneByte
        CMP     n, #16-4                ; at least 16 bytes?
        BLT     %F1                     ; no
        SUB     n, n, #16-4             ; (n+16) bytes to go

        ; form a stack frame and save registers

        STMFD   sp!, {v1, v2}

        ; loop doing 32 bytes at a time.  There is currently
        ; one useful byte in lr

Kevin Bracey's avatar
Kevin Bracey committed
294
0       MOV     tmp1, lr, $SLA #24       ; first byte
Neil Turton's avatar
Neil Turton committed
295
        LDMIA   src!, {v1, v2, tmp3, lr}
Kevin Bracey's avatar
Kevin Bracey committed
296 297 298 299 300 301 302
        ORR     tmp1, tmp1, v1, $SHA #8  ; word 1
        MOV     v1, v1, $SLA #24
        ORR     v1, v1, v2, $SHA #8      ; word 2
        MOV     v2, v2, $SLA #24
        ORR     v2, v2, tmp3, $SHA #8    ; word 3
        MOV     tmp3, tmp3, $SLA #24
        ORR     tmp3, tmp3, lr, $SHA #8  ; word 4
Neil Turton's avatar
Neil Turton committed
303 304 305 306 307 308 309
        STMIA   dst!, {tmp1, v1, v2, tmp3}
        SUBS    n, n, #16
        BGE     %B0
        ; Reload registers

        LDMFD   sp!, {v1, v2}

Kevin Bracey's avatar
Kevin Bracey committed
310
        ADDS    n, n, #16-4              ; check number of bytes
Neil Turton's avatar
Neil Turton committed
311
        BLT     %F2
Kevin Bracey's avatar
Kevin Bracey committed
312 313 314
1       MOV     tmp3, lr, $SLA #24       ; first byte
        LDR     lr, [src], #4            ; next four bytes
        ORR     tmp3, tmp3, lr, $SHA #8
Neil Turton's avatar
Neil Turton committed
315 316
        STR     tmp3, [dst], #4
        SUBS    n, n, #4
Kevin Bracey's avatar
Kevin Bracey committed
317
        BGE     %B1                      ; tmp1 contains one byte
Neil Turton's avatar
Neil Turton committed
318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405

        ; Less than four bytes to go - one already in tmp3.

2       SUB     src, src, #1
        B       Up_TrailingBytes

;======================================================================
; Copy down code
; ==============

;       This is exactly the same as the copy up code -
;       but it copies in the opposite direction.

CopyDown
        ADD     src, src, n             ; points beyond end
        ADD     dst, dst, n

        SUBS    n, n, #4                ; need at least 4 bytes
        BLT     Down_TrailingBytes      ; < 4 bytes to go

        ; word align the dst - first find out how many bytes
        ; must be stored to do this.  If the number is 0
        ; check the src too.

        ANDS    tmp3, dst, #3           ; eq means aligned!
        BNE     Down_AlignDst
        ANDS    tmp3, src, #3
        BNE     Down_SrcUnaligned       ; more difficult!

        ; here when source and destination are both aligned.
        ; number of bytes to transfer is (n+4), n is >= 0.

Down_SrcDstAligned
        SUBS    n, n, #12-4             ; 12 bytes or more?
        BLT     Down_TrailingWords
        ; We only have three registers to play with.  It is
        ; worth gaining more only if the number of bytes to
        ; transfer is greater than 12+8*<registers stacked>
        ; We need to stack 8 (4+4) registers to gain 8 temporaries,
        ; so look for >=44 bytes.  Since we would save 8*4 = 32
        ; bytes at a time we actually compare with 64.

        STMFD   sp!, {v1, lr}
        SUBS    n, n, #32-12            ; n+32 to go.
        BLT     %F1

        ; loop loading 4 registers per time, twice (32 bytes)

0       LDMDB   src!, {tmp1, v1, tmp3, lr}
        STMDB   dst!, {tmp1, v1, tmp3, lr}
        LDMDB   src!, {tmp1, v1, tmp3, lr}
        STMDB   dst!, {tmp1, v1, tmp3, lr}
        SUBS    n, n, #32
        BGE     %B0
        ; see if we can handle another 8

1       CMN     n, #16
        LDMGEDB src!, {tmp1, v1, tmp3, lr}
        STMGEDB dst!, {tmp1, v1, tmp3, lr}
        SUBGE   n, n, #16

        ; Here when there are fewer than 16 bytes to go.

        ADDS    n, n, #32-12            ; (n-12) to go

        ; Ok - do three words at a time.

        LDMGEDB src!, {tmp1, tmp3, lr}
        STMGEDB dst!, {tmp1, tmp3, lr}
        SUBGE   n, n, #12
        LDMFD   sp!, {v1, lr}
        ; (n-12) bytes to go - 0, 1 or 2 words.  Check
        ; which.

Down_TrailingWords
        ADDS    n, n, #12-4             ; (n-4) to go
        BLT     Down_TrailingBytes      ; < 4 bytes to go
        SUBS    n, n, #4
        LDRLT   tmp1, [src, #-4]!
        STRLT   tmp1, [dst, #-4]!
        LDMGEDB src!, {tmp1, tmp3}
        STMGEDB dst!, {tmp1, tmp3}
        SUBGE   n, n, #4

        ; Here with less than 4 bytes to go

Down_TrailingBytes
        ADDS    n, n, #4
Kevin Bracey's avatar
Kevin Bracey committed
406 407
        Return  , "", LinkNotStacked, EQ ; 0 bytes
        CMP     n, #2                    ; 1, 2 or 3 bytes
Neil Turton's avatar
Neil Turton committed
408 409 410 411
        LDRB    tmp1, [src, #-1]!
        STRB    tmp1, [dst, #-1]!
        LDRGEB  tmp1, [src, #-1]!
        STRGEB  tmp1, [dst, #-1]!
Kevin Bracey's avatar
Kevin Bracey committed
412
        LDRGTB  tmp1, [src, #-1]!        ; dst is now original dst
Neil Turton's avatar
Neil Turton committed
413
        STRGTB  tmp1, [dst, #-1]!
Kevin Bracey's avatar
Kevin Bracey committed
414
        Return  , "", LinkNotStacked
Neil Turton's avatar
Neil Turton committed
415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463

;------------------------------------------------------------

; word align dst - tmp3 contains current destination
; alignment.  We can store at least 4 bytes here.  We are
; going downwards - so tmp3 is the actual number of bytes
; to store.

Down_AlignDst
        CMP     tmp3, #2
        LDRB    tmp1, [src, #-1]!
        STRB    tmp1, [dst, #-1]!
        LDRGEB  tmp1, [src, #-1]!
        STRGEB  tmp1, [dst, #-1]!
        LDRGTB  tmp1, [src, #-1]!
        STRGTB  tmp1, [dst, #-1]!
        SUBS    n, n, tmp3              ; check number to go
        BLT     Down_TrailingBytes      ; less than 4 bytes
        ANDS    tmp3, src, #3
        BEQ     Down_SrcDstAligned      ; coaligned case

        ; The source is not coaligned with the destination,
        ; the destination IS currently word aligned.

Down_SrcUnaligned
        BIC     src, src, #3            ; tmp3 holds extra!
        LDR     tmp1, [src]             ; 1-3 useful bytes
        CMP     tmp3, #2
        BLT     Down_OneByte            ; one byte in tmp1
        BEQ     Down_TwoBytes           ; two bytes in tmp1

; The last three source bytes are in tmp1, one byte must
; come from the previous source word.  At least four bytes
; more must be stored.  Check first to see if there are a
; sufficient number of bytes to go to justify using stm/ldm
; instructions.

Down_ThreeBytes
        CMP     n, #16-4                ; at least 16 bytes?
        BLT     %F1                     ; no
        SUB     n, n, #16-4             ; (n+16) bytes to go

        ; form a stack frame and save registers

        STMFD   sp!, {v1, v2, lr}

        ; loop doing 32 bytes at a time.  There are currently
        ; three useful bytes in tmp1 (a4).

Kevin Bracey's avatar
Kevin Bracey committed
464
0       MOV     lr, tmp1, $SHA #8        ; last three bytes
Neil Turton's avatar
Neil Turton committed
465
        LDMDB   src!, {tmp1, v1, v2, tmp3}
Kevin Bracey's avatar
Kevin Bracey committed
466 467 468 469 470 471 472
        ORR     lr, lr, tmp3, $SLA #24   ; word 4
        MOV     tmp3, tmp3, $SHA #8
        ORR     tmp3, tmp3, v2, $SLA #24 ; word 3
        MOV     v2, v2, $SHA #8
        ORR     v2, v2, v1, $SLA #24     ; word 2
        MOV     v1, v1, $SHA #8
        ORR     v1, v1, tmp1, $SLA #24   ; word 1
Neil Turton's avatar
Neil Turton committed
473 474 475 476 477 478 479
        STMDB   dst!, {v1, v2, tmp3, lr}
        SUBS    n, n, #16
        BGE     %B0
        ; Reload registers

        LDMFD   sp!, {v1, v2, lr}

Kevin Bracey's avatar
Kevin Bracey committed
480 481
        ADDS    n, n, #16-4              ; check for at least 4
        BLT     %F2                      ; < 4 bytes
Neil Turton's avatar
Neil Turton committed
482

Kevin Bracey's avatar
Kevin Bracey committed
483 484 485
1       MOV     tmp3, tmp1, $SHA #8      ; last three bytes
        LDR     tmp1, [src, #-4]!        ; previous four bytes
        ORR     tmp3, tmp3, tmp1, $SLA #24
Neil Turton's avatar
Neil Turton committed
486 487
        STR     tmp3, [dst, #-4]!
        SUBS    n, n, #4
Kevin Bracey's avatar
Kevin Bracey committed
488
        BGE     %B1                      ; tmp1 contains three bytes
Neil Turton's avatar
Neil Turton committed
489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511

        ; Less than four bytes to go - readjust the src
        ; address.

2       ADD     src, src, #3
        B       Down_TrailingBytes

; The last two source bytes are in tmp1, two bytes must
; come from the previous source word.  At least four bytes
; more must be stored.

Down_TwoBytes
        CMP     n, #16-4                ; at least 16 bytes?
        BLT     %F1                     ; no
        SUB     n, n, #16-4             ; (n+16) bytes to go

        ; form a stack frame and save registers

        STMFD   sp!, {v1, v2, lr}

        ; loop doing 32 bytes at a time.  There are currently
        ; two useful bytes in tmp1 (a4).

Kevin Bracey's avatar
Kevin Bracey committed
512
0       MOV     lr, tmp1, $SHA #16       ; last two bytes
Neil Turton's avatar
Neil Turton committed
513
        LDMDB   src!, {tmp1, v1, v2, tmp3}
Kevin Bracey's avatar
Kevin Bracey committed
514 515 516 517 518 519 520
        ORR     lr, lr, tmp3, $SLA #16   ; word 4
        MOV     tmp3, tmp3, $SHA #16
        ORR     tmp3, tmp3, v2, $SLA #16 ; word 3
        MOV     v2, v2, $SHA #16
        ORR     v2, v2, v1, $SLA #16     ; word 2
        MOV     v1, v1, $SHA #16
        ORR     v1, v1, tmp1, $SLA #16   ; word 1
Neil Turton's avatar
Neil Turton committed
521 522 523 524 525 526 527
        STMDB   dst!, {v1, v2, tmp3, lr}
        SUBS    n, n, #16
        BGE     %B0
        ; Reload registers

        LDMFD   sp!, {v1, v2, lr}

Kevin Bracey's avatar
Kevin Bracey committed
528 529
        ADDS    n, n, #16-4              ; check for at least 4
        BLT     %F2                      ; < 4 bytes
Neil Turton's avatar
Neil Turton committed
530

Kevin Bracey's avatar
Kevin Bracey committed
531 532 533
1       MOV     tmp3, tmp1, $SHA #16     ; last two bytes
        LDR     tmp1, [src, #-4]!        ; previous four bytes
        ORR     tmp3, tmp3, tmp1, $SLA #16
Neil Turton's avatar
Neil Turton committed
534 535
        STR     tmp3, [dst, #-4]!
        SUBS    n, n, #4
Kevin Bracey's avatar
Kevin Bracey committed
536
        BGE     %B1                      ; tmp1 contains two bytes
Neil Turton's avatar
Neil Turton committed
537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559

        ; Less than four bytes to go - readjust the src
        ; address.

2       ADD     src, src, #2
        B       Down_TrailingBytes

; The last source byte is in tmp1, three bytes must
; come from the previous source word.  At least four bytes
; more must be stored.

Down_OneByte
        CMP     n, #16-4                ; at least 16 bytes?
        BLT     %F1                     ; no
        SUB     n, n, #16-4             ; (n+16) bytes to go

        ; form a stack frame and save registers

        STMFD   sp!, {v1, v2, lr}

        ; loop doing 32 bytes at a time.  There is currently
        ; one useful byte in tmp1 (a4).

Kevin Bracey's avatar
Kevin Bracey committed
560
0       MOV     lr, tmp1, $SHA #24       ; last byte
Neil Turton's avatar
Neil Turton committed
561
        LDMDB   src!, {tmp1, v1, v2, tmp3}
Kevin Bracey's avatar
Kevin Bracey committed
562 563 564 565 566 567 568
        ORR     lr, lr, tmp3, $SLA #8    ; word 4
        MOV     tmp3, tmp3, $SHA #24
        ORR     tmp3, tmp3, v2, $SLA #8  ; word 3
        MOV     v2, v2, $SHA #24
        ORR     v2, v2, v1, $SLA #8      ; word 2
        MOV     v1, v1, $SHA #24
        ORR     v1, v1, tmp1, $SLA #8    ; word 1
Neil Turton's avatar
Neil Turton committed
569 570 571 572 573 574 575 576
        STMDB   dst!, {v1, v2, tmp3, lr}
        SUBS    n, n, #16
        BGE     %B0
        ; Reload registers

        LDMFD   sp!, {v1, v2, lr}

        ADDS    n, n, #16-4               ; check for at least 4
Kevin Bracey's avatar
Kevin Bracey committed
577
        BLT     %F2                       ; < 4 bytes
Neil Turton's avatar
Neil Turton committed
578

Kevin Bracey's avatar
Kevin Bracey committed
579 580 581
1       MOV     tmp3, tmp1, $SHA #24      ; last byte
        LDR     tmp1, [src, #-4]!         ; previous four bytes
        ORR     tmp3, tmp3, tmp1, $SLA #8
Neil Turton's avatar
Neil Turton committed
582 583
        STR     tmp3, [dst, #-4]!
        SUBS    n, n, #4
Kevin Bracey's avatar
Kevin Bracey committed
584
        BGE     %B1                       ; tmp1 contains one byte
Neil Turton's avatar
Neil Turton committed
585 586 587 588 589 590 591

        ; Less than four bytes to go - one already in tmp3.

2       ADD     src, src, #1
        B       Down_TrailingBytes

;------------------------------------------------------------
Kevin Bracey's avatar
Kevin Bracey committed
592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688
 ]

 [ make = "memset" :LOR: make = "all" :LOR: make = "shared-library"
src     RN    a2
dst     RN    a1
n       RN    a3
tmp1    RN    a4
tmp3    RN    ip

; extern void *memset(void * /*s*/, int /*c*/, size_t /*n*/)

        Function memset, leaf

        FunctionEntry , "dst"           ; Must return original dst.
        SUBS    n, n, #4                ; need at least 4 bytes
        BMI     TrailingBytes           ; < 4 bytes to go

        ; word align the dst - first find out how many bytes
        ; must be stored to do this.

        ANDS    tmp3, dst, #3           ; eq means aligned!
        BNE     AlignDst

        ; here when destination is word-aligned,
        ; number of bytes to transfer is (n+4), n is >= 0.

DstAligned
        AND     src, src, #&ff
        ORR     src, src, src, ASL #8
        ORR     src, src, src, ASL #16
        MOV     tmp1, src
        MOV     tmp3, src
        MOV     lr, src

        SUBS    n, n, #12-4             ; 12 bytes or more?
        BLT     TrailingWords

        SUBS    n, n, #32-12            ; n+32 to go.
        BLT     %F1

0       STMIA   dst!, {src, tmp1, tmp3, lr}
        STMIA   dst!, {src, tmp1, tmp3, lr}
        SUBS    n, n, #32
        BGE     %B0
        ; see if we can handle another 8

        CMN     n, #16
        STMGEIA dst!, {src, tmp1, tmp3, lr}
        SUBGE   n, n, #16

        ; note that we still have (n+32) bytes to go, and this is <16.

        ; Here when there are fewer than 16 bytes to go.

1       ADDS    n, n, #32-12               ; (n-12) to go

        ; Ok - do three words at a time.

2       STMGEIA dst!, {tmp1, tmp3, lr}
        SUBGES  n, n, #12
        BGE     %B2
        ; (n-12) bytes to go - 0, 1 or 2 words.  Check
        ; which.

TrailingWords
        ADDS    n, n, #12-4             ; (n-4) to go
        BLT     TrailingBytes        ; < 4 bytes to go
        SUBS    n, n, #4
        STRLT   src, [dst], #4
        STMGEIA dst!, {src, tmp1}
        SUBGE   n, n, #4

        ; Here with less than 4 bytes to go

TrailingBytes
        ADDS    n, n, #4
        Return  , "a1",, EQ             ; 0 bytes
        CMP     n, #2                   ; 1, 2 or 3 bytes
        STRB    src, [dst], #1
        STRGEB  src, [dst], #1
        STRGTB  src, [dst], #1
        Return  , "a1"                  ; recover old dst value

;------------------------------------------------------------

; word align dst - tmp3 contains current destination
; alignment.  We can store at least 4 bytes here.

AlignDst
        RSB     tmp3, tmp3, #4          ; 1-3 bytes to go
        CMP     tmp3, #2
        STRB    src, [dst], #1
        STRGEB  src, [dst], #1
        STRGTB  src, [dst], #1
        SUBS    n, n, tmp3              ; check number to go
        BLT     TrailingBytes           ; less than 4 bytes
        B       DstAligned
Neil Turton's avatar
Neil Turton committed
689

Kevin Bracey's avatar
Kevin Bracey committed
690
 ]
Neil Turton's avatar
Neil Turton committed
691
        END