; Copyright 2000 Pace Micro Technology plc ; ; Licensed under the Apache License, Version 2.0 (the "License"); ; you may not use this file except in compliance with the License. ; You may obtain a copy of the License at ; ; http://www.apache.org/licenses/LICENSE-2.0 ; ; Unless required by applicable law or agreed to in writing, software ; distributed under the License is distributed on an "AS IS" BASIS, ; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ; See the License for the specific language governing permissions and ; limitations under the License. ; ; GET Hdr:ListOpts ; GET Hdr:Macros ; GET Hdr:System ; $GetCPU ; $GetMEMM ; GET hdr.Options ; GET Hdr:PublicWS ; GET Hdr:KernelWS ; GET hdr.Copro15ops ; GET hdr.ARMops v7 RN 10 ; EXPORT Init_ARMarch ; EXPORT ARM_Analyse ; EXPORT ARM_PrintProcessorType ; AREA KernelCode,CODE,READONLY ; ARM keep changing their mind about ID field layout. ; Here's a summary, courtesy of the ARM ARM (v5): ; ; pre-ARM 7: xxxx0xxx ; ARM 7: xxxx7xxx where bit 23 indicates v4T/~v3 ; post-ARM 7: xxxanxxx where n<>0 or 7 and a = architecture (1=4,2=4T,3=5,4=5T) ; ; int Init_ARMarch(void) ; Returns architecture, as above in a1. Also EQ if ARMv3, NE if ARMv4 or later. ; Corrupts only ip, no RAM usage. Init_ARMarch ARM_read_ID ip ANDS a1, ip, #&0000F000 MOVEQ pc, lr ; ARM 3 or ARM 6 TEQ a1, #&00007000 BNE %FT20 TST ip, #&00800000 ; ARM 7 - check for Thumb MOVNE a1, #ARMv4T MOVEQ a1, #ARMv3 MOV pc, lr 20 ANDS a1, ip, #&000F0000 ; post-ARM 7 MOV a1, a1, LSR #16 MOV pc, lr ARM_Analyse MOV a2, lr BL Init_ARMarch MOV lr, a2 [ MEMM_Type = "VMSAv6" CMP a1, #ARMvF BEQ ARM_Analyse_Fancy ; New ARM; use the feature regs to perform all the setup ] Push "v1,v2,v5,v6,v7,lr" ARM_read_ID v1 ARM_read_cachetype v2 LDR v6, =ZeroPage ADRL v7, KnownCPUTable FindARMloop LDMIA v7!, {a1, a2} ; See if it's a known ARM CMP a1, #-1 BEQ %FT20 AND a2, v1, a2 TEQ a1, a2 ADDNE v7, v7, #8 BNE FindARMloop TEQ v2, v1 ; If we don't have cache attributes, read from table LDREQ v2, [v7] 20 TEQ v2, v1 BEQ %BT20 ; Cache unknown: panic CMP a1, #-1 LDRNEB a2, [v7, #4] MOVEQ a2, #ARMunk STRB a2, [v6, #ProcessorType] ASSERT CT_Isize_pos = 0 MOV a1, v2 ADD a2, v6, #ICache_Info BL EvaluateCache MOV a1, v2, LSR #CT_Dsize_pos ADD a2, v6, #DCache_Info BL EvaluateCache AND a1, v2, #CT_ctype_mask MOV a1, a1, LSR #CT_ctype_pos STRB a1, [v6, #Cache_Type] [ No26bitCode MOV v5, #CPUFlag_32bitOS | MOV v5, #0 ] [ HiProcVecs ORR v5, v5, #CPUFlag_HiProcVecs ] TST v2, #CT_S ORRNE v5, v5, #CPUFlag_SplitCache+CPUFlag_SynchroniseCodeAreas [ CacheOff ORR v5, v5, #CPUFlag_SynchroniseCodeAreas | ARM_read_control a1 ; if Z bit set then we have branch prediction, TST a1, #MMUC_Z ; so we need OS_SynchroniseCodeAreas even if not ORRNE v5, v5, #CPUFlag_SynchroniseCodeAreas ; split caches ] ; Test abort timing (base restored or base updated) MOV a1, #&8000 LDR a2, [a1], #4 ; Will abort - DAb handler will continue execution TEQ a1, #&8000 ORREQ v5, v5, #CPUFlag_BaseRestored ; Check store of PC 30 STR pc, [sp, #-4]! ADR a2, %BT30 + 8 LDR a1, [sp], #4 TEQ a1, a2 ORREQ v5, v5, #CPUFlag_StorePCplus8 [ 0=1 ; Check whether 26-bit mode is available MSR CPSR_c, #F32_bit+I32_bit+SVC26_mode MRS a1, CPSR AND a1, a1, #M32_bits TEQ a1, #SVC26_mode ORRNE v5, v5, #CPUFlag_No26bitMode MSREQ CPSR_c, #F32_bit+I32_bit+SVC32_mode BNE %FT35 ; Do we get vector exceptions on read? 
LDR a2, =ZeroPage MOV a1, a2 LDR a1, [a1] ; If this aborts a1 will be left unchanged TEQ a1, a2 ORREQ v5, v5, #CPUFlag_VectorReadException ] 35 BL Init_ARMarch STRB a1, [v6, #ProcessorArch] TEQ a1, #ARMv3 ; assume long multiply available ORRNE v5, v5, #CPUFlag_LongMul ; if v4 or later TEQNE a1, #ARMv4 ; assume 26-bit available ORRNE v5, v5, #CPUFlag_No26bitMode ; iff v3 or v4 (not T) TEQNE a1, #ARMv5 ; assume Thumb available ORRNE v5, v5, #CPUFlag_Thumb ; iff not v3,v4,v5 MSR CPSR_f, #Q32_bit MRS lr, CPSR TST lr, #Q32_bit ORRNE v5, v5, #CPUFlag_DSP LDRB v4, [v6, #ProcessorType] TEQ v4, #ARMunk ; Modify deduced flags ADRNEL lr, KnownCPUFlags ADDNE lr, lr, v4, LSL #3 LDMNEIA lr, {a2, a3} ORRNE v5, v5, a2 BICNE v5, v5, a3 [ XScaleJTAGDebug TST v5, #CPUFlag_XScale BEQ %FT40 MRC p14, 0, a2, c10, c0 ; Read debug control register TST a2, #&80000000 ORRNE v5, v5, #CPUFlag_XScaleJTAGconnected MOVEQ a2, #&C000001C ; enable hot debug MCREQ p14, 0, a2, c10, c0 BNE %FT40 40 ] STR v5, [v6, #ProcessorFlags] ; Now, a1 = processor architecture (ARMv3, ARMv4 ...) ; v4 = processor type (ARM600, ARM610, ...) ; v5 = processor flags LDRB a2, [v6, #Cache_Type] [ MEMM_Type = "ARM600" CMP a1, #ARMv4 BLO Analyse_ARMv3 ; eg. ARM710 TEQ a2, #CT_ctype_WT TSTEQ v5, #CPUFlag_SplitCache BEQ Analyse_WriteThroughUnified ; eg. ARM7TDMI derivative TEQ a2, #CT_ctype_WB_Crd BEQ Analyse_WB_Crd ; eg. StrongARM TEQ a2, #CT_ctype_WB_Cal_LD BEQ Analyse_WB_Cal_LD ; assume XScale ] ; MEMM_Type = "ARM600" TEQ a2, #CT_ctype_WB_CR7_LDa BEQ Analyse_WB_CR7_LDa ; eg. ARM9 ; others ... WeirdARMPanic B WeirdARMPanic ; stiff :) [ MEMM_Type = "ARM600" Analyse_ARMv3 ADRL a1, NullOp ADRL a2, Cache_Invalidate_ARMv3 ADRL a3, WriteBuffer_Drain_ARMv3 ADRL a4, TLB_Invalidate_ARMv3 ADRL ip, TLB_InvalidateEntry_ARMv3 STR a1, [v6, #Proc_Cache_CleanAll] STR a2, [v6, #Proc_Cache_CleanInvalidateAll] STR a2, [v6, #Proc_Cache_InvalidateAll] STR a3, [v6, #Proc_WriteBuffer_Drain] STR a4, [v6, #Proc_TLB_InvalidateAll] STR ip, [v6, #Proc_TLB_InvalidateEntry] STR a1, [v6, #Proc_IMB_Full] STR a1, [v6, #Proc_IMB_Range] STR a1, [v6, #Proc_IMB_List] ADRL a1, MMU_Changing_ARMv3 ADRL a2, MMU_ChangingEntry_ARMv3 ADRL a3, MMU_ChangingUncached_ARMv3 ADRL a4, MMU_ChangingUncachedEntry_ARMv3 STR a1, [v6, #Proc_MMU_Changing] STR a2, [v6, #Proc_MMU_ChangingEntry] STR a3, [v6, #Proc_MMU_ChangingUncached] STR a4, [v6, #Proc_MMU_ChangingUncachedEntry] ADRL a1, MMU_ChangingEntries_ARMv3 ADRL a2, MMU_ChangingUncachedEntries_ARMv3 ADRL a3, Cache_RangeThreshold_ARMv3 ADRL a4, Cache_Examine_Simple STR a1, [v6, #Proc_MMU_ChangingEntries] STR a2, [v6, #Proc_MMU_ChangingUncachedEntries] STR a3, [v6, #Proc_Cache_RangeThreshold] STR a4, [v6, #Proc_Cache_Examine] ADRL a1, XCBTableWT STR a1, [v6, #MMU_PCBTrans] B %FT90 Analyse_WriteThroughUnified ADRL a1, NullOp ADRL a2, Cache_InvalidateUnified TST v5, #CPUFlag_NoWBDrain ADRNEL a3, WriteBuffer_Drain_OffOn ADREQL a3, WriteBuffer_Drain ADRL a4, TLB_Invalidate_Unified ADRL ip, TLB_InvalidateEntry_Unified STR a1, [v6, #Proc_Cache_CleanAll] STR a2, [v6, #Proc_Cache_CleanInvalidateAll] STR a2, [v6, #Proc_Cache_InvalidateAll] STR a3, [v6, #Proc_WriteBuffer_Drain] STR a4, [v6, #Proc_TLB_InvalidateAll] STR ip, [v6, #Proc_TLB_InvalidateEntry] STR a1, [v6, #Proc_IMB_Full] STR a1, [v6, #Proc_IMB_Range] STR a1, [v6, #Proc_IMB_List] ADRL a1, MMU_Changing_Writethrough ADRL a2, MMU_ChangingEntry_Writethrough ADRL a3, MMU_ChangingUncached ADRL a4, MMU_ChangingUncachedEntry STR a1, [v6, #Proc_MMU_Changing] STR a2, [v6, #Proc_MMU_ChangingEntry] STR a3, [v6, 
#Proc_MMU_ChangingUncached] STR a4, [v6, #Proc_MMU_ChangingUncachedEntry] ADRL a1, MMU_ChangingEntries_Writethrough ADRL a2, MMU_ChangingUncachedEntries ADRL a3, Cache_RangeThreshold_Writethrough ADRL a4, Cache_Examine_Simple STR a1, [v6, #Proc_MMU_ChangingEntries] STR a2, [v6, #Proc_MMU_ChangingUncachedEntries] STR a3, [v6, #Proc_Cache_RangeThreshold] STR a4, [v6, #Proc_Cache_Examine] ADRL a1, XCBTableWT STR a1, [v6, #MMU_PCBTrans] B %FT90 ] ; MEMM_Type = "ARM600" Analyse_WB_CR7_LDa TST v5, #CPUFlag_SplitCache BEQ WeirdARMPanic ; currently, only support harvard caches here (eg. ARM920) ADRL a1, Cache_CleanInvalidateAll_WB_CR7_LDa STR a1, [v6, #Proc_Cache_CleanInvalidateAll] ADRL a1, Cache_CleanAll_WB_CR7_LDa STR a1, [v6, #Proc_Cache_CleanAll] ADRL a1, Cache_InvalidateAll_WB_CR7_LDa STR a1, [v6, #Proc_Cache_InvalidateAll] ADRL a1, Cache_RangeThreshold_WB_CR7_LDa STR a1, [v6, #Proc_Cache_RangeThreshold] ADRL a1, Cache_Examine_Simple STR a1, [v6, #Proc_Cache_Examine] ADRL a1, TLB_InvalidateAll_WB_CR7_LDa STR a1, [v6, #Proc_TLB_InvalidateAll] ADRL a1, TLB_InvalidateEntry_WB_CR7_LDa STR a1, [v6, #Proc_TLB_InvalidateEntry] ADRL a1, WriteBuffer_Drain_WB_CR7_LDa STR a1, [v6, #Proc_WriteBuffer_Drain] ADRL a1, IMB_Full_WB_CR7_LDa STR a1, [v6, #Proc_IMB_Full] ADRL a1, IMB_Range_WB_CR7_LDa STR a1, [v6, #Proc_IMB_Range] ADRL a1, IMB_List_WB_CR7_LDa STR a1, [v6, #Proc_IMB_List] ADRL a1, MMU_Changing_WB_CR7_LDa STR a1, [v6, #Proc_MMU_Changing] ADRL a1, MMU_ChangingEntry_WB_CR7_LDa STR a1, [v6, #Proc_MMU_ChangingEntry] ADRL a1, MMU_ChangingUncached_WB_CR7_LDa STR a1, [v6, #Proc_MMU_ChangingUncached] ADRL a1, MMU_ChangingUncachedEntry_WB_CR7_LDa STR a1, [v6, #Proc_MMU_ChangingUncachedEntry] ADRL a1, MMU_ChangingEntries_WB_CR7_LDa STR a1, [v6, #Proc_MMU_ChangingEntries] ADRL a1, MMU_ChangingUncachedEntries_WB_CR7_LDa STR a1, [v6, #Proc_MMU_ChangingUncachedEntries] LDRB a2, [v6, #DCache_Associativity] MOV a3, #256 MOV a4, #8 ; to find log2(ASSOC), rounded up Analyse_WB_CR7_LDa_L1 MOV a3, a3, LSR #1 SUB a4, a4, #1 CMP a2, a3 BLO Analyse_WB_CR7_LDa_L1 ADDHI a4, a4, #1 RSB a2, a4, #32 MOV a3, #1 MOV a3, a3, LSL a2 STR a3, [v6, #DCache_IndexBit] LDR a4, [v6, #DCache_NSets] LDRB a2, [v6, #DCache_LineLen] SUB a4, a4, #1 MUL a4, a2, a4 STR a4, [v6, #DCache_IndexSegStart] MOV a2, #64*1024 ; arbitrary-ish STR a2, [v6, #DCache_RangeThreshold] [ MEMM_Type = "ARM600" ADRL a1, XCBTableWBR ; assume read-allocate WB/WT cache | ADRL a1, XCBTableVMSAv6 ] STR a1, [v6, #MMU_PCBTrans] B %FT90 [ MEMM_Type = "ARM600" Analyse_WB_Crd TST v5, #CPUFlag_SplitCache BEQ WeirdARMPanic ; currently, only support harvard ADRL a1, Cache_CleanInvalidateAll_WB_Crd STR a1, [v6, #Proc_Cache_CleanInvalidateAll] ADRL a1, Cache_CleanAll_WB_Crd STR a1, [v6, #Proc_Cache_CleanAll] ADRL a1, Cache_InvalidateAll_WB_Crd STR a1, [v6, #Proc_Cache_InvalidateAll] ADRL a1, Cache_RangeThreshold_WB_Crd STR a1, [v6, #Proc_Cache_RangeThreshold] ADRL a1, Cache_Examine_Simple STR a1, [v6, #Proc_Cache_Examine] ADRL a1, TLB_InvalidateAll_WB_Crd STR a1, [v6, #Proc_TLB_InvalidateAll] ADRL a1, TLB_InvalidateEntry_WB_Crd STR a1, [v6, #Proc_TLB_InvalidateEntry] ADRL a1, WriteBuffer_Drain_WB_Crd STR a1, [v6, #Proc_WriteBuffer_Drain] ADRL a1, IMB_Full_WB_Crd STR a1, [v6, #Proc_IMB_Full] ADRL a1, IMB_Range_WB_Crd STR a1, [v6, #Proc_IMB_Range] ADRL a1, IMB_List_WB_Crd STR a1, [v6, #Proc_IMB_List] ADRL a1, MMU_Changing_WB_Crd STR a1, [v6, #Proc_MMU_Changing] ADRL a1, MMU_ChangingEntry_WB_Crd STR a1, [v6, #Proc_MMU_ChangingEntry] ADRL a1, MMU_ChangingUncached_WB_Crd STR a1, 
[v6, #Proc_MMU_ChangingUncached] ADRL a1, MMU_ChangingUncachedEntry_WB_Crd STR a1, [v6, #Proc_MMU_ChangingUncachedEntry] ADRL a1, MMU_ChangingEntries_WB_Crd STR a1, [v6, #Proc_MMU_ChangingEntries] ADRL a1, MMU_ChangingUncachedEntries_WB_Crd STR a1, [v6, #Proc_MMU_ChangingUncachedEntries] LDR a2, =DCacheCleanAddress STR a2, [v6, #DCache_CleanBaseAddress] STR a2, [v6, #DCache_CleanNextAddress] MOV a2, #64*1024 ;arbitrary-ish threshold STR a2, [v6, #DCache_RangeThreshold] LDRB a2, [v6, #ProcessorType] TEQ a2, #SA110 ADREQL a2, XCBTableSA110 BEQ Analyse_WB_Crd_finish TEQ a2, #SA1100 TEQNE a2, #SA1110 ADREQL a2, XCBTableSA1110 ADRNEL a2, XCBTableWBR Analyse_WB_Crd_finish STR a2, [v6, #MMU_PCBTrans] B %FT90 Analyse_WB_Cal_LD TST v5, #CPUFlag_SplitCache BEQ WeirdARMPanic ; currently, only support harvard ADRL a1, Cache_CleanInvalidateAll_WB_Cal_LD STR a1, [v6, #Proc_Cache_CleanInvalidateAll] ADRL a1, Cache_CleanAll_WB_Cal_LD STR a1, [v6, #Proc_Cache_CleanAll] ADRL a1, Cache_InvalidateAll_WB_Cal_LD STR a1, [v6, #Proc_Cache_InvalidateAll] ADRL a1, Cache_RangeThreshold_WB_Cal_LD STR a1, [v6, #Proc_Cache_RangeThreshold] ADRL a1, Cache_Examine_Simple STR a1, [v6, #Proc_Cache_Examine] ADRL a1, TLB_InvalidateAll_WB_Cal_LD STR a1, [v6, #Proc_TLB_InvalidateAll] ADRL a1, TLB_InvalidateEntry_WB_Cal_LD STR a1, [v6, #Proc_TLB_InvalidateEntry] ADRL a1, WriteBuffer_Drain_WB_Cal_LD STR a1, [v6, #Proc_WriteBuffer_Drain] ADRL a1, IMB_Full_WB_Cal_LD STR a1, [v6, #Proc_IMB_Full] ADRL a1, IMB_Range_WB_Cal_LD STR a1, [v6, #Proc_IMB_Range] ADRL a1, IMB_List_WB_Cal_LD STR a1, [v6, #Proc_IMB_List] ADRL a1, MMU_Changing_WB_Cal_LD STR a1, [v6, #Proc_MMU_Changing] ADRL a1, MMU_ChangingEntry_WB_Cal_LD STR a1, [v6, #Proc_MMU_ChangingEntry] ADRL a1, MMU_ChangingUncached_WB_Cal_LD STR a1, [v6, #Proc_MMU_ChangingUncached] ADRL a1, MMU_ChangingUncachedEntry_WB_Cal_LD STR a1, [v6, #Proc_MMU_ChangingUncachedEntry] ADRL a1, MMU_ChangingEntries_WB_Cal_LD STR a1, [v6, #Proc_MMU_ChangingEntries] ADRL a1, MMU_ChangingUncachedEntries_WB_Cal_LD STR a1, [v6, #Proc_MMU_ChangingUncachedEntries] LDR a2, =DCacheCleanAddress STR a2, [v6, #DCache_CleanBaseAddress] STR a2, [v6, #DCache_CleanNextAddress] [ XScaleMiniCache ! 1, "You need to arrange for XScale mini-cache clean area to be mini-cacheable" LDR a2, =DCacheCleanAddress + 4 * 32*1024 STR a2, [v6, #MCache_CleanBaseAddress] STR a2, [v6, #MCache_CleanNextAddress] ] ; arbitrary-ish values, mini cache makes global op significantly more expensive [ XScaleMiniCache MOV a2, #128*1024 | MOV a2, #32*1024 ] STR a2, [v6, #DCache_RangeThreshold] ; enable full coprocessor access LDR a2, =&3FFF MCR p15, 0, a2, c15, c1 LDR a2, [v6, #ProcessorFlags] TST a2, #CPUFlag_ExtendedPages ADREQL a2, XCBTableXScaleNoExt ADRNEL a2, XCBTableXScaleWA ; choose between RA and WA here STR a2, [v6, #MMU_PCBTrans] B %FT90 ] ; MEMM_Type = "ARM600" [ MEMM_Type = "VMSAv6" Analyse_WB_CR7_Lx TST v5, #CPUFlag_SplitCache BEQ WeirdARMPanic ; currently, only support harvard caches here ; Read smallest instruction & data/unified cache line length MRC p15, 0, a1, c0, c0, 1 ; Cache type register MOV v2, a1, LSR #16 AND a4, a1, #&F AND v2, v2, #&F STRB a4, [v6, #ICache_LineLen] ; Store log2(line size)-2 STRB v2, [v6, #DCache_LineLen] ; log2(line size)-2 ; Read the cache info into Cache_Lx_* MRC p15, 1, a1, c0, c0, 1 ; Cache level ID register MOV v2, v6 ; Work around DTable/ITable alignment issues STR a1, [v2, #Cache_Lx_Info]! ADD a2, v2, #Cache_Lx_DTable-Cache_Lx_Info MOV a3, #0 10 ANDS v1, a1, #6 ; Data or unified cache at this level? 
MCRNE p15, 2, a3, c0, c0, 0 ; Program cache size selection register myISB ,v1 MRCNE p15, 1, v1, c0, c0, 0 ; Get size info (data/unified) STR v1, [a2] ADD a3, a3, #1 ANDS v1, a1, #1 ; Instruction cache at this level? MCRNE p15, 2, a3, c0, c0, 0 ; Program cache size selection register myISB ,v1 MRCNE p15, 1, v1, c0, c0, 0 ; Get size info (instruction) STR v1, [a2, #Cache_Lx_ITable-Cache_Lx_DTable] ; Shift the cache level ID register along to get the type of the next ; cache level ; However, we need to stop once we reach the first blank entry, because ; ARM have been sneaky and started to reuse some of the bits from the ; high end of the register (the Cortex-A8 TRM lists bits 21-23 as being ; for cache level 8, but the ARMv7 ARM lists them as being for the level ; of unification for inner shareable memory). The ARMv7 ARM does warn ; about making sure you stop once you find the first blank entry, but ; it doesn't say why! TST a1, #7 ADD a3, a3, #1 MOVNE a1, a1, LSR #3 CMP a3, #14 ; Stop after level 7 (even though an 8th level might exist on some CPUs?) ADD a2, a2, #4 BLT %BT10 ; Calculate DCache_RangeThreshold MOV a1, #128*1024 ; Arbitrary-ish STR a1, [v6, #DCache_RangeThreshold] ADRL a1, Cache_CleanInvalidateAll_WB_CR7_Lx STR a1, [v6, #Proc_Cache_CleanInvalidateAll] ADRL a1, Cache_CleanAll_WB_CR7_Lx STR a1, [v6, #Proc_Cache_CleanAll] ADRL a1, Cache_InvalidateAll_WB_CR7_Lx STR a1, [v6, #Proc_Cache_InvalidateAll] ADRL a1, Cache_RangeThreshold_WB_CR7_Lx STR a1, [v6, #Proc_Cache_RangeThreshold] ADRL a1, Cache_Examine_WB_CR7_Lx STR a1, [v6, #Proc_Cache_Examine] ADRL a1, TLB_InvalidateAll_WB_CR7_Lx STR a1, [v6, #Proc_TLB_InvalidateAll] ADRL a1, TLB_InvalidateEntry_WB_CR7_Lx STR a1, [v6, #Proc_TLB_InvalidateEntry] ADRL a1, WriteBuffer_Drain_WB_CR7_Lx STR a1, [v6, #Proc_WriteBuffer_Drain] ADRL a1, IMB_Full_WB_CR7_Lx STR a1, [v6, #Proc_IMB_Full] ADRL a1, IMB_Range_WB_CR7_Lx STR a1, [v6, #Proc_IMB_Range] ADRL a1, IMB_List_WB_CR7_Lx STR a1, [v6, #Proc_IMB_List] ADRL a1, MMU_Changing_WB_CR7_Lx STR a1, [v6, #Proc_MMU_Changing] ADRL a1, MMU_ChangingEntry_WB_CR7_Lx STR a1, [v6, #Proc_MMU_ChangingEntry] ADRL a1, MMU_ChangingUncached_WB_CR7_Lx STR a1, [v6, #Proc_MMU_ChangingUncached] ADRL a1, MMU_ChangingUncachedEntry_WB_CR7_Lx STR a1, [v6, #Proc_MMU_ChangingUncachedEntry] ADRL a1, MMU_ChangingEntries_WB_CR7_Lx STR a1, [v6, #Proc_MMU_ChangingEntries] ADRL a1, MMU_ChangingUncachedEntries_WB_CR7_Lx STR a1, [v6, #Proc_MMU_ChangingUncachedEntries] ADRL a1, XCBTableVMSAv6 STR a1, [v6, #MMU_PCBTrans] B %FT90 ] ; MEMM_Type = "VMSAv6" 90 Pull "v1,v2,v5,v6,v7,pc" ; This routine works out the values LINELEN, ASSOCIATIVITY, NSETS and CACHE_SIZE defined ; in section B2.3.3 of the ARMv5 ARM. 
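;
; For illustration only (a worked example, not taken from the ARM ARM), using the
; ARM920T entry in KnownCPUTable (16K, 64-way, 8 words per line), the code below
; computes:
;   LINELEN       = 1 << (len + 3)                 -> len = 2   gives 32 bytes
;   Multiplier    = 2 + M                          -> M = 0     gives 2
;   ASSOCIATIVITY = Multiplier << (assoc - 1)      -> assoc = 6 gives 64
;   CACHE_SIZE    = Multiplier << (size + 8)       -> size = 5  gives 16384
;   NSETS         = 1 << (size + 6 - assoc - len)  -> 1 << 3    gives 8
; Cross-check: LINELEN * ASSOCIATIVITY * NSETS = 32 * 64 * 8 = 16384 = CACHE_SIZE.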
EvaluateCache AND a3, a1, #CT_assoc_mask+CT_M TEQ a3, #(CT_assoc_0:SHL:CT_assoc_pos)+CT_M BEQ %FT80 MOV ip, #1 ASSERT CT_len_pos = 0 AND a4, a1, #CT_len_mask ADD a4, a4, #3 MOV a4, ip, LSL a4 ; LineLen = 1 << (len+3) STRB a4, [a2, #ICache_LineLen-ICache_Info] MOV a3, #2 TST a1, #CT_M ADDNE a3, a3, #1 ; Multiplier = 2 + M AND a4, a1, #CT_assoc_mask RSB a4, ip, a4, LSR #CT_assoc_pos MOV a4, a3, LSL a4 ; Associativity = Multiplier << (assoc-1) STRB a4, [a2, #ICache_Associativity-ICache_Info] AND a4, a1, #CT_size_mask MOV a4, a4, LSR #CT_size_pos MOV a3, a3, LSL a4 MOV a3, a3, LSL #8 ; Size = Multiplier << (size+8) STR a3, [a2, #ICache_Size-ICache_Info] ADD a4, a4, #6 AND a3, a1, #CT_assoc_mask SUB a4, a4, a3, LSR #CT_assoc_pos AND a3, a1, #CT_len_mask ASSERT CT_len_pos = 0 SUB a4, a4, a3 MOV a4, ip, LSL a4 ; NSets = 1 << (size + 6 - assoc - len) STR a4, [a2, #ICache_NSets-ICache_Info] MOV pc, lr 80 MOV a1, #0 STR a1, [a2, #ICache_NSets-ICache_Info] STR a1, [a2, #ICache_Size-ICache_Info] STRB a1, [a2, #ICache_LineLen-ICache_Info] STRB a1, [a2, #ICache_Associativity-ICache_Info] MOV pc, lr ; Create a list of CPUs, 16 bytes per entry: ; ID bits (1 word) ; Test mask for ID (1 word) ; Cache type register value (1 word) ; Processor type (1 byte) ; Architecture type (1 byte) ; Reserved (2 bytes) GBLA tempcpu MACRO CPUDesc $proc, $id, $mask, $arch, $type, $s, $dsz, $das, $dln, $isz, $ias, $iln LCLA type type SETA (CT_ctype_$type:SHL:CT_ctype_pos)+($s:SHL:CT_S_pos) tempcpu CSzDesc $dsz, $das, $dln type SETA type+(tempcpu:SHL:CT_Dsize_pos) [ :LNOT:($s=0 :LAND: "$isz"="") tempcpu CSzDesc $isz, $ias, $iln ] type SETA type+(tempcpu:SHL:CT_Isize_pos) ASSERT ($id :AND: :NOT: $mask) = 0 DCD $id, $mask, type DCB $proc, $arch, 0, 0 MEND MACRO $var CSzDesc $sz, $as, $ln $var SETA (CT_size_$sz:SHL:CT_size_pos)+(CT_assoc_$as:SHL:CT_assoc_pos)+(CT_len_$ln:SHL:CT_len_pos) $var SETA $var+(CT_M_$sz:SHL:CT_M_pos) MEND ; CPUDesc table for ARMv3-ARMv6 KnownCPUTable ; /------Cache Type register fields-----\. ; ID reg Mask Arch Type S Dsz Das Dln Isz Ias Iln [ MEMM_Type = "ARM600" CPUDesc ARM600, &000600, &00FFF0, ARMv3, WT, 0, 4K, 64, 4 CPUDesc ARM610, &000610, &00FFF0, ARMv3, WT, 0, 4K, 64, 4 CPUDesc ARMunk, &000000, &00F000, ARMv3, WT, 0, 4K, 64, 4 CPUDesc ARM700, &007000, &FFFFF0, ARMv3, WT, 0, 8K, 4, 8 CPUDesc ARM710, &007100, &FFFFF0, ARMv3, WT, 0, 8K, 4, 8 CPUDesc ARM710a, &047100, &FDFFF0, ARMv3, WT, 0, 8K, 4, 4 CPUDesc ARM7500, &027100, &FFFFF0, ARMv3, WT, 0, 4K, 4, 4 CPUDesc ARM7500FE, &077100, &FFFFF0, ARMv3, WT, 0, 4K, 4, 4 CPUDesc ARMunk, &007000, &80F000, ARMv3, WT, 0, 8K, 4, 4 CPUDesc ARM720T, &807200, &FFFFF0, ARMv4T, WT, 0, 8K, 4, 4 CPUDesc ARMunk, &807000, &80F000, ARMv4T, WT, 0, 8K, 4, 4 CPUDesc SA110_preRevT, &01A100, &0FFFFC, ARMv4, WB_Crd, 1, 16K, 32, 8, 16K, 32, 8 CPUDesc SA110, &01A100, &0FFFF0, ARMv4, WB_Crd, 1, 16K, 32, 8, 16K, 32, 8 CPUDesc SA1100, &01A110, &0FFFF0, ARMv4, WB_Crd, 1, 8K, 32, 8, 16K, 32, 8 CPUDesc SA1110, &01B110, &0FFFF0, ARMv4, WB_Crd, 1, 8K, 32, 8, 16K, 32, 8 CPUDesc ARM920T, &029200, &0FFFF0, ARMv4T, WB_CR7_LDa, 1, 16K, 64, 8, 16K, 64, 8 CPUDesc ARM922T, &029220, &0FFFF0, ARMv4T, WB_CR7_LDa, 1, 8K, 64, 8, 8K, 64, 8 CPUDesc X80200, &052000, &0FFFF0, ARMv5TE, WB_Cal_LD, 1, 32K, 32, 8, 32K, 32, 8 CPUDesc X80321, &69052400, &FFFFF700, ARMv5TE, WB_Cal_LD, 1, 32K, 32, 8, 32K, 32, 8 ] ; MEMM_Type = "ARM600" DCD -1 [ MEMM_Type = "VMSAv6" ; Simplified CPUDesc table for ARMvF ; The cache size data is ignored for ARMv7. 
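;
; Roughly speaking (a C sketch for illustration only - the real code is the
; table-walking loops in ARM_Analyse and ARM_Analyse_Fancy; types as in <stdint.h>,
; find_cpu is a hypothetical name), each 16-byte entry is matched against the ID
; register like this:
;
;   struct cpu_desc { uint32_t id, mask, ctype; uint8_t proc, arch, rsvd[2]; };
;
;   const struct cpu_desc *find_cpu(const struct cpu_desc *t, uint32_t midr)
;   {
;       for (; t->id != 0xFFFFFFFF; t++)        /* table ends with a -1 ID word */
;           if ((midr & t->mask) == t->id)      /* AND with mask, compare to ID */
;               return t;
;       return NULL;                            /* unknown CPU -> ARMunk        */
;   }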
KnownCPUTable_Fancy
        CPUDesc ARM1176JZF_S, &00B760, &00FFF0, ARMvF, WB_CR7_LDc, 1, 16K, 4, 8, 16K, 4, 8
        CPUDesc Cortex_A5,    &00C050, &00FFF0, ARMvF, WB_CR7_Lx,  1, 16K, 32,16, 16K, 32,16
        CPUDesc Cortex_A7,    &00C070, &00FFF0, ARMvF, WB_CR7_Lx,  1, 16K, 32,16, 16K, 32,16
        CPUDesc Cortex_A8,    &00C080, &00FFF0, ARMvF, WB_CR7_Lx,  1, 16K, 32,16, 16K, 32,16
        CPUDesc Cortex_A9,    &00C090, &00FFF0, ARMvF, WB_CR7_Lx,  1, 32K, 32,16, 32K, 32,16
        CPUDesc Cortex_A12,   &00C0D0, &00FFF0, ARMvF, WB_CR7_Lx,  1, 32K, 32,16, 32K, 32,16
        CPUDesc Cortex_A15,   &00C0F0, &00FFF0, ARMvF, WB_CR7_Lx,  1, 32K, 32,16, 32K, 32,16
        CPUDesc Cortex_A17,   &00C0E0, &00FFF0, ARMvF, WB_CR7_Lx,  1, 32K, 32,16, 32K, 32,16
        DCD     -1
 ] ; MEMM_Type = "VMSAv6"

; Peculiar characteristics of individual ARMs not deducible otherwise. First field is
; flags to set, second flags to clear.
KnownCPUFlags
        DCD     0,                                                0  ; ARM 600
        DCD     0,                                                0  ; ARM 610
        DCD     0,                                                0  ; ARM 700
        DCD     0,                                                0  ; ARM 710
        DCD     0,                                                0  ; ARM 710a
        DCD     CPUFlag_AbortRestartBroken+CPUFlag_InterruptDelay, 0 ; SA 110 pre revT
        DCD     CPUFlag_InterruptDelay,                           0  ; SA 110 revT or later
        DCD     0,                                                0  ; ARM 7500
        DCD     0,                                                0  ; ARM 7500FE
        DCD     CPUFlag_InterruptDelay,                           0  ; SA 1100
        DCD     CPUFlag_InterruptDelay,                           0  ; SA 1110
        DCD     CPUFlag_NoWBDrain,                                0  ; ARM 720T
        DCD     0,                                                0  ; ARM 920T
        DCD     0,                                                0  ; ARM 922T
        DCD     CPUFlag_ExtendedPages+CPUFlag_XScale,             0  ; X80200
        DCD     CPUFlag_XScale,                                   0  ; X80321
        DCD     0,                                                0  ; ARM1176JZF_S
        DCD     0,                                                0  ; Cortex_A5
        DCD     0,                                                0  ; Cortex_A7
        DCD     0,                                                0  ; Cortex_A8
        DCD     0,                                                0  ; Cortex_A9
        DCD     0,                                                0  ; Cortex_A12
        DCD     0,                                                0  ; Cortex_A15
        DCD     0,                                                0  ; Cortex_A17

 [ MEMM_Type = "VMSAv6"
; --------------------------------------------------------------------------
; ----- ARM_Analyse_Fancy --------------------------------------------------
; --------------------------------------------------------------------------
;
; For ARMv7 ARMs (arch=&F), we can detect everything via the feature registers
; TODO - There's some stuff in here that can be tidied up/removed
; Things we need to set up:
;   ProcessorType      (as listed in hdr.ARMops)
;   Cache_Type         (CT_ctype_* from hdr:MEMM.ARM600)
;   ProcessorArch      (as reported by Init_ARMarch)
;   ProcessorFlags     (CPUFlag_* from hdr.ARMops)
;   Proc_*             (Cache/TLB/IMB/MMU function pointers)
;   MMU_PCBTrans       (Points to lookup table for translating page table cache options)
;   ICache_*, DCache_* (ICache, DCache properties - optional, since not used externally?)

ARM_Analyse_Fancy
        Push    "v1,v2,v5,v6,v7,lr"
        ARM_read_ID v1
        LDR     v6, =ZeroPage
        ADRL    v7, KnownCPUTable_Fancy
10
        LDMIA   v7!, {a1, a2}
        CMP     a1, #-1
        BEQ     %FT20
        AND     a2, v1, a2
        TEQ     a1, a2
        ADDNE   v7, v7, #8
        BNE     %BT10
20
        LDR     v2, [v7]
        CMP     a1, #-1
        LDRNEB  a2, [v7, #4]
        MOVEQ   a2, #ARMunk
        STRB    a2, [v6, #ProcessorType]
        AND     a1, v2, #CT_ctype_mask
        MOV     a1, a1, LSR #CT_ctype_pos
        STRB    a1, [v6, #Cache_Type]
; STM should always store PC+8
; Should always be base restored abort model
; 26bit has been obsolete for a long time
        MOV     v5, #CPUFlag_StorePCplus8+CPUFlag_BaseRestored+CPUFlag_32bitOS+CPUFlag_No26bitMode
 [ HiProcVecs
        ORR     v5, v5, #CPUFlag_HiProcVecs
 ]
; Work out whether the cache info is in ARMv6 or ARMv7 style
; Top 3 bits of the cache type register give the register format
        ARM_read_cachetype v2
        MOV     a1, v2, LSR #29
        TEQ     a1, #4
        BEQ     %FT25
        TEQ     a1, #0
        BNE     WeirdARMPanic
; ARMv6 format cache type register.
; CPUs like the ARM1176JZF-S are available with a range of cache sizes,
; so it's not safe to rely on the values in the CPU table.
Fortunately ; all ARMv6 CPUs implement the register (by contrast, for the "plain" ; ARM case, no ARMv3 CPUs, some ARMv4 CPUs and all ARMv5 CPUs, so it ; needs to drop back to the table in some cases). MOV a1, v2, LSR #CT_Isize_pos ADD a2, v6, #ICache_Info BL EvaluateCache MOV a1, v2, LSR #CT_Dsize_pos ADD a2, v6, #DCache_Info BL EvaluateCache TST v2, #CT_S ORRNE v5, v5, #CPUFlag_SynchroniseCodeAreas+CPUFlag_SplitCache B %FT27 25 ; ARMv7 format cache type register. ; This should(!) mean that we have the cache level ID register, ; and all the other ARMv7 cache registers. ; Do we have a split cache? MRC p15, 1, a1, c0, c0, 1 AND a2, a1, #7 TEQ a2, #3 ORREQ v5, v5, #CPUFlag_SynchroniseCodeAreas+CPUFlag_SplitCache 27 [ CacheOff ORR v5, v5, #CPUFlag_SynchroniseCodeAreas | ARM_read_control a1 ; if Z bit set then we have branch prediction, TST a1, #MMUC_Z ; so we need OS_SynchroniseCodeAreas even if not ORRNE v5, v5, #CPUFlag_SynchroniseCodeAreas ; split caches ] BL Init_ARMarch STRB a1, [v6, #ProcessorArch] MRC p15, 0, a1, c0, c2, 2 TST a1, #&F000 ORRNE v5, v5, #CPUFlag_LongMul MRC p15, 0, a1, c0, c1, 0 TST a1, #&F000 ORRNE v5, v5, #CPUFlag_Thumb MSR CPSR_f, #Q32_bit MRS lr, CPSR TST lr, #Q32_bit ORRNE v5, v5, #CPUFlag_DSP ; Should we check instruction set attr register 3 for this? ; Other flags not checked for above: ; CPUFlag_InterruptDelay ; CPUFlag_VectorReadException ; CPUFlag_ExtendedPages ; CPUFlag_NoWBDrain ; CPUFlag_AbortRestartBroken ; CPUFlag_XScale ; CPUFlag_XScaleJTAGconnected LDRB v4, [v6, #ProcessorType] TEQ v4, #ARMunk ; Modify deduced flags ADRNEL lr, KnownCPUFlags ADDNE lr, lr, v4, LSL #3 LDMNEIA lr, {a2, a3} ORRNE v5, v5, a2 BICNE v5, v5, a3 STR v5, [v6, #ProcessorFlags] ; Cache analysis LDRB a2, [v6, #Cache_Type] TEQ a2, #CT_ctype_WB_CR7_LDa ; eg. ARM9 TEQNE a2, #CT_ctype_WB_CR7_LDc ; eg. ARM1176JZF-S - differs only in cache lockdown BEQ Analyse_WB_CR7_LDa TEQ a2, #CT_ctype_WB_CR7_Lx BEQ Analyse_WB_CR7_Lx ; eg. Cortex-A8, Cortex-A9 ; others ... 
B WeirdARMPanic ; stiff :) ] ; MEMM_Type = "VMSAv6" ; -------------------------------------------------------------------------- ; ----- ARMops ------------------------------------------------------------- ; -------------------------------------------------------------------------- ; ; ARMops are the routines required by the kernel for cache/MMU control ; the kernel vectors to the appropriate ops for the given ARM at boot ; ; The Rules: ; - These routines may corrupt a1 and lr only ; - (lr can of course only be corrupted whilst still returning to correct ; link address) ; - stack is available, at least 16 words can be stacked ; - a NULL op would be a simple MOV pc, lr ; ; In: r1 = cache level (0-based) ; Out: r0 = Flags ; bits 0-2: cache type: ; 000 -> none ; 001 -> instruction ; 010 -> data ; 011 -> split ; 100 -> unified ; 1xx -> reserved ; Other bits: reserved ; r1 = D line length ; r2 = D size ; r3 = I line length ; r4 = I size ; r0-r4 = zero if cache level not present Cache_Examine_Simple TEQ r1, #0 MOVNE r0, #0 MOVNE r1, #0 MOVNE r2, #0 MOVNE r3, #0 MOVNE r4, #0 MOVNE pc, lr LDR r4, =ZeroPage LDR r0, [r4, #ProcessorFlags] TST r0, #CPUFlag_SplitCache MOVNE r0, #3 MOVEQ r0, #4 LDRB r1, [r4, #DCache_LineLen] LDR r2, [r4, #DCache_Size] LDRB r3, [r4, #ICache_LineLen] LDR r4, [r4, #ICache_Size] MOV pc, lr [ MEMM_Type = "ARM600" ; -------------------------------------------------------------------------- ; ----- ARMops for ARMv3 --------------------------------------------------- ; -------------------------------------------------------------------------- ; ; ARMv3 ARMs include ARM710, ARM610, ARM7500 ; Cache_Invalidate_ARMv3 MCR p15, 0, a1, c7, c0 NullOp MOV pc, lr WriteBuffer_Drain_ARMv3 ;swap always forces unbuffered write, stalling till WB empty SUB sp, sp, #4 SWP a1, a1, [sp] ADD sp, sp, #4 MOV pc, lr TLB_Invalidate_ARMv3 MCR p15, 0, a1, c5, c0 MOV pc, lr ; a1 = page entry to invalidate (page aligned address) ; TLB_InvalidateEntry_ARMv3 MCR p15, 0, a1, c6, c0 MOV pc, lr MMU_Changing_ARMv3 MCR p15, 0, a1, c7, c0 ; invalidate cache MCR p15, 0, a1, c5, c0 ; invalidate TLB MOV pc, lr MMU_ChangingUncached_ARMv3 MCR p15, 0, a1, c5, c0 ; invalidate TLB MOV pc, lr ; a1 = page affected (page aligned address) ; MMU_ChangingEntry_ARMv3 MCR p15, 0, a1, c7, c0 ; invalidate cache MCR p15, 0, a1, c6, c0 ; invalidate TLB entry MOV pc, lr ; a1 = first page affected (page aligned address) ; a2 = number of pages ; MMU_ChangingEntries_ARMv3 ROUT CMP a2, #16 ; arbitrary-ish threshold BHS MMU_Changing_ARMv3 Push "a2" MCR p15, 0, a1, c7, c0 ; invalidate cache 10 MCR p15, 0, a1, c6, c0 ; invalidate TLB entry SUBS a2, a2, #1 ; next page ADD a1, a1, #PageSize BNE %BT10 Pull "a2" MOV pc, lr ; a1 = page affected (page aligned address) ; MMU_ChangingUncachedEntry_ARMv3 MCR p15, 0, a1, c6, c0 ; invalidate TLB entry MOV pc, lr ; a1 = first page affected (page aligned address) ; a2 = number of pages ; MMU_ChangingUncachedEntries_ARMv3 ROUT CMP a2, #16 ; arbitrary-ish threshold BHS MMU_ChangingUncached_ARMv3 Push "a2" 10 MCR p15, 0, a1, c6, c0 ; invalidate TLB entry SUBS a2, a2, #1 ; next page ADD a1, a1, #PageSize BNE %BT10 Pull "a2" MOV pc, lr Cache_RangeThreshold_ARMv3 ! 0, "arbitrary Cache_RangeThreshold_ARMv3" MOV a1, #16*PageSize MOV pc, lr LTORG ; -------------------------------------------------------------------------- ; ----- generic ARMops for simple ARMs, ARMv4 onwards ---------------------- ; -------------------------------------------------------------------------- ; ; eg. 
; ARM7TDMI based ARMs, unified, writethrough cache
;
Cache_InvalidateUnified
        MOV     a1, #0
        MCR     p15, 0, a1, c7, c7
        MOV     pc, lr

WriteBuffer_Drain_OffOn ; used if ARM has no drain WBuffer MCR op
        Push    "a2"
        ARM_read_control a1
        BIC     a2, a1, #MMUC_W
        ARM_write_control a2
        ARM_write_control a1
        Pull    "a2"
        MOV     pc, lr

WriteBuffer_Drain ; used if ARM has proper drain WBuffer MCR op
        MOV     a1, #0
        MCR     p15, 0, a1, c7, c10, 4
        MOV     pc, lr

TLB_Invalidate_Unified
        MOV     a1, #0
        MCR     p15, 0, a1, c8, c7
        MOV     pc, lr

; a1 = page entry to invalidate (page aligned address)
;
TLB_InvalidateEntry_Unified
        MCR     p15, 0, a1, c8, c7, 1
        MOV     pc, lr

MMU_Changing_Writethrough
        MOV     a1, #0
        MCR     p15, 0, a1, c7, c7              ; invalidate cache
        MCR     p15, 0, a1, c8, c7              ; invalidate TLB
        MOV     pc, lr

MMU_ChangingUncached
        MOV     a1, #0
        MCR     p15, 0, a1, c8, c7              ; invalidate TLB
        MOV     pc, lr

; a1 = page affected (page aligned address)
;
MMU_ChangingEntry_Writethrough
        Push    "a4"
        MOV     a4, #0
        MCR     p15, 0, a4, c7, c7              ; invalidate cache
        MCR     p15, 0, a1, c8, c7, 1           ; invalidate TLB entry
        Pull    "a4"
        MOV     pc, lr

; a1 = first page affected (page aligned address)
; a2 = number of pages
;
MMU_ChangingEntries_Writethrough ROUT
        CMP     a2, #16                         ; arbitrary-ish threshold
        BHS     MMU_Changing_Writethrough
        Push    "a2,a4"
        MOV     a4, #0
        MCR     p15, 0, a4, c7, c7              ; invalidate cache
10
        MCR     p15, 0, a1, c8, c7, 1           ; invalidate TLB entry
        SUBS    a2, a2, #1                      ; next page
        ADD     a1, a1, #PageSize
        BNE     %BT10
        Pull    "a2,a4"
        MOV     pc, lr

; a1 = page affected (page aligned address)
;
MMU_ChangingUncachedEntry
        MCR     p15, 0, a1, c8, c7, 1           ; invalidate TLB entry
        MOV     pc, lr

; a1 = first page affected (page aligned address)
; a2 = number of pages
;
MMU_ChangingUncachedEntries ROUT
        CMP     a2, #16                         ; arbitrary-ish threshold
        BHS     MMU_ChangingUncached
        Push    "a2"
10
        MCR     p15, 0, a1, c8, c7, 1           ; invalidate TLB entry
        SUBS    a2, a2, #1                      ; next page
        ADD     a1, a1, #PageSize
        BNE     %BT10
        Pull    "a2"
        MOV     pc, lr

Cache_RangeThreshold_Writethrough
        ! 0, "arbitrary Cache_RangeThreshold_Writethrough"
        MOV     a1, #16*PageSize
        MOV     pc, lr

 ] ; MEMM_Type = "ARM600"

; --------------------------------------------------------------------------
; ----- ARMops for ARM9 and the like ---------------------------------------
; --------------------------------------------------------------------------
;
; WB_CR7_LDa refers to ARMs with writeback data cache, cleaned with
; register 7, lockdown available (format A)
;
; Note that ARM920 etc have writeback/writethrough data cache selectable
; by MMU regions. For simplicity, we assume cacheable pages are mostly
; writeback. Any writethrough pages will have redundant clean operations
; applied when moved, for example, but this is a small overhead (cleaning
; a clean line is very quick on ARM 9).
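;
; As a rough sketch (C-like pseudocode for illustration, using the ARM920 example
; values quoted in the header of Cache_CleanAll_WB_CR7_LDa below; the field
; positions differ on other cores, which is why the values are held in
; DCache_IndexBit/DCache_LineLen/DCache_IndexSegStart), the clean-all loop
; amounts to:
;
;   for (seg = 7; seg >= 0; seg--)        /* segment field, steps of DCache_LineLen */
;       for (idx = 0; idx < 64; idx++)    /* index field,   steps of DCache_IndexBit */
;           clean_dcache_seg_idx((idx << 26) | (seg << 5));  /* MCR p15,0,Rd,c7,c10,2 */
;   drain_write_buffer();                 /* MCR p15,0,Rd,c7,c10,4 */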
Cache_CleanAll_WB_CR7_LDa ROUT ; ; only guarantees to clean lines not involved in interrupts (so we can ; clean without disabling interrupts) ; ; Clean cache by traversing all segment and index values ; As a concrete example, for ARM 920 (16k+16k caches) we would have: ; ; DCache_LineLen = 32 (32 byte cache line, segment field starts at bit 5) ; DCache_IndexBit = &04000000 (index field starts at bit 26) ; DCache_IndexSegStart = &000000E0 (start at index=0, segment = 7) ; Push "a2, ip" LDR ip, =ZeroPage LDRB a1, [ip, #DCache_LineLen] ; segment field starts at this bit LDR a2, [ip, #DCache_IndexBit] ; index field starts at this bit LDR ip, [ip, #DCache_IndexSegStart] ; starting value, with index at min, seg at max 10 MCR p15, 0, ip, c7, c10, 2 ; clean DCache entry by segment/index ADDS ip, ip, a2 ; next index, counting up, CS if wrapped back to 0 BCC %BT10 SUBS ip, ip, a1 ; next segment, counting down, CC if wrapped back to max BCS %BT10 ; if segment wrapped, then we've finished MOV ip, #0 MCR p15, 0, ip, c7, c10, 4 ; drain WBuffer Pull "a2, ip" MOV pc, lr Cache_CleanInvalidateAll_WB_CR7_LDa ROUT ; ; similar to Cache_CleanAll, but does clean&invalidate of Dcache, and invalidates ICache ; Push "a2, ip" LDR ip, =ZeroPage LDRB a1, [ip, #DCache_LineLen] ; segment field starts at this bit LDR a2, [ip, #DCache_IndexBit] ; index field starts at this bit LDR ip, [ip, #DCache_IndexSegStart] ; starting value, with index at min, seg at max 10 MCR p15, 0, ip, c7, c14, 2 ; clean&invalidate DCache entry by segment/index ADDS ip, ip, a2 ; next index, counting up, CS if wrapped back to 0 BCC %BT10 SUBS ip, ip, a1 ; next segment, counting down, CC if wrapped back to max BCS %BT10 ; if segment wrapped, then we've finished MOV ip, #0 MCR p15, 0, ip, c7, c10, 4 ; drain WBuffer MCR p15, 0, ip, c7, c5, 0 ; invalidate ICache Pull "a2, ip" MOV pc, lr Cache_InvalidateAll_WB_CR7_LDa ROUT ; ; no clean, assume caller knows what's happening ; MOV a1, #0 MCR p15, 0, a1, c7, c7, 0 ; invalidate ICache and DCache MOV pc, lr Cache_RangeThreshold_WB_CR7_LDa ROUT LDR a1, =ZeroPage LDR a1, [a1, #DCache_RangeThreshold] MOV pc, lr TLB_InvalidateAll_WB_CR7_LDa ROUT MMU_ChangingUncached_WB_CR7_LDa MOV a1, #0 MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB MOV pc, lr ; a1 = page affected (page aligned address) ; TLB_InvalidateEntry_WB_CR7_LDa ROUT MMU_ChangingUncachedEntry_WB_CR7_LDa MCR p15, 0, a1, c8, c5, 1 ; invalidate ITLB entry MCR p15, 0, a1, c8, c6, 1 ; invalidate DTLB entry MOV pc, lr WriteBuffer_Drain_WB_CR7_LDa ROUT MOV a1, #0 MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer MOV pc, lr IMB_Full_WB_CR7_LDa ROUT ; ; do: clean DCache; drain WBuffer, invalidate ICache ; Push "lr" BL Cache_CleanAll_WB_CR7_LDa ; also drains Wbuffer MOV a1, #0 MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache Pull "pc" ; a1 = start address (inclusive, cache line aligned) ; a2 = end address (exclusive, cache line aligned) ; IMB_Range_WB_CR7_LDa ROUT SUB a2, a2, a1 CMP a2, #32*1024 ; arbitrary-ish range threshold ADD a2, a2, a1 BHS IMB_Full_WB_CR7_LDa Push "lr" LDR lr, =ZeroPage LDRB lr, [lr, #DCache_LineLen] 10 MCR p15, 0, a1, c7, c10, 1 ; clean DCache entry by VA MCR p15, 0, a1, c7, c5, 1 ; invalidate ICache entry ADD a1, a1, lr CMP a1, a2 BLO %BT10 MOV a1, #0 MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer MCR p15, 0, a1, c7, c5, 6 ; flush branch predictors Pull "pc" ; a1 = pointer to list of (start, end) address pairs ; a2 = pointer to end of list ; a3 = total amount of memory to be synchronised ; IMB_List_WB_CR7_LDa ROUT CMP a3, #32*1024 ; arbitrary-ish 
range threshold BHS IMB_Full_WB_CR7_LDa Push "v1-v2,lr" LDR lr, =ZeroPage LDRB lr, [lr, #DCache_LineLen] 05 LDMIA a1!, {v1-v2} 10 MCR p15, 0, v1, c7, c10, 1 ; clean DCache entry by VA MCR p15, 0, v1, c7, c5, 1 ; invalidate ICache entry ADD v1, v1, lr CMP v1, v2 BLO %BT10 CMP a1, a2 BNE %BT05 MOV a1, #0 MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer MCR p15, 0, a1, c7, c5, 6 ; flush branch predictors Pull "v1-v2,pc" MMU_Changing_WB_CR7_LDa ROUT Push "lr" BL Cache_CleanInvalidateAll_WB_CR7_LDa MOV a1, #0 MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB Pull "pc" ; a1 = page affected (page aligned address) ; MMU_ChangingEntry_WB_CR7_LDa ROUT Push "a2, lr" ADD a2, a1, #PageSize LDR lr, =ZeroPage LDRB lr, [lr, #DCache_LineLen] 10 MCR p15, 0, a1, c7, c14, 1 ; clean&invalidate DCache entry MCR p15, 0, a1, c7, c5, 1 ; invalidate ICache entry ADD a1, a1, lr CMP a1, a2 BLO %BT10 MOV lr, #0 MCR p15, 0, lr, c7, c10, 4 ; drain WBuffer MCR p15, 0, a1, c7, c5, 6 ; flush branch predictors SUB a1, a1, #PageSize MCR p15, 0, a1, c8, c6, 1 ; invalidate DTLB entry MCR p15, 0, a1, c8, c5, 1 ; invalidate ITLB entry Pull "a2, pc" ; a1 = first page affected (page aligned address) ; a2 = number of pages ; MMU_ChangingEntries_WB_CR7_LDa ROUT Push "a2, a3, lr" MOV a2, a2, LSL #Log2PageSize LDR lr, =ZeroPage LDR a3, [lr, #DCache_RangeThreshold] ;check whether cheaper to do global clean CMP a2, a3 BHS %FT30 ADD a2, a2, a1 ;clean end address (exclusive) LDRB a3, [lr, #DCache_LineLen] MOV lr, a1 10 MCR p15, 0, a1, c7, c14, 1 ; clean&invalidate DCache entry MCR p15, 0, a1, c7, c5, 1 ; invalidate ICache entry ADD a1, a1, a3 CMP a1, a2 BLO %BT10 MOV a1, #0 MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer MCR p15, 0, a1, c7, c5, 6 ; flush branch predictors MOV a1, lr ; restore start address 20 MCR p15, 0, a1, c8, c6, 1 ; invalidate DTLB entry MCR p15, 0, a1, c8, c5, 1 ; invalidate ITLB entry ADD a1, a1, #PageSize CMP a1, a2 BLO %BT20 Pull "a2, a3, pc" ; 30 BL Cache_CleanInvalidateAll_WB_CR7_LDa MOV a1, #0 MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB Pull "a2, a3, pc" ; a1 = first page affected (page aligned address) ; a2 = number of pages ; MMU_ChangingUncachedEntries_WB_CR7_LDa ROUT CMP a2, #32 ; arbitrary-ish threshold BHS %FT20 Push "a2" 10 MCR p15, 0, a1, c8, c6, 1 ; invalidate DTLB entry MCR p15, 0, a1, c8, c5, 1 ; invalidate ITLB entry ADD a1, a1, #PageSize SUBS a2, a2, #1 BNE %BT10 Pull "a2" MOV pc, lr ; 20 MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB MOV pc, lr [ MEMM_Type = "ARM600" ; -------------------------------------------------------------------------- ; ----- ARMops for StrongARM and the like ---------------------------------- ; -------------------------------------------------------------------------- ; WB_Crd is Writeback data cache, clean by reading data from cleaner area ; Currently no support for mini data cache on some StrongARM variants. Mini ; cache is always writeback and must have cleaning support, so is very ; awkward to use for cacheable screen, say. ; Global cache cleaning requires address space for private cleaner areas (not accessed ; for any other reason). Cleaning is normally with interrupts enabled (to avoid a latency ; hit), which means that the cleaner data is not invalidated afterwards. This is fine for ; RISC OS - where the private area is not used for anything else, and any re-use of the ; cache under interrupts is safe (eg. a page being moved is *never* involved in any ; active interrupts). 
; Mostly, cleaning toggles between two separate cache-sized areas, which gives minimum ; cleaning cost while guaranteeing proper clean even if previous clean data is present. If ; the clean routine is re-entered, an independent, double sized clean is initiated. This ; guarantees proper cleaning (regardless of multiple re-entrancy) whilst hardly complicating ; the routine at all. The overhead is small, since by far the most common cleaning will be ; non-re-entered. The upshot is that the cleaner address space available must be at least 4 ; times the cache size: ; 1 : used alternately, on 1st, 3rd, ... non-re-entered cleans ; 2 : used alternately, on 2nd, 4th, ... non-re-entered cleans ; 3 : used only for first half of a re-entered clean ; 4 : used only for second half of a re-entered clean ; ; DCache_CleanBaseAddress : start address of total cleaner space ; DCache_CleanNextAddress : start address for next non-re-entered clean, or 0 if re-entered Cache_CleanAll_WB_Crd ROUT ; ; - cleans data cache (and invalidates it as a side effect) ; - can be used with interrupts enabled (to avoid latency over time of clean) ; - can be re-entered ; - see remarks at top of StrongARM ops for discussion of strategy ; Push "a2-a4, v1, v2, lr" LDR lr, =ZeroPage LDR a1, [lr, #DCache_CleanBaseAddress] LDR a2, =DCache_CleanNextAddress LDR a3, [lr, #DCache_Size] LDRB a4, [lr, #DCache_LineLen] MOV v2, #0 SWP v1, v2, [a2] ; read current CleanNextAddr, zero it (semaphore) TEQ v1, #0 ; but if it is already zero, we have re-entered ADDEQ v1, a1, a3, LSL #1 ; if re-entered, start clean at Base+2*Cache_Size ADDEQ v2, v1, a3, LSL #1 ; if re-entered, do a clean of 2*Cache_Size ADDNE v2, v1, a3 ; if not re-entered, do a clean of Cache_Size 10 LDR lr, [v1], a4 TEQ v1, v2 BNE %BT10 ADD v2, a1, a3, LSL #1 ; compare end address with Base+2*Cache_Size CMP v1, v2 MOVEQ v1, a1 ; if equal, not re-entered and Next wraps back STRLS v1, [a2] ; if lower or same, not re-entered, so update Next MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer Pull "a2-a4, v1, v2, pc" Cache_CleanInvalidateAll_WB_Crd ROUT IMB_Full_WB_Crd ; ;does not truly invalidate DCache, but effectively invalidates (flushes) all lines not ;involved in interrupts - this is sufficient for OS requirements, and means we don't ;have to disable interrupts for possibly slow clean ; Push "lr" BL Cache_CleanAll_WB_Crd ;clean DCache (wrt to non-interrupt stuff) MCR p15, 0, a1, c7, c5, 0 ;flush ICache Pull "pc" Cache_InvalidateAll_WB_Crd ; ; no clean, assume caller knows what is happening ; MCR p15, 0, a1, c7, c7, 0 ;flush ICache and DCache MCR p15, 0, a1, c7, c10, 4 ;drain WBuffer MOV pc, lr Cache_RangeThreshold_WB_Crd LDR a1, =ZeroPage LDR a1, [a1, #DCache_RangeThreshold] MOV pc, lr TLB_InvalidateAll_WB_Crd MMU_ChangingUncached_WB_Crd MCR p15, 0, a1, c8, c7, 0 ;flush ITLB and DTLB MOV pc, lr TLB_InvalidateEntry_WB_Crd MMU_ChangingUncachedEntry_WB_Crd MCR p15, 0, a1, c8, c6, 1 ;flush DTLB entry MCR p15, 0, a1, c8, c5, 0 ;flush ITLB MOV pc, lr WriteBuffer_Drain_WB_Crd MCR p15, 0, a1, c7, c10, 4 ;drain WBuffer MOV pc, lr IMB_Range_WB_Crd ROUT SUB a2, a2, a1 CMP a2, #64*1024 ;arbitrary-ish range threshold ADD a2, a2, a1 BHS IMB_Full_WB_Crd Push "lr" LDR lr, =ZeroPage LDRB lr, [lr, #DCache_LineLen] 10 MCR p15, 0, a1, c7, c10, 1 ;clean DCache entry ADD a1, a1, lr CMP a1, a2 BLO %BT10 MCR p15, 0, a1, c7, c10, 4 ;drain WBuffer MCR p15, 0, a1, c7, c5, 0 ;flush ICache Pull "pc" IMB_List_WB_Crd ROUT CMP a3, #64*1024 ;arbitrary-ish range threshold BHS IMB_Full_WB_Crd Push "v1-v2,lr" LDR lr, 
=ZeroPage LDRB lr, [lr, #DCache_LineLen] 05 LDMIA a1!, {v1-v2} 10 MCR p15, 0, v1, c7, c10, 1 ;clean DCache entry ADD v1, v1, lr CMP v1, v2 BLO %BT10 CMP a1, a2 BNE %BT05 MCR p15, 0, a1, c7, c10, 4 ;drain WBuffer MCR p15, 0, a1, c7, c5, 0 ;flush ICache Pull "v1-v2,pc" MMU_Changing_WB_Crd Push "lr" BL Cache_CleanAll_WB_Crd ;clean DCache (wrt to non-interrupt stuff) MCR p15, 0, a1, c7, c5, 0 ;flush ICache MCR p15, 0, a1, c8, c7, 0 ;flush ITLB and DTLB Pull "pc" MMU_ChangingEntry_WB_Crd ROUT ; ;there is no clean&invalidate DCache instruction, however we can do clean ;entry followed by invalidate entry without an interrupt hole, because they ;are for the same virtual address (and that virtual address will not be ;involved in interrupts, since it is involved in remapping) ; Push "a2, lr" ADD a2, a1, #PageSize LDR lr, =ZeroPage LDRB lr, [lr, #DCache_LineLen] 10 MCR p15, 0, a1, c7, c10, 1 ;clean DCache entry MCR p15, 0, a1, c7, c6, 1 ;flush DCache entry ADD a1, a1, lr CMP a1, a2 BLO %BT10 SUB a1, a1, #PageSize MCR p15, 0, a1, c7, c10, 4 ;drain WBuffer MCR p15, 0, a1, c7, c5, 0 ;flush ICache MCR p15, 0, a1, c8, c6, 1 ;flush DTLB entry MCR p15, 0, a1, c8, c5, 0 ;flush ITLB Pull "a2, pc" MMU_ChangingEntries_WB_Crd ROUT ; ;same comments as MMU_ChangingEntry_WB_Crd ; Push "a2, a3, lr" MOV a2, a2, LSL #Log2PageSize LDR lr, =ZeroPage LDR a3, [lr, #DCache_RangeThreshold] ;check whether cheaper to do global clean CMP a2, a3 BHS %FT30 ADD a2, a2, a1 ;clean end address (exclusive) LDRB a3, [lr, #DCache_LineLen] MOV lr, a1 10 MCR p15, 0, a1, c7, c10, 1 ;clean DCache entry MCR p15, 0, a1, c7, c6, 1 ;flush DCache entry ADD a1, a1, a3 CMP a1, a2 BLO %BT10 MCR p15, 0, a1, c7, c10, 4 ;drain WBuffer MCR p15, 0, a1, c7, c5, 0 ;flush ICache MOV a1, lr ;restore start address 20 MCR p15, 0, a1, c8, c6, 1 ;flush DTLB entry ADD a1, a1, #PageSize CMP a1, a2 BLO %BT20 MCR p15, 0, a1, c8, c5, 0 ;flush ITLB Pull "a2, a3, pc" ; 30 BL Cache_CleanAll_WB_Crd ;clean DCache (wrt to non-interrupt stuff) MCR p15, 0, a1, c7, c5, 0 ;flush ICache MCR p15, 0, a1, c8, c7, 0 ;flush ITLB and DTLB Pull "a2, a3, pc" MMU_ChangingUncachedEntries_WB_Crd ROUT CMP a2, #32 ;arbitrary-ish threshold BHS %FT20 Push "lr" MOV lr, a2 10 MCR p15, 0, a1, c8, c6, 1 ;flush DTLB entry ADD a1, a1, #PageSize SUBS lr, lr, #1 BNE %BT10 MCR p15, 0, a1, c8, c5, 0 ;flush ITLB Pull "pc" ; 20 MCR p15, 0, a1, c8, c7, 0 ;flush ITLB and DTLB MOV pc, lr LTORG ; ARMops for XScale, mjs Feb 2001 ; ; WB_Cal_LD is writeback, clean with allocate, lockdown ; ; If the mini data cache is used (XScaleMiniCache true), it is assumed to be ; configured writethrough (eg. used for RISC OS screen memory). This saves an ugly/slow ; mini cache clean for things like IMB_Full. ; ; Sadly, for global cache invalidate with mini cache, things are awkward. We can't clean the ; main cache then do the global invalidate MCR, unless we tolerate having _all_ interrupts ; off (else the main cache may be slightly dirty from interrupts, and the invalidate ; will lose data). So we must reluctantly 'invalidate' the mini cache by the ugly/slow ; mechanism as if we were cleaning it :-( Intel should provide a separate global invalidate ; (and perhaps a line allocate) for the mini cache. ; ; We do not use lockdown. ; ; For simplicity, we assume cacheable pages are mostly writeback. Any writethrough ; pages will be invalidated as if they were writeback, but there is little overhead ; (cleaning a clean line or allocating a line from cleaner area are both fast). 
; Global cache cleaning requires address space for private cleaner areas (not accessed ; for any other reason). Cleaning is normally with interrupts enabled (to avoid a latency ; hit), which means that the cleaner data is not invalidated afterwards. This is fine for ; RISC OS - where the private area is not used for anything else, and any re-use of the ; cache under interrupts is safe (eg. a page being moved is *never* involved in any ; active interrupts). ; Mostly, cleaning toggles between two separate cache-sized areas, which gives minimum ; cleaning cost while guaranteeing proper clean even if previous clean data is present. If ; the clean routine is re-entered, an independent, double sized clean is initiated. This ; guarantees proper cleaning (regardless of multiple re-entrancy) whilst hardly complicating ; the routine at all. The overhead is small, since by far the most common cleaning will be ; non-re-entered. The upshot is that the cleaner address space available must be at least 4 ; times the cache size: ; 1 : used alternately, on 1st, 3rd, ... non-re-entered cleans ; 2 : used alternately, on 2nd, 4th, ... non-re-entered cleans ; 3 : used only for first half of a re-entered clean ; 4 : used only for second half of a re-entered clean ; ; If the mini cache is used, it has its own equivalent cleaner space and algorithm. ; Parameters for each cache are: ; ; Cache_CleanBaseAddress : start address of total cleaner space ; Cache_CleanNextAddress : start address for next non-re-entered clean, or 0 if re-entered GBLL XScaleMiniCache ; *must* be configured writethrough if used XScaleMiniCache SETL {FALSE} ; MACRO to do Intel approved CPWAIT, to guarantee any previous MCR's have taken effect ; corrupts a1 ; MACRO CPWAIT MRC p15, 0, a1, c2, c0, 0 ; arbitrary read of CP15 MOV a1, a1 ; wait for it ; SUB pc, pc, #4 omitted, because all ops have a pc load to return to caller MEND Cache_CleanAll_WB_Cal_LD ROUT ; ; - cleans main cache (and invalidates as a side effect) ; - if mini cache is in use, will be writethrough so no clean required ; - can be used with interrupts enabled (to avoid latency over time of clean) ; - can be re-entered ; - see remarks at top of XScale ops for discussion of strategy ; Push "a2-a4, v1, v2, lr" LDR lr, =ZeroPage LDR a1, [lr, #DCache_CleanBaseAddress] LDR a2, =ZeroPage+DCache_CleanNextAddress LDR a3, [lr, #DCache_Size] LDRB a4, [lr, #DCache_LineLen] MOV v2, #0 SWP v1, v2, [a2] ; read current CleanNextAddr, zero it (semaphore) TEQ v1, #0 ; but if it is already zero, we have re-entered ADDEQ v1, a1, a3, LSL #1 ; if re-entered, start clean at Base+2*Cache_Size ADDEQ v2, v1, a3, LSL #1 ; if re-entered, do a clean of 2*Cache_Size ADDNE v2, v1, a3 ; if not re-entered, do a clean of Cache_Size 10 MCR p15, 0, v1, c7, c2, 5 ; allocate address from cleaner space ADD v1, v1, a4 TEQ v1, v2 BNE %BT10 ADD v2, a1, a3, LSL #1 ; compare end address with Base+2*Cache_Size CMP v1, v2 MOVEQ v1, a1 ; if equal, not re-entered and Next wraps back STRLS v1, [a2] ; if lower or same, not re-entered, so update Next MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer (waits, so no need for CPWAIT) Pull "a2-a4, v1, v2, pc" [ XScaleMiniCache Cache_MiniInvalidateAll_WB_Cal_LD ROUT ; ; similar to Cache_CleanAll_WB_Cal_LD, but must do direct reads (cannot use allocate address MCR), and ; 'cleans' to achieve invalidate as side effect (mini cache will be configured writethrough) ; Push "a2-a4, v1, v2, lr" LDR lr, =ZeroPage LDR a1, [lr, #MCache_CleanBaseAddress] LDR a2, =ZeroPage+MCache_CleanNextAddr LDR a3, [lr, 
#MCache_Size] LDRB a4, [lr, #MCache_LineLen] MOV v2, #0 SWP v1, v2, [a2] ; read current CleanNextAddr, zero it (semaphore) TEQ v1, #0 ; but if it is already zero, we have re-entered ADDEQ v1, a1, a3, LSL #1 ; if re-entered, start clean at Base+2*Cache_Size ADDEQ v2, v1, a3, LSL #1 ; if re-entered, do a clean of 2*Cache_Size ADDNE v2, v1, a3 ; if not re-entered, do a clean of Cache_Size 10 LDR lr, [v1], a4 ; read a line of cleaner data TEQ v1, v2 BNE %BT10 ADD v2, a1, a3, LSL #1 ; compare end address with Base+2*Size CMP v1, v2 MOVEQ v1, a1 ; if equal, not re-entered and Next wraps back STRLS v1, [a2] ; if lower or same, not re-entered, so update Next ; note, no drain WBuffer, since we are really only invalidating a writethrough cache Pull "a2-a4, v1, v2, pc" ] ; XScaleMiniCache Cache_CleanInvalidateAll_WB_Cal_LD ROUT ; ; - cleans main cache (and invalidates wrt OS stuff as a side effect) ; - if mini cache in use (will be writethrough), 'cleans' in order to invalidate as side effect ; Push "lr" BL Cache_CleanAll_WB_Cal_LD [ XScaleMiniCache BL Cache_MiniInvalidateAll_WB_Cal_LD ] MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache and BTB CPWAIT Pull "pc" Cache_InvalidateAll_WB_Cal_LD ROUT ; ; no clean, assume caller knows what's happening ; MCR p15, 0, a1, c7, c7, 0 ; invalidate DCache, (MiniCache), ICache and BTB CPWAIT MOV pc, lr Cache_RangeThreshold_WB_Cal_LD ROUT LDR a1, =ZeroPage LDR a1, [a1, #DCache_RangeThreshold] MOV pc, lr TLB_InvalidateAll_WB_Cal_LD ROUT MMU_ChangingUncached_WB_Cal_LD MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB CPWAIT MOV pc, lr TLB_InvalidateEntry_WB_Cal_LD ROUT MMU_ChangingUncachedEntry_WB_Cal_LD MCR p15, 0, a1, c8, c5, 1 ; invalidate ITLB entry MCR p15, 0, a1, c8, c6, 1 ; invalidate DTLB entry CPWAIT MOV pc, lr WriteBuffer_Drain_WB_Cal_LD ROUT MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer (waits, so no need for CPWAIT) MOV pc, lr IMB_Full_WB_Cal_LD Push "lr" BL Cache_CleanAll_WB_Cal_LD ; clean DCache (wrt to non-interrupt stuff) MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache and BTB CPWAIT Pull "pc" IMB_Range_WB_Cal_LD ROUT SUB a2, a2, a1 CMP a2, #32*1024 ; arbitrary-ish range threshold ADD a2, a2, a1 BHS IMB_Full_WB_Cal_LD Push "lr" LDR lr, =ZeroPage LDRB lr, [lr, #DCache_LineLen] 10 MCR p15, 0, a1, c7, c10, 1 ; clean DCache entry [ :LNOT:XScaleJTAGDebug MCR p15, 0, a1, c7, c5, 1 ; invalidate ICache entry ] ADD a1, a1, lr CMP a1, a2 BLO %BT10 [ XScaleJTAGDebug MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache and BTB | MCR p15, 0, a1, c7, c5, 6 ; invalidate BTB ] MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer (waits, so no need for CPWAIT) Pull "pc" IMB_List_WB_Cal_LD ROUT CMP a3, #32*1024 ; arbitrary-ish range threshold BHS IMB_Full_WB_Cal_LD Push "v1-v2,lr" LDR lr, =ZeroPage LDRB lr, [lr, #DCache_LineLen] 05 LDMIA a1!, {v1-v2} 10 MCR p15, 0, v1, c7, c10, 1 ; clean DCache entry [ :LNOT:XScaleJTAGDebug MCR p15, 0, v1, c7, c5, 1 ; invalidate ICache entry ] ADD v1, v1, lr CMP v1, v2 BLO %BT10 CMP a1, a2 BNE %BT05 [ XScaleJTAGDebug MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache and BTB | MCR p15, 0, a1, c7, c5, 6 ; invalidate BTB ] MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer (waits, so no need for CPWAIT) Pull "v1-v2,pc" MMU_Changing_WB_Cal_LD ROUT Push "lr" BL Cache_CleanAll_WB_Cal_LD MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache and BTB MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB CPWAIT Pull "pc" MMU_ChangingEntry_WB_Cal_LD ROUT ; ;there is no clean&invalidate DCache instruction, however we can do clean ;entry followed by invalidate entry without an interrupt 
hole, because they ;are for the same virtual address (and that virtual address will not be ;involved in interrupts, since it is involved in remapping) ; Push "a2, lr" ADD a2, a1, #PageSize LDR lr, =ZeroPage LDRB lr, [lr, #DCache_LineLen] 10 MCR p15, 0, a1, c7, c10, 1 ; clean DCache entry MCR p15, 0, a1, c7, c6, 1 ; invalidate DCache entry [ :LNOT:XScaleJTAGDebug MCR p15, 0, a1, c7, c5, 1 ; invalidate ICache entry ] ADD a1, a1, lr CMP a1, a2 BLO %BT10 MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer [ XScaleJTAGDebug MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache and BTB | MCR p15, 0, a1, c7, c5, 6 ; invalidate BTB ] SUB a1, a1, #PageSize MCR p15, 0, a1, c8, c6, 1 ; invalidate DTLB entry MCR p15, 0, a1, c8, c5, 1 ; invalidate ITLB entry CPWAIT Pull "a2, pc" MMU_ChangingEntries_WB_Cal_LD ROUT ; ;same comments as MMU_ChangingEntry_WB_Cal_LD ; Push "a2, a3, lr" MOV a2, a2, LSL #Log2PageSize LDR lr, =ZeroPage LDR a3, [lr, #DCache_RangeThreshold] ;check whether cheaper to do global clean CMP a2, a3 BHS %FT30 ADD a2, a2, a1 ;clean end address (exclusive) LDRB a3, [lr, #DCache_LineLen] MOV lr, a1 10 MCR p15, 0, a1, c7, c10, 1 ; clean DCache entry MCR p15, 0, a1, c7, c6, 1 ; invalidate DCache entry [ :LNOT:XScaleJTAGDebug MCR p15, 0, a1, c7, c5, 1 ; invalidate ICache entry ] ADD a1, a1, a3 CMP a1, a2 BLO %BT10 MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer [ XScaleJTAGDebug MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache and BTB | MCR p15, 0, a1, c7, c5, 6 ; invalidate BTB ] MOV a1, lr ; restore start address 20 MCR p15, 0, a1, c8, c6, 1 ; invalidate DTLB entry MCR p15, 0, a1, c8, c5, 1 ; invalidate ITLB entry ADD a1, a1, #PageSize CMP a1, a2 BLO %BT20 CPWAIT Pull "a2, a3, pc" ; 30 BL Cache_CleanInvalidateAll_WB_Cal_LD MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB CPWAIT Pull "a2, a3, pc" MMU_ChangingUncachedEntries_WB_Cal_LD ROUT CMP a2, #32 ; arbitrary-ish threshold BHS %FT20 Push "lr" MOV lr, a2 10 MCR p15, 0, a1, c8, c6, 1 ; invalidate DTLB entry MCR p15, 0, a1, c8, c5, 1 ; invalidate ITLB entry SUBS lr, lr, #1 ADD a1, a1, #PageSize BNE %BT10 CPWAIT Pull "pc" ; 20 MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB CPWAIT MOV pc, lr ] ; MEMM_Type = "ARM600" [ MEMM_Type = "VMSAv6" ; Need appropriate myIMB, etc. 
implementations if this is to be removed ; -------------------------------------------------------------------------- ; ----- ARMops for Cortex-A8 and the like ---------------------------------- ; -------------------------------------------------------------------------- ; WB_CR7_Lx refers to ARMs with writeback data cache, cleaned with ; register 7, and (potentially) multiple cache levels ; ; DCache_LineLen = log2(line len)-2 for smallest data/unified cache line length ; ICache_LineLen = log2(line len)-2 for smallest instruction cache line length ; DCache_RangeThreshold = clean threshold for data cache ; Cache_Lx_Info = Cache level ID register ; Cache_Lx_DTable = Cache size identification register for all 7 data/unified caches ; Cache_Lx_ITable = Cache size identification register for all 7 instruction caches ; ARMv7 cache maintenance routines are a bit long-winded, so we use this macro ; to reduce the risk of mistakes creeping in due to code duplication ; ; $op: Operation to perform ('clean', 'invalidate', 'cleaninvalidate') ; $levels: Which levels to apply to ('lou', 'loc', 'louis') ; Uses r0-r8 & lr as temp ; Performs the indicated op on the indicated data & unified caches ; ; Code based around the alternate/faster code given in the ARMv7 ARM (section ; B2.2.4, alternate/faster code only in doc revision 9), but tightened up a bit ; ; Note that HAL_InvalidateCache_ARMvF uses its own implementation of this ; algorithm, since it must cope with different temporary registers and it needs ; to read the cache info straight from the CP15 registers ; MACRO MaintainDataCache_WB_CR7_Lx $op, $levels LDR lr, =ZeroPage LDR r0, [lr, #Cache_Lx_Info]! ADD lr, lr, #Cache_Lx_DTable-Cache_Lx_Info [ "$levels"="lou" ANDS r3, r0, #&38000000 MOV r3, r3, LSR #26 ; Cache level value (naturally aligned) | [ "$levels"="loc" ANDS r3, r0, #&07000000 MOV r3, r3, LSR #23 ; Cache level value (naturally aligned) | [ "$levels"="louis" ANDS r3, r0, #&00E00000 MOV r3, r3, LSR #20 ; Cache level value (naturally aligned) | ! 1, "Unrecognised levels" ] ] ] BEQ %FT50 MOV r8, #0 ; Current cache level 10 ; Loop1 ADD r2, r8, r8, LSR #1 ; Work out 3 x cachelevel MOV r1, r0, LSR r2 ; bottom 3 bits are the Cache type for this level AND r1, r1, #7 ; get those 3 bits alone CMP r1, #2 BLT %FT40 ; no cache or only instruction cache at this level LDR r1, [lr, r8, LSL #1] ; read CCSIDR to r1 AND r2, r1, #CCSIDR_LineSize_mask ; extract the line length field ADD r2, r2, #4 ; add 4 for the line length offset (log2 16 bytes) LDR r7, =CCSIDR_Associativity_mask:SHR:CCSIDR_Associativity_pos AND r7, r7, r1, LSR #CCSIDR_Associativity_pos ; r7 is the max number on the way size (right aligned) CLZ r5, r7 ; r5 is the bit position of the way size increment LDR r4, =CCSIDR_NumSets_mask:SHR:CCSIDR_NumSets_pos AND r4, r4, r1, LSR #CCSIDR_NumSets_pos ; r4 is the max number of the index size (right aligned) 20 ; Loop2 MOV r1, r4 ; r1 working copy of the max index size (right aligned) 30 ; Loop3 ORR r6, r8, r7, LSL r5 ; factor in the way number and cache number into r6 ORR r6, r6, r1, LSL r2 ; factor in the index number [ "$op"="clean" MCR p15, 0, r6, c7, c10, 2 ; Clean | [ "$op"="invalidate" MCR p15, 0, r6, c7, c6, 2 ; Invalidate | [ "$op"="cleaninvalidate" MCR p15, 0, r6, c7, c14, 2 ; Clean & invalidate | ! 1, "Unrecognised op" ] ] ] SUBS r1, r1, #1 ; decrement the index BGE %BT30 SUBS r7, r7, #1 ; decrement the way number BGE %BT20 myDSB ,r7 ; Cortex-A7 errata 814220: DSB required when changing cache levels when using set/way operations. 
This also counts as our end-of-maintenance DSB. 40 ; Skip ADD r8, r8, #2 CMP r3, r8 BGT %BT10 50 ; Finished MEND Cache_CleanAll_WB_CR7_Lx ROUT ; Clean cache by traversing all sets and ways for all data caches Push "r1-r8,lr" MaintainDataCache_WB_CR7_Lx clean, loc Pull "r1-r8,pc" Cache_CleanInvalidateAll_WB_CR7_Lx ROUT ; ; similar to Cache_CleanAll, but does clean&invalidate of Dcache, and invalidates ICache ; Push "r1-r8,lr" MaintainDataCache_WB_CR7_Lx cleaninvalidate, loc MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache MCR p15, 0, a1, c7, c5, 6 ; invalidate branch predictors myDSB ,a1,,y ; Wait for cache/branch invalidation to complete myISB ,a1,,y ; Ensure that the effects of the completed cache/branch invalidation are visible Pull "r1-r8,pc" Cache_InvalidateAll_WB_CR7_Lx ROUT ; ; no clean, assume caller knows what's happening ; Push "r1-r8,lr" MaintainDataCache_WB_CR7_Lx invalidate, loc MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache MCR p15, 0, a1, c7, c5, 6 ; invalidate branch predictors myDSB ,a1,,y ; Wait for cache/branch invalidation to complete myISB ,a1,,y ; Ensure that the effects of the completed cache/branch invalidation are visible Pull "r1-r8,pc" Cache_RangeThreshold_WB_CR7_Lx ROUT LDR a1, =ZeroPage LDR a1, [a1, #DCache_RangeThreshold] MOV pc, lr ; In: r1 = cache level (0-based) ; Out: r0 = Flags ; bits 0-2: cache type: ; 000 -> none ; 001 -> instruction ; 010 -> data ; 011 -> split ; 100 -> unified ; 1xx -> reserved ; Other bits: reserved ; r1 = D line length ; r2 = D size ; r3 = I line length ; r4 = I size ; r0-r4 = zero if cache level not present Cache_Examine_WB_CR7_Lx ROUT Entry "r5" LDR r5, =ZeroPage LDR r0, [r5, #Cache_Lx_Info]! ADD r5, r5, #Cache_Lx_DTable-Cache_Lx_Info BIC r0, r0, #&00E00000 ; Shift the CLIDR until we hit a zero entry or the desired level ; (could shift by exactly the amount we want... but ARM say not to do ; that since they may decide to re-use bits) 10 TEQ r1, #0 TSTNE r0, #7 SUBNE r1, r1, #1 MOVNE r0, r0, LSR #3 ADDNE r5, r5, #4 BNE %BT10 ANDS r0, r0, #7 MOV r1, #0 MOV r2, #0 MOV r3, #0 MOV r4, #0 EXIT EQ TST r0, #6 ; Data or unified cache present? BEQ %FT20 LDR lr, [r5] LDR r1, =CCSIDR_NumSets_mask:SHR:CCSIDR_NumSets_pos LDR r2, =CCSIDR_Associativity_mask:SHR:CCSIDR_Associativity_pos AND r1, r1, lr, LSR #CCSIDR_NumSets_pos AND r2, r2, lr, LSR #CCSIDR_Associativity_pos ADD r1, r1, #1 ADD r2, r2, #1 MUL r2, r1, r2 AND r1, lr, #CCSIDR_LineSize_mask ASSERT CCSIDR_LineSize_pos = 0 MOV lr, #16 MOV r1, lr, LSL r1 MUL r2, r1, r2 20 TEQ r0, #4 ; Unified cache? MOVEQ r3, r1 MOVEQ r4, r2 TST r0, #1 ; Instruction cache present? 
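; The CCSIDR decode used just above (and repeated below for the instruction
; cache) can be summarised by the following illustrative C sketch. This is
; not part of the build; it assumes the usual ARMv7 CCSIDR layout that the
; CCSIDR_* constants are expected to encode (LineSize in bits 2:0,
; Associativity in bits 12:3, NumSets in bits 27:13).
;
;   /* Decode one CCSIDR value into cache geometry (illustrative only). */
;   static void decode_ccsidr(unsigned ccsidr,
;                             unsigned *line_len, unsigned *size)
;   {
;       unsigned sets = ((ccsidr >> 13) & 0x7FFF) + 1;  /* NumSets + 1     */
;       unsigned ways = ((ccsidr >> 3)  & 0x3FF)  + 1;  /* Assoc + 1       */
;       *line_len = 16u << (ccsidr & 7);                /* 16-byte minimum */
;       *size     = sets * ways * *line_len;            /* total bytes     */
;   }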
EXIT EQ LDR lr, [r5, #Cache_Lx_ITable-Cache_Lx_DTable] LDR r3, =CCSIDR_NumSets_mask:SHR:CCSIDR_NumSets_pos LDR r4, =CCSIDR_Associativity_mask:SHR:CCSIDR_Associativity_pos AND r3, r3, lr, LSR #CCSIDR_NumSets_pos AND r4, r4, lr, LSR #CCSIDR_Associativity_pos ADD r3, r3, #1 ADD r4, r4, #1 MUL r4, r3, r4 AND r3, lr, #CCSIDR_LineSize_mask ASSERT CCSIDR_LineSize_pos = 0 MOV lr, #16 MOV r3, lr, LSL r3 MUL r4, r3, r4 EXIT MMU_ChangingUncached_WB_CR7_Lx myDSB ,a1 ; Ensure the page table write has actually completed myISB ,a1,,y ; Also required TLB_InvalidateAll_WB_CR7_Lx ROUT MOV a1, #0 MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB MCR p15, 0, a1, c7, c5, 6 ; invalidate branch predictors myDSB ,a1,,y ; Wait for cache/branch invalidation to complete myISB ,a1,,y ; Ensure that the effects of the completed cache/branch invalidation are visible MOV pc, lr ; a1 = page affected (page aligned address) ; MMU_ChangingUncachedEntry_WB_CR7_Lx [ NoARMv7 Push "a2" myDSB ,a2 ; Ensure the page table write has actually completed myISB ,a2,,y ; Also required Pull "a2" | myDSB myISB ] TLB_InvalidateEntry_WB_CR7_Lx ROUT MCR p15, 0, a1, c8, c7, 1 ; invalidate ITLB & DTLB entry MCR p15, 0, a1, c7, c5, 6 ; invalidate branch predictors myDSB ,a1 ; Wait for cache/branch invalidation to complete myISB ,a1,,y ; Ensure that the effects of the completed cache/branch invalidation are visible MOV pc, lr WriteBuffer_Drain_WB_CR7_Lx ROUT myDSB ,a1 ; DSB is the new name for write buffer draining myISB ,a1,,y ; Also do ISB for extra paranoia MOV pc, lr IMB_Full_WB_CR7_Lx ROUT ; ; do: clean DCache; drain WBuffer, invalidate ICache/branch predictor ; Luckily, we only need to clean as far as the level of unification ; Push "r1-r8,lr" MaintainDataCache_WB_CR7_Lx clean, lou MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache MCR p15, 0, a1, c7, c5, 6 ; invalidate branch predictors myDSB ,a1,,y ; Wait for cache/branch invalidation to complete myISB ,a1,,y ; Ensure that the effects of the completed cache/branch invalidation are visible Pull "r1-r8,pc" ; a1 = start address (inclusive, cache line aligned) ; a2 = end address (exclusive, cache line aligned) ; IMB_Range_WB_CR7_Lx ROUT SUB a2, a2, a1 CMP a2, #32*1024 ; Maximum L1 cache size on Cortex-A8 is 32K, use that to guess what approach to take ADD a2, a2, a1 BHS IMB_Full_WB_CR7_Lx Push "a1,a3,lr" LDR lr, =ZeroPage LDRB lr, [lr, #DCache_LineLen] ; log2(line len)-2 MOV a3, #4 MOV lr, a3, LSL lr 10 MCR p15, 0, a1, c7, c11, 1 ; clean DCache entry by VA to PoU ADD a1, a1, lr CMP a1, a2 BLO %BT10 myDSB ,a1 ; Wait for clean to complete Pull "a1" ; Get start address back LDR lr, =ZeroPage LDRB lr, [lr, #ICache_LineLen] ; Use ICache line length, just in case D&I length differ MOV lr, a3, LSL lr 10 MCR p15, 0, a1, c7, c5, 1 ; invalidate ICache entry ADD a1, a1, lr CMP a1, a2 BLO %BT10 MCR p15, 0, a1, c7, c5, 6 ; invalidate branch predictors myDSB ,a1 ; Wait for cache/branch invalidation to complete myISB ,a1,,y ; Ensure that the effects of the completed cache/branch invalidation are visible Pull "a3,pc" ; a1 = pointer to list of (start, end) address pairs ; a2 = pointer to end of list ; a3 = total amount of memory to be synchronised ; IMB_List_WB_CR7_Lx ROUT CMP a3, #32*1024 ; Maximum L1 cache size on Cortex-A8 is 32K, use that to guess what approach to take BHS IMB_Full_WB_CR7_Lx Push "a1,a3,v1-v2,lr" LDR lr, =ZeroPage LDRB lr, [lr, #DCache_LineLen] ; log2(line len)-2 MOV a3, #4 MOV lr, a3, LSL lr 05 LDMIA a1!, {v1-v2} 10 MCR p15, 0, v1, c7, c11, 1 ; clean DCache entry by VA to PoU ADD v1, v1, 
lr CMP v1, v2 BLO %BT10 CMP a1, a2 BNE %BT05 myDSB ,a1 ; Wait for clean to complete Pull "a1" ; Get start address back LDR lr, =ZeroPage LDRB lr, [lr, #ICache_LineLen] ; Use ICache line length, just in case D&I length differ MOV lr, a3, LSL lr 05 LDMIA a1!, {v1-v2} 10 MCR p15, 0, v1, c7, c5, 1 ; invalidate ICache entry ADD v1, v1, lr CMP v1, v2 BLO %BT10 CMP a1, a2 BNE %BT05 MCR p15, 0, a1, c7, c5, 6 ; invalidate branch predictors myDSB ,a1 ; Wait for cache/branch invalidation to complete myISB ,a1,,y ; Ensure that the effects of the completed cache/branch invalidation are visible Pull "a3,v1-v2,pc" MMU_Changing_WB_CR7_Lx ROUT Push "lr" myDSB ,a1 ; Ensure the page table write has actually completed myISB ,a1,,y ; Also required BL Cache_CleanInvalidateAll_WB_CR7_Lx MOV a1, #0 MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB myDSB ,a1,,y ; Wait for TLB invalidation to complete myISB ,a1,,y ; Ensure that the effects are visible Pull "pc" ; a1 = page affected (page aligned address) ; MMU_ChangingEntry_WB_CR7_Lx ROUT Push "a2, lr" myDSB ,lr ; Ensure the page table write has actually completed myISB ,lr,,y ; Also required LDR lr, =ZeroPage LDRB lr, [lr, #DCache_LineLen] ; log2(line len)-2 MOV a2, #4 MOV lr, a2, LSL lr ADD a2, a1, #PageSize 10 MCR p15, 0, a1, c7, c14, 1 ; clean&invalidate DCache entry to PoC ADD a1, a1, lr CMP a1, a2 BNE %BT10 myDSB ,lr ; Wait for clean to complete LDR lr, =ZeroPage LDRB lr, [lr, #ICache_LineLen] ; Use ICache line length, just in case D&I length differ MOV a1, #4 MOV lr, a1, LSL lr SUB a1, a2, #PageSize ; Get start address back 10 MCR p15, 0, a1, c7, c5, 1 ; invalidate ICache entry to PoC ADD a1, a1, lr CMP a1, a2 BNE %BT10 SUB a1, a1, #PageSize MCR p15, 0, a1, c8, c7, 1 ; invalidate DTLB and ITLB MCR p15, 0, a1, c7, c5, 6 ; invalidate branch predictors myDSB ,a1 myISB ,a1,,y Pull "a2, pc" ; a1 = first page affected (page aligned address) ; a2 = number of pages ; MMU_ChangingEntries_WB_CR7_Lx ROUT Push "a2, a3, lr" myDSB ,lr ; Ensure the page table write has actually completed myISB ,lr,,y ; Also required MOV a2, a2, LSL #Log2PageSize LDR lr, =ZeroPage LDR a3, [lr, #DCache_RangeThreshold] ;check whether cheaper to do global clean CMP a2, a3 BHS %FT30 ADD a2, a2, a1 ;clean end address (exclusive) LDRB a3, [lr, #DCache_LineLen] ; log2(line len)-2 MOV lr, #4 MOV a3, lr, LSL a3 MOV lr, a1 10 MCR p15, 0, a1, c7, c14, 1 ; clean&invalidate DCache entry to PoC ADD a1, a1, a3 CMP a1, a2 BNE %BT10 myDSB ,a3 ; Wait for clean to complete LDR a3, =ZeroPage LDRB a3, [a3, #ICache_LineLen] ; Use ICache line length, just in case D&I length differ MOV a1, #4 MOV a3, a1, LSL a3 MOV a1, lr ; Get start address back 10 MCR p15, 0, a1, c7, c5, 1 ; invalidate ICache entry to PoC ADD a1, a1, a3 CMP a1, a2 BNE %BT10 20 MCR p15, 0, lr, c8, c7, 1 ; invalidate DTLB & ITLB entry ADD lr, lr, #PageSize CMP lr, a2 BNE %BT20 MCR p15, 0, a1, c7, c5, 6 ; invalidate branch predictors myDSB ,a1 myISB ,a1,,y Pull "a2, a3, pc" ; 30 BL Cache_CleanInvalidateAll_WB_CR7_Lx MOV a1, #0 MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB myDSB ,a1,,y ; Wait TLB invalidation to complete myISB ,a1,,y ; Ensure that the effects are visible Pull "a2, a3, pc" ; a1 = first page affected (page aligned address) ; a2 = number of pages ; MMU_ChangingUncachedEntries_WB_CR7_Lx ROUT Push "a2,lr" myDSB ,lr ; Ensure the page table write has actually completed myISB ,lr,,y ; Also required CMP a2, #32 ; arbitrary-ish threshold MCRHS p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB BHS %FT20 10 MCR p15, 0, a1, c8, c7, 1 ; 
invalidate DTLB & ITLB entry ADD a1, a1, #PageSize SUBS a2, a2, #1 BNE %BT10 20 MCR p15, 0, a1, c7, c5, 6 ; invalidate branch predictors myDSB ,lr,,y myISB ,lr,,y Pull "a2,pc" ; -------------------------------------------------------------------------- ; ----- ARMops for PL310 L2 cache controller-------------------------------- ; -------------------------------------------------------------------------- ; These are a hybrid of the standard ARMv7 ARMops (WB_CR7_Lx) and the PL310 ; cache maintenance ops. Currently they're only used on Cortex-A9 systems, so ; may need modifications to work with other systems. ; Specifically, the code assumes the PL310 is being used in non-exclusive mode. MACRO PL310Sync $regs, $temp ; Errata 753970 requires us to write to a different location when ; performing a sync operation for r3p0 LDR $temp, [$regs, #PL310_REG0_CACHE_ID] AND $temp, $temp, #&3f TEQ $temp, #PL310_R3P0 MOV $temp, #0 STREQ $temp, [$regs, #PL310_REG7_CACHE_SYNC_753970] STRNE $temp, [$regs, #PL310_REG7_CACHE_SYNC] 10 LDR $temp, [$regs, #PL310_REG7_CACHE_SYNC] TST $temp, #1 BNE %BT10 MEND Cache_CleanInvalidateAll_PL310 ROUT ; Errata 727915 workaround - use CLEAN_INV_INDEX instead of CLEAN_INV_WAY Entry "a2-a4" LDR a2, =ZeroPage LDR a2, [a2, #Cache_HALDevice] LDR a2, [a2, #HALDevice_Address] ; Clean ARM caches BL Cache_CleanAll_WB_CR7_Lx ; Determine PL310 way, index count LDR a1, [a2, #PL310_REG1_AUX_CONTROL] AND a3, a1, #1<<16 AND a1, a1, #7<<17 MOV a3, a3, LSL #15 MOV a1, a1, LSR #17 LDR a4, =&FF<<5 ORR a3, a3, #7<<28 ; a3 = max way number (inclusive) ORR a4, a4, a4, LSL a1 ; a4 = max index number (inclusive) ; Ensure no operation currently in progress 05 LDR lr, [a2, #PL310_REG7_CLEAN_INV_INDEX] TST lr, #1 BNE %BT05 10 ORR a1, a3, a4 20 STR a1, [a2, #PL310_REG7_CLEAN_INV_INDEX] 30 LDR lr, [a2, #PL310_REG7_CLEAN_INV_INDEX] TST lr, #1 BNE %BT30 SUBS a1, a1, #1<<28 ; next way BCS %BT20 ; underflow? 
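; For reference, the errata 727915 workaround implemented by
; Cache_CleanInvalidateAll_PL310 (clean & invalidate by index/way rather
; than by way) is roughly the following illustrative C sketch. It is not
; part of the build; read32/write32 are hypothetical MMIO accessors, and
; the 8KB-per-way baseline matches the index mask computed above.
;
;   void pl310_clean_inv_all_by_index(unsigned base)
;   {
;       unsigned aux     = read32(base + PL310_REG1_AUX_CONTROL);
;       unsigned ways    = (aux & (1u << 16)) ? 16 : 8;   /* associativity   */
;       unsigned waysize = (aux >> 17) & 7;               /* way size field  */
;       unsigned lines   = 256u << waysize;               /* 32-byte lines per way */
;       for (unsigned idx = 0; idx < lines; idx++)        /* outer: index    */
;           for (unsigned way = 0; way < ways; way++)     /* inner: way      */
;           {
;               write32(base + PL310_REG7_CLEAN_INV_INDEX,
;                       (way << 28) | (idx << 5));
;               while (read32(base + PL310_REG7_CLEAN_INV_INDEX) & 1)
;                   continue;                             /* wait for line   */
;           }
;       /* followed by a PL310Sync and a clean & invalidate of the ARM caches */
;   }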
SUBS a4, a4, #1<<5 ; next index BGE %BT10 ; Sync PL310 PL310Sync a2, a1 ; Clean & invalidate ARM caches PullEnv B Cache_CleanInvalidateAll_WB_CR7_Lx Cache_CleanAll_PL310 ROUT Entry "a2" LDR a2, =ZeroPage LDR a2, [a2, #Cache_HALDevice] LDR a2, [a2, #HALDevice_Address] ; Clean ARM caches BL Cache_CleanAll_WB_CR7_Lx ; Clean PL310 LDR a1, [a2, #PL310_REG1_AUX_CONTROL] TST a1, #1<<16 MOV a1, #&FF ORRNE a1, a1, #&FF00 ; Mask of all ways STR a1, [a2, #PL310_REG7_CLEAN_WAY] 10 LDR a1, [a2, #PL310_REG7_CLEAN_WAY] TEQ a1, #0 BNE %BT10 ; Sync PL310 PL310Sync a2, a1 EXIT Cache_InvalidateAll_PL310 ROUT Entry "a2" LDR a2, =ZeroPage LDR a2, [a2, #Cache_HALDevice] LDR a2, [a2, #HALDevice_Address] ; Invalidate PL310 LDR a1, [a2, #PL310_REG1_AUX_CONTROL] TST a1, #1<<16 MOV a1, #&FF ORRNE a1, a1, #&FF00 ; Mask of all ways STR a1, [a2, #PL310_REG7_INV_WAY] 10 LDR a1, [a2, #PL310_REG7_INV_WAY] TEQ a1, #0 BNE %BT10 ; Sync PL310 PL310Sync a2, a1 ; Invalidate ARM caches PullEnv B Cache_InvalidateAll_WB_CR7_Lx Cache_RangeThreshold_PL310 ROUT MOV a1, #1024*1024 MOV pc, lr Cache_Examine_PL310 ROUT ; Assume that the PL310 is the level 2 cache CMP r1, #1 BLT Cache_Examine_WB_CR7_Lx MOVGT r0, #0 MOVGT r1, #0 MOVGT r2, #0 MOVGT r3, #0 MOVGT r4, #0 MOVGT pc, lr LDR r0, =ZeroPage LDR r0, [r0, #Cache_HALDevice] LDR r0, [r0, #HALDevice_Address] LDR r0, [r0, #PL310_REG1_AUX_CONTROL] AND r2, r0, #&E0000 ; Get way size TST r0, #1:SHL:16 ; Check associativity MOV r2, r2, LSR #17 MOVEQ r1, #8*1024*8 ; 8KB base way size with 8 way associativity MOVNE r1, #8*1024*16 ; 8KB base way size with 16 way associativity MOV r2, r1, LSL r2 ; Assume this really is a PL310 (32 byte line size, unified architecture) MOV r0, #4 MOV r1, #32 MOV r3, #32 MOV r4, r2 MOV pc, lr WriteBuffer_Drain_PL310 ROUT Entry LDR lr, =ZeroPage LDR lr, [lr, #Cache_HALDevice] LDR lr, [lr, #HALDevice_Address] ; Drain ARM write buffer myDSB ,a1 ; DSB is the new name for write buffer draining myISB ,a1,,y ; Also do ISB for extra paranoia ; Drain PL310 write buffer PL310Sync lr, a1 EXIT MMU_Changing_PL310 ROUT Entry myDSB ,a1 ; Ensure the page table write has actually completed myISB ,a1,,y ; Also required BL Cache_CleanInvalidateAll_PL310 MOV a1, #0 MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB myDSB ,a1,,y ; Wait for TLB invalidation to complete myISB ,a1,,y ; Ensure that the effects are visible EXIT ; a1 = page affected (page aligned address) ; MMU_ChangingEntry_PL310 ROUT Entry "a1-a4" ; MMU_ChangingEntry_WB_CR7_Lx performs a clean & invalidate before invalidating the TLBs. ; This means we must behave in a similar way to the PL310 clean & invalidate: ; * Clean ARM ; * Clean & invalidate PL310 ; * Clean & invalidate ARM (i.e. do the MMU changing op) ; Convert logical addr to physical. ; Use the ARMv7 CP15 registers for convenience. 
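; The code below can be read as the following illustrative C sketch (not
; part of the build). cp15_ats1cpr/cp15_read_par and read32/write32 are
; hypothetical helpers standing in for the MCR/MRC and MMIO accesses that
; follow; the ARM D-cache clean that precedes the PL310 pass is omitted.
;
;   /* Clean & invalidate one 4K page from the PL310 by physical address. */
;   void pl310_clean_inv_page(unsigned base, unsigned va)
;   {
;       cp15_ats1cpr(va);                         /* ATS1CPR: translate VA   */
;       unsigned par = cp15_read_par();           /* read translation result */
;       if (par & 1)
;           return;                               /* no mapping: nothing cached */
;       unsigned pa = par & ~0xFFFu;              /* strip attribute bits    */
;       for (unsigned off = 0; off < 4096; off += 32)
;       {
;           write32(base + PL310_REG7_CLEAN_INV_PA, pa + off);
;           while (read32(base + PL310_REG7_CLEAN_INV_PA) & 1)
;               continue;                         /* wait for this line      */
;       }
;       /* then PL310Sync, then the normal MMU_ChangingEntry_WB_CR7_Lx op   */
;   }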
PHPSEI MCR p15, 0, a1, c7, c8, 0 ; ATS1CPR myISB ,a4 MRC p15, 0, a4, c7, c4, 0 ; Get result PLP TST a4, #1 BNE %FT50 ; Lookup failed - assume this means that the page doesn't need cleaning from the PL310 ; Mask out the memory attributes that were returned by the lookup [ NoARMT2 BIC a4, a4, #&FF BIC a4, a4, #&F00 | BFC a4, #0, #12 ] ; Clean ARM myDSB ,lr myISB ,lr,,y LDR lr, =ZeroPage ADD a2, a1, #PageSize ;clean end address (exclusive) LDRB a3, [lr, #DCache_LineLen] ; log2(line len)-2 MOV lr, #4 MOV a3, lr, LSL a3 10 MCR p15, 0, a1, c7, c10, 1 ; clean DCache entry to PoC ADD a1, a1, a3 CMP a1, a2 BNE %BT10 myDSB ,a3 ; Wait for clean to complete ; Clean & invalidate PL310 LDR a2, =ZeroPage LDR a2, [a2, #Cache_HALDevice] LDR a2, [a2, #HALDevice_Address] ; Ensure we haven't re-entered an in-progress op 20 LDR lr, [a2, #PL310_REG7_CLEAN_INV_PA] TST lr, #1 BNE %BT20 ; Clean & invalidate each line/index of the page ADD a1, a4, #&FE0 ; last line within the page 30 STR a4, [a2, #PL310_REG7_CLEAN_INV_PA] 40 LDR lr, [a2, #PL310_REG7_CLEAN_INV_PA] TST lr, #1 BNE %BT40 TEQ a4, a1 ADD a4, a4, #1<<5 ; next index BNE %BT30 ; Sync PL310Sync a2, a1 50 ; Clean & invalidate ARM (+ do MMU op) PullEnv B MMU_ChangingEntry_WB_CR7_Lx ; a1 = first page affected (page aligned address) ; a2 = number of pages ; MMU_ChangingEntries_PL310 ROUT Entry "a2-a3" ; Keep this one simple and just split it into a series of per-page operations ; This will result in some unnecessary TLB invalidate & PL310 sync thrashing, so in the future a more advanced implementation might be nice. CMP a2, #1024*1024/PageSize ; Arbitrary threshold for full clean BHS %FT20 MOV a3, a1 10 MOV a1, a3 BL MMU_ChangingEntry_PL310 SUBS a2, a2, #1 ADD a3, a3, #PageSize BNE %BT10 EXIT 20 ; Full clean required BL Cache_CleanInvalidateAll_PL310 MOV a1, #0 MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB myDSB ,a1,,y ; Wait TLB invalidation to complete myISB ,a1,,y ; Ensure that the effects are visible EXIT ] ; MEMM_Type = "VMSAv6" ; -------------------------------------------------------------------------- LookForHALCacheController ROUT Entry "r0-r3,r8,r12" ; Look for any known cache controllers that the HAL has registered, and ; replace our ARMop routines with the appropriate routines for that ; controller LDR r0, =(0:SHL:16)+HALDeviceType_SysPeri+HALDeviceSysPeri_CacheC MOV r1, #0 LDR r12, =ZeroPage STR r1, [r12, #Cache_HALDevice] ; In case none found 10 MOV r8, #OSHW_DeviceEnumerate SWI XOS_Hardware EXIT VS CMP r1, #-1 EXIT EQ ; Do we recognise this controller? ASSERT HALDevice_ID = 2 [ NoARMv4 LDR lr, [r2] MOV lr, lr, LSR #16 | LDRH lr, [r2, #HALDevice_ID] ] ADR r8, KnownHALCaches 20 LDR r12, [r8], #8+Proc_MMU_ChangingUncachedEntries-Proc_Cache_CleanInvalidateAll CMP r12, #-1 BEQ %BT10 CMP lr, r12 BNE %BT20 ; Cache recognised. Disable IRQs for safety, and then try enabling it. Push "r2" MOV r0, r2 MSR CPSR_c, #SVC32_mode+I32_bit MOV lr, pc LDR pc, [r2, #HALDevice_Activate] CMP r0, #1 Pull "r2" MSRNE CPSR_c, #SVC32_mode BNE %BT10 ; Cache enabled OK - remember the device pointer and patch our maintenance ops LDR r0, =ZeroPage STR r2, [r0, #Cache_HALDevice] ADD r0, r0, #Proc_Cache_CleanInvalidateAll MOV r1, #Proc_MMU_ChangingUncachedEntries-Proc_Cache_CleanInvalidateAll 30 LDR r3, [r8, #-4]! 
TEQ r3, #0 STRNE r3, [r0, r1] SUBS r1, r1, #4 BGE %BT30 ; It's now safe to restore IRQs MSR CPSR_c, #SVC32_mode EXIT KnownHALCaches ROUT [ MEMM_Type = "VMSAv6" DCD HALDeviceID_CacheC_PL310 01 DCD Cache_CleanInvalidateAll_PL310 DCD Cache_CleanAll_PL310 DCD Cache_InvalidateAll_PL310 DCD Cache_RangeThreshold_PL310 DCD Cache_Examine_PL310 DCD 0 ; TLB_InvalidateAll DCD 0 ; TLB_InvalidateEntry DCD WriteBuffer_Drain_PL310 DCD 0 ; IMB_Full DCD 0 ; IMB_Range DCD 0 ; IMB_List DCD MMU_Changing_PL310 DCD MMU_ChangingEntry_PL310 DCD 0 ; MMU_ChangingUncached DCD 0 ; MMU_ChangingUncachedEntry DCD MMU_ChangingEntries_PL310 DCD 0 ; MMU_ChangingUncachedEntries ASSERT . - %BT01 = 4+Proc_MMU_ChangingUncachedEntries-Proc_Cache_CleanInvalidateAll ] DCD -1 ; -------------------------------------------------------------------------- ; ARMops exposed by OS_MMUControl 2 ARMopPtrTable DCD ZeroPage + Proc_Cache_CleanInvalidateAll DCD ZeroPage + Proc_Cache_CleanAll DCD ZeroPage + Proc_Cache_InvalidateAll DCD ZeroPage + Proc_Cache_RangeThreshold DCD ZeroPage + Proc_TLB_InvalidateAll DCD ZeroPage + Proc_TLB_InvalidateEntry DCD ZeroPage + Proc_WriteBuffer_Drain DCD ZeroPage + Proc_IMB_Full DCD ZeroPage + Proc_IMB_Range DCD ZeroPage + Proc_IMB_List DCD ZeroPage + Proc_MMU_Changing DCD ZeroPage + Proc_MMU_ChangingEntry DCD ZeroPage + Proc_MMU_ChangingUncached DCD ZeroPage + Proc_MMU_ChangingUncachedEntry DCD ZeroPage + Proc_MMU_ChangingEntries DCD ZeroPage + Proc_MMU_ChangingUncachedEntries ARMopPtrTable_End ; IMPORT Write0_Translated ARM_PrintProcessorType LDR a1, =ZeroPage LDRB a1, [a1, #ProcessorType] TEQ a1, #ARMunk MOVEQ pc, lr Push "lr" ADR a2, PNameTable LDHA a1, a2, a1, a3 ADD a1, a2, a1 [ International BL Write0_Translated | SWI XOS_Write0 ] SWI XOS_NewLine SWI XOS_NewLine Pull "pc" PNameTable DCW PName_ARM600 - PNameTable DCW PName_ARM610 - PNameTable DCW PName_ARM700 - PNameTable DCW PName_ARM710 - PNameTable DCW PName_ARM710a - PNameTable DCW PName_SA110 - PNameTable ; pre rev T DCW PName_SA110 - PNameTable ; rev T or later DCW PName_ARM7500 - PNameTable DCW PName_ARM7500FE - PNameTable DCW PName_SA1100 - PNameTable DCW PName_SA1110 - PNameTable DCW PName_ARM720T - PNameTable DCW PName_ARM920T - PNameTable DCW PName_ARM922T - PNameTable DCW PName_X80200 - PNameTable DCW PName_X80321 - PNameTable DCW PName_ARM1176JZF_S - PNameTable DCW PName_Cortex_A5 - PNameTable DCW PName_Cortex_A7 - PNameTable DCW PName_Cortex_A8 - PNameTable DCW PName_Cortex_A9 - PNameTable DCW PName_Cortex_A17 - PNameTable ; A12 rebranded as A17 DCW PName_Cortex_A15 - PNameTable DCW PName_Cortex_A17 - PNameTable PName_ARM600 = "600:ARM 600 Processor",0 PName_ARM610 = "610:ARM 610 Processor",0 PName_ARM700 = "700:ARM 700 Processor",0 PName_ARM710 = "710:ARM 710 Processor",0 PName_ARM710a = "710a:ARM 710a Processor",0 PName_SA110 = "SA110:SA-110 Processor",0 PName_ARM7500 = "7500:ARM 7500 Processor",0 PName_ARM7500FE = "7500FE:ARM 7500FE Processor",0 PName_SA1100 = "SA1100:SA-1100 Processor",0 PName_SA1110 = "SA1110:SA-1110 Processor",0 PName_ARM720T = "720T:ARM 720T Processor",0 PName_ARM920T = "920T:ARM 920T Processor",0 PName_ARM922T = "922T:ARM 922T Processor",0 PName_X80200 = "X80200:80200 Processor",0 PName_X80321 = "X80321:80321 Processor",0 PName_ARM1176JZF_S = "ARM1176JZF_S:ARM1176JZF-S Processor",0 PName_Cortex_A5 = "CA5:Cortex-A5 Processor",0 PName_Cortex_A7 = "CA7:Cortex-A7 Processor",0 PName_Cortex_A8 = "CA8:Cortex-A8 Processor",0 PName_Cortex_A9 = "CA9:Cortex-A9 Processor",0 PName_Cortex_A15 = "CA15:Cortex-A15 Processor",0 
PName_Cortex_A17 = "CA17:Cortex-A17 Processor",0 ALIGN ; Lookup tables from DA flags PCB (bits 14:12,5,4, packed down to 4:2,1,0) ; to XCB bits in page table descriptors. XCB_CB * 0:SHL:0 XCB_NB * 1:SHL:0 XCB_NC * 1:SHL:1 XCB_P * 1:SHL:2 [ MEMM_Type = "VMSAv6" XCB_TU * 1:SHL:5 ; For VMSAv6, deal with temp uncacheable via the table ] ALIGN 32 [ MEMM_Type = "ARM600" ; WT read-allocate cache (eg ARM720T) XCBTableWT ; C+B CNB NCB NCNB = L2_C+L2_B, L2_C, L2_B, 0 ; Default = L2_C+L2_B, L2_C, L2_B, 0 ; WT, WT, Non-merging, X = L2_C+L2_B, L2_C, L2_B, 0 ; WB/RA, WB, Merging, X = L2_C+L2_B, L2_C, L2_B, 0 ; WB/WA, X, Idempotent, X = L2_C+L2_B, L2_C, L2_B, 0 ; Alt DCache, X, X, X = L2_C+L2_B, L2_C, L2_B, 0 ; X, X, X, X = L2_C+L2_B, L2_C, L2_B, 0 ; X, X, X, X = L2_C+L2_B, L2_C, L2_B, 0 ; X, X, X, X ; SA-110 in Risc PC - WB only read-allocate cache, non-merging WB XCBTableSA110 ; C+B CNB NCB NCNB = L2_C+L2_B, 0, L2_B, 0 ; Default = L2_B, 0, L2_B, 0 ; WT, WT, Non-merging, X = L2_C+L2_B, 0, L2_B, 0 ; WB/RA, WB, Merging, X = L2_C+L2_B, 0, L2_B, 0 ; WB/WA, X, Idempotent, X = L2_C+L2_B, 0, L2_B, 0 ; Alt DCache, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X ; ARMv5 WB/WT read-allocate cache, non-merging WB (eg ARM920T) XCBTableWBR ; C+B CNB NCB NCNB = L2_C+L2_B, 0, L2_B, 0 ; Default = L2_C , 0, L2_B, 0 ; WT, WT, Non-merging, X = L2_C+L2_B, 0, L2_B, 0 ; WB/RA, WB, Merging, X = L2_C+L2_B, 0, L2_B, 0 ; WB/WA, X, Idempotent, X = L2_C+L2_B, 0, L2_B, 0 ; Alt DCache, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X ; SA-1110 - WB only read allocate cache, merging WB, mini D-cache XCBTableSA1110 ; C+B CNB NCB NCNB = L2_C+L2_B, 0, L2_B, 0 ; Default = L2_B, 0, 0, 0 ; WT, WT, Non-merging, X = L2_C+L2_B, 0, L2_B, 0 ; WB/RA, WB, Merging, X = L2_C+L2_B, 0, L2_B, 0 ; WB/WA, X, Idempotent, X = L2_C , 0, L2_B, 0 ; Alt DCache, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X ; XScale - WB/WT read or write-allocate cache, merging WB, mini D-cache ; defaulting to read-allocate XCBTableXScaleRA ; C+B CNB NCB NCNB = L2_C+L2_B, 0, L2_B, 0 ; Default = L2_C , 0, L2_X+L2_B, 0 ; WT, WT, Non-merging, X = L2_C+L2_B, 0, L2_B, 0 ; WB/RA, WB, Merging, X = L2_X+L2_C+L2_B, 0, L2_B, 0 ; WB/WA, X, Idempotent, X = L2_X+L2_C , 0, L2_B, 0 ; Alt DCache, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X ; XScale - WB/WT read or write-allocate cache, merging WB, mini D-cache ; defaulting to write-allocate XCBTableXScaleWA ; C+B CNB NCB NCNB = L2_X+L2_C+L2_B, 0, L2_B, 0 ; Default = L2_C , 0, L2_X+L2_B, 0 ; WT, WT, Non-merging, X = L2_C+L2_B, 0, L2_B, 0 ; WB/RA, WB, Merging, X = L2_X+L2_C+L2_B, 0, L2_B, 0 ; WB/WA, X, Idempotent, X = L2_X+L2_C , 0, L2_B, 0 ; Alt DCache, X, X, X = L2_X+L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X = L2_X+L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X = L2_X+L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X ; XScale - WB/WT read-allocate cache, merging WB, no mini D-cache/extended pages XCBTableXScaleNoExt ; C+B CNB NCB NCNB = L2_C+L2_B, 0, L2_B, 0 ; Default = L2_C , 0, 0, 0 ; WT, WT, Non-merging, X = L2_C+L2_B, 0, L2_B, 0 ; WB/RA, WB, Merging, X = L2_C+L2_B, 0, L2_B, 0 ; WB/WA, X, Idempotent, X = L2_C+L2_B, 0, L2_B, 0 ; Alt DCache, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X ] ; MEMM_Type = "ARM600" [ 
MEMM_Type = "VMSAv6" ; VMSAv6/v7 L2 memory attributes (short descriptor format, TEX remap disabled, identical inner/outer attributes) L2_SO_S * 0 ; Strongly-ordered, shareable L2_Dev_S * L2_B ; Device, shareable L2_Nrm_WT * L2_C ; Normal, WT/RA, S bit determines shareability L2_Nrm_WBRA * L2_C+L2_B ; Normal, WB/RA, S bit determines shareability L2_Nrm_NC * 1:SHL:L2_TEXShift ; Normal, non-cacheable (but bufferable), S bit determines shareability L2_Nrm_WBWA * (1:SHL:L2_TEXShift)+L2_C+L2_B ; Normal, WB/WA, S bit determines shareability L2_Dev_nS * 2:SHL:L2_TEXShift ; Device, non-shareable ; Generic XCB table for VMSAv6/v7 ; * NCNB is roughly equivalent to "strongly ordered". ; * NCB with non-merging write buffer is equivalent to "Device". ; * NCB with merging write buffer is also mapped to "Device". "Normal" is ; tempting but may result in issues with read-sensitive devices (see below). ; * For NCB with devices which aren't read-sensitive, we introduce a new ; "Merging write buffer with idempotent memory" policy which maps to the ; Normal, non-cacheable type. This will degrade nicely on older OS's and CPUs, ; avoiding some isses if we were to make NCB with merging write buffer default ; to Normal memory. This policy is also the new default, so that all existing ; NCB RAM uses it (so unaligned loads, etc. will work). No existing code seems ; to be using NCB for IO devices (only for IO RAM like VRAM), so this change ; should be safe (previously, all NCB policies would have mapped to Device ; memory) ; * CNB has no equivalent - there's no control over whether the write buffer is ; used for cacheable regions, so we have to downgrade to NCNB. ; The caches should behave sensibly when given unsupported attributes ; (downgrade WB to WT to NC), but we may end up doing more cache maintenance ; than needed if the hardware downgrades some areas to NC. XCBTableVMSAv6 ; C+B CNB NCB NCNB = L2_Nrm_WBRA, L2_SO_S, L2_Dev_S, L2_SO_S ; Default = L2_Nrm_WT, L2_SO_S, L2_Dev_S, L2_SO_S ; WT, WT, Non-merging, X = L2_Nrm_WBRA, L2_SO_S, L2_Dev_S, L2_SO_S ; WB/RA, WB, Merging, X = L2_Nrm_WBWA, L2_SO_S, L2_Nrm_NC, L2_SO_S ; WB/WA, X, Idempotent, X = L2_Nrm_WBRA, L2_SO_S, L2_Nrm_NC, L2_SO_S ; Alt DCache, X, X, X = L2_Nrm_WBRA, L2_SO_S, L2_Nrm_NC, L2_SO_S ; X, X, X, X = L2_Nrm_WBRA, L2_SO_S, L2_Nrm_NC, L2_SO_S ; X, X, X, X = L2_Nrm_WBRA, L2_SO_S, L2_Nrm_NC, L2_SO_S ; X, X, X, X ; This second set of entries deals with when pages are made ; temporarily uncacheable - we need to change the cacheability without ; changing the memory type. = L2_Nrm_NC, L2_SO_S, L2_Dev_S, L2_SO_S ; Default = L2_Nrm_NC, L2_SO_S, L2_Dev_S, L2_SO_S ; WT, WT, Non-merging, X = L2_Nrm_NC, L2_SO_S, L2_Dev_S, L2_SO_S ; WB/RA, WB, Merging, X = L2_Nrm_NC, L2_SO_S, L2_Nrm_NC, L2_SO_S ; WB/WA, X, Idempotent, X = L2_Nrm_NC, L2_SO_S, L2_Nrm_NC, L2_SO_S ; Alt DCache, X, X, X = L2_Nrm_NC, L2_SO_S, L2_Nrm_NC, L2_SO_S ; X, X, X, X = L2_Nrm_NC, L2_SO_S, L2_Nrm_NC, L2_SO_S ; X, X, X, X = L2_Nrm_NC, L2_SO_S, L2_Nrm_NC, L2_SO_S ; X, X, X, X ] ; MEMM_Type = "VMSAv6" END
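; For reference, the XCB tables in this file are indexed by the page's DA
; flags PCB bits, packed as described above ("bits 14:12,5,4 packed down to
; 4:2,1,0"). The following illustrative C sketch shows that lookup;
; xcb_lookup is a hypothetical helper, not a routine in this kernel. Only
; the VMSAv6 table has the second 32-byte half selected by XCB_TU; the
; ARM600-era tables consist of just the first 32 entries.
;
;   unsigned char xcb_lookup(const unsigned char table[64],
;                            unsigned da_flags, int temp_uncacheable)
;   {
;       unsigned idx = (((da_flags >> 12) & 7) << 2)  /* policy: bits 14:12 -> 4:2 */
;                    | ((da_flags >> 4) & 3);         /* NC,NB:  bits 5,4   -> 1,0 */
;       if (temp_uncacheable)
;           idx |= 32;                                /* XCB_TU: temp uncacheable half */
;       return table[idx];                            /* L2_* bits for the descriptor  */
;   }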