; Copyright 2000 Pace Micro Technology plc
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;     http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;

        GET     Hdr:ListOpts
        GET     Hdr:Macros
        GET     Hdr:System
        $GetCPU
        $GetMEMM
        GET     hdr.Options
        GET     Hdr:PublicWS
        GET     Hdr:KernelWS
        GET     hdr.Copro15ops
        GET     hdr.ARMops

v7      RN      10

        EXPORT  Init_ARMarch
        EXPORT  ARM_Analyse
        EXPORT  ARM_PrintProcessorType

        AREA    KernelCode,CODE,READONLY

; ARM keep changing their mind about ID field layout.
; Here's a summary, courtesy of the ARM ARM:
;
; pre-ARM 7:  xxxx0xxx
; ARM 7:      xxxx7xxx where bit 23 indicates v4T/~v3
; post-ARM 7: xxxanxxx where n<>0 or 7 and a = architecture (1=v4,2=v4T,3=v5,4=v5T,5=v5TE,6=v5TEJ,7=v6)
;
; int Init_ARMarch(void)
; Returns architecture, as above in a1. Also EQ if ARMv3, NE if ARMv4 or later.
; Corrupts only ip, no RAM usage.
Init_ARMarch
        ARM_read_ID ip
        ANDS    a1, ip, #&0000F000
        MOVEQ   pc, lr                          ; ARM 3 or ARM 6
        TEQ     a1, #&00007000
        BNE     %FT20
        TST     ip, #&00800000                  ; ARM 7 - check for Thumb
        MOVNE   a1, #ARMv4T
        MOVEQ   a1, #ARMv3
        MOV     pc, lr
20      ANDS    a1, ip, #&000F0000              ; post-ARM 7
        MOV     a1, a1, LSR #16
        MOV     pc, lr

ARM_Analyse
        MOV     a2, lr
        BL      Init_ARMarch
        MOV     lr, a2
 [ MEMM_Type = "VMSAv6"
        CMP     a1, #ARMvF
        BEQ     ARM_Analyse_Fancy               ; New ARM; use the feature regs to perform all the setup
 ]
        Push    "v1,v2,v5,v6,v7,lr"
        ARM_read_ID v1
        ARM_read_cachetype v2
        LDR     v6, =ZeroPage
        ADRL    v7, KnownCPUTable
FindARMloop
        LDMIA   v7!, {a1, a2}                   ; See if it's a known ARM
        CMP     a1, #-1
        BEQ     %FT20
        AND     a2, v1, a2
        TEQ     a1, a2
        ADDNE   v7, v7, #8
        BNE     FindARMloop

        TEQ     v2, v1                          ; If we don't have cache attributes, read from table
        LDREQ   v2, [v7]

20      TEQ     v2, v1
        BEQ     %BT20                           ; Cache unknown: panic
        CMP     a1, #-1
        LDRNEB  a2, [v7, #4]
        MOVEQ   a2, #ARMunk
        STRB    a2, [v6, #ProcessorType]

        ASSERT  CT_Isize_pos = 0
        MOV     a1, v2
        ADD     a2, v6, #ICache_Info
        BL      EvaluateCache
        MOV     a1, v2, LSR #CT_Dsize_pos
        ADD     a2, v6, #DCache_Info
        BL      EvaluateCache

        AND     a1, v2, #CT_ctype_mask
        MOV     a1, a1, LSR #CT_ctype_pos
        STRB    a1, [v6, #Cache_Type]

        MOV     v5, #CPUFlag_32bitOS
 [ HiProcVecs
        ORR     v5, v5, #CPUFlag_HiProcVecs
 ]

        TST     v2, #CT_S
        ORRNE   v5, v5, #CPUFlag_SplitCache+CPUFlag_SynchroniseCodeAreas

 [ CacheOff
        ORR     v5, v5, #CPUFlag_SynchroniseCodeAreas
 |
        ARM_read_control a1                     ; if Z bit set then we have branch prediction,
        TST     a1, #MMUC_Z                     ; so we need OS_SynchroniseCodeAreas even if not
        ORRNE   v5, v5, #CPUFlag_SynchroniseCodeAreas ; split caches
 ]

; Test abort timing (base restored or base updated)
        MOV     a1, #&8000
        LDR     a2, [a1], #4                    ; Will abort - DAb handler will continue execution
        TEQ     a1, #&8000
        ORREQ   v5, v5, #CPUFlag_BaseRestored

; Check store of PC
30      STR     pc, [sp, #-4]!
ADR a2, %BT30 + 8 LDR a1, [sp], #4 TEQ a1, a2 ORREQ v5, v5, #CPUFlag_StorePCplus8 35 BL Init_ARMarch STRB a1, [v6, #ProcessorArch] TEQ a1, #ARMv3 ; assume long multiply available ORRNE v5, v5, #CPUFlag_LongMul ; if v4 or later TEQNE a1, #ARMv4 ; assume 26-bit available ORRNE v5, v5, #CPUFlag_No26bitMode ; iff v3 or v4 (not T) TEQNE a1, #ARMv5 ; assume Thumb available ORRNE v5, v5, #CPUFlag_Thumb ; iff not v3,v4,v5 MSR CPSR_f, #Q32_bit MRS lr, CPSR TST lr, #Q32_bit ORRNE v5, v5, #CPUFlag_DSP TEQ a1, #ARMv6 ORREQ v5, v5, #CPUFlag_LoadStoreEx ; Implicit clear of CPUFlag_NoSWP for <= ARMv6 LDRB v4, [v6, #ProcessorType] TEQ v4, #ARMunk ; Modify deduced flags ADRNEL lr, KnownCPUFlags ADDNE lr, lr, v4, LSL #3 LDMNEIA lr, {a2, a3} ORRNE v5, v5, a2 BICNE v5, v5, a3 [ XScaleJTAGDebug TST v5, #CPUFlag_XScale BEQ %FT40 MRC p14, 0, a2, c10, c0 ; Read debug control register TST a2, #&80000000 ORRNE v5, v5, #CPUFlag_XScaleJTAGconnected MOVEQ a2, #&C000001C ; enable hot debug MCREQ p14, 0, a2, c10, c0 BNE %FT40 40 ] ORR v5, v5, #CPUFlag_ExtraReasonCodesFixed STR v5, [v6, #ProcessorFlags] ; Now, a1 = processor architecture (ARMv3, ARMv4 ...) ; v4 = processor type (ARM600, ARM610, ...) ; v5 = processor flags LDRB a2, [v6, #Cache_Type] [ MEMM_Type = "ARM600" CMP a1, #ARMv4 BLO Analyse_ARMv3 ; eg. ARM710 TEQ a2, #CT_ctype_WT TSTEQ v5, #CPUFlag_SplitCache BEQ Analyse_WriteThroughUnified ; eg. ARM7TDMI derivative TEQ a2, #CT_ctype_WB_Crd BEQ Analyse_WB_Crd ; eg. StrongARM TEQ a2, #CT_ctype_WB_Cal_LD BEQ Analyse_WB_Cal_LD ; assume XScale ] ; MEMM_Type = "ARM600" TEQ a2, #CT_ctype_WB_CR7_LDa BEQ Analyse_WB_CR7_LDa ; eg. ARM9 ; others ... WeirdARMPanic B WeirdARMPanic ; stiff :) [ MEMM_Type = "ARM600" Analyse_ARMv3 ADRL a1, NullOp ADRL a2, Cache_Invalidate_ARMv3 ADRL a3, DSB_ReadWrite_ARMv3 ADRL a4, TLB_Invalidate_ARMv3 ADRL ip, TLB_InvalidateEntry_ARMv3 STR a1, [v6, #Proc_Cache_CleanAll] STR a1, [v6, #Proc_Cache_CleanRange] STR a2, [v6, #Proc_Cache_CleanInvalidateAll] STR a2, [v6, #Proc_Cache_CleanInvalidateRange] STR a2, [v6, #Proc_Cache_InvalidateAll] STR a2, [v6, #Proc_Cache_InvalidateRange] STR a1, [v6, #Proc_ICache_InvalidateAll] STR a1, [v6, #Proc_ICache_InvalidateRange] STR a3, [v6, #Proc_DSB_ReadWrite] STR a3, [v6, #Proc_DSB_Write] STR a1, [v6, #Proc_DSB_Read] STR a3, [v6, #Proc_DMB_ReadWrite] STR a3, [v6, #Proc_DMB_Write] STR a1, [v6, #Proc_DMB_Read] STR a4, [v6, #Proc_TLB_InvalidateAll] STR ip, [v6, #Proc_TLB_InvalidateEntry] STR a1, [v6, #Proc_IMB_Full] STR a1, [v6, #Proc_IMB_Range] STR a1, [v6, #Proc_IMB_List] ADRL a1, MMU_Changing_ARMv3 ADRL a2, MMU_ChangingEntry_ARMv3 ADRL a3, MMU_ChangingUncached_ARMv3 ADRL a4, MMU_ChangingUncachedEntry_ARMv3 STR a1, [v6, #Proc_MMU_Changing] STR a2, [v6, #Proc_MMU_ChangingEntry] STR a3, [v6, #Proc_MMU_ChangingUncached] STR a4, [v6, #Proc_MMU_ChangingUncachedEntry] ADRL a1, MMU_ChangingEntries_ARMv3 ADRL a2, MMU_ChangingUncachedEntries_ARMv3 ADRL a3, Cache_RangeThreshold_ARMv3 ADRL a4, Cache_Examine_Simple STR a1, [v6, #Proc_MMU_ChangingEntries] STR a2, [v6, #Proc_MMU_ChangingUncachedEntries] STR a3, [v6, #Proc_Cache_RangeThreshold] STR a4, [v6, #Proc_Cache_Examine] ADRL a1, XCBTableWT STR a1, [v6, #MMU_PCBTrans] B %FT90 Analyse_WriteThroughUnified ADRL a1, NullOp ADRL a2, Cache_InvalidateUnified TST v5, #CPUFlag_NoWBDrain ADRNEL a3, DSB_ReadWrite_OffOn ADREQL a3, DSB_ReadWrite ADRL a4, TLB_Invalidate_Unified ADRL ip, TLB_InvalidateEntry_Unified STR a1, [v6, #Proc_Cache_CleanAll] STR a1, [v6, #Proc_Cache_CleanRange] STR a2, [v6, #Proc_Cache_CleanInvalidateAll] STR 
a2, [v6, #Proc_Cache_CleanInvalidateRange] STR a2, [v6, #Proc_Cache_InvalidateAll] STR a2, [v6, #Proc_Cache_InvalidateRange] STR a1, [v6, #Proc_ICache_InvalidateAll] STR a1, [v6, #Proc_ICache_InvalidateRange] STR a3, [v6, #Proc_DSB_ReadWrite] STR a3, [v6, #Proc_DSB_Write] STR a1, [v6, #Proc_DSB_Read] STR a3, [v6, #Proc_DMB_ReadWrite] STR a3, [v6, #Proc_DMB_Write] STR a1, [v6, #Proc_DMB_Read] STR a4, [v6, #Proc_TLB_InvalidateAll] STR ip, [v6, #Proc_TLB_InvalidateEntry] STR a1, [v6, #Proc_IMB_Full] STR a1, [v6, #Proc_IMB_Range] STR a1, [v6, #Proc_IMB_List] ADRL a1, MMU_Changing_Writethrough ADRL a2, MMU_ChangingEntry_Writethrough ADRL a3, MMU_ChangingUncached ADRL a4, MMU_ChangingUncachedEntry STR a1, [v6, #Proc_MMU_Changing] STR a2, [v6, #Proc_MMU_ChangingEntry] STR a3, [v6, #Proc_MMU_ChangingUncached] STR a4, [v6, #Proc_MMU_ChangingUncachedEntry] ADRL a1, MMU_ChangingEntries_Writethrough ADRL a2, MMU_ChangingUncachedEntries ADRL a3, Cache_RangeThreshold_Writethrough ADRL a4, Cache_Examine_Simple STR a1, [v6, #Proc_MMU_ChangingEntries] STR a2, [v6, #Proc_MMU_ChangingUncachedEntries] STR a3, [v6, #Proc_Cache_RangeThreshold] STR a4, [v6, #Proc_Cache_Examine] ADRL a1, XCBTableWT STR a1, [v6, #MMU_PCBTrans] B %FT90 ] ; MEMM_Type = "ARM600" Analyse_WB_CR7_LDa TST v5, #CPUFlag_SplitCache BEQ WeirdARMPanic ; currently, only support harvard caches here (eg. ARM920) ADRL a1, Cache_CleanInvalidateAll_WB_CR7_LDa STR a1, [v6, #Proc_Cache_CleanInvalidateAll] ADRL a1, Cache_CleanInvalidateRange_WB_CR7_LDa STR a1, [v6, #Proc_Cache_CleanInvalidateRange] ADRL a1, Cache_CleanAll_WB_CR7_LDa STR a1, [v6, #Proc_Cache_CleanAll] ADRL a1, Cache_CleanRange_WB_CR7_LDa STR a1, [v6, #Proc_Cache_CleanRange] ADRL a1, Cache_InvalidateAll_WB_CR7_LDa STR a1, [v6, #Proc_Cache_InvalidateAll] ADRL a1, Cache_InvalidateRange_WB_CR7_LDa STR a1, [v6, #Proc_Cache_InvalidateRange] ADRL a1, Cache_RangeThreshold_WB_CR7_LDa STR a1, [v6, #Proc_Cache_RangeThreshold] ADRL a1, Cache_Examine_Simple STR a1, [v6, #Proc_Cache_Examine] ADRL a1, ICache_InvalidateAll_WB_CR7_LDa STR a1, [v6, #Proc_ICache_InvalidateAll] ADRL a1, ICache_InvalidateRange_WB_CR7_LDa STR a1, [v6, #Proc_ICache_InvalidateRange] ADRL a1, TLB_InvalidateAll_WB_CR7_LDa STR a1, [v6, #Proc_TLB_InvalidateAll] ADRL a1, TLB_InvalidateEntry_WB_CR7_LDa STR a1, [v6, #Proc_TLB_InvalidateEntry] [ MEMM_Type = "ARM600" ; <= ARMv5, just use the drain write buffer MCR ADRL a1, DSB_ReadWrite_WB_CR7_LDa ADRL a2, NullOp STR a1, [v6, #Proc_DSB_ReadWrite] STR a1, [v6, #Proc_DSB_Write] STR a2, [v6, #Proc_DSB_Read] STR a1, [v6, #Proc_DMB_ReadWrite] STR a1, [v6, #Proc_DMB_Write] STR a2, [v6, #Proc_DMB_Read] | ; ARMv6(+), use the ARMv6 barrier MCRs ADRL a1, DSB_ReadWrite_ARMv6 STR a1, [v6, #Proc_DSB_ReadWrite] STR a1, [v6, #Proc_DSB_Write] STR a1, [v6, #Proc_DSB_Read] ADRL a1, DMB_ReadWrite_ARMv6 STR a1, [v6, #Proc_DMB_ReadWrite] STR a1, [v6, #Proc_DMB_Write] STR a1, [v6, #Proc_DMB_Read] ] ADRL a1, IMB_Full_WB_CR7_LDa STR a1, [v6, #Proc_IMB_Full] ADRL a1, IMB_Range_WB_CR7_LDa STR a1, [v6, #Proc_IMB_Range] ADRL a1, IMB_List_WB_CR7_LDa STR a1, [v6, #Proc_IMB_List] ADRL a1, MMU_Changing_WB_CR7_LDa STR a1, [v6, #Proc_MMU_Changing] ADRL a1, MMU_ChangingEntry_WB_CR7_LDa STR a1, [v6, #Proc_MMU_ChangingEntry] ADRL a1, MMU_ChangingUncached_WB_CR7_LDa STR a1, [v6, #Proc_MMU_ChangingUncached] ADRL a1, MMU_ChangingUncachedEntry_WB_CR7_LDa STR a1, [v6, #Proc_MMU_ChangingUncachedEntry] ADRL a1, MMU_ChangingEntries_WB_CR7_LDa STR a1, [v6, #Proc_MMU_ChangingEntries] ADRL a1, MMU_ChangingUncachedEntries_WB_CR7_LDa 
STR a1, [v6, #Proc_MMU_ChangingUncachedEntries] LDRB a2, [v6, #DCache_Associativity] MOV a3, #256 MOV a4, #8 ; to find log2(ASSOC), rounded up Analyse_WB_CR7_LDa_L1 MOV a3, a3, LSR #1 SUB a4, a4, #1 CMP a2, a3 BLO Analyse_WB_CR7_LDa_L1 ADDHI a4, a4, #1 RSB a2, a4, #32 MOV a3, #1 MOV a3, a3, LSL a2 STR a3, [v6, #DCache_IndexBit] LDR a4, [v6, #DCache_NSets] LDRB a2, [v6, #DCache_LineLen] SUB a4, a4, #1 MUL a4, a2, a4 STR a4, [v6, #DCache_IndexSegStart] MOV a2, #64*1024 ; arbitrary-ish STR a2, [v6, #DCache_RangeThreshold] [ MEMM_Type = "ARM600" ADRL a1, XCBTableWBR ; assume read-allocate WB/WT cache STR a1, [v6, #MMU_PCBTrans] ] B %FT90 [ MEMM_Type = "ARM600" Analyse_WB_Crd TST v5, #CPUFlag_SplitCache BEQ WeirdARMPanic ; currently, only support harvard ADRL a1, Cache_CleanInvalidateAll_WB_Crd STR a1, [v6, #Proc_Cache_CleanInvalidateAll] ADRL a1, Cache_CleanInvalidateRange_WB_Crd STR a1, [v6, #Proc_Cache_CleanInvalidateRange] ADRL a1, Cache_CleanAll_WB_Crd STR a1, [v6, #Proc_Cache_CleanAll] ADRL a1, Cache_CleanRange_WB_Crd STR a1, [v6, #Proc_Cache_CleanRange] ADRL a1, Cache_InvalidateAll_WB_Crd STR a1, [v6, #Proc_Cache_InvalidateAll] ADRL a1, Cache_InvalidateRange_WB_Crd STR a1, [v6, #Proc_Cache_InvalidateRange] ADRL a1, Cache_RangeThreshold_WB_Crd STR a1, [v6, #Proc_Cache_RangeThreshold] ADRL a1, Cache_Examine_Simple STR a1, [v6, #Proc_Cache_Examine] ADRL a1, ICache_InvalidateAll_WB_Crd STR a1, [v6, #Proc_ICache_InvalidateAll] ADRL a1, ICache_InvalidateRange_WB_Crd STR a1, [v6, #Proc_ICache_InvalidateRange] ADRL a1, TLB_InvalidateAll_WB_Crd STR a1, [v6, #Proc_TLB_InvalidateAll] ADRL a1, TLB_InvalidateEntry_WB_Crd STR a1, [v6, #Proc_TLB_InvalidateEntry] ADRL a1, DSB_ReadWrite_WB_Crd ADRL a2, NullOp STR a1, [v6, #Proc_DSB_ReadWrite] STR a1, [v6, #Proc_DSB_Write] STR a2, [v6, #Proc_DSB_Read] STR a1, [v6, #Proc_DMB_ReadWrite] STR a1, [v6, #Proc_DMB_Write] STR a2, [v6, #Proc_DMB_Read] ADRL a1, IMB_Full_WB_Crd STR a1, [v6, #Proc_IMB_Full] ADRL a1, IMB_Range_WB_Crd STR a1, [v6, #Proc_IMB_Range] ADRL a1, IMB_List_WB_Crd STR a1, [v6, #Proc_IMB_List] ADRL a1, MMU_Changing_WB_Crd STR a1, [v6, #Proc_MMU_Changing] ADRL a1, MMU_ChangingEntry_WB_Crd STR a1, [v6, #Proc_MMU_ChangingEntry] ADRL a1, MMU_ChangingUncached_WB_Crd STR a1, [v6, #Proc_MMU_ChangingUncached] ADRL a1, MMU_ChangingUncachedEntry_WB_Crd STR a1, [v6, #Proc_MMU_ChangingUncachedEntry] ADRL a1, MMU_ChangingEntries_WB_Crd STR a1, [v6, #Proc_MMU_ChangingEntries] ADRL a1, MMU_ChangingUncachedEntries_WB_Crd STR a1, [v6, #Proc_MMU_ChangingUncachedEntries] LDR a2, =DCacheCleanAddress STR a2, [v6, #DCache_CleanBaseAddress] STR a2, [v6, #DCache_CleanNextAddress] MOV a2, #64*1024 ;arbitrary-ish threshold STR a2, [v6, #DCache_RangeThreshold] LDRB a2, [v6, #ProcessorType] TEQ a2, #SA110 TEQNE a2, #SA110_preRevT ADREQL a2, XCBTableSA110 BEQ Analyse_WB_Crd_finish TEQ a2, #SA1100 TEQNE a2, #SA1110 ADREQL a2, XCBTableSA1110 ADRNEL a2, XCBTableWBR Analyse_WB_Crd_finish STR a2, [v6, #MMU_PCBTrans] B %FT90 Analyse_WB_Cal_LD TST v5, #CPUFlag_SplitCache BEQ WeirdARMPanic ; currently, only support harvard ADRL a1, Cache_CleanInvalidateAll_WB_Cal_LD STR a1, [v6, #Proc_Cache_CleanInvalidateAll] ADRL a1, Cache_CleanInvalidateRange_WB_Cal_LD STR a1, [v6, #Proc_Cache_CleanInvalidateRange] ADRL a1, Cache_CleanAll_WB_Cal_LD STR a1, [v6, #Proc_Cache_CleanAll] ADRL a1, Cache_CleanRange_WB_Cal_LD STR a1, [v6, #Proc_Cache_CleanRange] ADRL a1, Cache_InvalidateAll_WB_Cal_LD STR a1, [v6, #Proc_Cache_InvalidateAll] ADRL a1, Cache_InvalidateRange_WB_Cal_LD STR a1, [v6, 
#Proc_Cache_InvalidateRange] ADRL a1, Cache_RangeThreshold_WB_Cal_LD STR a1, [v6, #Proc_Cache_RangeThreshold] ADRL a1, Cache_Examine_Simple STR a1, [v6, #Proc_Cache_Examine] ADRL a1, ICache_InvalidateAll_WB_Cal_LD STR a1, [v6, #Proc_ICache_InvalidateAll] ADRL a1, ICache_InvalidateRange_WB_Cal_LD STR a1, [v6, #Proc_ICache_InvalidateRange] ADRL a1, TLB_InvalidateAll_WB_Cal_LD STR a1, [v6, #Proc_TLB_InvalidateAll] ADRL a1, TLB_InvalidateEntry_WB_Cal_LD STR a1, [v6, #Proc_TLB_InvalidateEntry] ADRL a1, DSB_ReadWrite_WB_Cal_LD ADRL a2, NullOp ; Assuming barriers are only used for non-cacheable memory, a read barrier routine isn't necessary on XScale because all non-cacheable reads complete in-order with read/write accesses to other NC locations STR a1, [v6, #Proc_DSB_ReadWrite] STR a1, [v6, #Proc_DSB_Write] STR a2, [v6, #Proc_DSB_Read] STR a1, [v6, #Proc_DMB_ReadWrite] STR a1, [v6, #Proc_DMB_Write] STR a2, [v6, #Proc_DMB_Read] ADRL a1, IMB_Full_WB_Cal_LD STR a1, [v6, #Proc_IMB_Full] ADRL a1, IMB_Range_WB_Cal_LD STR a1, [v6, #Proc_IMB_Range] ADRL a1, IMB_List_WB_Cal_LD STR a1, [v6, #Proc_IMB_List] ADRL a1, MMU_Changing_WB_Cal_LD STR a1, [v6, #Proc_MMU_Changing] ADRL a1, MMU_ChangingEntry_WB_Cal_LD STR a1, [v6, #Proc_MMU_ChangingEntry] ADRL a1, MMU_ChangingUncached_WB_Cal_LD STR a1, [v6, #Proc_MMU_ChangingUncached] ADRL a1, MMU_ChangingUncachedEntry_WB_Cal_LD STR a1, [v6, #Proc_MMU_ChangingUncachedEntry] ADRL a1, MMU_ChangingEntries_WB_Cal_LD STR a1, [v6, #Proc_MMU_ChangingEntries] ADRL a1, MMU_ChangingUncachedEntries_WB_Cal_LD STR a1, [v6, #Proc_MMU_ChangingUncachedEntries] LDR a2, =DCacheCleanAddress STR a2, [v6, #DCache_CleanBaseAddress] STR a2, [v6, #DCache_CleanNextAddress] [ XScaleMiniCache ! 1, "You need to arrange for XScale mini-cache clean area to be mini-cacheable" LDR a2, =DCacheCleanAddress + 4 * 32*1024 STR a2, [v6, #MCache_CleanBaseAddress] STR a2, [v6, #MCache_CleanNextAddress] ] ; arbitrary-ish values, mini cache makes global op significantly more expensive [ XScaleMiniCache MOV a2, #128*1024 | MOV a2, #32*1024 ] STR a2, [v6, #DCache_RangeThreshold] ; enable full coprocessor access LDR a2, =&3FFF MCR p15, 0, a2, c15, c1 LDR a2, [v6, #ProcessorFlags] TST a2, #CPUFlag_ExtendedPages ADREQL a2, XCBTableXScaleNoExt ADRNEL a2, XCBTableXScaleWA ; choose between RA and WA here STR a2, [v6, #MMU_PCBTrans] B %FT90 ] ; MEMM_Type = "ARM600" [ MEMM_Type = "VMSAv6" Analyse_WB_CR7_Lx TST v5, #CPUFlag_SplitCache BEQ WeirdARMPanic ; currently, only support harvard caches here ; Read smallest instruction & data/unified cache line length MRC p15, 0, a1, c0, c0, 1 ; Cache type register MOV v2, a1, LSR #16 AND a4, a1, #&F AND v2, v2, #&F MOV a1, #4 MOV a4, a1, LSL a4 MOV v2, a1, LSL v2 STRB a4, [v6, #ICache_LineLen] STRB v2, [v6, #DCache_LineLen] ; Read the cache info into Cache_Lx_* MRC p15, 1, a1, c0, c0, 1 ; Cache level ID register MOV v2, v6 ; Work around DTable/ITable alignment issues STR a1, [v2, #Cache_Lx_Info]! ADD a2, v2, #Cache_Lx_DTable-Cache_Lx_Info MOV a3, #0 10 ANDS v1, a1, #6 ; Data or unified cache at this level? BEQ %FT11 MCRNE p15, 2, a3, c0, c0, 0 ; Program cache size selection register myISB ,v1 MRCNE p15, 1, v1, c0, c0, 0 ; Get size info (data/unified) 11 STR v1, [a2] ADD a3, a3, #1 ANDS v1, a1, #1 ; Instruction cache at this level? 
BEQ %FT12 MCRNE p15, 2, a3, c0, c0, 0 ; Program cache size selection register myISB ,v1 MRCNE p15, 1, v1, c0, c0, 0 ; Get size info (instruction) 12 STR v1, [a2, #Cache_Lx_ITable-Cache_Lx_DTable] ; Shift the cache level ID register along to get the type of the next ; cache level ; However, we need to stop once we reach the first blank entry, because ; ARM have been sneaky and started to reuse some of the bits from the ; high end of the register (the Cortex-A8 TRM lists bits 21-23 as being ; for cache level 8, but the ARMv7 ARM lists them as being for the level ; of unification for inner shareable memory). The ARMv7 ARM does warn ; about making sure you stop once you find the first blank entry, but ; it doesn't say why! TST a1, #7 ADD a3, a3, #1 MOVNE a1, a1, LSR #3 CMP a3, #Cache_Lx_MaxLevel*2 ; Stop at the last level we support ADD a2, a2, #4 BLT %BT10 ADRL a1, DSB_ReadWrite_ARMv7 ADRL a2, DSB_Write_ARMv7 STR a1, [v6, #Proc_DSB_ReadWrite] STR a2, [v6, #Proc_DSB_Write] STR a1, [v6, #Proc_DSB_Read] ADRL a1, DMB_ReadWrite_ARMv7 ADRL a2, DMB_Write_ARMv7 STR a1, [v6, #Proc_DMB_ReadWrite] STR a2, [v6, #Proc_DMB_Write] STR a1, [v6, #Proc_DMB_Read] [ SMP ; If we're a multicore chip, use MP-safe ARMops MRC p15, 0, a1, c0, c0, 5 ; MPIDR AND a1, a1, #&c0000000 TEQ a1, #&80000000 BNE %FT50 ; Set high DCache_RangeThreshold to discourage global clean/invalidate ops (no broadcast) MVN a1, #0 STR a1, [v6, #DCache_RangeThreshold] ADRL a1, Cache_CleanInvalidateAll_ARMv7MP STR a1, [v6, #Proc_Cache_CleanInvalidateAll] ADRL a1, Cache_CleanInvalidateRange_ARMv7MP STR a1, [v6, #Proc_Cache_CleanInvalidateRange] ADRL a1, Cache_CleanAll_ARMv7MP STR a1, [v6, #Proc_Cache_CleanAll] ADRL a1, Cache_CleanRange_ARMv7MP STR a1, [v6, #Proc_Cache_CleanRange] ADRL a1, Cache_InvalidateAll_ARMv7MP STR a1, [v6, #Proc_Cache_InvalidateAll] ADRL a1, Cache_InvalidateRange_ARMv7MP STR a1, [v6, #Proc_Cache_InvalidateRange] ADRL a1, Cache_RangeThreshold_ARMv7MP STR a1, [v6, #Proc_Cache_RangeThreshold] ADRL a1, Cache_Examine_ARMv7MP STR a1, [v6, #Proc_Cache_Examine] ADRL a1, ICache_InvalidateAll_ARMv7MP STR a1, [v6, #Proc_ICache_InvalidateAll] ADRL a1, ICache_InvalidateRange_ARMv7MP STR a1, [v6, #Proc_ICache_InvalidateRange] ADRL a1, TLB_InvalidateAll_ARMv7MP STR a1, [v6, #Proc_TLB_InvalidateAll] ADRL a1, TLB_InvalidateEntry_ARMv7MP STR a1, [v6, #Proc_TLB_InvalidateEntry] ADRL a1, IMB_Full_ARMv7MP STR a1, [v6, #Proc_IMB_Full] ADRL a1, IMB_Range_ARMv7MP STR a1, [v6, #Proc_IMB_Range] ADRL a1, IMB_List_ARMv7MP STR a1, [v6, #Proc_IMB_List] ADRL a1, MMU_Changing_ARMv7MP STR a1, [v6, #Proc_MMU_Changing] ADRL a1, MMU_ChangingEntry_ARMv7MP STR a1, [v6, #Proc_MMU_ChangingEntry] ADRL a1, MMU_ChangingUncached_ARMv7MP STR a1, [v6, #Proc_MMU_ChangingUncached] ADRL a1, MMU_ChangingUncachedEntry_ARMv7MP STR a1, [v6, #Proc_MMU_ChangingUncachedEntry] ADRL a1, MMU_ChangingEntries_ARMv7MP STR a1, [v6, #Proc_MMU_ChangingEntries] ADRL a1, MMU_ChangingUncachedEntries_ARMv7MP STR a1, [v6, #Proc_MMU_ChangingUncachedEntries] B %FT90 50 ] ; Calculate DCache_RangeThreshold MOV a1, #128*1024 ; Arbitrary-ish STR a1, [v6, #DCache_RangeThreshold] ADRL a1, Cache_CleanInvalidateAll_WB_CR7_Lx STR a1, [v6, #Proc_Cache_CleanInvalidateAll] ADRL a1, Cache_CleanInvalidateRange_WB_CR7_Lx STR a1, [v6, #Proc_Cache_CleanInvalidateRange] ADRL a1, Cache_CleanAll_WB_CR7_Lx STR a1, [v6, #Proc_Cache_CleanAll] ADRL a1, Cache_CleanRange_WB_CR7_Lx STR a1, [v6, #Proc_Cache_CleanRange] ADRL a1, Cache_InvalidateAll_WB_CR7_Lx STR a1, [v6, #Proc_Cache_InvalidateAll] ADRL a1, 
Cache_InvalidateRange_WB_CR7_Lx STR a1, [v6, #Proc_Cache_InvalidateRange] ADRL a1, Cache_RangeThreshold_WB_CR7_Lx STR a1, [v6, #Proc_Cache_RangeThreshold] ADRL a1, Cache_Examine_WB_CR7_Lx STR a1, [v6, #Proc_Cache_Examine] ADRL a1, ICache_InvalidateAll_WB_CR7_Lx STR a1, [v6, #Proc_ICache_InvalidateAll] ADRL a1, ICache_InvalidateRange_WB_CR7_Lx STR a1, [v6, #Proc_ICache_InvalidateRange] ADRL a1, TLB_InvalidateAll_WB_CR7_Lx STR a1, [v6, #Proc_TLB_InvalidateAll] ADRL a1, TLB_InvalidateEntry_WB_CR7_Lx STR a1, [v6, #Proc_TLB_InvalidateEntry] ADRL a1, IMB_Full_WB_CR7_Lx STR a1, [v6, #Proc_IMB_Full] ADRL a1, IMB_Range_WB_CR7_Lx STR a1, [v6, #Proc_IMB_Range] ADRL a1, IMB_List_WB_CR7_Lx STR a1, [v6, #Proc_IMB_List] ADRL a1, MMU_Changing_WB_CR7_Lx STR a1, [v6, #Proc_MMU_Changing] ADRL a1, MMU_ChangingEntry_WB_CR7_Lx STR a1, [v6, #Proc_MMU_ChangingEntry] ADRL a1, MMU_ChangingUncached_WB_CR7_Lx STR a1, [v6, #Proc_MMU_ChangingUncached] ADRL a1, MMU_ChangingUncachedEntry_WB_CR7_Lx STR a1, [v6, #Proc_MMU_ChangingUncachedEntry] ADRL a1, MMU_ChangingEntries_WB_CR7_Lx STR a1, [v6, #Proc_MMU_ChangingEntries] ADRL a1, MMU_ChangingUncachedEntries_WB_CR7_Lx STR a1, [v6, #Proc_MMU_ChangingUncachedEntries] B %FT90 ] ; MEMM_Type = "VMSAv6" 90 [ MEMM_Type = "VMSAv6" ; Reuse Init_PCBTrans to set up PageTable_PageFlags, MMU_PPLAccess, ; MMU_PCBTrans, MMU_PPLTrans MOV a1, v6 PTOp Init_PCBTrans | TST v5, #CPUFlag_ExtendedPages ADRNEL a1, PPLTransX_ShortDesc ADREQL a1, PPLTrans_ShortDesc [ ARM6support ARM_6 lr ADREQL a1, PPLTransARM6 ] STR a1, [v6, #MMU_PPLTrans] ADRL a1, PPLAccess_ShortDesc [ ARM6support ADREQL a1, PPLAccessARM6 ] STR a1, [v6, #MMU_PPLAccess] ] Pull "v1,v2,v5,v6,v7,pc" ; This routine works out the values LINELEN, ASSOCIATIVITY, NSETS and CACHE_SIZE defined ; in section B2.3.3 of the ARMv5 ARM. 
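;
; Worked example (illustrative only, not part of the original code): the
; ARM920T entry in KnownCPUTable describes 16K, 64-way caches with 8-word
; (32 byte) lines, i.e. encoded fields len=2, M=0, assoc=6, size=5. The
; formulas used below then give:
;   LineLen       = 1 << (len+3)             = 32 bytes
;   Multiplier    = 2 + M                    = 2
;   Associativity = Multiplier << (assoc-1)  = 2 << 5  = 64
;   Size          = Multiplier << (size+8)   = 2 << 13 = 16K
;   NSets         = 1 << (size+6-assoc-len)  = 1 << 3  = 8
; (sanity check: NSets * Associativity * LineLen = 8 * 64 * 32 = 16K = Size)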
EvaluateCache AND a3, a1, #CT_assoc_mask+CT_M TEQ a3, #(CT_assoc_0:SHL:CT_assoc_pos)+CT_M BEQ %FT80 MOV ip, #1 ASSERT CT_len_pos = 0 AND a4, a1, #CT_len_mask ADD a4, a4, #3 MOV a4, ip, LSL a4 ; LineLen = 1 << (len+3) STRB a4, [a2, #ICache_LineLen-ICache_Info] MOV a3, #2 TST a1, #CT_M ADDNE a3, a3, #1 ; Multiplier = 2 + M AND a4, a1, #CT_assoc_mask RSB a4, ip, a4, LSR #CT_assoc_pos MOV a4, a3, LSL a4 ; Associativity = Multiplier << (assoc-1) STRB a4, [a2, #ICache_Associativity-ICache_Info] AND a4, a1, #CT_size_mask MOV a4, a4, LSR #CT_size_pos MOV a3, a3, LSL a4 MOV a3, a3, LSL #8 ; Size = Multiplier << (size+8) STR a3, [a2, #ICache_Size-ICache_Info] ADD a4, a4, #6 AND a3, a1, #CT_assoc_mask SUB a4, a4, a3, LSR #CT_assoc_pos AND a3, a1, #CT_len_mask ASSERT CT_len_pos = 0 SUB a4, a4, a3 MOV a4, ip, LSL a4 ; NSets = 1 << (size + 6 - assoc - len) STR a4, [a2, #ICache_NSets-ICache_Info] MOV pc, lr 80 MOV a1, #0 STR a1, [a2, #ICache_NSets-ICache_Info] STR a1, [a2, #ICache_Size-ICache_Info] STRB a1, [a2, #ICache_LineLen-ICache_Info] STRB a1, [a2, #ICache_Associativity-ICache_Info] MOV pc, lr ; Create a list of CPUs, 16 bytes per entry: ; ID bits (1 word) ; Test mask for ID (1 word) ; Cache type register value (1 word) ; Processor type (1 byte) ; Architecture type (1 byte) ; Reserved (2 bytes) GBLA tempcpu MACRO CPUDesc $proc, $id, $mask, $arch, $type, $s, $dsz, $das, $dln, $isz, $ias, $iln LCLA type type SETA (CT_ctype_$type:SHL:CT_ctype_pos)+($s:SHL:CT_S_pos) tempcpu CSzDesc $dsz, $das, $dln type SETA type+(tempcpu:SHL:CT_Dsize_pos) [ :LNOT:($s=0 :LAND: "$isz"="") tempcpu CSzDesc $isz, $ias, $iln ] type SETA type+(tempcpu:SHL:CT_Isize_pos) ASSERT ($id :AND: :NOT: $mask) = 0 DCD $id, $mask, type DCB $proc, $arch, 0, 0 MEND MACRO $var CSzDesc $sz, $as, $ln $var SETA (CT_size_$sz:SHL:CT_size_pos)+(CT_assoc_$as:SHL:CT_assoc_pos)+(CT_len_$ln:SHL:CT_len_pos) $var SETA $var+(CT_M_$sz:SHL:CT_M_pos) MEND ; CPUDesc table for ARMv3-ARMv6 KnownCPUTable ; /------Cache Type register fields-----\. ; ID reg Mask Arch Type S Dsz Das Dln Isz Ias Iln [ MEMM_Type = "ARM600" CPUDesc ARM600, &000600, &00FFF0, ARMv3, WT, 0, 4K, 64, 4 CPUDesc ARM610, &000610, &00FFF0, ARMv3, WT, 0, 4K, 64, 4 CPUDesc ARMunk, &000000, &00F000, ARMv3, WT, 0, 4K, 64, 4 CPUDesc ARM700, &007000, &FFFFF0, ARMv3, WT, 0, 8K, 4, 8 CPUDesc ARM710, &007100, &FFFFF0, ARMv3, WT, 0, 8K, 4, 8 CPUDesc ARM710a, &047100, &FDFFF0, ARMv3, WT, 0, 8K, 4, 4 CPUDesc ARM7500, &027100, &FFFFF0, ARMv3, WT, 0, 4K, 4, 4 CPUDesc ARM7500FE, &077100, &FFFFF0, ARMv3, WT, 0, 4K, 4, 4 CPUDesc ARMunk, &007000, &80F000, ARMv3, WT, 0, 8K, 4, 4 CPUDesc ARM720T, &807200, &FFFFF0, ARMv4T, WT, 0, 8K, 4, 4 CPUDesc ARMunk, &807000, &80F000, ARMv4T, WT, 0, 8K, 4, 4 CPUDesc SA110_preRevT, &01A100, &0FFFFC, ARMv4, WB_Crd, 1, 16K, 32, 8, 16K, 32, 8 CPUDesc SA110, &01A100, &0FFFF0, ARMv4, WB_Crd, 1, 16K, 32, 8, 16K, 32, 8 CPUDesc SA1100, &01A110, &0FFFF0, ARMv4, WB_Crd, 1, 8K, 32, 8, 16K, 32, 8 CPUDesc SA1110, &01B110, &0FFFF0, ARMv4, WB_Crd, 1, 8K, 32, 8, 16K, 32, 8 CPUDesc ARM920T, &029200, &0FFFF0, ARMv4T, WB_CR7_LDa, 1, 16K, 64, 8, 16K, 64, 8 CPUDesc ARM922T, &029220, &0FFFF0, ARMv4T, WB_CR7_LDa, 1, 8K, 64, 8, 8K, 64, 8 CPUDesc X80200, &052000, &0FFFF0, ARMv5TE, WB_Cal_LD, 1, 32K, 32, 8, 32K, 32, 8 CPUDesc X80321, &69052400, &FFFFF700, ARMv5TE, WB_Cal_LD, 1, 32K, 32, 8, 32K, 32, 8 ] ; MEMM_Type = "ARM600" DCD -1 [ MEMM_Type = "VMSAv6" ; Simplified CPUDesc table for ARMvF ; The cache size data is ignored for ARMv7. 
KnownCPUTable_Fancy
        CPUDesc ARM1176JZF_S, &00B760, &00FFF0, ARMvF, WB_CR7_LDc, 1, 16K, 4, 8, 16K, 4, 8
        CPUDesc Cortex_A5,    &00C050, &00FFF0, ARMvF, WB_CR7_Lx,  1, 16K, 32,16, 16K, 32,16
        CPUDesc Cortex_A7,    &00C070, &00FFF0, ARMvF, WB_CR7_Lx,  1, 16K, 32,16, 16K, 32,16
        CPUDesc Cortex_A8,    &00C080, &00FFF0, ARMvF, WB_CR7_Lx,  1, 16K, 32,16, 16K, 32,16
        CPUDesc Cortex_A9,    &00C090, &00FFF0, ARMvF, WB_CR7_Lx,  1, 32K, 32,16, 32K, 32,16
        CPUDesc Cortex_A12,   &00C0D0, &00FFF0, ARMvF, WB_CR7_Lx,  1, 32K, 32,16, 32K, 32,16
        CPUDesc Cortex_A15,   &00C0F0, &00FFF0, ARMvF, WB_CR7_Lx,  1, 32K, 32,16, 32K, 32,16
        CPUDesc Cortex_A17,   &00C0E0, &00FFF0, ARMvF, WB_CR7_Lx,  1, 32K, 32,16, 32K, 32,16
        CPUDesc Cortex_A53,   &00D030, &00FFF0, ARMvF, WB_CR7_Lx,  1, 32K, 32,16, 32K, 32,16
        CPUDesc Cortex_A57,   &00D070, &00FFF0, ARMvF, WB_CR7_Lx,  1, 32K, 32,16, 32K, 32,16
        CPUDesc Cortex_A72,   &00D080, &00FFF0, ARMvF, WB_CR7_Lx,  1, 32K, 32,16, 32K, 32,16
        DCD     -1
 ] ; MEMM_Type = "VMSAv6"

; Peculiar characteristics of individual ARMs not deducible otherwise. First field is
; flags to set, second flags to clear.
KnownCPUFlags
        DCD     0,                                                 0  ; ARM 600
        DCD     0,                                                 0  ; ARM 610
        DCD     0,                                                 0  ; ARM 700
        DCD     0,                                                 0  ; ARM 710
        DCD     0,                                                 0  ; ARM 710a
        DCD     CPUFlag_AbortRestartBroken+CPUFlag_InterruptDelay, 0  ; SA 110 pre revT
        DCD     CPUFlag_InterruptDelay,                            0  ; SA 110 revT or later
        DCD     0,                                                 0  ; ARM 7500
        DCD     0,                                                 0  ; ARM 7500FE
        DCD     CPUFlag_InterruptDelay,                            0  ; SA 1100
        DCD     CPUFlag_InterruptDelay,                            0  ; SA 1110
        DCD     CPUFlag_NoWBDrain,                                 0  ; ARM 720T
        DCD     0,                                                 0  ; ARM 920T
        DCD     0,                                                 0  ; ARM 922T
        DCD     CPUFlag_ExtendedPages+CPUFlag_XScale,              0  ; X80200
        DCD     CPUFlag_XScale,                                    0  ; X80321
        DCD     0,                                                 0  ; ARM1176JZF_S
        DCD     0,                                                 0  ; Cortex_A5
        DCD     0,                                                 0  ; Cortex_A7
        DCD     0,                                                 0  ; Cortex_A8
        DCD     0,                                                 0  ; Cortex_A9
        DCD     0,                                                 0  ; Cortex_A12
        DCD     CPUFlag_NoDCacheDisable,                           0  ; Cortex_A15
        DCD     0,                                                 0  ; Cortex_A17
        DCD     CPUFlag_NoDCacheDisable,                           0  ; Cortex_A53
        DCD     0,                                                 0  ; Cortex_A57
        DCD     0,                                                 0  ; Cortex_A72

 [ MEMM_Type = "VMSAv6"
; --------------------------------------------------------------------------
; ----- ARM_Analyse_Fancy --------------------------------------------------
; --------------------------------------------------------------------------
;
; For ARMv7 ARMs (arch=&F), we can detect everything via the feature registers
; TODO - There's some stuff in here that can be tidied up/removed
; Things we need to set up:
;   ProcessorType   (as listed in hdr.ARMops)
;   Cache_Type      (CT_ctype_* from hdr:MEMM.ARM600)
;   ProcessorArch   (as reported by Init_ARMarch)
;   ProcessorFlags  (CPUFlag_* from hdr.ARMops)
;   Proc_*          (Cache/TLB/IMB/MMU function pointers)
;   MMU_PCBTrans    (Points to lookup table for translating page table cache options)
;   ICache_*, DCache_* (ICache, DCache properties - optional, since not used externally?)
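;
; For reference (summary added for readability; register names follow the
; ARMv7 ARM): the CP15 feature registers consulted below are, by MRC operands,
; ID_ISAR0 (c0,c2,0) Swap_instrs, ID_PFR0 (c0,c1,0) State1/Thumb,
; ID_ISAR2 (c0,c2,2) MultU/MultS_instrs, ID_ISAR3 (c0,c2,3) Saturate_instrs
; and SynchPrim_instrs, ID_ISAR4 (c0,c2,4) SWP_frac and SynchPrim_instrs_frac,
; plus the cache type (CTR) and cache level ID (CLIDR) registers for the
; cache geometry.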
ARM_Analyse_Fancy Push "v1,v2,v5,v6,v7,lr" ARM_read_ID v1 LDR v6, =ZeroPage ADRL v7, KnownCPUTable_Fancy 10 LDMIA v7!, {a1, a2} CMP a1, #-1 BEQ %FT20 AND a2, v1, a2 TEQ a1, a2 ADDNE v7, v7, #8 BNE %BT10 20 LDR v2, [v7] CMP a1, #-1 LDRNEB a2, [v7, #4] MOVEQ a2, #ARMunk STRB a2, [v6, #ProcessorType] AND a1, v2, #CT_ctype_mask MOV a1, a1, LSR #CT_ctype_pos STRB a1, [v6, #Cache_Type] ; STM should always store PC+8 ; Should always be base restored abort model ; 26bit has been obsolete for a long time MOV v5, #CPUFlag_StorePCplus8+CPUFlag_BaseRestored+CPUFlag_32bitOS+CPUFlag_No26bitMode [ HiProcVecs ORR v5, v5, #CPUFlag_HiProcVecs ] ; Work out whether the cache info is in ARMv6 or ARMv7 style ; Top 3 bits of the cache type register give the register format ARM_read_cachetype v2 MOV a1, v2, LSR #29 TEQ a1, #4 BEQ %FT25 TEQ a1, #0 BNE WeirdARMPanic ; ARMv6 format cache type register. ; CPUs like the ARM1176JZF-S are available with a range of cache sizes, ; so it's not safe to rely on the values in the CPU table. Fortunately ; all ARMv6 CPUs implement the register (by contrast, for the "plain" ; ARM case, no ARMv3 CPUs, some ARMv4 CPUs and all ARMv5 CPUs, so it ; needs to drop back to the table in some cases). MOV a1, v2, LSR #CT_Isize_pos ADD a2, v6, #ICache_Info BL EvaluateCache MOV a1, v2, LSR #CT_Dsize_pos ADD a2, v6, #DCache_Info BL EvaluateCache TST v2, #CT_S ORRNE v5, v5, #CPUFlag_SynchroniseCodeAreas+CPUFlag_SplitCache B %FT27 25 ; ARMv7 format cache type register. ; This should(!) mean that we have the cache level ID register, ; and all the other ARMv7 cache registers. ; Do we have a split cache? MRC p15, 1, a1, c0, c0, 1 AND a2, a1, #7 TEQ a2, #3 ORREQ v5, v5, #CPUFlag_SynchroniseCodeAreas+CPUFlag_SplitCache 27 [ CacheOff ORR v5, v5, #CPUFlag_SynchroniseCodeAreas | ARM_read_control a1 ; if Z bit set then we have branch prediction, TST a1, #MMUC_Z ; so we need OS_SynchroniseCodeAreas even if not ORRNE v5, v5, #CPUFlag_SynchroniseCodeAreas ; split caches ] BL Init_ARMarch STRB a1, [v6, #ProcessorArch] MRC p15, 0, a1, c0, c2, 2 TST a1, #&FF0000 ; MultU_instrs OR MultS_instrs ORRNE v5, v5, #CPUFlag_LongMul MRC p15, 0, a1, c0, c1, 0 TST a1, #&F0 ; State1 ORRNE v5, v5, #CPUFlag_Thumb MRC p15, 0, a1, c0, c2, 3 TST a1, #&F ; Saturate_instrs ORRNE v5, v5, #CPUFlag_DSP MRC p15, 0, a1, c0, c2, 0 TST a1, #&F ; Swap_instrs MRC p15, 0, a1, c0, c2, 4 TSTEQ a1, #&F0000000 ; SWP_frac ORREQ v5, v5, #CPUFlag_NoSWP MRC p15, 0, a2, c0, c2, 3 AND a2, a2, #&00F000 ; SynchPrim_instrs AND a1, a1, #&F00000 ; SynchPrim_instrs_frac ORR a1, a2, a1, LSR #12 TEQ a1, #2_00010000:SHL:8 ORREQ v5, v5, #CPUFlag_LoadStoreEx TEQ a1, #2_00010011:SHL:8 TEQNE a1, #2_00100000:SHL:8 ORREQ v5, v5, #CPUFlag_LoadStoreEx :OR: CPUFlag_LoadStoreClearExSizes ; Other flags not checked for above: ; CPUFlag_InterruptDelay ; CPUFlag_VectorReadException ; CPUFlag_ExtendedPages ; CPUFlag_NoWBDrain ; CPUFlag_AbortRestartBroken ; CPUFlag_XScale ; CPUFlag_XScaleJTAGconnected LDRB v4, [v6, #ProcessorType] TEQ v4, #ARMunk ; Modify deduced flags ADRNEL lr, KnownCPUFlags ADDNE lr, lr, v4, LSL #3 LDMNEIA lr, {a2, a3} ORRNE v5, v5, a2 BICNE v5, v5, a3 ORR v5, v5, #CPUFlag_ExtraReasonCodesFixed STR v5, [v6, #ProcessorFlags] ; Cache analysis LDRB a2, [v6, #Cache_Type] TEQ a2, #CT_ctype_WB_CR7_LDa ; eg. ARM9 TEQNE a2, #CT_ctype_WB_CR7_LDc ; eg. ARM1176JZF-S - differs only in cache lockdown BEQ Analyse_WB_CR7_LDa TEQ a2, #CT_ctype_WB_CR7_Lx BEQ Analyse_WB_CR7_Lx ; eg. Cortex-A8, Cortex-A9 ; others ... 
B WeirdARMPanic ; stiff :) ] ; MEMM_Type = "VMSAv6" ; -------------------------------------------------------------------------- ; ----- ARMops ------------------------------------------------------------- ; -------------------------------------------------------------------------- ; ; ARMops are the routines required by the kernel for cache/MMU control ; the kernel vectors to the appropriate ops for the given ARM at boot ; ; The Rules: ; - These routines may corrupt a1 and lr only ; - (lr can of course only be corrupted whilst still returning to correct ; link address) ; - stack is available, at least 16 words can be stacked ; - a NULL op would be a simple MOV pc, lr ; ; In: r1 = cache level (0-based) ; Out: r0 = Flags ; bits 0-2: cache type: ; 000 -> none ; 001 -> instruction ; 010 -> data ; 011 -> split ; 100 -> unified ; 1xx -> reserved ; Other bits: reserved ; r1 = D line length ; r2 = D size ; r3 = I line length ; r4 = I size ; r0-r4 = zero if cache level not present Cache_Examine_Simple TEQ r1, #0 MOVNE r0, #0 MOVNE r1, #0 MOVNE r2, #0 MOVNE r3, #0 MOVNE r4, #0 MOVNE pc, lr LDR r4, =ZeroPage LDR r0, [r4, #ProcessorFlags] TST r0, #CPUFlag_SplitCache MOVNE r0, #3 MOVEQ r0, #4 LDRB r1, [r4, #DCache_LineLen] LDR r2, [r4, #DCache_Size] LDRB r3, [r4, #ICache_LineLen] LDR r4, [r4, #ICache_Size] NullOp MOV pc, lr [ MEMM_Type = "ARM600" ; -------------------------------------------------------------------------- ; ----- ARMops for ARMv3 --------------------------------------------------- ; -------------------------------------------------------------------------- ; ; ARMv3 ARMs include ARM710, ARM610, ARM7500 ; Cache_Invalidate_ARMv3 MCR p15, 0, a1, c7, c0 MOV pc, lr DSB_ReadWrite_ARMv3 ;swap always forces unbuffered write, stalling till WB empty SUB sp, sp, #4 SWP a1, a1, [sp] ADD sp, sp, #4 MOV pc, lr TLB_Invalidate_ARMv3 MCR p15, 0, a1, c5, c0 MOV pc, lr ; a1 = page entry to invalidate (page aligned address) ; TLB_InvalidateEntry_ARMv3 MCR p15, 0, a1, c6, c0 MOV pc, lr MMU_Changing_ARMv3 [ CacheablePageTables SUB sp, sp, #4 SWP a1, a1, [sp] ADD sp, sp, #4 ] MCR p15, 0, a1, c5, c0 ; invalidate TLB MCR p15, 0, a1, c7, c0 ; invalidate cache MOV pc, lr MMU_ChangingUncached_ARMv3 [ CacheablePageTables SUB sp, sp, #4 SWP a1, a1, [sp] ADD sp, sp, #4 ] MCR p15, 0, a1, c5, c0 ; invalidate TLB MOV pc, lr ; a1 = page affected (page aligned address) ; MMU_ChangingEntry_ARMv3 [ CacheablePageTables Push "a1" SWP a1, a1, [sp] ADD sp, sp, #4 ] MCR p15, 0, a1, c6, c0 ; invalidate TLB entry MCR p15, 0, a1, c7, c0 ; invalidate cache MOV pc, lr ; a1 = first page affected (page aligned address) ; a2 = number of pages ; MMU_ChangingEntries_ARMv3 ROUT CMP a2, #16 ; arbitrary-ish threshold BHS MMU_Changing_ARMv3 Push "a2" [ CacheablePageTables SWP a2, a2, [sp] ] 10 MCR p15, 0, a1, c6, c0 ; invalidate TLB entry SUBS a2, a2, #1 ; next page ADD a1, a1, #PageSize BNE %BT10 MCR p15, 0, a1, c7, c0 ; invalidate cache Pull "a2" MOV pc, lr ; a1 = page affected (page aligned address) ; MMU_ChangingUncachedEntry_ARMv3 [ CacheablePageTables Push "a1" SWP a1, a1, [sp] ADD sp, sp, #4 ] MCR p15, 0, a1, c6, c0 ; invalidate TLB entry MOV pc, lr ; a1 = first page affected (page aligned address) ; a2 = number of pages ; MMU_ChangingUncachedEntries_ARMv3 ROUT CMP a2, #16 ; arbitrary-ish threshold BHS MMU_ChangingUncached_ARMv3 Push "a2" [ CacheablePageTables SWP a2, a2, [sp] ] 10 MCR p15, 0, a1, c6, c0 ; invalidate TLB entry SUBS a2, a2, #1 ; next page ADD a1, a1, #PageSize BNE %BT10 Pull "a2" MOV pc, lr Cache_RangeThreshold_ARMv3 
! 0, "arbitrary Cache_RangeThreshold_ARMv3" MOV a1, #16*PageSize MOV pc, lr LTORG ; -------------------------------------------------------------------------- ; ----- generic ARMops for simple ARMs, ARMv4 onwards ---------------------- ; -------------------------------------------------------------------------- ; ; eg. ARM7TDMI based ARMs, unified, writethrough cache ; Cache_InvalidateUnified MOV a1, #0 MCR p15, 0, a1, c7, c7 MOV pc, lr DSB_ReadWrite_OffOn ; used if ARM has no drain WBuffer MCR op Push "a2" ARM_read_control a1 BIC a2, a1, #MMUC_W ARM_write_control a2 ARM_write_control a1 Pull "a2" MOV pc, lr DSB_ReadWrite ; used if ARM has proper drain WBuffer MCR op MOV a1, #0 MCR p15, 0, a1, c7, c10, 4 MOV pc, lr TLB_Invalidate_Unified MOV a1, #0 MCR p15, 0, a1, c8, c7 MOV pc, lr ; a1 = page entry to invalidate (page aligned address) ; TLB_InvalidateEntry_Unified MCR p15, 0, a1, c8, c7, 1 MOV pc, lr MMU_Changing_Writethrough [ CacheablePageTables ; Yuck - this is probably going to be quite slow. Something to fix ; properly if/when we port to a system that uses this type of CPU. Push "lr" LDR a1, =ZeroPage ARMop DSB_ReadWrite,,,a1 Pull "lr" ] MOV a1, #0 MCR p15, 0, a1, c8, c7 ; invalidate TLB MCR p15, 0, a1, c7, c7 ; invalidate cache MOV pc, lr MMU_ChangingUncached [ CacheablePageTables Push "lr" LDR a1, =ZeroPage ARMop DSB_ReadWrite,,,a1 Pull "lr" ] MOV a1, #0 MCR p15, 0, a1, c8, c7 ; invalidate TLB MOV pc, lr ; a1 = page affected (page aligned address) ; MMU_ChangingEntry_Writethrough [ CacheablePageTables Push "a1,lr" LDR a1, =ZeroPage ARMop DSB_ReadWrite,,,a1 Pull "a1,lr" ] MCR p15, 0, a1, c8, c7, 1 ; invalidate TLB entry MOV a1, #0 MCR p15, 0, a1, c7, c7 ; invalidate cache MOV pc, lr ; a1 = first page affected (page aligned address) ; a2 = number of pages ; MMU_ChangingEntries_Writethrough ROUT CMP a2, #16 ; arbitrary-ish threshold BHS MMU_Changing_Writethrough Push "a2" [ CacheablePageTables Push "a1,lr" LDR a1, =ZeroPage ARMop DSB_ReadWrite,,,a1 Pull "a1,lr" ] 10 MCR p15, 0, a1, c8, c7, 1 ; invalidate TLB entry SUBS a2, a2, #1 ; next page ADD a1, a1, #PageSize BNE %BT10 MCR p15, 0, a2, c7, c7 ; invalidate cache Pull "a2" MOV pc, lr ; a1 = page affected (page aligned address) ; MMU_ChangingUncachedEntry [ CacheablePageTables Push "a1,lr" LDR a1, =ZeroPage ARMop DSB_ReadWrite,,,a1 Pull "a1,lr" ] MCR p15, 0, a1, c8, c7, 1 ; invalidate TLB entry MOV pc, lr ; a1 = first page affected (page aligned address) ; a2 = number of pages ; MMU_ChangingUncachedEntries ROUT CMP a2, #16 ; arbitrary-ish threshold BHS MMU_ChangingUncached Push "a2" [ CacheablePageTables Push "a1,lr" LDR a1, =ZeroPage ARMop DSB_ReadWrite,,,a1 Pull "a1,lr" ] 10 MCR p15, 0, a1, c8, c7, 1 ; invalidate TLB entry SUBS a2, a2, #1 ; next page ADD a1, a1, #PageSize BNE %BT10 Pull "a2" MOV pc, lr Cache_RangeThreshold_Writethrough ! 0, "arbitrary Cache_RangeThreshold_Writethrough" MOV a1, #16*PageSize MOV pc, lr ] ; MEMM_Type = "ARM600" ; -------------------------------------------------------------------------- ; ----- ARMops for ARM9 and the like --------------------------------------- ; -------------------------------------------------------------------------- ; WB_CR7_LDa refers to ARMs with writeback data cache, cleaned with ; register 7, lockdown available (format A) ; ; Note that ARM920 etc have writeback/writethrough data cache selectable ; by MMU regions. For simpliciity, we assume cacheable pages are mostly ; writeback. 
Any writethrough pages will have redundant clean operations ; applied when moved, for example, but this is a small overhead (cleaning ; a clean line is very quick on ARM 9). Cache_CleanAll_WB_CR7_LDa ROUT ; ; only guarantees to clean lines not involved in interrupts (so we can ; clean without disabling interrupts) ; ; Clean cache by traversing all segment and index values ; As a concrete example, for ARM 920 (16k+16k caches) we would have: ; ; DCache_LineLen = 32 (32 byte cache line, segment field starts at bit 5) ; DCache_IndexBit = &04000000 (index field starts at bit 26) ; DCache_IndexSegStart = &000000E0 (start at index=0, segment = 7) ; Push "a2, ip" LDR ip, =ZeroPage LDRB a1, [ip, #DCache_LineLen] ; segment field starts at this bit LDR a2, [ip, #DCache_IndexBit] ; index field starts at this bit LDR ip, [ip, #DCache_IndexSegStart] ; starting value, with index at min, seg at max 10 MCR p15, 0, ip, c7, c10, 2 ; clean DCache entry by segment/index ADDS ip, ip, a2 ; next index, counting up, CS if wrapped back to 0 BCC %BT10 SUBS ip, ip, a1 ; next segment, counting down, CC if wrapped back to max BCS %BT10 ; if segment wrapped, then we've finished MOV ip, #0 MCR p15, 0, ip, c7, c10, 4 ; drain WBuffer Pull "a2, ip" MOV pc, lr Cache_CleanInvalidateAll_WB_CR7_LDa ROUT ; ; similar to Cache_CleanAll, but does clean&invalidate of Dcache, and invalidates ICache ; Push "a2, ip" LDR ip, =ZeroPage LDRB a1, [ip, #DCache_LineLen] ; segment field starts at this bit LDR a2, [ip, #DCache_IndexBit] ; index field starts at this bit LDR ip, [ip, #DCache_IndexSegStart] ; starting value, with index at min, seg at max 10 MCR p15, 0, ip, c7, c14, 2 ; clean&invalidate DCache entry by segment/index ADDS ip, ip, a2 ; next index, counting up, CS if wrapped back to 0 BCC %BT10 SUBS ip, ip, a1 ; next segment, counting down, CC if wrapped back to max BCS %BT10 ; if segment wrapped, then we've finished MOV ip, #0 MCR p15, 0, ip, c7, c10, 4 ; drain WBuffer MCR p15, 0, ip, c7, c5, 0 ; invalidate ICache Pull "a2, ip" MOV pc, lr ; a1 = start address (inclusive, cache line aligned) ; a2 = end address (exclusive, cache line aligned) ; [ MEMM_Type = "ARM600" Cache_CleanInvalidateRange_WB_CR7_LDa ROUT Push "a2, a3, lr" LDR lr, =ZeroPage SUB a2, a2, a1 LDR a3, [lr, #DCache_RangeThreshold] ;check whether cheaper to do global clean CMP a2, a3 BHS %FT30 ADD a2, a2, a1 ;clean end address (exclusive) LDRB a3, [lr, #DCache_LineLen] 10 MCR p15, 0, a1, c7, c14, 1 ; clean&invalidate DCache entry MCR p15, 0, a1, c7, c5, 1 ; invalidate ICache entry ADD a1, a1, a3 CMP a1, a2 BLO %BT10 MOV a1, #0 MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer MCR p15, 0, a1, c7, c5, 6 ; flush branch predictors Pull "a2, a3, pc" ; 30 Pull "a2, a3, lr" B Cache_CleanInvalidateAll_WB_CR7_LDa Cache_CleanRange_WB_CR7_LDa ROUT Push "a2, a3, lr" LDR lr, =ZeroPage SUB a2, a2, a1 LDR a3, [lr, #DCache_RangeThreshold] ;check whether cheaper to do global clean CMP a2, a3 BHS %FT30 ADD a2, a2, a1 ;clean end address (exclusive) LDRB a3, [lr, #DCache_LineLen] 10 MCR p15, 0, a1, c7, c10, 1 ; clean DCache entry ADD a1, a1, a3 CMP a1, a2 BLO %BT10 MOV a1, #0 MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer Pull "a2, a3, pc" ; 30 Pull "a2, a3, lr" B Cache_CleanAll_WB_CR7_LDa Cache_InvalidateRange_WB_CR7_LDa ROUT Push "a2, a3, lr" LDR lr, =ZeroPage SUB a2, a2, a1 LDR a3, [lr, #DCache_RangeThreshold] ;check whether cheaper to do global clean CMP a2, a3, LSL #1 ;assume clean+invalidate slower than just invalidate BHS %FT30 ADD a2, a2, a1 ;end address (exclusive) LDRB a3, [lr, 
#DCache_LineLen] 10 MCR p15, 0, a1, c7, c6, 1 ; invalidate DCache entry MCR p15, 0, a1, c7, c5, 1 ; invalidate ICache entry ADD a1, a1, a3 CMP a1, a2 BLO %BT10 MOV a1, #0 MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer MCR p15, 0, a1, c7, c5, 6 ; flush branch predictors Pull "a2, a3, pc" ; 30 Pull "a2, a3, lr" B Cache_CleanInvalidateAll_WB_CR7_LDa | ; Bodge for ARM11 ; The OS assumes that address-based cache maintenance operations will operate ; on pages which are currently marked non-cacheable (so that we can make a page ; non-cacheable and then clean/invalidate the cache, to ensure prefetch or ; anything else doesn't pull any data for the page back into the cache once ; we've cleaned it). For ARMv7+ this is guaranteed behaviour, but prior to that ; it's implementation defined, and the ARM11 in particular seems to ignore ; address-based maintenance which target non-cacheable addresses. ; As a workaround, perform a full clean & invalidate instead ; ; Note that this also provides us protection against erratum 720013 (or possibly ; it's that erratum which I was experiencing when I first made this change) Cache_CleanInvalidateRange_WB_CR7_LDa * Cache_CleanInvalidateAll_WB_CR7_LDa Cache_CleanRange_WB_CR7_LDa * Cache_CleanAll_WB_CR7_LDa Cache_InvalidateRange_WB_CR7_LDa * Cache_CleanInvalidateAll_WB_CR7_LDa ] Cache_InvalidateAll_WB_CR7_LDa ROUT ; ; no clean, assume caller knows what's happening ; MOV a1, #0 MCR p15, 0, a1, c7, c7, 0 ; invalidate ICache and DCache MOV pc, lr Cache_RangeThreshold_WB_CR7_LDa ROUT LDR a1, =ZeroPage LDR a1, [a1, #DCache_RangeThreshold] MOV pc, lr ICache_InvalidateAll_WB_CR7_LDa ROUT MOV a1, #0 MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache + branch predictors MOV pc, lr [ MEMM_Type = "ARM600" ; a1 = start address (inclusive, cache line aligned) ; a2 = end address (exclusive, cache line aligned) ; ICache_InvalidateRange_WB_CR7_LDa ROUT SUB a2, a2, a1 CMP a2, #32*1024 ; arbitrary-ish range threshold ADD a2, a2, a1 BHS ICache_InvalidateAll_WB_CR7_LDa Push "lr" LDR lr, =ZeroPage LDRB lr, [lr, #ICache_LineLen] 10 MCR p15, 0, a1, c7, c5, 1 ; invalidate ICache entry ADD a1, a1, lr CMP a1, a2 BLO %BT10 MOV a1, #0 MCR p15, 0, a1, c7, c5, 6 ; flush branch predictors Pull "pc" | ; ARM11 erratum 720013: I-cache invalidation can fail ; One workaround (for MVA ops) is to perform the operation twice, but that would ; presumably need interrupts to be disabled to be fully safe. So go with the ; other workaround of doing a full invalidate instead. 
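;
; As with the Cache_*Range aliases above, the '*' (EQU) definition below
; simply equates the range op to the whole-cache routine, so callers going
; through the Proc_ICache_InvalidateRange pointer get a full invalidate.
; (Explanatory note only.)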
ICache_InvalidateRange_WB_CR7_LDa * ICache_InvalidateAll_WB_CR7_LDa ] MMU_ChangingUncached_WB_CR7_LDa ROUT [ CacheablePageTables MOV a1, #0 MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer [ MEMM_Type = "VMSAv6" MCR p15, 0, a1, c7, c5, 4 ; ISB ] ] TLB_InvalidateAll_WB_CR7_LDa MOV a1, #0 MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB MOV pc, lr ; a1 = page affected (page aligned address) ; MMU_ChangingUncachedEntry_WB_CR7_LDa ROUT [ CacheablePageTables Push "a1" MOV a1, #0 MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer [ MEMM_Type = "VMSAv6" MCR p15, 0, a1, c7, c5, 4 ; ISB ] Pull "a1" ] TLB_InvalidateEntry_WB_CR7_LDa MCR p15, 0, a1, c8, c5, 1 ; invalidate ITLB entry MCR p15, 0, a1, c8, c6, 1 ; invalidate DTLB entry MOV pc, lr DSB_ReadWrite_WB_CR7_LDa ROUT MOV a1, #0 MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer MOV pc, lr IMB_Full_WB_CR7_LDa ROUT ; ; do: clean DCache; drain WBuffer, invalidate ICache ; Push "lr" BL Cache_CleanAll_WB_CR7_LDa ; also drains Wbuffer MOV a1, #0 MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache Pull "pc" ; a1 = start address (inclusive, cache line aligned) ; a2 = end address (exclusive, cache line aligned) ; IMB_Range_WB_CR7_LDa ROUT SUB a2, a2, a1 CMP a2, #32*1024 ; arbitrary-ish range threshold ADD a2, a2, a1 BHS IMB_Full_WB_CR7_LDa Push "lr" LDR lr, =ZeroPage LDRB lr, [lr, #DCache_LineLen] 10 MCR p15, 0, a1, c7, c10, 1 ; clean DCache entry by VA [ MEMM_Type = "ARM600" MCR p15, 0, a1, c7, c5, 1 ; invalidate ICache entry ] ADD a1, a1, lr CMP a1, a2 BLO %BT10 MOV a1, #0 MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer [ MEMM_Type = "ARM600" MCR p15, 0, a1, c7, c5, 6 ; flush branch predictors | MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache + branch predictors (erratum 720013) ] Pull "pc" ; a1 = pointer to list of (start, end) address pairs ; a2 = pointer to end of list ; a3 = total amount of memory to be synchronised ; IMB_List_WB_CR7_LDa ROUT CMP a3, #32*1024 ; arbitrary-ish range threshold BHS IMB_Full_WB_CR7_LDa Push "v1-v2,lr" LDR lr, =ZeroPage LDRB lr, [lr, #DCache_LineLen] 05 LDMIA a1!, {v1-v2} 10 MCR p15, 0, v1, c7, c10, 1 ; clean DCache entry by VA [ MEMM_Type = "ARM600" MCR p15, 0, v1, c7, c5, 1 ; invalidate ICache entry ] ADD v1, v1, lr CMP v1, v2 BLO %BT10 CMP a1, a2 BNE %BT05 MOV a1, #0 MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer [ MEMM_Type = "ARM600" MCR p15, 0, a1, c7, c5, 6 ; flush branch predictors | MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache + branch predictors (erratum 720013) ] Pull "v1-v2,pc" MMU_Changing_WB_CR7_LDa ROUT [ CacheablePageTables Push "a1" MOV a1, #0 MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer [ MEMM_Type = "VMSAv6" MCR p15, 0, a1, c7, c5, 4 ; ISB ] Pull "a1" ] MOV a1, #0 MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB B Cache_CleanInvalidateAll_WB_CR7_LDa ; a1 = page affected (page aligned address) ; MMU_ChangingEntry_WB_CR7_LDa ROUT [ CacheablePageTables Push "a1" MOV a1, #0 MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer [ MEMM_Type = "VMSAv6" MCR p15, 0, a1, c7, c5, 4 ; ISB ] Pull "a1" ] [ MEMM_Type = "ARM600" Push "a2, lr" MCR p15, 0, a1, c8, c6, 1 ; invalidate DTLB entry MCR p15, 0, a1, c8, c5, 1 ; invalidate ITLB entry ADD a2, a1, #PageSize LDR lr, =ZeroPage LDRB lr, [lr, #DCache_LineLen] 10 MCR p15, 0, a1, c7, c14, 1 ; clean&invalidate DCache entry MCR p15, 0, a1, c7, c5, 1 ; invalidate ICache entry ADD a1, a1, lr CMP a1, a2 BLO %BT10 MOV lr, #0 MCR p15, 0, lr, c7, c10, 4 ; drain WBuffer MCR p15, 0, a1, c7, c5, 6 ; flush branch predictors Pull "a2, pc" | ; See above re: ARM11 cache cleaning not working on non-cacheable pages MCR p15, 0, 
a1, c8, c6, 1 ; invalidate DTLB entry MCR p15, 0, a1, c8, c5, 1 ; invalidate ITLB entry B Cache_CleanInvalidateAll_WB_CR7_LDa ] ; a1 = first page affected (page aligned address) ; a2 = number of pages ; MMU_ChangingEntries_WB_CR7_LDa ROUT Push "a2, a3, lr" [ CacheablePageTables MOV a3, #0 MCR p15, 0, a3, c7, c10, 4 ; drain WBuffer [ MEMM_Type = "VMSAv6" MCR p15, 0, a3, c7, c5, 4 ; ISB ] ] MOV a2, a2, LSL #Log2PageSize LDR lr, =ZeroPage LDR a3, [lr, #DCache_RangeThreshold] ;check whether cheaper to do global clean CMP a2, a3 BHS %FT30 ADD a2, a2, a1 ;clean end address (exclusive) LDRB a3, [lr, #DCache_LineLen] MOV lr, a1 10 MCR p15, 0, a1, c8, c6, 1 ; invalidate DTLB entry MCR p15, 0, a1, c8, c5, 1 ; invalidate ITLB entry ADD a1, a1, #PageSize CMP a1, a2 BLO %BT10 [ MEMM_Type = "ARM600" MOV a1, lr ; restore start address 20 MCR p15, 0, a1, c7, c14, 1 ; clean&invalidate DCache entry MCR p15, 0, a1, c7, c5, 1 ; invalidate ICache entry ADD a1, a1, a3 CMP a1, a2 BLO %BT20 MOV a1, #0 MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer MCR p15, 0, a1, c7, c5, 6 ; flush branch predictors Pull "a2, a3, pc" ; | ; See above re: ARM11 cache cleaning not working on non-cacheable pages B %FT40 ] 30 MOV a1, #0 MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB 40 BL Cache_CleanInvalidateAll_WB_CR7_LDa Pull "a2, a3, pc" ; a1 = first page affected (page aligned address) ; a2 = number of pages ; MMU_ChangingUncachedEntries_WB_CR7_LDa ROUT [ CacheablePageTables Push "a1" MOV a1, #0 MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer [ MEMM_Type = "VMSAv6" MCR p15, 0, a1, c7, c5, 4 ; ISB ] Pull "a1" ] CMP a2, #32 ; arbitrary-ish threshold BHS %FT20 Push "a2" 10 MCR p15, 0, a1, c8, c6, 1 ; invalidate DTLB entry MCR p15, 0, a1, c8, c5, 1 ; invalidate ITLB entry ADD a1, a1, #PageSize SUBS a2, a2, #1 BNE %BT10 Pull "a2" MOV pc, lr ; 20 MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB MOV pc, lr [ MEMM_Type = "ARM600" ; -------------------------------------------------------------------------- ; ----- ARMops for StrongARM and the like ---------------------------------- ; -------------------------------------------------------------------------- ; WB_Crd is Writeback data cache, clean by reading data from cleaner area ; Currently no support for mini data cache on some StrongARM variants. Mini ; cache is always writeback and must have cleaning support, so is very ; awkward to use for cacheable screen, say. ; Global cache cleaning requires address space for private cleaner areas (not accessed ; for any other reason). Cleaning is normally with interrupts enabled (to avoid a latency ; hit), which means that the cleaner data is not invalidated afterwards. This is fine for ; RISC OS - where the private area is not used for anything else, and any re-use of the ; cache under interrupts is safe (eg. a page being moved is *never* involved in any ; active interrupts). ; Mostly, cleaning toggles between two separate cache-sized areas, which gives minimum ; cleaning cost while guaranteeing proper clean even if previous clean data is present. If ; the clean routine is re-entered, an independent, double sized clean is initiated. This ; guarantees proper cleaning (regardless of multiple re-entrancy) whilst hardly complicating ; the routine at all. The overhead is small, since by far the most common cleaning will be ; non-re-entered. The upshot is that the cleaner address space available must be at least 4 ; times the cache size: ; 1 : used alternately, on 1st, 3rd, ... non-re-entered cleans ; 2 : used alternately, on 2nd, 4th, ... 
non-re-entered cleans ; 3 : used only for first half of a re-entered clean ; 4 : used only for second half of a re-entered clean ; ; DCache_CleanBaseAddress : start address of total cleaner space ; DCache_CleanNextAddress : start address for next non-re-entered clean, or 0 if re-entered Cache_CleanAll_WB_Crd ROUT ; ; - cleans data cache (and invalidates it as a side effect) ; - can be used with interrupts enabled (to avoid latency over time of clean) ; - can be re-entered ; - see remarks at top of StrongARM ops for discussion of strategy ; Push "a2-a4, v1, v2, lr" LDR lr, =ZeroPage LDR a1, [lr, #DCache_CleanBaseAddress] LDR a2, =DCache_CleanNextAddress LDR a3, [lr, #DCache_Size] LDRB a4, [lr, #DCache_LineLen] MOV v2, #0 SWP v1, v2, [a2] ; read current CleanNextAddr, zero it (semaphore) TEQ v1, #0 ; but if it is already zero, we have re-entered ADDEQ v1, a1, a3, LSL #1 ; if re-entered, start clean at Base+2*Cache_Size ADDEQ v2, v1, a3, LSL #1 ; if re-entered, do a clean of 2*Cache_Size ADDNE v2, v1, a3 ; if not re-entered, do a clean of Cache_Size 10 LDR lr, [v1], a4 TEQ v1, v2 BNE %BT10 ADD v2, a1, a3, LSL #1 ; compare end address with Base+2*Cache_Size CMP v1, v2 MOVEQ v1, a1 ; if equal, not re-entered and Next wraps back STRLS v1, [a2] ; if lower or same, not re-entered, so update Next MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer Pull "a2-a4, v1, v2, pc" Cache_CleanInvalidateAll_WB_Crd ROUT IMB_Full_WB_Crd ; ;does not truly invalidate DCache, but effectively invalidates (flushes) all lines not ;involved in interrupts - this is sufficient for OS requirements, and means we don't ;have to disable interrupts for possibly slow clean ; Push "lr" BL Cache_CleanAll_WB_Crd ;clean DCache (wrt to non-interrupt stuff) MCR p15, 0, a1, c7, c5, 0 ;flush ICache Pull "pc" Cache_InvalidateAll_WB_Crd ; ; no clean, assume caller knows what is happening ; MCR p15, 0, a1, c7, c7, 0 ;flush ICache and DCache MCR p15, 0, a1, c7, c10, 4 ;drain WBuffer MOV pc, lr Cache_RangeThreshold_WB_Crd LDR a1, =ZeroPage LDR a1, [a1, #DCache_RangeThreshold] MOV pc, lr MMU_ChangingUncached_WB_Crd [ CacheablePageTables MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer ] TLB_InvalidateAll_WB_Crd MCR p15, 0, a1, c8, c7, 0 ;flush ITLB and DTLB MOV pc, lr MMU_ChangingUncachedEntry_WB_Crd [ CacheablePageTables MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer ] TLB_InvalidateEntry_WB_Crd MCR p15, 0, a1, c8, c6, 1 ;flush DTLB entry MCR p15, 0, a1, c8, c5, 0 ;flush ITLB MOV pc, lr DSB_ReadWrite_WB_Crd MCR p15, 0, a1, c7, c10, 4 ;drain WBuffer MOV pc, lr IMB_Range_WB_Crd ROUT SUB a2, a2, a1 CMP a2, #64*1024 ;arbitrary-ish range threshold ADD a2, a2, a1 BHS IMB_Full_WB_Crd Push "lr" LDR lr, =ZeroPage LDRB lr, [lr, #DCache_LineLen] 10 MCR p15, 0, a1, c7, c10, 1 ;clean DCache entry ADD a1, a1, lr CMP a1, a2 BLO %BT10 MCR p15, 0, a1, c7, c10, 4 ;drain WBuffer MCR p15, 0, a1, c7, c5, 0 ;flush ICache Pull "pc" IMB_List_WB_Crd ROUT CMP a3, #64*1024 ;arbitrary-ish range threshold BHS IMB_Full_WB_Crd Push "v1-v2,lr" LDR lr, =ZeroPage LDRB lr, [lr, #DCache_LineLen] 05 LDMIA a1!, {v1-v2} 10 MCR p15, 0, v1, c7, c10, 1 ;clean DCache entry ADD v1, v1, lr CMP v1, v2 BLO %BT10 CMP a1, a2 BNE %BT05 MCR p15, 0, a1, c7, c10, 4 ;drain WBuffer MCR p15, 0, a1, c7, c5, 0 ;flush ICache Pull "v1-v2,pc" MMU_Changing_WB_Crd Push "lr" [ CacheablePageTables MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer ] MCR p15, 0, a1, c8, c7, 0 ;flush ITLB and DTLB BL Cache_CleanAll_WB_Crd ;clean DCache (wrt to non-interrupt stuff) MCR p15, 0, a1, c7, c5, 0 ;flush ICache Pull "pc" 
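;
; Illustrative walk-through of the cleaner-area scheme implemented by
; Cache_CleanAll_WB_Crd above (figures assume a 16K DCache for the sake of
; the example; the real sizes come from DCache_Size):
;   1st non-re-entered clean: Next=Base,     reads Base..Base+16K,     Next := Base+16K
;   2nd non-re-entered clean: Next=Base+16K, reads Base+16K..Base+32K, Next wraps to Base
;   re-entered clean:         Next was zeroed (SWP semaphore) by the clean it
;                             interrupted, so it reads Base+32K..Base+64K
;                             (2*Cache_Size) and leaves Next for the
;                             interrupted clean to restore when it resumes.
; Hence the requirement for 4*Cache_Size of cleaner address space.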
MMU_ChangingEntry_WB_Crd ROUT ; ;there is no clean&invalidate DCache instruction, however we can do clean ;entry followed by invalidate entry without an interrupt hole, because they ;are for the same virtual address (and that virtual address will not be ;involved in interrupts, since it is involved in remapping) ; [ CacheablePageTables MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer ] Push "a2, lr" ADD a2, a1, #PageSize LDR lr, =ZeroPage LDRB lr, [lr, #DCache_LineLen] MCR p15, 0, a1, c8, c6, 1 ;flush DTLB entry MCR p15, 0, a1, c8, c5, 0 ;flush ITLB 10 MCR p15, 0, a1, c7, c10, 1 ;clean DCache entry MCR p15, 0, a1, c7, c6, 1 ;flush DCache entry ADD a1, a1, lr CMP a1, a2 BLO %BT10 SUB a1, a1, #PageSize MCR p15, 0, a1, c7, c10, 4 ;drain WBuffer MCR p15, 0, a1, c7, c5, 0 ;flush ICache Pull "a2, pc" MMU_ChangingEntries_WB_Crd ROUT ; ;same comments as MMU_ChangingEntry_WB_Crd ; [ CacheablePageTables MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer ] Push "a2, a3, lr" MOV a2, a2, LSL #Log2PageSize LDR lr, =ZeroPage LDR a3, [lr, #DCache_RangeThreshold] ;check whether cheaper to do global clean CMP a2, a3 BHS %FT30 ADD a2, a2, a1 ;clean end address (exclusive) LDRB a3, [lr, #DCache_LineLen] MOV lr, a1 10 MCR p15, 0, a1, c8, c6, 1 ;flush DTLB entry ADD a1, a1, #PageSize CMP a1, a2 BLO %BT10 MCR p15, 0, a1, c8, c5, 0 ;flush ITLB MOV a1, lr ;restore start address 20 MCR p15, 0, a1, c7, c10, 1 ;clean DCache entry MCR p15, 0, a1, c7, c6, 1 ;flush DCache entry ADD a1, a1, a3 CMP a1, a2 BLO %BT20 MCR p15, 0, a1, c7, c10, 4 ;drain WBuffer MCR p15, 0, a1, c7, c5, 0 ;flush ICache Pull "a2, a3, pc" ; 30 MCR p15, 0, a1, c8, c7, 0 ;flush ITLB and DTLB BL Cache_CleanAll_WB_Crd ;clean DCache (wrt to non-interrupt stuff) MCR p15, 0, a1, c7, c5, 0 ;flush ICache Pull "a2, a3, pc" Cache_CleanRange_WB_Crd ROUT Push "a2, a3, lr" LDR lr, =ZeroPage SUB a2, a2, a1 LDR a3, [lr, #DCache_RangeThreshold] ;check whether cheaper to do global clean CMP a2, a3 BHS %FT30 ADD a2, a2, a1 ;clean end address (exclusive) LDRB a3, [lr, #DCache_LineLen] 10 MCR p15, 0, a1, c7, c10, 1 ;clean DCache entry ADD a1, a1, a3 CMP a1, a2 BLO %BT10 MCR p15, 0, a1, c7, c10, 4 ;drain WBuffer Pull "a2, a3, pc" ; 30 BL Cache_CleanAll_WB_Crd ;clean DCache (wrt to non-interrupt stuff) Pull "a2, a3, pc" Cache_InvalidateRange_WB_Crd ROUT Push "a2, a3, lr" LDR lr, =ZeroPage SUB a2, a2, a1 LDR a3, [lr, #DCache_RangeThreshold] ;check whether cheaper to do global clean CMP a2, a3, LSL #1 ;assume clean+invalidate slower than just invalidate BHS %FT30 ADD a2, a2, a1 ;end address (exclusive) LDRB a3, [lr, #DCache_LineLen] 10 MCR p15, 0, a1, c7, c6, 1 ;flush DCache entry ADD a1, a1, a3 CMP a1, a2 BLO %BT10 MCR p15, 0, a1, c7, c10, 4 ;drain WBuffer MCR p15, 0, a1, c7, c5, 0 ;flush ICache Pull "a2, a3, pc" ; 30 BL Cache_CleanAll_WB_Crd ;clean DCache (wrt to non-interrupt stuff) MCR p15, 0, a1, c7, c5, 0 ;flush ICache Pull "a2, a3, pc" Cache_CleanInvalidateRange_WB_Crd ROUT ; ;same comments as MMU_ChangingEntry_WB_Crd ; Push "a2, a3, lr" LDR lr, =ZeroPage SUB a2, a2, a1 LDR a3, [lr, #DCache_RangeThreshold] ;check whether cheaper to do global clean CMP a2, a3 BHS %FT30 ADD a2, a2, a1 ;clean end address (exclusive) LDRB a3, [lr, #DCache_LineLen] 10 MCR p15, 0, a1, c7, c10, 1 ;clean DCache entry MCR p15, 0, a1, c7, c6, 1 ;flush DCache entry ADD a1, a1, a3 CMP a1, a2 BLO %BT10 MCR p15, 0, a1, c7, c10, 4 ;drain WBuffer MCR p15, 0, a1, c7, c5, 0 ;flush ICache Pull "a2, a3, pc" ; 30 BL Cache_CleanAll_WB_Crd ;clean DCache (wrt to non-interrupt stuff) MCR p15, 0, a1, c7, c5, 0 ;flush 
ICache Pull "a2, a3, pc" MMU_ChangingUncachedEntries_WB_Crd ROUT [ CacheablePageTables MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer ] CMP a2, #32 ;arbitrary-ish threshold BHS %FT20 Push "lr" MOV lr, a2 10 MCR p15, 0, a1, c8, c6, 1 ;flush DTLB entry ADD a1, a1, #PageSize SUBS lr, lr, #1 BNE %BT10 MCR p15, 0, a1, c8, c5, 0 ;flush ITLB Pull "pc" ; 20 MCR p15, 0, a1, c8, c7, 0 ;flush ITLB and DTLB MOV pc, lr ICache_InvalidateAll_WB_Crd ROUT ICache_InvalidateRange_WB_Crd MCR p15, 0, a1, c7, c5, 0 ;flush ICache MOV pc, lr LTORG ; ARMops for XScale, mjs Feb 2001 ; ; WB_Cal_LD is writeback, clean with allocate, lockdown ; ; If the mini data cache is used (XScaleMiniCache true), it is assumed to be ; configured writethrough (eg. used for RISC OS screen memory). This saves an ugly/slow ; mini cache clean for things like IMB_Full. ; ; Sadly, for global cache invalidate with mini cache, things are awkward. We can't clean the ; main cache then do the global invalidate MCR, unless we tolerate having _all_ interrupts ; off (else the main cache may be slightly dirty from interrupts, and the invalidate ; will lose data). So we must reluctantly 'invalidate' the mini cache by the ugly/slow ; mechanism as if we were cleaning it :-( Intel should provide a separate global invalidate ; (and perhaps a line allocate) for the mini cache. ; ; We do not use lockdown. ; ; For simplicity, we assume cacheable pages are mostly writeback. Any writethrough ; pages will be invalidated as if they were writeback, but there is little overhead ; (cleaning a clean line or allocating a line from cleaner area are both fast). ; Global cache cleaning requires address space for private cleaner areas (not accessed ; for any other reason). Cleaning is normally with interrupts enabled (to avoid a latency ; hit), which means that the cleaner data is not invalidated afterwards. This is fine for ; RISC OS - where the private area is not used for anything else, and any re-use of the ; cache under interrupts is safe (eg. a page being moved is *never* involved in any ; active interrupts). ; Mostly, cleaning toggles between two separate cache-sized areas, which gives minimum ; cleaning cost while guaranteeing proper clean even if previous clean data is present. If ; the clean routine is re-entered, an independent, double sized clean is initiated. This ; guarantees proper cleaning (regardless of multiple re-entrancy) whilst hardly complicating ; the routine at all. The overhead is small, since by far the most common cleaning will be ; non-re-entered. The upshot is that the cleaner address space available must be at least 4 ; times the cache size: ; 1 : used alternately, on 1st, 3rd, ... non-re-entered cleans ; 2 : used alternately, on 2nd, 4th, ... non-re-entered cleans ; 3 : used only for first half of a re-entered clean ; 4 : used only for second half of a re-entered clean ; ; If the mini cache is used, it has its own equivalent cleaner space and algorithm. 
; Parameters for each cache are: ; ; Cache_CleanBaseAddress : start address of total cleaner space ; Cache_CleanNextAddress : start address for next non-re-entered clean, or 0 if re-entered GBLL XScaleMiniCache ; *must* be configured writethrough if used XScaleMiniCache SETL {FALSE} ; MACRO to do Intel approved CPWAIT, to guarantee any previous MCR's have taken effect ; corrupts a1 ; MACRO CPWAIT MRC p15, 0, a1, c2, c0, 0 ; arbitrary read of CP15 MOV a1, a1 ; wait for it ; SUB pc, pc, #4 omitted, because all ops have a pc load to return to caller MEND Cache_CleanAll_WB_Cal_LD ROUT ; ; - cleans main cache (and invalidates as a side effect) ; - if mini cache is in use, will be writethrough so no clean required ; - can be used with interrupts enabled (to avoid latency over time of clean) ; - can be re-entered ; - see remarks at top of XScale ops for discussion of strategy ; Push "a2-a4, v1, v2, lr" LDR lr, =ZeroPage LDR a1, [lr, #DCache_CleanBaseAddress] LDR a2, =ZeroPage+DCache_CleanNextAddress LDR a3, [lr, #DCache_Size] LDRB a4, [lr, #DCache_LineLen] MOV v2, #0 SWP v1, v2, [a2] ; read current CleanNextAddr, zero it (semaphore) TEQ v1, #0 ; but if it is already zero, we have re-entered ADDEQ v1, a1, a3, LSL #1 ; if re-entered, start clean at Base+2*Cache_Size ADDEQ v2, v1, a3, LSL #1 ; if re-entered, do a clean of 2*Cache_Size ADDNE v2, v1, a3 ; if not re-entered, do a clean of Cache_Size 10 MCR p15, 0, v1, c7, c2, 5 ; allocate address from cleaner space ADD v1, v1, a4 TEQ v1, v2 BNE %BT10 ADD v2, a1, a3, LSL #1 ; compare end address with Base+2*Cache_Size CMP v1, v2 MOVEQ v1, a1 ; if equal, not re-entered and Next wraps back STRLS v1, [a2] ; if lower or same, not re-entered, so update Next MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer (waits, so no need for CPWAIT) Pull "a2-a4, v1, v2, pc" [ XScaleMiniCache Cache_MiniInvalidateAll_WB_Cal_LD ROUT ; ; similar to Cache_CleanAll_WB_Cal_LD, but must do direct reads (cannot use allocate address MCR), and ; 'cleans' to achieve invalidate as side effect (mini cache will be configured writethrough) ; Push "a2-a4, v1, v2, lr" LDR lr, =ZeroPage LDR a1, [lr, #MCache_CleanBaseAddress] LDR a2, =ZeroPage+MCache_CleanNextAddr LDR a3, [lr, #MCache_Size] LDRB a4, [lr, #MCache_LineLen] MOV v2, #0 SWP v1, v2, [a2] ; read current CleanNextAddr, zero it (semaphore) TEQ v1, #0 ; but if it is already zero, we have re-entered ADDEQ v1, a1, a3, LSL #1 ; if re-entered, start clean at Base+2*Cache_Size ADDEQ v2, v1, a3, LSL #1 ; if re-entered, do a clean of 2*Cache_Size ADDNE v2, v1, a3 ; if not re-entered, do a clean of Cache_Size 10 LDR lr, [v1], a4 ; read a line of cleaner data TEQ v1, v2 BNE %BT10 ADD v2, a1, a3, LSL #1 ; compare end address with Base+2*Size CMP v1, v2 MOVEQ v1, a1 ; if equal, not re-entered and Next wraps back STRLS v1, [a2] ; if lower or same, not re-entered, so update Next ; note, no drain WBuffer, since we are really only invalidating a writethrough cache Pull "a2-a4, v1, v2, pc" ] ; XScaleMiniCache Cache_CleanInvalidateAll_WB_Cal_LD ROUT ; ; - cleans main cache (and invalidates wrt OS stuff as a side effect) ; - if mini cache in use (will be writethrough), 'cleans' in order to invalidate as side effect ; Push "lr" BL Cache_CleanAll_WB_Cal_LD [ XScaleMiniCache BL Cache_MiniInvalidateAll_WB_Cal_LD ] MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache and BTB CPWAIT Pull "pc" Cache_InvalidateAll_WB_Cal_LD ROUT ; ; no clean, assume caller knows what's happening ; MCR p15, 0, a1, c7, c7, 0 ; invalidate DCache, (MiniCache), ICache and BTB CPWAIT MOV pc, lr 
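; A worked example of the cleaner-space requirement described in the header
; comment above (figures purely for illustration): assuming a 32KB main data
; cache, DCache_CleanBaseAddress must map at least 128KB of otherwise-unused
; address space - non-re-entered cleans alternate between the first and second
; 32KB, and a re-entered clean runs through the upper 64KB in a single pass.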
Cache_RangeThreshold_WB_Cal_LD ROUT LDR a1, =ZeroPage LDR a1, [a1, #DCache_RangeThreshold] MOV pc, lr MMU_ChangingUncached_WB_Cal_LD ROUT [ CacheablePageTables MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer (waits, so no need for CPWAIT) ] TLB_InvalidateAll_WB_Cal_LD MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB CPWAIT MOV pc, lr MMU_ChangingUncachedEntry_WB_Cal_LD ROUT [ CacheablePageTables MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer (waits, so no need for CPWAIT) ] TLB_InvalidateEntry_WB_Cal_LD MCR p15, 0, a1, c8, c5, 1 ; invalidate ITLB entry MCR p15, 0, a1, c8, c6, 1 ; invalidate DTLB entry CPWAIT MOV pc, lr DSB_ReadWrite_WB_Cal_LD ROUT MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer (waits, so no need for CPWAIT) MOV pc, lr IMB_Full_WB_Cal_LD Push "lr" BL Cache_CleanAll_WB_Cal_LD ; clean DCache (wrt to non-interrupt stuff) MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache and BTB CPWAIT Pull "pc" IMB_Range_WB_Cal_LD ROUT SUB a2, a2, a1 CMP a2, #32*1024 ; arbitrary-ish range threshold ADD a2, a2, a1 BHS IMB_Full_WB_Cal_LD Push "lr" LDR lr, =ZeroPage LDRB lr, [lr, #DCache_LineLen] 10 MCR p15, 0, a1, c7, c10, 1 ; clean DCache entry [ :LNOT:XScaleJTAGDebug MCR p15, 0, a1, c7, c5, 1 ; invalidate ICache entry ] ADD a1, a1, lr CMP a1, a2 BLO %BT10 [ XScaleJTAGDebug MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache and BTB | MCR p15, 0, a1, c7, c5, 6 ; invalidate BTB ] MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer (waits, so no need for CPWAIT) Pull "pc" IMB_List_WB_Cal_LD ROUT CMP a3, #32*1024 ; arbitrary-ish range threshold BHS IMB_Full_WB_Cal_LD Push "v1-v2,lr" LDR lr, =ZeroPage LDRB lr, [lr, #DCache_LineLen] 05 LDMIA a1!, {v1-v2} 10 MCR p15, 0, v1, c7, c10, 1 ; clean DCache entry [ :LNOT:XScaleJTAGDebug MCR p15, 0, v1, c7, c5, 1 ; invalidate ICache entry ] ADD v1, v1, lr CMP v1, v2 BLO %BT10 CMP a1, a2 BNE %BT05 [ XScaleJTAGDebug MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache and BTB | MCR p15, 0, a1, c7, c5, 6 ; invalidate BTB ] MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer (waits, so no need for CPWAIT) Pull "v1-v2,pc" MMU_Changing_WB_Cal_LD ROUT [ CacheablePageTables MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer (waits, so no need for CPWAIT) ] Push "lr" MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB BL Cache_CleanAll_WB_Cal_LD MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache and BTB CPWAIT Pull "pc" MMU_ChangingEntry_WB_Cal_LD ROUT ; ;there is no clean&invalidate DCache instruction, however we can do clean ;entry followed by invalidate entry without an interrupt hole, because they ;are for the same virtual address (and that virtual address will not be ;involved in interrupts, since it is involved in remapping) ; [ CacheablePageTables MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer (waits, so no need for CPWAIT) ] Push "a2, lr" ADD a2, a1, #PageSize LDR lr, =ZeroPage LDRB lr, [lr, #DCache_LineLen] MCR p15, 0, a1, c8, c6, 1 ; invalidate DTLB entry MCR p15, 0, a1, c8, c5, 1 ; invalidate ITLB entry 10 MCR p15, 0, a1, c7, c10, 1 ; clean DCache entry MCR p15, 0, a1, c7, c6, 1 ; invalidate DCache entry [ :LNOT:XScaleJTAGDebug MCR p15, 0, a1, c7, c5, 1 ; invalidate ICache entry ] ADD a1, a1, lr CMP a1, a2 BLO %BT10 MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer [ XScaleJTAGDebug MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache and BTB | MCR p15, 0, a1, c7, c5, 6 ; invalidate BTB ] CPWAIT Pull "a2, pc" MMU_ChangingEntries_WB_Cal_LD ROUT ; ;same comments as MMU_ChangingEntry_WB_Cal_LD ; [ CacheablePageTables MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer (waits, so no need for CPWAIT) ] Push "a2, a3, lr" MOV a2, a2, LSL 
#Log2PageSize LDR lr, =ZeroPage LDR a3, [lr, #DCache_RangeThreshold] ;check whether cheaper to do global clean CMP a2, a3 BHS %FT30 ADD a2, a2, a1 ;clean end address (exclusive) LDRB a3, [lr, #DCache_LineLen] MOV lr, a1 10 MCR p15, 0, a1, c8, c6, 1 ; invalidate DTLB entry MCR p15, 0, a1, c8, c5, 1 ; invalidate ITLB entry ADD a1, a1, #PageSize CMP a1, a2 BLO %BT10 MOV a1, lr ; restore start address 20 MCR p15, 0, a1, c7, c10, 1 ; clean DCache entry MCR p15, 0, a1, c7, c6, 1 ; invalidate DCache entry [ :LNOT:XScaleJTAGDebug MCR p15, 0, a1, c7, c5, 1 ; invalidate ICache entry ] ADD a1, a1, a3 CMP a1, a2 BLO %BT20 MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer [ XScaleJTAGDebug MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache and BTB | MCR p15, 0, a1, c7, c5, 6 ; invalidate BTB ] CPWAIT Pull "a2, a3, pc" ; 30 MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB BL Cache_CleanInvalidateAll_WB_Cal_LD CPWAIT Pull "a2, a3, pc" Cache_CleanRange_WB_Cal_LD ROUT Push "a2, a3, lr" LDR lr, =ZeroPage SUB a2, a2, a1 LDR a3, [lr, #DCache_RangeThreshold] ;check whether cheaper to do global clean CMP a2, a3 BHS %FT30 ADD a2, a2, a1 ;clean end address (exclusive) LDRB a3, [lr, #DCache_LineLen] 10 MCR p15, 0, a1, c7, c10, 1 ; clean DCache entry ADD a1, a1, a3 CMP a1, a2 BLO %BT10 MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer (waits, so no need for CPWAIT) Pull "a2, a3, pc" ; 30 Pull "a2, a3, lr" B Cache_CleanAll_WB_Cal_LD Cache_InvalidateRange_WB_Cal_LD ROUT Push "a2, a3, lr" LDR lr, =ZeroPage SUB a2, a2, a1 LDR a3, [lr, #DCache_RangeThreshold] ;check whether cheaper to do global clean CMP a2, a3, LSL #1 ;assume clean+invalidate slower than just invalidate BHS %FT30 ADD a2, a2, a1 ;clean end address (exclusive) LDRB a3, [lr, #DCache_LineLen] 10 MCR p15, 0, a1, c7, c6, 1 ; invalidate DCache entry [ :LNOT:XScaleJTAGDebug MCR p15, 0, a1, c7, c5, 1 ; invalidate ICache entry ] ADD a1, a1, a3 CMP a1, a2 BLO %BT10 MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer [ XScaleJTAGDebug MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache and BTB | MCR p15, 0, a1, c7, c5, 6 ; invalidate BTB ] CPWAIT Pull "a2, a3, pc" ; 30 Pull "a2, a3, lr" B Cache_CleanInvalidateAll_WB_Cal_LD Cache_CleanInvalidateRange_WB_Cal_LD ROUT ; ;same comments as MMU_ChangingEntry_WB_Cal_LD ; Push "a2, a3, lr" LDR lr, =ZeroPage SUB a2, a2, a1 LDR a3, [lr, #DCache_RangeThreshold] ;check whether cheaper to do global clean CMP a2, a3 BHS %FT30 ADD a2, a2, a1 ;clean end address (exclusive) LDRB a3, [lr, #DCache_LineLen] 10 MCR p15, 0, a1, c7, c10, 1 ; clean DCache entry MCR p15, 0, a1, c7, c6, 1 ; invalidate DCache entry [ :LNOT:XScaleJTAGDebug MCR p15, 0, a1, c7, c5, 1 ; invalidate ICache entry ] ADD a1, a1, a3 CMP a1, a2 BLO %BT10 MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer [ XScaleJTAGDebug MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache and BTB | MCR p15, 0, a1, c7, c5, 6 ; invalidate BTB ] CPWAIT Pull "a2, a3, pc" ; 30 Pull "a2, a3, lr" B Cache_CleanInvalidateAll_WB_Cal_LD MMU_ChangingUncachedEntries_WB_Cal_LD ROUT [ CacheablePageTables MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer (waits, so no need for CPWAIT) ] CMP a2, #32 ; arbitrary-ish threshold BHS %FT20 Push "lr" MOV lr, a2 10 MCR p15, 0, a1, c8, c6, 1 ; invalidate DTLB entry MCR p15, 0, a1, c8, c5, 1 ; invalidate ITLB entry SUBS lr, lr, #1 ADD a1, a1, #PageSize BNE %BT10 CPWAIT Pull "pc" ; 20 MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB CPWAIT MOV pc, lr ICache_InvalidateRange_WB_Cal_LD ROUT Push "a2, a3, lr" LDR lr, =ZeroPage SUB a2, a2, a1 LDR a3, [lr, #DCache_RangeThreshold] ;check whether cheaper to do 
global clean CMP a2, a3 BHS %FT30 ADD a2, a2, a1 ;clean end address (exclusive) LDRB a3, [lr, #DCache_LineLen] 10 [ :LNOT:XScaleJTAGDebug MCR p15, 0, a1, c7, c5, 1 ; invalidate ICache entry ] ADD a1, a1, a3 CMP a1, a2 BLO %BT10 [ XScaleJTAGDebug MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache and BTB | MCR p15, 0, a1, c7, c5, 6 ; invalidate BTB ] CPWAIT Pull "a2, a3, pc" ; 30 Pull "a2, a3, lr" B ICache_InvalidateAll_WB_Cal_LD ICache_InvalidateAll_WB_Cal_LD MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache and BTB CPWAIT MOV pc, lr ] ; MEMM_Type = "ARM600" [ MEMM_Type = "VMSAv6" ; Need appropriate myIMB, etc. implementations if this is to be removed ; -------------------------------------------------------------------------- ; ----- ARMops for Cortex-A8 and the like ---------------------------------- ; -------------------------------------------------------------------------- ; WB_CR7_Lx refers to ARMs with writeback data cache, cleaned with ; register 7, and (potentially) multiple cache levels ; ; DCache_LineLen = smallest data/unified cache line length ; ICache_LineLen = smallest instruction cache line length ; DCache_RangeThreshold = clean threshold for data cache ; Cache_Lx_Info = Cache level ID register ; Cache_Lx_DTable = Cache size identification register for all 7 data/unified caches ; Cache_Lx_ITable = Cache size identification register for all 7 instruction caches ; ARMv7 cache maintenance routines are a bit long-winded, so we use this macro ; to reduce the risk of mistakes creeping in due to code duplication ; ; $op: Operation to perform ('clean', 'invalidate', 'cleaninvalidate') ; $levels: Which levels to apply to ('lou', 'loc', 'louis') ; Uses r0-r8 & lr as temp ; Performs the indicated op on the indicated data & unified caches ; ; Code based around the alternate/faster code given in the ARMv7 ARM (section ; B2.2.4, alternate/faster code only in doc revision 9), but tightened up a bit ; ; Note that HAL_InvalidateCache_ARMvF uses its own implementation of this ; algorithm, since it must cope with different temporary registers and it needs ; to read the cache info straight from the CP15 registers ; MACRO MaintainDataCache_WB_CR7_Lx $op, $levels LDR lr, =ZeroPage LDR r0, [lr, #Cache_Lx_Info]! ADD lr, lr, #Cache_Lx_DTable-Cache_Lx_Info [ "$levels"="lou" ANDS r3, r0, #&38000000 MOV r3, r3, LSR #26 ; Cache level value (naturally aligned) | [ "$levels"="loc" ANDS r3, r0, #&07000000 MOV r3, r3, LSR #23 ; Cache level value (naturally aligned) | [ "$levels"="louis" ANDS r3, r0, #&00E00000 MOV r3, r3, LSR #20 ; Cache level value (naturally aligned) | ! 
1, "Unrecognised levels" ] ] ] BEQ %FT50 MOV r8, #0 ; Current cache level 10 ; Loop1 ADD r2, r8, r8, LSR #1 ; Work out 3 x cachelevel MOV r1, r0, LSR r2 ; bottom 3 bits are the Cache type for this level AND r1, r1, #7 ; get those 3 bits alone CMP r1, #2 BLT %FT40 ; no cache or only instruction cache at this level LDR r1, [lr, r8, LSL #1] ; read CCSIDR to r1 AND r2, r1, #CCSIDR_LineSize_mask ; extract the line length field ADD r2, r2, #4 ; add 4 for the line length offset (log2 16 bytes) LDR r7, =CCSIDR_Associativity_mask:SHR:CCSIDR_Associativity_pos AND r7, r7, r1, LSR #CCSIDR_Associativity_pos ; r7 is the max number on the way size (right aligned) CLZ r5, r7 ; r5 is the bit position of the way size increment LDR r4, =CCSIDR_NumSets_mask:SHR:CCSIDR_NumSets_pos AND r4, r4, r1, LSR #CCSIDR_NumSets_pos ; r4 is the max number of the index size (right aligned) 20 ; Loop2 MOV r1, r4 ; r1 working copy of the max index size (right aligned) 30 ; Loop3 ORR r6, r8, r7, LSL r5 ; factor in the way number and cache number into r6 ORR r6, r6, r1, LSL r2 ; factor in the index number [ "$op"="clean" DCCSW r6 ; Clean | [ "$op"="invalidate" DCISW r6 ; Invalidate | [ "$op"="cleaninvalidate" DCCISW r6 ; Clean & invalidate | ! 1, "Unrecognised op" ] ] ] SUBS r1, r1, #1 ; decrement the index BGE %BT30 SUBS r7, r7, #1 ; decrement the way number BGE %BT20 DSB ; Cortex-A7 errata 814220: DSB required when changing cache levels when using set/way operations. This also counts as our end-of-maintenance DSB. 40 ; Skip ADD r8, r8, #2 CMP r3, r8 BGT %BT10 50 ; Finished MEND Cache_CleanAll_WB_CR7_Lx ROUT ; Clean cache by traversing all sets and ways for all data caches Push "r1-r8,lr" MaintainDataCache_WB_CR7_Lx clean, loc Pull "r1-r8,pc" Cache_CleanInvalidateAll_WB_CR7_Lx ROUT ; ; similar to Cache_CleanAll, but does clean&invalidate of Dcache, and invalidates ICache ; Push "r1-r8,lr" MaintainDataCache_WB_CR7_Lx cleaninvalidate, loc ICIALLU ; invalidate ICache + branch predictors DSB ; Wait for cache/branch invalidation to complete ISB ; Ensure that the effects of the completed cache/branch invalidation are visible Pull "r1-r8,pc" Cache_InvalidateAll_WB_CR7_Lx ROUT ; ; no clean, assume caller knows what's happening ; Push "r1-r8,lr" MaintainDataCache_WB_CR7_Lx invalidate, loc ICIALLU ; invalidate ICache + branch predictors DSB ; Wait for cache/branch invalidation to complete ISB ; Ensure that the effects of the completed cache/branch invalidation are visible Pull "r1-r8,pc" Cache_RangeThreshold_WB_CR7_Lx ROUT LDR a1, =ZeroPage LDR a1, [a1, #DCache_RangeThreshold] MOV pc, lr ; In: r1 = cache level (0-based) ; Out: r0 = Flags ; bits 0-2: cache type: ; 000 -> none ; 001 -> instruction ; 010 -> data ; 011 -> split ; 100 -> unified ; 1xx -> reserved ; Other bits: reserved ; r1 = D line length ; r2 = D size ; r3 = I line length ; r4 = I size ; r0-r4 = zero if cache level not present Cache_Examine_WB_CR7_Lx ROUT Entry "r5" LDR r5, =ZeroPage LDR r0, [r5, #Cache_Lx_Info]! ADD r5, r5, #Cache_Lx_DTable-Cache_Lx_Info BIC r0, r0, #&00E00000 ; Shift the CLIDR until we hit a zero entry or the desired level ; (could shift by exactly the amount we want... but ARM say not to do ; that since they may decide to re-use bits) 10 TEQ r1, #0 TSTNE r0, #7 SUBNE r1, r1, #1 MOVNE r0, r0, LSR #3 ADDNE r5, r5, #4 BNE %BT10 ANDS r0, r0, #7 MOV r1, #0 MOV r2, #0 MOV r3, #0 MOV r4, #0 EXIT EQ TST r0, #6 ; Data or unified cache present? 
BEQ %FT20 LDR lr, [r5] LDR r1, =CCSIDR_NumSets_mask:SHR:CCSIDR_NumSets_pos LDR r2, =CCSIDR_Associativity_mask:SHR:CCSIDR_Associativity_pos AND r1, r1, lr, LSR #CCSIDR_NumSets_pos AND r2, r2, lr, LSR #CCSIDR_Associativity_pos ADD r1, r1, #1 ADD r2, r2, #1 MUL r2, r1, r2 AND r1, lr, #CCSIDR_LineSize_mask ASSERT CCSIDR_LineSize_pos = 0 MOV lr, #16 MOV r1, lr, LSL r1 MUL r2, r1, r2 20 TEQ r0, #4 ; Unified cache? MOVEQ r3, r1 MOVEQ r4, r2 TST r0, #1 ; Instruction cache present? EXIT EQ LDR lr, [r5, #Cache_Lx_ITable-Cache_Lx_DTable] LDR r3, =CCSIDR_NumSets_mask:SHR:CCSIDR_NumSets_pos LDR r4, =CCSIDR_Associativity_mask:SHR:CCSIDR_Associativity_pos AND r3, r3, lr, LSR #CCSIDR_NumSets_pos AND r4, r4, lr, LSR #CCSIDR_Associativity_pos ADD r3, r3, #1 ADD r4, r4, #1 MUL r4, r3, r4 AND r3, lr, #CCSIDR_LineSize_mask ASSERT CCSIDR_LineSize_pos = 0 MOV lr, #16 MOV r3, lr, LSL r3 MUL r4, r3, r4 EXIT MMU_ChangingUncached_WB_CR7_Lx DSB ; Ensure the page table write has actually completed ISB ; Also required TLB_InvalidateAll_WB_CR7_Lx ROUT TLBIALL ; invalidate ITLB and DTLB BPIALL ; invalidate branch predictors DSB ; Wait for cache/branch invalidation to complete ISB ; Ensure that the effects of the completed cache/branch invalidation are visible MOV pc, lr ; a1 = page affected (page aligned address) ; MMU_ChangingUncachedEntry_WB_CR7_Lx DSB ISB TLB_InvalidateEntry_WB_CR7_Lx ROUT TLBIMVA a1 ; invalidate ITLB & DTLB entry BPIALL ; invalidate branch predictors DSB ; Wait for cache/branch invalidation to complete ISB ; Ensure that the effects of the completed cache/branch invalidation are visible MOV pc, lr IMB_Full_WB_CR7_Lx ROUT ; ; do: clean DCache; drain WBuffer, invalidate ICache/branch predictor ; Luckily, we only need to clean as far as the level of unification ; Push "r1-r8,lr" MaintainDataCache_WB_CR7_Lx clean, lou ICIALLU ; invalidate ICache DSB ; Wait for cache/branch invalidation to complete ISB ; Ensure that the effects of the completed cache/branch invalidation are visible Pull "r1-r8,pc" ; a1 = start address (inclusive, cache line aligned) ; a2 = end address (exclusive, cache line aligned) ; IMB_Range_WB_CR7_Lx ROUT SUB a2, a2, a1 CMP a2, #32*1024 ; Maximum L1 cache size on Cortex-A8 is 32K, use that to guess what approach to take ADD a2, a2, a1 BHS IMB_Full_WB_CR7_Lx Push "a1,lr" LDR lr, =ZeroPage LDRB lr, [lr, #DCache_LineLen] 10 DCCMVAU a1 ; clean DCache entry by VA to PoU ADD a1, a1, lr CMP a1, a2 BLO %BT10 DSB ; Wait for clean to complete Pull "a1" ; Get start address back LDR lr, =ZeroPage LDRB lr, [lr, #ICache_LineLen] ; Use ICache line length, on some CPUs I&D will differ 10 ICIMVAU a1 ; invalidate ICache entry ADD a1, a1, lr CMP a1, a2 BLO %BT10 BPIALL ; invalidate branch predictors DSB ; Wait for cache/branch invalidation to complete ISB ; Ensure that the effects of the completed cache/branch invalidation are visible Pull "pc" ; a1 = pointer to list of (start, end) address pairs ; a2 = pointer to end of list ; a3 = total amount of memory to be synchronised ; IMB_List_WB_CR7_Lx ROUT CMP a3, #32*1024 ; Maximum L1 cache size on Cortex-A8 is 32K, use that to guess what approach to take BHS IMB_Full_WB_CR7_Lx Push "a1,v1-v2,lr" LDR lr, =ZeroPage LDRB lr, [lr, #DCache_LineLen] 05 LDMIA a1!, {v1-v2} 10 DCCMVAU v1 ; clean DCache entry by VA to PoU ADD v1, v1, lr CMP v1, v2 BLO %BT10 CMP a1, a2 BNE %BT05 DSB ; Wait for clean to complete Pull "a1" ; Get start address back LDR lr, =ZeroPage LDRB lr, [lr, #ICache_LineLen] ; Use ICache line length, on some CPUs I&D will differ 05 LDMIA a1!, {v1-v2} 
10 ICIMVAU v1 ; invalidate ICache entry ADD v1, v1, lr CMP v1, v2 BLO %BT10 CMP a1, a2 BNE %BT05 BPIALL ; invalidate branch predictors DSB ; Wait for cache/branch invalidation to complete ISB ; Ensure that the effects of the completed cache/branch invalidation are visible Pull "v1-v2,pc" MMU_Changing_WB_CR7_Lx ROUT DSB ; Ensure the page table write has actually completed ISB ; Also required TLBIALL ; invalidate ITLB and DTLB DSB ; Wait for TLB invalidation to complete ISB ; Ensure that the effects are visible B Cache_CleanInvalidateAll_WB_CR7_Lx ; a1 = page affected (page aligned address) ; MMU_ChangingEntry_WB_CR7_Lx ROUT Push "a2, lr" DSB ; Ensure the page table write has actually completed ISB ; Also required TLBIMVA a1 ; invalidate DTLB and ITLB DSB ; Wait for TLB invalidation to complete ISB ; Ensure that the effects are visible LDR lr, =ZeroPage LDRB lr, [lr, #DCache_LineLen] ADD a2, a1, #PageSize 10 DCCIMVAC a1 ; clean&invalidate DCache entry to PoC ADD a1, a1, lr CMP a1, a2 BNE %BT10 DSB ; Wait for clean to complete LDR lr, =ZeroPage LDRB lr, [lr, #ICache_LineLen] ; Use ICache line length, on some CPUs I&D will differ SUB a1, a2, #PageSize ; Get start address back 10 ICIMVAU a1 ; invalidate ICache entry to PoU ADD a1, a1, lr CMP a1, a2 BNE %BT10 BPIALL ; invalidate branch predictors DSB ISB Pull "a2, pc" ; a1 = first page affected (page aligned address) ; a2 = number of pages ; MMU_ChangingEntries_WB_CR7_Lx ROUT Push "a2, a3, lr" DSB ; Ensure the page table write has actually completed ISB ; Also required MOV a2, a2, LSL #Log2PageSize LDR lr, =ZeroPage LDR a3, [lr, #DCache_RangeThreshold] ;check whether cheaper to do global clean CMP a2, a3 BHS %FT90 ADD a2, a2, a1 ;clean end address (exclusive) LDRB a3, [lr, #DCache_LineLen] MOV lr, a1 10 TLBIMVA a1 ; invalidate DTLB & ITLB entry ADD a1, a1, #PageSize CMP a1, a2 BNE %BT10 DSB ISB MOV a1, lr ; Get start address back 20 DCCIMVAC a1 ; clean&invalidate DCache entry to PoC ADD a1, a1, a3 CMP a1, a2 BNE %BT20 DSB ; Wait for clean to complete LDR a3, =ZeroPage LDRB a3, [a3, #ICache_LineLen] ; Use ICache line length, on some CPUs I&D will differ MOV a1, lr ; Get start address back 30 ICIMVAU a1 ; invalidate ICache entry to PoU ADD a1, a1, a3 CMP a1, a2 BNE %BT30 BPIALL ; invalidate branch predictors DSB ISB Pull "a2, a3, pc" ; 90 TLBIALL ; invalidate ITLB and DTLB DSB ; Wait TLB invalidation to complete ISB ; Ensure that the effects are visible BL Cache_CleanInvalidateAll_WB_CR7_Lx Pull "a2, a3, pc" ; a1 = start address (inclusive, cache line aligned) ; a2 = end address (exclusive, cache line aligned) ; Cache_CleanRange_WB_CR7_Lx ROUT Push "a2, a3, lr" LDR lr, =ZeroPage SUB a2, a2, a1 LDR a3, [lr, #DCache_RangeThreshold] ;check whether cheaper to do global clean CMP a2, a3 BHS %FT30 ADD a2, a2, a1 ;clean end address (exclusive) LDRB a3, [lr, #DCache_LineLen] MOV lr, a1 10 DCCMVAC a1 ; clean DCache entry to PoC ADD a1, a1, a3 CMP a1, a2 BNE %BT10 DSB ; Wait for clean to complete ISB Pull "a2, a3, pc" ; 30 Pull "a2, a3, lr" B Cache_CleanAll_WB_CR7_Lx ; a1 = start address (inclusive, cache line aligned) ; a2 = end address (exclusive, cache line aligned) ; Cache_InvalidateRange_WB_CR7_Lx ROUT Push "a2, a3, lr" LDR lr, =ZeroPage SUB a2, a2, a1 LDR a3, [lr, #DCache_RangeThreshold] ;check whether cheaper to do global clean CMP a2, a3, LSL #1 ;assume clean+invalidate slower than just invalidate BHS %FT30 ADD a2, a2, a1 ;clean end address (exclusive) LDRB a3, [lr, #DCache_LineLen] MOV lr, a1 10 DCIMVAC a1 ; invalidate DCache entry to PoC ADD a1, 
a1, a3 CMP a1, a2 BLO %BT10 LDR a3, =ZeroPage LDRB a3, [a3, #ICache_LineLen] ; Use ICache line length, on some CPUs I&D will differ MOV a1, lr ; Get start address back 10 ICIMVAU a1 ; invalidate ICache entry to PoU ADD a1, a1, a3 CMP a1, a2 BLO %BT10 BPIALL ; invalidate branch predictors DSB ISB Pull "a2, a3, pc" ; 30 Pull "a2, a3, lr" B Cache_CleanInvalidateAll_WB_CR7_Lx ; a1 = start address (inclusive, cache line aligned) ; a2 = end address (exclusive, cache line aligned) ; Cache_CleanInvalidateRange_WB_CR7_Lx ROUT Push "a2, a3, lr" LDR lr, =ZeroPage SUB a2, a2, a1 LDR a3, [lr, #DCache_RangeThreshold] ;check whether cheaper to do global clean CMP a2, a3 BHS %FT30 ADD a2, a2, a1 ;clean end address (exclusive) LDRB a3, [lr, #DCache_LineLen] MOV lr, a1 10 DCCIMVAC a1 ; clean&invalidate DCache entry to PoC ADD a1, a1, a3 CMP a1, a2 BLO %BT10 DSB ; Wait for clean to complete LDR a3, =ZeroPage LDRB a3, [a3, #ICache_LineLen] ; Use ICache line length, on some CPUs I&D will differ MOV a1, lr ; Get start address back 10 ICIMVAU a1 ; invalidate ICache entry to PoU ADD a1, a1, a3 CMP a1, a2 BLO %BT10 BPIALL ; invalidate branch predictors DSB ISB Pull "a2, a3, pc" ; 30 Pull "a2, a3, lr" B Cache_CleanInvalidateAll_WB_CR7_Lx ; a1 = first page affected (page aligned address) ; a2 = number of pages ; MMU_ChangingUncachedEntries_WB_CR7_Lx ROUT Push "a2,lr" DSB ; Ensure the page table write has actually completed ISB ; Also required CMP a2, #32 ; arbitrary-ish threshold BLO %FT10 TLBIALL ; invalidate ITLB and DTLB B %FT20 10 TLBIMVA a1 ; invalidate DTLB & ITLB entry ADD a1, a1, #PageSize SUBS a2, a2, #1 BNE %BT10 20 BPIALL ; invalidate branch predictors DSB ISB Pull "a2,pc" ; a1 = start address (inclusive, cache line aligned) ; a2 = end address (exclusive, cache line aligned) ; ICache_InvalidateRange_WB_CR7_Lx ROUT SUB a2, a2, a1 CMP a2, #32*1024 ; Maximum L1 cache size on Cortex-A8 is 32K, use that to guess what approach to take ADD a2, a2, a1 BHS ICache_InvalidateAll_WB_CR7_Lx Push "lr" LDR lr, =ZeroPage LDRB lr, [lr, #ICache_LineLen] 10 ICIMVAU a1 ; invalidate ICache entry ADD a1, a1, lr CMP a1, a2 BLO %BT10 BPIALL ; invalidate branch predictors DSB ; Wait for cache/branch invalidation to complete ISB ; Ensure that the effects of the completed cache/branch invalidation are visible Pull "pc" ICache_InvalidateAll_WB_CR7_Lx ROUT ICIALLU ; invalidate ICache BPIALL ; invalidate branch predictors DSB ; Wait for cache/branch invalidation to complete ISB ; Ensure that the effects of the completed cache/branch invalidation are visible MOV pc, lr LTORG [ SMP ; -------------------------------------------------------------------------- ; ----- ARMops for ARMv7+ with multiprocessing extensions ------------------ ; -------------------------------------------------------------------------- ; These are MP-safe versions of the standard ARMv7 ARMops (WB_CR7_Lx). ; Where possible they use maintenance ops that broadcast to the other cores in ; the system. 
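; Set/way maintenance (as used by MaintainDataCache_WB_CR7_Lx and hence by the
; CleanAll/CleanInvalidateAll ops above) only acts on the caches of the core
; that executes it, which is why the aliases below are marked DOES NOT
; BROADCAST. For reference, an illustrative C rendering of that set/way walk
; (the 'cleaninvalidate, loc' case); read_ccsidr, dccisw, dsb and clz are
; assumed helper names standing for the corresponding CP15 accesses and
; instructions, not real functions:
;
;   #include <stdint.h>
;
;   extern uint32_t read_ccsidr(unsigned level); /* CSSELR = level*2, read CCSIDR  */
;   extern void     dccisw(uint32_t setway);     /* MCR p15,0,Rt,c7,c14,2 (DCCISW) */
;   extern void     dsb(void);
;   extern unsigned clz(uint32_t x);             /* CLZ, x != 0                    */
;
;   void clean_invalidate_to_loc(uint32_t clidr)
;   {
;       unsigned loc = (clidr >> 24) & 7;                   /* Level of Coherency  */
;       for (unsigned level = 0; level < loc; level++) {
;           unsigned ctype = (clidr >> (3 * level)) & 7;
;           if (ctype < 2)
;               continue;                       /* no cache, or I-cache only, here */
;           uint32_t ccsidr    = read_ccsidr(level);
;           unsigned line_log2 = (ccsidr & 7) + 4;          /* log2(line bytes)    */
;           uint32_t ways      = (ccsidr >> 3) & 0x3FF;     /* max way number      */
;           uint32_t sets      = (ccsidr >> 13) & 0x7FFF;   /* max set number      */
;           unsigned way_shift = ways ? clz(ways) : 0;      /* way field at top    */
;           for (int32_t way = ways; way >= 0; way--)
;               for (int32_t set = sets; set >= 0; set--)
;                   dccisw(((uint32_t)way << way_shift) |
;                          ((uint32_t)set << line_log2) | (level << 1));
;           dsb();                              /* Cortex-A7 erratum 814220        */
;       }
;   }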
Cache_CleanAll_ARMv7MP * Cache_CleanAll_WB_CR7_Lx ; DOES NOT BROADCAST Cache_CleanInvalidateAll_ARMv7MP * Cache_CleanInvalidateAll_WB_CR7_Lx ; DOES NOT BROADCAST Cache_InvalidateAll_ARMv7MP * Cache_InvalidateAll_WB_CR7_Lx ; DOES NOT BROADCAST Cache_Examine_ARMv7MP * Cache_Examine_WB_CR7_Lx ; Call appropriate ranged/full ICache invalidate code ; $type = ARMop type suffix ; $start = reg containing start addr ; a2 = end addr ; $pull = stacked registers (except lr, which must be stacked) MACRO ICacheInvalidate $type, $start, $pull ASSERT "$start" <> "a2" [ "$start" = "lr" MOV a1, lr ] SUB lr, a2, $start CMP lr, #32*1024 [ "$start" <> "lr" :LAND: "$start" <> "a1" MOVLO a1, $start ] [ "$pull" <> "" ; If $pull contains a1 or a2 then we'll have to postpone pulling them until after the ranged call. But there's no straightforward way of checking for that, so we'll take the easy way out and always postpone pulling for ranged calls. Pull "$pull,lr",HS BHS ICache_InvalidateAll_$type Push "pc" B ICache_InvalidateRange_$type._alt Pull "$pull,pc" | BLO ICache_InvalidateRange_$type._alt Pull "lr" B ICache_InvalidateAll_$type ] MEND Cache_RangeThreshold_ARMv7MP ROUT MVN a1, #0 ; Claim 4G-1 to discourage global ops (since they don't broadcast) MOV pc, lr MMU_ChangingUncached_ARMv7MP DSB ; Ensure the page table write has actually completed ISB ; Also required TLB_InvalidateAll_ARMv7MP ROUT TLBIALLIS ; invalidate ITLB and DTLB BPIALLIS ; invalidate branch predictors DSB ; Wait for cache/branch invalidation to complete ISB ; Ensure that the effects of the completed cache/branch invalidation are visible MOV pc, lr ; a1 = page affected (page aligned address) ; MMU_ChangingUncachedEntry_ARMv7MP DSB ISB TLB_InvalidateEntry_ARMv7MP TLBIMVAAIS a1 ; invalidate ITLB & DTLB entry BPIALLIS ; invalidate branch predictors DSB ; Wait for cache/branch invalidation to complete ISB ; Ensure that the effects of the completed cache/branch invalidation are visible MOV pc, lr IMB_Full_ARMv7MP ROUT ; Only cleans local DCache ; ; do: clean DCache; drain WBuffer, invalidate ICache/branch predictor ; Luckily, we only need to clean as far as the level of unification ; Push "r1-r8,lr" MaintainDataCache_WB_CR7_Lx clean, lou ICIALLUIS ; invalidate ICache DSB ; Wait for cache/branch invalidation to complete ISB ; Ensure that the effects of the completed cache/branch invalidation are visible Pull "r1-r8,pc" ; a1 = start address (inclusive, cache line aligned) ; a2 = end address (exclusive, cache line aligned) ; IMB_Range_ARMv7MP ROUT Push "a1,lr" LDR lr, =ZeroPage LDRB lr, [lr, #DCache_LineLen] 10 DCCMVAU a1 ; clean DCache entry by VA to PoU ADD a1, a1, lr CMP a1, a2 BLO %BT10 DSB ; Wait for clean to complete Pull "a1" ; Get start address back ICacheInvalidate ARMv7MP, a1 ; a1 = pointer to list of (start, end) address pairs ; a2 = pointer to end of list ; a3 = total amount of memory to be synchronised ; IMB_List_ARMv7MP ROUT Push "a1,v1-v2,lr" LDR lr, =ZeroPage LDRB lr, [lr, #DCache_LineLen] 05 LDMIA a1!, {v1-v2} 10 DCCMVAU v1 ; clean DCache entry by VA to PoU ADD v1, v1, lr CMP v1, v2 BLO %BT10 CMP a1, a2 BNE %BT05 DSB ; Wait for clean to complete Pull "a1" ; Get start address back CMP a3, #32*1024 ; see if global ICache invalidate sensible Pull "v1-v2,lr",HS BHS ICache_InvalidateAll_ARMv7MP LDR lr, =ZeroPage LDRB lr, [lr, #ICache_LineLen] ; Use ICache line length, on some CPUs I&D will differ 05 LDMIA a1!, {v1-v2} 10 ICIMVAU v1 ; invalidate ICache entry ADD v1, v1, lr CMP v1, v2 BLO %BT10 CMP a1, a2 BNE %BT05 BPIALLIS ; invalidate branch 
predictors DSB ; Wait for cache/branch invalidation to complete ISB ; Ensure that the effects of the completed cache/branch invalidation are visible Pull "v1-v2,pc" MMU_Changing_ARMv7MP ROUT ; Only cleans local caches DSB ; Ensure the page table write has actually completed ISB ; Also required TLBIALLIS ; invalidate ITLB and DTLB DSB ; Wait for TLB invalidation to complete ISB ; Ensure that the effects are visible B Cache_CleanInvalidateAll_ARMv7MP ; a1 = page affected (page aligned address) ; MMU_ChangingEntry_ARMv7MP ROUT Push "a2, lr" DSB ; Ensure the page table write has actually completed ISB ; Also required TLBIMVAAIS a1 ; invalidate DTLB and ITLB DSB ; Wait for TLB invalidation to complete ISB ; Ensure that the effects are visible LDR lr, =ZeroPage LDRB lr, [lr, #DCache_LineLen] ADD a2, a1, #PageSize 10 DCCIMVAC a1 ; clean&invalidate DCache entry to PoC ADD a1, a1, lr CMP a1, a2 BNE %BT10 DSB ; Wait for clean to complete SUB a1, a2, #PageSize ; Get start address back Push "pc" B ICache_InvalidateRange_ARMv7MP_alt ; Skip the range check, we know 4K is small enough to want a ranged clean Pull "a2, pc" ; a1 = first page affected (page aligned address) ; a2 = number of pages ; MMU_ChangingEntries_ARMv7MP ROUT Push "a2, a3, lr" DSB ; Ensure the page table write has actually completed ISB ; Also required MOV a2, a2, LSL #Log2PageSize LDR lr, =ZeroPage ADD a2, a2, a1 ;clean end address (exclusive) LDRB a3, [lr, #DCache_LineLen] MOV lr, a1 10 TLBIMVAAIS a1 ; invalidate DTLB & ITLB entry ADD a1, a1, #PageSize CMP a1, a2 BNE %BT10 DSB ISB MOV a1, lr ; Get start address back 20 DCCIMVAC a1 ; clean&invalidate DCache entry to PoC ADD a1, a1, a3 CMP a1, a2 BNE %BT20 DSB ; Wait for clean to complete ICacheInvalidate ARMv7MP, lr, "a2,a3" ; a1 = start address (inclusive, cache line aligned) ; a2 = end address (exclusive, cache line aligned) ; Cache_CleanRange_ARMv7MP ROUT Push "lr" LDR lr, =ZeroPage LDRB lr, [lr, #DCache_LineLen] 10 DCCMVAC a1 ; clean DCache entry to PoC ADD a1, a1, lr CMP a1, a2 BNE %BT10 DSB ; Wait for clean to complete ISB Pull "pc" ; a1 = start address (inclusive, cache line aligned) ; a2 = end address (exclusive, cache line aligned) ; Cache_InvalidateRange_ARMv7MP ROUT Push "a3, lr" LDR lr, =ZeroPage LDRB a3, [lr, #DCache_LineLen] MOV lr, a1 10 DCIMVAC a1 ; invalidate DCache entry to PoC ADD a1, a1, a3 CMP a1, a2 BNE %BT10 ICacheInvalidate ARMv7MP, lr, "a3" ; a1 = start address (inclusive, cache line aligned) ; a2 = end address (exclusive, cache line aligned) ; Cache_CleanInvalidateRange_ARMv7MP ROUT Push "a3, lr" LDR lr, =ZeroPage LDRB a3, [lr, #DCache_LineLen] ; log2(line len)-2 MOV lr, a1 10 DCCIMVAC a1 ; clean&invalidate DCache entry to PoC ADD a1, a1, a3 CMP a1, a2 BNE %BT10 DSB ; Wait for clean to complete ICacheInvalidate ARMv7MP, lr, "a3" ; a1 = first page affected (page aligned address) ; a2 = number of pages ; MMU_ChangingUncachedEntries_ARMv7MP ROUT Push "a2,lr" DSB ; Ensure the page table write has actually completed ISB ; Also required CMP a2, #32 ; arbitrary-ish threshold BLO %FT10 TLBIALLIS ; invalidate ITLB and DTLB B %FT20 10 TLBIMVAAIS a1 ; invalidate DTLB & ITLB entry ADD a1, a1, #PageSize SUBS a2, a2, #1 BNE %BT10 20 BPIALLIS ; invalidate branch predictors DSB ISB Pull "a2,pc" ; a1 = start address (inclusive, cache line aligned) ; a2 = end address (exclusive, cache line aligned) ; ICache_InvalidateRange_ARMv7MP ROUT SUB a2, a2, a1 CMP a2, #32*1024 ; Maximum L1 cache size on Cortex-A8 is 32K, use that to guess what approach to take ADD a2, a2, a1 BHS 
ICache_InvalidateAll_ARMv7MP Push "lr" ICache_InvalidateRange_ARMv7MP_alt LDR lr, =ZeroPage LDRB lr, [lr, #ICache_LineLen] 10 ICIMVAU a1 ; invalidate ICache entry ADD a1, a1, lr CMP a1, a2 BLO %BT10 BPIALLIS ; invalidate branch predictors DSB ; Wait for cache/branch invalidation to complete ISB ; Ensure that the effects of the completed cache/branch invalidation are visible Pull "pc" ICache_InvalidateAll_ARMv7MP ROUT ICIALLUIS ; invalidate ICache BPIALLIS ; invalidate branch predictors DSB ; Wait for cache/branch invalidation to complete ISB ; Ensure that the effects of the completed cache/branch invalidation are visible MOV pc, lr ] ; SMP ; -------------------------------------------------------------------------- ; ----- ARMops for PL310 L2 cache controller-------------------------------- ; -------------------------------------------------------------------------- ; These are a hybrid of the standard ARMv7 ARMops (WB_CR7_Lx) and the PL310 ; cache maintenance ops. Currently they're only used on Cortex-A9 systems, so ; may need modifications to work with other systems. ; Specifically, the code assumes the PL310 is being used in non-exclusive mode. ; ; To make the code fully re-entrant and MP-safe, we avoid using the background ; operations (INV_WAY, CLEAN_WAY, CLEAN_INV_WAY). MACRO PL310Sync $regs, $temp ; Errata 753970 requires us to write to a different location when ; performing a sync operation for r3p0 LDR $temp, [$regs, #PL310_REG0_CACHE_ID] AND $temp, $temp, #&3f TEQ $temp, #PL310_R3P0 MOV $temp, #0 STREQ $temp, [$regs, #PL310_REG7_CACHE_SYNC_753970] STRNE $temp, [$regs, #PL310_REG7_CACHE_SYNC] MEND [ :LNOT: SMP PL310Threshold * 1024*1024 ; Arbitrary threshold for full clean ] Cache_CleanInvalidateAll_PL310 ROUT ; Errata 727915 workaround - use CLEAN_INV_INDEX instead of CLEAN_INV_WAY ; Also, CLEAN_INV_WAY is a background op, while CLEAN_INV_INDEX is atomic. Entry "a2-a4" LDR a2, =ZeroPage LDR a2, [a2, #Cache_HALDevice] LDR a2, [a2, #HALDevice_Address] ; Clean ARM caches [ SMP BL Cache_CleanAll_ARMv7MP | BL Cache_CleanAll_WB_CR7_Lx ] ; Determine PL310 way, index count LDR a1, [a2, #PL310_REG1_AUX_CONTROL] AND a3, a1, #1<<16 AND a1, a1, #7<<17 MOV a3, a3, LSL #15 MOV a1, a1, LSR #17 LDR a4, =&FF<<5 ORR a3, a3, #7<<28 ; a3 = max way number (inclusive) ORR a4, a4, a4, LSL a1 ; a4 = max index number (inclusive) 10 ORR a1, a3, a4 20 STR a1, [a2, #PL310_REG7_CLEAN_INV_INDEX] SUBS a1, a1, #1<<28 ; next way BCS %BT20 ; underflow? SUBS a4, a4, #1<<5 ; next index BGE %BT10 ; Ensure the ops are actually complete DSB ; Clean & invalidate ARM caches PullEnv [ SMP B Cache_CleanInvalidateAll_ARMv7MP | B Cache_CleanInvalidateAll_WB_CR7_Lx ] Cache_CleanAll_PL310 ROUT Entry "a2-a4" LDR a2, =ZeroPage LDR a2, [a2, #Cache_HALDevice] LDR a2, [a2, #HALDevice_Address] ; Clean ARM caches [ SMP BL Cache_CleanAll_ARMv7MP | BL Cache_CleanAll_WB_CR7_Lx ] ; Determine PL310 way, index count LDR a1, [a2, #PL310_REG1_AUX_CONTROL] AND a3, a1, #1<<16 AND a1, a1, #7<<17 MOV a3, a3, LSL #15 MOV a1, a1, LSR #17 LDR a4, =&FF<<5 ORR a3, a3, #7<<28 ; a3 = max way number (inclusive) ORR a4, a4, a4, LSL a1 ; a4 = max index number (inclusive) 10 ORR a1, a3, a4 20 STR a1, [a2, #PL310_REG7_CLEAN_INDEX] SUBS a1, a1, #1<<28 ; next way BCS %BT20 ; underflow? 
SUBS a4, a4, #1<<5 ; next index BGE %BT10 ; Ensure the ops are actually complete DSB EXIT ; This op will be rarely (if ever) used, just implement as clean + invalidate Cache_InvalidateAll_PL310 * Cache_CleanInvalidateAll_PL310 [ SMP Cache_RangeThreshold_PL310 * Cache_RangeThreshold_ARMv7MP | Cache_RangeThreshold_PL310 ROUT MOV a1, #PL310Threshold MOV pc, lr ] Cache_Examine_PL310 ROUT ; Assume that the PL310 is the level 2 cache CMP r1, #1 BLT Cache_Examine_WB_CR7_Lx MOVGT r0, #0 MOVGT r1, #0 MOVGT r2, #0 MOVGT r3, #0 MOVGT r4, #0 MOVGT pc, lr LDR r0, =ZeroPage LDR r0, [r0, #Cache_HALDevice] LDR r0, [r0, #HALDevice_Address] LDR r0, [r0, #PL310_REG1_AUX_CONTROL] AND r2, r0, #&E0000 ; Get way size TST r0, #1:SHL:16 ; Check associativity MOV r2, r2, LSR #17 MOVEQ r1, #8*1024*8 ; 8KB base way size with 8 way associativity MOVNE r1, #8*1024*16 ; 8KB base way size with 16 way associativity MOV r2, r1, LSL r2 ; Assume this really is a PL310 (32 byte line size, unified architecture) MOV r0, #4 MOV r1, #32 MOV r3, #32 MOV r4, r2 MOV pc, lr DSB_ReadWrite_PL310 ROUT Entry LDR lr, =ZeroPage LDR lr, [lr, #Cache_HALDevice] LDR lr, [lr, #HALDevice_Address] ; Drain ARM write buffer DSB SY ; Drain PL310 write buffer PL310Sync lr, a1 ; Ensure the PL310 sync is complete DSB SY EXIT DSB_Write_PL310 ROUT Entry LDR lr, =ZeroPage LDR lr, [lr, #Cache_HALDevice] LDR lr, [lr, #HALDevice_Address] ; Drain ARM write buffer DSB ST ; Drain PL310 write buffer PL310Sync lr, a1 ; Ensure the PL310 sync is complete DSB ST EXIT DMB_ReadWrite_PL310 ROUT Entry LDR lr, =ZeroPage LDR lr, [lr, #Cache_HALDevice] LDR lr, [lr, #HALDevice_Address] ; Drain ARM write buffer DMB SY ; Drain PL310 write buffer PL310Sync lr, a1 ; Ensure the PL310 sync is complete DMB SY EXIT DMB_Write_PL310 ROUT Entry LDR lr, =ZeroPage LDR lr, [lr, #Cache_HALDevice] LDR lr, [lr, #HALDevice_Address] ; Drain ARM write buffer DMB ST ; Drain PL310 write buffer PL310Sync lr, a1 ; Ensure the PL310 sync is complete DMB ST EXIT MMU_Changing_PL310 ROUT DSB ; Ensure the page table write has actually completed ISB ; Also required [ SMP TLBIALLIS ; invalidate ITLB and DTLB | TLBIALL ; invalidate ITLB and DTLB ] DSB ; Wait for TLB invalidation to complete ISB ; Ensure that the effects are visible B Cache_CleanInvalidateAll_PL310 ; a1 = virtual address of page affected (page aligned address) ; MMU_ChangingEntry_PL310 ROUT Push "a1-a2,lr" ; Do the TLB maintenance [ SMP BL MMU_ChangingUncachedEntry_ARMv7MP | BL MMU_ChangingUncachedEntry_WB_CR7_Lx ] ; Keep the rest simple by just calling through to MMU_ChangingEntries MOV a2, #1 B %FT10 ; a1 = virtual address of first page affected (page aligned address) ; a2 = number of pages ; MMU_ChangingEntries_PL310 Push "a1-a2,lr" ; Do the TLB maintenance [ SMP BL MMU_ChangingUncachedEntries_ARMv7MP | BL MMU_ChangingUncachedEntries_WB_CR7_Lx ] 10 ; Arrive here from MMU_ChangingEntry_PL310 LDR a1, [sp] ; Do PL310 clean & invalidate ADD a2, a1, a2, LSL #Log2PageSize BL Cache_CleanInvalidateRange_PL310 Pull "a1-a2,pc" ; a1 = start address (inclusive, cache line aligned) ; a2 = end address (exclusive, cache line aligned) ; Cache_CleanInvalidateRange_PL310 ROUT Entry "a2-a4,v1" ; For simplicity, align to page boundaries LDR a4, =PageSize-1 ADD a2, a2, a4 BIC a1, a1, a4 BIC a3, a2, a4 SUB v1, a3, a1 [ :LNOT: SMP CMP v1, #PL310Threshold BHS %FT90 ] MOV a4, a1 ; Behave in a similar way to the PL310 full clean & invalidate: ; * Clean ARM ; * Clean & invalidate PL310 ; * Clean & invalidate ARM ; a4 = base virtual address ; a3 = end virtual 
address ; v1 = length ; Clean ARM LDR a1, =ZeroPage [ :LNOT: SMP LDR lr, [a1, #DCache_RangeThreshold] ;check whether cheaper to do global clean CMP lr, v1 ADRLE lr, %FT30 BLE Cache_CleanAll_WB_CR7_Lx ] ; Clean each page in turn LDRB a2, [a1, #DCache_LineLen] 20 DCCMVAC a4 ; clean DCache entry to PoC ADD a4, a4, a2 CMP a4, a3 BNE %BT20 DSB ; Wait for clean to complete SUB a4, a3, v1 30 ; Clean PL310 LDR a1, =ZeroPage LDR a2, [a1, #Cache_HALDevice] LDR a2, [a2, #HALDevice_Address] ; Clean each line/index of the pages 50 ; Convert logical addr to physical. ; Use the ARMv7 CP15 registers for convenience. PHPSEI MCR p15, 0, a4, c7, c8, 0 ; ATS1CPR ISB MRC p15, 0, a1, c7, c4, 0 ; Get result PLP TST a1, #1 ADD a4, a4, #PageSize BNE %FT75 ; Lookup failed - assume this means that the page doesn't need cleaning from the PL310 ; Point to last line in page, and mask out attributes returned by the ; lookup ORR a1, a1, #&FE0 BIC a1, a1, #&01F 60 STR a1, [a2, #PL310_REG7_CLEAN_PA] TST a1, #&FE0 SUB a1, a1, #1<<5 ; next index BNE %BT60 75 CMP a4, a3 BNE %BT50 ; Sync DMB EXIT [ :LNOT: SMP 90 ; Full clean required PullEnv B Cache_CleanInvalidateAll_PL310 ] Cache_InvalidateRange_PL310 * Cache_CleanInvalidateRange_PL310 ; TODO: Need a ranged invalidate implementation that doesn't round to page size ; -------------------------------------------------------------------------- ; ----- Generic ARMv6 and ARMv7 barrier operations ------------------------- ; -------------------------------------------------------------------------- ; Although the ARMv6 barriers are supported on ARMv7, they are deprecated, and ; they do give less control than the native ARMv7 barriers. So we prefer to use ; the ARMv7 barriers wherever possible.
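; Note for context (general ARMv6/v7 architectural behaviour): a DMB only
; enforces the ordering of memory accesses either side of it (the _Write
; variants order stores only), whereas a DSB additionally stalls until those
; accesses - and any outstanding cache, TLB and branch predictor maintenance -
; have completed. Callers therefore pick the DSB ARMops where completion must
; be guaranteed and the DMB ones where ordering alone is sufficient.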
DSB_ReadWrite_ARMv6 ROUT MOV a1, #0 MCR p15, 0, a1, c7, c10, 4 MOV pc, lr DMB_ReadWrite_ARMv6 ROUT MOV a1, #0 MCR p15, 0, a1, c7, c10, 5 MOV pc, lr DSB_ReadWrite_ARMv7 ROUT DSB SY MOV pc, lr DSB_Write_ARMv7 ROUT DSB ST MOV pc, lr DMB_ReadWrite_ARMv7 ROUT DMB SY MOV pc, lr DMB_Write_ARMv7 ROUT DMB ST MOV pc, lr ] ; MEMM_Type = "VMSAv6" LTORG ; -------------------------------------------------------------------------- LookForHALCacheController ROUT Entry "r0-r3,r8,r12" ; Look for any known cache controllers that the HAL has registered, and ; replace our ARMop routines with the appropriate routines for that ; controller LDR r0, =(0:SHL:16)+HALDeviceType_SysPeri+HALDeviceSysPeri_CacheC MOV r1, #0 LDR r12, =ZeroPage STR r1, [r12, #Cache_HALDevice] ; In case none found 10 MOV r8, #OSHW_DeviceEnumerate SWI XOS_Hardware EXIT VS CMP r1, #-1 EXIT EQ ; Do we recognise this controller? ASSERT HALDevice_ID = 2 [ NoARMv4 LDR lr, [r2] MOV lr, lr, LSR #16 | LDRH lr, [r2, #HALDevice_ID] ] ADR r8, KnownHALCaches 20 LDR r12, [r8], #8+Proc_MMU_ChangingUncachedEntries-Proc_Cache_CleanInvalidateAll CMP r12, #-1 BEQ %BT10 CMP lr, r12 BNE %BT20 ; Cache recognised. Disable IRQs for safety, and then try enabling it. Push "r2" MOV r0, r2 MSR CPSR_c, #SVC32_mode+I32_bit MOV lr, pc LDR pc, [r2, #HALDevice_Activate] CMP r0, #1 Pull "r2" MSRNE CPSR_c, #SVC32_mode BNE %BT10 ; Cache enabled OK - remember the device pointer and patch our maintenance ops LDR r0, =ZeroPage STR r2, [r0, #Cache_HALDevice] ADD r0, r0, #Proc_Cache_CleanInvalidateAll MOV r1, #Proc_MMU_ChangingUncachedEntries-Proc_Cache_CleanInvalidateAll 30 LDR r3, [r8, #-4]! TEQ r3, #0 STRNE r3, [r0, r1] SUBS r1, r1, #4 BGE %BT30 ; It's now safe to restore IRQs MSR CPSR_c, #SVC32_mode EXIT KnownHALCaches ROUT [ MEMM_Type = "VMSAv6" DCD HALDeviceID_CacheC_PL310 01 DCD Cache_CleanInvalidateAll_PL310 DCD Cache_CleanInvalidateRange_PL310 DCD Cache_CleanAll_PL310 DCD Cache_CleanRange_PL310 DCD Cache_InvalidateAll_PL310 DCD Cache_InvalidateRange_PL310 DCD Cache_RangeThreshold_PL310 DCD Cache_Examine_PL310 DCD 0 ; ICache_InvalidateAll DCD 0 ; ICache_InvalidateRange DCD 0 ; TLB_InvalidateAll DCD 0 ; TLB_InvalidateEntry DCD DSB_ReadWrite_PL310 DCD DSB_Write_PL310 DCD 0 ; DSB_Read DCD DMB_ReadWrite_PL310 DCD DMB_Write_PL310 DCD 0 ; DMB_Read DCD 0 ; IMB_Full DCD 0 ; IMB_Range DCD 0 ; IMB_List DCD MMU_Changing_PL310 DCD MMU_ChangingEntry_PL310 DCD 0 ; MMU_ChangingUncached DCD 0 ; MMU_ChangingUncachedEntry DCD MMU_ChangingEntries_PL310 DCD 0 ; MMU_ChangingUncachedEntries ASSERT . - %BT01 = 4+Proc_MMU_ChangingUncachedEntries-Proc_Cache_CleanInvalidateAll ] DCD -1 ; -------------------------------------------------------------------------- MACRO ARMopPtr $op ASSERT . 
- ARMopPtrTable = ARMop_$op * 4 DCD ZeroPage + Proc_$op MEND ; ARMops exposed by OS_MMUControl 2 ARMopPtrTable ARMopPtr Cache_CleanInvalidateAll ARMopPtr Cache_CleanAll ARMopPtr Cache_InvalidateAll ARMopPtr Cache_RangeThreshold ARMopPtr TLB_InvalidateAll ARMopPtr TLB_InvalidateEntry ARMopPtr DSB_ReadWrite ARMopPtr IMB_Full ARMopPtr IMB_Range ARMopPtr IMB_List ARMopPtr MMU_Changing ARMopPtr MMU_ChangingEntry ARMopPtr MMU_ChangingUncached ARMopPtr MMU_ChangingUncachedEntry ARMopPtr MMU_ChangingEntries ARMopPtr MMU_ChangingUncachedEntries ARMopPtr DSB_Write ARMopPtr DSB_Read ARMopPtr DMB_ReadWrite ARMopPtr DMB_Write ARMopPtr DMB_Read ARMopPtr Cache_CleanInvalidateRange [ {FALSE} ; Not fully tested yet, so keep out of the public API ARMopPtr Cache_CleanRange ARMopPtr Cache_InvalidateRange ARMopPtr ICache_InvalidateAll ARMopPtr ICache_InvalidateRange ] ARMopPtrTable_End ASSERT ARMopPtrTable_End - ARMopPtrTable = ARMop_Max*4 ; IMPORT Write0_Translated ARM_PrintProcessorType LDR a1, =ZeroPage LDRB a1, [a1, #ProcessorType] TEQ a1, #ARMunk MOVEQ pc, lr Push "lr" ADR a2, PNameTable LDHA a1, a2, a1, a3 ADD a1, a2, a1 [ International BL Write0_Translated | SWI XOS_Write0 ] SWI XOS_NewLine SWI XOS_NewLine Pull "pc" PNameTable DCW PName_ARM600 - PNameTable DCW PName_ARM610 - PNameTable DCW PName_ARM700 - PNameTable DCW PName_ARM710 - PNameTable DCW PName_ARM710a - PNameTable DCW PName_SA110 - PNameTable ; pre rev T DCW PName_SA110 - PNameTable ; rev T or later DCW PName_ARM7500 - PNameTable DCW PName_ARM7500FE - PNameTable DCW PName_SA1100 - PNameTable DCW PName_SA1110 - PNameTable DCW PName_ARM720T - PNameTable DCW PName_ARM920T - PNameTable DCW PName_ARM922T - PNameTable DCW PName_X80200 - PNameTable DCW PName_X80321 - PNameTable DCW PName_ARM1176JZF_S - PNameTable DCW PName_Cortex_A5 - PNameTable DCW PName_Cortex_A7 - PNameTable DCW PName_Cortex_A8 - PNameTable DCW PName_Cortex_A9 - PNameTable DCW PName_Cortex_A17 - PNameTable ; A12 rebranded as A17 DCW PName_Cortex_A15 - PNameTable DCW PName_Cortex_A17 - PNameTable DCW PName_Cortex_A53 - PNameTable DCW PName_Cortex_A57 - PNameTable DCW PName_Cortex_A72 - PNameTable ; A58 rebranded as A72 PName_ARM600 = "600:ARM 600 Processor",0 PName_ARM610 = "610:ARM 610 Processor",0 PName_ARM700 = "700:ARM 700 Processor",0 PName_ARM710 = "710:ARM 710 Processor",0 PName_ARM710a = "710a:ARM 710a Processor",0 PName_SA110 = "SA110:SA-110 Processor",0 PName_ARM7500 = "7500:ARM 7500 Processor",0 PName_ARM7500FE = "7500FE:ARM 7500FE Processor",0 PName_SA1100 = "SA1100:SA-1100 Processor",0 PName_SA1110 = "SA1110:SA-1110 Processor",0 PName_ARM720T = "720T:ARM 720T Processor",0 PName_ARM920T = "920T:ARM 920T Processor",0 PName_ARM922T = "922T:ARM 922T Processor",0 PName_X80200 = "X80200:80200 Processor",0 PName_X80321 = "X80321:80321 Processor",0 PName_ARM1176JZF_S = "ARM1176JZF_S:ARM1176JZF-S Processor",0 PName_Cortex_A5 = "CA5:Cortex-A5 Processor",0 PName_Cortex_A7 = "CA7:Cortex-A7 Processor",0 PName_Cortex_A8 = "CA8:Cortex-A8 Processor",0 PName_Cortex_A9 = "CA9:Cortex-A9 Processor",0 PName_Cortex_A15 = "CA15:Cortex-A15 Processor",0 PName_Cortex_A17 = "CA17:Cortex-A17 Processor",0 PName_Cortex_A53 = "CA53:Cortex-A53 Processor",0 PName_Cortex_A57 = "CA57:Cortex-A57 Processor",0 PName_Cortex_A72 = "CA72:Cortex-A72 Processor",0 ALIGN ; Lookup tables from DA flags PCB (bits 14:12,5,4, packed down to 4:2,1,0) ; to XCB bits in page table descriptors. 
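; As a concrete illustration of the packing described above, the 5-bit table
; index could be formed as below ('daflags' stands for a dynamic area flags
; word; the bit positions are those given in the comment, nothing else is
; implied about the wider flags layout). For the VMSAv6 tables a sixth bit
; (XCB_TU, defined below) selects the second, temporarily-uncacheable half.
;
;   #include <stdint.h>
;
;   static inline uint32_t xcb_index(uint32_t daflags)
;   {
;       return ((daflags >> 10) & 0x1C)    /* bits 14:12 -> index bits 4:2 */
;            | ((daflags >> 4)  & 0x03);   /* bits 5:4   -> index bits 1:0 */
;   }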
XCB_CB * 0:SHL:0 XCB_NB * 1:SHL:0 XCB_NC * 1:SHL:1 XCB_P * 1:SHL:2 [ MEMM_Type = "VMSAv6" XCB_TU * 1:SHL:5 ; For VMSAv6, deal with temp uncacheable via the table ] ALIGN 32 [ MEMM_Type = "ARM600" ; WT read-allocate cache (eg ARM720T) XCBTableWT ; C+B CNB NCB NCNB = L2_C+L2_B, L2_C, L2_B, 0 ; Default = L2_C+L2_B, L2_C, L2_B, 0 ; WT, WT, Non-merging, X = L2_C+L2_B, L2_C, L2_B, 0 ; WB/RA, WB, Merging, X = L2_C+L2_B, L2_C, L2_B, 0 ; WB/WA, X, Idempotent, X = L2_C+L2_B, L2_C, L2_B, 0 ; Alt DCache, X, X, X = L2_C+L2_B, L2_C, L2_B, 0 ; X, X, X, X = L2_C+L2_B, L2_C, L2_B, 0 ; X, X, X, X = L2_C+L2_B, L2_C, L2_B, 0 ; X, X, X, X ; SA-110 in Risc PC - WB only read-allocate cache, non-merging WB XCBTableSA110 ; C+B CNB NCB NCNB = L2_C+L2_B, 0, L2_B, 0 ; Default = L2_B, 0, L2_B, 0 ; WT, WT, Non-merging, X = L2_C+L2_B, 0, L2_B, 0 ; WB/RA, WB, Merging, X = L2_C+L2_B, 0, L2_B, 0 ; WB/WA, X, Idempotent, X = L2_C+L2_B, 0, L2_B, 0 ; Alt DCache, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X ; ARMv5 WB/WT read-allocate cache, non-merging WB (eg ARM920T) XCBTableWBR ; C+B CNB NCB NCNB = L2_C+L2_B, 0, L2_B, 0 ; Default = L2_C , 0, L2_B, 0 ; WT, WT, Non-merging, X = L2_C+L2_B, 0, L2_B, 0 ; WB/RA, WB, Merging, X = L2_C+L2_B, 0, L2_B, 0 ; WB/WA, X, Idempotent, X = L2_C+L2_B, 0, L2_B, 0 ; Alt DCache, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X ; SA-1110 - WB only read allocate cache, merging WB, mini D-cache XCBTableSA1110 ; C+B CNB NCB NCNB = L2_C+L2_B, 0, L2_B, 0 ; Default = L2_B, 0, 0, 0 ; WT, WT, Non-merging, X = L2_C+L2_B, 0, L2_B, 0 ; WB/RA, WB, Merging, X = L2_C+L2_B, 0, L2_B, 0 ; WB/WA, X, Idempotent, X = L2_C , 0, L2_B, 0 ; Alt DCache, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X ; XScale - WB/WT read or write-allocate cache, merging WB, mini D-cache ; defaulting to read-allocate XCBTableXScaleRA ; C+B CNB NCB NCNB = L2_C+L2_B, 0, L2_B, 0 ; Default = L2_C , 0, L2_X+L2_B, 0 ; WT, WT, Non-merging, X = L2_C+L2_B, 0, L2_B, 0 ; WB/RA, WB, Merging, X = L2_X+L2_C+L2_B, 0, L2_B, 0 ; WB/WA, X, Idempotent, X = L2_X+L2_C , 0, L2_B, 0 ; Alt DCache, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X ; XScale - WB/WT read or write-allocate cache, merging WB, mini D-cache ; defaulting to write-allocate XCBTableXScaleWA ; C+B CNB NCB NCNB = L2_X+L2_C+L2_B, 0, L2_B, 0 ; Default = L2_C , 0, L2_X+L2_B, 0 ; WT, WT, Non-merging, X = L2_C+L2_B, 0, L2_B, 0 ; WB/RA, WB, Merging, X = L2_X+L2_C+L2_B, 0, L2_B, 0 ; WB/WA, X, Idempotent, X = L2_X+L2_C , 0, L2_B, 0 ; Alt DCache, X, X, X = L2_X+L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X = L2_X+L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X = L2_X+L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X ; XScale - WB/WT read-allocate cache, merging WB, no mini D-cache/extended pages XCBTableXScaleNoExt ; C+B CNB NCB NCNB = L2_C+L2_B, 0, L2_B, 0 ; Default = L2_C , 0, 0, 0 ; WT, WT, Non-merging, X = L2_C+L2_B, 0, L2_B, 0 ; WB/RA, WB, Merging, X = L2_C+L2_B, 0, L2_B, 0 ; WB/WA, X, Idempotent, X = L2_C+L2_B, 0, L2_B, 0 ; Alt DCache, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X = L2_C+L2_B, 0, L2_B, 0 ; X, X, X, X ] ; MEMM_Type = "ARM600" [ MEMM_Type = "VMSAv6" [ ShortDesc ; VMSAv6/v7 L2 memory attributes (short descriptor format, TEX remap disabled) L2_SO_S * 0 ; Strongly-ordered, shareable L2_Dev_S * L2_B ; 
Device, shareable L2_Dev_nS * 2:SHL:L2_TEXShift ; Device, non-shareable ; For Normal memory types, use the form that is explicit about inner and outer ; cacheability. This provides a nice mapping to the way cacheability is ; specified in the TTBR (see SetTTBR) VMSAv6_Cache_NC * 0 VMSAv6_Cache_WBWA * 1 VMSAv6_Cache_WT * 2 VMSAv6_Cache_WBRA * 3 ASSERT L2_C = L2_B:SHL:1 MACRO VMSAv6_Nrm_XCB $inner, $outer L2_Nrm_$inner._$outer * ((4+VMSAv6_Cache_$outer):SHL:L2_TEXShift) + (VMSAv6_Cache_$inner * L2_B) [ "$outer" == "$inner" L2_Nrm_$inner * L2_Nrm_$inner._$outer ] MEND VMSAv6_Nrm_XCB WT, WT ; Normal, WT/RA, S bit determines shareability VMSAv6_Nrm_XCB WBRA, WBRA ; Normal, WB/RA, S bit determines shareability VMSAv6_Nrm_XCB NC, NC ; Normal, non-cacheable (but bufferable), S bit determines shareability VMSAv6_Nrm_XCB WBWA, WBWA ; Normal, WB/WA, S bit determines shareability VMSAv6_Nrm_XCB WT, WBWA ; Normal, inner WT, outer WB/WA, S bit determines shareability ; Generic XCB table for VMSAv6/v7 ; * NCNB is roughly equivalent to "strongly ordered". ; * NCB with non-merging write buffer is equivalent to "Device". ; * NCB with merging write buffer is also mapped to "Device". "Normal" is ; tempting but may result in issues with read-sensitive devices (see below). ; * For NCB with devices which aren't read-sensitive, we introduce a new ; "Merging write buffer with idempotent memory" policy which maps to the ; Normal, non-cacheable type. This will degrade nicely on older OSes and CPUs, ; avoiding some issues if we were to make NCB with merging write buffer default ; to Normal memory. This policy is also the new default, so that all existing ; NCB RAM uses it (so unaligned loads, etc. will work). No existing code seems ; to be using NCB for IO devices (only for IO RAM like VRAM), so this change ; should be safe (previously, all NCB policies would have mapped to Device ; memory) ; * CNB has no equivalent - there's no control over whether the write buffer is ; used for cacheable regions, so we have to downgrade to NCNB. ; The caches should behave sensibly when given unsupported attributes ; (downgrade WB to WT to NC), but we may end up doing more cache maintenance ; than needed if the hardware downgrades some areas to NC. XCBTableVMSAv6 ; C+B CNB NCB NCNB DCW L2_Nrm_WBWA, L2_SO_S, L2_Nrm_NC, L2_SO_S ; Default DCW L2_Nrm_WT, L2_SO_S, L2_Dev_S, L2_SO_S ; WT, WT, Non-merging, X DCW L2_Nrm_WBRA, L2_SO_S, L2_Dev_S, L2_SO_S ; WB/RA, WB, Merging, X DCW L2_Nrm_WBWA, L2_SO_S, L2_Nrm_NC, L2_SO_S ; WB/WA, X, Idempotent, X DCW L2_Nrm_WT_WBWA,L2_SO_S,L2_Nrm_NC,L2_SO_S ; Alt DCache, X, X, X DCW L2_Nrm_WBWA, L2_SO_S, L2_Nrm_NC, L2_SO_S ; X, X, X, X DCW L2_Nrm_WBWA, L2_SO_S, L2_Nrm_NC, L2_SO_S ; X, X, X, X DCW L2_Nrm_WBWA, L2_SO_S, L2_Nrm_NC, L2_SO_S ; X, X, X, X ; This second set of entries deals with when pages are made ; temporarily uncacheable - we need to change the cacheability without ; changing the memory type.
DCW L2_Nrm_NC, L2_SO_S, L2_Nrm_NC, L2_SO_S ; Default DCW L2_Nrm_NC, L2_SO_S, L2_Dev_S, L2_SO_S ; WT, WT, Non-merging, X DCW L2_Nrm_NC, L2_SO_S, L2_Dev_S, L2_SO_S ; WB/RA, WB, Merging, X DCW L2_Nrm_NC, L2_SO_S, L2_Nrm_NC, L2_SO_S ; WB/WA, X, Idempotent, X DCW L2_Nrm_NC, L2_SO_S, L2_Nrm_NC, L2_SO_S ; Alt DCache, X, X, X DCW L2_Nrm_NC, L2_SO_S, L2_Nrm_NC, L2_SO_S ; X, X, X, X DCW L2_Nrm_NC, L2_SO_S, L2_Nrm_NC, L2_SO_S ; X, X, X, X DCW L2_Nrm_NC, L2_SO_S, L2_Nrm_NC, L2_SO_S ; X, X, X, X ] [ LongDesc ; Attributes for long-descriptor page table format ; This gives the AttrIndx field of the low attributes ; Our arbitrary AttrIndx mapping LLAttr_Nrm_NC * 0:SHL:LL_Page_LowAttr_AttrIndx0_bit LLAttr_Nrm_WBWA * 1:SHL:LL_Page_LowAttr_AttrIndx0_bit LLAttr_Nrm_WBRA * 2:SHL:LL_Page_LowAttr_AttrIndx0_bit LLAttr_Nrm_WT * 3:SHL:LL_Page_LowAttr_AttrIndx0_bit LLAttr_SO * 4:SHL:LL_Page_LowAttr_AttrIndx0_bit LLAttr_Dev * 5:SHL:LL_Page_LowAttr_AttrIndx0_bit ASSERT LL_Page_LowAttr_AttrIndx2_bit < 8 ; Reverse mapping table (directly before the forwards-mapping table) DCD DynAreaFlags_NotCacheable ; LLAttr_Nrm_NC DCD 0 ; LLAttr_Nrm_WBWA DCD 2:SHL:DynAreaFlags_CPShift ; LLAttr_Nrm_WBRA DCD 1:SHL:DynAreaFlags_CPShift ; LLAttr_Nrm_WT DCD DynAreaFlags_NotCacheable+DynAreaFlags_NotBufferable ; LLAttr_SO DCD DynAreaFlags_NotCacheable+1:SHL:DynAreaFlags_CPShift ; LLAttr_Dev ; Pad out the rest; MAIR Attr field value of 0 corresponds to strongly-ordered memory DCD DynAreaFlags_NotCacheable+DynAreaFlags_NotBufferable ; LLAttr_SO DCD DynAreaFlags_NotCacheable+DynAreaFlags_NotBufferable ; LLAttr_SO XCBTableVMSAv6Long ; C+B CNB NCB NCNB DCB LLAttr_Nrm_WBWA, LLAttr_SO, LLAttr_Nrm_NC, LLAttr_SO ; Default DCB LLAttr_Nrm_WT, LLAttr_SO, LLAttr_Dev, LLAttr_SO ; WT, WT, Non-merging, X DCB LLAttr_Nrm_WBRA, LLAttr_SO, LLAttr_Dev, LLAttr_SO ; WB/RA, WB, Merging, X DCB LLAttr_Nrm_WBWA, LLAttr_SO, LLAttr_Nrm_NC, LLAttr_SO ; WB/WA, X, Idempotent, X DCB LLAttr_Nrm_WBWA, LLAttr_SO, LLAttr_Nrm_NC, LLAttr_SO ; Alt DCache, X, X, X DCB LLAttr_Nrm_WBWA, LLAttr_SO, LLAttr_Nrm_NC, LLAttr_SO ; X, X, X, X DCB LLAttr_Nrm_WBWA, LLAttr_SO, LLAttr_Nrm_NC, LLAttr_SO ; X, X, X, X DCB LLAttr_Nrm_WBWA, LLAttr_SO, LLAttr_Nrm_NC, LLAttr_SO ; X, X, X, X ; This second set of entries deals with when pages are made ; temporarily uncacheable - we need to change the cacheability without ; changing the memory type. DCB LLAttr_Nrm_NC, LLAttr_SO, LLAttr_Nrm_NC, LLAttr_SO ; Default DCB LLAttr_Nrm_NC, LLAttr_SO, LLAttr_Dev, LLAttr_SO ; WT, WT, Non-merging, X DCB LLAttr_Nrm_NC, LLAttr_SO, LLAttr_Dev, LLAttr_SO ; WB/RA, WB, Merging, X DCB LLAttr_Nrm_NC, LLAttr_SO, LLAttr_Nrm_NC, LLAttr_SO ; WB/WA, X, Idempotent, X DCB LLAttr_Nrm_NC, LLAttr_SO, LLAttr_Nrm_NC, LLAttr_SO ; Alt DCache, X, X, X DCB LLAttr_Nrm_NC, LLAttr_SO, LLAttr_Nrm_NC, LLAttr_SO ; X, X, X, X DCB LLAttr_Nrm_NC, LLAttr_SO, LLAttr_Nrm_NC, LLAttr_SO ; X, X, X, X DCB LLAttr_Nrm_NC, LLAttr_SO, LLAttr_Nrm_NC, LLAttr_SO ; X, X, X, X ] ] ; MEMM_Type = "VMSAv6" END