; Copyright 2009 Castle Technology Ltd
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;     http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;

; Using the DMA controller to clear RAM is much faster than doing it with the CPU (with the cache/write buffer off, at least)
                GBLL    Use_DMA_Clear
Use_DMA_Clear   SETL    {TRUE}

; init_ram
; Programs SDRC_SYSCONFIG for auto & smart idle. The full SDRC bring-up that
; follows is assembled out ([ {FALSE}) and kept for reference only.
; In:  lr = return address
; Out: a1, v1 corrupted (live path)
init_ram
        ; There is some setup we want to do here - auto & smart idle modes!
        LDR     v1, =SDRC_Regs
        MOV     a1, #1+(2<<3)
        STR     a1, [v1, #SDRC_SYSCONFIG]
 [ {FALSE} ; With multiple board types supported, it's now much easier just to rely on u-boot to set everything up properly.
        ; Make sure all attached RAM is initialised
        ; Although x-loader will initialise some RAM for us, it might not necessarily use the best settings, and it might not initialise all the RAM that's available (e.g. CS1 on rev C beagleboard)
        ; This code roughly follows the sdrc_init()/dram_init() flow in u-boot
        LDR     v1, =SDRC_Regs
        ; CS0 setup:
        ; - If we're not running from SDRAM, (re)init CS0
        MOV     a1, pc
        CMP     a1, #CS0_SDRAM
        BHS     %FT10
        ; do_sdrc_init(CS0,EARLY_INIT)
        ; First we reset SDRC
        MOV     a1, #2
        STR     a1, [v1, #SDRC_SYSCONFIG] ; SOFTRESET
        MOV     a2, #12*1024*1024       ; Timeout counter for the reset poll below
20
        LDR     a1, [v1, #SDRC_SYSSTATUS]
        TST     a1, #1
        BNE     %FT30
        SUBS    a2, a2, #1
        BNE     %BT20
30
        MOV     a1, #0
        STR     a1, [v1, #SDRC_SYSCONFIG]
        ; Setup SDRC to ball mux. Or something.
        MOV     a1, #&100
        STR     a1, [v1, #SDRC_SHARING]
        ; Disable powerdown via CKE
        MOV     a1, #&81
        STR     a1, [v1, #SDRC_POWER_REG]
        ; Enable DLL, 90 degree phase
        MOV     a1, #&A
        STR     a1, [v1, #SDRC_DLLA_CTRL]
        ; Wait for lock (indefinitely?)
20
        LDR     a1, [v1, #SDRC_DLLA_STATUS]
        TST     a1, #4
        BEQ     %BT20
        ; Now initialise CS0
        LDR     a1, =&02584099 ; 128MB, etc.
        STR     a1, [v1, #SDRC_MCFG_0]
        LDR     a1, =&4E201
        STR     a1, [v1, #SDRC_RFR_CTRL_0]
        LDR     a1, =(21 :SHL: 27) :OR: (10 :SHL: 22) :OR: (7 :SHL: 18) :OR: (3 :SHL: 15) :OR: (3 :SHL: 12) :OR: (2 :SHL: 9) :OR: (3 :SHL: 6) :OR: 6
        STR     a1, [v1, #SDRC_ACTIM_CTRLA_0] ; 165MHz timings
        LDR     a1, =(1 :SHL: 12) :OR: 23 :OR: (5 :SHL: 8) :OR: (1 :SHL: 16)
        STR     a1, [v1, #SDRC_ACTIM_CTRLB_0]
        ; NOP, PRECHARGE, AUTOREFRESH, AUTOREFRESH sequence
        MOV     a1, #0
        STR     a1, [v1, #SDRC_MANUAL_0]
        MOV     a1, #1
        STR     a1, [v1, #SDRC_MANUAL_0]
        MOV     a1, #2
        STR     a1, [v1, #SDRC_MANUAL_0]
        STR     a1, [v1, #SDRC_MANUAL_0]
        ; CAS latency, burst length, etc.
        MOV     a1, #&32
        STR     a1, [v1, #SDRC_MR_0]
        ; Now check the setup
        ; This is the same technique as mem_ok() in u-boot
        LDR     a1, =CS0_SDRAM
        MOV     a2, #0
        LDR     a3, =&12345678
        STR     a2, [a1, #&400]
        STR     a3, [a1]
        STR     a2, [a1, #4]
        LDR     a2, [a1, #&400]
        LDR     a4, [a1]
        CMP     a2, #0
        CMPEQ   a3, a4
        BEQ     %FT10 ; It's good
        ; Else it's bad and we need to disable CS0
        MOV     a1, #0
        STR     a1, [v1, #SDRC_MCFG_0]
10
        ; Check if CS1 is active
        ; If not, and we're a rev C beagleboard, activate it
        LDR     a1, [v1, #SDRC_MCFG_1]
        LDR     a2, =&3FF00
        TST     a1, a2
        BNE     %FT10
        ; Beagleboard rev C check:
        ; Although the manual fails to mention it, apparently GPIO 171 will be low if we're on rev C
        ; Else we're rev A/B
        LDR     a2, =L4_GPIO6
        LDR     a1, [a2, #GPIO_OE]
        ORR     a1, a1, #1:SHL:11
        STR     a1, [a2, #GPIO_OE]
        ; Wait a bit just to make sure the input is up to date
        MOV     a1, #32768
5
        SUBS    a1, a1, #1
        BNE     %BT5
        LDR     a1, [a2, #GPIO_DATAIN]
        TST     a1, #1:SHL:11
        BNE     %FT10
        ; Make CS0 and CS1 contiguous, in case it helps RISC OS a bit (e.g. finding large areas of contiguous physical memory for IO)
        LDR     a1, [v1, #SDRC_MCFG_0]
        LDR     a2, =&3FF<<1
        ANDS    a1, a2, a1, LSR #7 ; CS0 size in MB
        BEQ     %FT5 ; If no CS0, just program offset of 0
        CMP     a1, #32
        MOVLT   a1, #32 ; Min 32MB offset if CS0 present
        ; Round up to power of two
        SUB     a2, a1, #1
        TST     a2, a1                  ; Zero result means a1 is already a power of two
        CLZNE   a2, a1
        MOVNE   a1, #1
        MOVNE   a1, a1, ROR a2          ; 1 ROR clz(a1) = 1 << (32-clz(a1))
5
        AND     a2, a1, #&60 ; 32MB offset
        MOV     a1, a1, LSR #7 ; 128MB offset
        ORR     a1, a1, a2, LSL #3
        STR     a1, [v1, #SDRC_CS_CFG]
        ; Initialise CS1
        LDR     a1, =&02584099 ; 128MB, etc.
        STR     a1, [v1, #SDRC_MCFG_1]
        LDR     a1, =&4E201
        STR     a1, [v1, #SDRC_RFR_CTRL_1]
        LDR     a1, =(21 :SHL: 27) :OR: (10 :SHL: 22) :OR: (7 :SHL: 18) :OR: (3 :SHL: 15) :OR: (3 :SHL: 12) :OR: (2 :SHL: 9) :OR: (3 :SHL: 6) :OR: 6
        STR     a1, [v1, #SDRC_ACTIM_CTRLA_1] ; 165MHz timings
        LDR     a1, =(1 :SHL: 12) :OR: 23 :OR: (5 :SHL: 8) :OR: (1 :SHL: 16)
        STR     a1, [v1, #SDRC_ACTIM_CTRLB_1]
        ; NOP, PRECHARGE, AUTOREFRESH, AUTOREFRESH sequence
        MOV     a1, #0
        STR     a1, [v1, #SDRC_MANUAL_1]
        MOV     a1, #1
        STR     a1, [v1, #SDRC_MANUAL_1]
        MOV     a1, #2
        STR     a1, [v1, #SDRC_MANUAL_1]
        STR     a1, [v1, #SDRC_MANUAL_1]
        ; CAS latency, burst length, etc.
        MOV     a1, #&32
        STR     a1, [v1, #SDRC_MR_1]
        ; Now check the setup
        LDR     a1, =CS0_SDRAM
        LDR     a4, [v1, #SDRC_CS_CFG]
        AND     a3, a4, #7 ; Offset in 128MB units
        ADD     a1, a1, a3, LSL #20+7
        AND     a3, a4, #&300 ; Offset in 32MB units
        ADD     a1, a1, a3, LSL #20+5-8 ; a1 = CS1 start
        MOV     a2, #0
        LDR     a3, =&12345678
        STR     a2, [a1, #&400]
        STR     a3, [a1]
        STR     a2, [a1, #4]
        LDR     a2, [a1, #&400]
        LDR     a4, [a1]
        CMP     a2, #0
        CMPEQ   a3, a4
        BEQ     %FT10 ; It's good
        ; Else it's bad and we need to disable CS1
        MOV     a1, #0
        STR     a1, [v1, #SDRC_MCFG_1]
10
 ]
        ; Done!
        MOV     pc, lr

; get_end_of_ram
; In:  lr = return address
; Out: a1 <= Highest physical address in RAM +1
;      a2-a4, v1, v2 corrupted
; If the CS1 size field in SDRC_MCFG_1 is non-zero the result is
; CS1 start + CS1 size, otherwise it is CS0_SDRAM + CS0 size.
get_end_of_ram
        LDR     v1, =SDRC_Regs
        LDR     a3, [v1, #SDRC_MCFG_1]
        LDR     a2, =&3FF00<<13
        ANDS    a3, a2, a3, LSL #13     ; Size field (bits 8-17) scaled to bytes, i.e. 2MB units
        BEQ     %FT10
        ; CS1 start = CS0_SDRAM + (CS_CFG bits 0-2)*128MB + (CS_CFG bits 8-9)*32MB
        LDR     a2, =CS0_SDRAM
        LDR     a4, [v1, #SDRC_CS_CFG]
        AND     v2, a4, #7 ; Offset in 128MB units
        ADD     a2, a2, v2, LSL #20+7
        AND     v2, a4, #&300 ; Offset in 32MB units
        ADD     a2, a2, v2, LSL #20+5-8
        ADD     a1, a3, a2
        MOV     pc, lr
10
        ; No RAM in CS1; therefore must be in CS0
        LDR     a3, [v1, #SDRC_MCFG_0]
        LDR     a2, =&3FF00<<13
        AND     a3, a2, a3, LSL #13 ; Get CS0 RAM size
        LDR     a2, =CS0_SDRAM
        ADD     a1, a3, a2
        MOV     pc, lr

; clear_ram
; Zero-fills SDRAM, one chip-select at a time, from the start of each populated
; chip-select up to (but not including) the address in a1.
; In:  a1 = end address (exclusive)
;      lr = return address
; Out: Can clobber all regs except v8 & sb
; For each chip-select the amount cleared is min(a1 - bank start, bank size).
; A chip-select whose start is at or above a1 is left untouched.
; If DMA clear is disabled, the clear areas must be multiples of 128 bytes in length. Else they must be multiples of 4 bytes.
clear_ram
 [ Use_DMA_Clear
        ; Reset the DMA controller
        LDR     v5, =L4_sDMA
        MOV     v1, #2
        STR     v1, [v5, #DMA4_OCP_SYSCONFIG]
5
        LDR     v1, [v5, #DMA4_SYSSTATUS]
        TST     v1, #1
        BEQ     %BT5
        ; Set a sensible FIFO budget (as per SDMACReset)
        LDR     a2, =&100080
        STR     a2, [v5, #DMA4_GCR]
        ; Configure channel 0 for the right settings
        ; From here on v5 = channel register base; it must stay intact until the final clear completes
        ADD     v5, v5, #DMA4_i
        LDR     v1, =&1014000 ; Constant fill, post-increment destination, source synchronised
        STR     v1, [v5, #DMA4_CCRi]
        MOV     v1, #0
        STR     v1, [v5, #DMA4_CLNK_CTRLi] ; Disable channel linking
        STR     v1, [v5, #DMA4_COLORi] ; Clear colour of 0. Although the clear colour register only holds a 24 bit value, the MSB (for 4-byte DMA) is always written as 0.
        MOV     v1, #1<<4
        STR     v1, [v5, #DMA4_CICRi] ; frame end interrupt enabled
        LDR     v1, =&2C002 ; 32bit elements, 64 byte bursts, last write non-posted
        STR     v1, [v5, #DMA4_CSDPi]
        MOV     v1, #1
        STR     v1, [v5, #DMA4_CFNi] ; 1 frame
 ]
        MOV     v2, a1                  ; v2 = end address, preserved across both chip-selects

        ; ---- CS0 ----
        LDR     v1, =SDRC_Regs
        LDR     a3, [v1, #SDRC_MCFG_0]
        LDR     a2, =&3FF00<<13
        ANDS    a3, a2, a3, LSL #13 ; Get CS0 RAM size
        BEQ     %FT10
        LDR     a2, =CS0_SDRAM
        ; Work out how much we're meant to be clearing.
        ; Unsigned arithmetic throughout: if the end address is at or below the
        ; start of this chip-select there is nothing to do. (A signed clamp plus
        ; "== 0" test would let a negative difference through as a huge length.)
        SUBS    a1, v2, a2
        BLS     %FT10
        CMP     a1, a3
        MOVHI   a1, a3                  ; Clamp to the size of this chip-select
 [ Use_DMA_Clear
        ; To keep things simple we split the transfer into chunks small enough to fit inside one frame (64MB) and wait for each one to complete
        ; This means we don't have to worry about the code breaking if the clear area isn't MB aligned (or 128 byte aligned, as the original code assumed)
        MOV     a1, a1, LSR #2 ; Number of elements remaining
        MOV     a3, #&1000000 ; Max elements per transfer+1 (not quite 64MB!)
40
        LDR     v1, [v5, #DMA4_CSRi]
        STR     v1, [v5, #DMA4_CSRi] ; Clear status register
        CMP     a1, a3
        MOVLT   v1, a1                  ; v1 = elements in this chunk
        SUBGE   v1, a3, #1
        STR     v1, [v5, #DMA4_CENi]
        SUB     a1, a1, v1
        STR     a2, [v5, #DMA4_CDSAi]
        ADD     a2, a2, v1, LSL #2      ; Advance destination past this chunk
        LDR     a4, =&1014080
        STR     a4, [v5, #DMA4_CCRi] ; Enable channel
        ; Use the transfer size as a rough timer for how long we should wait before we start hammering the status register
50
        SUBS    v1, v1, #256
        BGT     %BT50
50
        LDR     v1, [v5, #DMA4_CSRi]
        TST     v1, #1<<4               ; Frame end status bit
        BEQ     %BT50
        ; Make doubly sure that it's finished by checking WR_ACTIVE/RD_ACTIVE
50
        LDR     v1, [v5, #DMA4_CCRi]
        TST     v1, #&600
        BNE     %BT50
        CMP     a1, #0
        BNE     %BT40
 |
        MOV     a3, #0
        MOV     a4, #0
        MOV     v1, #0
        MOV     v3, #0
        MOV     v4, #0
        MOV     v5, #0
        MOV     sp, #0
        MOV     ip, #0
20
        STMIA   a2!,{a3,a4,v1,v3,v4,v5,sp,ip} ; 32 bytes
        STMIA   a2!,{a3,a4,v1,v3,v4,v5,sp,ip} ; 64 bytes
        STMIA   a2!,{a3,a4,v1,v3,v4,v5,sp,ip} ; 96 bytes
        STMIA   a2!,{a3,a4,v1,v3,v4,v5,sp,ip} ; 128 bytes
        SUBS    a1, a1, #128
        BGT     %BT20
 ]
10
        ; ---- CS1 ----
        LDR     v1, =SDRC_Regs
        LDR     a3, [v1, #SDRC_MCFG_1]
        LDR     a2, =&3FF00<<13
        ANDS    a3, a2, a3, LSL #13     ; Get CS1 RAM size
        BEQ     %FT30
        ; a2 = CS1 start = CS0_SDRAM + (CS_CFG bits 0-2)*128MB + (CS_CFG bits 8-9)*32MB
        LDR     a2, =CS0_SDRAM
        LDR     a4, [v1, #SDRC_CS_CFG]
        AND     v3, a4, #7 ; Offset in 128MB units
        ADD     a2, a2, v3, LSL #20+7
        AND     v3, a4, #&300 ; Offset in 32MB units
        ADD     a2, a2, v3, LSL #20+5-8
        ; Work out how much we're meant to be clearing.
        ; The end address may legitimately lie below CS1 (e.g. inside CS0), in
        ; which case CS1 must be skipped rather than treated as a huge length.
        SUBS    a1, v2, a2
        BLS     %FT30
        CMP     a1, a3
        MOVHI   a1, a3                  ; Clamp to the size of this chip-select
 [ Use_DMA_Clear
        ; To keep things simple we split the transfer into chunks small enough to fit inside one frame (64MB) and wait for each one to complete
        ; This means we don't have to worry about the code breaking if the clear area isn't MB aligned (or 128 byte aligned, as the original code assumed)
        MOV     a1, a1, LSR #2 ; Number of elements remaining
        MOV     a3, #&1000000 ; Max elements per transfer+1 (not quite 64MB!)
40
        LDR     v1, [v5, #DMA4_CSRi]
        STR     v1, [v5, #DMA4_CSRi] ; Clear status register
        CMP     a1, a3
        MOVLT   v1, a1                  ; v1 = elements in this chunk
        SUBGE   v1, a3, #1
        STR     v1, [v5, #DMA4_CENi]
        SUB     a1, a1, v1
        STR     a2, [v5, #DMA4_CDSAi]
        ADD     a2, a2, v1, LSL #2      ; Advance destination past this chunk
        LDR     a4, =&1014080
        STR     a4, [v5, #DMA4_CCRi] ; Enable channel
        ; Use the transfer size as a rough timer for how long we should wait before we start hammering the status register
50
        SUBS    v1, v1, #256
        BGT     %BT50
50
        LDR     v1, [v5, #DMA4_CSRi]
        TST     v1, #1<<4               ; Frame end status bit
        BEQ     %BT50
        ; Make doubly sure that it's finished by checking WR_ACTIVE/RD_ACTIVE
50
        LDR     v1, [v5, #DMA4_CCRi]
        TST     v1, #&600
        BNE     %BT50
        CMP     a1, #0
        BNE     %BT40
 |
        MOV     a3, #0
        MOV     a4, #0
        MOV     v1, #0
        MOV     v3, #0
        MOV     v4, #0
        MOV     v5, #0
        MOV     sp, #0
        MOV     ip, #0
40
        STMIA   a2!,{a3,a4,v1,v3,v4,v5,sp,ip} ; 32 bytes
        STMIA   a2!,{a3,a4,v1,v3,v4,v5,sp,ip} ; 64 bytes
        STMIA   a2!,{a3,a4,v1,v3,v4,v5,sp,ip} ; 96 bytes
        STMIA   a2!,{a3,a4,v1,v3,v4,v5,sp,ip} ; 128 bytes
        SUBS    a1, a1, #128
        BGT     %BT40
 ]
30
 [ Use_DMA_Clear
        ; Invalidate the I-cache & BTC, just in case (D cache & L2 cache should be turned off, so no need to worry about them)
        MOV     a1, #0
        MCR     p15, 0, a1, c7, c5, 0
        MCR     p15, 0, a1, c7, c5, 6
        myDSB
        myISB
 ]
        MOV     pc, lr

        END