/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/boot/compressed/head.S
 *
 *  Copyright (C) 1996-2002 Russell King
 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
 */
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/v7m.h>

#include "efi-header.S"

#ifdef __ARMEB__
#define OF_DT_MAGIC 0xd00dfeed
#else
#define OF_DT_MAGIC 0xedfe0dd0
#endif
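/*
 * Note: 0xd00dfeed is the FDT magic as stored in the blob (big-endian).
 * On a little-endian CPU a word load of those same bytes reads back as
 * 0xedfe0dd0, so the constant is byte-swapped here rather than swapping
 * every value read at run time.
 */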
 AR_CLASS(	.arch	armv7-a	)
 M_CLASS(	.arch	armv7-m	)
/*
 * Debugging stuff
 *
 * Note that these macros must not contain any code which is not
 * 100% relocatable.  Any attempt to do so will result in a crash.
 * Please select one of the following when turning on debugging.
 */
#ifdef DEBUG

#if defined(CONFIG_DEBUG_ICEDCC)
#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
		.macro	loadsp, rb, tmp1, tmp2
		.endm
		.macro	writeb, ch, rb, tmp
		mcr	p14, 0, \ch, c0, c5, 0	@ write \ch to the DCC (DBGDTRTX)
		.endm
#elif defined(CONFIG_CPU_XSCALE)
		.macro	loadsp, rb, tmp1, tmp2
		.endm
		.macro	writeb, ch, rb, tmp
		mcr	p14, 0, \ch, c8, c0, 0	@ write \ch to the XScale DCC TX
		.endm
#else
		.macro	loadsp, rb, tmp1, tmp2
		.endm
		.macro	writeb, ch, rb, tmp
		mcr	p14, 0, \ch, c1, c0, 0	@ write \ch via EmbeddedICE comms
		.endm
#endif
#else

#include CONFIG_DEBUG_LL_INCLUDE

		.macro	writeb, ch, rb, tmp
#ifdef CONFIG_DEBUG_UART_FLOW_CONTROL
		waituartcts \tmp, \rb
#endif
		waituarttxrdy \tmp, \rb
		senduart \ch, \rb
		busyuart \tmp, \rb
		.endm
#if defined(CONFIG_ARCH_SA1100)
		.macro	loadsp, rb, tmp1, tmp2
		mov	\rb, #0x80000000	@ physical base address
#ifdef CONFIG_DEBUG_LL_SER3
		add	\rb, \rb, #0x00050000	@ Ser3
#else
		add	\rb, \rb, #0x00010000	@ Ser1
#endif
		.endm
#else
		.macro	loadsp, rb, tmp1, tmp2
		addruart \rb, \tmp1, \tmp2
		.endm
#endif
#endif
#endif

		.macro	kputc, val
		mov	r0, \val
		bl	putc
		.endm

		.macro	kphex, val, len
		mov	r0, \val
		mov	r1, #\len
		bl	phex
		.endm
		/*
		 * Debug kernel copy by printing the memory addresses involved
		 */
		.macro	dbgkc, begin, end, cbegin, cend
#ifdef DEBUG
		kputc	#'C'
		kputc	#':'
		kputc	#'0'
		kputc	#'x'
		kphex	\begin, 8	/* Start of compressed kernel */
		kputc	#'-'
		kputc	#'0'
		kputc	#'x'
		kphex	\end, 8		/* End of compressed kernel */
		kputc	#'-'
		kputc	#'>'
		kputc	#'0'
		kputc	#'x'
		kphex	\cbegin, 8	/* Start of kernel copy */
		kputc	#'-'
		kputc	#'0'
		kputc	#'x'
		kphex	\cend, 8	/* End of kernel copy */
		kputc	#'\n'
#endif
		.endm
		/*
		 * Debug print of the final appended DTB location
		 */
		.macro	dbgadtb, begin, size
#ifdef DEBUG
		kputc	#'D'
		kputc	#'T'
		kputc	#'B'
		kputc	#':'
		kputc	#'0'
		kputc	#'x'
		kphex	\begin, 8	/* Start of appended DTB */
		kputc	#' '
		kputc	#'('
		kputc	#'0'
		kputc	#'x'
		kphex	\size, 8	/* Size of appended DTB */
		kputc	#')'
		kputc	#'\n'
#endif
		.endm
		.macro	enable_cp15_barriers, reg
		mrc	p15, 0, \reg, c1, c0, 0	@ read SCTLR
		tst	\reg, #(1 << 5)		@ CP15BEN bit set?
		bne	.L_\@			@ yes, nothing to do
		orr	\reg, \reg, #(1 << 5)	@ CP15 barrier instructions
		mcr	p15, 0, \reg, c1, c0, 0	@ write SCTLR
 ARM(		.inst	0xf57ff06f	)	@ v7+ isb
 THUMB(		isb			)
.L_\@:
		.endm
		/*
		 * The kernel build system appends the size of the
		 * decompressed kernel at the end of the compressed data
		 * in little-endian form.
		 */
		.macro	get_inflated_image_size, res:req, tmp1:req, tmp2:req
		adr	\res, .Linflated_image_size_offset
		ldr	\tmp1, [\res]
		add	\tmp1, \tmp1, \res	@ address of inflated image size

		ldrb	\res, [\tmp1]		@ get_unaligned_le32
		ldrb	\tmp2, [\tmp1, #1]
		orr	\res, \res, \tmp2, lsl #8
		ldrb	\tmp2, [\tmp1, #2]
		ldrb	\tmp1, [\tmp1, #3]
		orr	\res, \res, \tmp2, lsl #16
		orr	\res, \res, \tmp1, lsl #24
		.endm
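		/*
		 * The byte-wise loads above are needed because the appended
		 * size word is not guaranteed to be 32-bit aligned.  Roughly,
		 * in C:
		 *
		 *	u32 get_unaligned_le32(const u8 *p)
		 *	{
		 *		return p[0] | (p[1] << 8) | (p[2] << 16) |
		 *		       ((u32)p[3] << 24);
		 *	}
		 */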
		.macro	be32tocpu, val, tmp
#ifndef __ARMEB__
		/* convert to little endian */
		rev_l	\val, \tmp
#endif
		.endm
		.section ".start", "ax"
/*
 * sort out different calling conventions
 */
		.align
		/*
		 * Always enter in ARM state for CPUs that support the ARM ISA.
		 * As of today (2014) that's exactly the members of the A and R
		 * classes.
		 */
 AR_CLASS(	.arm	)
start:
		.type	start, #function
		/*
		 * These 7 nops along with the 1 nop immediately below for
		 * !THUMB2 form 8 nops that make the compressed kernel bootable
		 * on legacy ARM systems that were assuming the kernel in a.out
		 * binary format.  The boot loaders on these systems would
		 * jump 32 bytes into the image to skip the a.out header.
		 * With these 8 nops filling exactly 32 bytes, things still
		 * work as expected on these legacy systems.  Thumb2 mode keeps
		 * 7 of the nops as it turns out that some boot loaders
		 * were patching the initial instructions of the kernel, i.e.
		 * had started to exploit this "patch area".
		 */
		__initial_nops
		.rept	5
		__nop
		.endr
#ifndef CONFIG_THUMB2_KERNEL
		__nop
#else
 AR_CLASS(	sub	pc, pc, #3	)	@ A/R: switch to Thumb2 mode
  M_CLASS(	nop.w			)	@ M: already in Thumb2 mode
		.thumb
#endif
		W(b)	1f
		.word	_magic_sig	@ Magic numbers to help the loader
		.word	_magic_start	@ absolute load/run zImage address
		.word	_magic_end	@ zImage end address
		.word	0x04030201	@ endianness flag
		.word	0x45454545	@ another magic number to indicate
		.word	_magic_table	@ additional data table
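		/*
		 * The 0x04030201 word is emitted in the image's native byte
		 * order, so a loader looking at the raw bytes sees 01 02 03 04
		 * for a little-endian kernel and 04 03 02 01 for a big-endian
		 * one, and can tell the two apart without further metadata.
		 */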
		__EFI_HEADER
1:
 ARM_BE8(	setend	be		)	@ go BE8 if compiled for BE8
 AR_CLASS(	mrs	r9, cpsr	)
#ifdef CONFIG_ARM_VIRT_EXT
		bl	__hyp_stub_install	@ get into SVC mode, reversibly
#endif
		mov	r7, r1			@ save architecture ID
		mov	r8, r2			@ save atags pointer
#ifndef CONFIG_CPU_V7M
		/*
		 * Booting from Angel - need to enter SVC mode and disable
		 * FIQs/IRQs (numeric definitions from angel arm.h source).
		 * We only do this if we were in user mode on entry.
		 */
		mrs	r2, cpsr		@ get current mode
		tst	r2, #3			@ not user?
		bne	not_angel
		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
 ARM(		swi	0x123456	)	@ angel_SWI_ARM
 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
not_angel:
		safe_svcmode_maskall r0
		msr	spsr_cxsf, r9		@ Save the CPU boot mode in
						@ SPSR
#endif
		/*
		 * Note that some cache flushing and other stuff may
		 * be needed here - is there an Angel SWI call for this?
		 */

		/*
		 * some architecture specific code can be inserted
		 * by the linker here, but it should preserve r7, r8, and r9.
		 */

		.text
#ifdef CONFIG_AUTO_ZRELADDR
		/*
		 * Find the start of physical memory.  As we are executing
		 * without the MMU on, we are in the physical address space.
		 * We just need to get rid of any offset by aligning the
		 * address.
		 *
		 * This alignment is a balance between the requirements of
		 * different platforms - we have chosen 128MB to allow
		 * platforms which align the start of their physical memory
		 * to 128MB to use this feature, while allowing the zImage
		 * to be placed within the first 128MB of memory on other
		 * platforms.  Increasing the alignment means we place
		 * stricter alignment requirements on the start of physical
		 * memory, but relaxing it means that we break people who
		 * are already placing their zImage in (eg) the top 64MB
		 * of this range.
		 */
		mov	r0, pc
		and	r0, r0, #0xf8000000
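		/*
		 * Masking with 0xf8000000 keeps the top five address bits,
		 * i.e. rounds pc down to a 128MB boundary: a zImage executing
		 * at 0x42008000, for example, yields an assumed RAM start of
		 * 0x40000000.
		 */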
#ifdef CONFIG_USE_OF
		adr	r1, LC1
#ifdef CONFIG_ARM_APPENDED_DTB
		/*
		 * Look for an appended DTB.  If found, we cannot use it to
		 * validate the calculated start of physical memory, as its
		 * memory nodes may need to be augmented by ATAGS stored at
		 * an offset from the same start of physical memory.
		 */
		ldr	r2, [r1, #4]	@ get &_edata
		add	r2, r2, r1	@ relocate it
		ldr	r2, [r2]	@ get DTB signature
		ldr	r3, =OF_DT_MAGIC
		cmp	r2, r3		@ do we have a DTB there?
		beq	1f		@ if yes, skip validation
#endif /* CONFIG_ARM_APPENDED_DTB */
		/*
		 * Make sure we have some stack before calling C code.
		 * No GOT fixup has occurred yet, but none of the code we're
		 * about to call uses any global variables.
		 */
		ldr	sp, [r1]	@ get stack location
		add	sp, sp, r1	@ apply relocation

		/* Validate calculated start against passed DTB */
		mov	r1, r8
		bl	fdt_check_mem_start
1:
#endif /* CONFIG_USE_OF */
		/* Determine final kernel image address. */
		add	r4, r0, #TEXT_OFFSET
#else
		ldr	r4, =zreladdr
#endif
		/*
		 * Set up a page table only if it won't overwrite ourself.
		 * That means r4 < pc || r4 - 16k page directory > &_end.
		 * Given that r4 > &_end is most unfrequent, we add a rough
		 * additional 1MB of room for a possible appended DTB.
		 */
		mov	r0, pc
		cmp	r0, r4
		ldrcc	r0, .Lheadroom
		addcc	r0, r0, pc
		cmpcc	r4, r0
		orrcc	r4, r4, #1		@ remember we skipped cache_on
		blcs	cache_on

restart:	adr	r0, LC1
		ldr	sp, [r0]		@ get stack location
		ldr	r6, [r0, #4]		@ get &_edata
		add	sp, sp, r0		@ apply relocation
		add	r6, r6, r0

		get_inflated_image_size	r9, r10, lr
#ifndef CONFIG_ZBOOT_ROM
		/* malloc space is above the relocated stack (64k max) */
		add	r10, sp, #MALLOC_SIZE
#else
		/*
		 * With ZBOOT_ROM the bss/stack is non relocatable,
		 * but someone could still run this code from RAM,
		 * in which case our reference is _edata.
		 */
		mov	r10, r6
#endif
		mov	r5, #0			@ init dtb size to 0
#ifdef CONFIG_ARM_APPENDED_DTB
		/*
		 * r4  = final kernel address (possibly with LSB set)
		 * r5  = appended dtb size (still unknown)
		 * r6  = _edata
		 * r7  = architecture ID
		 * r8  = atags/device tree pointer
		 * r9  = size of decompressed image
		 * r10 = end of this image, including bss/stack/malloc space if non XIP
		 * sp  = stack pointer
		 *
		 * if there are device trees (dtb) appended to zImage, advance r10 so that the
		 * dtb data will get relocated along with the kernel if necessary.
		 */
		ldr	lr, [r6, #0]		@ load word at &_edata
		ldr	r1, =OF_DT_MAGIC
		cmp	lr, r1			@ is it a DTB?
		bne	dtb_check_done		@ not found
#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
		/*
		 * OK... Let's do some funky business here.
		 * If we do have a DTB appended to zImage, and we do have
		 * an ATAG list around, we want the latter to be translated
		 * and folded into the former here.  No GOT fixup has occurred
		 * yet, but none of the code we're about to call uses any
		 * global variables.
		 */

		/* Get the initial DTB size */
		ldr	r5, [r6, #4]
		be32tocpu r5, r1
		dbgadtb	r6, r5
		/* 50% DTB growth should be good enough */
		add	r5, r5, r5, lsr #1
		/* preserve 64-bit alignment */
		add	r5, r5, #7
		bic	r5, r5, #7
		/* clamp to 32KB min and 1MB max */
		cmp	r5, #(1 << 15)
		movlo	r5, #(1 << 15)
		cmp	r5, #(1 << 20)
		movhi	r5, #(1 << 20)
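		/*
		 * The sizing above, as a rough C sketch:
		 *
		 *	size += size / 2;		// 50% headroom for growth
		 *	size = (size + 7) & ~7;		// 64-bit alignment
		 *	size = clamp(size, SZ_32K, SZ_1M);
		 */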
		/* temporarily relocate the stack past the DTB work space */
		add	sp, sp, r5

		mov	r0, r8
		mov	r1, r6
		mov	r2, r5
		bl	atags_to_fdt

		/*
		 * If returned value is 1, there is no ATAG at the location
		 * pointed by r8.  Try the typical 0x100 offset from start
		 * of RAM and hope for the best.
		 */
		cmp	r0, #1
		sub	r0, r4, #TEXT_OFFSET
		bic	r0, r0, #1		@ clear the possible LSB
		add	r0, r0, #0x100
		mov	r1, r6
		mov	r2, r5
		bleq	atags_to_fdt

		sub	sp, sp, r5
#endif

		mov	r8, r6			@ use the appended device tree
		/*
		 * Make sure that the DTB doesn't end up in the final
		 * kernel's .bss area.  To do so, we adjust the decompressed
		 * kernel size to compensate if that .bss size is larger
		 * than the relocated code.
		 */
		ldr	r5, =_kernel_bss_size
		adr	r1, wont_overwrite
		sub	r1, r6, r1
		subs	r1, r5, r1
		addhi	r9, r9, r1

		/* Get the current DTB size */
		ldr	r5, [r6, #4]
		be32tocpu r5, r1

		/* preserve 64-bit alignment */
		add	r5, r5, #7
		bic	r5, r5, #7

		/* relocate some pointers past the appended dtb */
		add	r6, r6, r5
		add	r10, r10, r5
		add	sp, sp, r5
dtb_check_done:
#endif
		/*
		 * Check to see if we will overwrite ourselves.
		 *   r4  = final kernel address (possibly with LSB set)
		 *   r9  = size of decompressed image
		 *   r10 = end of this image, including bss/stack/malloc space if non XIP
		 * We basically want:
		 *   r4 - 16k page directory >= r10 -> OK
		 *   r4 + image length <= address of wont_overwrite -> OK
		 * Note: the possible LSB in r4 is harmless here.
		 */
		add	r10, r10, #16384
		cmp	r4, r10
		bhs	wont_overwrite
		add	r10, r4, r9
		adr	r9, wont_overwrite
		cmp	r10, r9
		bls	wont_overwrite
		/*
		 * Relocate ourselves past the end of the decompressed kernel.
		 *   r6  = _edata
		 *   r10 = end of the decompressed kernel
		 * Because we always copy ahead, we need to do it from the end and go
		 * backward in case the source and destination overlap.
		 */
		/*
		 * Bump to the next 256-byte boundary with the size of
		 * the relocation code added.  This avoids overwriting
		 * ourself when the offset is small.
		 */
		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
		bic	r10, r10, #255
		/* Get start of code we want to copy and align it down. */
		adr	r5, restart
		bic	r5, r5, #31

/* Relocate the hyp vector base if necessary */
#ifdef CONFIG_ARM_VIRT_EXT
		mrs	r0, spsr
		and	r0, r0, #MODE_MASK
		cmp	r0, #HYP_MODE
		bne	1f

		/*
		 * Compute the address of the hyp vectors after relocation.
		 * Call __hyp_set_vectors with the new address so that we
		 * can HVC again after the copy.
		 */
		adr_l	r0, __hyp_stub_vectors
		sub	r0, r0, r5
		add	r0, r0, r10
		bl	__hyp_set_vectors
1:
#endif
		sub	r9, r6, r5		@ size to copy
		add	r9, r9, #31		@ rounded up to a multiple
		bic	r9, r9, #31		@ ... of 32 bytes
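		/*
		 * Adding 31 and clearing the low five bits is the classic
		 * round-up, (n + 31) & ~31 in C: the size becomes the next
		 * 32-byte multiple, matching the 8-register (32-byte) chunks
		 * moved by the copy loop below.
		 */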
		add	r6, r9, r5		@ end of source
		add	r9, r9, r10		@ end of destination

#ifdef DEBUG
		sub	r10, r6, r5
		sub	r10, r9, r10
		/*
		 * We are about to copy the kernel to a new memory area.
		 * The boundaries of the new memory area can be found in
		 * r10 and r9, whilst r5 and r6 contain the boundaries
		 * of the memory we are going to copy.
		 * Calling dbgkc will help with the printing of this
		 * information.
		 */
		dbgkc	r5, r6, r10, r9
#endif
1:		ldmdb	r6!, {r0 - r3, r10 - r12, lr}
		cmp	r6, r5			@ copy from source address [r5]
		stmdb	r9!, {r0 - r3, r10 - r12, lr}
		bhi	1b
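		/*
		 * Copying from the highest address downwards is what makes
		 * the overlapping relocation safe, exactly like memmove()
		 * when dst > src; roughly:
		 *
		 *	while (src_end > src_start)
		 *		*--dst_end = *--src_end;
		 */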
		/* Preserve offset to relocated code. */
		sub	r6, r9, r6

		mov	r0, r9			@ start of relocated zImage
		add	r1, sp, r6		@ end of relocated zImage
		bl	cache_clean_flush

		badr	r0, restart
		add	r0, r0, r6
		mov	pc, r0

wont_overwrite:
		adr	r0, LC0
		ldmia	r0, {r1, r2, r3, r11, r12}
		sub	r0, r0, r1		@ calculate the delta offset
		/*
		 * If delta is zero, we are running at the address we were linked at.
		 *   r0  = delta
		 *   r2  = BSS start
		 *   r3  = BSS end
		 *   r4  = kernel execution address (possibly with LSB set)
		 *   r5  = appended dtb size (0 if not present)
		 *   r7  = architecture ID
		 *   r8  = atags pointer
		 *   r11 = GOT start
		 *   r12 = GOT end
		 *   sp  = stack pointer
		 */
		teq	r0, #0
		beq	not_relocated

		add	r11, r11, r0		@ relocate the GOT bounds
		add	r12, r12, r0
#ifndef CONFIG_ZBOOT_ROM
		/*
		 * If we're running fully PIC === CONFIG_ZBOOT_ROM = n,
		 * we need to fix up pointers into the BSS region.
		 * Note that the stack pointer has already been fixed up.
		 */
		add	r2, r2, r0
		add	r3, r3, r0

		/*
		 * Relocate all entries in the GOT table.
		 * Bump bss entries to _edata + dtb size
		 */
1:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
		add	r1, r1, r0		@ This fixes up C references
		cmp	r1, r2			@ if entry >= bss_start &&
		cmphs	r3, r1			@       bss_end > entry
		addhi	r1, r1, r5		@    entry += dtb size
		str	r1, [r11], #4		@ next entry
		cmp	r11, r12
		blo	1b

		/* bump our bss pointers too */
		add	r2, r2, r5
		add	r3, r3, r5
#else

		/*
		 * Relocate entries in the GOT table.  We only relocate
		 * the entries that are outside the (relocated) BSS region.
		 */
1:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
		cmp	r1, r2			@ entry < bss_start ||
		cmphs	r3, r1			@ _end < entry
		addlo	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r11], #4		@ C references.
		cmp	r11, r12
		blo	1b
#endif
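		/*
		 * Either loop above is the classic PIC startup fixup; as a
		 * rough C sketch:
		 *
		 *	for (u32 *p = got_start; p < got_end; p++)
		 *		*p += delta;	// plus the dtb-size/BSS
		 *				// adjustments seen above
		 */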
not_relocated:	mov	r0, #0
1:		str	r0, [r2], #4		@ clear bss
		str	r0, [r2], #4
		str	r0, [r2], #4
		str	r0, [r2], #4
		cmp	r2, r3
		blo	1b
		/*
		 * Did we skip the cache setup earlier?
		 * That is indicated by the LSB in r4.
		 * Do it now if so.
		 */
		tst	r4, #1
		bic	r4, r4, #1
		blne	cache_on
/*
 * The C runtime environment should now be setup sufficiently.
 * Set up some pointers, and start decompressing.
 *   r4  = kernel execution address
 *   r7  = architecture ID
 *   r8  = atags pointer
 */
		mov	r0, r4
		mov	r1, sp			@ malloc space above stack
		add	r2, sp, #MALLOC_SIZE	@ 64k max
		mov	r3, r7
		bl	decompress_kernel

		get_inflated_image_size	r1, r2, r3

		mov	r0, r4			@ start of inflated image
		add	r1, r1, r0		@ end of inflated image
		bl	cache_clean_flush
		bl	cache_off
#ifdef CONFIG_ARM_VIRT_EXT
		mrs	r0, spsr		@ Get saved CPU boot mode
		and	r0, r0, #MODE_MASK
		cmp	r0, #HYP_MODE		@ if not booted in HYP mode...
		bne	__enter_kernel		@ boot kernel directly

		adr_l	r0, __hyp_reentry_vectors
		bl	__hyp_set_vectors
		__HVC(0)			@ otherwise bounce to hyp mode

		b	.			@ should never be reached
#else
		b	__enter_kernel
#endif
		.align	2
		.type	LC0, #object
LC0:		.word	LC0			@ r1
		.word	__bss_start		@ r2
		.word	_end			@ r3
		.word	_got_start		@ r11
		.word	_got_end		@ r12
		.size	LC0, . - LC0

		.type	LC1, #object
LC1:		.word	.L_user_stack_end - LC1	@ sp
		.word	_edata - LC1		@ r6
		.size	LC1, . - LC1

.Lheadroom:
		.word	_end - restart + 16384 + 1024*1024

.Linflated_image_size_offset:
		.long	(input_data_end - 4) - .
#ifdef CONFIG_ARCH_RPC
		.globl	params
params:		ldr	r0, =0x10000100		@ params_phys for RPC
		mov	pc, lr
		.ltorg
		.align
#endif
/*
 * dcache_line_size - get the minimum D-cache line size from the CTR register
 * on ARMv7.
 */
		.macro	dcache_line_size, reg, tmp
#ifdef CONFIG_CPU_V7M
		movw	\tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR
		movt	\tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR
		ldr	\tmp, [\tmp]
#else
		mrc	p15, 0, \tmp, c0, c0, 1	@ read ctr
#endif
		lsr	\tmp, \tmp, #16
		and	\tmp, \tmp, #0xf	@ cache line size encoding
		mov	\reg, #4		@ bytes per word
		mov	\reg, \reg, lsl \tmp	@ actual cache line size
		.endm
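		/*
		 * CTR[19:16] (DminLine) encodes the smallest D-cache line as
		 * log2 of the number of words, so in C this is simply:
		 *
		 *	line_bytes = 4 << ((ctr >> 16) & 0xf);
		 */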
/*
 * Turn on the cache.  We need to setup some page tables so that we
 * can have both the I and D caches on.
 *
 * We place the page tables 16k down from the kernel execution address,
 * and we hope that nothing else is using it.  If we're using it, we
 * will go pop!
 *
 * On entry,
 *  r4 = kernel execution address
 *  r7 = architecture number
 *  r8 = atags pointer
 * On exit,
 *  r0, r1, r2, r3, r9, r10, r12 corrupted
 * This routine must preserve:
 *  r4, r7, r8
 */
		.align	5
cache_on:	mov	r3, #8			@ cache_on function
		b	call_cache_fn
/*
 * Initialize the highest priority protection region, PR7
 * to cover all 32bit address and cacheable and bufferable.
 */
__armv4_mpu_cache_on:
		mov	r0, #0x3f		@ 4G, the whole
		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
		mcr	p15, 0, r0, c6, c7, 1

		mov	r0, #0x80		@ PR7
		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on

		mov	r0, #0xc000
		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission

		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
						@ ...I .... ..D. WC.M
		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
		orr	r0, r0, #0x1000		@ ...1 .... .... ....

		mcr	p15, 0, r0, c1, c0, 0	@ write control reg

		mov	r0, #0
		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
		mov	pc, lr
__armv3_mpu_cache_on:
		mov	r0, #0x3f		@ 4G, the whole
		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting

		mov	r0, #0x80		@ PR7
		mcr	p15, 0, r0, c2, c0, 0	@ cache on
		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on

		mov	r0, #0xc000
		mcr	p15, 0, r0, c5, c0, 0	@ access permission

		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		/*
		 * ?? ARMv3 MMU does not allow reading the control register,
		 * does this really work on ARMv3 MPU?
		 */
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
						@ .... .... .... WC.M
		orr	r0, r0, #0x000d		@ .... .... .... 11.1
		/* ?? this overwrites the value constructed above? */
		mov	r0, #0
		mcr	p15, 0, r0, c1, c0, 0	@ write control reg

		/* ?? invalidate for the second time? */
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
#define CB_BITS 0x08
#else
#define CB_BITS 0x0c
#endif
__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
		bic	r3, r3, #0xff		@ Align the pointer
		bic	r3, r3, #0x3f00
/*
 * Initialise the page tables, turning on the cacheable and bufferable
 * bits for the RAM area only.
 */
		mov	r0, r3
		mov	r9, r0, lsr #18
		mov	r9, r9, lsl #18		@ start of RAM
		add	r10, r9, #0x10000000	@ a reasonable RAM size
		mov	r1, #0x12		@ XN|U + section mapping
		orr	r1, r1, #3 << 10	@ AP=11
		add	r2, r3, #16384
1:		cmp	r1, r9			@ if virt > start of RAM
		cmphs	r10, r1			@   && end of RAM > virt
		bic	r1, r1, #0x1c		@ clear XN|U + C + B
		orrlo	r1, r1, #0x10		@ Set XN|U for non-RAM
		orrhs	r1, r1, r6		@ set RAM section settings
		str	r1, [r0], #4		@ 1:1 mapping
		add	r1, r1, #1048576
		teq	r0, r2
		bne	1b
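		/*
		 * The loop above fills all 4096 first-level entries with 1MB
		 * section descriptors mapping virt == phys (entry i covers
		 * physical address i << 20); only the 256MB assumed to be RAM
		 * gets the cacheable/bufferable bits from r6.
		 */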
/*
 * If ever we are running from Flash, then we surely want the cache
 * to be enabled also for our execution instance...  We map 2MB of it
 * so there is no map overlap problem for up to 1 MB compressed kernel.
 * If the execution is in RAM then we would only be duplicating the above.
 */
		orr	r1, r6, #0x04		@ ensure B is set for this
		orr	r1, r1, #3 << 10
		mov	r2, pc
		mov	r2, r2, lsr #20
		orr	r1, r1, r2, lsl #20
		add	r0, r3, r2, lsl #2
		str	r1, [r0], #4
		add	r1, r1, #1048576
		str	r1, [r0]
		mov	pc, lr
ENDPROC(__setup_mmu)
@ Enable unaligned access on v6, to allow better code generation
@ for the decompressor C code:
__armv6_mmu_cache_on:
		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
		bic	r0, r0, #2		@ A (no unaligned access fault)
		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
		mcr	p15, 0, r0, c1, c0, 0	@ write SCTLR
		b	__armv4_mmu_cache_on
__arm926ejs_mmu_cache_on:
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
		mov	r0, #4			@ put dcache in WT mode
		mcr	p15, 7, r0, c15, c0, 0
#endif
						@ fall through
__armv4_mmu_cache_on:
		mov	r12, lr
#ifdef CONFIG_MMU
		mov	r6, #CB_BITS | 0x12	@ U
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x0030
 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
#endif
		mov	pc, r12
__armv7_mmu_cache_on:
		enable_cp15_barriers	r11
		mov	r12, lr
#ifdef CONFIG_MMU
		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
		tst	r11, #0xf		@ VMSA
		movne	r6, #CB_BITS | 0x02	@ !XN
		blne	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		tst	r11, #0xf		@ VMSA
		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
#endif
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		bic	r0, r0, #1 << 28	@ clear SCTLR.TRE
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x003c		@ write buffer
		bic	r0, r0, #2		@ A (no unaligned access fault)
		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
						@ (needed for ARM1176)
#ifdef CONFIG_MMU
 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
		mrcne	p15, 0, r6, c2, c0, 2	@ read ttb control reg
		orrne	r0, r0, #1		@ MMU enabled
		movne	r1, #0xfffffffd		@ domain 0 = client
		bic	r6, r6, #1 << 31	@ 32-bit translation system
		bic	r6, r6, #(7 << 0) | (1 << 4)	@ use only ttbr0
		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
		mcrne	p15, 0, r6, c2, c0, 2	@ load ttb control
#endif
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
		mov	r0, #0
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
		mov	pc, r12
__fa526_cache_on:
		mov	r12, lr
		mov	r6, #CB_BITS | 0x12	@ U
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x1000		@ I-cache enable
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
		mov	pc, r12
__common_mmu_cache_on:
#ifndef CONFIG_THUMB2_KERNEL
#ifndef DEBUG
		orr	r0, r0, #0x000d		@ Write buffer, mmu
#endif
		mov	r1, #-1
		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
		b	1f
		.align	5			@ cache line aligned
1:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
#endif
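		/*
		 * LSR #32 always yields zero, so the final instruction is
		 * "mov pc, lr" in disguise; making the return address depend
		 * on the value just read back from the control register is
		 * the traditional way to ensure the pipeline refills with the
		 * new MMU/cache state before execution continues.
		 */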
#define PROC_ENTRY_SIZE (4*5)
/*
 * Here follow the relocatable cache support functions for the
 * various processors.  This is a generic hook for locating an
 * entry and jumping to an instruction at the specified offset
 * from the start of the block.  Please note this is all position
 * independent code.
 *
 *  r1  = corrupted
 *  r2  = corrupted
 *  r3  = block offset
 *  r9  = corrupted
 *  r12 = corrupted
 */
call_cache_fn:	adr	r12, proc_types
#ifdef CONFIG_CPU_CP15
		mrc	p15, 0, r9, c0, c0	@ get processor ID
#elif defined(CONFIG_CPU_V7M)
		/*
		 * On v7-M the processor id is located in the V7M_SCB_CPUID
		 * register, but as cache handling is IMPLEMENTATION DEFINED on
		 * v7-M (if existent at all) we just return early here.
		 * If V7M_SCB_CPUID were used the cpu ID functions (i.e.
		 * __armv7_mmu_cache_{on,off,flush}) would be selected which
		 * use cp15 registers that are not implemented on v7-M.
		 */
		bx	lr
#else
		ldr	r9, =CONFIG_PROCESSOR_ID
#endif
1:		ldr	r1, [r12, #0]		@ get value
		ldr	r2, [r12, #4]		@ get mask
		eor	r1, r1, r9		@ (real ^ match)
		tst	r1, r2			@       & mask
 ARM(		addeq	pc, r12, r3		) @ call cache function
 THUMB(		addeq	r12, r3			)
 THUMB(		moveq	pc, r12			) @ call cache function
		add	r12, r12, #PROC_ENTRY_SIZE
		b	1b
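		/*
		 * The lookup above, as a rough C sketch:
		 *
		 *	for (p = proc_types; ; p++)
		 *		if (((cpu_id ^ p->match) & p->mask) == 0)
		 *			jump(p->methods + offset);  // on/off/flush
		 *
		 * The final all-zero table entry matches any ID, so an
		 * unrecognised CPU falls through to no-op handlers.
		 */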
/*
 * Table for cache operations.  This is basically:
 *  - CPU ID match
 *  - CPU ID mask
 *  - 'cache on' method instruction
 *  - 'cache off' method instruction
 *  - 'cache flush' method instruction
 *
 * We match an entry using: ((real_id ^ match) & mask) == 0
 *
 * Writethrough caches generally only need 'on' and 'off'
 * methods.  Writeback caches _must_ have the flush method
 * defined.
 */
		.align	2
		.type	proc_types, #object
proc_types:
		.word	0x41000000		@ old ARM ID
		.word	0xff00f000
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		.word	0x41007000		@ ARM7/710
		.word	0xfff8fe00
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		.word	0x41807200		@ ARM720T (writethrough)
		.word	0xffffff00
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		mov	pc, lr
 THUMB(		nop				)

		.word	0x41007400		@ ARM74x
		.word	0xff00ff00
		W(b)	__armv3_mpu_cache_on
		W(b)	__armv3_mpu_cache_off
		W(b)	__armv3_mpu_cache_flush

		.word	0x41009400		@ ARM94x
		.word	0xff00ff00
		W(b)	__armv4_mpu_cache_on
		W(b)	__armv4_mpu_cache_off
		W(b)	__armv4_mpu_cache_flush

		.word	0x41069260		@ ARM926EJ-S (v5TEJ)
		.word	0xff0ffff0
		W(b)	__arm926ejs_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush
		.word	0x00007000		@ ARM7 IDs
		.word	0x0000f000
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		@ Everything from here on will be the new ID system.
		.word	0x4401a100		@ sa110 / sa1100
		.word	0xffffffe0
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x6901b110		@ sa1110
		.word	0xfffffff0
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x56056900
		.word	0xffffff00		@ PXA9xx
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x56158000		@ PXA168
		.word	0xfffff000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

		.word	0x56050000		@ Feroceon
		.word	0xff0f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush
#ifdef CONFIG_CPU_FEROCEON_OLD_ID
		/* this conflicts with the standard ARMv5TE entry */
		.long	0x41009260		@ Old Feroceon
		.long	0xff00fff0
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv5tej_mmu_cache_flush
#endif
		.word	0x66015261		@ FA526
		.word	0xff01fff1
		W(b)	__fa526_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__fa526_cache_flush

		@ These match on the architecture ID

		.word	0x00020000		@ ARMv4T
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x00050000		@ ARMv5TE
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x00060000		@ ARMv5TEJ
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

		.word	0x0007b000		@ ARMv6
		.word	0x000ff000
		W(b)	__armv6_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv6_mmu_cache_flush

		.word	0x000f0000		@ new CPU Id
		.word	0x000f0000
		W(b)	__armv7_mmu_cache_on
		W(b)	__armv7_mmu_cache_off
		W(b)	__armv7_mmu_cache_flush
		.word	0			@ unrecognised type
		.word	0
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		.size	proc_types, . - proc_types
		/*
		 * If you get a "non-constant expression in ".if" statement"
		 * error from the assembler on this line, check that you have
		 * not accidentally written a "b" instruction where you should
		 * have written W(b).
		 */
		.if (. - proc_types) % PROC_ENTRY_SIZE != 0
		.error "The size of one or more proc_types entries is wrong."
		.endif
/*
 * Turn off the Cache and MMU.  ARMv3 does not support
 * reading the control register, but ARMv4 does.
 *
 * On exit,
 *  r0, r1, r2, r3, r9, r12 corrupted
 * This routine must preserve:
 *  r4, r7, r8
 */
		.align	5
cache_off:	mov	r3, #12			@ cache_off function
		b	call_cache_fn
__armv4_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
		mov	pc, lr
__armv3_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr
__armv4_mmu_cache_off:
#ifdef CONFIG_MMU
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
#endif
		mov	pc, lr
__armv7_mmu_cache_off:
		mrc	p15, 0, r0, c1, c0
#ifdef CONFIG_MMU
		bic	r0, r0, #0x000d
#else
		bic	r0, r0, #0x000c
#endif
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r0, #0
#ifdef CONFIG_MMU
		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
#endif
		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
		mcr	p15, 0, r0, c7, c10, 4	@ DSB
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
		mov	pc, lr
/*
 * Clean and flush the cache to maintain consistency.
 *
 * On entry,
 *  r0 = start address
 *  r1 = end address (exclusive)
 * On exit,
 *  r1, r2, r3, r9, r10, r11, r12 corrupted
 * This routine must preserve:
 *  r4, r6, r7, r8
 */
		.align	5
cache_clean_flush:
		mov	r3, #16
		mov	r11, r1
		b	call_cache_fn
__armv4_mpu_cache_flush:
		tst	r4, #1
		movne	pc, lr
		mov	r2, #1
		mov	r3, #0
		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
		mov	r1, #7 << 5		@ 8 segments
1:		orr	r3, r1, #63 << 26	@ 64 entries
2:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
		subs	r3, r3, #1 << 26
		bcs	2b			@ entries 63 to 0
		subs	r1, r1, #1 << 5
		bcs	1b			@ segments 7 to 0

		teq	r2, #0
		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
		mov	pc, lr
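		/*
		 * Set/way sketch: the index word packs the segment into bits
		 * [7:5] and the line within the segment into bits [31:26], so
		 * the two loops above visit every D-cache line, roughly:
		 *
		 *	for (seg = 7; seg >= 0; seg--)
		 *		for (idx = 63; idx >= 0; idx--)
		 *			clean_inv_dcache(seg << 5 | idx << 26);
		 */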
__fa526_cache_flush:
		tst	r4, #1
		movne	pc, lr
		mov	r1, #0
		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr
__armv6_mmu_cache_flush:
		mov	r1, #0
		tst	r4, #1
		mcreq	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
		mcreq	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr
__armv7_mmu_cache_flush:
		enable_cp15_barriers	r10
		tst	r4, #1
		bne	iflush
		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
		mov	r10, #0
		beq	hierarchical
		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
		b	iflush
hierarchical:
		dcache_line_size r1, r2		@ r1 := dcache min line size
		sub	r2, r1, #1		@ r2 := line size mask
		bic	r0, r0, r2		@ round down start to line size
		sub	r11, r11, #1		@ end address is exclusive
		bic	r11, r11, r2		@ round down end to line size
0:		cmp	r0, r11			@ finished?
		bgt	iflush
		mcr	p15, 0, r0, c7, c14, 1	@ Dcache clean/invalidate by VA
		add	r0, r0, r1
		b	0b
iflush:
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 4	@ ISB
		mov	pc, lr
__armv5tej_mmu_cache_flush:
		tst	r4, #1
		movne	pc, lr
1:		mrc	p15, 0, APSR_nzcv, c7, c14, 3	@ test,clean,invalidate D cache
		bne	1b
		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
		mov	pc, lr
__armv4_mmu_cache_flush:
		tst	r4, #1
		movne	pc, lr
		mov	r2, #64*1024		@ default: 32K dcache size (*2)
		mov	r11, #32		@ default: 32 byte line size
		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
		teq	r3, r9			@ cache ID register present?
		beq	no_cache_id
		mov	r1, r3, lsr #18
		and	r1, r1, #7
		mov	r2, #1024
		mov	r2, r2, lsl r1		@ base dcache size *2
		tst	r3, #1 << 14		@ test M bit
		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
		mov	r3, r3, lsr #12
		and	r3, r3, #3
		mov	r11, #8
		mov	r11, r11, lsl r3	@ cache line size in bytes
no_cache_id:
		mov	r1, pc
		bic	r1, r1, #63		@ align to longest cache line
		add	r2, r1, r2
1:
 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
 THUMB(		ldr	r3, [r1]		) @ s/w flush D cache
 THUMB(		add	r1, r1, r11		)
		teq	r1, r2
		bne	1b

		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr
__armv3_mmu_cache_flush:
__armv3_mpu_cache_flush:
		tst	r4, #1
		movne	pc, lr
		mov	r1, #0
		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr
/*
 * Various debugging routines for printing hex characters and
 * memory, which again must be relocatable.
 */
#ifdef DEBUG
		.align	2
		.type	phexbuf, #object
phexbuf:	.space	12
		.size	phexbuf, . - phexbuf
@ phex corrupts {r0, r1, r2, r3}
phex:		adr	r3, phexbuf
		mov	r2, #0
		strb	r2, [r3, r1]
1:		subs	r1, r1, #1
		movmi	r0, r3
		bmi	puts
		and	r2, r0, #15
		mov	r0, r0, lsr #4
		cmp	r2, #10
		addge	r2, r2, #7
		add	r2, r2, #'0'
		strb	r2, [r3, r1]
		b	1b
@ puts corrupts {r0, r1, r2, r3}
puts:		loadsp	r3, r2, r1
1:		ldrb	r2, [r0], #1
		teq	r2, #0
		moveq	pc, lr
2:		writeb	r2, r3, r1
		mov	r1, #0x00020000
3:		subs	r1, r1, #1
		bne	3b
		teq	r2, #'\n'
		moveq	r2, #'\r'
		beq	2b
		teq	r0, #0
		bne	1b
		mov	pc, lr
@ putc corrupts {r0, r1, r2, r3}
putc:		mov	r2, r0
		loadsp	r3, r1, r0
		mov	r0, #0
		b	2b
@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
memdump:	mov	r12, r0
		mov	r10, lr
		mov	r11, #0
2:		mov	r0, r11, lsl #2
		add	r0, r0, r12
		mov	r1, #8
		bl	phex
		mov	r0, #':'
		bl	putc
1:		mov	r0, #' '
		bl	putc
		ldr	r0, [r12, r11, lsl #2]
		mov	r1, #8
		bl	phex
		and	r0, r11, #7
		teq	r0, #3
		moveq	r0, #' '
		bleq	putc
		and	r0, r11, #7
		add	r11, r11, #1
		teq	r0, #7
		bne	1b
		mov	r0, #'\n'
		bl	putc
		cmp	r11, #64
		blt	2b
		mov	pc, r10
#endif
#ifdef CONFIG_ARM_VIRT_EXT
		.align	5
__hyp_reentry_vectors:
		W(b)	.			@ reset
		W(b)	.			@ undef
#ifdef CONFIG_EFI_STUB
		W(b)	__enter_kernel_from_hyp	@ hvc from HYP
#else
		W(b)	.			@ svc
#endif
		W(b)	.			@ pabort
		W(b)	.			@ dabort
		W(b)	__enter_kernel		@ hyp
		W(b)	.			@ irq
		W(b)	.			@ fiq
#endif /* CONFIG_ARM_VIRT_EXT */
__enter_kernel:
		mov	r0, #0			@ must be 0
		mov	r1, r7			@ restore architecture number
		mov	r2, r8			@ restore atags pointer
 ARM(		mov	pc, r4		)	@ call kernel
 M_CLASS(	add	r4, r4, #1	)	@ enter in Thumb mode for M class
 THUMB(		bx	r4		)	@ entry point is always ARM for A/R classes

reloc_code_end:
#ifdef CONFIG_EFI_STUB
__enter_kernel_from_hyp:
		mrc	p15, 4, r0, c1, c0, 0	@ read HSCTLR
		bic	r0, r0, #0x5		@ disable MMU and caches
		mcr	p15, 4, r0, c1, c0, 0	@ write HSCTLR
		isb
		b	__enter_kernel
ENTRY(efi_enter_kernel)
		mov	r4, r0			@ preserve image base
		mov	r8, r1			@ preserve DT pointer

		adr_l	r0, call_cache_fn
		adr	r1, 0f			@ clean the region of code we
		bl	cache_clean_flush	@ may run with the MMU off
#ifdef CONFIG_ARM_VIRT_EXT
		@
		@ The EFI spec does not support booting on ARM in HYP mode,
		@ since it mandates that the MMU and caches are on, with all
		@ 32-bit addressable DRAM mapped 1:1 using short descriptors.
		@
		@ While the EDK2 reference implementation adheres to this,
		@ U-Boot might decide to enter the EFI stub in HYP mode
		@ anyway, with the MMU and caches either on or off.
		@
		mrs	r0, cpsr		@ get the current mode
		msr	spsr_cxsf, r0		@ record boot mode
		and	r0, r0, #MODE_MASK	@ are we running in HYP mode?
		cmp	r0, #HYP_MODE
		bne	.Lefi_svc

		mrc	p15, 4, r1, c1, c0, 0	@ read HSCTLR
		tst	r1, #0x1		@ MMU enabled at HYP?
		beq	1f
		@
		@ When running in HYP mode with the caches on, we're better
		@ off just carrying on using the cached 1:1 mapping that the
		@ firmware provided. Set up the HYP vectors so HVC instructions
		@ issued from HYP mode take us to the correct handler code. We
		@ will disable the MMU before jumping to the kernel proper.
		@
 ARM(		bic	r1, r1, #(1 << 30)	) @ clear HSCTLR.TE
 THUMB(		orr	r1, r1, #(1 << 30)	) @ set HSCTLR.TE
		mcr	p15, 4, r1, c1, c0, 0
		adr	r0, __hyp_reentry_vectors
		mcr	p15, 4, r0, c12, c0, 0	@ set HYP vector base (HVBAR)
		isb
		b	.Lefi_hyp
		@
		@ When running in HYP mode with the caches off, we need to drop
		@ into SVC mode now, and let the decompressor set up its cached
		@ 1:1 mapping as usual.
		@
1:		mov	r9, r4			@ preserve image base
		bl	__hyp_stub_install	@ install HYP stub vectors
		safe_svcmode_maskall	r1	@ drop to SVC mode
		msr	spsr_cxsf, r0		@ record boot mode
		orr	r4, r9, #1		@ restore image base and set LSB
		b	.Lefi_hyp
.Lefi_svc:
		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
		tst	r0, #0x1		@ MMU enabled?
		orreq	r4, r4, #1		@ set LSB if not
.Lefi_hyp:
		mov	r0, r8			@ DT start
		add	r1, r8, r2		@ DT end
		bl	cache_clean_flush

		adr	r0, 0f			@ switch to our stack
		ldr	sp, [r0]
		add	sp, sp, r0

		mov	r5, #0			@ appended DTB size
		mov	r7, #0xFFFFFFFF		@ machine ID
		b	wont_overwrite
ENDPROC(efi_enter_kernel)
0:		.long	.L_user_stack_end - .
#endif

		.align
		.section ".stack", "aw", %nobits
.L_user_stack:	.space	4096
.L_user_stack_end: