2 * Re-map IO memory to kernel address space so that we can access it.
3 * This is needed for high PCI addresses that aren't mapped in the
4 * 640k-1MB IO memory area on PCs.
6 * (C) Copyright 1995 1996 Linus Torvalds
9 #include <linux/bootmem.h>
10 #include <linux/init.h>
12 #include <linux/ioport.h>
13 #include <linux/slab.h>
14 #include <linux/vmalloc.h>
15 #include <linux/mmiotrace.h>
16 #include <linux/mem_encrypt.h>
17 #include <linux/efi.h>
19 #include <asm/set_memory.h>
20 #include <asm/e820/api.h>
21 #include <asm/fixmap.h>
22 #include <asm/pgtable.h>
23 #include <asm/tlbflush.h>
24 #include <asm/pgalloc.h>
26 #include <asm/setup.h>
31 * Fix up the linear direct mapping of the kernel to avoid cache attribute conflicts.
34 int ioremap_change_attr(unsigned long vaddr, unsigned long size,
35 enum page_cache_mode pcm)
37 unsigned long nrpages = size >> PAGE_SHIFT;
41 case _PAGE_CACHE_MODE_UC:
43 err = _set_memory_uc(vaddr, nrpages);
45 case _PAGE_CACHE_MODE_WC:
46 err = _set_memory_wc(vaddr, nrpages);
48 case _PAGE_CACHE_MODE_WT:
49 err = _set_memory_wt(vaddr, nrpages);
51 case _PAGE_CACHE_MODE_WB:
52 err = _set_memory_wb(vaddr, nrpages);
59 static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages,
64 for (i = 0; i < nr_pages; ++i)
65 if (pfn_valid(start_pfn + i) &&
66 !PageReserved(pfn_to_page(start_pfn + i)))
73 * Remap an arbitrary physical address space into the kernel virtual
74 * address space. It transparently creates kernel huge I/O mapping when
75 * the physical address is aligned by a huge page size (1GB or 2MB) and
76 * the requested size is at least the huge page size.
78 * NOTE: MTRRs can override PAT memory types with a 4KB granularity.
79 * Therefore, the mapping code falls back to use a smaller page toward 4KB
80 * when a mapping range is covered by non-WB type of MTRRs.
82 * NOTE! We need to allow non-page-aligned mappings too: we will obviously
83 * have to convert them into an offset in a page-aligned mapping, but the
84 * caller shouldn't need to know that small detail.
86 static void __iomem *__ioremap_caller(resource_size_t phys_addr,
87 unsigned long size, enum page_cache_mode pcm, void *caller)
89 unsigned long offset, vaddr;
90 resource_size_t pfn, last_pfn, last_addr;
91 const resource_size_t unaligned_phys_addr = phys_addr;
92 const unsigned long unaligned_size = size;
93 struct vm_struct *area;
94 enum page_cache_mode new_pcm;
97 void __iomem *ret_addr;
99 /* Don't allow wraparound or zero size */
100 last_addr = phys_addr + size - 1;
101 if (!size || last_addr < phys_addr)
104 if (!phys_addr_valid(phys_addr)) {
105 printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
106 (unsigned long long)phys_addr);
112 * Don't allow anybody to remap normal RAM that we're using.
114 pfn = phys_addr >> PAGE_SHIFT;
115 last_pfn = last_addr >> PAGE_SHIFT;
116 if (walk_system_ram_range(pfn, last_pfn - pfn + 1, NULL,
117 __ioremap_check_ram) == 1) {
118 WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
119 &phys_addr, &last_addr);
124 * Mappings have to be page-aligned
126 offset = phys_addr & ~PAGE_MASK;
127 phys_addr &= PHYSICAL_PAGE_MASK;
128 size = PAGE_ALIGN(last_addr+1) - phys_addr;
130 retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
133 printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval);
137 if (pcm != new_pcm) {
138 if (!is_new_memtype_allowed(phys_addr, size, pcm, new_pcm)) {
140 "ioremap error for 0x%llx-0x%llx, requested 0x%x, got 0x%x\n",
141 (unsigned long long)phys_addr,
142 (unsigned long long)(phys_addr + size),
144 goto err_free_memtype;
149 prot = PAGE_KERNEL_IO;
151 case _PAGE_CACHE_MODE_UC:
153 prot = __pgprot(pgprot_val(prot) |
154 cachemode2protval(_PAGE_CACHE_MODE_UC));
156 case _PAGE_CACHE_MODE_UC_MINUS:
157 prot = __pgprot(pgprot_val(prot) |
158 cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS));
160 case _PAGE_CACHE_MODE_WC:
161 prot = __pgprot(pgprot_val(prot) |
162 cachemode2protval(_PAGE_CACHE_MODE_WC));
164 case _PAGE_CACHE_MODE_WT:
165 prot = __pgprot(pgprot_val(prot) |
166 cachemode2protval(_PAGE_CACHE_MODE_WT));
168 case _PAGE_CACHE_MODE_WB:
175 area = get_vm_area_caller(size, VM_IOREMAP, caller);
177 goto err_free_memtype;
178 area->phys_addr = phys_addr;
179 vaddr = (unsigned long) area->addr;
181 if (kernel_map_sync_memtype(phys_addr, size, pcm))
184 if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot))
187 ret_addr = (void __iomem *) (vaddr + offset);
188 mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
191 * Check if the request spans more than any BAR in the iomem resource tree.
194 if (iomem_map_sanity_check(unaligned_phys_addr, unaligned_size))
195 pr_warn("caller %pS mapping multiple BARs\n", caller);
201 free_memtype(phys_addr, phys_addr + size);
206 * ioremap_nocache - map bus memory into CPU space
207 * @phys_addr: bus address of the memory
208 * @size: size of the resource to map
210 * ioremap_nocache performs a platform specific sequence of operations to
211 * make bus memory CPU accessible via the readb/readw/readl/writeb/
212 * writew/writel functions and the other mmio helpers. The returned
213 * address is not guaranteed to be usable directly as a virtual address.
216 * This version of ioremap ensures that the memory is marked uncachable
217 * on the CPU as well as honouring existing caching rules from things like
218 * the PCI bus. Note that there are other caches and buffers on many
219 * busses. In particular driver authors should read up on PCI writes.
221 * It's useful if some control registers are in such an area and
222 * write combining or read caching is not desirable.
224 * Must be freed with iounmap.
226 void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
229 * Ideally, this should be:
230 * pat_enabled() ? _PAGE_CACHE_MODE_UC : _PAGE_CACHE_MODE_UC_MINUS;
232 * Till we fix all X drivers to use ioremap_wc(), we will use
233 * UC MINUS. Drivers that are certain they need or can already
234 * be converted over to strong UC can use ioremap_uc().
236 enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC_MINUS;
238 return __ioremap_caller(phys_addr, size, pcm,
239 __builtin_return_address(0));
241 EXPORT_SYMBOL(ioremap_nocache);
244 * ioremap_uc - map bus memory into CPU space as strongly uncachable
245 * @phys_addr: bus address of the memory
246 * @size: size of the resource to map
248 * ioremap_uc performs a platform specific sequence of operations to
249 * make bus memory CPU accessible via the readb/readw/readl/writeb/
250 * writew/writel functions and the other mmio helpers. The returned
251 * address is not guaranteed to be usable directly as a virtual address.
254 * This version of ioremap ensures that the memory is marked with a strong
255 * preference as completely uncachable on the CPU when possible. For non-PAT
256 * systems this ends up setting page-attribute flags PCD=1, PWT=1. For PAT
257 * systems this will set the PAT entry for the pages as strong UC. This call
258 * will honor existing caching rules from things like the PCI bus. Note that
259 * there are other caches and buffers on many busses. In particular driver
260 * authors should read up on PCI writes.
262 * It's useful if some control registers are in such an area and
263 * write combining or read caching is not desirable.
265 * Must be freed with iounmap.
267 void __iomem *ioremap_uc(resource_size_t phys_addr, unsigned long size)
269 enum page_cache_mode pcm = _PAGE_CACHE_MODE_UC;
271 return __ioremap_caller(phys_addr, size, pcm,
272 __builtin_return_address(0));
274 EXPORT_SYMBOL_GPL(ioremap_uc);
277 * ioremap_wc - map memory into CPU space write combined
278 * @phys_addr: bus address of the memory
279 * @size: size of the resource to map
281 * This version of ioremap ensures that the memory is marked write combining.
282 * Write combining allows faster writes to some hardware devices.
284 * Must be freed with iounmap.
286 void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
288 return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WC,
289 __builtin_return_address(0));
291 EXPORT_SYMBOL(ioremap_wc);
294 * ioremap_wt - map memory into CPU space write through
295 * @phys_addr: bus address of the memory
296 * @size: size of the resource to map
298 * This version of ioremap ensures that the memory is marked write through.
299 * Write through stores data into memory while keeping the cache up-to-date.
301 * Must be freed with iounmap.
303 void __iomem *ioremap_wt(resource_size_t phys_addr, unsigned long size)
305 return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WT,
306 __builtin_return_address(0));
308 EXPORT_SYMBOL(ioremap_wt);
310 void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
312 return __ioremap_caller(phys_addr, size, _PAGE_CACHE_MODE_WB,
313 __builtin_return_address(0));
315 EXPORT_SYMBOL(ioremap_cache);
317 void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
318 unsigned long prot_val)
320 return __ioremap_caller(phys_addr, size,
321 pgprot2cachemode(__pgprot(prot_val)),
322 __builtin_return_address(0));
324 EXPORT_SYMBOL(ioremap_prot);
327 * iounmap - Free an IO remapping
328 * @addr: virtual address from ioremap_*
330 * Caller must ensure there is only one unmapping for the same pointer.
332 void iounmap(volatile void __iomem *addr)
334 struct vm_struct *p, *o;
336 if ((void __force *)addr <= high_memory)
340 * The PCI/ISA range special-casing was removed from __ioremap()
341 * so this check, in theory, can be removed. However, there are
342 * cases where iounmap() is called for addresses not obtained via
343 * ioremap() (vga16fb for example). Add a warning so that these
344 * cases can be caught and fixed.
346 if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) &&
347 (void __force *)addr < phys_to_virt(ISA_END_ADDRESS)) {
348 WARN(1, "iounmap() called for ISA range not obtained using ioremap()\n");
352 mmiotrace_iounmap(addr);
354 addr = (volatile void __iomem *)
355 (PAGE_MASK & (unsigned long __force)addr);
357 /* Use the vm area unlocked, assuming the caller
358 ensures there isn't another iounmap for the same address
359 in parallel. Reuse of the virtual address is prevented by
360 leaving it in the global lists until we're done with it.
361 cpa takes care of the direct mappings. */
362 p = find_vm_area((void __force *)addr);
365 printk(KERN_ERR "iounmap: bad address %p\n", addr);
370 free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));
372 /* Finally remove it */
373 o = remove_vm_area((void __force *)addr);
374 BUG_ON(p != o || o == NULL);
377 EXPORT_SYMBOL(iounmap);
379 int __init arch_ioremap_pud_supported(void)
382 return boot_cpu_has(X86_FEATURE_GBPAGES);
388 int __init arch_ioremap_pmd_supported(void)
390 return boot_cpu_has(X86_FEATURE_PSE);
394 * Convert a physical pointer to a virtual kernel pointer for /dev/mem access.
397 void *xlate_dev_mem_ptr(phys_addr_t phys)
399 unsigned long start = phys & PAGE_MASK;
400 unsigned long offset = phys & ~PAGE_MASK;
403 /* memremap() maps if RAM, otherwise falls back to ioremap() */
404 vaddr = memremap(start, PAGE_SIZE, MEMREMAP_WB);
406 /* Only add the offset on success and return NULL if memremap() failed */
413 void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
415 memunmap((void *)((unsigned long)addr & PAGE_MASK));
419 * Examine the physical address to determine if it is an area of memory
420 * that should be mapped decrypted. If the memory is not part of the
421 * kernel usable area it was accessed and created decrypted, so these
422 * areas should be mapped decrypted. And since the encryption key can
423 * change across reboots, persistent memory should also be mapped decrypted.
426 static bool memremap_should_map_decrypted(resource_size_t phys_addr,
432 * Check if the address is part of a persistent memory region.
433 * This check covers areas added by E820, EFI and ACPI.
435 is_pmem = region_intersects(phys_addr, size, IORESOURCE_MEM,
436 IORES_DESC_PERSISTENT_MEMORY);
437 if (is_pmem != REGION_DISJOINT)
441 * Check if the non-volatile attribute is set for an EFI reserved area.
444 if (efi_enabled(EFI_BOOT)) {
445 switch (efi_mem_type(phys_addr)) {
446 case EFI_RESERVED_TYPE:
447 if (efi_mem_attributes(phys_addr) & EFI_MEMORY_NV)
455 /* Check if the address is outside kernel usable area */
456 switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) {
457 case E820_TYPE_RESERVED:
460 case E820_TYPE_UNUSABLE:
471 * Examine the physical address to determine if it is EFI data. Check
472 * it against the boot params structure and EFI tables and memory types.
474 static bool memremap_is_efi_data(resource_size_t phys_addr,
479 /* Check if the address is part of EFI boot/runtime data */
480 if (!efi_enabled(EFI_BOOT))
483 paddr = boot_params.efi_info.efi_memmap_hi;
485 paddr |= boot_params.efi_info.efi_memmap;
486 if (phys_addr == paddr)
489 paddr = boot_params.efi_info.efi_systab_hi;
491 paddr |= boot_params.efi_info.efi_systab;
492 if (phys_addr == paddr)
495 if (efi_is_table_address(phys_addr))
498 switch (efi_mem_type(phys_addr)) {
499 case EFI_BOOT_SERVICES_DATA:
500 case EFI_RUNTIME_SERVICES_DATA:
510 * Examine the physical address to determine if it is boot data by checking
511 * it against the boot params setup_data chain.
513 static bool memremap_is_setup_data(resource_size_t phys_addr,
516 struct setup_data *data;
517 u64 paddr, paddr_next;
519 paddr = boot_params.hdr.setup_data;
523 if (phys_addr == paddr)
526 data = memremap(paddr, sizeof(*data),
527 MEMREMAP_WB | MEMREMAP_DEC);
529 paddr_next = data->next;
534 if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
544 * Examine the physical address to determine if it is boot data by checking
545 * it against the boot params setup_data chain (early boot version).
547 static bool __init early_memremap_is_setup_data(resource_size_t phys_addr,
550 struct setup_data *data;
551 u64 paddr, paddr_next;
553 paddr = boot_params.hdr.setup_data;
557 if (phys_addr == paddr)
560 data = early_memremap_decrypted(paddr, sizeof(*data));
562 paddr_next = data->next;
565 early_memunmap(data, sizeof(*data));
567 if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
577 * Architecture function to determine if RAM remap is allowed. By default, a
578 * RAM remap will map the data as encrypted. Determine if a RAM remap should
579 * not be done so that the data will be mapped decrypted.
581 bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size,
587 if (flags & MEMREMAP_ENC)
590 if (flags & MEMREMAP_DEC)
593 if (memremap_is_setup_data(phys_addr, size) ||
594 memremap_is_efi_data(phys_addr, size) ||
595 memremap_should_map_decrypted(phys_addr, size))
602 * Architecture override of __weak function to adjust the protection attributes
603 * used when remapping memory. By default, early_memremap() will map the data
604 * as encrypted. Determine if an encrypted mapping should not be done and set
605 * the appropriate protection attributes.
607 pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
614 if (early_memremap_is_setup_data(phys_addr, size) ||
615 memremap_is_efi_data(phys_addr, size) ||
616 memremap_should_map_decrypted(phys_addr, size))
617 prot = pgprot_decrypted(prot);
619 prot = pgprot_encrypted(prot);
624 bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size)
626 return arch_memremap_can_ram_remap(phys_addr, size, 0);
629 #ifdef CONFIG_AMD_MEM_ENCRYPT
630 /* Remap memory with encryption */
631 void __init *early_memremap_encrypted(resource_size_t phys_addr,
634 return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC);
638 * Remap memory with encryption and write-protected - cannot be called
639 * before pat_init() is called
641 void __init *early_memremap_encrypted_wp(resource_size_t phys_addr,
644 /* Be sure the write-protect PAT entry is set for write-protect */
645 if (__pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] != _PAGE_CACHE_MODE_WP)
648 return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_ENC_WP);
651 /* Remap memory without encryption */
652 void __init *early_memremap_decrypted(resource_size_t phys_addr,
655 return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC);
659 * Remap memory without encryption and write-protected - cannot be called
660 * before pat_init() is called
662 void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
665 /* Be sure the write-protect PAT entry is set for write-protect */
666 if (__pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] != _PAGE_CACHE_MODE_WP)
669 return early_memremap_prot(phys_addr, size, __PAGE_KERNEL_NOENC_WP);
671 #endif /* CONFIG_AMD_MEM_ENCRYPT */
673 static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;
675 static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
677 /* Don't assume we're using swapper_pg_dir at this point */
678 pgd_t *base = __va(read_cr3_pa());
679 pgd_t *pgd = &base[pgd_index(addr)];
680 p4d_t *p4d = p4d_offset(pgd, addr);
681 pud_t *pud = pud_offset(p4d, addr);
682 pmd_t *pmd = pmd_offset(pud, addr);
687 static inline pte_t * __init early_ioremap_pte(unsigned long addr)
689 return &bm_pte[pte_index(addr)];
692 bool __init is_early_ioremap_ptep(pte_t *ptep)
694 return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)];
697 void __init early_ioremap_init(void)
702 BUILD_BUG_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
704 WARN_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1));
707 early_ioremap_setup();
709 pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
710 memset(bm_pte, 0, sizeof(bm_pte));
711 pmd_populate_kernel(&init_mm, pmd, bm_pte);
714 * The boot-ioremap range spans multiple pmds, for which
715 * we are not prepared:
717 #define __FIXADDR_TOP (-PAGE_SIZE)
718 BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
719 != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
721 if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
723 printk(KERN_WARNING "pmd %p != %p\n",
724 pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
725 printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
726 fix_to_virt(FIX_BTMAP_BEGIN));
727 printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END): %08lx\n",
728 fix_to_virt(FIX_BTMAP_END));
730 printk(KERN_WARNING "FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
731 printk(KERN_WARNING "FIX_BTMAP_BEGIN: %d\n",
736 void __init __early_set_fixmap(enum fixed_addresses idx,
737 phys_addr_t phys, pgprot_t flags)
739 unsigned long addr = __fix_to_virt(idx);
742 if (idx >= __end_of_fixed_addresses) {
746 pte = early_ioremap_pte(addr);
748 if (pgprot_val(flags))
749 set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
751 pte_clear(&init_mm, addr, pte);
752 __flush_tlb_one_kernel(addr);