// SPDX-License-Identifier: GPL-2.0-only
/**************************************************************************
 * Copyright (c) 2007, Intel Corporation.
 *
 **************************************************************************/

#include <linux/highmem.h>

#include "mmu.h"
#include "psb_drv.h"
#include "psb_reg.h"
/*
 * Code for the SGX MMU:
 */

/*
 * clflush on one processor only:
 * clflush should apparently flush the cache line on all processors in an
 * SMP system.
 */

/*
 * kmap atomic:
 * The usage of the slots must be completely encapsulated within a spinlock,
 * and no other functions that may be using the locks for other purposes may
 * be called from within the locked region.
 * Since the slots are per processor, this will guarantee that we are the
 * only user.
 */

/*
 * TODO: Inserting ptes from an interrupt handler:
 * This may be desirable for some SGX functionality where the GPU can fault
 * in needed pages. For that, we need to make an atomic insert_pages
 * function that may fail.
 * If it fails, the caller needs to insert the page using a workqueue
 * function, but on average it should be fast.
 */
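/*
 * The SGX MMU uses a two-level page table: one page-directory page whose
 * 32-bit entries each point to a page-table page of 32-bit PTEs (1024
 * entries per level, PAGE_SIZE pages).  The helpers below extract the
 * page-table and page-directory indices from a GPU virtual address; the
 * exact shift values come from the PSB_PTE_SHIFT/PSB_PDE_SHIFT definitions
 * in the driver headers.
 */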
static inline uint32_t psb_mmu_pt_index(uint32_t offset)
{
	return (offset >> PSB_PTE_SHIFT) & 0x3FF;
}

static inline uint32_t psb_mmu_pd_index(uint32_t offset)
{
	return offset >> PSB_PDE_SHIFT;
}

static inline void psb_clflush(void *addr)
{
	__asm__ __volatile__("clflush (%0)\n" : : "r"(addr) : "memory");
}

static inline void psb_mmu_clflush(struct psb_mmu_driver *driver, void *addr)
{
	if (!driver->has_clflush)
		return;

	mb();
	psb_clflush(addr);
	mb();
}
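/*
 * Invalidate the SGX MMU directory cache / TLB by toggling the INVALDC bit
 * in the bus interface control register.  The caller must hold driver->sem;
 * the "force" argument skips the needs_tlbflush check.
 */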
static void psb_mmu_flush_pd_locked(struct psb_mmu_driver *driver, int force)
{
	struct drm_device *dev = driver->dev;
	struct drm_psb_private *dev_priv = to_drm_psb_private(dev);

	if (atomic_read(&driver->needs_tlbflush) || force) {
		uint32_t val = PSB_RSGX32(PSB_CR_BIF_CTRL);

		PSB_WSGX32(val | _PSB_CB_CTRL_INVALDC, PSB_CR_BIF_CTRL);

		/* Make sure data cache is turned off before enabling it */
		wmb();
		PSB_WSGX32(val & ~_PSB_CB_CTRL_INVALDC, PSB_CR_BIF_CTRL);
		(void)PSB_RSGX32(PSB_CR_BIF_CTRL);
		if (driver->msvdx_mmu_invaldc)
			atomic_set(driver->msvdx_mmu_invaldc, 1);
	}
	atomic_set(&driver->needs_tlbflush, 0);
}
static void psb_mmu_flush_pd(struct psb_mmu_driver *driver, int force)
{
	down_write(&driver->sem);
	psb_mmu_flush_pd_locked(driver, force);
	up_write(&driver->sem);
}
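/*
 * Flush the SGX MMU: take the driver semaphore in write mode and either
 * invalidate the directory cache (if a TLB flush is pending) or flush the
 * bus interface, then restore PSB_CR_BIF_CTRL.
 */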
void psb_mmu_flush(struct psb_mmu_driver *driver)
{
	struct drm_device *dev = driver->dev;
	struct drm_psb_private *dev_priv = to_drm_psb_private(dev);
	uint32_t val;

	down_write(&driver->sem);
	val = PSB_RSGX32(PSB_CR_BIF_CTRL);
	if (atomic_read(&driver->needs_tlbflush))
		PSB_WSGX32(val | _PSB_CB_CTRL_INVALDC, PSB_CR_BIF_CTRL);
	else
		PSB_WSGX32(val | _PSB_CB_CTRL_FLUSH, PSB_CR_BIF_CTRL);

	/* Make sure data cache is turned off and MMU is flushed before
	   restoring bank interface control register */
	wmb();
	PSB_WSGX32(val & ~(_PSB_CB_CTRL_FLUSH | _PSB_CB_CTRL_INVALDC),
		   PSB_CR_BIF_CTRL);
	(void)PSB_RSGX32(PSB_CR_BIF_CTRL);

	atomic_set(&driver->needs_tlbflush, 0);
	if (driver->msvdx_mmu_invaldc)
		atomic_set(driver->msvdx_mmu_invaldc, 1);
	up_write(&driver->sem);
}
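/*
 * Bind a page directory to a hardware context by writing its page frame
 * address into the corresponding BIF directory-list base register.
 */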
void psb_mmu_set_pd_context(struct psb_mmu_pd *pd, int hw_context)
{
	struct drm_device *dev = pd->driver->dev;
	struct drm_psb_private *dev_priv = to_drm_psb_private(dev);
	uint32_t offset = (hw_context == 0) ? PSB_CR_BIF_DIR_LIST_BASE0 :
			  PSB_CR_BIF_DIR_LIST_BASE1 + hw_context * 4;

	down_write(&pd->driver->sem);
	PSB_WSGX32(page_to_pfn(pd->p) << PAGE_SHIFT, offset);
	wmb();
	psb_mmu_flush_pd_locked(pd->driver, 1);
	pd->hw_context = hw_context;
	up_write(&pd->driver->sem);
}
static inline unsigned long psb_pd_addr_end(unsigned long addr,
					    unsigned long end)
{
	addr = (addr + PSB_PDE_MASK + 1) & ~PSB_PDE_MASK;
	return (addr < end) ? addr : end;
}
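/*
 * Build a hardware PTE from a page frame number and the PSB_MMU_* memory
 * type flags.  For example, psb_mmu_mask_pte(pfn, PSB_MMU_CACHED_MEMORY)
 * yields (pfn << PAGE_SHIFT) | PSB_PTE_VALID | PSB_PTE_CACHED.
 */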
static inline uint32_t psb_mmu_mask_pte(uint32_t pfn, int type)
{
	uint32_t mask = PSB_PTE_VALID;

	if (type & PSB_MMU_CACHED_MEMORY)
		mask |= PSB_PTE_CACHED;
	if (type & PSB_MMU_RO_MEMORY)
		mask |= PSB_PTE_RO;
	if (type & PSB_MMU_WO_MEMORY)
		mask |= PSB_PTE_WO;

	return (pfn << PAGE_SHIFT) | mask;
}
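/*
 * Allocate a page directory.  The dummy page table and dummy page back the
 * "invalid" PDE/PTE values used when page faults are not trapped, so that
 * stray accesses hit harmless memory instead of faulting.
 */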
struct psb_mmu_pd *psb_mmu_alloc_pd(struct psb_mmu_driver *driver,
				    int trap_pagefaults, int invalid_type)
{
	struct psb_mmu_pd *pd = kmalloc(sizeof(*pd), GFP_KERNEL);
	uint32_t *v;
	int i;

	if (!pd)
		return NULL;

	pd->p = alloc_page(GFP_DMA32);
	if (!pd->p)
		goto out_err1;
	pd->dummy_pt = alloc_page(GFP_DMA32);
	if (!pd->dummy_pt)
		goto out_err2;
	pd->dummy_page = alloc_page(GFP_DMA32);
	if (!pd->dummy_page)
		goto out_err3;

	if (!trap_pagefaults) {
		pd->invalid_pde = psb_mmu_mask_pte(page_to_pfn(pd->dummy_pt),
						   invalid_type);
		pd->invalid_pte = psb_mmu_mask_pte(page_to_pfn(pd->dummy_page),
						   invalid_type);
	} else {
		pd->invalid_pde = 0;
		pd->invalid_pte = 0;
	}

	v = kmap_local_page(pd->dummy_pt);
	for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i)
		v[i] = pd->invalid_pte;

	kunmap_local(v);

	v = kmap_local_page(pd->p);
	for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i)
		v[i] = pd->invalid_pde;

	kunmap_local(v);

	clear_page(kmap(pd->dummy_page));
	kunmap(pd->dummy_page);

	pd->tables = vmalloc_user(sizeof(struct psb_mmu_pt *) * 1024);
	if (!pd->tables)
		goto out_err4;

	pd->hw_context = -1;
	pd->pd_mask = PSB_PTE_VALID;
	pd->driver = driver;

	return pd;

out_err4:
	__free_page(pd->dummy_page);
out_err3:
	__free_page(pd->dummy_pt);
out_err2:
	__free_page(pd->p);
out_err1:
	kfree(pd);
	return NULL;
}
static void psb_mmu_free_pt(struct psb_mmu_pt *pt)
{
	__free_page(pt->p);
	kfree(pt);
}
void psb_mmu_free_pagedir(struct psb_mmu_pd *pd)
{
	struct psb_mmu_driver *driver = pd->driver;
	struct drm_device *dev = driver->dev;
	struct drm_psb_private *dev_priv = to_drm_psb_private(dev);
	struct psb_mmu_pt *pt;
	int i;

	down_write(&driver->sem);
	if (pd->hw_context != -1) {
		PSB_WSGX32(0, PSB_CR_BIF_DIR_LIST_BASE0 + pd->hw_context * 4);
		psb_mmu_flush_pd_locked(driver, 1);
	}

	/* Should take the spinlock here, but we don't need to do that
	   since we have the semaphore in write mode. */

	for (i = 0; i < 1024; ++i) {
		pt = pd->tables[i];
		if (pt)
			psb_mmu_free_pt(pt);
	}

	vfree(pd->tables);
	__free_page(pd->dummy_page);
	__free_page(pd->dummy_pt);
	__free_page(pd->p);
	kfree(pd);
	up_write(&driver->sem);
}
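/*
 * Allocate a page table page and initialise every entry to the directory's
 * invalid PTE.  If the directory is bound to a hardware context and the CPU
 * supports clflush, the freshly written entries are flushed line by line.
 */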
static struct psb_mmu_pt *psb_mmu_alloc_pt(struct psb_mmu_pd *pd)
{
	struct psb_mmu_pt *pt = kmalloc(sizeof(*pt), GFP_KERNEL);
	void *v;
	uint32_t clflush_add = pd->driver->clflush_add >> PAGE_SHIFT;
	uint32_t clflush_count = PAGE_SIZE / clflush_add;
	spinlock_t *lock = &pd->driver->lock;
	uint8_t *clf;
	uint32_t *ptes;
	int i;

	if (!pt)
		return NULL;

	pt->p = alloc_page(GFP_DMA32);
	if (!pt->p) {
		kfree(pt);
		return NULL;
	}

	spin_lock(lock);

	v = kmap_atomic(pt->p);
	clf = (uint8_t *) v;
	ptes = (uint32_t *) v;
	for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i)
		*ptes++ = pd->invalid_pte;

	if (pd->driver->has_clflush && pd->hw_context != -1) {
		mb();
		for (i = 0; i < clflush_count; ++i) {
			psb_clflush(clf);
			clf += clflush_add;
		}
		mb();
	}
	kunmap_atomic(v);
	spin_unlock(lock);

	pt->count = 0;
	pt->pd = pd;
	pt->index = 0;

	return pt;
}
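/*
 * Look up (or lazily allocate) the page table covering @addr, map it with
 * kmap_atomic() and return it with the driver spinlock held.  Callers pair
 * this with psb_mmu_pt_unmap_unlock().
 */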
static struct psb_mmu_pt *psb_mmu_pt_alloc_map_lock(struct psb_mmu_pd *pd,
						    unsigned long addr)
{
	uint32_t index = psb_mmu_pd_index(addr);
	struct psb_mmu_pt *pt;
	uint32_t *v;
	spinlock_t *lock = &pd->driver->lock;

	spin_lock(lock);
	pt = pd->tables[index];
	while (!pt) {
		spin_unlock(lock);
		pt = psb_mmu_alloc_pt(pd);
		if (!pt)
			return NULL;
		spin_lock(lock);

		if (pd->tables[index]) {
			spin_unlock(lock);
			psb_mmu_free_pt(pt);
			spin_lock(lock);
			pt = pd->tables[index];
			continue;
		}

		v = kmap_atomic(pd->p);
		pd->tables[index] = pt;
		v[index] = (page_to_pfn(pt->p) << 12) | pd->pd_mask;
		pt->index = index;
		kunmap_atomic((void *) v);

		if (pd->hw_context != -1) {
			psb_mmu_clflush(pd->driver, (void *)&v[index]);
			atomic_set(&pd->driver->needs_tlbflush, 1);
		}
	}
	pt->v = kmap_atomic(pt->p);
	return pt;
}
static struct psb_mmu_pt *psb_mmu_pt_map_lock(struct psb_mmu_pd *pd,
					      unsigned long addr)
{
	uint32_t index = psb_mmu_pd_index(addr);
	struct psb_mmu_pt *pt;
	spinlock_t *lock = &pd->driver->lock;

	spin_lock(lock);
	pt = pd->tables[index];
	if (!pt) {
		spin_unlock(lock);
		return NULL;
	}
	pt->v = kmap_atomic(pt->p);
	return pt;
}
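/*
 * Unmap a page table mapped by psb_mmu_pt_map_lock()/_alloc_map_lock() and
 * drop the spinlock.  If the table no longer maps any pages (count == 0),
 * the PDE is set back to the invalid entry and the table page is freed.
 */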
static void psb_mmu_pt_unmap_unlock(struct psb_mmu_pt *pt)
{
	struct psb_mmu_pd *pd = pt->pd;
	uint32_t *v;

	kunmap_atomic(pt->v);
	if (pt->count == 0) {
		v = kmap_atomic(pd->p);
		v[pt->index] = pd->invalid_pde;
		pd->tables[pt->index] = NULL;

		if (pd->hw_context != -1) {
			psb_mmu_clflush(pd->driver, (void *)&v[pt->index]);
			atomic_set(&pd->driver->needs_tlbflush, 1);
		}
		kunmap_atomic(v);
		spin_unlock(&pd->driver->lock);
		psb_mmu_free_pt(pt);
		return;
	}
	spin_unlock(&pd->driver->lock);
}
static inline void psb_mmu_set_pte(struct psb_mmu_pt *pt, unsigned long addr,
				   uint32_t pte)
{
	pt->v[psb_mmu_pt_index(addr)] = pte;
}

static inline void psb_mmu_invalidate_pte(struct psb_mmu_pt *pt,
					  unsigned long addr)
{
	pt->v[psb_mmu_pt_index(addr)] = pt->pd->invalid_pte;
}
struct psb_mmu_pd *psb_mmu_get_default_pd(struct psb_mmu_driver *driver)
{
	struct psb_mmu_pd *pd;

	down_read(&driver->sem);
	pd = driver->default_pd;
	up_read(&driver->sem);

	return pd;
}
void psb_mmu_driver_takedown(struct psb_mmu_driver *driver)
{
	struct drm_device *dev = driver->dev;
	struct drm_psb_private *dev_priv = to_drm_psb_private(dev);

	PSB_WSGX32(driver->bif_ctrl, PSB_CR_BIF_CTRL);
	psb_mmu_free_pagedir(driver->default_pd);
	kfree(driver);
}
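/*
 * Set up the MMU driver: allocate the default page directory, reset the bus
 * interface fault state and, when the CPU supports clflush, derive the
 * cache-line stride used when flushing PTEs.
 *
 * Illustrative call only (the actual arguments depend on the caller's setup):
 *
 *	dev_priv->mmu = psb_mmu_driver_init(dev, 1, 0, NULL);
 */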
struct psb_mmu_driver *psb_mmu_driver_init(struct drm_device *dev,
					   int trap_pagefaults,
					   int invalid_type,
					   atomic_t *msvdx_mmu_invaldc)
{
	struct psb_mmu_driver *driver;
	struct drm_psb_private *dev_priv = to_drm_psb_private(dev);

	driver = kmalloc(sizeof(*driver), GFP_KERNEL);
	if (!driver)
		return NULL;

	driver->dev = dev;
	driver->default_pd = psb_mmu_alloc_pd(driver, trap_pagefaults,
					      invalid_type);
	if (!driver->default_pd)
		goto out_err1;

	spin_lock_init(&driver->lock);
	init_rwsem(&driver->sem);
	down_write(&driver->sem);
	atomic_set(&driver->needs_tlbflush, 1);
	driver->msvdx_mmu_invaldc = msvdx_mmu_invaldc;

	driver->bif_ctrl = PSB_RSGX32(PSB_CR_BIF_CTRL);
	PSB_WSGX32(driver->bif_ctrl | _PSB_CB_CTRL_CLEAR_FAULT,
		   PSB_CR_BIF_CTRL);
	PSB_WSGX32(driver->bif_ctrl & ~_PSB_CB_CTRL_CLEAR_FAULT,
		   PSB_CR_BIF_CTRL);

	driver->has_clflush = 0;

	if (boot_cpu_has(X86_FEATURE_CLFLUSH)) {
		uint32_t tfms, misc, cap0, cap4, clflush_size;

		/*
		 * clflush size is determined at kernel setup for x86_64 but
		 * not for i386. We have to do it here.
		 */

		cpuid(0x00000001, &tfms, &misc, &cap0, &cap4);
		clflush_size = ((misc >> 8) & 0xff) * 8;
		driver->has_clflush = 1;
		driver->clflush_add =
		    PAGE_SIZE * clflush_size / sizeof(uint32_t);
		driver->clflush_mask = driver->clflush_add - 1;
		driver->clflush_mask = ~driver->clflush_mask;
	}

	up_write(&driver->sem);
	return driver;

out_err1:
	kfree(driver);
	return NULL;
}
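/*
 * Write back (clflush) the cache lines holding the PTEs of a range so the
 * device observes up-to-date entries.  Honours the tile-stride layout used
 * by the insert/remove helpers and is a no-op when clflush is unavailable.
 */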
static void psb_mmu_flush_ptes(struct psb_mmu_pd *pd, unsigned long address,
			       uint32_t num_pages, uint32_t desired_tile_stride,
			       uint32_t hw_tile_stride)
{
	struct psb_mmu_pt *pt;
	uint32_t rows = 1;
	uint32_t i;
	unsigned long addr;
	unsigned long end;
	unsigned long next;
	unsigned long add;
	unsigned long row_add;
	unsigned long clflush_add = pd->driver->clflush_add;
	unsigned long clflush_mask = pd->driver->clflush_mask;

	if (!pd->driver->has_clflush)
		return;

	if (hw_tile_stride)
		rows = num_pages / desired_tile_stride;
	else
		desired_tile_stride = num_pages;

	add = desired_tile_stride << PAGE_SHIFT;
	row_add = hw_tile_stride << PAGE_SHIFT;
	mb();
	for (i = 0; i < rows; ++i) {

		addr = address;
		end = addr + add;

		do {
			next = psb_pd_addr_end(addr, end);
			pt = psb_mmu_pt_map_lock(pd, addr);
			if (!pt)
				continue;
			do {
				psb_clflush(&pt->v[psb_mmu_pt_index(addr)]);
			} while (addr += clflush_add,
				 (addr & clflush_mask) < next);

			psb_mmu_pt_unmap_unlock(pt);
		} while (addr = next, next != end);

		address += row_add;
	}
	mb();
}
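/*
 * Unmap a linear range of num_pages pages starting at GPU virtual address
 * @address, writing back the invalid PTE for each entry and flushing
 * caches/TLB as needed.
 */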
void psb_mmu_remove_pfn_sequence(struct psb_mmu_pd *pd,
				 unsigned long address, uint32_t num_pages)
{
	struct psb_mmu_pt *pt;
	unsigned long addr;
	unsigned long end;
	unsigned long next;
	unsigned long f_address = address;

	down_read(&pd->driver->sem);

	addr = address;
	end = addr + (num_pages << PAGE_SHIFT);

	do {
		next = psb_pd_addr_end(addr, end);
		pt = psb_mmu_pt_alloc_map_lock(pd, addr);
		if (!pt)
			goto out;
		do {
			psb_mmu_invalidate_pte(pt, addr);
			--pt->count;
		} while (addr += PAGE_SIZE, addr < next);
		psb_mmu_pt_unmap_unlock(pt);

	} while (addr = next, next != end);

out:
	if (pd->hw_context != -1)
		psb_mmu_flush_ptes(pd, f_address, num_pages, 1, 1);

	up_read(&pd->driver->sem);

	if (pd->hw_context != -1)
		psb_mmu_flush(pd->driver);
}
void psb_mmu_remove_pages(struct psb_mmu_pd *pd, unsigned long address,
			  uint32_t num_pages, uint32_t desired_tile_stride,
			  uint32_t hw_tile_stride)
{
	struct psb_mmu_pt *pt;
	uint32_t rows = 1;
	uint32_t i;
	unsigned long addr;
	unsigned long end;
	unsigned long next;
	unsigned long add;
	unsigned long row_add;
	unsigned long f_address = address;

	if (hw_tile_stride)
		rows = num_pages / desired_tile_stride;
	else
		desired_tile_stride = num_pages;

	add = desired_tile_stride << PAGE_SHIFT;
	row_add = hw_tile_stride << PAGE_SHIFT;

	down_read(&pd->driver->sem);

	/* Make sure we only need to flush this processor's cache */

	for (i = 0; i < rows; ++i) {

		addr = address;
		end = addr + add;

		do {
			next = psb_pd_addr_end(addr, end);
			pt = psb_mmu_pt_map_lock(pd, addr);
			if (!pt)
				continue;
			do {
				psb_mmu_invalidate_pte(pt, addr);
				--pt->count;
			} while (addr += PAGE_SIZE, addr < next);
			psb_mmu_pt_unmap_unlock(pt);

		} while (addr = next, next != end);

		address += row_add;
	}
	if (pd->hw_context != -1)
		psb_mmu_flush_ptes(pd, f_address, num_pages,
				   desired_tile_stride, hw_tile_stride);

	up_read(&pd->driver->sem);

	if (pd->hw_context != -1)
		psb_mmu_flush(pd->driver);
}
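/*
 * Map num_pages physically contiguous pages, starting at pfn @start_pfn,
 * into the directory at GPU virtual address @address.
 *
 * Illustrative use (names are examples only): mapping a contiguous region
 * of "npages" pages at GPU offset "gpu_addr" as cached memory:
 *
 *	ret = psb_mmu_insert_pfn_sequence(pd, base_pfn, gpu_addr, npages,
 *					  PSB_MMU_CACHED_MEMORY);
 */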
int psb_mmu_insert_pfn_sequence(struct psb_mmu_pd *pd, uint32_t start_pfn,
				unsigned long address, uint32_t num_pages,
				int type)
{
	struct psb_mmu_pt *pt;
	uint32_t pte;
	unsigned long addr;
	unsigned long end;
	unsigned long next;
	unsigned long f_address = address;
	int ret = -ENOMEM;

	down_read(&pd->driver->sem);

	addr = address;
	end = addr + (num_pages << PAGE_SHIFT);

	do {
		next = psb_pd_addr_end(addr, end);
		pt = psb_mmu_pt_alloc_map_lock(pd, addr);
		if (!pt) {
			ret = -ENOMEM;
			goto out;
		}
		do {
			pte = psb_mmu_mask_pte(start_pfn++, type);
			psb_mmu_set_pte(pt, addr, pte);
			pt->count++;
		} while (addr += PAGE_SIZE, addr < next);
		psb_mmu_pt_unmap_unlock(pt);

	} while (addr = next, next != end);
	ret = 0;

out:
	if (pd->hw_context != -1)
		psb_mmu_flush_ptes(pd, f_address, num_pages, 1, 1);

	up_read(&pd->driver->sem);

	if (pd->hw_context != -1)
		psb_mmu_flush(pd->driver);

	return ret;
}
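/*
 * Map an array of (not necessarily contiguous) pages into the directory.
 * desired_tile_stride/hw_tile_stride describe a tiled layout; passing a
 * hw_tile_stride of 0 maps the pages linearly.
 *
 * Illustrative use (illustrative names): linear mapping of a buffer's pages
 * into the default page directory:
 *
 *	pd = psb_mmu_get_default_pd(driver);
 *	ret = psb_mmu_insert_pages(pd, pages, gpu_addr, npages, 0, 0,
 *				   PSB_MMU_CACHED_MEMORY);
 */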
int psb_mmu_insert_pages(struct psb_mmu_pd *pd, struct page **pages,
			 unsigned long address, uint32_t num_pages,
			 uint32_t desired_tile_stride, uint32_t hw_tile_stride,
			 int type)
{
	struct psb_mmu_pt *pt;
	uint32_t rows = 1;
	uint32_t i;
	uint32_t pte;
	unsigned long addr;
	unsigned long end;
	unsigned long next;
	unsigned long add;
	unsigned long row_add;
	unsigned long f_address = address;
	int ret = -ENOMEM;

	if (hw_tile_stride) {
		if (num_pages % desired_tile_stride != 0)
			return -EINVAL;
		rows = num_pages / desired_tile_stride;
	} else {
		desired_tile_stride = num_pages;
	}

	add = desired_tile_stride << PAGE_SHIFT;
	row_add = hw_tile_stride << PAGE_SHIFT;

	down_read(&pd->driver->sem);

	for (i = 0; i < rows; ++i) {

		addr = address;
		end = addr + add;

		do {
			next = psb_pd_addr_end(addr, end);
			pt = psb_mmu_pt_alloc_map_lock(pd, addr);
			if (!pt) {
				ret = -ENOMEM;
				goto out;
			}
			do {
				pte = psb_mmu_mask_pte(page_to_pfn(*pages++),
						       type);
				psb_mmu_set_pte(pt, addr, pte);
				pt->count++;
			} while (addr += PAGE_SIZE, addr < next);
			psb_mmu_pt_unmap_unlock(pt);

		} while (addr = next, next != end);

		address += row_add;
	}

	ret = 0;
out:
	if (pd->hw_context != -1)
		psb_mmu_flush_ptes(pd, f_address, num_pages,
				   desired_tile_stride, hw_tile_stride);

	up_read(&pd->driver->sem);

	if (pd->hw_context != -1)
		psb_mmu_flush(pd->driver);

	return ret;
}
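/*
 * Translate a GPU virtual address to a page frame number by walking the
 * page directory and page table.  Returns 0 and stores the pfn in *pfn on
 * success, -EINVAL if the address does not map to a usable entry.
 */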
int psb_mmu_virtual_to_pfn(struct psb_mmu_pd *pd, uint32_t virtual,
			   unsigned long *pfn)
{
	int ret;
	struct psb_mmu_pt *pt;
	uint32_t tmp;
	spinlock_t *lock = &pd->driver->lock;

	down_read(&pd->driver->sem);
	pt = psb_mmu_pt_map_lock(pd, virtual);
	if (!pt) {
		uint32_t *v;

		spin_lock(lock);
		v = kmap_atomic(pd->p);
		tmp = v[psb_mmu_pd_index(virtual)];
		kunmap_atomic(v);
		spin_unlock(lock);

		if (tmp != pd->invalid_pde || !(tmp & PSB_PTE_VALID) ||
		    !(pd->invalid_pte & PSB_PTE_VALID)) {
			ret = -EINVAL;
			goto out;
		}
		ret = 0;
		*pfn = pd->invalid_pte >> PAGE_SHIFT;
		goto out;
	}
	tmp = pt->v[psb_mmu_pt_index(virtual)];
	if (!(tmp & PSB_PTE_VALID)) {
		ret = -EINVAL;
	} else {
		ret = 0;
		*pfn = tmp >> PAGE_SHIFT;
	}
	psb_mmu_pt_unmap_unlock(pt);
out:
	up_read(&pd->driver->sem);
	return ret;
}