// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2017 IBM Corp.
 */

#include <linux/hugetlb.h>
#include <linux/sched/mm.h>
#include <asm/pnv-pci.h>
#include <misc/cxllib.h>

#include "cxl.h"

#define CXL_INVALID_DRA			~0ull
#define CXL_DUMMY_READ_SIZE		128
#define CXL_DUMMY_READ_ALIGN		8
#define CXL_CAPI_WINDOW_START		0x2000000000000ull
#define CXL_CAPI_WINDOW_LOG_SIZE	48
#define CXL_XSL_CONFIG_CURRENT_VERSION	CXL_XSL_CONFIG_VERSION1
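
/*
 * Note on the constants above: CXL_DUMMY_READ_ALIGN is a log2 value,
 * so the dummy read buffer is aligned on a 1ull << 8 = 256-byte
 * boundary. The CAPI window starts at 0x2000000000000 (1ull << 49)
 * and spans 1ull << CXL_CAPI_WINDOW_LOG_SIZE bytes, i.e. 256 TB.
 */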

bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags)
{
	int rc;
	u32 phb_index;
	u64 chip_id, capp_unit_id;

	/* No flags currently supported */
	if (flags)
		return false;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return false;

	if (!cxl_is_power9())
		return false;

	if (cxl_slot_is_switched(dev))
		return false;

	/* on p9, some pci slots are not connected to a CAPP unit */
	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
	if (rc)
		return false;

	return true;
}
EXPORT_SYMBOL_GPL(cxllib_slot_is_supported);
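
/*
 * Example usage (illustrative sketch only; mydrv_probe() is a
 * hypothetical driver probe function, not part of this library):
 *
 *	static int mydrv_probe(struct pci_dev *pdev,
 *			       const struct pci_device_id *id)
 *	{
 *		if (!cxllib_slot_is_supported(pdev, 0))
 *			return -ENODEV;
 *		...
 *	}
 */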

static DEFINE_MUTEX(dra_mutex);
static u64 dummy_read_addr = CXL_INVALID_DRA;

static int allocate_dummy_read_buf(void)
{
	u64 buf, vaddr;
	size_t buf_size;

	/*
	 * Dummy read buffer is 128-byte long, aligned on a
	 * 256-byte boundary and we need the physical address.
	 */
	buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN);
	buf = (u64) kzalloc(buf_size, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) &
					(~0ull << CXL_DUMMY_READ_ALIGN);

	WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size),
	     "Dummy read buffer alignment issue");
	dummy_read_addr = virt_to_phys((void *) vaddr);
	return 0;
}
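
/*
 * Worked example for the round-up above: if kzalloc() returned
 * buf = 0x1010, then
 *	vaddr = (0x1010 + 0xff) & ~0xff = 0x110f & ~0xff = 0x1100
 * i.e. the first 256-byte boundary at or above buf. Over-allocating
 * by 1ull << CXL_DUMMY_READ_ALIGN bytes guarantees the 128-byte
 * buffer starting at vaddr still fits within the allocation.
 */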

int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg)
{
	int rc;
	u32 phb_index;
	u64 chip_id, capp_unit_id;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return -EINVAL;

	mutex_lock(&dra_mutex);
	if (dummy_read_addr == CXL_INVALID_DRA) {
		rc = allocate_dummy_read_buf();
		if (rc) {
			mutex_unlock(&dra_mutex);
			return rc;
		}
	}
	mutex_unlock(&dra_mutex);

	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
	if (rc)
		return rc;

	rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
	if (rc)
		return rc;

	cfg->version = CXL_XSL_CONFIG_CURRENT_VERSION;
	cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE;
	cfg->bar_addr = CXL_CAPI_WINDOW_START;
	cfg->dra = dummy_read_addr;
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_get_xsl_config);
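
/*
 * Example usage (illustrative sketch): an XSL driver would typically
 * fetch the configuration once at probe time and program it into the
 * device. xsl_write_config() is a hypothetical device-specific helper.
 *
 *	struct cxllib_xsl_config cfg;
 *
 *	rc = cxllib_get_xsl_config(pdev, &cfg);
 *	if (rc)
 *		return rc;
 *	xsl_write_config(adapter, &cfg);
 */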

int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode,
			   unsigned long flags)
{
	int rc = 0;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return -EINVAL;

	switch (mode) {
	case CXL_MODE_PCI:
		/*
		 * We currently don't support going back to PCI mode.
		 * However, we'll turn the invalidations off, so that
		 * the firmware doesn't have to ack them and can do
		 * things like reset, etc. with no worries.
		 * So always return EPERM (can't go back to PCI) or
		 * EBUSY if we couldn't even turn off snooping.
		 */
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF);
		if (rc)
			rc = -EBUSY;
		else
			rc = -EPERM;
		break;
	case CXL_MODE_CXL:
		/* DMA only supported on TVT1 for the time being */
		if (flags != CXL_MODE_DMA_TVT1)
			return -EINVAL;
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1);
		if (rc)
			return rc;
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON);
		break;
	default:
		rc = -EINVAL;
	}
	return rc;
}
EXPORT_SYMBOL_GPL(cxllib_switch_phb_mode);
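
/*
 * For example, enabling capi mode on a supported slot (DMA is only
 * possible on TVT#1 for now, as checked above):
 *
 *	rc = cxllib_switch_phb_mode(pdev, CXL_MODE_CXL, CXL_MODE_DMA_TVT1);
 *	if (rc)
 *		return rc;
 */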

/*
 * When switching the PHB to capi mode, the TVT#1 entry for
 * the Partitionable Endpoint is set in bypass mode, like
 * in PCI mode.
 * Configure the device dma to use TVT#1, which is done
 * by calling dma_set_mask() with a mask large enough.
 */
int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags)
{
	int rc;

	if (flags)
		return -EINVAL;

	rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64));
	return rc;
}
EXPORT_SYMBOL_GPL(cxllib_set_device_dma);
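
/*
 * The 64-bit mask is what makes the platform code select the bypass
 * window (TVT#1) mentioned above, rather than the 32-bit translated
 * window in TVT#0. After switching the PHB, a caller simply does:
 *
 *	rc = cxllib_set_device_dma(pdev, 0);
 */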

int cxllib_get_PE_attributes(struct task_struct *task,
			     unsigned long translation_mode,
			     struct cxllib_pe_attributes *attr)
{
	struct mm_struct *mm = NULL;

	if (translation_mode != CXL_TRANSLATED_MODE &&
	    translation_mode != CXL_REAL_MODE)
		return -EINVAL;

	attr->sr = cxl_calculate_sr(false,
				    task == NULL,
				    translation_mode == CXL_REAL_MODE,
				    true);
	attr->lpid = mfspr(SPRN_LPID);
	if (task) {
		mm = get_task_mm(task);
		if (mm == NULL)
			return -EINVAL;
		/*
		 * Caller is keeping a reference on mm_users for as long
		 * as XSL uses the memory context
		 */
		attr->pid = mm->context.id;
		mmput(mm);
		attr->tid = task->thread.tidr;
	} else {
		attr->pid = 0;
		attr->tid = 0;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);
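
/*
 * For example, to collect the attributes of the calling process
 * before building its Process Element entry:
 *
 *	struct cxllib_pe_attributes attr;
 *
 *	rc = cxllib_get_PE_attributes(current, CXL_TRANSLATED_MODE, &attr);
 *	if (rc)
 *		return rc;
 *
 * attr.sr, attr.lpid, attr.pid and attr.tid can then be programmed
 * into the device.
 */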

static int get_vma_info(struct mm_struct *mm, u64 addr,
			u64 *vma_start, u64 *vma_end,
			unsigned long *page_size)
{
	struct vm_area_struct *vma = NULL;
	int rc = 0;

	mmap_read_lock(mm);

	vma = find_vma(mm, addr);
	if (!vma) {
		rc = -EFAULT;
		goto out;
	}
	*page_size = vma_kernel_pagesize(vma);
	*vma_start = vma->vm_start;
	*vma_end = vma->vm_end;
out:
	mmap_read_unlock(mm);
	return rc;
}

int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
{
	int rc;
	u64 dar, vma_start, vma_end;
	unsigned long page_size;

	if (mm == NULL)
		return -EFAULT;

	/*
	 * The buffer we have to process can extend over several pages
	 * and may also cover several VMAs.
	 * We iterate over all the pages. The page size could vary
	 * between VMAs.
	 */
	rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size);
	if (rc)
		return rc;

	for (dar = (addr & ~(page_size - 1)); dar < (addr + size);
	     dar += page_size) {
		if (dar < vma_start || dar >= vma_end) {
			/*
			 * We don't hold mm->mmap_lock while iterating, since
			 * the lock is required by one of the lower-level page
			 * fault processing functions and it could create a
			 * deadlock.
			 *
			 * It means the VMAs can be altered between 2
			 * loop iterations and we could theoretically
			 * miss a page (however unlikely). But that's
			 * not really a problem, as the driver will
			 * retry access, get another page fault on the
			 * missing page and call us again.
			 */
			rc = get_vma_info(mm, dar, &vma_start, &vma_end,
					  &page_size);
			if (rc)
				return rc;
		}

		rc = cxl_handle_mm_fault(mm, flags, dar);
		if (rc)
			return -EFAULT;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_handle_fault);
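
/*
 * Example usage (illustrative sketch): pre-faulting a user buffer
 * before the accelerator touches it. flags are passed through to
 * cxl_handle_mm_fault() as DSISR-style bits, so a write access would
 * presumably set the store bit (CXL_PSL_DSISR_An_S):
 *
 *	rc = cxllib_handle_fault(mm, buf_addr, buf_size,
 *				 CXL_PSL_DSISR_An_S);
 *	if (rc)
 *		return rc;
 */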