1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Copyright 2016-17 IBM Corp.
6 #define pr_fmt(fmt) "vas: " fmt
8 #include <linux/types.h>
9 #include <linux/mutex.h>
10 #include <linux/slab.h>
12 #include <linux/log2.h>
13 #include <linux/rcupdate.h>
14 #include <linux/cred.h>
15 #include <linux/sched/mm.h>
16 #include <linux/mmu_context.h>
17 #include <asm/switch_to.h>
18 #include <asm/ppc-opcode.h>
21 #include "copy-paste.h"
23 #define CREATE_TRACE_POINTS
24 #include "vas-trace.h"
/*
 * Compute the paste address region for the window @window using the
 * ->paste_base_addr and ->paste_win_id_shift we got from device tree.
 *
 * NOTE(review): this extract is incomplete — the embedded numeric
 * prefixes are original file line numbers and show gaps, so the local
 * declarations, braces and the *len assignment are not visible here.
 */
30 void vas_win_paste_addr(struct pnv_vas_window *window, u64 *addr, int *len)
35 base = window->vinst->paste_base_addr;
36 shift = window->vinst->paste_win_id_shift;
37 winid = window->vas_win.winid;
/* Each window's paste region is at base + (winid << shift). */
39 *addr = base + (winid << shift);
43 pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr);
/*
 * Return (via @start) the Hypervisor Window Context (HVWC) MMIO base
 * address for @window: per-window slot of VAS_HVWC_SIZE bytes within
 * the instance's hvwc_bar region. (Extract incomplete: the *len
 * out-parameter assignment is presumably in the missing lines — verify.)
 */
46 static inline void get_hvwc_mmio_bar(struct pnv_vas_window *window,
51 pbaddr = window->vinst->hvwc_bar_start;
52 *start = pbaddr + window->vas_win.winid * VAS_HVWC_SIZE;
/*
 * Return (via @start) the OS/User Window Context (UWC) MMIO base
 * address for @window: per-window slot of VAS_UWC_SIZE bytes within
 * the instance's uwc_bar region. (Extract incomplete: *len assignment
 * not visible — verify against full source.)
 */
56 static inline void get_uwc_mmio_bar(struct pnv_vas_window *window,
61 pbaddr = window->vinst->uwc_bar_start;
62 *start = pbaddr + window->vas_win.winid * VAS_UWC_SIZE;
/*
 * Map the paste bus address of the given send window into kernel address
 * space. Unlike MMIO regions (map_mmio_region() below), paste region must
 * be mapped cache-able and is only applicable to send windows.
 *
 * On failure the visible exit path returns ERR_PTR(-ENOMEM).
 * NOTE(review): lines are missing from this extract (declarations,
 * error-path gotos, success return) — see the gaps in the numeric
 * line prefixes.
 */
71 static void *map_paste_region(struct pnv_vas_window *txwin)
/* Resource name shown in /proc/iomem: "window-v<vasid>-w<winid>". */
78 name = kasprintf(GFP_KERNEL, "window-v%d-w%d", txwin->vinst->vas_id,
79 txwin->vas_win.winid);
83 txwin->paste_addr_name = name;
84 vas_win_paste_addr(txwin, &start, &len);
86 if (!request_mem_region(start, len, name)) {
87 pr_devel("%s(): request_mem_region(0x%llx, %d) failed\n",
88 __func__, start, len);
/* Paste region is mapped cacheable, unlike the uncached MMIO BARs. */
92 map = ioremap_cache(start, len);
94 pr_devel("%s(): ioremap_cache(0x%llx, %d) failed\n", __func__,
99 pr_devel("Mapped paste addr 0x%llx to kaddr 0x%p\n", start, map);
104 return ERR_PTR(-ENOMEM);
/*
 * Reserve and ioremap() (uncached) the MMIO range [@start, @start+@len)
 * under resource name @name. Used for the HVWC/UWC window-context BARs.
 * (Extract incomplete: the NULL-return on failure and the success
 * return of @map are in missing lines.)
 */
107 static void *map_mmio_region(char *name, u64 start, int len)
111 if (!request_mem_region(start, len, name)) {
112 pr_devel("%s(): request_mem_region(0x%llx, %d) failed\n",
113 __func__, start, len);
117 map = ioremap(start, len);
119 pr_devel("%s(): ioremap(0x%llx, %d) failed\n", __func__, start,
/*
 * Undo map_mmio_region()/map_paste_region(): release the reserved
 * mem region. (The iounmap(addr) call is presumably in the missing
 * line(s) of this extract — verify against full source.)
 */
127 static void unmap_region(void *addr, u64 start, int len)
130 release_mem_region((phys_addr_t)start, len);
/*
 * Unmap the paste address region for a window.
 *
 * No-op if the window has no paste mapping (->paste_kaddr is NULL,
 * e.g. user-space send windows mapped via mmap() instead).
 */
136 static void unmap_paste_region(struct pnv_vas_window *window)
141 if (window->paste_kaddr) {
142 vas_win_paste_addr(window, &busaddr_start, &len);
143 unmap_region(window->paste_kaddr, busaddr_start, len);
/* Clear pointers so a repeated close cannot double-unmap/double-free. */
144 window->paste_kaddr = NULL;
145 kfree(window->paste_addr_name);
146 window->paste_addr_name = NULL;
/*
 * Unmap the MMIO regions for a window. Hold the vas_mutex so we don't
 * unmap when the window's debugfs dir is in use. This serializes close
 * of a window even on another VAS instance but since its not a critical
 * path, just minimize the time we hold the mutex for now. We can add
 * a per-instance mutex later if necessary.
 */
157 static void unmap_winctx_mmio_bars(struct pnv_vas_window *window)
/*
 * Detach the mapped pointers under the mutex; the actual unmap is done
 * after dropping it to keep the critical section short (see header).
 */
164 mutex_lock(&vas_mutex);
166 hvwc_map = window->hvwc_map;
167 window->hvwc_map = NULL;
169 uwc_map = window->uwc_map;
170 window->uwc_map = NULL;
172 mutex_unlock(&vas_mutex);
175 get_hvwc_mmio_bar(window, &busaddr_start, &len);
176 unmap_region(hvwc_map, busaddr_start, len);
180 get_uwc_mmio_bar(window, &busaddr_start, &len);
181 unmap_region(uwc_map, busaddr_start, len);
/*
 * Find the Hypervisor Window Context (HVWC) MMIO Base Address Region and the
 * OS/User Window Context (UWC) MMIO Base Address Region for the given window.
 * Map these bus addresses and save the mapped kernel addresses in @window.
 *
 * On failure of either mapping, both are torn down via
 * unmap_winctx_mmio_bars(). (Extract incomplete: the return statements
 * are in missing lines.)
 */
190 static int map_winctx_mmio_bars(struct pnv_vas_window *window)
195 get_hvwc_mmio_bar(window, &start, &len);
196 window->hvwc_map = map_mmio_region("HVWCM_Window", start, len);
198 get_uwc_mmio_bar(window, &start, &len);
199 window->uwc_map = map_mmio_region("UWCM_Window", start, len);
201 if (!window->hvwc_map || !window->uwc_map) {
202 unmap_winctx_mmio_bars(window);
/*
 * Reset all valid registers in the HV and OS/User Window Contexts for
 * the window identified by @window.
 *
 * NOTE: We cannot really use a for loop to reset window context. Not all
 * offsets in a window context are valid registers and the valid
 * registers are not sequential. And, we can only write to offsets
 * with valid registers.
 */
218 static void reset_window_regs(struct pnv_vas_window *window)
220 write_hvwc_reg(window, VREG(LPID), 0ULL);
221 write_hvwc_reg(window, VREG(PID), 0ULL);
222 write_hvwc_reg(window, VREG(XLATE_MSR), 0ULL);
223 write_hvwc_reg(window, VREG(XLATE_LPCR), 0ULL);
224 write_hvwc_reg(window, VREG(XLATE_CTL), 0ULL);
225 write_hvwc_reg(window, VREG(AMR), 0ULL);
226 write_hvwc_reg(window, VREG(SEIDR), 0ULL);
227 write_hvwc_reg(window, VREG(FAULT_TX_WIN), 0ULL);
228 write_hvwc_reg(window, VREG(OSU_INTR_SRC_RA), 0ULL);
229 write_hvwc_reg(window, VREG(HV_INTR_SRC_RA), 0ULL);
230 write_hvwc_reg(window, VREG(PSWID), 0ULL);
231 write_hvwc_reg(window, VREG(LFIFO_BAR), 0ULL);
232 write_hvwc_reg(window, VREG(LDATA_STAMP_CTL), 0ULL);
233 write_hvwc_reg(window, VREG(LDMA_CACHE_CTL), 0ULL);
234 write_hvwc_reg(window, VREG(LRFIFO_PUSH), 0ULL);
235 write_hvwc_reg(window, VREG(CURR_MSG_COUNT), 0ULL);
236 write_hvwc_reg(window, VREG(LNOTIFY_AFTER_COUNT), 0ULL);
237 write_hvwc_reg(window, VREG(LRX_WCRED), 0ULL);
238 write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
239 write_hvwc_reg(window, VREG(TX_WCRED), 0ULL);
240 write_hvwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
241 write_hvwc_reg(window, VREG(LFIFO_SIZE), 0ULL);
242 write_hvwc_reg(window, VREG(WINCTL), 0ULL);
243 write_hvwc_reg(window, VREG(WIN_STATUS), 0ULL);
244 write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), 0ULL);
245 write_hvwc_reg(window, VREG(TX_RSVD_BUF_COUNT), 0ULL);
246 write_hvwc_reg(window, VREG(LRFIFO_WIN_PTR), 0ULL);
247 write_hvwc_reg(window, VREG(LNOTIFY_CTL), 0ULL);
248 write_hvwc_reg(window, VREG(LNOTIFY_PID), 0ULL);
249 write_hvwc_reg(window, VREG(LNOTIFY_LPID), 0ULL);
250 write_hvwc_reg(window, VREG(LNOTIFY_TID), 0ULL);
251 write_hvwc_reg(window, VREG(LNOTIFY_SCOPE), 0ULL);
252 write_hvwc_reg(window, VREG(NX_UTIL_ADDER), 0ULL);
254 /* Skip read-only registers: NX_UTIL and NX_UTIL_SE */
/*
 * The send and receive window credit adder registers are also
 * accessible from HVWC and have been initialized above. We don't
 * need to initialize from the OS/User Window Context, so skip:
 *
 *	write_uwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
 *	write_uwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
 */
/*
 * Initialize window context registers related to Address Translation.
 * These registers are common to send/receive windows although they
 * differ for user/kernel windows. As we resolve the TODOs we may
 * want to add fields to vas_winctx and move the initialization to
 * init_vas_winctx_regs().
 */
274 static void init_xlate_regs(struct pnv_vas_window *window, bool user_win)
/*
 * XLATE_MSR: MSR_TA, MSR_US are false for both kernel and user.
 * MSR_DR and MSR_PR are false for kernel. (Extract incomplete: the
 * user_win conditional guarding the DR/PR bits is in missing lines.)
 */
283 val = SET_FIELD(VAS_XLATE_MSR_HV, val, 1);
284 val = SET_FIELD(VAS_XLATE_MSR_SF, val, 1);
286 val = SET_FIELD(VAS_XLATE_MSR_DR, val, 1);
287 val = SET_FIELD(VAS_XLATE_MSR_PR, val, 1);
289 write_hvwc_reg(window, VREG(XLATE_MSR), val);
291 lpcr = mfspr(SPRN_LPCR);
/*
 * NOTE: From Section 5.7.8.1 Segment Lookaside Buffer of the
 * Power ISA, v3.0B, Page size encoding is 0 = 4KB, 5 = 64KB.
 *
 * NOTE: From Section 1.3.1, Address Translation Context of the
 * Nest MMU Workbook, LPCR_SC should be 0 for Power9.
 */
300 val = SET_FIELD(VAS_XLATE_LPCR_PAGE_SIZE, val, 5);
301 val = SET_FIELD(VAS_XLATE_LPCR_ISL, val, lpcr & LPCR_ISL);
302 val = SET_FIELD(VAS_XLATE_LPCR_TC, val, lpcr & LPCR_TC);
303 val = SET_FIELD(VAS_XLATE_LPCR_SC, val, 0);
304 write_hvwc_reg(window, VREG(XLATE_LPCR), val);
/*
 * Section 1.3.1 (Address translation Context) of NMMU workbook:
 * 0b00 Hashed Page Table mode, 0b11 Radix on Radix.
 * (The 0b10 value used for the non-radix case below is presumably
 * "Radix on HPT"/mixed mode — confirm against the workbook.)
 */
314 val = SET_FIELD(VAS_XLATE_MODE, val, radix_enabled() ? 3 : 2);
315 write_hvwc_reg(window, VREG(XLATE_CTL), val);
/*
 * TODO: Can we mfspr(AMR) even for user windows?
 */
321 val = SET_FIELD(VAS_AMR, val, mfspr(SPRN_AMR));
322 write_hvwc_reg(window, VREG(AMR), val);
325 val = SET_FIELD(VAS_SEIDR, val, 0);
326 write_hvwc_reg(window, VREG(SEIDR), val);
/*
 * Initialize Reserved Send Buffer Count for the send window. It involves
 * writing to the register, reading it back to confirm that the hardware
 * has enough buffers to reserve. See section 1.3.1.2.1 of VAS workbook.
 *
 * Since we can only make a best-effort attempt to fulfill the request,
 * we don't return any errors if we cannot.
 *
 * TODO: Reserved (aka dedicated) send buffers are not supported yet.
 */
339 static void init_rsvd_tx_buf_count(struct pnv_vas_window *txwin,
340 struct vas_winctx *winctx)
/* Reserved buffers unsupported: always write 0 (see TODO above). */
342 write_hvwc_reg(txwin, VREG(TX_RSVD_BUF_COUNT), 0ULL);
/*
 * Initialize window context registers for a receive window.
 * Except for caching control and marking window open, the registers
 * are initialized in the order listed in Section 3.1.4 (Window Context
 * Cache Register Details) of the VAS workbook although they don't need
 * to be.
 *
 * Design note: For NX receive windows, NX allocates the FIFO buffer in OPAL
 * (so that it can get a large contiguous area) and passes that buffer
 * to kernel via device tree. We now write that buffer address to the
 * FIFO BAR. Would it make sense to do this all in OPAL? i.e have OPAL
 * write the per-chip RX FIFO addresses to the windows during boot-up
 * as a one-time task? That could work for NX but what about other
 * receivers? Let the receivers tell us the rx-fifo buffers for now.
 *
 * NOTE(review): despite the header, this is called for both rx and tx
 * windows below (vas_rx_win_open()/vas_tx_win_open()); @winctx carries
 * the per-direction values. Extract has missing lines throughout.
 */
361 static void init_winctx_regs(struct pnv_vas_window *window,
362 struct vas_winctx *winctx)
/* Start from a clean slate: zero every writable context register. */
367 reset_window_regs(window);
370 val = SET_FIELD(VAS_LPID, val, winctx->lpid);
371 write_hvwc_reg(window, VREG(LPID), val);
374 val = SET_FIELD(VAS_PID_ID, val, winctx->pidr);
375 write_hvwc_reg(window, VREG(PID), val);
377 init_xlate_regs(window, winctx->user_win);
380 val = SET_FIELD(VAS_FAULT_TX_WIN, val, winctx->fault_win_id);
381 write_hvwc_reg(window, VREG(FAULT_TX_WIN), val);
383 /* In PowerNV, interrupts go to HV. */
384 write_hvwc_reg(window, VREG(OSU_INTR_SRC_RA), 0ULL);
387 val = SET_FIELD(VAS_HV_INTR_SRC_RA, val, winctx->irq_port);
388 write_hvwc_reg(window, VREG(HV_INTR_SRC_RA), val);
391 val = SET_FIELD(VAS_PSWID_EA_HANDLE, val, winctx->pswid);
392 write_hvwc_reg(window, VREG(PSWID), val);
394 write_hvwc_reg(window, VREG(SPARE1), 0ULL);
395 write_hvwc_reg(window, VREG(SPARE2), 0ULL);
396 write_hvwc_reg(window, VREG(SPARE3), 0ULL);
/*
 * NOTE: VAS expects the FIFO address to be copied into the LFIFO_BAR
 * register as is - do NOT shift the address into VAS_LFIFO_BAR
 * bit fields! Ok to set the page migration select fields -
 * VAS ignores the lower 10+ bits in the address anyway, because
 * the minimum FIFO size is 1K?
 *
 * See also: Design note in function header.
 */
407 val = winctx->rx_fifo;
408 val = SET_FIELD(VAS_PAGE_MIGRATION_SELECT, val, 0);
409 write_hvwc_reg(window, VREG(LFIFO_BAR), val);
412 val = SET_FIELD(VAS_LDATA_STAMP, val, winctx->data_stamp);
413 write_hvwc_reg(window, VREG(LDATA_STAMP_CTL), val);
416 val = SET_FIELD(VAS_LDMA_TYPE, val, winctx->dma_type);
417 val = SET_FIELD(VAS_LDMA_FIFO_DISABLE, val, winctx->fifo_disable);
418 write_hvwc_reg(window, VREG(LDMA_CACHE_CTL), val);
420 write_hvwc_reg(window, VREG(LRFIFO_PUSH), 0ULL);
421 write_hvwc_reg(window, VREG(CURR_MSG_COUNT), 0ULL);
422 write_hvwc_reg(window, VREG(LNOTIFY_AFTER_COUNT), 0ULL);
425 val = SET_FIELD(VAS_LRX_WCRED, val, winctx->wcreds_max);
426 write_hvwc_reg(window, VREG(LRX_WCRED), val);
429 val = SET_FIELD(VAS_TX_WCRED, val, winctx->wcreds_max);
430 write_hvwc_reg(window, VREG(TX_WCRED), val);
432 write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
433 write_hvwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
/* LFIFO_SIZE is encoded as log2 of the size in KB. */
435 fifo_size = winctx->rx_fifo_size / 1024;
438 val = SET_FIELD(VAS_LFIFO_SIZE, val, ilog2(fifo_size));
439 write_hvwc_reg(window, VREG(LFIFO_SIZE), val);
/* Update window control and caching control registers last so
 * we mark the window open only after fully initializing it and
 * pushing context to cache.
 */
446 write_hvwc_reg(window, VREG(WIN_STATUS), 0ULL);
448 init_rsvd_tx_buf_count(window, winctx);
450 /* for a send window, point to the matching receive window */
452 val = SET_FIELD(VAS_LRX_WIN_ID, val, winctx->rx_win_id);
453 write_hvwc_reg(window, VREG(LRFIFO_WIN_PTR), val);
455 write_hvwc_reg(window, VREG(SPARE4), 0ULL);
458 val = SET_FIELD(VAS_NOTIFY_DISABLE, val, winctx->notify_disable);
459 val = SET_FIELD(VAS_INTR_DISABLE, val, winctx->intr_disable);
460 val = SET_FIELD(VAS_NOTIFY_EARLY, val, winctx->notify_early);
461 val = SET_FIELD(VAS_NOTIFY_OSU_INTR, val, winctx->notify_os_intr_reg);
462 write_hvwc_reg(window, VREG(LNOTIFY_CTL), val);
465 val = SET_FIELD(VAS_LNOTIFY_PID, val, winctx->lnotify_pid);
466 write_hvwc_reg(window, VREG(LNOTIFY_PID), val);
469 val = SET_FIELD(VAS_LNOTIFY_LPID, val, winctx->lnotify_lpid);
470 write_hvwc_reg(window, VREG(LNOTIFY_LPID), val);
473 val = SET_FIELD(VAS_LNOTIFY_TID, val, winctx->lnotify_tid);
474 write_hvwc_reg(window, VREG(LNOTIFY_TID), val);
477 val = SET_FIELD(VAS_LNOTIFY_MIN_SCOPE, val, winctx->min_scope);
478 val = SET_FIELD(VAS_LNOTIFY_MAX_SCOPE, val, winctx->max_scope);
479 write_hvwc_reg(window, VREG(LNOTIFY_SCOPE), val);
481 /* Skip read-only registers NX_UTIL and NX_UTIL_SE */
483 write_hvwc_reg(window, VREG(SPARE5), 0ULL);
484 write_hvwc_reg(window, VREG(NX_UTIL_ADDER), 0ULL);
485 write_hvwc_reg(window, VREG(SPARE6), 0ULL);
487 /* Finally, push window context to memory and... */
489 val = SET_FIELD(VAS_PUSH_TO_MEM, val, 1);
490 write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), val);
492 /* ... mark the window open for business */
494 val = SET_FIELD(VAS_WINCTL_REJ_NO_CREDIT, val, winctx->rej_no_credit);
495 val = SET_FIELD(VAS_WINCTL_PIN, val, winctx->pin_win);
496 val = SET_FIELD(VAS_WINCTL_TX_WCRED_MODE, val, winctx->tx_wcred_mode);
497 val = SET_FIELD(VAS_WINCTL_RX_WCRED_MODE, val, winctx->rx_wcred_mode);
498 val = SET_FIELD(VAS_WINCTL_TX_WORD_MODE, val, winctx->tx_word_mode);
499 val = SET_FIELD(VAS_WINCTL_RX_WORD_MODE, val, winctx->rx_word_mode);
500 val = SET_FIELD(VAS_WINCTL_FAULT_WIN, val, winctx->fault_win);
501 val = SET_FIELD(VAS_WINCTL_NX_WIN, val, winctx->nx_win);
502 val = SET_FIELD(VAS_WINCTL_OPEN, val, 1);
503 write_hvwc_reg(window, VREG(WINCTL), val);
/* Return window id @winid to the instance's IDA allocator. */
506 static void vas_release_window_id(struct ida *ida, int winid)
508 ida_free(ida, winid);
/*
 * Allocate a window id in [0, VAS_WINDOWS_PER_CHIP - 1] from @ida.
 * Returns the id, or a negative errno from ida_alloc_max() (the
 * -ENOSPC case additionally logs "too many windows").
 */
511 static int vas_assign_window_id(struct ida *ida)
513 int winid = ida_alloc_max(ida, VAS_WINDOWS_PER_CHIP - 1, GFP_KERNEL);
515 if (winid == -ENOSPC) {
516 pr_err("Too many (%d) open windows\n", VAS_WINDOWS_PER_CHIP);
/*
 * Free @window: unmap its MMIO BARs, remove its debugfs dir, and
 * release its window id back to the instance. (Extract incomplete:
 * the kfree(window) presumably sits in the missing lines between
 * debugfs removal and id release — verify.)
 */
523 static void vas_window_free(struct pnv_vas_window *window)
525 struct vas_instance *vinst = window->vinst;
526 int winid = window->vas_win.winid;
528 unmap_winctx_mmio_bars(window);
530 vas_window_free_dbgdir(window);
534 vas_release_window_id(&vinst->ida, winid);
/*
 * Allocate and minimally initialize a window on @vinst: assign a
 * window id, allocate the structure, map the HVWC/UWC MMIO BARs and
 * create the debugfs dir. Returns the window or an ERR_PTR; on any
 * failure the window id is released (visible error tail below).
 */
537 static struct pnv_vas_window *vas_window_alloc(struct vas_instance *vinst)
540 struct pnv_vas_window *window;
542 winid = vas_assign_window_id(&vinst->ida);
/* Negative winid is an errno from vas_assign_window_id(). */
544 return ERR_PTR(winid);
546 window = kzalloc(sizeof(*window), GFP_KERNEL);
550 window->vinst = vinst;
551 window->vas_win.winid = winid;
553 if (map_winctx_mmio_bars(window))
556 vas_window_init_dbgdir(window);
/* Error path: give the id back and report out-of-memory. */
562 vas_release_window_id(&vinst->ida, winid);
563 return ERR_PTR(-ENOMEM);
/*
 * Drop a send window's reference on its paired receive window
 * (taken in get_vinst_rxwin()).
 */
566 static void put_rx_win(struct pnv_vas_window *rxwin)
568 /* Better not be a send window! */
569 WARN_ON_ONCE(rxwin->tx_win);
571 atomic_dec(&rxwin->num_txwins);
/*
 * Find the user space receive window given the @pswid.
 * - We must have a valid vasid and it must belong to this instance.
 * (so both send and receive windows are on the same VAS instance)
 * - The window must refer to an OPEN, FTW, RECEIVE window.
 *
 * NOTE: We access ->windows[] table and assume that vinst->mutex is held.
 *
 * Returns the receive window or ERR_PTR(-EINVAL).
 */
582 static struct pnv_vas_window *get_user_rxwin(struct vas_instance *vinst,
586 struct pnv_vas_window *rxwin;
588 decode_pswid(pswid, &vasid, &winid);
590 if (vinst->vas_id != vasid)
591 return ERR_PTR(-EINVAL);
593 rxwin = vinst->windows[winid];
/* Must be an existing FTW receive window (not a send window). */
595 if (!rxwin || rxwin->tx_win || rxwin->vas_win.cop != VAS_COP_TYPE_FTW)
596 return ERR_PTR(-EINVAL);
/*
 * Get the VAS receive window associated with NX engine identified
 * by @cop and if applicable, @pswid.
 *
 * On success, takes a reference (num_txwins) that the caller must
 * drop with put_rx_win(). See also function header of set_vinst_win().
 */
607 static struct pnv_vas_window *get_vinst_rxwin(struct vas_instance *vinst,
608 enum vas_cop_type cop, u32 pswid)
610 struct pnv_vas_window *rxwin;
612 mutex_lock(&vinst->mutex);
/* FTW pairs by pswid; NX engines pair by coprocessor type. */
614 if (cop == VAS_COP_TYPE_FTW)
615 rxwin = get_user_rxwin(vinst, pswid);
617 rxwin = vinst->rxwin[cop] ?: ERR_PTR(-EINVAL);
620 atomic_inc(&rxwin->num_txwins);
622 mutex_unlock(&vinst->mutex);
/*
 * We have two tables of windows in a VAS instance. The first one,
 * ->windows[], contains all the windows in the instance and allows
 * looking up a window by its id. It is used to look up send windows
 * during fault handling and receive windows when pairing user space
 * send/receive windows.
 *
 * The second table, ->rxwin[], contains receive windows that are
 * associated with NX engines. This table has VAS_COP_TYPE_MAX
 * entries and is used to look up a receive window by its
 * coprocessor type.
 *
 * Here, we save @window in the ->windows[] table. If it is a receive
 * window, we also save the window in the ->rxwin[] table.
 */
642 static void set_vinst_win(struct vas_instance *vinst,
643 struct pnv_vas_window *window)
645 int id = window->vas_win.winid;
647 mutex_lock(&vinst->mutex);
/*
 * There should only be one receive window for a coprocessor type
 * unless its a user (FTW) window.
 */
653 if (!window->user_win && !window->tx_win) {
654 WARN_ON_ONCE(vinst->rxwin[window->vas_win.cop]);
655 vinst->rxwin[window->vas_win.cop] = window;
658 WARN_ON_ONCE(vinst->windows[id] != NULL);
659 vinst->windows[id] = window;
661 mutex_unlock(&vinst->mutex);
/*
 * Clear this window from the table(s) of windows for this VAS instance.
 * See also function header of set_vinst_win().
 */
668 static void clear_vinst_win(struct pnv_vas_window *window)
670 int id = window->vas_win.winid;
671 struct vas_instance *vinst = window->vinst;
673 mutex_lock(&vinst->mutex);
/* Kernel receive windows also occupy a ->rxwin[] slot; clear it. */
675 if (!window->user_win && !window->tx_win) {
676 WARN_ON_ONCE(!vinst->rxwin[window->vas_win.cop]);
677 vinst->rxwin[window->vas_win.cop] = NULL;
680 WARN_ON_ONCE(vinst->windows[id] != window);
681 vinst->windows[id] = NULL;
683 mutex_unlock(&vinst->mutex);
/*
 * Fill @winctx with register values for the receive window @rxwin
 * from the caller-supplied attributes @rxattr. Pure data translation;
 * init_winctx_regs() later writes @winctx to hardware.
 */
686 static void init_winctx_for_rxwin(struct pnv_vas_window *rxwin,
687 struct vas_rx_win_attr *rxattr,
688 struct vas_winctx *winctx)
/*
 * We first zero (memset()) all fields and only set non-zero fields.
 * Following fields are 0/false but maybe deserve a comment:
 *
 *	->notify_os_intr_reg	In powerNV, send intrs to HV
 *	->notify_disable	False for NX windows
 *	->intr_disable		False for Fault Windows
 *	->xtra_write		False for NX windows
 *	->notify_early		NA for NX windows
 *	->rsvd_txbuf_count	NA for Rx windows
 *	->lpid, ->pid, ->tid	NA for Rx windows
 */
703 memset(winctx, 0, sizeof(struct vas_winctx));
705 winctx->rx_fifo = rxattr->rx_fifo;
706 winctx->rx_fifo_size = rxattr->rx_fifo_size;
707 winctx->wcreds_max = rxwin->vas_win.wcreds_max;
708 winctx->pin_win = rxattr->pin_win;
710 winctx->nx_win = rxattr->nx_win;
711 winctx->fault_win = rxattr->fault_win;
712 winctx->user_win = rxattr->user_win;
713 winctx->rej_no_credit = rxattr->rej_no_credit;
714 winctx->rx_word_mode = rxattr->rx_win_ord_mode;
715 winctx->tx_word_mode = rxattr->tx_win_ord_mode;
716 winctx->rx_wcred_mode = rxattr->rx_wcred_mode;
717 winctx->tx_wcred_mode = rxattr->tx_wcred_mode;
718 winctx->notify_early = rxattr->notify_early;
/* Per-window-kind overrides: NX, fault, and user (FTW) windows. */
720 if (winctx->nx_win) {
721 winctx->data_stamp = true;
722 winctx->intr_disable = true;
723 winctx->pin_win = true;
725 WARN_ON_ONCE(winctx->fault_win);
726 WARN_ON_ONCE(!winctx->rx_word_mode);
727 WARN_ON_ONCE(!winctx->tx_word_mode);
728 WARN_ON_ONCE(winctx->notify_after_count);
729 } else if (winctx->fault_win) {
730 winctx->notify_disable = true;
731 } else if (winctx->user_win) {
/*
 * Section 1.8.1 Low Latency Core-Core Wake up of
 * the VAS workbook:
 *	- disable credit checks ([tr]x_wcred_mode = false)
 *	- disable FIFO writes
 *	- enable ASB_Notify, disable interrupt
 */
740 winctx->fifo_disable = true;
741 winctx->intr_disable = true;
745 winctx->lnotify_lpid = rxattr->lnotify_lpid;
746 winctx->lnotify_pid = rxattr->lnotify_pid;
747 winctx->lnotify_tid = rxattr->lnotify_tid;
748 winctx->pswid = rxattr->pswid;
749 winctx->dma_type = VAS_DMA_TYPE_INJECT;
750 winctx->tc_mode = rxattr->tc_mode;
752 winctx->min_scope = VAS_SCOPE_LOCAL;
753 winctx->max_scope = VAS_SCOPE_VECTORED_GROUP;
/* Only route faults to the IRQ port if the instance has an IRQ set up. */
754 if (rxwin->vinst->virq)
755 winctx->irq_port = rxwin->vinst->irq_port;
/*
 * Validate receive-window open arguments for coprocessor type @cop.
 * Returns true if the (cop, attr) combination is acceptable.
 * (Extract incomplete: most "return false"/"return true" lines are in
 * missing lines; the checks below show the constraints enforced.)
 */
758 static bool rx_win_args_valid(enum vas_cop_type cop,
759 struct vas_rx_win_attr *attr)
761 pr_debug("Rxattr: fault %d, notify %d, intr %d, early %d, fifo %d\n",
762 attr->fault_win, attr->notify_disable,
763 attr->intr_disable, attr->notify_early,
766 if (cop >= VAS_COP_TYPE_MAX)
/* FIFO size limits do not apply to FTW windows (they have no FIFO). */
769 if (cop != VAS_COP_TYPE_FTW &&
770 attr->rx_fifo_size < VAS_RX_FIFO_SIZE_MIN)
773 if (attr->rx_fifo_size > VAS_RX_FIFO_SIZE_MAX)
776 if (!attr->wcreds_max)
780 /* cannot be fault or user window if it is nx */
781 if (attr->fault_win || attr->user_win)
/*
 * Section 3.1.4.32: NX Windows must not disable notification,
 * and must not enable interrupts or early notification.
 */
787 if (attr->notify_disable || !attr->intr_disable ||
790 } else if (attr->fault_win) {
791 /* cannot be both fault and user window */
/*
 * Section 3.1.4.32: Fault windows must disable notification
 * but not interrupts.
 */
799 if (!attr->notify_disable || attr->intr_disable)
802 } else if (attr->user_win) {
/*
 * User receive windows are only for fast-thread-wakeup
 * (FTW). They don't need a FIFO and must disable interrupts.
 */
807 if (attr->rx_fifo || attr->rx_fifo_size || !attr->intr_disable)
810 /* Rx window must be one of NX or Fault or User window. */
/*
 * Initialize @rxattr with sane defaults for receive windows of
 * coprocessor type @cop: NX compression engines (842/GZIP, both
 * priorities), the fault window, or fast-thread-wakeup (FTW).
 * Exported for use by NX drivers and the VAS fault setup code.
 */
817 void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, enum vas_cop_type cop)
819 memset(rxattr, 0, sizeof(*rxattr));
821 if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI ||
822 cop == VAS_COP_TYPE_GZIP || cop == VAS_COP_TYPE_GZIP_HIPRI) {
823 rxattr->pin_win = true;
824 rxattr->nx_win = true;
825 rxattr->fault_win = false;
826 rxattr->intr_disable = true;
827 rxattr->rx_wcred_mode = true;
828 rxattr->tx_wcred_mode = true;
829 rxattr->rx_win_ord_mode = true;
830 rxattr->tx_win_ord_mode = true;
831 } else if (cop == VAS_COP_TYPE_FAULT) {
832 rxattr->pin_win = true;
833 rxattr->fault_win = true;
834 rxattr->notify_disable = true;
835 rxattr->rx_wcred_mode = true;
836 rxattr->rx_win_ord_mode = true;
837 rxattr->rej_no_credit = true;
838 rxattr->tc_mode = VAS_THRESH_DISABLED;
839 } else if (cop == VAS_COP_TYPE_FTW) {
840 rxattr->user_win = true;
841 rxattr->intr_disable = true;
/*
 * As noted in the VAS Workbook we disable credit checks.
 * If we enable credit checks in the future, we must also
 * implement a mechanism to return the user credits or new
 * paste operations will fail.
 */
851 EXPORT_SYMBOL_GPL(vas_init_rx_win_attr);
/*
 * Open a receive window on VAS instance @vasid for coprocessor type
 * @cop with attributes @rxattr: validate args, find the instance,
 * allocate a window, program its context registers and publish it in
 * the instance tables. Returns the embedded vas_window or an ERR_PTR.
 */
853 struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
854 struct vas_rx_win_attr *rxattr)
856 struct pnv_vas_window *rxwin;
857 struct vas_winctx winctx;
858 struct vas_instance *vinst;
860 trace_vas_rx_win_open(current, vasid, cop, rxattr);
862 if (!rx_win_args_valid(cop, rxattr))
863 return ERR_PTR(-EINVAL);
865 vinst = find_vas_instance(vasid);
867 pr_devel("vasid %d not found!\n", vasid);
868 return ERR_PTR(-EINVAL);
870 pr_devel("Found instance %d\n", vasid);
872 rxwin = vas_window_alloc(vinst);
874 pr_devel("Unable to allocate memory for Rx window\n");
/* Propagate the ERR_PTR from vas_window_alloc() as-is. */
875 return (struct vas_window *)rxwin;
878 rxwin->tx_win = false;
879 rxwin->nx_win = rxattr->nx_win;
880 rxwin->user_win = rxattr->user_win;
881 rxwin->vas_win.cop = cop;
882 rxwin->vas_win.wcreds_max = rxattr->wcreds_max;
884 init_winctx_for_rxwin(rxwin, rxattr, &winctx);
885 init_winctx_regs(rxwin, &winctx);
887 set_vinst_win(vinst, rxwin);
889 return &rxwin->vas_win;
891 EXPORT_SYMBOL_GPL(vas_rx_win_open);
/*
 * Initialize @txattr with sane defaults for send windows of
 * coprocessor type @cop (NX engines or FTW). Exported helper,
 * counterpart of vas_init_rx_win_attr().
 */
893 void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr, enum vas_cop_type cop)
895 memset(txattr, 0, sizeof(*txattr));
897 if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI ||
898 cop == VAS_COP_TYPE_GZIP || cop == VAS_COP_TYPE_GZIP_HIPRI) {
899 txattr->rej_no_credit = false;
900 txattr->rx_wcred_mode = true;
901 txattr->tx_wcred_mode = true;
902 txattr->rx_win_ord_mode = true;
903 txattr->tx_win_ord_mode = true;
904 } else if (cop == VAS_COP_TYPE_FTW) {
905 txattr->user_win = true;
908 EXPORT_SYMBOL_GPL(vas_init_tx_win_attr);
/*
 * Fill @winctx with register values for the send window @txwin from
 * the caller-supplied attributes @txattr. Counterpart of
 * init_winctx_for_rxwin(); init_winctx_regs() writes the result out.
 */
910 static void init_winctx_for_txwin(struct pnv_vas_window *txwin,
911 struct vas_tx_win_attr *txattr,
912 struct vas_winctx *winctx)
/*
 * We first zero all fields and only set non-zero ones. Following
 * are some fields set to 0/false for the stated reason:
 *
 *	->notify_os_intr_reg	In powernv, send intrs to HV
 *	->rsvd_txbuf_count	Not supported yet.
 *	->notify_disable	False for NX windows
 *	->xtra_write		False for NX windows
 *	->notify_early		NA for NX windows
 *	->lnotify_lpid		NA for Tx windows
 *	->lnotify_pid		NA for Tx windows
 *	->lnotify_tid		NA for Tx windows
 *	->tx_win_cred_mode	Ignore for now for NX windows
 *	->rx_win_cred_mode	Ignore for now for NX windows
 */
929 memset(winctx, 0, sizeof(struct vas_winctx));
931 winctx->wcreds_max = txwin->vas_win.wcreds_max;
933 winctx->user_win = txattr->user_win;
/* Send window inherits nx_win from the paired receive window. */
934 winctx->nx_win = txwin->rxwin->nx_win;
935 winctx->pin_win = txattr->pin_win;
936 winctx->rej_no_credit = txattr->rej_no_credit;
937 winctx->rsvd_txbuf_enable = txattr->rsvd_txbuf_enable;
939 winctx->rx_wcred_mode = txattr->rx_wcred_mode;
940 winctx->tx_wcred_mode = txattr->tx_wcred_mode;
941 winctx->rx_word_mode = txattr->rx_win_ord_mode;
942 winctx->tx_word_mode = txattr->tx_win_ord_mode;
943 winctx->rsvd_txbuf_count = txattr->rsvd_txbuf_count;
945 winctx->intr_disable = true;
947 winctx->data_stamp = true;
949 winctx->lpid = txattr->lpid;
950 winctx->pidr = txattr->pidr;
951 winctx->rx_win_id = txwin->rxwin->vas_win.winid;
/*
 * IRQ and fault window setup is successful. Set fault window
 * for the send window so that ready to handle faults.
 */
956 if (txwin->vinst->virq)
957 winctx->fault_win_id = txwin->vinst->fault_win->vas_win.winid;
959 winctx->dma_type = VAS_DMA_TYPE_INJECT;
960 winctx->tc_mode = txattr->tc_mode;
961 winctx->min_scope = VAS_SCOPE_LOCAL;
962 winctx->max_scope = VAS_SCOPE_VECTORED_GROUP;
963 if (txwin->vinst->virq)
964 winctx->irq_port = txwin->vinst->irq_port;
/* Caller-supplied pswid wins; otherwise encode (vas_id, winid). */
966 winctx->pswid = txattr->pswid ? txattr->pswid :
967 encode_pswid(txwin->vinst->vas_id,
968 txwin->vas_win.winid);
/*
 * Validate send-window open arguments for coprocessor type @cop.
 * Returns true if acceptable. (Extract incomplete: the individual
 * "return false" lines and the final "return true" are in missing
 * lines; only the checked conditions are visible.)
 */
971 static bool tx_win_args_valid(enum vas_cop_type cop,
972 struct vas_tx_win_attr *attr)
974 if (attr->tc_mode != VAS_THRESH_DISABLED)
977 if (cop > VAS_COP_TYPE_MAX)
980 if (attr->wcreds_max > VAS_TX_WCREDS_MAX)
983 if (attr->user_win) {
984 if (attr->rsvd_txbuf_count)
/* User send windows are only allowed for FTW and GZIP engines. */
987 if (cop != VAS_COP_TYPE_FTW && cop != VAS_COP_TYPE_GZIP &&
988 cop != VAS_COP_TYPE_GZIP_HIPRI)
/*
 * Open a send window on VAS instance @vasid for coprocessor type @cop
 * with attributes @attr: validate args, pair with a receive window,
 * allocate and program the window, map its paste region (kernel
 * windows) or take user context references (user windows), then
 * publish it. Returns the embedded vas_window or an ERR_PTR.
 * (Extract incomplete: several error-path labels/returns are in
 * missing lines.)
 */
995 struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
996 struct vas_tx_win_attr *attr)
999 struct pnv_vas_window *txwin;
1000 struct pnv_vas_window *rxwin;
1001 struct vas_winctx winctx;
1002 struct vas_instance *vinst;
1004 trace_vas_tx_win_open(current, vasid, cop, attr);
1006 if (!tx_win_args_valid(cop, attr))
1007 return ERR_PTR(-EINVAL);
/*
 * If caller did not specify a vasid but specified the PSWID of a
 * receive window (applicable only to FTW windows), use the vasid
 * from that receive window.
 */
1014 if (vasid == -1 && attr->pswid)
1015 decode_pswid(attr->pswid, &vasid, NULL);
1017 vinst = find_vas_instance(vasid);
1019 pr_devel("vasid %d not found!\n", vasid);
1020 return ERR_PTR(-EINVAL);
1023 rxwin = get_vinst_rxwin(vinst, cop, attr->pswid);
1024 if (IS_ERR(rxwin)) {
1025 pr_devel("No RxWin for vasid %d, cop %d\n", vasid, cop);
1026 return (struct vas_window *)rxwin;
1029 txwin = vas_window_alloc(vinst);
1030 if (IS_ERR(txwin)) {
1031 rc = PTR_ERR(txwin);
1035 txwin->vas_win.cop = cop;
1037 txwin->rxwin = rxwin;
1038 txwin->nx_win = txwin->rxwin->nx_win;
1039 txwin->user_win = attr->user_win;
1040 txwin->vas_win.wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT;
1042 init_winctx_for_txwin(txwin, attr, &winctx);
1044 init_winctx_regs(txwin, &winctx);
/*
 * If its a kernel send window, map the window address into the
 * kernel's address space. For user windows, user must issue an
 * mmap() to map the window into their address space.
 *
 * NOTE: If kernel ever resubmits a user CRB after handling a page
 * fault, we will need to map this into kernel as well.
 */
1054 if (!txwin->user_win) {
1055 txwin->paste_kaddr = map_paste_region(txwin);
1056 if (IS_ERR(txwin->paste_kaddr)) {
1057 rc = PTR_ERR(txwin->paste_kaddr);
/*
 * Interrupt hanlder or fault window setup failed. Means
 * NX can not generate fault for page fault. So not
 * opening for user space tx window.
 * (The vinst->virq check guarding this is in missing lines.)
 */
1070 rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
1074 vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
1077 set_vinst_win(vinst, txwin);
1079 return &txwin->vas_win;
/* Common error exit: free the partially constructed send window. */
1082 vas_window_free(txwin);
1089 EXPORT_SYMBOL_GPL(vas_tx_win_open);
/*
 * Copy the CRB at @crb into the copy buffer at @offset using the
 * "copy" instruction (see copy-paste.h). Thin exported wrapper.
 */
1091 int vas_copy_crb(void *crb, int offset)
1093 return vas_copy(crb, offset);
1095 EXPORT_SYMBOL_GPL(vas_copy_crb);
/* Report-enable bit within the paste address (equivalent to 1K offset). */
1097 #define RMA_LSMP_REPORT_ENABLE PPC_BIT(53)
/*
 * Paste the previously copied CRB to the send window @vwin at @offset.
 * @re requests report-enable. Returns the result mapped from the CR
 * value of the paste (pass/fail only for now). (Extract incomplete:
 * the return statements are in missing lines.)
 */
1098 int vas_paste_crb(struct vas_window *vwin, int offset, bool re)
1100 struct pnv_vas_window *txwin;
1105 txwin = container_of(vwin, struct pnv_vas_window, vas_win);
1106 trace_vas_paste_crb(current, txwin);
/*
 * Only NX windows are supported for now and hardware assumes
 * report-enable flag is set for NX windows. Ensure software
 * complies.
 */
1113 WARN_ON_ONCE(txwin->nx_win && !re);
1115 addr = txwin->paste_kaddr;
/*
 * Set the REPORT_ENABLE bit (equivalent to writing
 * to 1K offset of the paste address)
 */
1121 val = SET_FIELD(RMA_LSMP_REPORT_ENABLE, 0ULL, 1);
/*
 * Map the raw CR value from vas_paste() to an error code (there
 * is just pass or fail for now though).
 */
1129 rc = vas_paste(addr, offset);
1135 pr_debug("Txwin #%d: Msg count %llu\n", txwin->vas_win.winid,
1136 read_hvwc_reg(txwin, VREG(LRFIFO_PUSH)));
1140 EXPORT_SYMBOL_GPL(vas_paste_crb);
/*
 * If credit checking is enabled for this window, poll for the return
 * of window credits (i.e for NX engines to process any outstanding CRBs).
 * Since NX-842 waits for the CRBs to be processed before closing the
 * window, we should not have to wait for too long.
 *
 * TODO: We retry in 10ms intervals now. We could/should probably peek at
 * the VAS_LRFIFO_PUSH_OFFSET register to get an estimate of pending
 * CRBs on the FIFO and compute the delay dynamically on each retry.
 * But that is not really needed until we support NX-GZIP access from
 * user space. (NX-842 driver waits for CSB and Fast thread-wakeup
 * doesn't use credit checking).
 */
1155 static void poll_window_credits(struct pnv_vas_window *window)
/* Read the credit-check mode bit for this window's direction. */
1161 val = read_hvwc_reg(window, VREG(WINCTL));
1163 mode = GET_FIELD(VAS_WINCTL_TX_WCRED_MODE, val);
1165 mode = GET_FIELD(VAS_WINCTL_RX_WCRED_MODE, val);
/* Read the current credit count (TX_WCRED for send, LRX_WCRED for rx). */
1170 if (window->tx_win) {
1171 val = read_hvwc_reg(window, VREG(TX_WCRED));
1172 creds = GET_FIELD(VAS_TX_WCRED, val);
1174 val = read_hvwc_reg(window, VREG(LRX_WCRED));
1175 creds = GET_FIELD(VAS_LRX_WCRED, val);
/*
 * Takes around few milliseconds to complete all pending requests
 * and return credits.
 * TODO: Scan fault FIFO and invalidate CRBs points to this window
 * and issue CRB Kill to stop all pending requests. Need only
 * if there is a bug in NX or fault handling in kernel.
 */
1185 if (creds < window->vas_win.wcreds_max) {
1187 set_current_state(TASK_UNINTERRUPTIBLE);
1188 schedule_timeout(msecs_to_jiffies(10));
/*
 * Process can not close send window until all credits are
 * returned; warn every 1000 retries (~10s) while stuck.
 */
1194 if (!(count % 1000))
1195 pr_warn_ratelimited("VAS: pid %d stuck. Waiting for credits returned for Window(%d). creds %d, Retries %d\n",
1196 vas_window_pid(&window->vas_win),
1197 window->vas_win.winid,
1205 * Wait for the window to go to "not-busy" state. It should only take a
1206 * short time to queue a CRB, so window should not be busy for too long.
1207 * Trying 5ms intervals.
/*
 * NOTE(review): the comment above says 5ms but the schedule_timeout()
 * below uses 10ms — the code is authoritative; comment looks stale.
 */
1209 static void poll_window_busy_state(struct pnv_vas_window *window)
/* Poll the BUSY bit in the window status register */
1216 val = read_hvwc_reg(window, VREG(WIN_STATUS));
1217 busy = GET_FIELD(VAS_WIN_BUSY, val);
1220 set_current_state(TASK_UNINTERRUPTIBLE);
1221 schedule_timeout(msecs_to_jiffies(10));
1224 * Takes around few milliseconds to process all pending
/* Warn (rate-limited) every 1000 retries while still busy */
1227 if (!(count % 1000))
1228 pr_warn_ratelimited("VAS: pid %d stuck. Window (ID=%d) is in busy state. Retries %d\n",
1229 vas_window_pid(&window->vas_win),
1230 window->vas_win.winid, count);
1237 * Have the hardware cast a window out of cache and wait for it to
1240 * NOTE: It can take a relatively long time to cast the window context
1241 * out of the cache. It is not strictly necessary to cast out if:
1243 * - we clear the "Pin Window" bit (so hardware is free to evict)
1245 * - we re-initialize the window context when it is reassigned.
1247 * We do the former in vas_win_close() and latter in vas_win_open().
1248 * So, ignoring the cast-out for now. We can add it as needed. If
1249 * casting out becomes necessary we should consider offloading the
1250 * job to a worker thread, so the window close can proceed quickly.
/* Intentionally a no-op per the rationale above (see vas_win_close()) */
1252 static void poll_window_castout(struct pnv_vas_window *window)
1258 * Unpin and close a window so no new requests are accepted and the
1259 * hardware can evict this window from cache if necessary.
1261 static void unpin_close_window(struct pnv_vas_window *window)
/* Read-modify-write WINCTL: clear both the PIN and OPEN bits */
1265 val = read_hvwc_reg(window, VREG(WINCTL));
1266 val = SET_FIELD(VAS_WINCTL_PIN, val, 0);
1267 val = SET_FIELD(VAS_WINCTL_OPEN, val, 0);
1268 write_hvwc_reg(window, VREG(WINCTL), val);
1274 * See Section 1.12.1 of VAS workbook v1.05 for details on closing window:
1275 * - Disable new paste operations (unmap paste address)
1276 * - Poll for the "Window Busy" bit to be cleared
1277 * - Clear the Open/Enable bit for the Window.
1278 * - Poll for return of window Credits (implies FIFO empty for Rx win?)
1279 * - Unpin and cast window context out of cache
1281 * Besides the hardware, kernel has some bookkeeping of course.
1283 int vas_win_close(struct vas_window *vwin)
1285 struct pnv_vas_window *window;
1290 window = container_of(vwin, struct pnv_vas_window, vas_win);
/* An Rx window with send windows still attached cannot be closed */
1292 if (!window->tx_win && atomic_read(&window->num_txwins) != 0) {
1293 pr_devel("Attempting to close an active Rx window!\n");
/* Step 1: stop new pastes by unmapping the paste region */
1298 unmap_paste_region(window);
/* Step 2: wait until the hardware finishes queuing in-flight CRBs */
1300 poll_window_busy_state(window);
/* Step 3: clear Open/Pin so the window accepts nothing more */
1302 unpin_close_window(window);
/* Step 4: wait for all window credits to be returned */
1304 poll_window_credits(window);
/* Remove the window from this VAS instance's bookkeeping */
1306 clear_vinst_win(window);
/* Step 5: cast-out (currently a no-op — see poll_window_castout()) */
1308 poll_window_castout(window);
1310 /* if send window, drop reference to matching receive window */
1311 if (window->tx_win) {
1312 if (window->user_win) {
/* User-space window: release the mm/task references taken at open */
1313 mm_context_remove_vas_window(vwin->task_ref.mm);
1314 put_vas_user_win_ref(&vwin->task_ref);
1316 put_rx_win(window->rxwin);
1319 vas_window_free(window);
1323 EXPORT_SYMBOL_GPL(vas_win_close);
1326 * Return credit for the given window.
1327 * Send windows and fault window uses credit mechanism as follows:
1330 * - The default number of credits available for each send window is
1331 * 1024. It means 1024 requests can be issued asynchronously at the
1332 * same time. If the credit is not available, that request will be
1333 * returned with RMA_Busy.
1334 * - One credit is taken when NX request is issued.
1335 * - This credit is returned after NX processed that request.
1336 * - If NX encounters translation error, kernel will return the
1337 * credit on the specific send window after processing the fault CRB.
1340 * - The total number credits available is FIFO_SIZE/CRB_SIZE.
1341 * Means 4MB/128 in the current implementation. If credit is not
1342 * available, RMA_Reject is returned.
1343 * - A credit is taken when NX pastes CRB in fault FIFO.
1344 * - The kernel with return credit on fault window after reading entry
/*
 * @window: window to return one credit to.
 * @tx:     true for a send window, false for a receive (fault) window.
 */
1347 void vas_return_credit(struct pnv_vas_window *window, bool tx)
/* Writing 1 to the *_WCRED_ADDER register adds one credit back */
1352 if (tx) { /* send window */
1353 val = SET_FIELD(VAS_TX_WCRED, val, 1);
1354 write_hvwc_reg(window, VREG(TX_WCRED_ADDER), val);
1356 val = SET_FIELD(VAS_LRX_WCRED, val, 1);
1357 write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), val);
/*
 * vas_pswid_to_window() - Look up the send window for a paste-switchboard
 * window id (pswid), typically while handling a fault CRB.
 * @vinst: VAS instance to search.
 *
 * Returns the window on success or ERR_PTR(-ESRCH) if the pswid is zero,
 * out of range, or does not map to a valid window.  Sanity-checks that the
 * result is an NX user send window (see comment below).
 */
1361 struct pnv_vas_window *vas_pswid_to_window(struct vas_instance *vinst,
1364 struct pnv_vas_window *window;
/* pswid 0 is never valid — bail out early */
1368 pr_devel("%s: called for pswid 0!\n", __func__);
1369 return ERR_PTR(-ESRCH);
/* Extract only the window id; chip id is not needed here */
1372 decode_pswid(pswid, NULL, &winid);
1374 if (winid >= VAS_WINDOWS_PER_CHIP)
1375 return ERR_PTR(-ESRCH);
1378 * If application closes the window before the hardware
1379 * returns the fault CRB, we should wait in vas_win_close()
1380 * for the pending requests. so the window must be active
1381 * and the process alive.
1383 * If its a kernel process, we should not get any faults and
1384 * should not get here.
1386 window = vinst->windows[winid];
1389 pr_err("PSWID decode: Could not find window for winid %d pswid %d vinst 0x%p\n",
1390 winid, pswid, vinst);
1395 * Do some sanity checks on the decoded window. Window should be
1396 * NX GZIP user send window. FTW windows should not incur faults
1397 * since their CRBs are ignored (not queued on FIFO or processed
/* Reject anything that is not a user-space NX send window */
1400 if (!window->tx_win || !window->user_win || !window->nx_win ||
1401 window->vas_win.cop == VAS_COP_TYPE_FAULT ||
1402 window->vas_win.cop == VAS_COP_TYPE_FTW) {
1403 pr_err("PSWID decode: id %d, tx %d, user %d, nx %d, cop %d\n",
1404 winid, window->tx_win, window->user_win,
1405 window->nx_win, window->vas_win.cop);
/*
 * Open a user-space send window of the given coprocessor type on the VAS
 * instance @vas_id.  Backs the .open_win hook in the vops table below.
 */
1412 static struct vas_window *vas_user_win_open(int vas_id, u64 flags,
1413 enum vas_cop_type cop_type)
1415 struct vas_tx_win_attr txattr = {};
1417 vas_init_tx_win_attr(&txattr, cop_type);
/* Identify the calling context: current partition and process ids */
1419 txattr.lpid = mfspr(SPRN_LPID);
1420 txattr.pidr = mfspr(SPRN_PID);
1421 txattr.user_win = true;
1422 txattr.rsvd_txbuf_count = false;
1423 txattr.pswid = false;
/*
 * NOTE(review): format string uses %d/%ld — verify the argument types
 * match (pidr vs mfspr() return); full statement is not visible here.
 */
1425 pr_devel("Pid %d: Opening txwin, PIDR %ld\n", txattr.pidr,
1428 return vas_tx_win_open(vas_id, cop_type, &txattr);
/*
 * Return the paste bus address of the send window @txwin; used as the
 * .paste_addr hook so the API layer can mmap it into user space.
 */
1431 static u64 vas_user_win_paste_addr(struct vas_window *txwin)
1433 struct pnv_vas_window *win;
1436 win = container_of(txwin, struct pnv_vas_window, vas_win);
/* len is not needed by this caller, hence NULL */
1437 vas_win_paste_addr(win, &paste_addr, NULL);
/* Thin .close_win hook: delegate to the common vas_win_close() path */
1442 static int vas_user_win_close(struct vas_window *txwin)
1444 vas_win_close(txwin);
/* Ops vector handed to the common coproc API layer (powernv backend) */
1449 static const struct vas_user_win_ops vops = {
1450 .open_win = vas_user_win_open,
1451 .paste_addr = vas_user_win_paste_addr,
1452 .close_win = vas_user_win_close,
1456 * Supporting only nx-gzip coprocessor type now, but this API code
1457 * extended to other coprocessor types later.
/*
 * Register the powernv VAS backend with the common coproc API layer,
 * passing our vops table defined above.
 */
1459 int vas_register_api_powernv(struct module *mod, enum vas_cop_type cop_type,
1463 return vas_register_coproc_api(mod, cop_type, name, &vops);
1465 EXPORT_SYMBOL_GPL(vas_register_api_powernv);
/* Undo vas_register_api_powernv(): unregister from the coproc API layer */
1467 void vas_unregister_api_powernv(void)
1469 vas_unregister_coproc_api();
1471 EXPORT_SYMBOL_GPL(vas_unregister_api_powernv);