1 // SPDX-License-Identifier: GPL-2.0
3 * channel program interfaces
5 * Copyright IBM Corp. 2017
7 * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
8 * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
12 #include <linux/slab.h>
13 #include <linux/iommu.h>
14 #include <linux/vfio.h>
15 #include <asm/idals.h>
17 #include "vfio_ccw_cp.h"
20 * Max length for ccw chain.
21 * XXX: Limit to 256, need to check more?
23 #define CCWCHAIN_LEN_MAX 256
26 /* Starting guest physical I/O address. */
27 unsigned long pa_iova;
28 /* Array that stores the PFNs of the pages that need to be pinned. */
29 unsigned long *pa_iova_pfn;
30 /* Array that receives PFNs of the pages pinned. */
31 unsigned long *pa_pfn;
32 /* Number of pages pinned from @pa_iova. */
36 struct pfn_array_table {
37 struct pfn_array *pat_pa;
42 struct list_head next;
44 /* Guest physical address of the current chain. */
46 /* Count of the valid ccws in chain. */
48 /* Pinned PAGEs for the original data. */
49 struct pfn_array_table *ch_pat;
53 * pfn_array_alloc_pin() - alloc memory for PFNs, then pin user pages in memory
54 * @pa: pfn_array on which to perform the operation
55 * @mdev: the mediated device to perform pin/unpin operations
56 * @iova: target guest physical address
57 * @len: number of bytes that should be pinned from @iova
59 * Attempt to allocate memory for PFNs, and pin user pages in memory.
62 * We expect (pa_nr == 0) and (pa_iova_pfn == NULL), any field in
63 * this structure will be filled in by this function.
66 * Number of pages pinned on success.
67 * If @pa->pa_nr is not 0, or @pa->pa_iova_pfn is not NULL initially,
69 * If no pages were pinned, returns -errno.
71 static int pfn_array_alloc_pin(struct pfn_array *pa, struct device *mdev,
72 u64 iova, unsigned int len)
79 if (pa->pa_nr || pa->pa_iova_pfn)
84 pa->pa_nr = ((iova & ~PAGE_MASK) + len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
88 pa->pa_iova_pfn = kcalloc(pa->pa_nr,
89 sizeof(*pa->pa_iova_pfn) +
92 if (unlikely(!pa->pa_iova_pfn)) {
96 pa->pa_pfn = pa->pa_iova_pfn + pa->pa_nr;
98 pa->pa_iova_pfn[0] = pa->pa_iova >> PAGE_SHIFT;
99 for (i = 1; i < pa->pa_nr; i++)
100 pa->pa_iova_pfn[i] = pa->pa_iova_pfn[i - 1] + 1;
102 ret = vfio_pin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr,
103 IOMMU_READ | IOMMU_WRITE, pa->pa_pfn);
107 } else if (ret > 0 && ret != pa->pa_nr) {
108 vfio_unpin_pages(mdev, pa->pa_iova_pfn, ret);
117 kfree(pa->pa_iova_pfn);
118 pa->pa_iova_pfn = NULL;
123 /* Unpin the pages before releasing the memory. */
124 static void pfn_array_unpin_free(struct pfn_array *pa, struct device *mdev)
126 vfio_unpin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr);
128 kfree(pa->pa_iova_pfn);
131 static int pfn_array_table_init(struct pfn_array_table *pat, int nr)
133 pat->pat_pa = kcalloc(nr, sizeof(*pat->pat_pa), GFP_KERNEL);
134 if (unlikely(ZERO_OR_NULL_PTR(pat->pat_pa))) {
144 static void pfn_array_table_unpin_free(struct pfn_array_table *pat,
149 for (i = 0; i < pat->pat_nr; i++)
150 pfn_array_unpin_free(pat->pat_pa + i, mdev);
159 static bool pfn_array_table_iova_pinned(struct pfn_array_table *pat,
162 struct pfn_array *pa = pat->pat_pa;
163 unsigned long iova_pfn = iova >> PAGE_SHIFT;
166 for (i = 0; i < pat->pat_nr; i++, pa++)
167 for (j = 0; j < pa->pa_nr; j++)
168 if (pa->pa_iova_pfn[j] == iova_pfn)
173 /* Create the list idal words for a pfn_array_table. */
174 static inline void pfn_array_table_idal_create_words(
175 struct pfn_array_table *pat,
176 unsigned long *idaws)
178 struct pfn_array *pa;
182 * Idal words (except the first one) rely on the memory being 4k
183 * aligned. If a user virtual address is 4K aligned, then its
184 * corresponding kernel physical address will also be 4K aligned. Thus
185 * there will be no problem here to simply use the phys to create an
189 for (i = 0; i < pat->pat_nr; i++) {
190 pa = pat->pat_pa + i;
191 for (j = 0; j < pa->pa_nr; j++) {
192 idaws[k] = pa->pa_pfn[j] << PAGE_SHIFT;
194 idaws[k] += pa->pa_iova & (PAGE_SIZE - 1);
202 * Within the domain (@mdev), copy @n bytes from a guest physical
203 * address (@iova) to a host physical address (@to).
205 static long copy_from_iova(struct device *mdev,
209 struct pfn_array pa = {0};
214 ret = pfn_array_alloc_pin(&pa, mdev, iova, n);
219 for (i = 0; i < pa.pa_nr; i++) {
220 from = pa.pa_pfn[i] << PAGE_SHIFT;
223 from += iova & (PAGE_SIZE - 1);
224 m -= iova & (PAGE_SIZE - 1);
228 memcpy(to + (n - l), (void *)from, m);
235 pfn_array_unpin_free(&pa, mdev);
240 static long copy_ccw_from_iova(struct channel_program *cp,
241 struct ccw1 *to, u64 iova,
249 ret = copy_from_iova(cp->mdev, to, iova, len * sizeof(struct ccw1));
253 if (!cp->orb.cmd.fmt) {
255 for (i = 0; i < len; i++) {
256 ccw0 = *(struct ccw0 *)pccw1;
257 if ((pccw1->cmd_code & 0x0f) == CCW_CMD_TIC) {
258 pccw1->cmd_code = CCW_CMD_TIC;
262 pccw1->cmd_code = ccw0.cmd_code;
263 pccw1->flags = ccw0.flags;
264 pccw1->count = ccw0.count;
266 pccw1->cda = ccw0.cda;
275 * Helpers to operate ccwchain.
277 #define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0)
279 #define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP)
281 #define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC)
283 #define ccw_is_idal(_ccw) ((_ccw)->flags & CCW_FLAG_IDA)
286 #define ccw_is_chain(_ccw) ((_ccw)->flags & (CCW_FLAG_CC | CCW_FLAG_DC))
288 static struct ccwchain *ccwchain_alloc(struct channel_program *cp, int len)
290 struct ccwchain *chain;
294 /* Make ccw address aligned to 8. */
295 size = ((sizeof(*chain) + 7L) & -8L) +
296 sizeof(*chain->ch_ccw) * len +
297 sizeof(*chain->ch_pat) * len;
298 chain = kzalloc(size, GFP_DMA | GFP_KERNEL);
302 data = (u8 *)chain + ((sizeof(*chain) + 7L) & -8L);
303 chain->ch_ccw = (struct ccw1 *)data;
305 data = (u8 *)(chain->ch_ccw) + sizeof(*chain->ch_ccw) * len;
306 chain->ch_pat = (struct pfn_array_table *)data;
310 list_add_tail(&chain->next, &cp->ccwchain_list);
315 static void ccwchain_free(struct ccwchain *chain)
317 list_del(&chain->next);
321 /* Free resource for a ccw that allocated memory for its cda. */
322 static void ccwchain_cda_free(struct ccwchain *chain, int idx)
324 struct ccw1 *ccw = chain->ch_ccw + idx;
326 if (ccw_is_test(ccw) || ccw_is_noop(ccw) || ccw_is_tic(ccw))
331 kfree((void *)(u64)ccw->cda);
334 /* Unpin the pages then free the memory resources. */
335 static void cp_unpin_free(struct channel_program *cp)
337 struct ccwchain *chain, *temp;
340 list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
341 for (i = 0; i < chain->ch_len; i++) {
342 pfn_array_table_unpin_free(chain->ch_pat + i,
344 ccwchain_cda_free(chain, i);
346 ccwchain_free(chain);
351 * ccwchain_calc_length - calculate the length of the ccw chain.
352 * @iova: guest physical address of the target ccw chain
353 * @cp: channel_program on which to perform the operation
355 * This is the chain length not considering any TICs.
356 * You need to do a new round for each TIC target.
358 * The program is also validated for absence of not yet supported
359 * indirect data addressing scenarios.
361 * Returns: the length of the ccw chain or -errno.
363 static int ccwchain_calc_length(u64 iova, struct channel_program *cp)
365 struct ccw1 *ccw, *p;
369 * Copy current chain from guest to host kernel.
370 * Currently the chain length is limited to CCWCHAIN_LEN_MAX (256).
371 * So copying 2K is enough (safe).
373 p = ccw = kcalloc(CCWCHAIN_LEN_MAX, sizeof(*ccw), GFP_KERNEL);
377 cnt = copy_ccw_from_iova(cp, ccw, iova, CCWCHAIN_LEN_MAX);
388 * As we don't want to fail direct addressing even if the
389 * orb specified one of the unsupported formats, we defer
390 * checking for IDAWs in unsupported formats to here.
392 if ((!cp->orb.cmd.c64 || cp->orb.cmd.i2k) && ccw_is_idal(ccw)) {
397 if ((!ccw_is_chain(ccw)) && (!ccw_is_tic(ccw)))
401 } while (cnt < CCWCHAIN_LEN_MAX + 1);
403 if (cnt == CCWCHAIN_LEN_MAX + 1)
410 static int tic_target_chain_exists(struct ccw1 *tic, struct channel_program *cp)
412 struct ccwchain *chain;
413 u32 ccw_head, ccw_tail;
415 list_for_each_entry(chain, &cp->ccwchain_list, next) {
416 ccw_head = chain->ch_iova;
417 ccw_tail = ccw_head + (chain->ch_len - 1) * sizeof(struct ccw1);
419 if ((ccw_head <= tic->cda) && (tic->cda <= ccw_tail))
426 static int ccwchain_loop_tic(struct ccwchain *chain,
427 struct channel_program *cp);
429 static int ccwchain_handle_tic(struct ccw1 *tic, struct channel_program *cp)
431 struct ccwchain *chain;
434 /* May transfer to an existing chain. */
435 if (tic_target_chain_exists(tic, cp))
438 /* Get chain length. */
439 len = ccwchain_calc_length(tic->cda, cp);
443 /* Need alloc a new chain for this one. */
444 chain = ccwchain_alloc(cp, len);
447 chain->ch_iova = tic->cda;
449 /* Copy the new chain from user. */
450 ret = copy_ccw_from_iova(cp, chain->ch_ccw, tic->cda, len);
452 ccwchain_free(chain);
456 /* Loop for tics on this new chain. */
457 return ccwchain_loop_tic(chain, cp);
461 static int ccwchain_loop_tic(struct ccwchain *chain, struct channel_program *cp)
466 for (i = 0; i < chain->ch_len; i++) {
467 tic = chain->ch_ccw + i;
469 if (!ccw_is_tic(tic))
472 ret = ccwchain_handle_tic(tic, cp);
480 static int ccwchain_fetch_tic(struct ccwchain *chain,
482 struct channel_program *cp)
484 struct ccw1 *ccw = chain->ch_ccw + idx;
485 struct ccwchain *iter;
486 u32 ccw_head, ccw_tail;
488 list_for_each_entry(iter, &cp->ccwchain_list, next) {
489 ccw_head = iter->ch_iova;
490 ccw_tail = ccw_head + (iter->ch_len - 1) * sizeof(struct ccw1);
492 if ((ccw_head <= ccw->cda) && (ccw->cda <= ccw_tail)) {
493 ccw->cda = (__u32) (addr_t) (((char *)iter->ch_ccw) +
494 (ccw->cda - ccw_head));
502 static int ccwchain_fetch_direct(struct ccwchain *chain,
504 struct channel_program *cp)
507 struct pfn_array_table *pat;
508 unsigned long *idaws;
511 ccw = chain->ch_ccw + idx;
515 * We just want the translation result of any direct ccw
516 * to be an IDA ccw, so let's add the IDA flag for it.
517 * Although the flag will be ignored by firmware.
519 ccw->flags |= CCW_FLAG_IDA;
524 * Pin data page(s) in memory.
525 * The number of pages actually is the count of the idaws which will be
526 * needed when translating a direct ccw to an idal ccw.
528 pat = chain->ch_pat + idx;
529 ret = pfn_array_table_init(pat, 1);
533 ret = pfn_array_alloc_pin(pat->pat_pa, cp->mdev, ccw->cda, ccw->count);
537 /* Translate this direct ccw to an idal ccw. */
538 idaws = kcalloc(ret, sizeof(*idaws), GFP_DMA | GFP_KERNEL);
543 ccw->cda = (__u32) virt_to_phys(idaws);
544 ccw->flags |= CCW_FLAG_IDA;
546 pfn_array_table_idal_create_words(pat, idaws);
551 pfn_array_table_unpin_free(pat, cp->mdev);
557 static int ccwchain_fetch_idal(struct ccwchain *chain,
559 struct channel_program *cp)
562 struct pfn_array_table *pat;
563 unsigned long *idaws;
565 unsigned int idaw_nr, idaw_len;
568 ccw = chain->ch_ccw + idx;
573 /* Calculate size of idaws. */
574 ret = copy_from_iova(cp->mdev, &idaw_iova, ccw->cda, sizeof(idaw_iova));
577 idaw_nr = idal_nr_words((void *)(idaw_iova), ccw->count);
578 idaw_len = idaw_nr * sizeof(*idaws);
580 /* Pin data page(s) in memory. */
581 pat = chain->ch_pat + idx;
582 ret = pfn_array_table_init(pat, idaw_nr);
586 /* Translate idal ccw to use new allocated idaws. */
587 idaws = kzalloc(idaw_len, GFP_DMA | GFP_KERNEL);
593 ret = copy_from_iova(cp->mdev, idaws, ccw->cda, idaw_len);
597 ccw->cda = virt_to_phys(idaws);
599 for (i = 0; i < idaw_nr; i++) {
600 idaw_iova = *(idaws + i);
602 ret = pfn_array_alloc_pin(pat->pat_pa + i, cp->mdev,
608 pfn_array_table_idal_create_words(pat, idaws);
615 pfn_array_table_unpin_free(pat, cp->mdev);
623 * To reduce memory copy, we'll pin the cda page in memory,
624 * and to get rid of the cda 2G limitation of ccw1, we'll translate
625 * direct ccws to idal ccws.
627 static int ccwchain_fetch_one(struct ccwchain *chain,
629 struct channel_program *cp)
631 struct ccw1 *ccw = chain->ch_ccw + idx;
633 if (ccw_is_test(ccw) || ccw_is_noop(ccw))
637 return ccwchain_fetch_tic(chain, idx, cp);
639 if (ccw_is_idal(ccw))
640 return ccwchain_fetch_idal(chain, idx, cp);
642 return ccwchain_fetch_direct(chain, idx, cp);
646 * cp_init() - allocate ccwchains for a channel program.
647 * @cp: channel_program on which to perform the operation
648 * @mdev: the mediated device to perform pin/unpin operations
649 * @orb: control block for the channel program from the guest
651 * This creates one or more ccwchain(s), and copies the raw data of
652 * the target channel program from @orb->cmd.iova to the new ccwchain(s).
655 * 1. Supports only prefetch enabled mode.
656 * 2. Supports idal(c64) ccw chaining.
657 * 3. Supports 4k idaw.
660 * %0 on success and a negative error value on failure.
662 int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
664 u64 iova = orb->cmd.cpa;
665 struct ccwchain *chain;
670 * Only support prefetch enable mode now.
675 INIT_LIST_HEAD(&cp->ccwchain_list);
676 memcpy(&cp->orb, orb, sizeof(*orb));
679 /* Get chain length. */
680 len = ccwchain_calc_length(iova, cp);
684 /* Alloc mem for the head chain. */
685 chain = ccwchain_alloc(cp, len);
688 chain->ch_iova = iova;
690 /* Copy the head chain from guest. */
691 ret = copy_ccw_from_iova(cp, chain->ch_ccw, iova, len);
693 ccwchain_free(chain);
697 /* Now loop for its TICs. */
698 ret = ccwchain_loop_tic(chain, cp);
701 /* It is safe to force: if not set but idals used
702 * ccwchain_calc_length returns an error.
711 * cp_free() - free resources for channel program.
712 * @cp: channel_program on which to perform the operation
714 * This unpins the memory pages and frees the memory space occupied by
715 * @cp, which must have been returned by a previous call to cp_init().
716 * Otherwise, undefined behavior occurs.
718 void cp_free(struct channel_program *cp)
724 * cp_prefetch() - translate a guest physical address channel program to
725 * a real-device runnable channel program.
726 * @cp: channel_program on which to perform the operation
728 * This function translates the guest-physical-address channel program
729 * and stores the result to ccwchain list. @cp must have been
730 * initialized by a previous call with cp_init(). Otherwise, undefined
732 * For each chain composing the channel program:
733 * - On entry ch_len holds the count of CCWs to be translated.
734 * - On exit ch_len is adjusted to the count of successfully translated CCWs.
735 * This allows cp_free to find in ch_len the count of CCWs to free in a chain.
737 * The S/390 CCW Translation APIs (prefixed by 'cp_') are introduced
738 * as helpers to do ccw chain translation inside the kernel. Basically
739 * they accept a channel program issued by a virtual machine, and
740 * translate the channel program to a real-device runnable channel
743 * These APIs will copy the ccws into kernel-space buffers, and update
744 * the guest physical addresses with their corresponding host physical
745 * addresses. Then channel I/O device drivers could issue the
746 * translated channel program to real devices to perform an I/O
749 * These interfaces are designed to support translation only for
750 * channel programs, which are generated and formatted by a
751 * guest. Thus this will make it possible for things like VFIO to
752 * leverage the interfaces to passthrough a channel I/O mediated
755 * We support direct ccw chaining by translating them to idal ccws.
758 * %0 on success and a negative error value on failure.
760 int cp_prefetch(struct channel_program *cp)
762 struct ccwchain *chain;
765 list_for_each_entry(chain, &cp->ccwchain_list, next) {
767 for (idx = 0; idx < len; idx++) {
768 ret = ccwchain_fetch_one(chain, idx, cp);
776 /* Only cleanup the chain elements that were actually translated. */
778 list_for_each_entry_continue(chain, &cp->ccwchain_list, next) {
785 * cp_get_orb() - get the orb of the channel program
786 * @cp: channel_program on which to perform the operation
787 * @intparm: new intparm for the returned orb
788 * @lpm: candidate value of the logical-path mask for the returned orb
790 * This function returns the address of the updated orb of the channel
791 * program. Channel I/O device drivers could use this orb to issue a
794 union orb *cp_get_orb(struct channel_program *cp, u32 intparm, u8 lpm)
797 struct ccwchain *chain;
802 orb->cmd.intparm = intparm;
804 orb->cmd.key = PAGE_DEFAULT_KEY >> 4;
806 if (orb->cmd.lpm == 0)
809 chain = list_first_entry(&cp->ccwchain_list, struct ccwchain, next);
811 orb->cmd.cpa = (__u32) __pa(cpa);
817 * cp_update_scsw() - update scsw for a channel program.
818 * @cp: channel_program on which to perform the operation
819 * @scsw: I/O results of the channel program and also the target to be
822 * @scsw contains the I/O results of the channel program that is pointed
823 * to by @cp. However what @scsw->cpa stores is a host physical
824 * address, which is meaningless for the guest, which is waiting for
827 * This function updates @scsw->cpa to its corresponding guest physical
830 void cp_update_scsw(struct channel_program *cp, union scsw *scsw)
832 struct ccwchain *chain;
833 u32 cpa = scsw->cmd.cpa;
834 u32 ccw_head, ccw_tail;
838 * For now, only update the cmd.cpa part. We may need to deal with
839 * other portions of the schib as well, even if we don't return them
840 * in the ioctl directly. Path status changes etc.
842 list_for_each_entry(chain, &cp->ccwchain_list, next) {
843 ccw_head = (u32)(u64)chain->ch_ccw;
844 ccw_tail = (u32)(u64)(chain->ch_ccw + chain->ch_len - 1);
846 if ((ccw_head <= cpa) && (cpa <= ccw_tail)) {
848 * (cpa - ccw_head) is the offset value of the host
849 * physical ccw to its chain head.
850 * Adding this value to the guest physical ccw chain
851 * head gets us the guest cpa.
853 cpa = chain->ch_iova + (cpa - ccw_head);
862 * cp_iova_pinned() - check if an iova is pinned for a ccw chain.
863 * @cp: channel_program on which to perform the operation
864 * @iova: the iova to check
866 * If the @iova is currently pinned for the ccw chain, return true;
869 bool cp_iova_pinned(struct channel_program *cp, u64 iova)
871 struct ccwchain *chain;
874 list_for_each_entry(chain, &cp->ccwchain_list, next) {
875 for (i = 0; i < chain->ch_len; i++)
876 if (pfn_array_table_iova_pinned(chain->ch_pat + i,