GNU Linux-libre 4.9.304-gnu1
[releases.git] / drivers / misc / mic / vop / vop_vringh.c
1 /*
2  * Intel MIC Platform Software Stack (MPSS)
3  *
4  * Copyright(c) 2016 Intel Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License, version 2, as
8  * published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * General Public License for more details.
14  *
15  * The full GNU General Public License is included in this distribution in
16  * the file called "COPYING".
17  *
18  * Intel Virtio Over PCIe (VOP) driver.
19  *
20  */
21 #include <linux/sched.h>
22 #include <linux/poll.h>
23 #include <linux/dma-mapping.h>
24
25 #include <linux/mic_common.h>
26 #include "../common/mic_dev.h"
27
28 #include <linux/mic_ioctl.h>
29 #include "vop_main.h"
30
31 /* Helper API to obtain the VOP PCIe device */
32 static inline struct device *vop_dev(struct vop_vdev *vdev)
33 {
34         return vdev->vpdev->dev.parent;
35 }
36
37 /* Helper API to check if a virtio device is initialized */
38 static inline int vop_vdev_inited(struct vop_vdev *vdev)
39 {
40         if (!vdev)
41                 return -EINVAL;
42         /* Device has not been created yet */
43         if (!vdev->dd || !vdev->dd->type) {
44                 dev_err(vop_dev(vdev), "%s %d err %d\n",
45                         __func__, __LINE__, -EINVAL);
46                 return -EINVAL;
47         }
48         /* Device has been removed/deleted */
49         if (vdev->dd->type == -1) {
50                 dev_dbg(vop_dev(vdev), "%s %d err %d\n",
51                         __func__, __LINE__, -ENODEV);
52                 return -ENODEV;
53         }
54         return 0;
55 }
56
57 static void _vop_notify(struct vringh *vrh)
58 {
59         struct vop_vringh *vvrh = container_of(vrh, struct vop_vringh, vrh);
60         struct vop_vdev *vdev = vvrh->vdev;
61         struct vop_device *vpdev = vdev->vpdev;
62         s8 db = vdev->dc->h2c_vdev_db;
63
64         if (db != -1)
65                 vpdev->hw_ops->send_intr(vpdev, db);
66 }
67
68 static void vop_virtio_init_post(struct vop_vdev *vdev)
69 {
70         struct mic_vqconfig *vqconfig = mic_vq_config(vdev->dd);
71         struct vop_device *vpdev = vdev->vpdev;
72         int i, used_size;
73
74         for (i = 0; i < vdev->dd->num_vq; i++) {
75                 used_size = PAGE_ALIGN(sizeof(u16) * 3 +
76                                 sizeof(struct vring_used_elem) *
77                                 le16_to_cpu(vqconfig->num));
78                 if (!le64_to_cpu(vqconfig[i].used_address)) {
79                         dev_warn(vop_dev(vdev), "used_address zero??\n");
80                         continue;
81                 }
82                 vdev->vvr[i].vrh.vring.used =
83                         (void __force *)vpdev->hw_ops->ioremap(
84                         vpdev,
85                         le64_to_cpu(vqconfig[i].used_address),
86                         used_size);
87         }
88
89         vdev->dc->used_address_updated = 0;
90
91         dev_info(vop_dev(vdev), "%s: device type %d LINKUP\n",
92                  __func__, vdev->virtio_id);
93 }
94
95 static inline void vop_virtio_device_reset(struct vop_vdev *vdev)
96 {
97         int i;
98
99         dev_dbg(vop_dev(vdev), "%s: status %d device type %d RESET\n",
100                 __func__, vdev->dd->status, vdev->virtio_id);
101
102         for (i = 0; i < vdev->dd->num_vq; i++)
103                 /*
104                  * Avoid lockdep false positive. The + 1 is for the vop
105                  * mutex which is held in the reset devices code path.
106                  */
107                 mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);
108
109         /* 0 status means "reset" */
110         vdev->dd->status = 0;
111         vdev->dc->vdev_reset = 0;
112         vdev->dc->host_ack = 1;
113
114         for (i = 0; i < vdev->dd->num_vq; i++) {
115                 struct vringh *vrh = &vdev->vvr[i].vrh;
116
117                 vdev->vvr[i].vring.info->avail_idx = 0;
118                 vrh->completed = 0;
119                 vrh->last_avail_idx = 0;
120                 vrh->last_used_idx = 0;
121         }
122
123         for (i = 0; i < vdev->dd->num_vq; i++)
124                 mutex_unlock(&vdev->vvr[i].vr_mutex);
125 }
126
127 static void vop_virtio_reset_devices(struct vop_info *vi)
128 {
129         struct list_head *pos, *tmp;
130         struct vop_vdev *vdev;
131
132         list_for_each_safe(pos, tmp, &vi->vdev_list) {
133                 vdev = list_entry(pos, struct vop_vdev, list);
134                 vop_virtio_device_reset(vdev);
135                 vdev->poll_wake = 1;
136                 wake_up(&vdev->waitq);
137         }
138 }
139
140 static void vop_bh_handler(struct work_struct *work)
141 {
142         struct vop_vdev *vdev = container_of(work, struct vop_vdev,
143                         virtio_bh_work);
144
145         if (vdev->dc->used_address_updated)
146                 vop_virtio_init_post(vdev);
147
148         if (vdev->dc->vdev_reset)
149                 vop_virtio_device_reset(vdev);
150
151         vdev->poll_wake = 1;
152         wake_up(&vdev->waitq);
153 }
154
155 static irqreturn_t _vop_virtio_intr_handler(int irq, void *data)
156 {
157         struct vop_vdev *vdev = data;
158         struct vop_device *vpdev = vdev->vpdev;
159
160         vpdev->hw_ops->ack_interrupt(vpdev, vdev->virtio_db);
161         schedule_work(&vdev->virtio_bh_work);
162         return IRQ_HANDLED;
163 }
164
165 static int vop_virtio_config_change(struct vop_vdev *vdev, void *argp)
166 {
167         DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
168         int ret = 0, retry, i;
169         struct vop_device *vpdev = vdev->vpdev;
170         struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
171         struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
172         s8 db = bootparam->h2c_config_db;
173
174         mutex_lock(&vi->vop_mutex);
175         for (i = 0; i < vdev->dd->num_vq; i++)
176                 mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);
177
178         if (db == -1 || vdev->dd->type == -1) {
179                 ret = -EIO;
180                 goto exit;
181         }
182
183         memcpy(mic_vq_configspace(vdev->dd), argp, vdev->dd->config_len);
184         vdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED;
185         vpdev->hw_ops->send_intr(vpdev, db);
186
187         for (retry = 100; retry--;) {
188                 ret = wait_event_timeout(wake, vdev->dc->guest_ack,
189                                          msecs_to_jiffies(100));
190                 if (ret)
191                         break;
192         }
193
194         dev_dbg(vop_dev(vdev),
195                 "%s %d retry: %d\n", __func__, __LINE__, retry);
196         vdev->dc->config_change = 0;
197         vdev->dc->guest_ack = 0;
198 exit:
199         for (i = 0; i < vdev->dd->num_vq; i++)
200                 mutex_unlock(&vdev->vvr[i].vr_mutex);
201         mutex_unlock(&vi->vop_mutex);
202         return ret;
203 }
204
205 static int vop_copy_dp_entry(struct vop_vdev *vdev,
206                              struct mic_device_desc *argp, __u8 *type,
207                              struct mic_device_desc **devpage)
208 {
209         struct vop_device *vpdev = vdev->vpdev;
210         struct mic_device_desc *devp;
211         struct mic_vqconfig *vqconfig;
212         int ret = 0, i;
213         bool slot_found = false;
214
215         vqconfig = mic_vq_config(argp);
216         for (i = 0; i < argp->num_vq; i++) {
217                 if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) {
218                         ret =  -EINVAL;
219                         dev_err(vop_dev(vdev), "%s %d err %d\n",
220                                 __func__, __LINE__, ret);
221                         goto exit;
222                 }
223         }
224
225         /* Find the first free device page entry */
226         for (i = sizeof(struct mic_bootparam);
227                 i < MIC_DP_SIZE - mic_total_desc_size(argp);
228                 i += mic_total_desc_size(devp)) {
229                 devp = vpdev->hw_ops->get_dp(vpdev) + i;
230                 if (devp->type == 0 || devp->type == -1) {
231                         slot_found = true;
232                         break;
233                 }
234         }
235         if (!slot_found) {
236                 ret =  -EINVAL;
237                 dev_err(vop_dev(vdev), "%s %d err %d\n",
238                         __func__, __LINE__, ret);
239                 goto exit;
240         }
241         /*
242          * Save off the type before doing the memcpy. Type will be set in the
243          * end after completing all initialization for the new device.
244          */
245         *type = argp->type;
246         argp->type = 0;
247         memcpy(devp, argp, mic_desc_size(argp));
248
249         *devpage = devp;
250 exit:
251         return ret;
252 }
253
254 static void vop_init_device_ctrl(struct vop_vdev *vdev,
255                                  struct mic_device_desc *devpage)
256 {
257         struct mic_device_ctrl *dc;
258
259         dc = (void *)devpage + mic_aligned_desc_size(devpage);
260
261         dc->config_change = 0;
262         dc->guest_ack = 0;
263         dc->vdev_reset = 0;
264         dc->host_ack = 0;
265         dc->used_address_updated = 0;
266         dc->c2h_vdev_db = -1;
267         dc->h2c_vdev_db = -1;
268         vdev->dc = dc;
269 }
270
271 static int vop_virtio_add_device(struct vop_vdev *vdev,
272                                  struct mic_device_desc *argp)
273 {
274         struct vop_info *vi = vdev->vi;
275         struct vop_device *vpdev = vi->vpdev;
276         struct mic_device_desc *dd = NULL;
277         struct mic_vqconfig *vqconfig;
278         int vr_size, i, j, ret;
279         u8 type = 0;
280         s8 db = -1;
281         char irqname[16];
282         struct mic_bootparam *bootparam;
283         u16 num;
284         dma_addr_t vr_addr;
285
286         bootparam = vpdev->hw_ops->get_dp(vpdev);
287         init_waitqueue_head(&vdev->waitq);
288         INIT_LIST_HEAD(&vdev->list);
289         vdev->vpdev = vpdev;
290
291         ret = vop_copy_dp_entry(vdev, argp, &type, &dd);
292         if (ret) {
293                 dev_err(vop_dev(vdev), "%s %d err %d\n",
294                         __func__, __LINE__, ret);
295                 return ret;
296         }
297
298         vop_init_device_ctrl(vdev, dd);
299
300         vdev->dd = dd;
301         vdev->virtio_id = type;
302         vqconfig = mic_vq_config(dd);
303         INIT_WORK(&vdev->virtio_bh_work, vop_bh_handler);
304
305         for (i = 0; i < dd->num_vq; i++) {
306                 struct vop_vringh *vvr = &vdev->vvr[i];
307                 struct mic_vring *vr = &vdev->vvr[i].vring;
308
309                 num = le16_to_cpu(vqconfig[i].num);
310                 mutex_init(&vvr->vr_mutex);
311                 vr_size = PAGE_ALIGN(round_up(vring_size(num, MIC_VIRTIO_RING_ALIGN), 4) +
312                         sizeof(struct _mic_vring_info));
313                 vr->va = (void *)
314                         __get_free_pages(GFP_KERNEL | __GFP_ZERO,
315                                          get_order(vr_size));
316                 if (!vr->va) {
317                         ret = -ENOMEM;
318                         dev_err(vop_dev(vdev), "%s %d err %d\n",
319                                 __func__, __LINE__, ret);
320                         goto err;
321                 }
322                 vr->len = vr_size;
323                 vr->info = vr->va + round_up(vring_size(num, MIC_VIRTIO_RING_ALIGN), 4);
324                 vr->info->magic = cpu_to_le32(MIC_MAGIC + vdev->virtio_id + i);
325                 vr_addr = dma_map_single(&vpdev->dev, vr->va, vr_size,
326                                          DMA_BIDIRECTIONAL);
327                 if (dma_mapping_error(&vpdev->dev, vr_addr)) {
328                         free_pages((unsigned long)vr->va, get_order(vr_size));
329                         ret = -ENOMEM;
330                         dev_err(vop_dev(vdev), "%s %d err %d\n",
331                                 __func__, __LINE__, ret);
332                         goto err;
333                 }
334                 vqconfig[i].address = cpu_to_le64(vr_addr);
335
336                 vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN);
337                 ret = vringh_init_kern(&vvr->vrh,
338                                        *(u32 *)mic_vq_features(vdev->dd),
339                                        num, false, vr->vr.desc, vr->vr.avail,
340                                        vr->vr.used);
341                 if (ret) {
342                         dev_err(vop_dev(vdev), "%s %d err %d\n",
343                                 __func__, __LINE__, ret);
344                         goto err;
345                 }
346                 vringh_kiov_init(&vvr->riov, NULL, 0);
347                 vringh_kiov_init(&vvr->wiov, NULL, 0);
348                 vvr->head = USHRT_MAX;
349                 vvr->vdev = vdev;
350                 vvr->vrh.notify = _vop_notify;
351                 dev_dbg(&vpdev->dev,
352                         "%s %d index %d va %p info %p vr_size 0x%x\n",
353                         __func__, __LINE__, i, vr->va, vr->info, vr_size);
354                 vvr->buf = (void *)__get_free_pages(GFP_KERNEL,
355                                         get_order(VOP_INT_DMA_BUF_SIZE));
356                 vvr->buf_da = dma_map_single(&vpdev->dev,
357                                           vvr->buf, VOP_INT_DMA_BUF_SIZE,
358                                           DMA_BIDIRECTIONAL);
359         }
360
361         snprintf(irqname, sizeof(irqname), "vop%dvirtio%d", vpdev->index,
362                  vdev->virtio_id);
363         vdev->virtio_db = vpdev->hw_ops->next_db(vpdev);
364         vdev->virtio_cookie = vpdev->hw_ops->request_irq(vpdev,
365                         _vop_virtio_intr_handler, irqname, vdev,
366                         vdev->virtio_db);
367         if (IS_ERR(vdev->virtio_cookie)) {
368                 ret = PTR_ERR(vdev->virtio_cookie);
369                 dev_dbg(&vpdev->dev, "request irq failed\n");
370                 goto err;
371         }
372
373         vdev->dc->c2h_vdev_db = vdev->virtio_db;
374
375         /*
376          * Order the type update with previous stores. This write barrier
377          * is paired with the corresponding read barrier before the uncached
378          * system memory read of the type, on the card while scanning the
379          * device page.
380          */
381         smp_wmb();
382         dd->type = type;
383         argp->type = type;
384
385         if (bootparam) {
386                 db = bootparam->h2c_config_db;
387                 if (db != -1)
388                         vpdev->hw_ops->send_intr(vpdev, db);
389         }
390         dev_dbg(&vpdev->dev, "Added virtio id %d db %d\n", dd->type, db);
391         return 0;
392 err:
393         vqconfig = mic_vq_config(dd);
394         for (j = 0; j < i; j++) {
395                 struct vop_vringh *vvr = &vdev->vvr[j];
396
397                 dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[j].address),
398                                  vvr->vring.len, DMA_BIDIRECTIONAL);
399                 free_pages((unsigned long)vvr->vring.va,
400                            get_order(vvr->vring.len));
401         }
402         return ret;
403 }
404
405 static void vop_dev_remove(struct vop_info *pvi, struct mic_device_ctrl *devp,
406                            struct vop_device *vpdev)
407 {
408         struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
409         s8 db;
410         int ret, retry;
411         DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
412
413         devp->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE;
414         db = bootparam->h2c_config_db;
415         if (db != -1)
416                 vpdev->hw_ops->send_intr(vpdev, db);
417         else
418                 goto done;
419         for (retry = 15; retry--;) {
420                 ret = wait_event_timeout(wake, devp->guest_ack,
421                                          msecs_to_jiffies(1000));
422                 if (ret)
423                         break;
424         }
425 done:
426         devp->config_change = 0;
427         devp->guest_ack = 0;
428 }
429
430 static void vop_virtio_del_device(struct vop_vdev *vdev)
431 {
432         struct vop_info *vi = vdev->vi;
433         struct vop_device *vpdev = vdev->vpdev;
434         int i;
435         struct mic_vqconfig *vqconfig;
436         struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
437
438         if (!bootparam)
439                 goto skip_hot_remove;
440         vop_dev_remove(vi, vdev->dc, vpdev);
441 skip_hot_remove:
442         vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev);
443         flush_work(&vdev->virtio_bh_work);
444         vqconfig = mic_vq_config(vdev->dd);
445         for (i = 0; i < vdev->dd->num_vq; i++) {
446                 struct vop_vringh *vvr = &vdev->vvr[i];
447
448                 dma_unmap_single(&vpdev->dev,
449                                  vvr->buf_da, VOP_INT_DMA_BUF_SIZE,
450                                  DMA_BIDIRECTIONAL);
451                 free_pages((unsigned long)vvr->buf,
452                            get_order(VOP_INT_DMA_BUF_SIZE));
453                 vringh_kiov_cleanup(&vvr->riov);
454                 vringh_kiov_cleanup(&vvr->wiov);
455                 dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[i].address),
456                                  vvr->vring.len, DMA_BIDIRECTIONAL);
457                 free_pages((unsigned long)vvr->vring.va,
458                            get_order(vvr->vring.len));
459         }
460         /*
461          * Order the type update with previous stores. This write barrier
462          * is paired with the corresponding read barrier before the uncached
463          * system memory read of the type, on the card while scanning the
464          * device page.
465          */
466         smp_wmb();
467         vdev->dd->type = -1;
468 }
469
470 /*
471  * vop_sync_dma - Wrapper for synchronous DMAs.
472  *
473  * @dev - The address of the pointer to the device instance used
474  * for DMA registration.
475  * @dst - destination DMA address.
476  * @src - source DMA address.
477  * @len - size of the transfer.
478  *
479  * Return DMA_SUCCESS on success
480  */
481 static int vop_sync_dma(struct vop_vdev *vdev, dma_addr_t dst, dma_addr_t src,
482                         size_t len)
483 {
484         int err = 0;
485         struct dma_device *ddev;
486         struct dma_async_tx_descriptor *tx;
487         struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
488         struct dma_chan *vop_ch = vi->dma_ch;
489
490         if (!vop_ch) {
491                 err = -EBUSY;
492                 goto error;
493         }
494         ddev = vop_ch->device;
495         tx = ddev->device_prep_dma_memcpy(vop_ch, dst, src, len,
496                 DMA_PREP_FENCE);
497         if (!tx) {
498                 err = -ENOMEM;
499                 goto error;
500         } else {
501                 dma_cookie_t cookie;
502
503                 cookie = tx->tx_submit(tx);
504                 if (dma_submit_error(cookie)) {
505                         err = -ENOMEM;
506                         goto error;
507                 }
508                 dma_async_issue_pending(vop_ch);
509                 err = dma_sync_wait(vop_ch, cookie);
510         }
511 error:
512         if (err)
513                 dev_err(&vi->vpdev->dev, "%s %d err %d\n",
514                         __func__, __LINE__, err);
515         return err;
516 }
517
518 #define VOP_USE_DMA true
519
520 /*
521  * Initiates the copies across the PCIe bus from card memory to a user
522  * space buffer. When transfers are done using DMA, source/destination
523  * addresses and transfer length must follow the alignment requirements of
524  * the MIC DMA engine.
525  */
526 static int vop_virtio_copy_to_user(struct vop_vdev *vdev, void __user *ubuf,
527                                    size_t len, u64 daddr, size_t dlen,
528                                    int vr_idx)
529 {
530         struct vop_device *vpdev = vdev->vpdev;
531         void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len);
532         struct vop_vringh *vvr = &vdev->vvr[vr_idx];
533         struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
534         size_t dma_alignment = 1 << vi->dma_ch->device->copy_align;
535         bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
536         size_t dma_offset, partlen;
537         int err;
538
539         if (!VOP_USE_DMA) {
540                 if (copy_to_user(ubuf, (void __force *)dbuf, len)) {
541                         err = -EFAULT;
542                         dev_err(vop_dev(vdev), "%s %d err %d\n",
543                                 __func__, __LINE__, err);
544                         goto err;
545                 }
546                 vdev->in_bytes += len;
547                 err = 0;
548                 goto err;
549         }
550
551         dma_offset = daddr - round_down(daddr, dma_alignment);
552         daddr -= dma_offset;
553         len += dma_offset;
554         /*
555          * X100 uses DMA addresses as seen by the card so adding
556          * the aperture base is not required for DMA. However x200
557          * requires DMA addresses to be an offset into the bar so
558          * add the aperture base for x200.
559          */
560         if (x200)
561                 daddr += vpdev->aper->pa;
562         while (len) {
563                 partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
564                 err = vop_sync_dma(vdev, vvr->buf_da, daddr,
565                                    ALIGN(partlen, dma_alignment));
566                 if (err) {
567                         dev_err(vop_dev(vdev), "%s %d err %d\n",
568                                 __func__, __LINE__, err);
569                         goto err;
570                 }
571                 if (copy_to_user(ubuf, vvr->buf + dma_offset,
572                                  partlen - dma_offset)) {
573                         err = -EFAULT;
574                         dev_err(vop_dev(vdev), "%s %d err %d\n",
575                                 __func__, __LINE__, err);
576                         goto err;
577                 }
578                 daddr += partlen;
579                 ubuf += partlen;
580                 dbuf += partlen;
581                 vdev->in_bytes_dma += partlen;
582                 vdev->in_bytes += partlen;
583                 len -= partlen;
584                 dma_offset = 0;
585         }
586         err = 0;
587 err:
588         vpdev->hw_ops->iounmap(vpdev, dbuf);
589         dev_dbg(vop_dev(vdev),
590                 "%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
591                 __func__, ubuf, dbuf, len, vr_idx);
592         return err;
593 }
594
595 /*
596  * Initiates copies across the PCIe bus from a user space buffer to card
597  * memory. When transfers are done using DMA, source/destination addresses
598  * and transfer length must follow the alignment requirements of the MIC
599  * DMA engine.
600  */
601 static int vop_virtio_copy_from_user(struct vop_vdev *vdev, void __user *ubuf,
602                                      size_t len, u64 daddr, size_t dlen,
603                                      int vr_idx)
604 {
605         struct vop_device *vpdev = vdev->vpdev;
606         void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len);
607         struct vop_vringh *vvr = &vdev->vvr[vr_idx];
608         struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
609         size_t dma_alignment = 1 << vi->dma_ch->device->copy_align;
610         bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
611         size_t partlen;
612         bool dma = VOP_USE_DMA;
613         int err = 0;
614         size_t offset = 0;
615
616         if (daddr & (dma_alignment - 1)) {
617                 vdev->tx_dst_unaligned += len;
618                 dma = false;
619         } else if (ALIGN(len, dma_alignment) > dlen) {
620                 vdev->tx_len_unaligned += len;
621                 dma = false;
622         }
623
624         if (!dma)
625                 goto memcpy;
626
627         /*
628          * X100 uses DMA addresses as seen by the card so adding
629          * the aperture base is not required for DMA. However x200
630          * requires DMA addresses to be an offset into the bar so
631          * add the aperture base for x200.
632          */
633         if (x200)
634                 daddr += vpdev->aper->pa;
635         while (len) {
636                 partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
637
638                 if (copy_from_user(vvr->buf, ubuf, partlen)) {
639                         err = -EFAULT;
640                         dev_err(vop_dev(vdev), "%s %d err %d\n",
641                                 __func__, __LINE__, err);
642                         goto err;
643                 }
644                 err = vop_sync_dma(vdev, daddr, vvr->buf_da,
645                                    ALIGN(partlen, dma_alignment));
646                 if (err) {
647                         dev_err(vop_dev(vdev), "%s %d err %d\n",
648                                 __func__, __LINE__, err);
649                         goto err;
650                 }
651                 daddr += partlen;
652                 ubuf += partlen;
653                 dbuf += partlen;
654                 vdev->out_bytes_dma += partlen;
655                 vdev->out_bytes += partlen;
656                 len -= partlen;
657         }
658 memcpy:
659         /*
660          * We are copying to IO below and should ideally use something
661          * like copy_from_user_toio(..) if it existed.
662          */
663         while (len) {
664                 partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
665
666                 if (copy_from_user(vvr->buf, ubuf + offset, partlen)) {
667                         err = -EFAULT;
668                         dev_err(vop_dev(vdev), "%s %d err %d\n",
669                                 __func__, __LINE__, err);
670                         goto err;
671                 }
672                 memcpy_toio(dbuf + offset, vvr->buf, partlen);
673                 offset += partlen;
674                 vdev->out_bytes += partlen;
675                 len -= partlen;
676         }
677         err = 0;
678 err:
679         vpdev->hw_ops->iounmap(vpdev, dbuf);
680         dev_dbg(vop_dev(vdev),
681                 "%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
682                 __func__, ubuf, dbuf, len, vr_idx);
683         return err;
684 }
685
686 #define MIC_VRINGH_READ true
687
688 /* Determine the total number of bytes consumed in a VRINGH KIOV */
689 static inline u32 vop_vringh_iov_consumed(struct vringh_kiov *iov)
690 {
691         int i;
692         u32 total = iov->consumed;
693
694         for (i = 0; i < iov->i; i++)
695                 total += iov->iov[i].iov_len;
696         return total;
697 }
698
699 /*
700  * Traverse the VRINGH KIOV and issue the APIs to trigger the copies.
701  * This API is heavily based on the vringh_iov_xfer(..) implementation
702  * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..)
703  * and vringh_iov_push_kern(..) directly is because there is no
704  * way to override the VRINGH xfer(..) routines as of v3.10.
705  */
706 static int vop_vringh_copy(struct vop_vdev *vdev, struct vringh_kiov *iov,
707                            void __user *ubuf, size_t len, bool read, int vr_idx,
708                            size_t *out_len)
709 {
710         int ret = 0;
711         size_t partlen, tot_len = 0;
712
713         while (len && iov->i < iov->used) {
714                 struct kvec *kiov = &iov->iov[iov->i];
715
716                 partlen = min(kiov->iov_len, len);
717                 if (read)
718                         ret = vop_virtio_copy_to_user(vdev, ubuf, partlen,
719                                                       (u64)kiov->iov_base,
720                                                       kiov->iov_len,
721                                                       vr_idx);
722                 else
723                         ret = vop_virtio_copy_from_user(vdev, ubuf, partlen,
724                                                         (u64)kiov->iov_base,
725                                                         kiov->iov_len,
726                                                         vr_idx);
727                 if (ret) {
728                         dev_err(vop_dev(vdev), "%s %d err %d\n",
729                                 __func__, __LINE__, ret);
730                         break;
731                 }
732                 len -= partlen;
733                 ubuf += partlen;
734                 tot_len += partlen;
735                 iov->consumed += partlen;
736                 kiov->iov_len -= partlen;
737                 kiov->iov_base += partlen;
738                 if (!kiov->iov_len) {
739                         /* Fix up old iov element then increment. */
740                         kiov->iov_len = iov->consumed;
741                         kiov->iov_base -= iov->consumed;
742
743                         iov->consumed = 0;
744                         iov->i++;
745                 }
746         }
747         *out_len = tot_len;
748         return ret;
749 }
750
751 /*
752  * Use the standard VRINGH infrastructure in the kernel to fetch new
753  * descriptors, initiate the copies and update the used ring.
754  */
755 static int _vop_virtio_copy(struct vop_vdev *vdev, struct mic_copy_desc *copy)
756 {
757         int ret = 0;
758         u32 iovcnt = copy->iovcnt;
759         struct iovec iov;
760         struct iovec __user *u_iov = copy->iov;
761         void __user *ubuf = NULL;
762         struct vop_vringh *vvr = &vdev->vvr[copy->vr_idx];
763         struct vringh_kiov *riov = &vvr->riov;
764         struct vringh_kiov *wiov = &vvr->wiov;
765         struct vringh *vrh = &vvr->vrh;
766         u16 *head = &vvr->head;
767         struct mic_vring *vr = &vvr->vring;
768         size_t len = 0, out_len;
769
770         copy->out_len = 0;
771         /* Fetch a new IOVEC if all previous elements have been processed */
772         if (riov->i == riov->used && wiov->i == wiov->used) {
773                 ret = vringh_getdesc_kern(vrh, riov, wiov,
774                                           head, GFP_KERNEL);
775                 /* Check if there are available descriptors */
776                 if (ret <= 0)
777                         return ret;
778         }
779         while (iovcnt) {
780                 if (!len) {
781                         /* Copy over a new iovec from user space. */
782                         ret = copy_from_user(&iov, u_iov, sizeof(*u_iov));
783                         if (ret) {
784                                 ret = -EINVAL;
785                                 dev_err(vop_dev(vdev), "%s %d err %d\n",
786                                         __func__, __LINE__, ret);
787                                 break;
788                         }
789                         len = iov.iov_len;
790                         ubuf = iov.iov_base;
791                 }
792                 /* Issue all the read descriptors first */
793                 ret = vop_vringh_copy(vdev, riov, ubuf, len,
794                                       MIC_VRINGH_READ, copy->vr_idx, &out_len);
795                 if (ret) {
796                         dev_err(vop_dev(vdev), "%s %d err %d\n",
797                                 __func__, __LINE__, ret);
798                         break;
799                 }
800                 len -= out_len;
801                 ubuf += out_len;
802                 copy->out_len += out_len;
803                 /* Issue the write descriptors next */
804                 ret = vop_vringh_copy(vdev, wiov, ubuf, len,
805                                       !MIC_VRINGH_READ, copy->vr_idx, &out_len);
806                 if (ret) {
807                         dev_err(vop_dev(vdev), "%s %d err %d\n",
808                                 __func__, __LINE__, ret);
809                         break;
810                 }
811                 len -= out_len;
812                 ubuf += out_len;
813                 copy->out_len += out_len;
814                 if (!len) {
815                         /* One user space iovec is now completed */
816                         iovcnt--;
817                         u_iov++;
818                 }
819                 /* Exit loop if all elements in KIOVs have been processed. */
820                 if (riov->i == riov->used && wiov->i == wiov->used)
821                         break;
822         }
823         /*
824          * Update the used ring if a descriptor was available and some data was
825          * copied in/out and the user asked for a used ring update.
826          */
827         if (*head != USHRT_MAX && copy->out_len && copy->update_used) {
828                 u32 total = 0;
829
830                 /* Determine the total data consumed */
831                 total += vop_vringh_iov_consumed(riov);
832                 total += vop_vringh_iov_consumed(wiov);
833                 vringh_complete_kern(vrh, *head, total);
834                 *head = USHRT_MAX;
835                 if (vringh_need_notify_kern(vrh) > 0)
836                         vringh_notify(vrh);
837                 vringh_kiov_cleanup(riov);
838                 vringh_kiov_cleanup(wiov);
839                 /* Update avail idx for user space */
840                 vr->info->avail_idx = vrh->last_avail_idx;
841         }
842         return ret;
843 }
844
845 static inline int vop_verify_copy_args(struct vop_vdev *vdev,
846                                        struct mic_copy_desc *copy)
847 {
848         if (!vdev || copy->vr_idx >= vdev->dd->num_vq)
849                 return -EINVAL;
850         return 0;
851 }
852
853 /* Copy a specified number of virtio descriptors in a chain */
854 static int vop_virtio_copy_desc(struct vop_vdev *vdev,
855                                 struct mic_copy_desc *copy)
856 {
857         int err;
858         struct vop_vringh *vvr;
859
860         err = vop_verify_copy_args(vdev, copy);
861         if (err)
862                 return err;
863
864         vvr = &vdev->vvr[copy->vr_idx];
865         mutex_lock(&vvr->vr_mutex);
866         if (!vop_vdevup(vdev)) {
867                 err = -ENODEV;
868                 dev_err(vop_dev(vdev), "%s %d err %d\n",
869                         __func__, __LINE__, err);
870                 goto err;
871         }
872         err = _vop_virtio_copy(vdev, copy);
873         if (err) {
874                 dev_err(vop_dev(vdev), "%s %d err %d\n",
875                         __func__, __LINE__, err);
876         }
877 err:
878         mutex_unlock(&vvr->vr_mutex);
879         return err;
880 }
881
882 static int vop_open(struct inode *inode, struct file *f)
883 {
884         struct vop_vdev *vdev;
885         struct vop_info *vi = container_of(f->private_data,
886                 struct vop_info, miscdev);
887
888         vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
889         if (!vdev)
890                 return -ENOMEM;
891         vdev->vi = vi;
892         mutex_init(&vdev->vdev_mutex);
893         f->private_data = vdev;
894         init_completion(&vdev->destroy);
895         complete(&vdev->destroy);
896         return 0;
897 }
898
899 static int vop_release(struct inode *inode, struct file *f)
900 {
901         struct vop_vdev *vdev = f->private_data, *vdev_tmp;
902         struct vop_info *vi = vdev->vi;
903         struct list_head *pos, *tmp;
904         bool found = false;
905
906         mutex_lock(&vdev->vdev_mutex);
907         if (vdev->deleted)
908                 goto unlock;
909         mutex_lock(&vi->vop_mutex);
910         list_for_each_safe(pos, tmp, &vi->vdev_list) {
911                 vdev_tmp = list_entry(pos, struct vop_vdev, list);
912                 if (vdev == vdev_tmp) {
913                         vop_virtio_del_device(vdev);
914                         list_del(pos);
915                         found = true;
916                         break;
917                 }
918         }
919         mutex_unlock(&vi->vop_mutex);
920 unlock:
921         mutex_unlock(&vdev->vdev_mutex);
922         if (!found)
923                 wait_for_completion(&vdev->destroy);
924         f->private_data = NULL;
925         kfree(vdev);
926         return 0;
927 }
928
929 static long vop_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
930 {
931         struct vop_vdev *vdev = f->private_data;
932         struct vop_info *vi = vdev->vi;
933         void __user *argp = (void __user *)arg;
934         int ret;
935
936         switch (cmd) {
937         case MIC_VIRTIO_ADD_DEVICE:
938         {
939                 struct mic_device_desc dd, *dd_config;
940
941                 if (copy_from_user(&dd, argp, sizeof(dd)))
942                         return -EFAULT;
943
944                 if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE ||
945                     dd.num_vq > MIC_MAX_VRINGS)
946                         return -EINVAL;
947
948                 dd_config = kzalloc(mic_desc_size(&dd), GFP_KERNEL);
949                 if (!dd_config)
950                         return -ENOMEM;
951                 if (copy_from_user(dd_config, argp, mic_desc_size(&dd))) {
952                         ret = -EFAULT;
953                         goto free_ret;
954                 }
955                 /* Ensure desc has not changed between the two reads */
956                 if (memcmp(&dd, dd_config, sizeof(dd))) {
957                         ret = -EINVAL;
958                         goto free_ret;
959                 }
960                 mutex_lock(&vdev->vdev_mutex);
961                 mutex_lock(&vi->vop_mutex);
962                 ret = vop_virtio_add_device(vdev, dd_config);
963                 if (ret)
964                         goto unlock_ret;
965                 list_add_tail(&vdev->list, &vi->vdev_list);
966 unlock_ret:
967                 mutex_unlock(&vi->vop_mutex);
968                 mutex_unlock(&vdev->vdev_mutex);
969 free_ret:
970                 kfree(dd_config);
971                 return ret;
972         }
973         case MIC_VIRTIO_COPY_DESC:
974         {
975                 struct mic_copy_desc copy;
976
977                 mutex_lock(&vdev->vdev_mutex);
978                 ret = vop_vdev_inited(vdev);
979                 if (ret)
980                         goto _unlock_ret;
981
982                 if (copy_from_user(&copy, argp, sizeof(copy))) {
983                         ret = -EFAULT;
984                         goto _unlock_ret;
985                 }
986
987                 ret = vop_virtio_copy_desc(vdev, &copy);
988                 if (ret < 0)
989                         goto _unlock_ret;
990                 if (copy_to_user(
991                         &((struct mic_copy_desc __user *)argp)->out_len,
992                         &copy.out_len, sizeof(copy.out_len)))
993                         ret = -EFAULT;
994 _unlock_ret:
995                 mutex_unlock(&vdev->vdev_mutex);
996                 return ret;
997         }
998         case MIC_VIRTIO_CONFIG_CHANGE:
999         {
1000                 void *buf;
1001
1002                 mutex_lock(&vdev->vdev_mutex);
1003                 ret = vop_vdev_inited(vdev);
1004                 if (ret)
1005                         goto __unlock_ret;
1006                 buf = kzalloc(vdev->dd->config_len, GFP_KERNEL);
1007                 if (!buf) {
1008                         ret = -ENOMEM;
1009                         goto __unlock_ret;
1010                 }
1011                 if (copy_from_user(buf, argp, vdev->dd->config_len)) {
1012                         ret = -EFAULT;
1013                         goto done;
1014                 }
1015                 ret = vop_virtio_config_change(vdev, buf);
1016 done:
1017                 kfree(buf);
1018 __unlock_ret:
1019                 mutex_unlock(&vdev->vdev_mutex);
1020                 return ret;
1021         }
1022         default:
1023                 return -ENOIOCTLCMD;
1024         };
1025         return 0;
1026 }
1027
1028 /*
1029  * We return POLLIN | POLLOUT from poll when new buffers are enqueued, and
1030  * not when previously enqueued buffers may be available. This means that
1031  * in the card->host (TX) path, when userspace is unblocked by poll it
1032  * must drain all available descriptors or it can stall.
1033  */
1034 static unsigned int vop_poll(struct file *f, poll_table *wait)
1035 {
1036         struct vop_vdev *vdev = f->private_data;
1037         int mask = 0;
1038
1039         mutex_lock(&vdev->vdev_mutex);
1040         if (vop_vdev_inited(vdev)) {
1041                 mask = POLLERR;
1042                 goto done;
1043         }
1044         poll_wait(f, &vdev->waitq, wait);
1045         if (vop_vdev_inited(vdev)) {
1046                 mask = POLLERR;
1047         } else if (vdev->poll_wake) {
1048                 vdev->poll_wake = 0;
1049                 mask = POLLIN | POLLOUT;
1050         }
1051 done:
1052         mutex_unlock(&vdev->vdev_mutex);
1053         return mask;
1054 }
1055
1056 static inline int
1057 vop_query_offset(struct vop_vdev *vdev, unsigned long offset,
1058                  unsigned long *size, unsigned long *pa)
1059 {
1060         struct vop_device *vpdev = vdev->vpdev;
1061         unsigned long start = MIC_DP_SIZE;
1062         int i;
1063
1064         /*
1065          * MMAP interface is as follows:
1066          * offset                               region
1067          * 0x0                                  virtio device_page
1068          * 0x1000                               first vring
1069          * 0x1000 + size of 1st vring           second vring
1070          * ....
1071          */
1072         if (!offset) {
1073                 *pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev));
1074                 *size = MIC_DP_SIZE;
1075                 return 0;
1076         }
1077
1078         for (i = 0; i < vdev->dd->num_vq; i++) {
1079                 struct vop_vringh *vvr = &vdev->vvr[i];
1080
1081                 if (offset == start) {
1082                         *pa = virt_to_phys(vvr->vring.va);
1083                         *size = vvr->vring.len;
1084                         return 0;
1085                 }
1086                 start += vvr->vring.len;
1087         }
1088         return -1;
1089 }
1090
1091 /*
1092  * Maps the device page and virtio rings to user space for readonly access.
1093  */
1094 static int vop_mmap(struct file *f, struct vm_area_struct *vma)
1095 {
1096         struct vop_vdev *vdev = f->private_data;
1097         unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
1098         unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem = size;
1099         int i, err;
1100
1101         err = vop_vdev_inited(vdev);
1102         if (err)
1103                 goto ret;
1104         if (vma->vm_flags & VM_WRITE) {
1105                 err = -EACCES;
1106                 goto ret;
1107         }
1108         while (size_rem) {
1109                 i = vop_query_offset(vdev, offset, &size, &pa);
1110                 if (i < 0) {
1111                         err = -EINVAL;
1112                         goto ret;
1113                 }
1114                 err = remap_pfn_range(vma, vma->vm_start + offset,
1115                                       pa >> PAGE_SHIFT, size,
1116                                       vma->vm_page_prot);
1117                 if (err)
1118                         goto ret;
1119                 size_rem -= size;
1120                 offset += size;
1121         }
1122 ret:
1123         return err;
1124 }
1125
1126 static const struct file_operations vop_fops = {
1127         .open = vop_open,
1128         .release = vop_release,
1129         .unlocked_ioctl = vop_ioctl,
1130         .poll = vop_poll,
1131         .mmap = vop_mmap,
1132         .owner = THIS_MODULE,
1133 };
1134
1135 int vop_host_init(struct vop_info *vi)
1136 {
1137         int rc;
1138         struct miscdevice *mdev;
1139         struct vop_device *vpdev = vi->vpdev;
1140
1141         INIT_LIST_HEAD(&vi->vdev_list);
1142         vi->dma_ch = vpdev->dma_ch;
1143         mdev = &vi->miscdev;
1144         mdev->minor = MISC_DYNAMIC_MINOR;
1145         snprintf(vi->name, sizeof(vi->name), "vop_virtio%d", vpdev->index);
1146         mdev->name = vi->name;
1147         mdev->fops = &vop_fops;
1148         mdev->parent = &vpdev->dev;
1149
1150         rc = misc_register(mdev);
1151         if (rc)
1152                 dev_err(&vpdev->dev, "%s failed rc %d\n", __func__, rc);
1153         return rc;
1154 }
1155
1156 void vop_host_uninit(struct vop_info *vi)
1157 {
1158         struct list_head *pos, *tmp;
1159         struct vop_vdev *vdev;
1160
1161         mutex_lock(&vi->vop_mutex);
1162         vop_virtio_reset_devices(vi);
1163         list_for_each_safe(pos, tmp, &vi->vdev_list) {
1164                 vdev = list_entry(pos, struct vop_vdev, list);
1165                 list_del(pos);
1166                 reinit_completion(&vdev->destroy);
1167                 mutex_unlock(&vi->vop_mutex);
1168                 mutex_lock(&vdev->vdev_mutex);
1169                 vop_virtio_del_device(vdev);
1170                 vdev->deleted = true;
1171                 mutex_unlock(&vdev->vdev_mutex);
1172                 complete(&vdev->destroy);
1173                 mutex_lock(&vi->vop_mutex);
1174         }
1175         mutex_unlock(&vi->vop_mutex);
1176         misc_deregister(&vi->miscdev);
1177 }