GNU Linux-libre 4.14.332-gnu1
[releases.git] / drivers / vhost / vsock.c
1 /*
2  * vhost transport for vsock
3  *
4  * Copyright (C) 2013-2015 Red Hat, Inc.
5  * Author: Asias He <asias@redhat.com>
6  *         Stefan Hajnoczi <stefanha@redhat.com>
7  *
8  * This work is licensed under the terms of the GNU GPL, version 2.
9  */
10 #include <linux/miscdevice.h>
11 #include <linux/atomic.h>
12 #include <linux/module.h>
13 #include <linux/mutex.h>
14 #include <linux/vmalloc.h>
15 #include <net/sock.h>
16 #include <linux/virtio_vsock.h>
17 #include <linux/vhost.h>
18 #include <linux/hashtable.h>
19
20 #include <net/af_vsock.h>
21 #include "vhost.h"
22
/* CID reported by vhost_transport_get_local_cid(). */
#define VHOST_VSOCK_DEFAULT_HOST_CID	2

/* Byte budget for one pass over a virtqueue.  Once this many bytes have been
 * transferred the job is requeued, so one virtqueue cannot starve the others.
 */
#define VHOST_VSOCK_WEIGHT		0x80000

/* Packet budget for one pass over a virtqueue.  Once this many packets have
 * been transferred the job is requeued, so one virtqueue cannot starve the
 * others with small pkts.
 */
#define VHOST_VSOCK_PKT_WEIGHT		256
32
33 enum {
34         VHOST_VSOCK_FEATURES = VHOST_FEATURES,
35 };
36
37 /* Used to track all the vhost_vsock instances on the system. */
38 static DEFINE_SPINLOCK(vhost_vsock_lock);
39 static DEFINE_READ_MOSTLY_HASHTABLE(vhost_vsock_hash, 8);
40
41 struct vhost_vsock {
42         struct vhost_dev dev;
43         struct vhost_virtqueue vqs[2];
44
45         /* Link to global vhost_vsock_hash, writes use vhost_vsock_lock */
46         struct hlist_node hash;
47
48         struct vhost_work send_pkt_work;
49         spinlock_t send_pkt_list_lock;
50         struct list_head send_pkt_list; /* host->guest pending packets */
51
52         atomic_t queued_replies;
53
54         u32 guest_cid;
55 };
56
57 static u32 vhost_transport_get_local_cid(void)
58 {
59         return VHOST_VSOCK_DEFAULT_HOST_CID;
60 }
61
62 /* Callers that dereference the return value must hold vhost_vsock_lock or the
63  * RCU read lock.
64  */
65 static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
66 {
67         struct vhost_vsock *vsock;
68
69         hash_for_each_possible_rcu(vhost_vsock_hash, vsock, hash, guest_cid) {
70                 u32 other_cid = vsock->guest_cid;
71
72                 /* Skip instances that have no CID yet */
73                 if (other_cid == 0)
74                         continue;
75
76                 if (other_cid == guest_cid) {
77                         return vsock;
78                 }
79         }
80
81         return NULL;
82 }
83
84 static void
85 vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
86                             struct vhost_virtqueue *vq)
87 {
88         struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
89         int pkts = 0, total_len = 0;
90         bool added = false;
91         bool restart_tx = false;
92
93         mutex_lock(&vq->mutex);
94
95         if (!vq->private_data)
96                 goto out;
97
98         /* Avoid further vmexits, we're already processing the virtqueue */
99         vhost_disable_notify(&vsock->dev, vq);
100
101         do {
102                 struct virtio_vsock_pkt *pkt;
103                 struct iov_iter iov_iter;
104                 unsigned out, in;
105                 size_t nbytes;
106                 size_t iov_len, payload_len;
107                 int head;
108
109                 spin_lock_bh(&vsock->send_pkt_list_lock);
110                 if (list_empty(&vsock->send_pkt_list)) {
111                         spin_unlock_bh(&vsock->send_pkt_list_lock);
112                         vhost_enable_notify(&vsock->dev, vq);
113                         break;
114                 }
115
116                 pkt = list_first_entry(&vsock->send_pkt_list,
117                                        struct virtio_vsock_pkt, list);
118                 list_del_init(&pkt->list);
119                 spin_unlock_bh(&vsock->send_pkt_list_lock);
120
121                 head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
122                                          &out, &in, NULL, NULL);
123                 if (head < 0) {
124                         spin_lock_bh(&vsock->send_pkt_list_lock);
125                         list_add(&pkt->list, &vsock->send_pkt_list);
126                         spin_unlock_bh(&vsock->send_pkt_list_lock);
127                         break;
128                 }
129
130                 if (head == vq->num) {
131                         spin_lock_bh(&vsock->send_pkt_list_lock);
132                         list_add(&pkt->list, &vsock->send_pkt_list);
133                         spin_unlock_bh(&vsock->send_pkt_list_lock);
134
135                         /* We cannot finish yet if more buffers snuck in while
136                          * re-enabling notify.
137                          */
138                         if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
139                                 vhost_disable_notify(&vsock->dev, vq);
140                                 continue;
141                         }
142                         break;
143                 }
144
145                 if (out) {
146                         virtio_transport_free_pkt(pkt);
147                         vq_err(vq, "Expected 0 output buffers, got %u\n", out);
148                         break;
149                 }
150
151                 iov_len = iov_length(&vq->iov[out], in);
152                 if (iov_len < sizeof(pkt->hdr)) {
153                         virtio_transport_free_pkt(pkt);
154                         vq_err(vq, "Buffer len [%zu] too small\n", iov_len);
155                         break;
156                 }
157
158                 iov_iter_init(&iov_iter, READ, &vq->iov[out], in, iov_len);
159                 payload_len = pkt->len - pkt->off;
160
161                 /* If the packet is greater than the space available in the
162                  * buffer, we split it using multiple buffers.
163                  */
164                 if (payload_len > iov_len - sizeof(pkt->hdr))
165                         payload_len = iov_len - sizeof(pkt->hdr);
166
167                 /* Set the correct length in the header */
168                 pkt->hdr.len = cpu_to_le32(payload_len);
169
170                 nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
171                 if (nbytes != sizeof(pkt->hdr)) {
172                         virtio_transport_free_pkt(pkt);
173                         vq_err(vq, "Faulted on copying pkt hdr\n");
174                         break;
175                 }
176
177                 nbytes = copy_to_iter(pkt->buf + pkt->off, payload_len,
178                                       &iov_iter);
179                 if (nbytes != payload_len) {
180                         virtio_transport_free_pkt(pkt);
181                         vq_err(vq, "Faulted on copying pkt buf\n");
182                         break;
183                 }
184
185                 /* Deliver to monitoring devices all packets that we
186                  * will transmit.
187                  */
188                 virtio_transport_deliver_tap_pkt(pkt);
189
190                 vhost_add_used(vq, head, sizeof(pkt->hdr) + payload_len);
191                 added = true;
192
193                 pkt->off += payload_len;
194                 total_len += payload_len;
195
196                 /* If we didn't send all the payload we can requeue the packet
197                  * to send it with the next available buffer.
198                  */
199                 if (pkt->off < pkt->len) {
200                         spin_lock_bh(&vsock->send_pkt_list_lock);
201                         list_add(&pkt->list, &vsock->send_pkt_list);
202                         spin_unlock_bh(&vsock->send_pkt_list_lock);
203                 } else {
204                         if (pkt->reply) {
205                                 int val;
206
207                                 val = atomic_dec_return(&vsock->queued_replies);
208
209                                 /* Do we have resources to resume tx
210                                  * processing?
211                                  */
212                                 if (val + 1 == tx_vq->num)
213                                         restart_tx = true;
214                         }
215
216                         virtio_transport_free_pkt(pkt);
217                 }
218         } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
219         if (added)
220                 vhost_signal(&vsock->dev, vq);
221
222 out:
223         mutex_unlock(&vq->mutex);
224
225         if (restart_tx)
226                 vhost_poll_queue(&tx_vq->poll);
227 }
228
229 static void vhost_transport_send_pkt_work(struct vhost_work *work)
230 {
231         struct vhost_virtqueue *vq;
232         struct vhost_vsock *vsock;
233
234         vsock = container_of(work, struct vhost_vsock, send_pkt_work);
235         vq = &vsock->vqs[VSOCK_VQ_RX];
236
237         vhost_transport_do_send_pkt(vsock, vq);
238 }
239
240 static int
241 vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
242 {
243         struct vhost_vsock *vsock;
244         int len = pkt->len;
245
246         rcu_read_lock();
247
248         /* Find the vhost_vsock according to guest context id  */
249         vsock = vhost_vsock_get(le64_to_cpu(pkt->hdr.dst_cid));
250         if (!vsock) {
251                 rcu_read_unlock();
252                 virtio_transport_free_pkt(pkt);
253                 return -ENODEV;
254         }
255
256         if (pkt->reply)
257                 atomic_inc(&vsock->queued_replies);
258
259         spin_lock_bh(&vsock->send_pkt_list_lock);
260         list_add_tail(&pkt->list, &vsock->send_pkt_list);
261         spin_unlock_bh(&vsock->send_pkt_list_lock);
262
263         vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);
264
265         rcu_read_unlock();
266         return len;
267 }
268
269 static int
270 vhost_transport_cancel_pkt(struct vsock_sock *vsk)
271 {
272         struct vhost_vsock *vsock;
273         struct virtio_vsock_pkt *pkt, *n;
274         int cnt = 0;
275         int ret = -ENODEV;
276         LIST_HEAD(freeme);
277
278         rcu_read_lock();
279
280         /* Find the vhost_vsock according to guest context id  */
281         vsock = vhost_vsock_get(vsk->remote_addr.svm_cid);
282         if (!vsock)
283                 goto out;
284
285         spin_lock_bh(&vsock->send_pkt_list_lock);
286         list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) {
287                 if (pkt->vsk != vsk)
288                         continue;
289                 list_move(&pkt->list, &freeme);
290         }
291         spin_unlock_bh(&vsock->send_pkt_list_lock);
292
293         list_for_each_entry_safe(pkt, n, &freeme, list) {
294                 if (pkt->reply)
295                         cnt++;
296                 list_del(&pkt->list);
297                 virtio_transport_free_pkt(pkt);
298         }
299
300         if (cnt) {
301                 struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
302                 int new_cnt;
303
304                 new_cnt = atomic_sub_return(cnt, &vsock->queued_replies);
305                 if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num)
306                         vhost_poll_queue(&tx_vq->poll);
307         }
308
309         ret = 0;
310 out:
311         rcu_read_unlock();
312         return ret;
313 }
314
315 static struct virtio_vsock_pkt *
316 vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
317                       unsigned int out, unsigned int in)
318 {
319         struct virtio_vsock_pkt *pkt;
320         struct iov_iter iov_iter;
321         size_t nbytes;
322         size_t len;
323
324         if (in != 0) {
325                 vq_err(vq, "Expected 0 input buffers, got %u\n", in);
326                 return NULL;
327         }
328
329         pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
330         if (!pkt)
331                 return NULL;
332
333         len = iov_length(vq->iov, out);
334         iov_iter_init(&iov_iter, WRITE, vq->iov, out, len);
335
336         nbytes = copy_from_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
337         if (nbytes != sizeof(pkt->hdr)) {
338                 vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n",
339                        sizeof(pkt->hdr), nbytes);
340                 kfree(pkt);
341                 return NULL;
342         }
343
344         if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM)
345                 pkt->len = le32_to_cpu(pkt->hdr.len);
346
347         /* No payload */
348         if (!pkt->len)
349                 return pkt;
350
351         /* The pkt is too big */
352         if (pkt->len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
353                 kfree(pkt);
354                 return NULL;
355         }
356
357         pkt->buf = kvmalloc(pkt->len, GFP_KERNEL);
358         if (!pkt->buf) {
359                 kfree(pkt);
360                 return NULL;
361         }
362
363         nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
364         if (nbytes != pkt->len) {
365                 vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
366                        pkt->len, nbytes);
367                 virtio_transport_free_pkt(pkt);
368                 return NULL;
369         }
370
371         return pkt;
372 }
373
374 /* Is there space left for replies to rx packets? */
375 static bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
376 {
377         struct vhost_virtqueue *vq = &vsock->vqs[VSOCK_VQ_TX];
378         int val;
379
380         smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */
381         val = atomic_read(&vsock->queued_replies);
382
383         return val < vq->num;
384 }
385
386 static struct virtio_transport vhost_transport = {
387         .transport = {
388                 .get_local_cid            = vhost_transport_get_local_cid,
389
390                 .init                     = virtio_transport_do_socket_init,
391                 .destruct                 = virtio_transport_destruct,
392                 .release                  = virtio_transport_release,
393                 .connect                  = virtio_transport_connect,
394                 .shutdown                 = virtio_transport_shutdown,
395                 .cancel_pkt               = vhost_transport_cancel_pkt,
396
397                 .dgram_enqueue            = virtio_transport_dgram_enqueue,
398                 .dgram_dequeue            = virtio_transport_dgram_dequeue,
399                 .dgram_bind               = virtio_transport_dgram_bind,
400                 .dgram_allow              = virtio_transport_dgram_allow,
401
402                 .stream_enqueue           = virtio_transport_stream_enqueue,
403                 .stream_dequeue           = virtio_transport_stream_dequeue,
404                 .stream_has_data          = virtio_transport_stream_has_data,
405                 .stream_has_space         = virtio_transport_stream_has_space,
406                 .stream_rcvhiwat          = virtio_transport_stream_rcvhiwat,
407                 .stream_is_active         = virtio_transport_stream_is_active,
408                 .stream_allow             = virtio_transport_stream_allow,
409
410                 .notify_poll_in           = virtio_transport_notify_poll_in,
411                 .notify_poll_out          = virtio_transport_notify_poll_out,
412                 .notify_recv_init         = virtio_transport_notify_recv_init,
413                 .notify_recv_pre_block    = virtio_transport_notify_recv_pre_block,
414                 .notify_recv_pre_dequeue  = virtio_transport_notify_recv_pre_dequeue,
415                 .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
416                 .notify_send_init         = virtio_transport_notify_send_init,
417                 .notify_send_pre_block    = virtio_transport_notify_send_pre_block,
418                 .notify_send_pre_enqueue  = virtio_transport_notify_send_pre_enqueue,
419                 .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
420
421                 .set_buffer_size          = virtio_transport_set_buffer_size,
422                 .set_min_buffer_size      = virtio_transport_set_min_buffer_size,
423                 .set_max_buffer_size      = virtio_transport_set_max_buffer_size,
424                 .get_buffer_size          = virtio_transport_get_buffer_size,
425                 .get_min_buffer_size      = virtio_transport_get_min_buffer_size,
426                 .get_max_buffer_size      = virtio_transport_get_max_buffer_size,
427         },
428
429         .send_pkt = vhost_transport_send_pkt,
430 };
431
432 static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
433 {
434         struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
435                                                   poll.work);
436         struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
437                                                  dev);
438         struct virtio_vsock_pkt *pkt;
439         int head, pkts = 0, total_len = 0;
440         unsigned int out, in;
441         bool added = false;
442
443         mutex_lock(&vq->mutex);
444
445         if (!vq->private_data)
446                 goto out;
447
448         vhost_disable_notify(&vsock->dev, vq);
449         do {
450                 u32 len;
451
452                 if (!vhost_vsock_more_replies(vsock)) {
453                         /* Stop tx until the device processes already
454                          * pending replies.  Leave tx virtqueue
455                          * callbacks disabled.
456                          */
457                         goto no_more_replies;
458                 }
459
460                 head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
461                                          &out, &in, NULL, NULL);
462                 if (head < 0)
463                         break;
464
465                 if (head == vq->num) {
466                         if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
467                                 vhost_disable_notify(&vsock->dev, vq);
468                                 continue;
469                         }
470                         break;
471                 }
472
473                 pkt = vhost_vsock_alloc_pkt(vq, out, in);
474                 if (!pkt) {
475                         vq_err(vq, "Faulted on pkt\n");
476                         continue;
477                 }
478
479                 len = pkt->len;
480
481                 /* Deliver to monitoring devices all received packets */
482                 virtio_transport_deliver_tap_pkt(pkt);
483
484                 /* Only accept correctly addressed packets */
485                 if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid &&
486                     le64_to_cpu(pkt->hdr.dst_cid) ==
487                     vhost_transport_get_local_cid())
488                         virtio_transport_recv_pkt(&vhost_transport, pkt);
489                 else
490                         virtio_transport_free_pkt(pkt);
491
492                 len += sizeof(pkt->hdr);
493                 vhost_add_used(vq, head, 0);
494                 total_len += len;
495                 added = true;
496         } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
497
498 no_more_replies:
499         if (added)
500                 vhost_signal(&vsock->dev, vq);
501
502 out:
503         mutex_unlock(&vq->mutex);
504 }
505
506 static void vhost_vsock_handle_rx_kick(struct vhost_work *work)
507 {
508         struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
509                                                 poll.work);
510         struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
511                                                  dev);
512
513         vhost_transport_do_send_pkt(vsock, vq);
514 }
515
516 static int vhost_vsock_start(struct vhost_vsock *vsock)
517 {
518         struct vhost_virtqueue *vq;
519         size_t i;
520         int ret;
521
522         mutex_lock(&vsock->dev.mutex);
523
524         ret = vhost_dev_check_owner(&vsock->dev);
525         if (ret)
526                 goto err;
527
528         for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
529                 vq = &vsock->vqs[i];
530
531                 mutex_lock(&vq->mutex);
532
533                 if (!vhost_vq_access_ok(vq)) {
534                         ret = -EFAULT;
535                         goto err_vq;
536                 }
537
538                 if (!vq->private_data) {
539                         vq->private_data = vsock;
540                         ret = vhost_vq_init_access(vq);
541                         if (ret)
542                                 goto err_vq;
543                 }
544
545                 mutex_unlock(&vq->mutex);
546         }
547
548         /* Some packets may have been queued before the device was started,
549          * let's kick the send worker to send them.
550          */
551         vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);
552
553         mutex_unlock(&vsock->dev.mutex);
554         return 0;
555
556 err_vq:
557         vq->private_data = NULL;
558         mutex_unlock(&vq->mutex);
559
560         for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
561                 vq = &vsock->vqs[i];
562
563                 mutex_lock(&vq->mutex);
564                 vq->private_data = NULL;
565                 mutex_unlock(&vq->mutex);
566         }
567 err:
568         mutex_unlock(&vsock->dev.mutex);
569         return ret;
570 }
571
572 static int vhost_vsock_stop(struct vhost_vsock *vsock, bool check_owner)
573 {
574         size_t i;
575         int ret = 0;
576
577         mutex_lock(&vsock->dev.mutex);
578
579         if (check_owner) {
580                 ret = vhost_dev_check_owner(&vsock->dev);
581                 if (ret)
582                         goto err;
583         }
584
585         for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
586                 struct vhost_virtqueue *vq = &vsock->vqs[i];
587
588                 mutex_lock(&vq->mutex);
589                 vq->private_data = NULL;
590                 mutex_unlock(&vq->mutex);
591         }
592
593 err:
594         mutex_unlock(&vsock->dev.mutex);
595         return ret;
596 }
597
/* Release a vhost_vsock obtained with kvmalloc() in vhost_vsock_dev_open(). */
static void vhost_vsock_free(struct vhost_vsock *vsock)
{
	kvfree(vsock);
}
602
603 static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
604 {
605         struct vhost_virtqueue **vqs;
606         struct vhost_vsock *vsock;
607         int ret;
608
609         /* This struct is large and allocation could fail, fall back to vmalloc
610          * if there is no other way.
611          */
612         vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
613         if (!vsock)
614                 return -ENOMEM;
615
616         vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL);
617         if (!vqs) {
618                 ret = -ENOMEM;
619                 goto out;
620         }
621
622         vsock->guest_cid = 0; /* no CID assigned yet */
623
624         atomic_set(&vsock->queued_replies, 0);
625
626         vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX];
627         vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX];
628         vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick;
629         vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick;
630
631         vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs),
632                        VHOST_VSOCK_PKT_WEIGHT, VHOST_VSOCK_WEIGHT);
633
634         file->private_data = vsock;
635         spin_lock_init(&vsock->send_pkt_list_lock);
636         INIT_LIST_HEAD(&vsock->send_pkt_list);
637         vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work);
638         return 0;
639
640 out:
641         vhost_vsock_free(vsock);
642         return ret;
643 }
644
645 static void vhost_vsock_flush(struct vhost_vsock *vsock)
646 {
647         int i;
648
649         for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++)
650                 if (vsock->vqs[i].handle_kick)
651                         vhost_poll_flush(&vsock->vqs[i].poll);
652         vhost_work_flush(&vsock->dev, &vsock->send_pkt_work);
653 }
654
655 static void vhost_vsock_reset_orphans(struct sock *sk)
656 {
657         struct vsock_sock *vsk = vsock_sk(sk);
658
659         /* vmci_transport.c doesn't take sk_lock here either.  At least we're
660          * under vsock_table_lock so the sock cannot disappear while we're
661          * executing.
662          */
663
664         /* If the peer is still valid, no need to reset connection */
665         if (vhost_vsock_get(vsk->remote_addr.svm_cid))
666                 return;
667
668         /* If the close timeout is pending, let it expire.  This avoids races
669          * with the timeout callback.
670          */
671         if (vsk->close_work_scheduled)
672                 return;
673
674         sock_set_flag(sk, SOCK_DONE);
675         vsk->peer_shutdown = SHUTDOWN_MASK;
676         sk->sk_state = SS_UNCONNECTED;
677         sk->sk_err = ECONNRESET;
678         sk->sk_error_report(sk);
679 }
680
681 static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
682 {
683         struct vhost_vsock *vsock = file->private_data;
684
685         spin_lock_bh(&vhost_vsock_lock);
686         if (vsock->guest_cid)
687                 hash_del_rcu(&vsock->hash);
688         spin_unlock_bh(&vhost_vsock_lock);
689
690         /* Wait for other CPUs to finish using vsock */
691         synchronize_rcu();
692
693         /* Iterating over all connections for all CIDs to find orphans is
694          * inefficient.  Room for improvement here. */
695         vsock_for_each_connected_socket(vhost_vsock_reset_orphans);
696
697         /* Don't check the owner, because we are in the release path, so we
698          * need to stop the vsock device in any case.
699          * vhost_vsock_stop() can not fail in this case, so we don't need to
700          * check the return code.
701          */
702         vhost_vsock_stop(vsock, false);
703         vhost_vsock_flush(vsock);
704         vhost_dev_stop(&vsock->dev);
705
706         spin_lock_bh(&vsock->send_pkt_list_lock);
707         while (!list_empty(&vsock->send_pkt_list)) {
708                 struct virtio_vsock_pkt *pkt;
709
710                 pkt = list_first_entry(&vsock->send_pkt_list,
711                                 struct virtio_vsock_pkt, list);
712                 list_del_init(&pkt->list);
713                 virtio_transport_free_pkt(pkt);
714         }
715         spin_unlock_bh(&vsock->send_pkt_list_lock);
716
717         vhost_dev_cleanup(&vsock->dev, false);
718         kfree(vsock->dev.vqs);
719         vhost_vsock_free(vsock);
720         return 0;
721 }
722
723 static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid)
724 {
725         struct vhost_vsock *other;
726
727         /* Refuse reserved CIDs */
728         if (guest_cid <= VMADDR_CID_HOST ||
729             guest_cid == U32_MAX)
730                 return -EINVAL;
731
732         /* 64-bit CIDs are not yet supported */
733         if (guest_cid > U32_MAX)
734                 return -EINVAL;
735
736         /* Refuse if CID is already in use */
737         spin_lock_bh(&vhost_vsock_lock);
738         other = vhost_vsock_get(guest_cid);
739         if (other && other != vsock) {
740                 spin_unlock_bh(&vhost_vsock_lock);
741                 return -EADDRINUSE;
742         }
743
744         if (vsock->guest_cid)
745                 hash_del_rcu(&vsock->hash);
746
747         vsock->guest_cid = guest_cid;
748         hash_add_rcu(vhost_vsock_hash, &vsock->hash, vsock->guest_cid);
749         spin_unlock_bh(&vhost_vsock_lock);
750
751         return 0;
752 }
753
754 static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
755 {
756         struct vhost_virtqueue *vq;
757         int i;
758
759         if (features & ~VHOST_VSOCK_FEATURES)
760                 return -EOPNOTSUPP;
761
762         mutex_lock(&vsock->dev.mutex);
763         if ((features & (1 << VHOST_F_LOG_ALL)) &&
764             !vhost_log_access_ok(&vsock->dev)) {
765                 mutex_unlock(&vsock->dev.mutex);
766                 return -EFAULT;
767         }
768
769         for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
770                 vq = &vsock->vqs[i];
771                 mutex_lock(&vq->mutex);
772                 vq->acked_features = features;
773                 mutex_unlock(&vq->mutex);
774         }
775         mutex_unlock(&vsock->dev.mutex);
776         return 0;
777 }
778
779 static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
780                                   unsigned long arg)
781 {
782         struct vhost_vsock *vsock = f->private_data;
783         void __user *argp = (void __user *)arg;
784         u64 guest_cid;
785         u64 features;
786         int start;
787         int r;
788
789         switch (ioctl) {
790         case VHOST_VSOCK_SET_GUEST_CID:
791                 if (copy_from_user(&guest_cid, argp, sizeof(guest_cid)))
792                         return -EFAULT;
793                 return vhost_vsock_set_cid(vsock, guest_cid);
794         case VHOST_VSOCK_SET_RUNNING:
795                 if (copy_from_user(&start, argp, sizeof(start)))
796                         return -EFAULT;
797                 if (start)
798                         return vhost_vsock_start(vsock);
799                 else
800                         return vhost_vsock_stop(vsock, true);
801         case VHOST_GET_FEATURES:
802                 features = VHOST_VSOCK_FEATURES;
803                 if (copy_to_user(argp, &features, sizeof(features)))
804                         return -EFAULT;
805                 return 0;
806         case VHOST_SET_FEATURES:
807                 if (copy_from_user(&features, argp, sizeof(features)))
808                         return -EFAULT;
809                 return vhost_vsock_set_features(vsock, features);
810         default:
811                 mutex_lock(&vsock->dev.mutex);
812                 r = vhost_dev_ioctl(&vsock->dev, ioctl, argp);
813                 if (r == -ENOIOCTLCMD)
814                         r = vhost_vring_ioctl(&vsock->dev, ioctl, argp);
815                 else
816                         vhost_vsock_flush(vsock);
817                 mutex_unlock(&vsock->dev.mutex);
818                 return r;
819         }
820 }
821
822 static const struct file_operations vhost_vsock_fops = {
823         .owner          = THIS_MODULE,
824         .open           = vhost_vsock_dev_open,
825         .release        = vhost_vsock_dev_release,
826         .llseek         = noop_llseek,
827         .unlocked_ioctl = vhost_vsock_dev_ioctl,
828 };
829
830 static struct miscdevice vhost_vsock_misc = {
831         .minor = VHOST_VSOCK_MINOR,
832         .name = "vhost-vsock",
833         .fops = &vhost_vsock_fops,
834 };
835
836 static int __init vhost_vsock_init(void)
837 {
838         int ret;
839
840         ret = vsock_core_init(&vhost_transport.transport);
841         if (ret < 0)
842                 return ret;
843         return misc_register(&vhost_vsock_misc);
844 };
845
846 static void __exit vhost_vsock_exit(void)
847 {
848         misc_deregister(&vhost_vsock_misc);
849         vsock_core_exit();
850 };
851
852 module_init(vhost_vsock_init);
853 module_exit(vhost_vsock_exit);
854 MODULE_LICENSE("GPL v2");
855 MODULE_AUTHOR("Asias He");
856 MODULE_DESCRIPTION("vhost transport for vsock ");
857 MODULE_ALIAS_MISCDEV(VHOST_VSOCK_MINOR);
858 MODULE_ALIAS("devname:vhost-vsock");