1 // SPDX-License-Identifier: GPL-2.0-only
3 * common code for virtio vsock
5 * Copyright (C) 2013-2015 Red Hat, Inc.
6 * Author: Asias He <asias@redhat.com>
7 * Stefan Hajnoczi <stefanha@redhat.com>
9 #include <linux/spinlock.h>
10 #include <linux/module.h>
11 #include <linux/sched/signal.h>
12 #include <linux/ctype.h>
13 #include <linux/list.h>
14 #include <linux/virtio_vsock.h>
15 #include <uapi/linux/vsockmon.h>
18 #include <net/af_vsock.h>
20 #define CREATE_TRACE_POINTS
21 #include <trace/events/vsock_virtio_transport_common.h>
23 /* How long to wait for graceful shutdown of a connection */
24 #define VSOCK_CLOSE_TIMEOUT (8 * HZ)
26 /* Threshold for detecting small packets to copy */
27 #define GOOD_COPY_LEN 128
/* Map a vsock_sock's generic core transport back to the containing
 * struct virtio_transport so transport-specific ops (e.g. send_pkt)
 * can be reached.  (Error-check lines are elided in this view.)
 */
29 static const struct virtio_transport *
30 virtio_transport_get_ops(struct vsock_sock *vsk)
32 const struct vsock_transport *t = vsock_core_get_transport(vsk);
37 return container_of(t, struct virtio_transport, transport);
40 /* Returns a new packet on success, otherwise returns NULL.
42 * If NULL is returned, errp is set to a negative errno.
44 static struct sk_buff *
45 virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info,
52 const size_t skb_len = VIRTIO_VSOCK_SKB_HEADROOM + len;
53 struct virtio_vsock_hdr *hdr;
58 skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL);
/* Fill the virtio_vsock header; all multi-byte fields are little-endian
 * on the wire, hence the cpu_to_le*() conversions.
 */
62 hdr = virtio_vsock_hdr(skb);
63 hdr->type = cpu_to_le16(info->type);
64 hdr->op = cpu_to_le16(info->op);
65 hdr->src_cid = cpu_to_le64(src_cid);
66 hdr->dst_cid = cpu_to_le64(dst_cid);
67 hdr->src_port = cpu_to_le32(src_port);
68 hdr->dst_port = cpu_to_le32(dst_port);
69 hdr->flags = cpu_to_le32(info->flags);
70 hdr->len = cpu_to_le32(len);
/* buf_alloc/fwd_cnt are zeroed here and filled in just before
 * transmission by virtio_transport_inc_tx_pkt().
 */
71 hdr->buf_alloc = cpu_to_le32(0);
72 hdr->fwd_cnt = cpu_to_le32(0);
/* Copy the payload out of the caller's msghdr, if one was supplied. */
74 if (info->msg && len > 0) {
75 payload = skb_put(skb, len);
76 err = memcpy_from_msg(payload, info->msg, len);
/* SEQPACKET: the fragment that drains the msghdr carries EOM
 * (end-of-message); MSG_EOR additionally sets EOR (end-of-record).
 */
80 if (msg_data_left(info->msg) == 0 &&
81 info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) {
82 hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
84 if (info->msg->msg_flags & MSG_EOR)
85 hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
90 virtio_vsock_skb_set_reply(skb);
92 trace_virtio_transport_alloc_pkt(src_cid, src_port,
/* Tie the skb to the owning socket for accounting; warn if the socket's
 * refcount already hit zero (skb then has no owner).
 */
99 if (info->vsk && !skb_set_owner_sk_safe(skb, sk_vsock(info->vsk))) {
100 WARN_ONCE(1, "failed to allocate skb on vsock socket with sk_refcnt == 0\n");
/* Build an af_vsockmon capture skb (vsockmon header + virtio header +
 * payload copy) for delivery to packet-capture taps.  'opaque' is the
 * original vsock skb being transmitted/received.
 */
112 static struct sk_buff *virtio_transport_build_skb(void *opaque)
114 struct virtio_vsock_hdr *pkt_hdr;
115 struct sk_buff *pkt = opaque;
116 struct af_vsockmon_hdr *hdr;
121 /* A packet could be split to fit the RX buffer, so we can retrieve
122 * the payload length from the header and the buffer pointer taking
123 * care of the offset in the original packet.
125 pkt_hdr = virtio_vsock_hdr(pkt);
126 payload_len = pkt->len;
127 payload_buf = pkt->data;
129 skb = alloc_skb(sizeof(*hdr) + sizeof(*pkt_hdr) + payload_len,
134 hdr = skb_put(skb, sizeof(*hdr));
136 /* pkt->hdr is little-endian so no need to byteswap here */
137 hdr->src_cid = pkt_hdr->src_cid;
138 hdr->src_port = pkt_hdr->src_port;
139 hdr->dst_cid = pkt_hdr->dst_cid;
140 hdr->dst_port = pkt_hdr->dst_port;
142 hdr->transport = cpu_to_le16(AF_VSOCK_TRANSPORT_VIRTIO);
143 hdr->len = cpu_to_le16(sizeof(*pkt_hdr));
144 memset(hdr->reserved, 0, sizeof(hdr->reserved));
/* Translate the virtio op code into the coarser vsockmon op class. */
146 switch (le16_to_cpu(pkt_hdr->op)) {
147 case VIRTIO_VSOCK_OP_REQUEST:
148 case VIRTIO_VSOCK_OP_RESPONSE:
149 hdr->op = cpu_to_le16(AF_VSOCK_OP_CONNECT);
151 case VIRTIO_VSOCK_OP_RST:
152 case VIRTIO_VSOCK_OP_SHUTDOWN:
153 hdr->op = cpu_to_le16(AF_VSOCK_OP_DISCONNECT);
155 case VIRTIO_VSOCK_OP_RW:
156 hdr->op = cpu_to_le16(AF_VSOCK_OP_PAYLOAD);
158 case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
159 case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
160 hdr->op = cpu_to_le16(AF_VSOCK_OP_CONTROL);
163 hdr->op = cpu_to_le16(AF_VSOCK_OP_UNKNOWN);
/* Append the raw virtio header, then a copy of the payload. */
167 skb_put_data(skb, pkt_hdr, sizeof(*pkt_hdr));
170 skb_put_data(skb, payload_buf, payload_len);
/* Deliver a packet to any registered vsock taps (vsockmon), at most once
 * per skb — the tap_delivered flag guards against double delivery when a
 * packet is requeued.
 */
176 void virtio_transport_deliver_tap_pkt(struct sk_buff *skb)
178 if (virtio_vsock_skb_tap_delivered(skb))
181 vsock_deliver_tap(virtio_transport_build_skb, skb);
182 virtio_vsock_skb_set_tap_delivered(skb);
184 EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
/* Map the socket type to the virtio-vsock wire type: SOCK_STREAM ->
 * TYPE_STREAM, anything else (i.e. SOCK_SEQPACKET) -> TYPE_SEQPACKET.
 */
186 static u16 virtio_transport_get_type(struct sock *sk)
188 if (sk->sk_type == SOCK_STREAM)
189 return VIRTIO_VSOCK_TYPE_STREAM;
191 return VIRTIO_VSOCK_TYPE_SEQPACKET;
194 /* This function can only be used on connecting/connected sockets,
195 * since a socket assigned to a transport is required.
197 * Do not use on listener sockets!
199 static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
200 struct virtio_vsock_pkt_info *info)
202 u32 src_cid, src_port, dst_cid, dst_port;
203 const struct virtio_transport *t_ops;
204 struct virtio_vsock_sock *vvs;
205 u32 pkt_len = info->pkt_len;
208 info->type = virtio_transport_get_type(sk_vsock(vsk));
210 t_ops = virtio_transport_get_ops(vsk);
211 if (unlikely(!t_ops))
/* Source is always our local address; destination comes from the
 * socket's remote address unless the caller overrode it in 'info'.
 */
214 src_cid = t_ops->transport.get_local_cid();
215 src_port = vsk->local_addr.svm_port;
216 if (!info->remote_cid) {
217 dst_cid = vsk->remote_addr.svm_cid;
218 dst_port = vsk->remote_addr.svm_port;
220 dst_cid = info->remote_cid;
221 dst_port = info->remote_port;
226 /* we can send less than pkt_len bytes */
227 if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
228 pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
230 /* virtio_transport_get_credit might return less than pkt_len credit */
231 pkt_len = virtio_transport_get_credit(vvs, pkt_len);
233 /* Do not send zero length OP_RW pkt */
234 if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
237 skb = virtio_transport_alloc_skb(info, pkt_len,
/* On allocation failure the reserved credit must be given back. */
241 virtio_transport_put_credit(vvs, pkt_len);
245 virtio_transport_inc_tx_pkt(vvs, skb);
247 return t_ops->send_pkt(skb);
/* Account 'len' received bytes against the rx buffer; returns false when
 * accepting the packet would exceed buf_alloc (caller must drop it).
 * Must be called with vvs->rx_lock held (callers hold it).
 */
250 static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
253 if (vvs->rx_bytes + len > vvs->buf_alloc)
256 vvs->rx_bytes += len;
/* Release rx accounting for 'len' consumed bytes (fwd_cnt update lines
 * are elided in this view).
 */
260 static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
263 vvs->rx_bytes -= len;
/* Stamp an outgoing packet's header with our current credit state
 * (fwd_cnt and buf_alloc) so the peer can update its view of our
 * receive window.  rx_lock protects fwd_cnt/buf_alloc.
 */
267 void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *skb)
269 struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
271 spin_lock_bh(&vvs->rx_lock);
272 vvs->last_fwd_cnt = vvs->fwd_cnt;
273 hdr->fwd_cnt = cpu_to_le32(vvs->fwd_cnt);
274 hdr->buf_alloc = cpu_to_le32(vvs->buf_alloc);
275 spin_unlock_bh(&vvs->rx_lock);
277 EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt);
/* Reserve up to 'credit' bytes of tx credit.  Available credit is the
 * peer's advertised buffer minus bytes in flight (tx_cnt - peer_fwd_cnt);
 * the clamping/tx_cnt-increment lines are elided in this view.
 */
279 u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit)
283 spin_lock_bh(&vvs->tx_lock);
284 ret = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
288 spin_unlock_bh(&vvs->tx_lock);
292 EXPORT_SYMBOL_GPL(virtio_transport_get_credit);
/* Return previously reserved tx credit (e.g. after an alloc failure). */
294 void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit)
296 spin_lock_bh(&vvs->tx_lock);
297 vvs->tx_cnt -= credit;
298 spin_unlock_bh(&vvs->tx_lock);
300 EXPORT_SYMBOL_GPL(virtio_transport_put_credit);
/* Send a CREDIT_UPDATE control packet advertising our current receive
 * window to the peer.
 */
302 static int virtio_transport_send_credit_update(struct vsock_sock *vsk)
304 struct virtio_vsock_pkt_info info = {
305 .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE,
309 return virtio_transport_send_pkt_info(vsk, &info);
/* MSG_PEEK path for stream receive: copy up to 'len' bytes out of the
 * rx queue into 'msg' without dequeuing or consuming credit.
 */
313 virtio_transport_stream_do_peek(struct vsock_sock *vsk,
317 struct virtio_vsock_sock *vvs = vsk->trans;
318 size_t bytes, total = 0, off;
319 struct sk_buff *skb, *tmp;
322 spin_lock_bh(&vvs->rx_lock);
324 skb_queue_walk_safe(&vvs->rx_queue, skb, tmp) {
330 while (total < len && off < skb->len) {
332 if (bytes > skb->len - off)
333 bytes = skb->len - off;
335 /* sk_lock is held by caller so no one else can dequeue.
336 * Unlock rx_lock since memcpy_to_msg() may sleep.
338 spin_unlock_bh(&vvs->rx_lock);
340 err = memcpy_to_msg(msg, skb->data + off, bytes);
344 spin_lock_bh(&vvs->rx_lock);
351 spin_unlock_bh(&vvs->rx_lock);
/* Consuming stream receive: copy up to 'len' bytes into 'msg', pulling
 * data off queued skbs, freeing fully-consumed ones, and sending a
 * credit update when the freed window is large or the reader would
 * otherwise stall below rcvlowat.
 */
362 virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
366 struct virtio_vsock_sock *vvs = vsk->trans;
367 size_t bytes, total = 0;
374 spin_lock_bh(&vvs->rx_lock);
375 while (total < len && !skb_queue_empty(&vvs->rx_queue)) {
376 skb = skb_peek(&vvs->rx_queue);
379 if (bytes > skb->len)
382 /* sk_lock is held by caller so no one else can dequeue.
383 * Unlock rx_lock since memcpy_to_msg() may sleep.
385 spin_unlock_bh(&vvs->rx_lock);
387 err = memcpy_to_msg(msg, skb->data, bytes);
391 spin_lock_bh(&vvs->rx_lock);
/* Advance past the copied bytes; when the skb is drained, release
 * its rx accounting and unlink it from the queue.
 */
394 skb_pull(skb, bytes);
397 u32 pkt_len = le32_to_cpu(virtio_vsock_hdr(skb)->len);
399 virtio_transport_dec_rx_pkt(vvs, pkt_len);
400 __skb_unlink(skb, &vvs->rx_queue);
405 fwd_cnt_delta = vvs->fwd_cnt - vvs->last_fwd_cnt;
406 free_space = vvs->buf_alloc - fwd_cnt_delta;
407 low_rx_bytes = (vvs->rx_bytes <
408 sock_rcvlowat(sk_vsock(vsk), 0, INT_MAX));
410 spin_unlock_bh(&vvs->rx_lock);
412 /* To reduce the number of credit update messages,
413 * don't update credits as long as lots of space is available.
414 * Note: the limit chosen here is arbitrary. Setting the limit
415 * too high causes extra messages. Too low causes transmitter
416 * stalls. As stalls are in theory more expensive than extra
417 * messages, we set the limit to a high value. TODO: experiment
418 * with different values. Also send credit update message when
419 * number of bytes in rx queue is not enough to wake up reader.
422 (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE || low_rx_bytes))
423 virtio_transport_send_credit_update(vsk);
/* SEQPACKET receive: dequeue skbs until the EOM-flagged fragment that
 * terminates one message.  Bytes beyond the user buffer are dropped
 * (fragments are consumed either way); EOR on the last fragment is
 * reflected as MSG_EOR in msg->msg_flags.
 */
433 static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
437 struct virtio_vsock_sock *vvs = vsk->trans;
438 int dequeued_len = 0;
439 size_t user_buf_len = msg_data_left(msg);
440 bool msg_ready = false;
443 spin_lock_bh(&vvs->rx_lock);
/* No complete message queued yet: nothing to dequeue. */
445 if (vvs->msg_count == 0) {
446 spin_unlock_bh(&vvs->rx_lock);
451 struct virtio_vsock_hdr *hdr;
454 skb = __skb_dequeue(&vvs->rx_queue);
457 hdr = virtio_vsock_hdr(skb);
458 pkt_len = (size_t)le32_to_cpu(hdr->len);
/* dequeued_len goes negative on a copy error; after that we still
 * drain (and free) the remaining fragments without copying.
 */
460 if (dequeued_len >= 0) {
461 size_t bytes_to_copy;
463 bytes_to_copy = min(user_buf_len, pkt_len);
468 /* sk_lock is held by caller so no one else can dequeue.
469 * Unlock rx_lock since memcpy_to_msg() may sleep.
471 spin_unlock_bh(&vvs->rx_lock);
473 err = memcpy_to_msg(msg, skb->data, bytes_to_copy);
475 /* Copy of message failed. Rest of
476 * fragments will be freed without copy.
480 user_buf_len -= bytes_to_copy;
483 spin_lock_bh(&vvs->rx_lock);
486 if (dequeued_len >= 0)
487 dequeued_len += pkt_len;
/* EOM marks the end of this message; EOR additionally surfaces
 * the record boundary to userspace.
 */
490 if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) {
494 if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR)
495 msg->msg_flags |= MSG_EOR;
498 virtio_transport_dec_rx_pkt(vvs, pkt_len);
502 spin_unlock_bh(&vvs->rx_lock);
504 virtio_transport_send_credit_update(vsk);
/* Stream dequeue entry point: MSG_PEEK is non-consuming, otherwise the
 * consuming path is taken.
 */
510 virtio_transport_stream_dequeue(struct vsock_sock *vsk,
512 size_t len, int flags)
514 if (flags & MSG_PEEK)
515 return virtio_transport_stream_do_peek(vsk, msg, len);
517 return virtio_transport_stream_do_dequeue(vsk, msg, len);
519 EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue);
/* SEQPACKET dequeue entry point.  The MSG_PEEK branch body is elided in
 * this view — presumably it rejects/handles peek separately; confirm
 * against full source.
 */
522 virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
526 if (flags & MSG_PEEK)
529 return virtio_transport_seqpacket_do_dequeue(vsk, msg, flags);
531 EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
/* SEQPACKET send: a message larger than the peer's whole receive buffer
 * can never be delivered atomically, so it is rejected up front; the
 * actual transmission reuses the stream enqueue path (fragments carry
 * EOM/EOR flags set at alloc time).
 */
534 virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
538 struct virtio_vsock_sock *vvs = vsk->trans;
540 spin_lock_bh(&vvs->tx_lock);
542 if (len > vvs->peer_buf_alloc) {
543 spin_unlock_bh(&vvs->tx_lock);
547 spin_unlock_bh(&vvs->tx_lock);
549 return virtio_transport_stream_enqueue(vsk, msg, len);
551 EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_enqueue);
/* Datagram receive is not supported by this transport (body elided in
 * this view; presumably returns an error — confirm against full source).
 */
554 virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
556 size_t len, int flags)
560 EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
/* Number of readable bytes currently queued on the stream socket. */
562 s64 virtio_transport_stream_has_data(struct vsock_sock *vsk)
564 struct virtio_vsock_sock *vvs = vsk->trans;
567 spin_lock_bh(&vvs->rx_lock);
568 bytes = vvs->rx_bytes;
569 spin_unlock_bh(&vvs->rx_lock);
573 EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data);
/* Number of complete SEQPACKET messages ready to be dequeued. */
575 u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk)
577 struct virtio_vsock_sock *vvs = vsk->trans;
580 spin_lock_bh(&vvs->rx_lock);
581 msg_count = vvs->msg_count;
582 spin_unlock_bh(&vvs->rx_lock);
586 EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_has_data);
/* Remaining tx credit: peer's buffer minus our bytes in flight.
 * Caller must hold tx_lock (see virtio_transport_stream_has_space).
 */
588 static s64 virtio_transport_has_space(struct vsock_sock *vsk)
590 struct virtio_vsock_sock *vvs = vsk->trans;
593 bytes = (s64)vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
/* Locked wrapper around virtio_transport_has_space(). */
600 s64 virtio_transport_stream_has_space(struct vsock_sock *vsk)
602 struct virtio_vsock_sock *vvs = vsk->trans;
605 spin_lock_bh(&vvs->tx_lock);
606 bytes = virtio_transport_has_space(vsk);
607 spin_unlock_bh(&vvs->tx_lock);
611 EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space);
/* Allocate and initialize the per-socket virtio_vsock_sock state.
 * A child of an accepting socket (psk) inherits the parent's known
 * peer credit so it can transmit before the first credit update.
 */
613 int virtio_transport_do_socket_init(struct vsock_sock *vsk,
614 struct vsock_sock *psk)
616 struct virtio_vsock_sock *vvs;
618 vvs = kzalloc(sizeof(*vvs), GFP_KERNEL);
624 if (psk && psk->trans) {
625 struct virtio_vsock_sock *ptrans = psk->trans;
627 vvs->peer_buf_alloc = ptrans->peer_buf_alloc;
/* Clamp the advertised receive buffer to the transport maximum. */
630 if (vsk->buffer_size > VIRTIO_VSOCK_MAX_BUF_SIZE)
631 vsk->buffer_size = VIRTIO_VSOCK_MAX_BUF_SIZE;
633 vvs->buf_alloc = vsk->buffer_size;
635 spin_lock_init(&vvs->rx_lock);
636 spin_lock_init(&vvs->tx_lock);
637 skb_queue_head_init(&vvs->rx_queue);
641 EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init);
643 /* sk_lock held by the caller */
/* Apply a new SO_VM_SOCKETS_BUFFER_SIZE: clamp to the transport max,
 * record it as our advertised window, and tell the peer immediately.
 */
644 void virtio_transport_notify_buffer_size(struct vsock_sock *vsk, u64 *val)
646 struct virtio_vsock_sock *vvs = vsk->trans;
648 if (*val > VIRTIO_VSOCK_MAX_BUF_SIZE)
649 *val = VIRTIO_VSOCK_MAX_BUF_SIZE;
651 vvs->buf_alloc = *val;
653 virtio_transport_send_credit_update(vsk);
655 EXPORT_SYMBOL_GPL(virtio_transport_notify_buffer_size);
/* Poll-readable: data_ready_now is set when queued bytes meet 'target'. */
658 virtio_transport_notify_poll_in(struct vsock_sock *vsk,
660 bool *data_ready_now)
662 *data_ready_now = vsock_stream_has_data(vsk) >= target;
666 EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in);
/* Poll-writable: writable when tx credit is positive, not writable at
 * zero (negative free_space handling elided in this view).
 */
669 virtio_transport_notify_poll_out(struct vsock_sock *vsk,
671 bool *space_avail_now)
675 free_space = vsock_stream_has_space(vsk);
677 *space_avail_now = true;
678 else if (free_space == 0)
679 *space_avail_now = false;
683 EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out);
/* vsock_transport notify_* callbacks.  Their bodies are elided in this
 * view; they appear to be no-op stubs returning success — TODO confirm
 * against the full source.
 */
685 int virtio_transport_notify_recv_init(struct vsock_sock *vsk,
686 size_t target, struct vsock_transport_recv_notify_data *data)
690 EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init);
692 int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk,
693 size_t target, struct vsock_transport_recv_notify_data *data)
697 EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block);
699 int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk,
700 size_t target, struct vsock_transport_recv_notify_data *data)
704 EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue);
706 int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk,
707 size_t target, ssize_t copied, bool data_read,
708 struct vsock_transport_recv_notify_data *data)
712 EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue);
714 int virtio_transport_notify_send_init(struct vsock_sock *vsk,
715 struct vsock_transport_send_notify_data *data)
719 EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init);
721 int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk,
722 struct vsock_transport_send_notify_data *data)
726 EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block);
728 int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk,
729 struct vsock_transport_send_notify_data *data)
733 EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue);
735 int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk,
736 ssize_t written, struct vsock_transport_send_notify_data *data)
740 EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue);
/* Receive high-water mark is simply the configured buffer size. */
742 u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk)
744 return vsk->buffer_size;
746 EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat);
/* Bodies of the following predicates are elided in this view; they look
 * like constant-return stubs — TODO confirm against the full source.
 */
748 bool virtio_transport_stream_is_active(struct vsock_sock *vsk)
752 EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active);
754 bool virtio_transport_stream_allow(u32 cid, u32 port)
758 EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
760 int virtio_transport_dgram_bind(struct vsock_sock *vsk,
761 struct sockaddr_vm *addr)
765 EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
767 bool virtio_transport_dgram_allow(u32 cid, u32 port)
771 EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
/* Initiate a connection by sending an OP_REQUEST packet to the peer. */
773 int virtio_transport_connect(struct vsock_sock *vsk)
775 struct virtio_vsock_pkt_info info = {
776 .op = VIRTIO_VSOCK_OP_REQUEST,
780 return virtio_transport_send_pkt_info(vsk, &info);
782 EXPORT_SYMBOL_GPL(virtio_transport_connect);
/* Send an OP_SHUTDOWN packet translating the socket shutdown mode
 * (RCV_SHUTDOWN / SEND_SHUTDOWN) into the wire flags.
 */
784 int virtio_transport_shutdown(struct vsock_sock *vsk, int mode)
786 struct virtio_vsock_pkt_info info = {
787 .op = VIRTIO_VSOCK_OP_SHUTDOWN,
788 .flags = (mode & RCV_SHUTDOWN ?
789 VIRTIO_VSOCK_SHUTDOWN_RCV : 0) |
790 (mode & SEND_SHUTDOWN ?
791 VIRTIO_VSOCK_SHUTDOWN_SEND : 0),
795 return virtio_transport_send_pkt_info(vsk, &info);
797 EXPORT_SYMBOL_GPL(virtio_transport_shutdown);
/* Datagram send is not supported by this transport (body elided in this
 * view; presumably returns an error — confirm against full source).
 */
800 virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
801 struct sockaddr_vm *remote_addr,
807 EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
/* Stream send: wrap the msghdr in an OP_RW pkt_info and transmit. */
810 virtio_transport_stream_enqueue(struct vsock_sock *vsk,
814 struct virtio_vsock_pkt_info info = {
815 .op = VIRTIO_VSOCK_OP_RW,
821 return virtio_transport_send_pkt_info(vsk, &info);
823 EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue);
/* Release the per-socket transport state allocated in
 * virtio_transport_do_socket_init() (free call elided in this view).
 */
825 void virtio_transport_destruct(struct vsock_sock *vsk)
827 struct virtio_vsock_sock *vvs = vsk->trans;
831 EXPORT_SYMBOL_GPL(virtio_transport_destruct);
/* Send an OP_RST for this socket, unless we are reacting to a received
 * RST (never RST a RST, to avoid reset ping-pong).
 */
833 static int virtio_transport_reset(struct vsock_sock *vsk,
836 struct virtio_vsock_pkt_info info = {
837 .op = VIRTIO_VSOCK_OP_RST,
842 /* Send RST only if the original pkt is not a RST pkt */
843 if (skb && le16_to_cpu(virtio_vsock_hdr(skb)->op) == VIRTIO_VSOCK_OP_RST)
846 return virtio_transport_send_pkt_info(vsk, &info);
849 /* Normally packets are associated with a socket. There may be no socket if an
850 * attempt was made to connect to a socket that does not exist.
852 static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
855 struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
856 struct virtio_vsock_pkt_info info = {
857 .op = VIRTIO_VSOCK_OP_RST,
858 .type = le16_to_cpu(hdr->type),
861 struct sk_buff *reply;
863 /* Send RST only if the original pkt is not a RST pkt */
864 if (le16_to_cpu(hdr->op) == VIRTIO_VSOCK_OP_RST)
/* Build the RST with source/destination swapped relative to the
 * offending packet, then transmit via the receiving transport.
 */
867 reply = virtio_transport_alloc_skb(&info, 0,
868 le64_to_cpu(hdr->dst_cid),
869 le32_to_cpu(hdr->dst_port),
870 le64_to_cpu(hdr->src_cid),
871 le32_to_cpu(hdr->src_port));
880 return t->send_pkt(reply);
883 /* This function should be called with sk_lock held and SOCK_DONE set */
884 static void virtio_transport_remove_sock(struct vsock_sock *vsk)
886 struct virtio_vsock_sock *vvs = vsk->trans;
888 /* We don't need to take rx_lock, as the socket is closing and we are
/* Drop any still-queued rx skbs, then remove the socket from the
 * bound/connected tables.
 */
891 __skb_queue_purge(&vvs->rx_queue);
892 vsock_remove_sock(vsk);
/* Linger on close: sleep until the peer completes shutdown (SOCK_DONE),
 * a signal arrives, or the timeout expires.
 */
895 static void virtio_transport_wait_close(struct sock *sk, long timeout)
898 DEFINE_WAIT_FUNC(wait, woken_wake_function);
900 add_wait_queue(sk_sleep(sk), &wait);
903 if (sk_wait_event(sk, &timeout,
904 sock_flag(sk, SOCK_DONE), &wait))
906 } while (!signal_pending(current) && timeout);
908 remove_wait_queue(sk_sleep(sk), &wait);
/* Finalize the close: mark the socket done, record full peer shutdown,
 * move to TCP_CLOSING once no data remains, and — if the close-timeout
 * work was scheduled — cancel it and drop its socket reference.
 */
912 static void virtio_transport_do_close(struct vsock_sock *vsk,
915 struct sock *sk = sk_vsock(vsk);
917 sock_set_flag(sk, SOCK_DONE);
918 vsk->peer_shutdown = SHUTDOWN_MASK;
919 if (vsock_stream_has_data(vsk) <= 0)
920 sk->sk_state = TCP_CLOSING;
921 sk->sk_state_change(sk);
923 if (vsk->close_work_scheduled &&
924 (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
925 vsk->close_work_scheduled = false;
927 virtio_transport_remove_sock(vsk);
929 /* Release refcnt obtained when we scheduled the timeout */
/* Delayed-work handler: if the peer never acknowledged our shutdown
 * within VSOCK_CLOSE_TIMEOUT, force the close with a RST.
 */
934 static void virtio_transport_close_timeout(struct work_struct *work)
936 struct vsock_sock *vsk =
937 container_of(work, struct vsock_sock, close_work.work);
938 struct sock *sk = sk_vsock(vsk);
943 if (!sock_flag(sk, SOCK_DONE)) {
944 (void)virtio_transport_reset(vsk, NULL);
946 virtio_transport_do_close(vsk, false);
949 vsk->close_work_scheduled = false;
955 /* User context, vsk->sk is locked */
/* Graceful close.  Returns whether the caller may remove the socket
 * immediately; when a graceful shutdown is pending, a delayed work with
 * VSOCK_CLOSE_TIMEOUT is scheduled instead (return path elided in this
 * view).
 */
956 static bool virtio_transport_close(struct vsock_sock *vsk)
958 struct sock *sk = &vsk->sk;
960 if (!(sk->sk_state == TCP_ESTABLISHED ||
961 sk->sk_state == TCP_CLOSING))
964 /* Already received SHUTDOWN from peer, reply with RST */
965 if ((vsk->peer_shutdown & SHUTDOWN_MASK) == SHUTDOWN_MASK) {
966 (void)virtio_transport_reset(vsk, NULL);
/* Otherwise announce our shutdown, optionally lingering for the
 * peer's acknowledgement when SO_LINGER is set.
 */
970 if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK)
971 (void)virtio_transport_shutdown(vsk, SHUTDOWN_MASK);
973 if (sock_flag(sk, SOCK_LINGER) && !(current->flags & PF_EXITING))
974 virtio_transport_wait_close(sk, sk->sk_lingertime);
976 if (sock_flag(sk, SOCK_DONE)) {
981 INIT_DELAYED_WORK(&vsk->close_work,
982 virtio_transport_close_timeout);
983 vsk->close_work_scheduled = true;
984 schedule_delayed_work(&vsk->close_work, VSOCK_CLOSE_TIMEOUT);
/* Socket release: connection-oriented sockets go through the graceful
 * close path; when that reports the socket can be removed now, mark it
 * done and take it out of the tables.
 */
988 void virtio_transport_release(struct vsock_sock *vsk)
990 struct sock *sk = &vsk->sk;
991 bool remove_sock = true;
993 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)
994 remove_sock = virtio_transport_close(vsk);
997 sock_set_flag(sk, SOCK_DONE);
998 virtio_transport_remove_sock(vsk);
1001 EXPORT_SYMBOL_GPL(virtio_transport_release);
/* Handle a packet on a socket in the connecting state: OP_RESPONSE
 * completes the handshake; unexpected/invalid ops (tail of function,
 * partially elided here) reset the connection and report the error.
 */
1004 virtio_transport_recv_connecting(struct sock *sk,
1005 struct sk_buff *skb)
1007 struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
1008 struct vsock_sock *vsk = vsock_sk(sk);
1012 switch (le16_to_cpu(hdr->op)) {
1013 case VIRTIO_VSOCK_OP_RESPONSE:
1014 sk->sk_state = TCP_ESTABLISHED;
1015 sk->sk_socket->state = SS_CONNECTED;
1016 vsock_insert_connected(vsk);
1017 sk->sk_state_change(sk);
1019 case VIRTIO_VSOCK_OP_INVALID:
1021 case VIRTIO_VSOCK_OP_RST:
/* Failure path: reset the peer, close the socket, surface error. */
1033 virtio_transport_reset(vsk, skb);
1034 sk->sk_state = TCP_CLOSE;
1036 sk_error_report(sk);
/* Queue a received OP_RW packet on the socket's rx queue, first checking
 * it fits in our advertised receive buffer.  Small packets may be merged
 * into the tail skb to save memory.
 */
1041 virtio_transport_recv_enqueue(struct vsock_sock *vsk,
1042 struct sk_buff *skb)
1044 struct virtio_vsock_sock *vvs = vsk->trans;
1045 bool can_enqueue, free_pkt = false;
1046 struct virtio_vsock_hdr *hdr;
1049 hdr = virtio_vsock_hdr(skb);
1050 len = le32_to_cpu(hdr->len);
1052 spin_lock_bh(&vvs->rx_lock);
1054 can_enqueue = virtio_transport_inc_rx_pkt(vvs, len);
/* EOM completes a SEQPACKET message (msg_count bump elided here). */
1060 if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)
1063 /* Try to copy small packets into the buffer of last packet queued,
1064 * to avoid wasting memory queueing the entire buffer with a small
1067 if (len <= GOOD_COPY_LEN && !skb_queue_empty(&vvs->rx_queue)) {
1068 struct virtio_vsock_hdr *last_hdr;
1069 struct sk_buff *last_skb;
1071 last_skb = skb_peek_tail(&vvs->rx_queue);
1072 last_hdr = virtio_vsock_hdr(last_skb);
1074 /* If there is space in the last packet queued, we copy the
1075 * new packet in its buffer. We avoid this if the last packet
1076 * queued has VIRTIO_VSOCK_SEQ_EOM set, because this is
1077 * delimiter of SEQPACKET message, so 'pkt' is the first packet
1080 if (skb->len < skb_tailroom(last_skb) &&
1081 !(le32_to_cpu(last_hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)) {
1082 memcpy(skb_put(last_skb, skb->len), skb->data, skb->len);
/* Merge flags and extend the tail skb's header length in place. */
1084 last_hdr->flags |= hdr->flags;
1085 le32_add_cpu(&last_hdr->len, len);
1090 __skb_queue_tail(&vvs->rx_queue, skb);
1093 spin_unlock_bh(&vvs->rx_lock);
/* Handle a packet on an established socket: data delivery, credit
 * requests/updates, peer shutdown and reset.
 */
1099 virtio_transport_recv_connected(struct sock *sk,
1100 struct sk_buff *skb)
1102 struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
1103 struct vsock_sock *vsk = vsock_sk(sk);
1106 switch (le16_to_cpu(hdr->op)) {
1107 case VIRTIO_VSOCK_OP_RW:
1108 virtio_transport_recv_enqueue(vsk, skb);
1109 vsock_data_ready(sk);
1111 case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
1112 virtio_transport_send_credit_update(vsk);
1114 case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
/* Peer freed buffer space; wake up any blocked writers. */
1115 sk->sk_write_space(sk);
1117 case VIRTIO_VSOCK_OP_SHUTDOWN:
1118 if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_RCV)
1119 vsk->peer_shutdown |= RCV_SHUTDOWN;
1120 if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_SEND)
1121 vsk->peer_shutdown |= SEND_SHUTDOWN;
/* Peer fully shut down: if nothing is left to read, reset and
 * close; either way detach the socket so the remote port can be
 * reused by a new connection.
 */
1122 if (vsk->peer_shutdown == SHUTDOWN_MASK) {
1123 if (vsock_stream_has_data(vsk) <= 0 && !sock_flag(sk, SOCK_DONE)) {
1124 (void)virtio_transport_reset(vsk, NULL);
1125 virtio_transport_do_close(vsk, true);
1127 /* Remove this socket anyway because the remote peer sent
1128 * the shutdown. This way a new connection will succeed
1129 * if the remote peer uses the same source port,
1130 * even if the old socket is still unreleased, but now disconnected.
1132 vsock_remove_sock(vsk);
1134 if (le32_to_cpu(virtio_vsock_hdr(skb)->flags))
1135 sk->sk_state_change(sk);
1137 case VIRTIO_VSOCK_OP_RST:
1138 virtio_transport_do_close(vsk, true);
/* In the disconnecting state only a peer RST is acted upon: it
 * finalizes the close.
 */
1150 virtio_transport_recv_disconnecting(struct sock *sk,
1151 struct sk_buff *skb)
1153 struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
1154 struct vsock_sock *vsk = vsock_sk(sk);
1156 if (le16_to_cpu(hdr->op) == VIRTIO_VSOCK_OP_RST)
1157 virtio_transport_do_close(vsk, true);
/* Answer a connection REQUEST with an OP_RESPONSE addressed back to the
 * requester (source cid/port taken from the request header).
 */
1161 virtio_transport_send_response(struct vsock_sock *vsk,
1162 struct sk_buff *skb)
1164 struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
1165 struct virtio_vsock_pkt_info info = {
1166 .op = VIRTIO_VSOCK_OP_RESPONSE,
1167 .remote_cid = le64_to_cpu(hdr->src_cid),
1168 .remote_port = le32_to_cpu(hdr->src_port),
1173 return virtio_transport_send_pkt_info(vsk, &info);
/* Refresh our view of the peer's credit (buf_alloc/fwd_cnt carried in
 * every header) and report whether tx space is now available.
 */
1176 static bool virtio_transport_space_update(struct sock *sk,
1177 struct sk_buff *skb)
1179 struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
1180 struct vsock_sock *vsk = vsock_sk(sk);
1181 struct virtio_vsock_sock *vvs = vsk->trans;
1182 bool space_available;
1184 /* Listener sockets are not associated with any transport, so we are
1185 * not able to take the state to see if there is space available in the
1186 * remote peer, but since they are only used to receive requests, we
1187 * can assume that there is always space available in the other peer.
1192 /* buf_alloc and fwd_cnt is always included in the hdr */
1193 spin_lock_bh(&vvs->tx_lock);
1194 vvs->peer_buf_alloc = le32_to_cpu(hdr->buf_alloc);
1195 vvs->peer_fwd_cnt = le32_to_cpu(hdr->fwd_cnt);
1196 space_available = virtio_transport_has_space(vsk);
1197 spin_unlock_bh(&vvs->tx_lock);
1198 return space_available;
1201 /* Handle server socket */
/* Accept path: validate the incoming REQUEST, create a child socket,
 * assign it addresses and a transport, queue it for accept() and send
 * the RESPONSE.  Any validation failure answers with a no-sock RST.
 */
1203 virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
1204 struct virtio_transport *t)
1206 struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
1207 struct vsock_sock *vsk = vsock_sk(sk);
1208 struct vsock_sock *vchild;
1212 if (le16_to_cpu(hdr->op) != VIRTIO_VSOCK_OP_REQUEST) {
1213 virtio_transport_reset_no_sock(t, skb);
1217 if (sk_acceptq_is_full(sk)) {
1218 virtio_transport_reset_no_sock(t, skb);
1222 child = vsock_create_connected(sk);
1224 virtio_transport_reset_no_sock(t, skb);
1228 sk_acceptq_added(sk);
1230 lock_sock_nested(child, SINGLE_DEPTH_NESTING);
1232 child->sk_state = TCP_ESTABLISHED;
/* Child's local address is the request's destination; its remote
 * address is the request's source.
 */
1234 vchild = vsock_sk(child);
1235 vsock_addr_init(&vchild->local_addr, le64_to_cpu(hdr->dst_cid),
1236 le32_to_cpu(hdr->dst_port));
1237 vsock_addr_init(&vchild->remote_addr, le64_to_cpu(hdr->src_cid),
1238 le32_to_cpu(hdr->src_port));
1240 ret = vsock_assign_transport(vchild, vsk);
1241 /* Transport assigned (looking at remote_addr) must be the same
1242 * where we received the request.
1244 if (ret || vchild->transport != &t->transport) {
1245 release_sock(child);
1246 virtio_transport_reset_no_sock(t, skb);
1251 if (virtio_transport_space_update(child, skb))
1252 child->sk_write_space(child);
1254 vsock_insert_connected(vchild);
1255 vsock_enqueue_accept(sk, child);
1256 virtio_transport_send_response(vchild, skb);
1258 release_sock(child);
/* Wake the listener so accept() can pick up the new child. */
1260 sk->sk_data_ready(sk);
/* Only STREAM and SEQPACKET wire types are accepted. */
1264 static bool virtio_transport_valid_type(u16 type)
1266 return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
1267 (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
1270 /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
/* Top-level receive dispatch: validate the packet, find the owning
 * socket (connected table first, then bound), update credit, and hand
 * the packet to the state-specific handler.  Packets with no matching
 * socket, wrong type, or a closed socket are answered with a RST.
 */
1273 void virtio_transport_recv_pkt(struct virtio_transport *t,
1274 struct sk_buff *skb)
1276 struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
1277 struct sockaddr_vm src, dst;
1278 struct vsock_sock *vsk;
1280 bool space_available;
1282 vsock_addr_init(&src, le64_to_cpu(hdr->src_cid),
1283 le32_to_cpu(hdr->src_port));
1284 vsock_addr_init(&dst, le64_to_cpu(hdr->dst_cid),
1285 le32_to_cpu(hdr->dst_port));
1287 trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port,
1288 dst.svm_cid, dst.svm_port,
1289 le32_to_cpu(hdr->len),
1290 le16_to_cpu(hdr->type),
1291 le16_to_cpu(hdr->op),
1292 le32_to_cpu(hdr->flags),
1293 le32_to_cpu(hdr->buf_alloc),
1294 le32_to_cpu(hdr->fwd_cnt));
1296 if (!virtio_transport_valid_type(le16_to_cpu(hdr->type))) {
1297 (void)virtio_transport_reset_no_sock(t, skb);
1301 /* The socket must be in connected or bound table
1302 * otherwise send reset back
1304 sk = vsock_find_connected_socket(&src, &dst);
1306 sk = vsock_find_bound_socket(&dst);
1308 (void)virtio_transport_reset_no_sock(t, skb);
/* The packet's wire type must match the socket's type. */
1313 if (virtio_transport_get_type(sk) != le16_to_cpu(hdr->type)) {
1314 (void)virtio_transport_reset_no_sock(t, skb);
1319 if (!skb_set_owner_sk_safe(skb, sk)) {
1320 WARN_ONCE(1, "receiving vsock socket has sk_refcnt == 0\n");
1328 /* Check if sk has been closed before lock_sock */
1329 if (sock_flag(sk, SOCK_DONE)) {
1330 (void)virtio_transport_reset_no_sock(t, skb);
1336 space_available = virtio_transport_space_update(sk, skb);
1338 /* Update CID in case it has changed after a transport reset event */
1339 if (vsk->local_addr.svm_cid != VMADDR_CID_ANY)
1340 vsk->local_addr.svm_cid = dst.svm_cid;
1342 if (space_available)
1343 sk->sk_write_space(sk);
/* Dispatch on socket state (listen / connecting / established /
 * disconnecting); anything else is reset.
 */
1345 switch (sk->sk_state) {
1347 virtio_transport_recv_listen(sk, skb, t);
1351 virtio_transport_recv_connecting(sk, skb);
1354 case TCP_ESTABLISHED:
1355 virtio_transport_recv_connected(sk, skb);
1358 virtio_transport_recv_disconnecting(sk, skb);
1362 (void)virtio_transport_reset_no_sock(t, skb);
1369 /* Release refcnt obtained when we fetched this socket out of the
1370 * bound or connected list.
1378 EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt);
1380 /* Remove skbs found in a queue that have a vsk that matches.
1382 * Each skb is freed.
1384 * Returns the count of skbs that were reply packets.
1386 int virtio_transport_purge_skbs(void *vsk, struct sk_buff_head *queue)
1388 struct sk_buff_head freeme;
1389 struct sk_buff *skb, *tmp;
1392 skb_queue_head_init(&freeme);
/* Move matching skbs to a private list under the queue lock, counting
 * reply packets, then free them outside the lock.
 */
1394 spin_lock_bh(&queue->lock);
1395 skb_queue_walk_safe(queue, skb, tmp) {
1396 if (vsock_sk(skb->sk) != vsk)
1399 __skb_unlink(skb, queue);
1400 __skb_queue_tail(&freeme, skb);
1402 if (virtio_vsock_skb_reply(skb))
1405 spin_unlock_bh(&queue->lock);
1407 __skb_queue_purge(&freeme);
1411 EXPORT_SYMBOL_GPL(virtio_transport_purge_skbs);
1413 MODULE_LICENSE("GPL v2");
1414 MODULE_AUTHOR("Asias He");
1415 MODULE_DESCRIPTION("common code for virtio vsock");