// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko Eißfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting
 *		Kirk Petersen	:	Made this a module
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect
 *					algorithm. Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid huge amounts
 *					of socks hashed (this is for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina	:	Hash function optimizations
 *	     Alexey Kuznetsov	:	Full scale SMP. Lots of bugs are introduced 8)
 *	      Malcolm Beattie	:	Set peercred for socketpair
 *	     Michal Ostrowski	:	Module initialization cleanup.
 *	  Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *					the core infrastructure is doing that
 *					for all net proto families now (2.5.69+)
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  started by 0, so that this name space does not intersect
 *		  with BSD names.
 */
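/*
 * To make the abstract-name rule above concrete, here is a minimal
 * userspace sketch (illustrative only; the fd and the name "example"
 * are made up).  The leading NUL byte is part of the name, so addrlen
 * must be computed explicitly rather than with strlen():
 *
 *	struct sockaddr_un addr = { .sun_family = AF_UNIX };
 *	socklen_t len = offsetof(struct sockaddr_un, sun_path) + 1 + 7;
 *
 *	addr.sun_path[0] = '\0';
 *	memcpy(addr.sun_path + 1, "example", 7);
 *	bind(fd, (struct sockaddr *)&addr, len);
 */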
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched/signal.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/filter.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/splice.h>
#include <linux/freezer.h>
#include <linux/file.h>
#include <linux/btf_ids.h>
#include <linux/bpf-cgroup.h>

#include "scm.h"
static atomic_long_t unix_nr_socks;
static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];

/* SMP locking strategy:
 * hash table is protected with spinlock.
 * each socket state is protected by separate spinlock.
 */
static unsigned int unix_unbound_hash(struct sock *sk)
{
	unsigned long hash = (unsigned long)sk;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash ^= sk->sk_type;

	return hash & UNIX_HASH_MOD;
}

static unsigned int unix_bsd_hash(struct inode *i)
{
	return i->i_ino & UNIX_HASH_MOD;
}
static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
				       int addr_len, int type)
{
	__wsum csum = csum_partial(sunaddr, addr_len, 0);
	unsigned int hash;

	hash = (__force unsigned int)csum_fold(csum);
	hash ^= hash >> 8;
	hash ^= type;

	return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
}
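/* Resulting bucket layout (assuming UNIX_HASH_MOD == 255 and
 * UNIX_HASH_SIZE == 512 as defined in af_unix.h): unbound sockets
 * hash into [0, UNIX_HASH_MOD] and abstract sockets into
 * [UNIX_HASH_MOD + 1, UNIX_HASH_SIZE - 1], so the two kinds never
 * share a bucket.  Pathname (BSD) sockets are additionally tracked
 * in the separate bsd_socket_buckets[] above, keyed by inode number.
 */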
static void unix_table_double_lock(struct net *net,
				   unsigned int hash1, unsigned int hash2)
{
	if (hash1 == hash2) {
		spin_lock(&net->unx.table.locks[hash1]);
		return;
	}

	if (hash1 > hash2)
		swap(hash1, hash2);

	spin_lock(&net->unx.table.locks[hash1]);
	spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
}

static void unix_table_double_unlock(struct net *net,
				     unsigned int hash1, unsigned int hash2)
{
	if (hash1 == hash2) {
		spin_unlock(&net->unx.table.locks[hash1]);
		return;
	}

	spin_unlock(&net->unx.table.locks[hash1]);
	spin_unlock(&net->unx.table.locks[hash2]);
}
#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
#endif /* CONFIG_SECURITY_NETWORK */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(const struct sock *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

static inline int unix_recvq_full_lockless(const struct sock *sk)
{
	return skb_queue_len_lockless(&sk->sk_receive_queue) >
		READ_ONCE(sk->sk_max_ack_backlog);
}
struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);
static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
					     int addr_len)
{
	struct unix_address *addr;

	addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
	if (!addr)
		return NULL;

	refcount_set(&addr->refcnt, 1);
	addr->len = addr_len;
	memcpy(addr->name, sunaddr, addr_len);

	return addr;
}

static inline void unix_release_addr(struct unix_address *addr)
{
	if (refcount_dec_and_test(&addr->refcnt))
		kfree(addr);
}
/*
 *	Check unix socket name:
 *		- it should not be zero length.
 *		- if it does not start with a zero byte, it should be
 *		  NUL terminated (FS object)
 *		- if it starts with a zero byte, it is an abstract name.
 */

static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
{
	if (addr_len <= offsetof(struct sockaddr_un, sun_path) ||
	    addr_len > sizeof(*sunaddr))
		return -EINVAL;

	if (sunaddr->sun_family != AF_UNIX)
		return -EINVAL;

	return 0;
}
static int unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
{
	struct sockaddr_storage *addr = (struct sockaddr_storage *)sunaddr;
	short offset = offsetof(struct sockaddr_storage, __data);

	BUILD_BUG_ON(offset != offsetof(struct sockaddr_un, sun_path));

	/* This may look like an off by one error but it is a bit more
	 * subtle.  108 is the longest valid AF_UNIX path for a binding.
	 * sun_path[108] doesn't as such exist.  However in kernel space
	 * we are guaranteed that it is a valid memory location in our
	 * kernel address buffer because syscall functions always pass
	 * a pointer of struct sockaddr_storage which has a bigger buffer
	 * than 108.  Also, we must terminate sun_path for strlen() in
	 * getname_kernel().
	 */
	addr->__data[addr_len - offset] = 0;

	/* Don't pass sunaddr->sun_path to strlen().  Otherwise, 108 will
	 * cause panic if CONFIG_FORTIFY_SOURCE=y.  Let __fortify_strlen()
	 * know the actual buffer.
	 */
	return strlen(addr->__data) + offset + 1;
}
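/* Worked example (illustrative values): binding "/tmp/sock" with
 * addr_len == 12 gives offset == 2, so the store above writes a NUL
 * at __data[10], and strlen("/tmp/sock") + 2 + 1 == 12 is returned as
 * the canonical length.  Even a fully used, unterminated sun_path
 * only makes us touch __data[108], which is still inside
 * struct sockaddr_storage.
 */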
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct net *net, struct sock *sk)
{
	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
	sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
}

static void __unix_set_addr_hash(struct net *net, struct sock *sk,
				 struct unix_address *addr, unsigned int hash)
{
	__unix_remove_socket(sk);
	smp_store_release(&unix_sk(sk)->addr, addr);

	sk->sk_hash = hash;
	__unix_insert_socket(net, sk);
}

static void unix_remove_socket(struct net *net, struct sock *sk)
{
	spin_lock(&net->unx.table.locks[sk->sk_hash]);
	__unix_remove_socket(sk);
	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
}

static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
{
	spin_lock(&net->unx.table.locks[sk->sk_hash]);
	__unix_insert_socket(net, sk);
	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
}
static void unix_insert_bsd_socket(struct sock *sk)
{
	spin_lock(&bsd_socket_locks[sk->sk_hash]);
	sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
	spin_unlock(&bsd_socket_locks[sk->sk_hash]);
}

static void unix_remove_bsd_socket(struct sock *sk)
{
	if (!hlist_unhashed(&sk->sk_bind_node)) {
		spin_lock(&bsd_socket_locks[sk->sk_hash]);
		__sk_del_bind_node(sk);
		spin_unlock(&bsd_socket_locks[sk->sk_hash]);

		sk_node_init(&sk->sk_bind_node);
	}
}
static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &net->unx.table.buckets[hash]) {
		struct unix_sock *u = unix_sk(s);

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			return s;
	}
	return NULL;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, unsigned int hash)
{
	struct sock *s;

	spin_lock(&net->unx.table.locks[hash]);
	s = __unix_find_socket_byname(net, sunname, len, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&net->unx.table.locks[hash]);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	unsigned int hash = unix_bsd_hash(i);
	struct sock *s;

	spin_lock(&bsd_socket_locks[hash]);
	sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && d_backing_inode(dentry) == i) {
			sock_hold(s);
			spin_unlock(&bsd_socket_locks[hash]);
			return s;
		}
	}
	spin_unlock(&bsd_socket_locks[hash]);
	return NULL;
}
/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (eg, /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large". This means there's a second writeability condition
 * poll and sendmsg need to test. The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far. This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue
 *
 * In order to propagate a wake up, a wait_queue_entry_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and broken when the
 * association to the server socket is dissolved or after a wake up
 * was relayed.
 */
static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key_to_poll(key));

	return 0;
}

static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
	struct unix_sock *u, *u_other;
	int rc;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	rc = 0;
	spin_lock(&u_other->peer_wait.lock);

	if (!u->peer_wake.private) {
		u->peer_wake.private = other;
		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);

		rc = 1;
	}

	spin_unlock(&u_other->peer_wait.lock);
	return rc;
}

static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}

static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
						   struct sock *other)
{
	unix_dgram_peer_wake_disconnect(sk, other);
	wake_up_interruptible_poll(sk_sleep(sk),
				   EPOLLOUT |
				   EPOLLWRNORM |
				   EPOLLWRBAND);
}

/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	/* If other is SOCK_DEAD, we want to make sure we signal
	 * POLLOUT, such that a subsequent write() can get a
	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
	 * to other and it's full, we will hang waiting for POLLOUT.
	 */
	if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}
static int unix_writable(const struct sock *sk)
{
	return sk->sk_state != TCP_LISTEN &&
	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}
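/* In other words, the socket stays writable while at most a quarter
 * of sk_sndbuf is in flight: e.g. with a default 212992-byte send
 * buffer, writers are throttled once wmem_alloc exceeds 53248 bytes
 * (exact values depend on net.core.wmem_default).
 */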
static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}
/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets that arrived from the previous peer. First, this allows
 * flow control to be based solely on wmem_alloc; second, an sk connected to a
 * peer may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of a bidirectional dgram pipe is disconnected,
		 * we signal an error. Messages are lost. Do not do this when
		 * the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			WRITE_ONCE(other->sk_err, ECONNRESET);
			sk_error_report(other);
		}
	}
	other->sk_state = TCP_CLOSE;
}
static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
	DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}
static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct sock *skpair;
	struct sk_buff *skb;
	struct path path;
	int state;

	unix_remove_socket(sock_net(sk), sk);
	unix_remove_bsd_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
	path	     = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;

	skpair = unix_peer(sk);
	unix_peer(sk) = NULL;

	unix_state_unlock(sk);

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	if (u->oob_skb) {
		kfree_skb(u->oob_skb);
		u->oob_skb = NULL;
	}
#endif

	wake_up_interruptible_all(&u->peer_wait);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				WRITE_ONCE(skpair->sk_err, ECONNRESET);
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook */
		UNIXCB(skb).consumed = skb->len;
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What does the above comment talk about? --ANK(980817)
	 */

	if (READ_ONCE(unix_tot_inflight))
		unix_gc();		/* Garbage collect fds */
}
static void init_peercred(struct sock *sk)
{
	const struct cred *old_cred;
	struct pid *old_pid;

	spin_lock(&sk->sk_peer_lock);
	old_pid = sk->sk_peer_pid;
	old_cred = sk->sk_peer_cred;
	sk->sk_peer_pid  = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
	spin_unlock(&sk->sk_peer_lock);

	put_pid(old_pid);
	put_cred(old_cred);
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	const struct cred *old_cred;
	struct pid *old_pid;

	if (sk < peersk) {
		spin_lock(&sk->sk_peer_lock);
		spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
	} else {
		spin_lock(&peersk->sk_peer_lock);
		spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
	}
	old_pid = sk->sk_peer_pid;
	old_cred = sk->sk_peer_cred;
	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);

	spin_unlock(&sk->sk_peer_lock);
	spin_unlock(&peersk->sk_peer_lock);

	put_pid(old_pid);
	put_cred(old_cred);
}
static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
out:
	return err;
}
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
				poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
#endif
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
				       struct pipe_inode_info *, size_t size,
				       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
				  int);
static int unix_set_peek_off(struct sock *sk, int val)
{
	struct unix_sock *u = unix_sk(sk);

	if (mutex_lock_interruptible(&u->iolock))
		return -EINTR;

	WRITE_ONCE(sk->sk_peek_off, val);
	mutex_unlock(&u->iolock);

	return 0;
}
#ifdef CONFIG_PROC_FS
static int unix_count_nr_fds(struct sock *sk)
{
	struct sk_buff *skb;
	struct unix_sock *u;
	int nr_fds = 0;

	spin_lock(&sk->sk_receive_queue.lock);
	skb = skb_peek(&sk->sk_receive_queue);
	while (skb) {
		u = unix_sk(skb->sk);
		nr_fds += atomic_read(&u->scm_stat.nr_fds);
		skb = skb_peek_next(skb, &sk->sk_receive_queue);
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	return nr_fds;
}

static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
{
	struct sock *sk = sock->sk;
	unsigned char s_state;
	struct unix_sock *u;
	int nr_fds = 0;

	if (sk) {
		s_state = READ_ONCE(sk->sk_state);
		u = unix_sk(sk);

		/* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
		 * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
		 * SOCK_DGRAM is ordinary. So, no lock is needed.
		 */
		if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED)
			nr_fds = atomic_read(&u->scm_stat.nr_fds);
		else if (s_state == TCP_LISTEN)
			nr_fds = unix_count_nr_fds(sk);

		seq_printf(m, "scm_fds: %u\n", nr_fds);
	}
}
#else
#define unix_show_fdinfo NULL
#endif
static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.read_skb =	unix_stream_read_skb,
	.mmap =		sock_no_mmap,
	.splice_read =	unix_stream_splice_read,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};

static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_dgram_sendmsg,
	.read_skb =	unix_read_skb,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};
static void unix_close(struct sock *sk, long timeout)
{
	/* Nothing to do here, unix socket does not need a ->close().
	 * This is merely for sockmap.
	 */
}

static void unix_unhash(struct sock *sk)
{
	/* Nothing to do here, unix socket does not need a ->unhash().
	 * This is merely for sockmap.
	 */
}

static bool unix_bpf_bypass_getsockopt(int level, int optname)
{
	if (level == SOL_SOCKET) {
		switch (optname) {
		case SO_PEERPIDFD:
			return true;
		default:
			return false;
		}
	}

	return false;
}
struct proto unix_dgram_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
	.close			= unix_close,
	.bpf_bypass_getsockopt	= unix_bpf_bypass_getsockopt,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= unix_dgram_bpf_update_proto,
#endif
};

struct proto unix_stream_proto = {
	.name			= "UNIX-STREAM",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
	.close			= unix_close,
	.unhash			= unix_unhash,
	.bpf_bypass_getsockopt	= unix_bpf_bypass_getsockopt,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= unix_stream_bpf_update_proto,
#endif
};
static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
{
	struct unix_sock *u;
	struct sock *sk;
	int err;

	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
		err = -ENFILE;
		goto err;
	}

	if (type == SOCK_STREAM)
		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
	else /* dgram and seqpacket */
		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);

	if (!sk) {
		err = -ENOMEM;
		goto err;
	}

	sock_init_data(sock, sk);

	sk->sk_hash		= unix_unbound_hash(sk);
	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u = unix_sk(sk);
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->iolock); /* single task reading lock */
	mutex_init(&u->bindlock); /* single task binding lock */
	init_waitqueue_head(&u->peer_wait);
	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
	unix_insert_unbound_socket(net, sk);

	sock_prot_inuse_add(net, sk->sk_prot, 1);

	return sk;

err:
	atomic_long_dec(&unix_nr_socks);
	return ERR_PTR(err);
}
static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	struct sock *sk;

	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 *	nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		fallthrough;
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	sk = unix_create1(net, sock, kern, sock->type);
	if (IS_ERR(sk))
		return PTR_ERR(sk);

	return 0;
}
static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	sk->sk_prot->close(sk, 0);
	unix_release_sock(sk, 0);
	sock->sk = NULL;

	return 0;
}
static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
				  int type)
{
	struct inode *inode;
	struct path path;
	struct sock *sk;
	int err;

	unix_mkname_bsd(sunaddr, addr_len);
	err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
	if (err)
		goto fail;

	err = path_permission(&path, MAY_WRITE);
	if (err)
		goto path_put;

	err = -ECONNREFUSED;
	inode = d_backing_inode(path.dentry);
	if (!S_ISSOCK(inode->i_mode))
		goto path_put;

	sk = unix_find_socket_byinode(inode);
	if (!sk)
		goto path_put;

	err = -EPROTOTYPE;
	if (sk->sk_type == type)
		touch_atime(&path);
	else
		sock_put(sk);

	path_put(&path);

	return sk;

path_put:
	path_put(&path);
fail:
	return ERR_PTR(err);
}
static struct sock *unix_find_abstract(struct net *net,
				       struct sockaddr_un *sunaddr,
				       int addr_len, int type)
{
	unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type);
	struct dentry *dentry;
	struct sock *sk;

	sk = unix_find_socket_byname(net, sunaddr, addr_len, hash);
	if (!sk)
		return ERR_PTR(-ECONNREFUSED);

	dentry = unix_sk(sk)->path.dentry;
	if (dentry)
		touch_atime(&unix_sk(sk)->path);

	return sk;
}

static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunaddr,
				    int addr_len, int type)
{
	struct sock *sk;

	if (sunaddr->sun_path[0])
		sk = unix_find_bsd(sunaddr, addr_len, type);
	else
		sk = unix_find_abstract(net, sunaddr, addr_len, type);

	return sk;
}
static int unix_autobind(struct sock *sk)
{
	unsigned int new_hash, old_hash = sk->sk_hash;
	struct unix_sock *u = unix_sk(sk);
	struct net *net = sock_net(sk);
	struct unix_address *addr;
	u32 lastnum, ordernum;
	int err;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		return err;

	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) +
		       offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->len = offsetof(struct sockaddr_un, sun_path) + 6;
	addr->name->sun_family = AF_UNIX;
	refcount_set(&addr->refcnt, 1);

	ordernum = get_random_u32();
	lastnum = ordernum & 0xFFFFF;
retry:
	ordernum = (ordernum + 1) & 0xFFFFF;
	sprintf(addr->name->sun_path + 1, "%05x", ordernum);

	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
	unix_table_double_lock(net, old_hash, new_hash);

	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
		unix_table_double_unlock(net, old_hash, new_hash);

		/* __unix_find_socket_byname() may take a long time if many
		 * names are already in use.
		 */
		cond_resched();

		if (ordernum == lastnum) {
			/* Give up if all names seem to be in use. */
			err = -ENOSPC;
			unix_release_addr(addr);
			goto out;
		}

		goto retry;
	}

	__unix_set_addr_hash(net, sk, addr, new_hash);
	unix_table_double_unlock(net, old_hash, new_hash);
	err = 0;

out:	mutex_unlock(&u->bindlock);
	return err;
}
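/* An autobound address produced above is the 5-digit hex name
 * prefixed with a NUL byte, e.g. sun_path = "\0" "a1b2c" with
 * addr->len == offsetof(struct sockaddr_un, sun_path) + 6;
 * /proc/net/unix renders the leading NUL as '@' (@a1b2c).
 */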
static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
			 int addr_len)
{
	umode_t mode = S_IFSOCK |
	       (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
	unsigned int new_hash, old_hash = sk->sk_hash;
	struct unix_sock *u = unix_sk(sk);
	struct net *net = sock_net(sk);
	struct mnt_idmap *idmap;
	struct unix_address *addr;
	struct dentry *dentry;
	struct path parent;
	int err;

	addr_len = unix_mkname_bsd(sunaddr, addr_len);
	addr = unix_create_addr(sunaddr, addr_len);
	if (!addr)
		return -ENOMEM;

	/*
	 * Get the parent directory, calculate the hash for the last
	 * component.
	 */
	dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
	if (IS_ERR(dentry)) {
		err = PTR_ERR(dentry);
		goto out;
	}

	/*
	 * All right, let's create it.
	 */
	idmap = mnt_idmap(parent.mnt);
	err = security_path_mknod(&parent, dentry, mode, 0);
	if (!err)
		err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0);
	if (err)
		goto out_path;
	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out_unlink;
	if (u->addr)
		goto out_unlock;

	new_hash = unix_bsd_hash(d_backing_inode(dentry));
	unix_table_double_lock(net, old_hash, new_hash);
	u->path.mnt = mntget(parent.mnt);
	u->path.dentry = dget(dentry);
	__unix_set_addr_hash(net, sk, addr, new_hash);
	unix_table_double_unlock(net, old_hash, new_hash);
	unix_insert_bsd_socket(sk);
	mutex_unlock(&u->bindlock);
	done_path_create(&parent, dentry);
	return 0;

out_unlock:
	mutex_unlock(&u->bindlock);
	err = -EINVAL;
out_unlink:
	/* failed after successful mknod?  unlink what we'd created... */
	vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL);
out_path:
	done_path_create(&parent, dentry);
out:
	unix_release_addr(addr);
	return err == -EEXIST ? -EADDRINUSE : err;
}
static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
			      int addr_len)
{
	unsigned int new_hash, old_hash = sk->sk_hash;
	struct unix_sock *u = unix_sk(sk);
	struct net *net = sock_net(sk);
	struct unix_address *addr;
	int err;

	addr = unix_create_addr(sunaddr, addr_len);
	if (!addr)
		return -ENOMEM;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out;

	if (u->addr) {
		err = -EINVAL;
		goto out_mutex;
	}

	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
	unix_table_double_lock(net, old_hash, new_hash);

	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
		goto out_spin;

	__unix_set_addr_hash(net, sk, addr, new_hash);
	unix_table_double_unlock(net, old_hash, new_hash);
	mutex_unlock(&u->bindlock);
	return 0;

out_spin:
	unix_table_double_unlock(net, old_hash, new_hash);
	err = -EADDRINUSE;
out_mutex:
	mutex_unlock(&u->bindlock);
out:
	unix_release_addr(addr);
	return err;
}
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	int err;

	if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
	    sunaddr->sun_family == AF_UNIX)
		return unix_autobind(sk);

	err = unix_validate_addr(sunaddr, addr_len);
	if (err)
		return err;

	if (sunaddr->sun_path[0])
		err = unix_bind_bsd(sk, sunaddr, addr_len);
	else
		err = unix_bind_abstract(sk, sunaddr, addr_len);

	return err;
}
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *sk = sock->sk;
	struct sock *other;
	int err;

	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_validate_addr(sunaddr, alen);
		if (err)
			goto out;

		err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, addr, &alen);
		if (err)
			goto out;

		if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
		     test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
		    !unix_sk(sk)->addr) {
			err = unix_autobind(sk);
			if (err)
				goto out;
		}

restart:
		other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type);
		if (IS_ERR(other)) {
			err = PTR_ERR(other);
			goto out;
		}

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

		sk->sk_state = other->sk_state = TCP_ESTABLISHED;
	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);

		unix_peer(sk) = other;
		if (!other)
			sk->sk_state = TCP_CLOSE;
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}

	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}
static long unix_wait_for_peer(struct sock *other, long timeo)
	__releases(&unix_sk(other)->lock)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full_lockless(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct net *net = sock_net(sk);
	struct sk_buff *skb = NULL;
	long timeo;
	int err;
	int st;

	err = unix_validate_addr(sunaddr, addr_len);
	if (err)
		goto out;

	err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, &addr_len);
	if (err)
		goto out;

	if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
	     test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
		err = unix_autobind(sk);
		if (err)
			goto out;
	}

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we make it past the point where state is locked,
	   we would have to recheck everything again anyway.
	 */

	/* create new sock for complete connection */
	newsk = unix_create1(net, NULL, 0, sock->type);
	if (IS_ERR(newsk)) {
		err = PTR_ERR(newsk);
		newsk = NULL;
		goto out;
	}

	err = -ENOMEM;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type);
	if (IS_ERR(other)) {
		err = PTR_ERR(other);
		other = NULL;
		goto out;
	}

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is a tricky place. We need to grab our state lock and cannot
	   drop the lock on the peer. It is dangerous because deadlock is
	   possible. The connect-to-self case and simultaneous attempts to
	   connect are eliminated by checking the socket state. other is
	   TCP_LISTEN; if sk is TCP_LISTEN, we check this before attempting
	   to grab the lock.

	   Well, and we have to recheck the state after the socket is locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Quickly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under its lock.  Insertion into the
	 * hash chain we'd found it in had been done in an
	 * earlier critical area protected by the chain's lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire().  IOW, the same guarantees
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	refcount_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	init_peercred(ska);
	init_peercred(skb);

	ska->sk_state = TCP_ESTABLISHED;
	skb->sk_state = TCP_ESTABLISHED;
	socka->state  = SS_CONNECTED;
	sockb->state  = SS_CONNECTED;
	return 0;
}

static void unix_sock_inherit_flags(const struct socket *old,
				    struct socket *new)
{
	if (test_bit(SOCK_PASSCRED, &old->flags))
		set_bit(SOCK_PASSCRED, &new->flags);
	if (test_bit(SOCK_PASSPIDFD, &old->flags))
		set_bit(SOCK_PASSPIDFD, &new->flags);
	if (test_bit(SOCK_PASSSEC, &old->flags))
		set_bit(SOCK_PASSSEC, &new->flags);
}
static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
		       bool kern)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
				&err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}
static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_address *addr;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	addr = smp_load_acquire(&unix_sk(sk)->addr);
	if (!addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		err = offsetof(struct sockaddr_un, sun_path);
	} else {
		err = addr->len;
		memcpy(sunaddr, addr->name, addr->len);

		if (peer)
			BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
					       CGROUP_UNIX_GETPEERNAME);
		else
			BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
					       CGROUP_UNIX_GETSOCKNAME);
	}

	sock_put(sk);
out:
	return err;
}
static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->fp = scm_fp_dup(UNIXCB(skb).fp);

	/*
	 * Garbage collection of unix sockets starts by selecting a set of
	 * candidate sockets which have reference only from being in flight
	 * (total_refs == inflight_refs).  This condition is checked once during
	 * the candidate collection phase, and candidates are marked as such, so
	 * that non-candidates can later be ignored.  While inflight_refs is
	 * protected by unix_gc_lock, total_refs (file count) is not, hence this
	 * is an instantaneous decision.
	 *
	 * Once a candidate, however, the socket must not be reinstalled into a
	 * file descriptor while the garbage collection is in progress.
	 *
	 * If the above conditions are met, then the directed graph of
	 * candidates (*) does not change while unix_gc_lock is held.
	 *
	 * Any operation that changes the file count through file descriptors
	 * (dup, close, sendmsg) does not change the graph since candidates are
	 * not installed in fds.
	 *
	 * Dequeueing a candidate via recvmsg would install it into an fd, but
	 * that takes unix_gc_lock to decrement the inflight count, so it's
	 * serialized with garbage collection.
	 *
	 * MSG_PEEK is special in that it does not change the inflight count,
	 * yet does install the socket into an fd.  The following lock/unlock
	 * pair is to ensure serialization with garbage collection.  It must be
	 * done between incrementing the file count and installing the file into
	 * an fd.
	 *
	 * If garbage collection starts after the barrier provided by the
	 * lock/unlock, then it will see the elevated refcount and not mark this
	 * as a candidate.  If a garbage collection is already in progress
	 * before the file count was incremented, then the lock/unlock pair will
	 * ensure that garbage collection is finished before progressing to
	 * installing the fd.
	 *
	 * (*) A -> B where B is on the queue of A or B is on the queue of C
	 * which is on the queue of listening socket A.
	 */
	spin_lock(&unix_gc_lock);
	spin_unlock(&unix_gc_lock);
}
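/* Sketch of the ordering the lock/unlock pair above enforces
 * (hypothetical interleaving):
 *
 *	recvmsg(MSG_PEEK)		unix_gc()
 *	-----------------		---------
 *	scm_fp_dup() raises f_count
 *	spin_lock(&unix_gc_lock)	spin_lock(&unix_gc_lock)
 *	  ...blocks...			scan/purge candidates
 *					spin_unlock(&unix_gc_lock)
 *	spin_unlock(&unix_gc_lock)
 *	install peeked file into an fd
 *
 * Either the GC sees the elevated file count and skips the socket,
 * or the peeking task waits until the scan is complete.
 */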
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid  = get_pid(scm->pid);
	UNIXCB(skb).uid = scm->creds.uid;
	UNIXCB(skb).gid = scm->creds.gid;
	UNIXCB(skb).fp = NULL;
	unix_get_secdata(scm, skb);
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	skb->destructor = unix_destruct_scm;
	return err;
}

static bool unix_passcred_enabled(const struct socket *sock,
				  const struct sock *other)
{
	return test_bit(SOCK_PASSCRED, &sock->flags) ||
	       test_bit(SOCK_PASSPIDFD, &sock->flags) ||
	       !other->sk_socket ||
	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags) ||
	       test_bit(SOCK_PASSPIDFD, &other->sk_socket->flags);
}
/*
 * Some apps rely on write() giving SCM_CREDENTIALS.
 * We include credentials if the source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	if (UNIXCB(skb).pid)
		return;
	if (unix_passcred_enabled(sock, other)) {
		UNIXCB(skb).pid  = get_pid(task_tgid(current));
		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
	}
}

static bool unix_skb_scm_eq(struct sk_buff *skb,
			    struct scm_cookie *scm)
{
	return UNIXCB(skb).pid == scm->pid &&
	       uid_eq(UNIXCB(skb).uid, scm->creds.uid) &&
	       gid_eq(UNIXCB(skb).gid, scm->creds.gid) &&
	       unix_secdata_eq(scm, skb);
}
static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
{
	struct scm_fp_list *fp = UNIXCB(skb).fp;
	struct unix_sock *u = unix_sk(sk);

	if (unlikely(fp && fp->count))
		atomic_add(fp->count, &u->scm_stat.nr_fds);
}

static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
{
	struct scm_fp_list *fp = UNIXCB(skb).fp;
	struct unix_sock *u = unix_sk(sk);

	if (unlikely(fp && fp->count))
		atomic_sub(fp->count, &u->scm_stat.nr_fds);
}

/*
 *	Send AF_UNIX data.
 */
static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
			      size_t len)
{
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
	struct sock *sk = sock->sk, *other = NULL;
	struct unix_sock *u = unix_sk(sk);
	struct scm_cookie scm;
	struct sk_buff *skb;
	long timeo;
	int data_len = 0;
	int sk_locked;
	int err;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_validate_addr(sunaddr, msg->msg_namelen);
		if (err)
			goto out;

		err = BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk,
							    msg->msg_name,
							    &msg->msg_namelen,
							    NULL);
		if (err)
			goto out;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
	     test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
		err = unix_autobind(sk);
		if (err)
			goto out;
	}

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	if (len > SKB_MAX_ALLOC) {
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);
		data_len = PAGE_ALIGN(data_len);

		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
	}

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err,
				   PAGE_ALLOC_COSTLY_ORDER);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(&scm, skb, true);
	if (err < 0)
		goto out_free;

	skb_put(skb, len - data_len);
	skb->data_len = data_len;
	skb->len = len;
	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen,
					sk->sk_type);
		if (IS_ERR(other)) {
			err = PTR_ERR(other);
			other = NULL;
			goto out_free;
		}
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	sk_locked = 0;
	unix_state_lock(other);
restart_locked:
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (unlikely(sock_flag(other, SOCK_DEAD))) {
		/*
		 *	Check with 1003.1g - what should a
		 *	datagram error return?
		 */
		unix_state_unlock(other);
		sock_put(other);

		if (!sk_locked)
			unix_state_lock(sk);

		err = 0;
		if (sk->sk_type == SOCK_SEQPACKET) {
			/* We are here only when racing with unix_release_sock()
			 * which is clearing @other. Never change state to
			 * TCP_CLOSE, unlike what SOCK_DGRAM wants.
			 */
			unix_state_unlock(sk);
			err = -EPIPE;
		} else if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_dgram_peer_wake_disconnect_wakeup(sk, other);

			sk->sk_state = TCP_CLOSE;
			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* other == sk && unix_peer(other) != sk if
	 * - unix_peer(sk) == NULL, destination address bound to sk
	 * - unix_peer(sk) == sk by time of get but disconnected before lock
	 */
	if (other != sk &&
	    unlikely(unix_peer(other) != sk &&
	    unix_recvq_full_lockless(other))) {
		if (timeo) {
			timeo = unix_wait_for_peer(other, timeo);

			err = sock_intr_errno(timeo);
			if (signal_pending(current))
				goto out_free;

			goto restart;
		}

		if (!sk_locked) {
			unix_state_unlock(other);
			unix_state_double_lock(sk, other);
		}

		if (unix_peer(sk) != other ||
		    unix_dgram_peer_wake_me(sk, other)) {
			err = -EAGAIN;
			sk_locked = 1;
			goto out_unlock;
		}

		if (!sk_locked) {
			sk_locked = 1;
			goto restart_locked;
		}
	}

	if (unlikely(sk_locked))
		unix_state_unlock(sk);

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	maybe_add_creds(skb, sock, other);
	scm_stat_add(other, skb);
	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	scm_destroy(&scm);
	return len;

out_unlock:
	if (sk_locked)
		unix_state_unlock(sk);
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(&scm);
	return err;
}
/* We use paged skbs for stream sockets, and limit occupancy to 32768
 * bytes, and a minimum of a full page.
 */
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
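/* Illustration: with 4 KiB pages, get_order(32768) == 3, so
 * UNIX_SKB_FRAGS_SZ == 4096 << 3 == 32768 bytes of paged data per
 * skb, matching the occupancy limit described above.
 */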
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other,
		     struct scm_cookie *scm, bool fds_sent)
{
	struct unix_sock *ousk = unix_sk(other);
	struct sk_buff *skb;
	int err = 0;

	skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);

	if (!skb)
		return err;

	err = unix_scm_to_skb(scm, skb, !fds_sent);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}
	skb_put(skb, 1);
	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);

	if (err) {
		kfree_skb(skb);
		return err;
	}

	unix_state_lock(other);

	if (sock_flag(other, SOCK_DEAD) ||
	    (other->sk_shutdown & RCV_SHUTDOWN)) {
		unix_state_unlock(other);
		kfree_skb(skb);
		return -EPIPE;
	}

	maybe_add_creds(skb, sock, other);
	skb_get(skb);

	if (ousk->oob_skb)
		consume_skb(ousk->oob_skb);

	WRITE_ONCE(ousk->oob_skb, skb);

	scm_stat_add(other, skb);
	skb_queue_tail(&other->sk_receive_queue, skb);
	sk_send_sigurg(other);
	unix_state_unlock(other);
	other->sk_data_ready(other);

	return err;
}
#endif
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
			       size_t len)
{
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie scm;
	bool fds_sent = false;
	int data_len;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags & MSG_OOB) {
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
		if (len)
			len--;
		else
#endif
			goto out_err;
	}

	if (msg->msg_namelen) {
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		size = len - sent;

		if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
			skb = sock_alloc_send_pskb(sk, 0, 0,
						   msg->msg_flags & MSG_DONTWAIT,
						   &err, 0);
		} else {
			/* Keep two messages in the pipe so it schedules better */
			size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);

			/* allow fallback to order-0 allocations */
			size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);

			data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));

			data_len = min_t(size_t, size, PAGE_ALIGN(data_len));

			skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
						   msg->msg_flags & MSG_DONTWAIT, &err,
						   get_order(UNIX_SKB_FRAGS_SZ));
		}
		if (!skb)
			goto out_err;

		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(&scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		fds_sent = true;

		if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
			err = skb_splice_from_iter(skb, &msg->msg_iter, size,
						   sk->sk_allocation);
			if (err < 0) {
				kfree_skb(skb);
				goto out_err;
			}
			size = err;
			refcount_add(size, &sk->sk_wmem_alloc);
		} else {
			skb_put(skb, size - data_len);
			skb->data_len = data_len;
			skb->len = size;
			err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
			if (err) {
				kfree_skb(skb);
				goto out_err;
			}
		}

		unix_state_lock(other);

		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		scm_stat_add(other, skb);
		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other);
		sent += size;
	}

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	if (msg->msg_flags & MSG_OOB) {
		err = queue_oob(sock, msg, other, &scm, fds_sent);
		if (err)
			goto out_err;
		sent++;
	}
#endif

	scm_destroy(&scm);

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(&scm);
	return sent ? : err;
}
static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
				  size_t len)
{
	int err;
	struct sock *sk = sock->sk;

	err = sock_error(sk);
	if (err)
		return err;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (msg->msg_namelen)
		msg->msg_namelen = 0;

	return unix_dgram_sendmsg(sock, msg, len);
}

static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
				  size_t size, int flags)
{
	struct sock *sk = sock->sk;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	return unix_dgram_recvmsg(sock, msg, size, flags);
}
static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
{
	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);

	if (addr) {
		msg->msg_namelen = addr->len;
		memcpy(msg->msg_name, addr->name, addr->len);
	}
}
int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
			 int flags)
{
	struct scm_cookie scm;
	struct socket *sock = sk->sk_socket;
	struct unix_sock *u = unix_sk(sk);
	struct sk_buff *skb, *last;
	long timeo;
	int skip;
	int err;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		mutex_lock(&u->iolock);

		skip = sk_peek_offset(sk, flags);
		skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
					      &skip, &err, &last);
		if (skb) {
			if (!(flags & MSG_PEEK))
				scm_stat_del(sk, skb);
			break;
		}

		mutex_unlock(&u->iolock);

		if (err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
					      &err, &timeo, last));

	if (!skb) { /* implies iolock unlocked */
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out;
	}

	if (wq_has_sleeper(&u->peer_wait))
		wake_up_interruptible_sync_poll(&u->peer_wait,
						EPOLLOUT | EPOLLWRNORM |
						EPOLLWRBAND);

	if (msg->msg_name) {
		unix_copy_addr(msg, skb->sk);

		BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
						      msg->msg_name,
						      &msg->msg_namelen);
	}

	if (size > skb->len - skip)
		size = skb->len - skip;
	else if (size < skb->len - skip)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_msg(skb, skip, msg, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	memset(&scm, 0, sizeof(scm));

	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
	unix_set_secdata(&scm, skb);

	if (!(flags & MSG_PEEK)) {
		if (UNIXCB(skb).fp)
			unix_detach_fds(&scm, skb);

		sk_peek_offset_bwd(sk, skb->len);
	} else {
		/* It is questionable: on PEEK we could:
		   - not return fds - good, but too simple 8)
		   - return fds, and not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose it for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!
		*/

		sk_peek_offset_fwd(sk, size);

		if (UNIXCB(skb).fp)
			unix_peek_fds(&scm, skb);
	}
	err = (flags & MSG_TRUNC) ? skb->len - skip : size;

	scm_recv_unix(sock, msg, &scm, flags);

out_free:
	skb_free_datagram(sk, skb);
	mutex_unlock(&u->iolock);
out:
	return err;
}
static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
			      int flags)
{
	struct sock *sk = sock->sk;

#ifdef CONFIG_BPF_SYSCALL
	const struct proto *prot = READ_ONCE(sk->sk_prot);

	if (prot != &unix_dgram_proto)
		return prot->recvmsg(sk, msg, size, flags, NULL);
#endif
	return __unix_dgram_recvmsg(sk, msg, size, flags);
}

static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
	struct unix_sock *u = unix_sk(sk);
	struct sk_buff *skb;
	int err;

	mutex_lock(&u->iolock);
	skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
	mutex_unlock(&u->iolock);
	if (!skb)
		return err;

	return recv_actor(sk, skb);
}
/*
 *	Sleep until more data has arrived. But check for races..
 */
static long unix_stream_data_wait(struct sock *sk, long timeo,
				  struct sk_buff *last, unsigned int last_len,
				  bool freezable)
{
	unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE;
	struct sk_buff *tail;
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, state);

		tail = skb_peek_tail(&sk->sk_receive_queue);
		if (tail != last ||
		    (tail && tail->len != last_len) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
		unix_state_unlock(sk);
		timeo = schedule_timeout(timeo);
		unix_state_lock(sk);

		if (sock_flag(sk, SOCK_DEAD))
			break;

		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	}

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
	return timeo;
}

static unsigned int unix_skb_len(const struct sk_buff *skb)
{
	return skb->len - UNIXCB(skb).consumed;
}
struct unix_stream_read_state {
	int (*recv_actor)(struct sk_buff *, int, int,
			  struct unix_stream_read_state *);
	struct socket *socket;
	struct msghdr *msg;
	struct pipe_inode_info *pipe;
	size_t size;
	int flags;
	unsigned int splice_flags;
};
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
static int unix_stream_recv_urg(struct unix_stream_read_state *state)
{
	struct socket *sock = state->socket;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int chunk = 1;
	struct sk_buff *oob_skb;

	mutex_lock(&u->iolock);
	unix_state_lock(sk);

	if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
		unix_state_unlock(sk);
		mutex_unlock(&u->iolock);
		return -EINVAL;
	}

	oob_skb = u->oob_skb;

	if (!(state->flags & MSG_PEEK))
		WRITE_ONCE(u->oob_skb, NULL);
	else
		skb_get(oob_skb);
	unix_state_unlock(sk);

	chunk = state->recv_actor(oob_skb, 0, chunk, state);

	if (!(state->flags & MSG_PEEK))
		UNIXCB(oob_skb).consumed += 1;

	consume_skb(oob_skb);

	mutex_unlock(&u->iolock);

	if (chunk < 0)
		return -EFAULT;

	state->msg->msg_flags |= MSG_OOB;
	return 1;
}
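/* Illustrative userspace sketch (not kernel code): AF_UNIX out-of-band
 * data is a single byte, read with MSG_OOB unless SO_OOBINLINE is set
 * (in which case the function above returns -EINVAL and the byte is
 * read in-line with the normal stream):
 *
 *	send(peer, "ab", 2, MSG_OOB);	// 'b' becomes the OOB byte
 *	recv(fd, buf, sizeof(buf), 0);	// returns "a", stops at the mark
 *	recv(fd, buf, 1, MSG_OOB);	// returns 'b'
 */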
static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
				  int flags, int copied)
{
	struct unix_sock *u = unix_sk(sk);

	if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
		skb_unlink(skb, &sk->sk_receive_queue);
		consume_skb(skb);
		skb = NULL;
	} else {
		if (skb == u->oob_skb) {
			if (copied) {
				skb = NULL;
			} else if (sock_flag(sk, SOCK_URGINLINE)) {
				if (!(flags & MSG_PEEK)) {
					WRITE_ONCE(u->oob_skb, NULL);
					consume_skb(skb);
				}
			} else if (!(flags & MSG_PEEK)) {
				skb_unlink(skb, &sk->sk_receive_queue);
				consume_skb(skb);
				skb = skb_peek(&sk->sk_receive_queue);
			}
		}
	}
	return skb;
}
#endif
static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
	if (unlikely(sk->sk_state != TCP_ESTABLISHED))
		return -ENOTCONN;

	return unix_read_skb(sk, recv_actor);
}
static int unix_stream_read_generic(struct unix_stream_read_state *state,
				    bool freezable)
{
	struct scm_cookie scm;
	struct socket *sock = state->socket;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int copied = 0;
	int flags = state->flags;
	int noblock = flags & MSG_DONTWAIT;
	bool check_creds = false;
	int target;
	int err = 0;
	long timeo;
	int skip;
	size_t size = state->size;
	unsigned int last_len;

	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
		err = -EINVAL;
		goto out;
	}

	if (unlikely(flags & MSG_OOB)) {
		err = -EOPNOTSUPP;
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
		err = unix_stream_recv_urg(state);
#endif
		goto out;
	}

	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, noblock);

	memset(&scm, 0, sizeof(scm));

	/* Lock the socket to prevent queue disordering
	 * while we sleep in memcpy_to_msg().
	 */
	mutex_lock(&u->iolock);

	skip = max(sk_peek_offset(sk, flags), 0);

	do {
		int chunk;
		bool drop_skb;
		struct sk_buff *skb, *last;

redo:
		unix_state_lock(sk);
		if (sock_flag(sk, SOCK_DEAD)) {
			err = -ECONNRESET;
			goto unlock;
		}
		last = skb = skb_peek(&sk->sk_receive_queue);
		last_len = last ? last->len : 0;

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
		if (skb) {
			skb = manage_oob(skb, sk, flags, copied);
			if (!skb) {
				unix_state_unlock(sk);
				if (copied)
					break;
				goto redo;
			}
		}
#endif
again:
		if (skb == NULL) {
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			if (!timeo) {
				err = -EAGAIN;
				break;
			}

			mutex_unlock(&u->iolock);

			timeo = unix_stream_data_wait(sk, timeo, last,
						      last_len, freezable);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				scm_destroy(&scm);
				goto out;
			}

			mutex_lock(&u->iolock);
			goto redo;
unlock:
			unix_state_unlock(sk);
			break;
		}

		while (skip >= unix_skb_len(skb)) {
			skip -= unix_skb_len(skb);
			last = skb;
			last_len = skb->len;
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (!skb)
				goto again;
		}

		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (!unix_skb_scm_eq(skb, &scm))
				break;
		} else if (test_bit(SOCK_PASSCRED, &sock->flags) ||
			   test_bit(SOCK_PASSPIDFD, &sock->flags)) {
			/* Copy credentials */
			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
			unix_set_secdata(&scm, skb);
			check_creds = true;
		}

		/* Copy address just once */
		if (state->msg && state->msg->msg_name) {
			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
					 state->msg->msg_name);
			unix_copy_addr(state->msg, skb->sk);

			BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
							      state->msg->msg_name,
							      &state->msg->msg_namelen);

			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
		skb_get(skb);
		chunk = state->recv_actor(skb, skip, chunk, state);
		drop_skb = !unix_skb_len(skb);
		/* skb is only safe to use if !drop_skb */
		consume_skb(skb);
		if (chunk < 0) {
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		if (drop_skb) {
			/* the skb was touched by a concurrent reader;
			 * we should not expect anything from this skb
			 * anymore and assume it invalid - we can be
			 * sure it was dropped from the socket queue
			 *
			 * let's report a short read
			 */
			err = 0;
			break;
		}

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			UNIXCB(skb).consumed += chunk;

			sk_peek_offset_bwd(sk, chunk);

			if (UNIXCB(skb).fp) {
				scm_stat_del(sk, skb);
				unix_detach_fds(&scm, skb);
			}

			if (unix_skb_len(skb))
				break;

			skb_unlink(skb, &sk->sk_receive_queue);
			consume_skb(skb);

			if (scm.fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				unix_peek_fds(&scm, skb);

			sk_peek_offset_fwd(sk, chunk);

			if (UNIXCB(skb).fp)
				break;

			skip = 0;
			last = skb;
			last_len = skb->len;
			unix_state_lock(sk);
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (skb)
				goto again;
			unix_state_unlock(sk);
			break;
		}
	} while (size);

	mutex_unlock(&u->iolock);
	if (state->msg)
		scm_recv_unix(sock, state->msg, &scm, flags);
	else
		scm_destroy(&scm);
out:
	return copied ? : err;
}
static int unix_stream_read_actor(struct sk_buff *skb,
				  int skip, int chunk,
				  struct unix_stream_read_state *state)
{
	int ret;

	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
				    state->msg, chunk);
	return ret ?: chunk;
}
int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
			  size_t size, int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_read_actor,
		.socket = sk->sk_socket,
		.msg = msg,
		.size = size,
		.flags = flags
	};

	return unix_stream_read_generic(&state, true);
}
static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
			       size_t size, int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_read_actor,
		.socket = sock,
		.msg = msg,
		.size = size,
		.flags = flags
	};

#ifdef CONFIG_BPF_SYSCALL
	struct sock *sk = sock->sk;
	const struct proto *prot = READ_ONCE(sk->sk_prot);

	if (prot != &unix_stream_proto)
		return prot->recvmsg(sk, msg, size, flags, NULL);
#endif
	return unix_stream_read_generic(&state, true);
}
static int unix_stream_splice_actor(struct sk_buff *skb,
				    int skip, int chunk,
				    struct unix_stream_read_state *state)
{
	return skb_splice_bits(skb, state->socket->sk,
			       UNIXCB(skb).consumed + skip,
			       state->pipe, chunk, state->splice_flags);
}
static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
				       struct pipe_inode_info *pipe,
				       size_t size, unsigned int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_splice_actor,
		.socket = sock,
		.pipe = pipe,
		.size = size,
		.splice_flags = flags,
	};

	if (unlikely(*ppos))
		return -ESPIPE;

	if (sock->file->f_flags & O_NONBLOCK ||
	    flags & SPLICE_F_NONBLOCK)
		state.flags = MSG_DONTWAIT;

	return unix_stream_read_generic(&state, false);
}
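/* Illustrative userspace sketch (not kernel code): splicing stream data
 * into a pipe without a round trip through a userspace buffer. A
 * nonzero offset is rejected above with -ESPIPE, so both offset
 * arguments must be NULL:
 *
 *	int pfd[2];
 *	pipe(pfd);
 *	ssize_t n = splice(sock_fd, NULL, pfd[1], NULL, 4096,
 *			   SPLICE_F_NONBLOCK);
 */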
static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

	if (mode < SHUT_RD || mode > SHUT_RDWR)
		return -EINVAL;
	/* This maps:
	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
	 */
	++mode;

	unix_state_lock(sk);
	WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
	other = unix_peer(sk);
	if (other)
		sock_hold(other);
	unix_state_unlock(sk);
	sk->sk_state_change(sk);

	if (other &&
	    (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
		int peer_mode = 0;
		const struct proto *prot = READ_ONCE(other->sk_prot);

		if (prot->unhash)
			prot->unhash(other);
		if (mode & RCV_SHUTDOWN)
			peer_mode |= SEND_SHUTDOWN;
		if (mode & SEND_SHUTDOWN)
			peer_mode |= RCV_SHUTDOWN;
		unix_state_lock(other);
		WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
		unix_state_unlock(other);
		other->sk_state_change(other);
		if (peer_mode == SHUTDOWN_MASK)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
		else if (peer_mode & RCV_SHUTDOWN)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
	}
	if (other)
		sock_put(other);

	return 0;
}
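/* Illustrative userspace sketch (not kernel code): because the peer's
 * shutdown mask is mirrored above, a half-close is visible on both
 * ends:
 *
 *	shutdown(fd, SHUT_WR);		// our SEND_SHUTDOWN, peer's RCV_SHUTDOWN
 *	read(peer_fd, buf, len);	// now returns 0 (EOF)
 *	write(peer_fd, "x", 1);		// still succeeds in the other direction
 */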
long unix_inq_len(struct sock *sk)
{
	struct sk_buff *skb;
	long amount = 0;

	if (sk->sk_state == TCP_LISTEN)
		return -EINVAL;

	spin_lock(&sk->sk_receive_queue.lock);
	if (sk->sk_type == SOCK_STREAM ||
	    sk->sk_type == SOCK_SEQPACKET) {
		skb_queue_walk(&sk->sk_receive_queue, skb)
			amount += unix_skb_len(skb);
	} else {
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	return amount;
}
EXPORT_SYMBOL_GPL(unix_inq_len);
long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);
static int unix_open_file(struct sock *sk)
{
	struct path path;
	struct file *f;
	int fd;

	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	if (!smp_load_acquire(&unix_sk(sk)->addr))
		return -ENOENT;

	path = unix_sk(sk)->path;
	if (!path.dentry)
		return -ENOENT;

	path_get(&path);

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		goto out;

	f = dentry_open(&path, O_PATH, current_cred());
	if (IS_ERR(f)) {
		put_unused_fd(fd);
		fd = PTR_ERR(f);
		goto out;
	}

	fd_install(fd, f);
out:
	path_put(&path);

	return fd;
}
static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	long amount = 0;
	int err;

	switch (cmd) {
	case SIOCOUTQ:
		amount = unix_outq_len(sk);
		err = put_user(amount, (int __user *)arg);
		break;
	case SIOCINQ:
		amount = unix_inq_len(sk);
		if (amount < 0)
			err = amount;
		else
			err = put_user(amount, (int __user *)arg);
		break;
	case SIOCUNIXFILE:
		err = unix_open_file(sk);
		break;
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	case SIOCATMARK:
		{
			struct sk_buff *skb;
			int answ = 0;

			skb = skb_peek(&sk->sk_receive_queue);
			if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
				answ = 1;
			err = put_user(answ, (int __user *)arg);
		}
		break;
#endif
	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}
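/* Illustrative userspace sketch (not kernel code) of the ioctls handled
 * above:
 *
 *	int n;
 *	ioctl(fd, SIOCINQ, &n);	   // bytes queued on the receive side
 *	ioctl(fd, SIOCOUTQ, &n);   // bytes sent but not yet read by the peer
 *	ioctl(fd, SIOCATMARK, &n); // 1 if the OOB byte is at the queue head
 */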
#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
}
#endif
static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	__poll_t mask;
	u8 shutdown;

	sock_poll_wait(file, sock, wait);
	mask = 0;
	shutdown = READ_ONCE(sk->sk_shutdown);

	/* exceptional events? */
	if (READ_ONCE(sk->sk_err))
		mask |= EPOLLERR;
	if (shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;
	if (shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (sk_is_readable(sk))
		mask |= EPOLLIN | EPOLLRDNORM;
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	if (READ_ONCE(unix_sk(sk)->oob_skb))
		mask |= EPOLLPRI;
#endif

	/* Connection-based need to check for termination and startup */
	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
	    sk->sk_state == TCP_CLOSE)
		mask |= EPOLLHUP;

	/*
	 * we set writable also when the other side has shut down the
	 * connection. This prevents stuck sockets.
	 */
	if (unix_writable(sk))
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;

	return mask;
}
static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int writable;
	__poll_t mask;
	u8 shutdown;

	sock_poll_wait(file, sock, wait);
	mask = 0;
	shutdown = READ_ONCE(sk->sk_shutdown);

	/* exceptional events? */
	if (READ_ONCE(sk->sk_err) ||
	    !skb_queue_empty_lockless(&sk->sk_error_queue))
		mask |= EPOLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

	if (shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (sk_is_readable(sk))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= EPOLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
		return mask;

	writable = unix_writable(sk);
	if (writable) {
		unix_state_lock(sk);

		other = unix_peer(sk);
		if (other && unix_peer(other) != sk &&
		    unix_recvq_full_lockless(other) &&
		    unix_dgram_peer_wake_me(sk, other))
			writable = 0;

		unix_state_unlock(sk);
	}

	if (writable)
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
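/* Note on the connected-datagram case above: a sender polling for
 * EPOLLOUT is not reported writable merely because its own buffer has
 * room. If the peer's receive queue is full, unix_dgram_peer_wake_me()
 * parks the poller on the peer's wait queue, so the wakeup arrives only
 * once the reader drains it. This makes poll()+send() usable for flow
 * control on SOCK_DGRAM pairs.
 */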
#ifdef CONFIG_PROC_FS

#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
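/* The seq_file position packs (bucket, offset) into one loff_t:
 * pos = (bucket << BUCKET_SPACE) | offset, with offset counting from 1
 * (0 is reserved for SEQ_START_TOKEN). As a worked example, assuming
 * UNIX_HASH_BITS == 8 on a 64-bit kernel, BUCKET_SPACE is
 * 64 - 9 - 1 = 54, so set_bucket_offset(3, 1) encodes to (3UL << 54) | 1
 * and get_bucket()/get_offset() recover 3 and 1; the spare top bit
 * keeps the value positive in the signed loff_t.
 */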
static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
{
	unsigned long offset = get_offset(*pos);
	unsigned long bucket = get_bucket(*pos);
	unsigned long count = 0;
	struct sock *sk;

	for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]);
	     sk; sk = sk_next(sk)) {
		if (++count == offset)
			break;
	}

	return sk;
}
static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
{
	unsigned long bucket = get_bucket(*pos);
	struct net *net = seq_file_net(seq);
	struct sock *sk;

	while (bucket < UNIX_HASH_SIZE) {
		spin_lock(&net->unx.table.locks[bucket]);

		sk = unix_from_bucket(seq, pos);
		if (sk)
			return sk;

		spin_unlock(&net->unx.table.locks[bucket]);

		*pos = set_bucket_offset(++bucket, 1);
	}

	return NULL;
}
static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
				  loff_t *pos)
{
	unsigned long bucket = get_bucket(*pos);

	sk = sk_next(sk);
	if (sk)
		return sk;

	spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]);

	*pos = set_bucket_offset(++bucket, 1);

	return unix_get_first(seq, pos);
}
static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (!*pos)
		return SEQ_START_TOKEN;

	return unix_get_first(seq, pos);
}

static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;

	if (v == SEQ_START_TOKEN)
		return unix_get_first(seq, pos);

	return unix_get_next(seq, v, pos);
}

static void unix_seq_stop(struct seq_file *seq, void *v)
{
	struct sock *sk = v;

	if (sk)
		spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]);
}
static int unix_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);

		unix_state_lock(s);
		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			refcount_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {	/* under a hash table lock here */
			int i, len;

			seq_putc(seq, ' ');

			i = 0;
			len = u->addr->len -
				offsetof(struct sockaddr_un, sun_path);
			if (u->addr->name->sun_path[0]) {
				len--;
			} else {
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i] ?:
					 '@');
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}
static const struct seq_operations unix_seq_ops = {
	.start	= unix_seq_start,
	.next	= unix_seq_next,
	.stop	= unix_seq_stop,
	.show	= unix_seq_show,
};
#ifdef CONFIG_BPF_SYSCALL
struct bpf_unix_iter_state {
	struct seq_net_private p;
	unsigned int cur_sk;
	unsigned int end_sk;
	unsigned int max_sk;
	struct sock **batch;
	bool st_bucket_done;
};
struct bpf_iter__unix {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct unix_sock *, unix_sk);
	uid_t uid __aligned(8);
};
static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
			      struct unix_sock *unix_sk, uid_t uid)
{
	struct bpf_iter__unix ctx;

	meta->seq_num--;  /* skip SEQ_START_TOKEN */
	ctx.meta = meta;
	ctx.unix_sk = unix_sk;
	ctx.uid = uid;
	return bpf_iter_run_prog(prog, &ctx);
}
static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
{
	struct bpf_unix_iter_state *iter = seq->private;
	unsigned int expected = 1;
	struct sock *sk;

	sock_hold(start_sk);
	iter->batch[iter->end_sk++] = start_sk;

	for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
		if (iter->end_sk < iter->max_sk) {
			sock_hold(sk);
			iter->batch[iter->end_sk++] = sk;
		}

		expected++;
	}

	spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]);

	return expected;
}
static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
{
	while (iter->cur_sk < iter->end_sk)
		sock_put(iter->batch[iter->cur_sk++]);
}
static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
				       unsigned int new_batch_sz)
{
	struct sock **new_batch;

	new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
			     GFP_USER | __GFP_NOWARN);
	if (!new_batch)
		return -ENOMEM;

	bpf_iter_unix_put_batch(iter);
	kvfree(iter->batch);
	iter->batch = new_batch;
	iter->max_sk = new_batch_sz;

	return 0;
}
static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
					loff_t *pos)
{
	struct bpf_unix_iter_state *iter = seq->private;
	unsigned int expected;
	bool resized = false;
	struct sock *sk;

	if (iter->st_bucket_done)
		*pos = set_bucket_offset(get_bucket(*pos) + 1, 1);

again:
	/* Get a new batch */
	iter->cur_sk = 0;
	iter->end_sk = 0;

	sk = unix_get_first(seq, pos);
	if (!sk)
		return NULL; /* Done */

	expected = bpf_iter_unix_hold_batch(seq, sk);

	if (iter->end_sk == expected) {
		iter->st_bucket_done = true;
		return sk;
	}

	if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) {
		resized = true;
		goto again;
	}

	return sk;
}
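/* Batch sizing: the batch starts at INIT_BATCH_SZ slots (see below)
 * and, when a bucket holds more sockets than fit, is grown once to 3/2
 * of the bucket's population before the bucket is re-read: e.g. a
 * bucket of 40 sockets grows the batch to 60 slots for the retry. If
 * the bucket grows again between the unlock and the retry, iteration
 * proceeds with a partial batch rather than resizing indefinitely.
 */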
static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (!*pos)
		return SEQ_START_TOKEN;

	/* bpf iter does not support lseek, so it always
	 * continues from where it was stop()-ped.
	 */
	return bpf_iter_unix_batch(seq, pos);
}
static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_unix_iter_state *iter = seq->private;
	struct sock *sk;

	/* Whenever seq_next() is called, the iter->cur_sk is
	 * done with seq_show(), so advance to the next sk in
	 * the batch.
	 */
	if (iter->cur_sk < iter->end_sk)
		sock_put(iter->batch[iter->cur_sk++]);

	++*pos;

	if (iter->cur_sk < iter->end_sk)
		sk = iter->batch[iter->cur_sk];
	else
		sk = bpf_iter_unix_batch(seq, pos);

	return sk;
}
static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
{
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;
	struct sock *sk = v;
	uid_t uid;
	bool slow;
	int ret;

	if (v == SEQ_START_TOKEN)
		return 0;

	slow = lock_sock_fast(sk);

	if (unlikely(sk_unhashed(sk))) {
		ret = SEQ_SKIP;
		goto unlock;
	}

	uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, false);
	ret = unix_prog_seq_show(prog, &meta, v, uid);
unlock:
	unlock_sock_fast(sk, slow);
	return ret;
}
static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
{
	struct bpf_unix_iter_state *iter = seq->private;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;

	if (!v) {
		meta.seq = seq;
		prog = bpf_iter_get_info(&meta, true);
		if (prog)
			(void)unix_prog_seq_show(prog, &meta, v, 0);
	}

	if (iter->cur_sk < iter->end_sk)
		bpf_iter_unix_put_batch(iter);
}
static const struct seq_operations bpf_iter_unix_seq_ops = {
	.start	= bpf_iter_unix_seq_start,
	.next	= bpf_iter_unix_seq_next,
	.stop	= bpf_iter_unix_seq_stop,
	.show	= bpf_iter_unix_seq_show,
};
#endif
#endif
static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};
static int __net_init unix_net_init(struct net *net)
{
	int i;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
			     sizeof(struct seq_net_private)))
		goto err_sysctl;
#endif

	net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE,
					      sizeof(spinlock_t), GFP_KERNEL);
	if (!net->unx.table.locks)
		goto err_proc;

	net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE,
						sizeof(struct hlist_head),
						GFP_KERNEL);
	if (!net->unx.table.buckets)
		goto free_locks;

	for (i = 0; i < UNIX_HASH_SIZE; i++) {
		spin_lock_init(&net->unx.table.locks[i]);
		INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
	}

	return 0;

free_locks:
	kvfree(net->unx.table.locks);
err_proc:
#ifdef CONFIG_PROC_FS
	remove_proc_entry("unix", net->proc_net);
err_sysctl:
#endif
	unix_sysctl_unregister(net);
out:
	return -ENOMEM;
}
static void __net_exit unix_net_exit(struct net *net)
{
	kvfree(net->unx.table.buckets);
	kvfree(net->unx.table.locks);
	unix_sysctl_unregister(net);
	remove_proc_entry("unix", net->proc_net);
}

static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
		     struct unix_sock *unix_sk, uid_t uid)

#define INIT_BATCH_SZ 16
static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
{
	struct bpf_unix_iter_state *iter = priv_data;
	int err;

	err = bpf_iter_init_seq_net(priv_data, aux);
	if (err)
		return err;

	err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
	if (err) {
		bpf_iter_fini_seq_net(priv_data);
		return err;
	}

	return 0;
}
static void bpf_iter_fini_unix(void *priv_data)
{
	struct bpf_unix_iter_state *iter = priv_data;

	bpf_iter_fini_seq_net(priv_data);
	kvfree(iter->batch);
}
static const struct bpf_iter_seq_info unix_seq_info = {
	.seq_ops		= &bpf_iter_unix_seq_ops,
	.init_seq_private	= bpf_iter_init_unix,
	.fini_seq_private	= bpf_iter_fini_unix,
	.seq_priv_size		= sizeof(struct bpf_unix_iter_state),
};
static const struct bpf_func_proto *
bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
			     const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_setsockopt:
		return &bpf_sk_setsockopt_proto;
	case BPF_FUNC_getsockopt:
		return &bpf_sk_getsockopt_proto;
	default:
		return NULL;
	}
}
static struct bpf_iter_reg unix_reg_info = {
	.target			= "unix",
	.ctx_arg_info_size	= 1,
	.ctx_arg_info		= {
		{ offsetof(struct bpf_iter__unix, unix_sk),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.get_func_proto		= bpf_iter_unix_get_func_proto,
	.seq_info		= &unix_seq_info,
};
static void __init bpf_iter_register(void)
{
	unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
	if (bpf_iter_reg_target(&unix_reg_info))
		pr_warn("Warning: could not register bpf iterator unix\n");
}
#endif
static int __init af_unix_init(void)
{
	int i, rc = -1;

	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));

	for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
		spin_lock_init(&bsd_socket_locks[i]);
		INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
	}

	rc = proto_register(&unix_dgram_proto, 1);
	if (rc != 0) {
		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
		goto out;
	}

	rc = proto_register(&unix_stream_proto, 1);
	if (rc != 0) {
		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
		proto_unregister(&unix_dgram_proto);
		goto out;
	}

	sock_register(&unix_family_ops);
	register_pernet_subsys(&unix_net_ops);
	unix_bpf_build_proto();

#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
	bpf_iter_register();
#endif

out:
	return rc;
}
/* Later than subsys_initcall() because we depend on stuff initialised there */
fs_initcall(af_unix_init);