// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 * Fixes:
 *	Linus Torvalds	:	Assorted bug cures.
 *	Niibe Yutaka	:	async I/O support.
 *	Carsten Paeth	:	PF_UNIX check, address fixes.
 *	Alan Cox	:	Limit size of allocated blocks.
 *	Alan Cox	:	Fixed the stupid socketpair bug.
 *	Alan Cox	:	BSD compatibility fine tuning.
 *	Alan Cox	:	Fixed a bug in connect when interrupted.
 *	Alan Cox	:	Sorted out a proper draft version of
 *				file descriptor passing hacked up from
 *				SCM_RIGHTS.
 *	Marty Leisner	:	Fixes to fd passing.
 *	Nick Nevin	:	recvmsg bugfix.
 *	Alan Cox	:	Started proper garbage collector.
 *	Heiko Eißfeldt	:	Missing verify_area check.
 *	Alan Cox	:	Started POSIXisms.
 *	Andreas Schwab	:	Replace inode by dentry for proper
 *				reference counting of socket.
 *	Kirk Petersen	:	Made this a module.
 *	Christoph Rohland :	Elegant non-blocking accept/connect algorithm.
 *	Alexey Kuznetsov :	Repaired (I hope) bugs introduced
 *				by the above two patches.
 *	Andrea Arcangeli :	If possible we block in connect(2)
 *				if the max backlog of the listen socket
 *				has been reached. This won't break
 *				old apps and it will avoid a huge amount
 *				of socks hashed (this is for unix_gc()
 *				performance reasons).
 *				Security fix that limits the max
 *				number of socks to 2*max_files and
 *				the number of skbs queueable in the
 *				dgram receiver.
 *	Artur Skawina	:	Hash function optimizations.
 *	Alexey Kuznetsov :	Full scale SMP. Lots of bugs are introduced 8)
 *	Malcolm Beattie	:	Set peercred for socketpair.
 *	Michal Ostrowski :	Module initialization cleanup.
 *	Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT;
 *				the core infrastructure is doing that
 *				for all net proto families now (2.5.69+).
 *
 * Known differences from reference BSD that was tested:
 *
 *	ECONNREFUSED is not returned from one end of a connected socket to the
 *	other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as a high water
 *	mark and a fake inode identifier (nor does it have the BSD
 *	first-socket-fstat-twice bug).
 *	accept() returns a path name even if the connecting socket has closed
 *	in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *	and a null first byte in the path (but not for gethost/peername - BSD bug??).
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *	(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  starting with 0, so that this name space does not intersect
 *		  with BSD names.
 */
78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
80 #include <linux/module.h>
81 #include <linux/kernel.h>
82 #include <linux/signal.h>
83 #include <linux/sched/signal.h>
84 #include <linux/errno.h>
85 #include <linux/string.h>
86 #include <linux/stat.h>
87 #include <linux/dcache.h>
88 #include <linux/namei.h>
89 #include <linux/socket.h>
91 #include <linux/fcntl.h>
92 #include <linux/filter.h>
93 #include <linux/termios.h>
94 #include <linux/sockios.h>
95 #include <linux/net.h>
98 #include <linux/slab.h>
99 #include <linux/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <net/net_namespace.h>
103 #include <net/sock.h>
104 #include <net/tcp_states.h>
105 #include <net/af_unix.h>
106 #include <linux/proc_fs.h>
107 #include <linux/seq_file.h>
109 #include <linux/init.h>
110 #include <linux/poll.h>
111 #include <linux/rtnetlink.h>
112 #include <linux/mount.h>
113 #include <net/checksum.h>
114 #include <linux/security.h>
115 #include <linux/splice.h>
116 #include <linux/freezer.h>
117 #include <linux/file.h>
118 #include <linux/btf_ids.h>
119 #include <linux/bpf-cgroup.h>
static atomic_long_t unix_nr_socks;
static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];

/* SMP locking strategy:
 *    the hash table is protected with a spinlock;
 *    each socket state is protected by a separate spinlock.
 */

static unsigned int unix_unbound_hash(struct sock *sk)
{
	unsigned long hash = (unsigned long)sk;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash ^= sk->sk_type;

	return hash & UNIX_HASH_MOD;
}

static unsigned int unix_bsd_hash(struct inode *i)
{
	return i->i_ino & UNIX_HASH_MOD;
}

static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
				       int addr_len, int type)
{
	__wsum csum = csum_partial(sunaddr, addr_len, 0);
	unsigned int hash;

	hash = (__force unsigned int)csum_fold(csum);
	hash ^= hash >> 8;
	hash ^= type;

	return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
}
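/*
 * Illustrative sketch (not part of the build): how the three hash helpers
 * above partition a table of 2 * (UNIX_HASH_MOD + 1) buckets.  Assuming
 * UNIX_HASH_MOD is 255, unbound and path-bound sockets land in buckets
 * [0, 255] and abstract sockets in [256, 511], which is why
 * bsd_socket_buckets only needs UNIX_HASH_SIZE / 2 entries.
 */
#if 0 /* example only */
#include <assert.h>

static void unix_hash_layout_example(void)
{
	unsigned int mod = 255;			 /* assumed UNIX_HASH_MOD */
	unsigned int bsd = 42 & mod;		 /* as in unix_bsd_hash() */
	unsigned int abst = mod + 1 + (42 & mod); /* as in unix_abstract_hash() */

	assert(bsd <= mod);			 /* first half of the table */
	assert(abst > mod && abst <= 2 * mod + 1); /* second half of the table */
}
#endif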
static void unix_table_double_lock(struct net *net,
				   unsigned int hash1, unsigned int hash2)
{
	if (hash1 == hash2) {
		spin_lock(&net->unx.table.locks[hash1]);
		return;
	}

	if (hash1 > hash2)
		swap(hash1, hash2);

	spin_lock(&net->unx.table.locks[hash1]);
	spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
}

static void unix_table_double_unlock(struct net *net,
				     unsigned int hash1, unsigned int hash2)
{
	if (hash1 == hash2) {
		spin_unlock(&net->unx.table.locks[hash1]);
		return;
	}

	spin_unlock(&net->unx.table.locks[hash1]);
	spin_unlock(&net->unx.table.locks[hash2]);
}

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
#endif /* CONFIG_SECURITY_NETWORK */

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(const struct sock *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

static inline int unix_recvq_full_lockless(const struct sock *sk)
{
	return skb_queue_len_lockless(&sk->sk_receive_queue) >
		READ_ONCE(sk->sk_max_ack_backlog);
}

struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
					     int addr_len)
{
	struct unix_address *addr;

	addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
	if (!addr)
		return NULL;

	refcount_set(&addr->refcnt, 1);
	addr->len = addr_len;
	memcpy(addr->name, sunaddr, addr_len);

	return addr;
}

static inline void unix_release_addr(struct unix_address *addr)
{
	if (refcount_dec_and_test(&addr->refcnt))
		kfree(addr);
}
/*
 * Check unix socket name:
 *	- should not be zero length;
 *	- if it does not start with a zero byte, it should be NUL terminated
 *	  (FS object);
 *	- if it starts with a zero byte, it is an abstract name.
 */

static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
{
	if (addr_len <= offsetof(struct sockaddr_un, sun_path) ||
	    addr_len > sizeof(*sunaddr))
		return -EINVAL;

	if (sunaddr->sun_family != AF_UNIX)
		return -EINVAL;

	return 0;
}
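/*
 * Illustrative sketch (userspace, not part of the kernel build): the three
 * address forms the checks above distinguish.  The path and name below are
 * made up, and each bind is on a fresh socket.
 */
#if 0 /* example only */
#include <stddef.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>

static void bind_examples(int fs_fd, int abstract_fd, int auto_fd)
{
	struct sockaddr_un a;

	/* Filesystem name: sun_path[0] != 0, NUL-terminated path. */
	memset(&a, 0, sizeof(a));
	a.sun_family = AF_UNIX;
	strcpy(a.sun_path, "/tmp/example.sock");
	bind(fs_fd, (struct sockaddr *)&a, sizeof(a));

	/* Abstract name: sun_path[0] == 0, addr_len delimits the name. */
	memset(&a, 0, sizeof(a));
	a.sun_family = AF_UNIX;
	memcpy(a.sun_path, "\0example", 8);
	bind(abstract_fd, (struct sockaddr *)&a,
	     offsetof(struct sockaddr_un, sun_path) + 8);

	/* Autobind: only sun_family, handled by unix_autobind() below. */
	a.sun_family = AF_UNIX;
	bind(auto_fd, (struct sockaddr *)&a,
	     offsetof(struct sockaddr_un, sun_path));
}
#endif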
291 static int unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
293 struct sockaddr_storage *addr = (struct sockaddr_storage *)sunaddr;
294 short offset = offsetof(struct sockaddr_storage, __data);
296 BUILD_BUG_ON(offset != offsetof(struct sockaddr_un, sun_path));
298 /* This may look like an off by one error but it is a bit more
299 * subtle. 108 is the longest valid AF_UNIX path for a binding.
300 * sun_path[108] doesn't as such exist. However in kernel space
301 * we are guaranteed that it is a valid memory location in our
302 * kernel address buffer because syscall functions always pass
303 * a pointer of struct sockaddr_storage which has a bigger buffer
304 * than 108. Also, we must terminate sun_path for strlen() in
307 addr->__data[addr_len - offset] = 0;
309 /* Don't pass sunaddr->sun_path to strlen(). Otherwise, 108 will
310 * cause panic if CONFIG_FORTIFY_SOURCE=y. Let __fortify_strlen()
311 * know the actual buffer.
313 return strlen(addr->__data) + offset + 1;
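/*
 * Worked example (restating the arithmetic above): for a bind to "/tmp/x",
 * offset is 2 (the size of sa_family_t), the string is 6 bytes, so the
 * returned length is 6 + 2 + 1 = 9 no matter how large an addr_len the
 * caller passed in; anything after the first NUL is ignored.
 */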
316 static void __unix_remove_socket(struct sock *sk)
318 sk_del_node_init(sk);
321 static void __unix_insert_socket(struct net *net, struct sock *sk)
323 DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
324 sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
327 static void __unix_set_addr_hash(struct net *net, struct sock *sk,
328 struct unix_address *addr, unsigned int hash)
330 __unix_remove_socket(sk);
331 smp_store_release(&unix_sk(sk)->addr, addr);
334 __unix_insert_socket(net, sk);
337 static void unix_remove_socket(struct net *net, struct sock *sk)
339 spin_lock(&net->unx.table.locks[sk->sk_hash]);
340 __unix_remove_socket(sk);
341 spin_unlock(&net->unx.table.locks[sk->sk_hash]);
344 static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
346 spin_lock(&net->unx.table.locks[sk->sk_hash]);
347 __unix_insert_socket(net, sk);
348 spin_unlock(&net->unx.table.locks[sk->sk_hash]);
351 static void unix_insert_bsd_socket(struct sock *sk)
353 spin_lock(&bsd_socket_locks[sk->sk_hash]);
354 sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
355 spin_unlock(&bsd_socket_locks[sk->sk_hash]);
358 static void unix_remove_bsd_socket(struct sock *sk)
360 if (!hlist_unhashed(&sk->sk_bind_node)) {
361 spin_lock(&bsd_socket_locks[sk->sk_hash]);
362 __sk_del_bind_node(sk);
363 spin_unlock(&bsd_socket_locks[sk->sk_hash]);
365 sk_node_init(&sk->sk_bind_node);
369 static struct sock *__unix_find_socket_byname(struct net *net,
370 struct sockaddr_un *sunname,
371 int len, unsigned int hash)
375 sk_for_each(s, &net->unx.table.buckets[hash]) {
376 struct unix_sock *u = unix_sk(s);
378 if (u->addr->len == len &&
379 !memcmp(u->addr->name, sunname, len))
385 static inline struct sock *unix_find_socket_byname(struct net *net,
386 struct sockaddr_un *sunname,
387 int len, unsigned int hash)
391 spin_lock(&net->unx.table.locks[hash]);
392 s = __unix_find_socket_byname(net, sunname, len, hash);
395 spin_unlock(&net->unx.table.locks[hash]);
399 static struct sock *unix_find_socket_byinode(struct inode *i)
401 unsigned int hash = unix_bsd_hash(i);
404 spin_lock(&bsd_socket_locks[hash]);
405 sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
406 struct dentry *dentry = unix_sk(s)->path.dentry;
408 if (dentry && d_backing_inode(dentry) == i) {
410 spin_unlock(&bsd_socket_locks[hash]);
414 spin_unlock(&bsd_socket_locks[hash]);
/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (eg, /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large". This means there's a second writeability condition
 * poll and sendmsg need to test. The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far. This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue.
 *
 * In order to propagate a wake up, a wait_queue_entry_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and is broken when
 * the association to the server socket is dissolved or after a wake up
 * was relayed.
 */
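/*
 * Illustrative sketch (userspace, not part of the kernel build) of the
 * second writeability condition described above: a client connected to a
 * datagram server that is not connected back.  Once the server's receive
 * queue is "too large", a non-blocking send fails with EAGAIN, and poll()
 * only learns about POLLOUT through the relay machinery below.
 */
#if 0 /* example only */
#include <errno.h>
#include <poll.h>
#include <sys/socket.h>

static int send_or_wait(int fd, const void *buf, size_t len)
{
	struct pollfd pfd = { .fd = fd, .events = POLLOUT };

	for (;;) {
		if (send(fd, buf, len, MSG_DONTWAIT) >= 0)
			return 0;
		if (errno != EAGAIN)
			return -1;
		/* Sleeps until unix_dgram_peer_wake_relay() wakes us. */
		if (poll(&pfd, 1, -1) < 0)
			return -1;
	}
}
#endif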
static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key_to_poll(key));

	return 0;
}

static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
	struct unix_sock *u, *u_other;
	int rc;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	rc = 0;
	spin_lock(&u_other->peer_wait.lock);

	if (!u->peer_wake.private) {
		u->peer_wake.private = other;
		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);

		rc = 1;
	}

	spin_unlock(&u_other->peer_wait.lock);
	return rc;
}

static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}

static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
						   struct sock *other)
{
	unix_dgram_peer_wake_disconnect(sk, other);
	wake_up_interruptible_poll(sk_sleep(sk),
				   EPOLLOUT |
				   EPOLLWRNORM |
				   EPOLLWRBAND);
}

/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	/* If other is SOCK_DEAD, we want to make sure we signal
	 * POLLOUT, such that a subsequent write() can get a
	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
	 * to other and it's full, we will hang waiting for POLLOUT.
	 */
	if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}

static int unix_writable(const struct sock *sk)
{
	return sk->sk_state != TCP_LISTEN &&
	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}
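/*
 * Worked example (restating the shift above, no new facts): with a send
 * buffer of, say, 212992 bytes, the socket stays writable while
 * wmem_alloc * 4 <= 212992, i.e. while less than a quarter of the send
 * budget (53248 bytes here) is tied up in queued skbs.
 */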
541 static void unix_write_space(struct sock *sk)
543 struct socket_wq *wq;
546 if (unix_writable(sk)) {
547 wq = rcu_dereference(sk->sk_wq);
548 if (skwq_has_sleeper(wq))
549 wake_up_interruptible_sync_poll(&wq->wait,
550 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
551 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
556 /* When dgram socket disconnects (or changes its peer), we clear its receive
557 * queue of packets arrived from previous peer. First, it allows to do
558 * flow control based only on wmem_alloc; second, sk connected to peer
559 * may receive messages only from that peer. */
560 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
562 if (!skb_queue_empty(&sk->sk_receive_queue)) {
563 skb_queue_purge(&sk->sk_receive_queue);
564 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
566 /* If one link of bidirectional dgram pipe is disconnected,
567 * we signal error. Messages are lost. Do not make this,
568 * when peer was not connected to us.
570 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
571 WRITE_ONCE(other->sk_err, ECONNRESET);
572 sk_error_report(other);
575 other->sk_state = TCP_CLOSE;
static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
	DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		 atomic_long_read(&unix_nr_socks));
#endif
}
603 static void unix_release_sock(struct sock *sk, int embrion)
605 struct unix_sock *u = unix_sk(sk);
611 unix_remove_socket(sock_net(sk), sk);
612 unix_remove_bsd_socket(sk);
617 WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
619 u->path.dentry = NULL;
621 state = sk->sk_state;
622 sk->sk_state = TCP_CLOSE;
624 skpair = unix_peer(sk);
625 unix_peer(sk) = NULL;
627 unix_state_unlock(sk);
629 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
631 kfree_skb(u->oob_skb);
636 wake_up_interruptible_all(&u->peer_wait);
638 if (skpair != NULL) {
639 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
640 unix_state_lock(skpair);
642 WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
643 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
644 WRITE_ONCE(skpair->sk_err, ECONNRESET);
645 unix_state_unlock(skpair);
646 skpair->sk_state_change(skpair);
647 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
650 unix_dgram_peer_wake_disconnect(sk, skpair);
651 sock_put(skpair); /* It may now die */
654 /* Try to flush out this socket. Throw out buffers at least */
656 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
657 if (state == TCP_LISTEN)
658 unix_release_sock(skb->sk, 1);
659 /* passed fds are erased in the kfree_skb hook */
660 UNIXCB(skb).consumed = skb->len;
669 /* ---- Socket is dead now and most probably destroyed ---- */
672 * Fixme: BSD difference: In BSD all sockets connected to us get
673 * ECONNRESET and we die on the spot. In Linux we behave
674 * like files and pipes do and wait for the last
677 * Can't we simply set sock->err?
679 * What the above comment does talk about? --ANK(980817)
682 if (READ_ONCE(unix_tot_inflight))
683 unix_gc(); /* Garbage collect fds */
686 static void init_peercred(struct sock *sk)
688 const struct cred *old_cred;
691 spin_lock(&sk->sk_peer_lock);
692 old_pid = sk->sk_peer_pid;
693 old_cred = sk->sk_peer_cred;
694 sk->sk_peer_pid = get_pid(task_tgid(current));
695 sk->sk_peer_cred = get_current_cred();
696 spin_unlock(&sk->sk_peer_lock);
702 static void copy_peercred(struct sock *sk, struct sock *peersk)
704 const struct cred *old_cred;
708 spin_lock(&sk->sk_peer_lock);
709 spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
711 spin_lock(&peersk->sk_peer_lock);
712 spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
714 old_pid = sk->sk_peer_pid;
715 old_cred = sk->sk_peer_cred;
716 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
717 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
719 spin_unlock(&sk->sk_peer_lock);
720 spin_unlock(&peersk->sk_peer_lock);
726 static int unix_listen(struct socket *sock, int backlog)
729 struct sock *sk = sock->sk;
730 struct unix_sock *u = unix_sk(sk);
733 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
734 goto out; /* Only stream/seqpacket sockets accept */
737 goto out; /* No listens on an unbound socket */
739 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
741 if (backlog > sk->sk_max_ack_backlog)
742 wake_up_interruptible_all(&u->peer_wait);
743 sk->sk_max_ack_backlog = backlog;
744 sk->sk_state = TCP_LISTEN;
745 /* set credentials so connect can copy them */
750 unix_state_unlock(sk);
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
				poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
#endif
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
				       struct pipe_inode_info *, size_t size,
				       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
				  int);

#ifdef CONFIG_PROC_FS
static int unix_count_nr_fds(struct sock *sk)
{
	struct sk_buff *skb;
	struct unix_sock *u;
	int nr_fds = 0;

	spin_lock(&sk->sk_receive_queue.lock);
	skb = skb_peek(&sk->sk_receive_queue);
	while (skb) {
		u = unix_sk(skb->sk);
		nr_fds += atomic_read(&u->scm_stat.nr_fds);
		skb = skb_peek_next(skb, &sk->sk_receive_queue);
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	return nr_fds;
}

static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
{
	struct sock *sk = sock->sk;
	unsigned char s_state;
	struct unix_sock *u;
	int nr_fds = 0;

	if (sk) {
		s_state = READ_ONCE(sk->sk_state);
		u = unix_sk(sk);

		/* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
		 * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
		 * SOCK_DGRAM is ordinary. So, no lock is needed.
		 */
		if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED)
			nr_fds = atomic_read(&u->scm_stat.nr_fds);
		else if (s_state == TCP_LISTEN)
			nr_fds = unix_count_nr_fds(sk);

		seq_printf(m, "scm_fds: %u\n", nr_fds);
	}
}
#else
#define unix_show_fdinfo NULL
#endif
static const struct proto_ops unix_stream_ops = {
	.family = PF_UNIX,
	.owner = THIS_MODULE,
	.release = unix_release,
	.bind = unix_bind,
	.connect = unix_stream_connect,
	.socketpair = unix_socketpair,
	.accept = unix_accept,
	.getname = unix_getname,
	.poll = unix_poll,
	.ioctl = unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = unix_compat_ioctl,
#endif
	.listen = unix_listen,
	.shutdown = unix_shutdown,
	.sendmsg = unix_stream_sendmsg,
	.recvmsg = unix_stream_recvmsg,
	.read_skb = unix_stream_read_skb,
	.mmap = sock_no_mmap,
	.splice_read = unix_stream_splice_read,
	.set_peek_off = sk_set_peek_off,
	.show_fdinfo = unix_show_fdinfo,
};

static const struct proto_ops unix_dgram_ops = {
	.family = PF_UNIX,
	.owner = THIS_MODULE,
	.release = unix_release,
	.bind = unix_bind,
	.connect = unix_dgram_connect,
	.socketpair = unix_socketpair,
	.accept = sock_no_accept,
	.getname = unix_getname,
	.poll = unix_dgram_poll,
	.ioctl = unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = unix_compat_ioctl,
#endif
	.listen = sock_no_listen,
	.shutdown = unix_shutdown,
	.sendmsg = unix_dgram_sendmsg,
	.read_skb = unix_read_skb,
	.recvmsg = unix_dgram_recvmsg,
	.mmap = sock_no_mmap,
	.set_peek_off = sk_set_peek_off,
	.show_fdinfo = unix_show_fdinfo,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family = PF_UNIX,
	.owner = THIS_MODULE,
	.release = unix_release,
	.bind = unix_bind,
	.connect = unix_stream_connect,
	.socketpair = unix_socketpair,
	.accept = unix_accept,
	.getname = unix_getname,
	.poll = unix_dgram_poll,
	.ioctl = unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = unix_compat_ioctl,
#endif
	.listen = unix_listen,
	.shutdown = unix_shutdown,
	.sendmsg = unix_seqpacket_sendmsg,
	.recvmsg = unix_seqpacket_recvmsg,
	.mmap = sock_no_mmap,
	.set_peek_off = sk_set_peek_off,
	.show_fdinfo = unix_show_fdinfo,
};

static void unix_close(struct sock *sk, long timeout)
{
	/* Nothing to do here, unix socket does not need a ->close().
	 * This is merely for sockmap.
	 */
}

static void unix_unhash(struct sock *sk)
{
	/* Nothing to do here, unix socket does not need a ->unhash().
	 * This is merely for sockmap.
	 */
}

static bool unix_bpf_bypass_getsockopt(int level, int optname)
{
	if (level == SOL_SOCKET) {
		switch (optname) {
		case SO_PEERPIDFD:
			return true;
		default:
			return false;
		}
	}

	return false;
}
931 struct proto unix_dgram_proto = {
933 .owner = THIS_MODULE,
934 .obj_size = sizeof(struct unix_sock),
936 .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt,
937 #ifdef CONFIG_BPF_SYSCALL
938 .psock_update_sk_prot = unix_dgram_bpf_update_proto,
942 struct proto unix_stream_proto = {
943 .name = "UNIX-STREAM",
944 .owner = THIS_MODULE,
945 .obj_size = sizeof(struct unix_sock),
947 .unhash = unix_unhash,
948 .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt,
949 #ifdef CONFIG_BPF_SYSCALL
950 .psock_update_sk_prot = unix_stream_bpf_update_proto,
954 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
960 atomic_long_inc(&unix_nr_socks);
961 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
966 if (type == SOCK_STREAM)
967 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
968 else /*dgram and seqpacket */
969 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
976 sock_init_data(sock, sk);
978 sk->sk_hash = unix_unbound_hash(sk);
979 sk->sk_allocation = GFP_KERNEL_ACCOUNT;
980 sk->sk_write_space = unix_write_space;
981 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
982 sk->sk_destruct = unix_sock_destructor;
985 u->path.dentry = NULL;
987 spin_lock_init(&u->lock);
988 INIT_LIST_HEAD(&u->link);
989 mutex_init(&u->iolock); /* single task reading lock */
990 mutex_init(&u->bindlock); /* single task binding lock */
991 init_waitqueue_head(&u->peer_wait);
992 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
993 memset(&u->scm_stat, 0, sizeof(struct scm_stat));
994 unix_insert_unbound_socket(net, sk);
996 sock_prot_inuse_add(net, sk->sk_prot, 1);
1001 atomic_long_dec(&unix_nr_socks);
1002 return ERR_PTR(err);
static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	struct sock *sk;

	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
	/*
	 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
	 *	nothing uses it.
	 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		fallthrough;
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	sk = unix_create1(net, sock, kern, sock->type);
	if (IS_ERR(sk))
		return PTR_ERR(sk);

	return 0;
}

static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	sk->sk_prot->close(sk, 0);
	unix_release_sock(sk, 0);
	sock->sk = NULL;

	return 0;
}
static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
				  int type)
{
	struct inode *inode;
	struct path path;
	struct sock *sk;
	int err;

	unix_mkname_bsd(sunaddr, addr_len);
	err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
	if (err)
		goto fail;

	err = path_permission(&path, MAY_WRITE);
	if (err)
		goto path_put;

	err = -ECONNREFUSED;
	inode = d_backing_inode(path.dentry);
	if (!S_ISSOCK(inode->i_mode))
		goto path_put;

	sk = unix_find_socket_byinode(inode);
	if (!sk)
		goto path_put;

	err = -EPROTOTYPE;
	if (sk->sk_type == type)
		touch_atime(&path);
	else
		sock_put(sk);

	path_put(&path);

	return sk;

path_put:
	path_put(&path);
fail:
	return ERR_PTR(err);
}

static struct sock *unix_find_abstract(struct net *net,
				       struct sockaddr_un *sunaddr,
				       int addr_len, int type)
{
	unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type);
	struct dentry *dentry;
	struct sock *sk;

	sk = unix_find_socket_byname(net, sunaddr, addr_len, hash);
	if (!sk)
		return ERR_PTR(-ECONNREFUSED);

	dentry = unix_sk(sk)->path.dentry;
	if (dentry)
		touch_atime(&unix_sk(sk)->path);

	return sk;
}

static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunaddr,
				    int addr_len, int type)
{
	struct sock *sk;

	if (sunaddr->sun_path[0])
		sk = unix_find_bsd(sunaddr, addr_len, type);
	else
		sk = unix_find_abstract(net, sunaddr, addr_len, type);

	return sk;
}
1134 static int unix_autobind(struct sock *sk)
1136 unsigned int new_hash, old_hash = sk->sk_hash;
1137 struct unix_sock *u = unix_sk(sk);
1138 struct net *net = sock_net(sk);
1139 struct unix_address *addr;
1140 u32 lastnum, ordernum;
1143 err = mutex_lock_interruptible(&u->bindlock);
1151 addr = kzalloc(sizeof(*addr) +
1152 offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL);
1156 addr->len = offsetof(struct sockaddr_un, sun_path) + 6;
1157 addr->name->sun_family = AF_UNIX;
1158 refcount_set(&addr->refcnt, 1);
1160 ordernum = get_random_u32();
1161 lastnum = ordernum & 0xFFFFF;
1163 ordernum = (ordernum + 1) & 0xFFFFF;
1164 sprintf(addr->name->sun_path + 1, "%05x", ordernum);
1166 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
1167 unix_table_double_lock(net, old_hash, new_hash);
1169 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
1170 unix_table_double_unlock(net, old_hash, new_hash);
1172 /* __unix_find_socket_byname() may take long time if many names
1173 * are already in use.
1177 if (ordernum == lastnum) {
1178 /* Give up if all names seems to be in use. */
1180 unix_release_addr(addr);
1187 __unix_set_addr_hash(net, sk, addr, new_hash);
1188 unix_table_double_unlock(net, old_hash, new_hash);
1191 out: mutex_unlock(&u->bindlock);
1195 static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
1198 umode_t mode = S_IFSOCK |
1199 (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
1200 unsigned int new_hash, old_hash = sk->sk_hash;
1201 struct unix_sock *u = unix_sk(sk);
1202 struct net *net = sock_net(sk);
1203 struct mnt_idmap *idmap;
1204 struct unix_address *addr;
1205 struct dentry *dentry;
1209 addr_len = unix_mkname_bsd(sunaddr, addr_len);
1210 addr = unix_create_addr(sunaddr, addr_len);
1215 * Get the parent directory, calculate the hash for last
1218 dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
1219 if (IS_ERR(dentry)) {
1220 err = PTR_ERR(dentry);
1225 * All right, let's create it.
1227 idmap = mnt_idmap(parent.mnt);
1228 err = security_path_mknod(&parent, dentry, mode, 0);
1230 err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0);
1233 err = mutex_lock_interruptible(&u->bindlock);
1239 new_hash = unix_bsd_hash(d_backing_inode(dentry));
1240 unix_table_double_lock(net, old_hash, new_hash);
1241 u->path.mnt = mntget(parent.mnt);
1242 u->path.dentry = dget(dentry);
1243 __unix_set_addr_hash(net, sk, addr, new_hash);
1244 unix_table_double_unlock(net, old_hash, new_hash);
1245 unix_insert_bsd_socket(sk);
1246 mutex_unlock(&u->bindlock);
1247 done_path_create(&parent, dentry);
1251 mutex_unlock(&u->bindlock);
1254 /* failed after successful mknod? unlink what we'd created... */
1255 vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL);
1257 done_path_create(&parent, dentry);
1259 unix_release_addr(addr);
1260 return err == -EEXIST ? -EADDRINUSE : err;
1263 static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
1266 unsigned int new_hash, old_hash = sk->sk_hash;
1267 struct unix_sock *u = unix_sk(sk);
1268 struct net *net = sock_net(sk);
1269 struct unix_address *addr;
1272 addr = unix_create_addr(sunaddr, addr_len);
1276 err = mutex_lock_interruptible(&u->bindlock);
1285 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
1286 unix_table_double_lock(net, old_hash, new_hash);
1288 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
1291 __unix_set_addr_hash(net, sk, addr, new_hash);
1292 unix_table_double_unlock(net, old_hash, new_hash);
1293 mutex_unlock(&u->bindlock);
1297 unix_table_double_unlock(net, old_hash, new_hash);
1300 mutex_unlock(&u->bindlock);
1302 unix_release_addr(addr);
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	int err;

	if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
	    sunaddr->sun_family == AF_UNIX)
		return unix_autobind(sk);

	err = unix_validate_addr(sunaddr, addr_len);
	if (err)
		return err;

	if (sunaddr->sun_path[0])
		err = unix_bind_bsd(sk, sunaddr, addr_len);
	else
		err = unix_bind_abstract(sk, sunaddr, addr_len);

	return err;
}
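/*
 * Illustrative sketch (userspace, not part of the kernel build): binding
 * with nothing but sun_family takes the unix_autobind() path above, and
 * getsockname() then reports the kernel-chosen abstract name (a leading
 * zero byte followed by five hex digits).
 */
#if 0 /* example only */
#include <stddef.h>
#include <sys/socket.h>
#include <sys/un.h>

static void autobind_example(int fd)
{
	struct sockaddr_un a = { .sun_family = AF_UNIX };
	socklen_t len = sizeof(a);

	bind(fd, (struct sockaddr *)&a, offsetof(struct sockaddr_un, sun_path));
	getsockname(fd, (struct sockaddr *)&a, &len);
	/* a.sun_path[0] == '\0', a.sun_path[1..5] == five hex digits */
}
#endif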
1328 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1330 if (unlikely(sk1 == sk2) || !sk2) {
1331 unix_state_lock(sk1);
1337 unix_state_lock(sk1);
1338 unix_state_lock_nested(sk2, U_LOCK_SECOND);
1341 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1343 if (unlikely(sk1 == sk2) || !sk2) {
1344 unix_state_unlock(sk1);
1347 unix_state_unlock(sk1);
1348 unix_state_unlock(sk2);
1351 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1352 int alen, int flags)
1354 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1355 struct sock *sk = sock->sk;
1360 if (alen < offsetofend(struct sockaddr, sa_family))
1363 if (addr->sa_family != AF_UNSPEC) {
1364 err = unix_validate_addr(sunaddr, alen);
1368 err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, addr, &alen);
1372 if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
1373 test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
1374 !unix_sk(sk)->addr) {
1375 err = unix_autobind(sk);
1381 other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type);
1382 if (IS_ERR(other)) {
1383 err = PTR_ERR(other);
1387 unix_state_double_lock(sk, other);
1389 /* Apparently VFS overslept socket death. Retry. */
1390 if (sock_flag(other, SOCK_DEAD)) {
1391 unix_state_double_unlock(sk, other);
1397 if (!unix_may_send(sk, other))
1400 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1404 sk->sk_state = other->sk_state = TCP_ESTABLISHED;
1407 * 1003.1g breaking connected state with AF_UNSPEC
1410 unix_state_double_lock(sk, other);
1414 * If it was connected, reconnect.
1416 if (unix_peer(sk)) {
1417 struct sock *old_peer = unix_peer(sk);
1419 unix_peer(sk) = other;
1421 sk->sk_state = TCP_CLOSE;
1422 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1424 unix_state_double_unlock(sk, other);
1426 if (other != old_peer)
1427 unix_dgram_disconnected(sk, old_peer);
1430 unix_peer(sk) = other;
1431 unix_state_double_unlock(sk, other);
1437 unix_state_double_unlock(sk, other);
1443 static long unix_wait_for_peer(struct sock *other, long timeo)
1444 __releases(&unix_sk(other)->lock)
1446 struct unix_sock *u = unix_sk(other);
1450 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1452 sched = !sock_flag(other, SOCK_DEAD) &&
1453 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1454 unix_recvq_full_lockless(other);
1456 unix_state_unlock(other);
1459 timeo = schedule_timeout(timeo);
1461 finish_wait(&u->peer_wait, &wait);
1465 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1466 int addr_len, int flags)
1468 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1469 struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
1470 struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1471 struct net *net = sock_net(sk);
1472 struct sk_buff *skb = NULL;
1477 err = unix_validate_addr(sunaddr, addr_len);
1481 err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, &addr_len);
1485 if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
1486 test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
1487 err = unix_autobind(sk);
1492 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1494 /* First of all allocate resources.
1495 If we will make it after state is locked,
1496 we will have to recheck all again in any case.
1499 /* create new sock for complete connection */
1500 newsk = unix_create1(net, NULL, 0, sock->type);
1501 if (IS_ERR(newsk)) {
1502 err = PTR_ERR(newsk);
1509 /* Allocate skb for sending to listening sock */
1510 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1515 /* Find listening sock. */
1516 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type);
1517 if (IS_ERR(other)) {
1518 err = PTR_ERR(other);
1523 /* Latch state of peer */
1524 unix_state_lock(other);
1526 /* Apparently VFS overslept socket death. Retry. */
1527 if (sock_flag(other, SOCK_DEAD)) {
1528 unix_state_unlock(other);
1533 err = -ECONNREFUSED;
1534 if (other->sk_state != TCP_LISTEN)
1536 if (other->sk_shutdown & RCV_SHUTDOWN)
1539 if (unix_recvq_full(other)) {
1544 timeo = unix_wait_for_peer(other, timeo);
1546 err = sock_intr_errno(timeo);
1547 if (signal_pending(current))
1555 It is tricky place. We need to grab our state lock and cannot
1556 drop lock on peer. It is dangerous because deadlock is
1557 possible. Connect to self case and simultaneous
1558 attempt to connect are eliminated by checking socket
1559 state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1560 check this before attempt to grab lock.
1562 Well, and we have to recheck the state after socket locked.
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk, U_LOCK_SECOND);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Quickly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk) = sk;
	newsk->sk_state = TCP_ESTABLISHED;
	newsk->sk_type = sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under its lock. Insertion into the
	 * hash chain we'd found it in had been done in an
	 * earlier critical area protected by the chain's lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path, visible to anyone who gets newu->addr
	 * by smp_load_acquire(). IOW, the same guarantees
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	refcount_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state = SS_CONNECTED;
	sk->sk_state = TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
	unix_peer(sk) = newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1663 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1665 struct sock *ska = socka->sk, *skb = sockb->sk;
1667 /* Join our sockets back to back */
1670 unix_peer(ska) = skb;
1671 unix_peer(skb) = ska;
1675 ska->sk_state = TCP_ESTABLISHED;
1676 skb->sk_state = TCP_ESTABLISHED;
1677 socka->state = SS_CONNECTED;
1678 sockb->state = SS_CONNECTED;
1682 static void unix_sock_inherit_flags(const struct socket *old,
1685 if (test_bit(SOCK_PASSCRED, &old->flags))
1686 set_bit(SOCK_PASSCRED, &new->flags);
1687 if (test_bit(SOCK_PASSPIDFD, &old->flags))
1688 set_bit(SOCK_PASSPIDFD, &new->flags);
1689 if (test_bit(SOCK_PASSSEC, &old->flags))
1690 set_bit(SOCK_PASSSEC, &new->flags);
1693 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1696 struct sock *sk = sock->sk;
1698 struct sk_buff *skb;
1702 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1706 if (sk->sk_state != TCP_LISTEN)
1709 /* If socket state is TCP_LISTEN it cannot change (for now...),
1710 * so that no locks are necessary.
1713 skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
1716 /* This means receive shutdown. */
1723 skb_free_datagram(sk, skb);
1724 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1726 /* attach accepted sock to socket */
1727 unix_state_lock(tsk);
1728 newsock->state = SS_CONNECTED;
1729 unix_sock_inherit_flags(sock, newsock);
1730 sock_graft(tsk, newsock);
1731 unix_state_unlock(tsk);
1739 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1741 struct sock *sk = sock->sk;
1742 struct unix_address *addr;
1743 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1747 sk = unix_peer_get(sk);
1757 addr = smp_load_acquire(&unix_sk(sk)->addr);
1759 sunaddr->sun_family = AF_UNIX;
1760 sunaddr->sun_path[0] = 0;
1761 err = offsetof(struct sockaddr_un, sun_path);
1764 memcpy(sunaddr, addr->name, addr->len);
1767 BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
1768 CGROUP_UNIX_GETPEERNAME);
1770 BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
1771 CGROUP_UNIX_GETSOCKNAME);
1778 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
1780 scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1783 * Garbage collection of unix sockets starts by selecting a set of
1784 * candidate sockets which have reference only from being in flight
1785 * (total_refs == inflight_refs). This condition is checked once during
1786 * the candidate collection phase, and candidates are marked as such, so
1787 * that non-candidates can later be ignored. While inflight_refs is
1788 * protected by unix_gc_lock, total_refs (file count) is not, hence this
1789 * is an instantaneous decision.
1791 * Once a candidate, however, the socket must not be reinstalled into a
1792 * file descriptor while the garbage collection is in progress.
1794 * If the above conditions are met, then the directed graph of
1795 * candidates (*) does not change while unix_gc_lock is held.
1797 * Any operations that changes the file count through file descriptors
1798 * (dup, close, sendmsg) does not change the graph since candidates are
1799 * not installed in fds.
1801 * Dequeing a candidate via recvmsg would install it into an fd, but
1802 * that takes unix_gc_lock to decrement the inflight count, so it's
1803 * serialized with garbage collection.
1805 * MSG_PEEK is special in that it does not change the inflight count,
1806 * yet does install the socket into an fd. The following lock/unlock
1807 * pair is to ensure serialization with garbage collection. It must be
1808 * done between incrementing the file count and installing the file into
1811 * If garbage collection starts after the barrier provided by the
1812 * lock/unlock, then it will see the elevated refcount and not mark this
1813 * as a candidate. If a garbage collection is already in progress
1814 * before the file count was incremented, then the lock/unlock pair will
1815 * ensure that garbage collection is finished before progressing to
1816 * installing the fd.
1818 * (*) A -> B where B is on the queue of A or B is on the queue of C
1819 * which is on the queue of listening socket A.
1821 spin_lock(&unix_gc_lock);
1822 spin_unlock(&unix_gc_lock);
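/*
 * Illustrative sketch (userspace, not part of the kernel build): the
 * fd-passing traffic the comment above reasons about, sent as an
 * SCM_RIGHTS control message.  Error handling is minimal and the helper
 * name is made up.
 */
#if 0 /* example only */
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

static int send_fd(int sock, int fd)
{
	char dummy = '*';
	struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
	union {
		char buf[CMSG_SPACE(sizeof(int))];
		struct cmsghdr align;
	} u;
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = u.buf,
		.msg_controllen = sizeof(u.buf),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;	/* pass a file descriptor */
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));

	return sendmsg(sock, &msg, 0) == 1 ? 0 : -1;
}
#endif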
1825 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1829 UNIXCB(skb).pid = get_pid(scm->pid);
1830 UNIXCB(skb).uid = scm->creds.uid;
1831 UNIXCB(skb).gid = scm->creds.gid;
1832 UNIXCB(skb).fp = NULL;
1833 unix_get_secdata(scm, skb);
1834 if (scm->fp && send_fds)
1835 err = unix_attach_fds(scm, skb);
1837 skb->destructor = unix_destruct_scm;
1841 static bool unix_passcred_enabled(const struct socket *sock,
1842 const struct sock *other)
1844 return test_bit(SOCK_PASSCRED, &sock->flags) ||
1845 test_bit(SOCK_PASSPIDFD, &sock->flags) ||
1846 !other->sk_socket ||
1847 test_bit(SOCK_PASSCRED, &other->sk_socket->flags) ||
1848 test_bit(SOCK_PASSPIDFD, &other->sk_socket->flags);
1852 * Some apps rely on write() giving SCM_CREDENTIALS
1853 * We include credentials if source or destination socket
1854 * asserted SOCK_PASSCRED.
1856 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1857 const struct sock *other)
1859 if (UNIXCB(skb).pid)
1861 if (unix_passcred_enabled(sock, other)) {
1862 UNIXCB(skb).pid = get_pid(task_tgid(current));
1863 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
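/*
 * Illustrative sketch (userspace, not part of the kernel build): opting in
 * to the credentials described above with SO_PASSCRED and reading the
 * SCM_CREDENTIALS control message the kernel then attaches.  The helper
 * name is made up.
 */
#if 0 /* example only */
#define _GNU_SOURCE	/* for struct ucred */
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

static int recv_creds(int sock, struct ucred *out)
{
	char data, cbuf[CMSG_SPACE(sizeof(struct ucred))];
	struct iovec iov = { .iov_base = &data, .iov_len = 1 };
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = cbuf,
		.msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cmsg;
	int on = 1;

	setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
	if (recvmsg(sock, &msg, 0) < 0)
		return -1;

	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
		if (cmsg->cmsg_level == SOL_SOCKET &&
		    cmsg->cmsg_type == SCM_CREDENTIALS) {
			memcpy(out, CMSG_DATA(cmsg), sizeof(*out));
			return 0;
		}
	return -1;
}
#endif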
static bool unix_skb_scm_eq(struct sk_buff *skb,
			    struct scm_cookie *scm)
{
	return UNIXCB(skb).pid == scm->pid &&
	       uid_eq(UNIXCB(skb).uid, scm->creds.uid) &&
	       gid_eq(UNIXCB(skb).gid, scm->creds.gid) &&
	       unix_secdata_eq(scm, skb);
}

static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
{
	struct scm_fp_list *fp = UNIXCB(skb).fp;
	struct unix_sock *u = unix_sk(sk);

	if (unlikely(fp && fp->count))
		atomic_add(fp->count, &u->scm_stat.nr_fds);
}

static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
{
	struct scm_fp_list *fp = UNIXCB(skb).fp;
	struct unix_sock *u = unix_sk(sk);

	if (unlikely(fp && fp->count))
		atomic_sub(fp->count, &u->scm_stat.nr_fds);
}
/*
 *	Send AF_UNIX data.
 */

static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
			      size_t len)
{
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
	struct sock *sk = sock->sk, *other = NULL;
	struct unix_sock *u = unix_sk(sk);
	struct scm_cookie scm;
	struct sk_buff *skb;
	int data_len = 0;
	int sk_locked;
	long timeo;
	int err;

	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_validate_addr(sunaddr, msg->msg_namelen);
		if (err)
			goto out;

		err = BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk,
							    msg->msg_name,
							    &msg->msg_namelen,
							    NULL);
		if (err)
			goto out;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
	     test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
		err = unix_autobind(sk);
		if (err)
			goto out;
	}

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	if (len > SKB_MAX_ALLOC) {
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);
		data_len = PAGE_ALIGN(data_len);

		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
	}

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err,
				   PAGE_ALLOC_COSTLY_ORDER);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(&scm, skb, true);
	if (err < 0)
		goto out_free;

	skb_put(skb, len - data_len);
	skb->data_len = data_len;
	skb->len = len;
	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen,
					sk->sk_type);
		if (IS_ERR(other)) {
			err = PTR_ERR(other);
			other = NULL;
			goto out_free;
		}
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	sk_locked = 0;
	unix_state_lock(other);
restart_locked:
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (unlikely(sock_flag(other, SOCK_DEAD))) {
		/*
		 *	Check with 1003.1g - what should
		 *	datagram error
		 */
		unix_state_unlock(other);
		sock_put(other);

		if (!sk_locked)
			unix_state_lock(sk);

		err = 0;
		if (sk->sk_type == SOCK_SEQPACKET) {
			/* We are here only when racing with unix_release_sock()
			 * is clearing @other. Never change state to TCP_CLOSE
			 * unlike SOCK_DGRAM wants.
			 */
			unix_state_unlock(sk);
			err = -EPIPE;
		} else if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_dgram_peer_wake_disconnect_wakeup(sk, other);

			sk->sk_state = TCP_CLOSE;
			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* other == sk && unix_peer(other) != sk if
	 * - unix_peer(sk) == NULL, destination address bound to sk
	 * - unix_peer(sk) == sk by time of get but disconnected before lock
	 */
	if (other != sk &&
	    unlikely(unix_peer(other) != sk &&
	    unix_recvq_full_lockless(other))) {
		if (timeo) {
			timeo = unix_wait_for_peer(other, timeo);

			err = sock_intr_errno(timeo);
			if (signal_pending(current))
				goto out_free;

			goto restart;
		}

		if (!sk_locked) {
			unix_state_unlock(other);
			unix_state_double_lock(sk, other);
		}

		if (unix_peer(sk) != other ||
		    unix_dgram_peer_wake_me(sk, other)) {
			err = -EAGAIN;
			sk_locked = 1;
			goto out_unlock;
		}

		if (!sk_locked) {
			sk_locked = 1;
			goto restart_locked;
		}
	}

	if (unlikely(sk_locked))
		unix_state_unlock(sk);

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	maybe_add_creds(skb, sock, other);
	scm_stat_add(other, skb);
	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	scm_destroy(&scm);
	return len;

out_unlock:
	if (sk_locked)
		unix_state_unlock(sk);
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(&scm);
	return err;
}

/* We use paged skbs for stream sockets, and limit occupancy to 32768
 * bytes, with a minimum of a full page.
 */
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
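/*
 * Worked example (arithmetic only): with 4 KiB pages, get_order(32768) is 3,
 * so UNIX_SKB_FRAGS_SZ is 4096 << 3 == 32768 bytes; with 64 KiB pages the
 * order is 0 and the "minimum of a full page" case gives 65536 bytes.
 */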
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other,
		     struct scm_cookie *scm, bool fds_sent)
{
	struct unix_sock *ousk = unix_sk(other);
	struct sk_buff *skb;
	int err = 0;

	skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);

	if (!skb)
		return err;

	err = unix_scm_to_skb(scm, skb, !fds_sent);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}
	skb_put(skb, 1);
	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);

	if (err) {
		kfree_skb(skb);
		return err;
	}

	unix_state_lock(other);

	if (sock_flag(other, SOCK_DEAD) ||
	    (other->sk_shutdown & RCV_SHUTDOWN)) {
		unix_state_unlock(other);
		kfree_skb(skb);
		return -EPIPE;
	}

	maybe_add_creds(skb, sock, other);
	skb_get(skb);

	if (ousk->oob_skb)
		consume_skb(ousk->oob_skb);

	WRITE_ONCE(ousk->oob_skb, skb);

	scm_stat_add(other, skb);
	skb_queue_tail(&other->sk_receive_queue, skb);
	sk_send_sigurg(other);
	unix_state_unlock(other);
	other->sk_data_ready(other);

	return err;
}
#endif
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
			       size_t len)
{
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie scm;
	bool fds_sent = false;
	int data_len;

	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags & MSG_OOB) {
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
		if (len)
			len--;
		else
#endif
			goto out_err;
	}

	if (msg->msg_namelen) {
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		size = len - sent;

		if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
			skb = sock_alloc_send_pskb(sk, 0, 0,
						   msg->msg_flags & MSG_DONTWAIT,
						   &err, 0);
		} else {
			/* Keep two messages in the pipe so it schedules better */
			size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);

			/* allow fallback to order-0 allocations */
			size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);

			data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));

			data_len = min_t(size_t, size, PAGE_ALIGN(data_len));

			skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
						   msg->msg_flags & MSG_DONTWAIT, &err,
						   get_order(UNIX_SKB_FRAGS_SZ));
		}
		if (!skb)
			goto out_err;

		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(&scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		fds_sent = true;

		if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
			err = skb_splice_from_iter(skb, &msg->msg_iter, size,
						   sk->sk_allocation);
			if (err < 0) {
				kfree_skb(skb);
				goto out_err;
			}
			size = err;
			refcount_add(size, &sk->sk_wmem_alloc);
		} else {
			skb_put(skb, size - data_len);
			skb->data_len = data_len;
			skb->len = size;
			err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
			if (err) {
				kfree_skb(skb);
				goto out_err;
			}
		}

		unix_state_lock(other);

		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		scm_stat_add(other, skb);
		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other);
		sent += size;
	}

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	if (msg->msg_flags & MSG_OOB) {
		err = queue_oob(sock, msg, other, &scm, fds_sent);
		if (err)
			goto out_err;
		sent++;
	}
#endif

	scm_destroy(&scm);

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(&scm);
	return sent ? : err;
}
static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
				  size_t len)
{
	int err;
	struct sock *sk = sock->sk;

	err = sock_error(sk);
	if (err)
		return err;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (msg->msg_namelen)
		msg->msg_namelen = 0;

	return unix_dgram_sendmsg(sock, msg, len);
}

static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
				  size_t size, int flags)
{
	struct sock *sk = sock->sk;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	return unix_dgram_recvmsg(sock, msg, size, flags);
}
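/*
 * Illustrative sketch (userspace, not part of the kernel build): unlike
 * SOCK_STREAM, the SOCK_SEQPACKET path above preserves record boundaries,
 * so each recv() below returns exactly one of the two writes (it may
 * truncate a record, but never merges records).
 */
#if 0 /* example only */
#include <sys/socket.h>

static void seqpacket_boundaries(void)
{
	char buf[16];
	int fds[2];

	socketpair(AF_UNIX, SOCK_SEQPACKET, 0, fds);
	send(fds[0], "one", 3, 0);
	send(fds[0], "two", 3, 0);
	recv(fds[1], buf, sizeof(buf), 0);	/* returns 3: "one" */
	recv(fds[1], buf, sizeof(buf), 0);	/* returns 3: "two" */
}
#endif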
static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
{
	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);

	if (addr) {
		msg->msg_namelen = addr->len;
		memcpy(msg->msg_name, addr->name, addr->len);
	}
}

int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
			 int flags)
{
	struct scm_cookie scm;
	struct socket *sock = sk->sk_socket;
	struct unix_sock *u = unix_sk(sk);
	struct sk_buff *skb, *last;
	long timeo;
	int skip;
	int err;

	err = -EOPNOTSUPP;
	if (flags & MSG_OOB)
		goto out;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		mutex_lock(&u->iolock);

		skip = sk_peek_offset(sk, flags);
		skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
					      &skip, &err, &last);
		if (skb) {
			if (!(flags & MSG_PEEK))
				scm_stat_del(sk, skb);
			break;
		}

		mutex_unlock(&u->iolock);

		if (err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
					      &err, &timeo, last));

	if (!skb) { /* implies iolock unlocked */
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out;
	}

	if (wq_has_sleeper(&u->peer_wait))
		wake_up_interruptible_sync_poll(&u->peer_wait,
						EPOLLOUT | EPOLLWRNORM |
						EPOLLWRBAND);

	if (msg->msg_name) {
		unix_copy_addr(msg, skb->sk);

		BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
						      msg->msg_name,
						      &msg->msg_namelen);
	}

	if (size > skb->len - skip)
		size = skb->len - skip;
	else if (size < skb->len - skip)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_msg(skb, skip, msg, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	memset(&scm, 0, sizeof(scm));

	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
	unix_set_secdata(&scm, skb);

	if (!(flags & MSG_PEEK)) {
		if (UNIXCB(skb).fp)
			unix_detach_fds(&scm, skb);

		sk_peek_offset_bwd(sk, skb->len);
	} else {
		/* It is questionable: on PEEK we could:
		   - do not return fds - good, but too simple 8)
		   - return fds, and do not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose it for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!
		*/

		sk_peek_offset_fwd(sk, size);

		if (UNIXCB(skb).fp)
			unix_peek_fds(&scm, skb);
	}
	err = (flags & MSG_TRUNC) ? skb->len - skip : size;

	scm_recv_unix(sock, msg, &scm, flags);

out_free:
	skb_free_datagram(sk, skb);
	mutex_unlock(&u->iolock);
out:
	return err;
}
static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
			      int flags)
{
	struct sock *sk = sock->sk;

#ifdef CONFIG_BPF_SYSCALL
	const struct proto *prot = READ_ONCE(sk->sk_prot);

	if (prot != &unix_dgram_proto)
		return prot->recvmsg(sk, msg, size, flags, NULL);
#endif
	return __unix_dgram_recvmsg(sk, msg, size, flags);
}

static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
	struct unix_sock *u = unix_sk(sk);
	struct sk_buff *skb;
	int err;

	mutex_lock(&u->iolock);
	skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
	mutex_unlock(&u->iolock);
	if (!skb)
		return err;

	return recv_actor(sk, skb);
}
2489 * Sleep until more data has arrived. But check for races..
2491 static long unix_stream_data_wait(struct sock *sk, long timeo,
2492 struct sk_buff *last, unsigned int last_len,
2495 unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE;
2496 struct sk_buff *tail;
2499 unix_state_lock(sk);
2502 prepare_to_wait(sk_sleep(sk), &wait, state);
2504 tail = skb_peek_tail(&sk->sk_receive_queue);
2506 (tail && tail->len != last_len) ||
2508 (sk->sk_shutdown & RCV_SHUTDOWN) ||
2509 signal_pending(current) ||
2513 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2514 unix_state_unlock(sk);
2515 timeo = schedule_timeout(timeo);
2516 unix_state_lock(sk);
2518 if (sock_flag(sk, SOCK_DEAD))
2521 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2524 finish_wait(sk_sleep(sk), &wait);
2525 unix_state_unlock(sk);
2529 static unsigned int unix_skb_len(const struct sk_buff *skb)
2531 return skb->len - UNIXCB(skb).consumed;
2534 struct unix_stream_read_state {
2535 int (*recv_actor)(struct sk_buff *, int, int,
2536 struct unix_stream_read_state *);
2537 struct socket *socket;
2539 struct pipe_inode_info *pipe;
2542 unsigned int splice_flags;
2545 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2546 static int unix_stream_recv_urg(struct unix_stream_read_state *state)
2548 struct socket *sock = state->socket;
2549 struct sock *sk = sock->sk;
2550 struct unix_sock *u = unix_sk(sk);
2552 struct sk_buff *oob_skb;
2554 mutex_lock(&u->iolock);
2555 unix_state_lock(sk);
2557 if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
2558 unix_state_unlock(sk);
2559 mutex_unlock(&u->iolock);
2563 oob_skb = u->oob_skb;
2565 if (!(state->flags & MSG_PEEK))
2566 WRITE_ONCE(u->oob_skb, NULL);
2569 unix_state_unlock(sk);
2571 chunk = state->recv_actor(oob_skb, 0, chunk, state);
2573 if (!(state->flags & MSG_PEEK))
2574 UNIXCB(oob_skb).consumed += 1;
2576 consume_skb(oob_skb);
2578 mutex_unlock(&u->iolock);
2583 state->msg->msg_flags |= MSG_OOB;

static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
				  int flags, int copied)
{
	struct unix_sock *u = unix_sk(sk);

	if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
		skb_unlink(skb, &sk->sk_receive_queue);
		consume_skb(skb);
		skb = NULL;
	} else {
		if (skb == u->oob_skb) {
			if (copied) {
				skb = NULL;
			} else if (sock_flag(sk, SOCK_URGINLINE)) {
				if (!(flags & MSG_PEEK)) {
					WRITE_ONCE(u->oob_skb, NULL);
					consume_skb(skb);
				}
			} else if (flags & MSG_PEEK) {
				skb = NULL;
			} else {
				skb_unlink(skb, &sk->sk_receive_queue);
				WRITE_ONCE(u->oob_skb, NULL);
				if (!WARN_ON_ONCE(skb_unref(skb)))
					kfree_skb(skb);
				skb = skb_peek(&sk->sk_receive_queue);
			}
		}
	}
	return skb;
}
#endif

static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
	if (unlikely(sk->sk_state != TCP_ESTABLISHED))
		return -ENOTCONN;

	return unix_read_skb(sk, recv_actor);
}

static int unix_stream_read_generic(struct unix_stream_read_state *state,
				    bool freezable)
{
	struct scm_cookie scm;
	struct socket *sock = state->socket;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int copied = 0;
	int flags = state->flags;
	int noblock = flags & MSG_DONTWAIT;
	bool check_creds = false;
	int target;
	int err = 0;
	long timeo;
	int skip;
	size_t size = state->size;
	unsigned int last_len;

	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
		err = -EINVAL;
		goto out;
	}

	if (unlikely(flags & MSG_OOB)) {
		err = -EOPNOTSUPP;
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
		err = unix_stream_recv_urg(state);
#endif
		goto out;
	}

	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, noblock);

	memset(&scm, 0, sizeof(scm));

	/* Lock the socket to prevent queue disordering
	 * while we sleep in memcpy_to_msg().
	 */
	mutex_lock(&u->iolock);

	skip = max(sk_peek_offset(sk, flags), 0);

	do {
		int chunk;
		bool drop_skb;
		struct sk_buff *skb, *last;

redo:
		unix_state_lock(sk);
		if (sock_flag(sk, SOCK_DEAD)) {
			err = -ECONNRESET;
			goto unlock;
		}
		last = skb = skb_peek(&sk->sk_receive_queue);
		last_len = last ? last->len : 0;

again:
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
		if (skb) {
			skb = manage_oob(skb, sk, flags, copied);
			if (!skb && copied) {
				unix_state_unlock(sk);
				break;
			}
		}
#endif
		if (skb == NULL) {
			if (copied >= target)
				goto unlock;

			/* POSIX 1003.1g mandates this order. */
			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			if (!timeo) {
				err = -EAGAIN;
				break;
			}

			mutex_unlock(&u->iolock);

			timeo = unix_stream_data_wait(sk, timeo, last,
						      last_len, freezable);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				scm_destroy(&scm);
				goto out;
			}

			mutex_lock(&u->iolock);
			goto redo;
unlock:
			unix_state_unlock(sk);
			break;
		}

		while (skip >= unix_skb_len(skb)) {
			skip -= unix_skb_len(skb);
			last = skb;
			last_len = skb->len;
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (!skb)
				goto again;
		}

		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (!unix_skb_scm_eq(skb, &scm))
				break;
		} else if (test_bit(SOCK_PASSCRED, &sock->flags) ||
			   test_bit(SOCK_PASSPIDFD, &sock->flags)) {
			/* Copy credentials */
			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
			unix_set_secdata(&scm, skb);
			check_creds = true;
		}

		/* Copy address just once */
		if (state->msg && state->msg->msg_name) {
			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
					 state->msg->msg_name);
			unix_copy_addr(state->msg, skb->sk);

			BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
							      state->msg->msg_name,
							      &state->msg->msg_namelen);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
		skb_get(skb);
		chunk = state->recv_actor(skb, skip, chunk, state);
		drop_skb = !unix_skb_len(skb);
		/* skb is only safe to use if !drop_skb */
		consume_skb(skb);
		if (chunk < 0) {
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		if (drop_skb) {
			/* the skb was touched by a concurrent reader;
			 * we should not expect anything from this skb
			 * anymore and assume it invalid - we can be
			 * sure it was dropped from the socket queue
			 *
			 * let's report a short read
			 */
			err = 0;
			break;
		}

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			UNIXCB(skb).consumed += chunk;

			sk_peek_offset_bwd(sk, chunk);

			if (UNIXCB(skb).fp) {
				scm_stat_del(sk, skb);
				unix_detach_fds(&scm, skb);
			}

			if (unix_skb_len(skb))
				break;

			skb_unlink(skb, &sk->sk_receive_queue);
			consume_skb(skb);

			if (scm.fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg. */
			if (UNIXCB(skb).fp)
				unix_peek_fds(&scm, skb);

			sk_peek_offset_fwd(sk, chunk);

			if (scm.fp)
				break;

			skip = 0;
			last = skb;
			last_len = skb->len;
			unix_state_lock(sk);
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (skb)
				goto again;
			unix_state_unlock(sk);
			break;
		}
	} while (size);

	mutex_unlock(&u->iolock);
	if (state->msg)
		scm_recv_unix(sock, state->msg, &scm, flags);
	else
		scm_destroy(&scm);
out:
	return copied ? : err;
}
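
/* An illustrative userspace sketch of the sk_peek_offset() handling in the
 * loop above (hypothetical sizes, error handling omitted):
 *
 *	int off = 0;
 *	setsockopt(sock, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
 *
 *	char buf[4];
 *	recv(sock, buf, 4, MSG_PEEK);	// peeks bytes 0..3, offset -> 4
 *	recv(sock, buf, 4, MSG_PEEK);	// peeks bytes 4..7, offset -> 8
 *	recv(sock, buf, 4, 0);		// consumes bytes 0..3, offset -> 4
 *
 * sk_peek_offset_fwd() advances the offset on each peek while
 * sk_peek_offset_bwd() walks it back as data is actually consumed.
 */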

static int unix_stream_read_actor(struct sk_buff *skb,
				  int skip, int chunk,
				  struct unix_stream_read_state *state)
{
	int ret;

	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
				    state->msg, chunk);
	return ret ?: chunk;
}

int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
			  size_t size, int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_read_actor,
		.socket = sk->sk_socket,
		.msg = msg,
		.size = size,
		.flags = flags
	};

	return unix_stream_read_generic(&state, true);
}

static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
			       size_t size, int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_read_actor,
		.socket = sock,
		.msg = msg,
		.size = size,
		.flags = flags
	};

#ifdef CONFIG_BPF_SYSCALL
	struct sock *sk = sock->sk;
	const struct proto *prot = READ_ONCE(sk->sk_prot);

	if (prot != &unix_stream_proto)
		return prot->recvmsg(sk, msg, size, flags, NULL);
#endif
	return unix_stream_read_generic(&state, true);
}

static int unix_stream_splice_actor(struct sk_buff *skb,
				    int skip, int chunk,
				    struct unix_stream_read_state *state)
{
	return skb_splice_bits(skb, state->socket->sk,
			       UNIXCB(skb).consumed + skip,
			       state->pipe, chunk, state->splice_flags);
}

static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
				       struct pipe_inode_info *pipe,
				       size_t size, unsigned int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_splice_actor,
		.socket = sock,
		.pipe = pipe,
		.size = size,
		.splice_flags = flags,
	};

	if (unlikely(*ppos))
		return -ESPIPE;

	if (sock->file->f_flags & O_NONBLOCK ||
	    flags & SPLICE_F_NONBLOCK)
		state.flags = MSG_DONTWAIT;

	return unix_stream_read_generic(&state, false);
}
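
/* An illustrative userspace sketch of the splice path above, moving stream
 * data into a pipe without a copy through userspace (error handling omitted):
 *
 *	int pfd[2];
 *	pipe(pfd);
 *	ssize_t n = splice(sock, NULL, pfd[1], NULL, 4096, SPLICE_F_NONBLOCK);
 *
 * A non-zero socket offset is rejected with -ESPIPE, and SPLICE_F_NONBLOCK
 * (or O_NONBLOCK on the socket) maps onto MSG_DONTWAIT for the generic
 * read loop.
 */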

static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

	if (mode < SHUT_RD || mode > SHUT_RDWR)
		return -EINVAL;
	/* This maps:
	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
	 */
	++mode;

	unix_state_lock(sk);
	WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
	other = unix_peer(sk);
	if (other)
		sock_hold(other);
	unix_state_unlock(sk);
	sk->sk_state_change(sk);

	if (other &&
	    (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
		int peer_mode = 0;
		const struct proto *prot = READ_ONCE(other->sk_prot);

		if (prot->unhash)
			prot->unhash(other);
		if (mode & RCV_SHUTDOWN)
			peer_mode |= SEND_SHUTDOWN;
		if (mode & SEND_SHUTDOWN)
			peer_mode |= RCV_SHUTDOWN;
		unix_state_lock(other);
		WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
		unix_state_unlock(other);
		other->sk_state_change(other);
		if (peer_mode == SHUTDOWN_MASK)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
		else if (peer_mode & RCV_SHUTDOWN)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
	}
	if (other)
		sock_put(other);

	return 0;
}
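
/* An illustrative userspace sketch of the peer notification above
 * (error handling omitted):
 *
 *	shutdown(sock, SHUT_WR);	// our writes now fail with EPIPE;
 *					// the peer's reads return 0 (EOF)
 *
 * Because SHUT_WR on one end sets RCV_SHUTDOWN on the peer, a reader
 * blocked on the other side is woken via sk_wake_async() with POLL_IN.
 */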

long unix_inq_len(struct sock *sk)
{
	struct sk_buff *skb;
	long amount = 0;

	if (sk->sk_state == TCP_LISTEN)
		return -EINVAL;

	spin_lock(&sk->sk_receive_queue.lock);
	if (sk->sk_type == SOCK_STREAM ||
	    sk->sk_type == SOCK_SEQPACKET) {
		skb_queue_walk(&sk->sk_receive_queue, skb)
			amount += unix_skb_len(skb);
	} else {
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	return amount;
}
EXPORT_SYMBOL_GPL(unix_inq_len);

long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);

static int unix_open_file(struct sock *sk)
{
	struct path path;
	struct file *f;
	int fd;

	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;
	if (!smp_load_acquire(&unix_sk(sk)->addr))
		return -ENOENT;

	path = unix_sk(sk)->path;
	if (!path.dentry)
		return -ENOENT;
	path_get(&path);

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		goto out;

	f = dentry_open(&path, O_PATH, current_cred());
	if (IS_ERR(f)) {
		put_unused_fd(fd);
		fd = PTR_ERR(f);
		goto out;
	}

	fd_install(fd, f);
out:
	path_put(&path);
	return fd;
}

static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	long amount = 0;
	int err;

	switch (cmd) {
	case SIOCOUTQ:
		amount = unix_outq_len(sk);
		err = put_user(amount, (int __user *)arg);
		break;
	case SIOCINQ:
		amount = unix_inq_len(sk);
		if (amount < 0)
			err = amount;
		else
			err = put_user(amount, (int __user *)arg);
		break;
	case SIOCUNIXFILE:
		err = unix_open_file(sk);
		break;
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	case SIOCATMARK:
		{
			struct sk_buff *skb;
			int answ = 0;

			skb = skb_peek(&sk->sk_receive_queue);
			if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
				answ = 1;
			err = put_user(answ, (int __user *)arg);
		}
		break;
#endif
	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}
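
/* An illustrative userspace sketch of the ioctls handled above
 * (error handling omitted):
 *
 *	int unread, unsent, at_mark;
 *	ioctl(sock, SIOCINQ, &unread);	   // bytes queued for reading
 *	ioctl(sock, SIOCOUTQ, &unsent);	   // bytes not yet consumed by the peer
 *	ioctl(sock, SIOCATMARK, &at_mark); // 1 if the OOB byte is next in line
 *
 * SIOCUNIXFILE additionally installs an O_PATH fd for the bound socket
 * inode; it requires CAP_NET_ADMIN in the socket's user namespace.
 */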

#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
}
#endif

static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	__poll_t mask;
	u8 shutdown;

	sock_poll_wait(file, sock, wait);
	mask = 0;
	shutdown = READ_ONCE(sk->sk_shutdown);

	/* exceptional events? */
	if (READ_ONCE(sk->sk_err))
		mask |= EPOLLERR;
	if (shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;
	if (shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (sk_is_readable(sk))
		mask |= EPOLLIN | EPOLLRDNORM;
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	if (READ_ONCE(unix_sk(sk)->oob_skb))
		mask |= EPOLLPRI;
#endif

	/* Connection-based need to check for termination and startup */
	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
	    sk->sk_state == TCP_CLOSE)
		mask |= EPOLLHUP;

	/*
	 * we set writable also when the other side has shut down the
	 * connection. This prevents stuck sockets.
	 */
	if (unix_writable(sk))
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;

	return mask;
}
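
/* An illustrative userspace sketch (error handling omitted): after the peer
 * closes, a stream socket reports both directions down yet stays writable:
 *
 *	struct pollfd pfd = { .fd = sock, .events = POLLIN | POLLOUT };
 *	poll(&pfd, 1, -1);
 *	// revents: roughly POLLIN | POLLRDHUP | POLLHUP | POLLOUT
 *
 * Reporting writability here is deliberate: a blocked writer wakes up,
 * attempts the write, and gets EPIPE instead of hanging forever.
 */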

static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int writable;
	__poll_t mask;
	u8 shutdown;

	sock_poll_wait(file, sock, wait);
	mask = 0;
	shutdown = READ_ONCE(sk->sk_shutdown);

	/* exceptional events? */
	if (READ_ONCE(sk->sk_err) ||
	    !skb_queue_empty_lockless(&sk->sk_error_queue))
		mask |= EPOLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

	if (shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (sk_is_readable(sk))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= EPOLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
		return mask;

	writable = unix_writable(sk);
	if (writable) {
		unix_state_lock(sk);

		other = unix_peer(sk);
		if (other && unix_peer(other) != sk &&
		    unix_recvq_full_lockless(other) &&
		    unix_dgram_peer_wake_me(sk, other))
			writable = 0;

		unix_state_unlock(sk);
	}

	if (writable)
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
3192 #ifdef CONFIG_PROC_FS
3194 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
3196 #define get_bucket(x) ((x) >> BUCKET_SPACE)
3197 #define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1))
3198 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
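
/* Worked example of the packing above, assuming BITS_PER_LONG == 64 and
 * UNIX_HASH_BITS == 8: BUCKET_SPACE is 64 - 9 - 1 = 54, so
 * set_bucket_offset(3, 7) stores bucket 3 in the high bits and in-bucket
 * offset 7 in the low 54 bits; get_bucket() and get_offset() recover 3 and 7.
 */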

static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
{
	unsigned long offset = get_offset(*pos);
	unsigned long bucket = get_bucket(*pos);
	unsigned long count = 0;
	struct sock *sk;

	for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]);
	     sk; sk = sk_next(sk)) {
		if (++count == offset)
			break;
	}

	return sk;
}

static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
{
	unsigned long bucket = get_bucket(*pos);
	struct net *net = seq_file_net(seq);
	struct sock *sk;

	while (bucket < UNIX_HASH_SIZE) {
		spin_lock(&net->unx.table.locks[bucket]);
		sk = unix_from_bucket(seq, pos);
		if (sk)
			return sk;
		spin_unlock(&net->unx.table.locks[bucket]);

		*pos = set_bucket_offset(++bucket, 1);
	}

	return NULL;
}

static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
				  loff_t *pos)
{
	unsigned long bucket = get_bucket(*pos);

	for (sk = sk_next(sk); sk; sk = sk_next(sk))
		return sk;

	spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]);

	*pos = set_bucket_offset(++bucket, 1);

	return unix_get_first(seq, pos);
}

static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (!*pos)
		return SEQ_START_TOKEN;

	return unix_get_first(seq, pos);
}

static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;

	if (v == SEQ_START_TOKEN)
		return unix_get_first(seq, pos);

	return unix_get_next(seq, v, pos);
}

static void unix_seq_stop(struct seq_file *seq, void *v)
{
	struct sock *sk = v;

	if (sk)
		spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]);
}

static int unix_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);

		unix_state_lock(s);

		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			refcount_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {	/* under a hash table lock here */
			int i, len;

			seq_putc(seq, ' ');

			i = 0;
			len = u->addr->len -
				offsetof(struct sockaddr_un, sun_path);
			if (u->addr->name->sun_path[0]) {
				len--;
			} else {
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i] ?:
					 '@');
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}
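
/* A /proc/net/unix line emitted above looks roughly like (illustrative
 * values only):
 *
 *   0000000000000000: 00000002 00000000 00010000 0001 01 12345 /run/foo.sock
 *
 * Flags carries __SO_ACCEPTCON (0x10000) for listeners, Type is the SOCK_*
 * value, St the SS_* state, and abstract names are printed with a leading
 * '@' (embedded NUL bytes are rendered as '@' too).
 */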

static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};

#ifdef CONFIG_BPF_SYSCALL
struct bpf_unix_iter_state {
	struct seq_net_private p;
	unsigned int cur_sk;
	unsigned int end_sk;
	unsigned int max_sk;
	struct sock **batch;
	bool st_bucket_done;
};

struct bpf_iter__unix {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct unix_sock *, unix_sk);
	uid_t uid __aligned(8);
};

static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
			      struct unix_sock *unix_sk, uid_t uid)
{
	struct bpf_iter__unix ctx;

	meta->seq_num--;  /* skip SEQ_START_TOKEN */
	ctx.meta = meta;
	ctx.unix_sk = unix_sk;
	ctx.uid = uid;
	return bpf_iter_run_prog(prog, &ctx);
}

static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
{
	struct bpf_unix_iter_state *iter = seq->private;
	unsigned int expected = 1;
	struct sock *sk;

	sock_hold(start_sk);
	iter->batch[iter->end_sk++] = start_sk;

	for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
		if (iter->end_sk < iter->max_sk) {
			sock_hold(sk);
			iter->batch[iter->end_sk++] = sk;
		}

		expected++;
	}

	spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]);

	return expected;
}

static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
{
	while (iter->cur_sk < iter->end_sk)
		sock_put(iter->batch[iter->cur_sk++]);
}

static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
				       unsigned int new_batch_sz)
{
	struct sock **new_batch;

	new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
			     GFP_USER | __GFP_NOWARN);
	if (!new_batch)
		return -ENOMEM;

	bpf_iter_unix_put_batch(iter);
	kvfree(iter->batch);
	iter->batch = new_batch;
	iter->max_sk = new_batch_sz;

	return 0;
}

static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
					loff_t *pos)
{
	struct bpf_unix_iter_state *iter = seq->private;
	unsigned int expected;
	bool resized = false;
	struct sock *sk;

	if (iter->st_bucket_done)
		*pos = set_bucket_offset(get_bucket(*pos) + 1, 1);

again:
	/* Get a new batch */
	iter->cur_sk = 0;
	iter->end_sk = 0;

	sk = unix_get_first(seq, pos);
	if (!sk)
		return NULL; /* Done */

	expected = bpf_iter_unix_hold_batch(seq, sk);

	if (iter->end_sk == expected) {
		iter->st_bucket_done = true;
		return sk;
	}

	if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) {
		resized = true;
		goto again;
	}

	return sk;
}

static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (!*pos)
		return SEQ_START_TOKEN;

	/* bpf iter does not support lseek, so it always
	 * continues from where it was stop()-ped.
	 */
	return bpf_iter_unix_batch(seq, pos);
}

static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_unix_iter_state *iter = seq->private;
	struct sock *sk;

	/* Whenever seq_next() is called, the iter->cur_sk is
	 * done with seq_show(), so advance to the next sk in
	 * the batch.
	 */
	if (iter->cur_sk < iter->end_sk)
		sock_put(iter->batch[iter->cur_sk++]);

	++*pos;

	if (iter->cur_sk < iter->end_sk)
		sk = iter->batch[iter->cur_sk];
	else
		sk = bpf_iter_unix_batch(seq, pos);

	return sk;
}

static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
{
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;
	struct sock *sk = v;
	uid_t uid;
	bool slow;
	int ret;

	if (v == SEQ_START_TOKEN)
		return 0;

	slow = lock_sock_fast(sk);

	if (unlikely(sk_unhashed(sk))) {
		ret = SEQ_SKIP;
		goto unlock;
	}

	uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, false);
	ret = unix_prog_seq_show(prog, &meta, v, uid);
unlock:
	unlock_sock_fast(sk, slow);
	return ret;
}

static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
{
	struct bpf_unix_iter_state *iter = seq->private;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;

	if (!v) {
		meta.seq = seq;
		prog = bpf_iter_get_info(&meta, true);
		if (prog)
			(void)unix_prog_seq_show(prog, &meta, v, 0);
	}

	if (iter->cur_sk < iter->end_sk)
		bpf_iter_unix_put_batch(iter);
}

static const struct seq_operations bpf_iter_unix_seq_ops = {
	.start	= bpf_iter_unix_seq_start,
	.next	= bpf_iter_unix_seq_next,
	.stop	= bpf_iter_unix_seq_stop,
	.show	= bpf_iter_unix_seq_show,
};
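
/* An illustrative sketch of a BPF program attaching to this iterator,
 * modelled on the kernel selftests (names hypothetical):
 *
 *	SEC("iter/unix")
 *	int dump_unix(struct bpf_iter__unix *ctx)
 *	{
 *		struct unix_sock *unix_sk = ctx->unix_sk;
 *
 *		if (!unix_sk)
 *			return 0;	// final call after the last socket
 *
 *		BPF_SEQ_PRINTF(ctx->meta->seq, "uid=%u\n", ctx->uid);
 *		return 0;
 *	}
 *
 * Each batched socket is shown with its uid filled in by
 * bpf_iter_unix_seq_show(); unix_sk is NULL for the stop() call.
 */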
#endif
#endif

static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};

static int __net_init unix_net_init(struct net *net)
{
	int i;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
			     sizeof(struct seq_net_private)))
		goto err_sysctl;
#endif

	net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE,
					      sizeof(spinlock_t), GFP_KERNEL);
	if (!net->unx.table.locks)
		goto err_proc;

	net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE,
						sizeof(struct hlist_head),
						GFP_KERNEL);
	if (!net->unx.table.buckets)
		goto free_locks;

	for (i = 0; i < UNIX_HASH_SIZE; i++) {
		spin_lock_init(&net->unx.table.locks[i]);
		INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
	}

	return 0;

free_locks:
	kvfree(net->unx.table.locks);
err_proc:
#ifdef CONFIG_PROC_FS
	remove_proc_entry("unix", net->proc_net);
err_sysctl:
#endif
	unix_sysctl_unregister(net);
out:
	return -ENOMEM;
}

static void __net_exit unix_net_exit(struct net *net)
{
	kvfree(net->unx.table.buckets);
	kvfree(net->unx.table.locks);
	unix_sysctl_unregister(net);
	remove_proc_entry("unix", net->proc_net);
}

static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};

#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
		     struct unix_sock *unix_sk, uid_t uid)

#define INIT_BATCH_SZ 16

static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
{
	struct bpf_unix_iter_state *iter = priv_data;
	int err;

	err = bpf_iter_init_seq_net(priv_data, aux);
	if (err)
		return err;

	err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
	if (err) {
		bpf_iter_fini_seq_net(priv_data);
		return err;
	}

	return 0;
}

static void bpf_iter_fini_unix(void *priv_data)
{
	struct bpf_unix_iter_state *iter = priv_data;

	bpf_iter_fini_seq_net(priv_data);
	kvfree(iter->batch);
}

static const struct bpf_iter_seq_info unix_seq_info = {
	.seq_ops		= &bpf_iter_unix_seq_ops,
	.init_seq_private	= bpf_iter_init_unix,
	.fini_seq_private	= bpf_iter_fini_unix,
	.seq_priv_size		= sizeof(struct bpf_unix_iter_state),
};

static const struct bpf_func_proto *
bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
			     const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_setsockopt:
		return &bpf_sk_setsockopt_proto;
	case BPF_FUNC_getsockopt:
		return &bpf_sk_getsockopt_proto;
	default:
		return NULL;
	}
}

static struct bpf_iter_reg unix_reg_info = {
	.target			= "unix",
	.ctx_arg_info_size	= 1,
	.ctx_arg_info		= {
		{ offsetof(struct bpf_iter__unix, unix_sk),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.get_func_proto         = bpf_iter_unix_get_func_proto,
	.seq_info		= &unix_seq_info,
};

static void __init bpf_iter_register(void)
{
	unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
	if (bpf_iter_reg_target(&unix_reg_info))
		pr_warn("Warning: could not register bpf iterator unix\n");
}
#endif

static int __init af_unix_init(void)
{
	int i, rc = -1;

	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));

	for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
		spin_lock_init(&bsd_socket_locks[i]);
		INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
	}

	rc = proto_register(&unix_dgram_proto, 1);
	if (rc != 0) {
		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
		goto out;
	}

	rc = proto_register(&unix_stream_proto, 1);
	if (rc != 0) {
		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
		proto_unregister(&unix_dgram_proto);
		goto out;
	}

	sock_register(&unix_family_ops);
	register_pernet_subsys(&unix_net_ops);
	unix_bpf_build_proto();

#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
	bpf_iter_register();
#endif

out:
	return rc;
}

/* Later than subsys_initcall() because we depend on stuff initialised there */
fs_initcall(af_unix_init);