// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko EiBfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting
 *		Kirk Petersen	:	Made this a module
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid huge amounts
 *					of socks hashed (this for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina	:	Hash function optimizations
 *	     Alexey Kuznetsov	:	Full scale SMP. Lots of bugs are introduced 8)
 *	      Malcolm Beattie	:	Set peercred for socketpair
 *	     Michal Ostrowski	:	Module initialization cleanup.
 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *					the core infrastructure is doing that
 *					for all net proto families now (2.5.69+)
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  started by 0, so that this name space does not intersect
 *		  with BSD names.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched/signal.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/filter.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>
#include <linux/file.h>
#include <linux/btf_ids.h>

#include "scm.h"
static atomic_long_t unix_nr_socks;
static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];

/* SMP locking strategy:
 *    the hash table is protected by per-bucket spinlocks.
 *    each socket's state is protected by its own separate spinlock.
 */
static unsigned int unix_unbound_hash(struct sock *sk)
{
	unsigned long hash = (unsigned long)sk;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash ^= sk->sk_type;

	return hash & UNIX_HASH_MOD;
}

static unsigned int unix_bsd_hash(struct inode *i)
{
	return i->i_ino & UNIX_HASH_MOD;
}

static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
				       int addr_len, int type)
{
	__wsum csum = csum_partial(sunaddr, addr_len, 0);
	unsigned int hash;

	hash = (__force unsigned int)csum_fold(csum);
	hash ^= hash >> 8;
	hash ^= type;

	return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
}
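
/* Illustrative note (not from the original source): the three helpers
 * above partition the hash space so a socket's bucket also encodes what
 * kind of name it has, assuming the usual UNIX_HASH_MOD definition:
 *
 *	unix_unbound_hash()	-> [0, UNIX_HASH_MOD]
 *	unix_bsd_hash()		-> [0, UNIX_HASH_MOD] (plus the bsd side table)
 *	unix_abstract_hash()	-> [UNIX_HASH_MOD + 1, 2 * UNIX_HASH_MOD + 1]
 *
 * e.g. an abstract name whose folded checksum is 0x2a always lands in
 * bucket UNIX_HASH_MOD + 1 + 0x2a, disjoint from every unbound socket.
 */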
static void unix_table_double_lock(struct net *net,
				   unsigned int hash1, unsigned int hash2)
{
	if (hash1 == hash2) {
		spin_lock(&net->unx.table.locks[hash1]);
		return;
	}

	if (hash1 > hash2)
		swap(hash1, hash2);

	spin_lock(&net->unx.table.locks[hash1]);
	spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
}

static void unix_table_double_unlock(struct net *net,
				     unsigned int hash1, unsigned int hash2)
{
	if (hash1 == hash2) {
		spin_unlock(&net->unx.table.locks[hash1]);
		return;
	}

	spin_unlock(&net->unx.table.locks[hash1]);
	spin_unlock(&net->unx.table.locks[hash2]);
}
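
/* Usage sketch (mirrors the callers further down): rebinding moves a
 * socket between buckets, so both bucket locks are taken, in ascending
 * index order, which keeps the lock ordering consistent even when two
 * CPUs pass the same pair of hashes in opposite argument order:
 *
 *	unix_table_double_lock(net, old_hash, new_hash);
 *	__unix_set_addr_hash(net, sk, addr, new_hash);
 *	unix_table_double_unlock(net, old_hash, new_hash);
 */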
#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
#endif /* CONFIG_SECURITY_NETWORK */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(const struct sock *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

static inline int unix_recvq_full_lockless(const struct sock *sk)
{
	return skb_queue_len_lockless(&sk->sk_receive_queue) >
		READ_ONCE(sk->sk_max_ack_backlog);
}

struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);
static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
					     int addr_len)
{
	struct unix_address *addr;

	addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
	if (!addr)
		return NULL;

	refcount_set(&addr->refcnt, 1);
	addr->len = addr_len;
	memcpy(addr->name, sunaddr, addr_len);

	return addr;
}

static inline void unix_release_addr(struct unix_address *addr)
{
	if (refcount_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 *	Check unix socket name:
 *		- it should not be zero length.
 *		- if it does not start with a zero byte, it must be
 *		  NUL-terminated (a filesystem object).
 *		- if it starts with a zero byte, it is an abstract name.
 */

static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
{
	if (addr_len <= offsetof(struct sockaddr_un, sun_path) ||
	    addr_len > sizeof(*sunaddr))
		return -EINVAL;

	if (sunaddr->sun_family != AF_UNIX)
		return -EINVAL;

	return 0;
}
static void unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
{
	/* This may look like an off by one error but it is a bit more
	 * subtle.  108 is the longest valid AF_UNIX path for a binding.
	 * sun_path[108] doesn't as such exist.  However in kernel space
	 * we are guaranteed that it is a valid memory location in our
	 * kernel address buffer because syscall functions always pass
	 * a pointer of struct sockaddr_storage which has a bigger buffer
	 * than 108.
	 */
	((char *)sunaddr)[addr_len] = 0;
}
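
/* Illustrative userspace example (not part of the build): a caller may
 * legally fill all 108 sun_path bytes with no trailing NUL; since the
 * kernel copy lives in a struct sockaddr_storage, terminating one byte
 * past the caller-supplied length is safe:
 *
 *	struct sockaddr_un sun = { .sun_family = AF_UNIX };
 *	memcpy(sun.sun_path, long_path, 108);
 *	bind(fd, (struct sockaddr *)&sun, sizeof(sun));
 */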
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct net *net, struct sock *sk)
{
	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
	sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
}

static void __unix_set_addr_hash(struct net *net, struct sock *sk,
				 struct unix_address *addr, unsigned int hash)
{
	__unix_remove_socket(sk);
	smp_store_release(&unix_sk(sk)->addr, addr);

	sk->sk_hash = hash;
	__unix_insert_socket(net, sk);
}

static void unix_remove_socket(struct net *net, struct sock *sk)
{
	spin_lock(&net->unx.table.locks[sk->sk_hash]);
	__unix_remove_socket(sk);
	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
}

static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
{
	spin_lock(&net->unx.table.locks[sk->sk_hash]);
	__unix_insert_socket(net, sk);
	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
}

static void unix_insert_bsd_socket(struct sock *sk)
{
	spin_lock(&bsd_socket_locks[sk->sk_hash]);
	sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
	spin_unlock(&bsd_socket_locks[sk->sk_hash]);
}

static void unix_remove_bsd_socket(struct sock *sk)
{
	if (!hlist_unhashed(&sk->sk_bind_node)) {
		spin_lock(&bsd_socket_locks[sk->sk_hash]);
		__sk_del_bind_node(sk);
		spin_unlock(&bsd_socket_locks[sk->sk_hash]);

		sk_node_init(&sk->sk_bind_node);
	}
}
static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &net->unx.table.buckets[hash]) {
		struct unix_sock *u = unix_sk(s);

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			return s;
	}
	return NULL;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, unsigned int hash)
{
	struct sock *s;

	spin_lock(&net->unx.table.locks[hash]);
	s = __unix_find_socket_byname(net, sunname, len, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&net->unx.table.locks[hash]);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	unsigned int hash = unix_bsd_hash(i);
	struct sock *s;

	spin_lock(&bsd_socket_locks[hash]);
	sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && d_backing_inode(dentry) == i) {
			sock_hold(s);
			spin_unlock(&bsd_socket_locks[hash]);
			return s;
		}
	}
	spin_unlock(&bsd_socket_locks[hash]);
	return NULL;
}
/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (eg, /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large". This means there's a second writeability condition
 * poll and sendmsg need to test. The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far. This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue.
 *
 * In order to propagate a wake up, a wait_queue_entry_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and is broken when
 * the association to the server socket is dissolved or after a wake
 * up was relayed.
 */
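
/* Worked example (illustrative sketch): many clients connect() to a
 * datagram listener such as /dev/log that never connects back.  When its
 * receive queue is full, a client doing
 *
 *	struct pollfd pfd = { .fd = client_fd, .events = POLLOUT };
 *	poll(&pfd, 1, -1);
 *
 * must be woken once the listener's reader drains a datagram; the
 * peer_wake entry handled below is what relays that wakeup to the
 * client's own wait queue.
 */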
static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key_to_poll(key));

	return 0;
}

static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
	struct unix_sock *u, *u_other;
	int rc;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	rc = 0;
	spin_lock(&u_other->peer_wait.lock);

	if (!u->peer_wake.private) {
		u->peer_wake.private = other;
		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);

		rc = 1;
	}

	spin_unlock(&u_other->peer_wait.lock);
	return rc;
}

static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}
static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
						   struct sock *other)
{
	unix_dgram_peer_wake_disconnect(sk, other);
	wake_up_interruptible_poll(sk_sleep(sk),
				   EPOLLOUT |
				   EPOLLWRNORM |
				   EPOLLWRBAND);
}

/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	/* If other is SOCK_DEAD, we want to make sure we signal
	 * POLLOUT, such that a subsequent write() can get a
	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
	 * to other and it's full, we will hang waiting for POLLOUT.
	 */
	if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}
static int unix_writable(const struct sock *sk)
{
	return sk->sk_state != TCP_LISTEN &&
	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}
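
/* Example of the arithmetic above (illustrative): (wmem << 2) <= sndbuf
 * is wmem <= sndbuf / 4, so with a typical (sysctl-dependent) sk_sndbuf
 * of 212992 bytes the socket stops being writable once roughly 53248
 * bytes of send buffer are in flight.
 */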
static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}
/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets that arrived from the previous peer.  First, this allows
 * flow control based only on wmem_alloc; second, an sk connected to a peer
 * may receive messages only from that peer.
 */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of a bidirectional dgram pipe is disconnected,
		 * we signal the error; messages are lost.  Do not do this
		 * when the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			sk_error_report(other);
		}
	}
	other->sk_state = TCP_CLOSE;
}
static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
	DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}
static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct sock *skpair;
	struct sk_buff *skb;
	struct path path;
	int state;

	unix_remove_socket(sock_net(sk), sk);
	unix_remove_bsd_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
	path	     = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;

	skpair = unix_peer(sk);
	unix_peer(sk) = NULL;

	unix_state_unlock(sk);

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	if (u->oob_skb) {
		kfree_skb(u->oob_skb);
		u->oob_skb = NULL;
	}
#endif

	wake_up_interruptible_all(&u->peer_wait);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook */
		UNIXCB(skb).consumed = skb->len;
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What does the above comment talk about? --ANK(980817)
	 */

	if (READ_ONCE(unix_tot_inflight))
		unix_gc();		/* Garbage collect fds */
}
static void init_peercred(struct sock *sk)
{
	const struct cred *old_cred;
	struct pid *old_pid;

	spin_lock(&sk->sk_peer_lock);
	old_pid = sk->sk_peer_pid;
	old_cred = sk->sk_peer_cred;
	sk->sk_peer_pid  = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
	spin_unlock(&sk->sk_peer_lock);

	put_pid(old_pid);
	put_cred(old_cred);
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	const struct cred *old_cred;
	struct pid *old_pid;

	if (sk < peersk) {
		spin_lock(&sk->sk_peer_lock);
		spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
	} else {
		spin_lock(&peersk->sk_peer_lock);
		spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
	}
	old_pid = sk->sk_peer_pid;
	old_cred = sk->sk_peer_cred;
	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);

	spin_unlock(&sk->sk_peer_lock);
	spin_unlock(&peersk->sk_peer_lock);

	put_pid(old_pid);
	put_cred(old_cred);
}
static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
out:
	return err;
}
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
				poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
#endif
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
				    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
				       struct pipe_inode_info *, size_t size,
				       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
				  int);
static int unix_set_peek_off(struct sock *sk, int val)
{
	struct unix_sock *u = unix_sk(sk);

	if (mutex_lock_interruptible(&u->iolock))
		return -EINTR;

	WRITE_ONCE(sk->sk_peek_off, val);
	mutex_unlock(&u->iolock);

	return 0;
}
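
/* Userspace sketch (illustrative): SO_PEEK_OFF makes successive MSG_PEEK
 * reads walk forward through queued data instead of re-reading it; the
 * iolock above keeps the offset update from racing with a recv in flight:
 *
 *	int off = 0;
 *	setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
 *	recv(fd, buf, 16, MSG_PEEK);	peeks bytes 0..15
 *	recv(fd, buf, 16, MSG_PEEK);	peeks bytes 16..31
 */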
#ifdef CONFIG_PROC_FS
static int unix_count_nr_fds(struct sock *sk)
{
	struct sk_buff *skb;
	struct unix_sock *u;
	int nr_fds = 0;

	spin_lock(&sk->sk_receive_queue.lock);
	skb = skb_peek(&sk->sk_receive_queue);
	while (skb) {
		u = unix_sk(skb->sk);
		nr_fds += atomic_read(&u->scm_stat.nr_fds);
		skb = skb_peek_next(skb, &sk->sk_receive_queue);
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	return nr_fds;
}

static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;
	int nr_fds;

	if (sk) {
		u = unix_sk(sk);
		if (sock->type == SOCK_DGRAM) {
			nr_fds = atomic_read(&u->scm_stat.nr_fds);
			goto out_print;
		}

		unix_state_lock(sk);
		if (sk->sk_state != TCP_LISTEN)
			nr_fds = atomic_read(&u->scm_stat.nr_fds);
		else
			nr_fds = unix_count_nr_fds(sk);
		unix_state_unlock(sk);

out_print:
		seq_printf(m, "scm_fds: %u\n", nr_fds);
	}
}
#else
#define unix_show_fdinfo NULL
#endif
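
/* Example (illustrative): a socket with three fds queued via SCM_RIGHTS
 * would report them through procfs roughly as:
 *
 *	$ cat /proc/<pid>/fdinfo/<fd>
 *	...
 *	scm_fds: 3
 */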
static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.read_skb =	unix_stream_read_skb,
	.mmap =		sock_no_mmap,
	.sendpage =	unix_stream_sendpage,
	.splice_read =	unix_stream_splice_read,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};

static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_dgram_sendmsg,
	.read_skb =	unix_read_skb,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};
static void unix_close(struct sock *sk, long timeout)
{
	/* Nothing to do here, unix socket does not need a ->close().
	 * This is merely for sockmap.
	 */
}

static void unix_unhash(struct sock *sk)
{
	/* Nothing to do here, unix socket does not need a ->unhash().
	 * This is merely for sockmap.
	 */
}

struct proto unix_dgram_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
	.close			= unix_close,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= unix_dgram_bpf_update_proto,
#endif
};

struct proto unix_stream_proto = {
	.name			= "UNIX-STREAM",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
	.close			= unix_close,
	.unhash			= unix_unhash,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= unix_stream_bpf_update_proto,
#endif
};
static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
{
	struct unix_sock *u;
	struct sock *sk;
	int err;

	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
		err = -ENFILE;
		goto err;
	}

	if (type == SOCK_STREAM)
		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
	else /* dgram and seqpacket */
		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);

	if (!sk) {
		err = -ENOMEM;
		goto err;
	}

	sock_init_data(sock, sk);

	sk->sk_hash		= unix_unbound_hash(sk);
	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u = unix_sk(sk);
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->iolock); /* single task reading lock */
	mutex_init(&u->bindlock); /* single task binding lock */
	init_waitqueue_head(&u->peer_wait);
	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
	unix_insert_unbound_socket(net, sk);

	sock_prot_inuse_add(net, sk->sk_prot, 1);

	return sk;

err:
	atomic_long_dec(&unix_nr_socks);
	return ERR_PTR(err);
}
static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	struct sock *sk;

	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 *	nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		fallthrough;
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	sk = unix_create1(net, sock, kern, sock->type);
	if (IS_ERR(sk))
		return PTR_ERR(sk);

	return 0;
}
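
/* Userspace sketch (illustrative): because of the SOCK_RAW fallthrough
 * above, both calls below yield an ordinary datagram socket:
 *
 *	int a = socket(AF_UNIX, SOCK_DGRAM, 0);
 *	int b = socket(AF_UNIX, SOCK_RAW, 0);	quietly becomes SOCK_DGRAM
 */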
static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	sk->sk_prot->close(sk, 0);
	unix_release_sock(sk, 0);
	sock->sk = NULL;

	return 0;
}
static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
				  int type)
{
	struct inode *inode;
	struct path path;
	struct sock *sk;
	int err;

	unix_mkname_bsd(sunaddr, addr_len);
	err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
	if (err)
		goto fail;

	err = path_permission(&path, MAY_WRITE);
	if (err)
		goto path_put;

	err = -ECONNREFUSED;
	inode = d_backing_inode(path.dentry);
	if (!S_ISSOCK(inode->i_mode))
		goto path_put;

	sk = unix_find_socket_byinode(inode);
	if (!sk)
		goto path_put;

	err = -EPROTOTYPE;
	if (sk->sk_type == type)
		touch_atime(&path);
	else
		goto sock_put;

	path_put(&path);

	return sk;

sock_put:
	sock_put(sk);
path_put:
	path_put(&path);
fail:
	return ERR_PTR(err);
}
static struct sock *unix_find_abstract(struct net *net,
				       struct sockaddr_un *sunaddr,
				       int addr_len, int type)
{
	unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type);
	struct dentry *dentry;
	struct sock *sk;

	sk = unix_find_socket_byname(net, sunaddr, addr_len, hash);
	if (!sk)
		return ERR_PTR(-ECONNREFUSED);

	dentry = unix_sk(sk)->path.dentry;
	if (dentry)
		touch_atime(&unix_sk(sk)->path);

	return sk;
}

static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunaddr,
				    int addr_len, int type)
{
	struct sock *sk;

	if (sunaddr->sun_path[0])
		sk = unix_find_bsd(sunaddr, addr_len, type);
	else
		sk = unix_find_abstract(net, sunaddr, addr_len, type);

	return sk;
}
static int unix_autobind(struct sock *sk)
{
	unsigned int new_hash, old_hash = sk->sk_hash;
	struct unix_sock *u = unix_sk(sk);
	struct net *net = sock_net(sk);
	struct unix_address *addr;
	u32 lastnum, ordernum;
	int err;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		return err;

	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) +
		       offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->len = offsetof(struct sockaddr_un, sun_path) + 6;
	addr->name->sun_family = AF_UNIX;
	refcount_set(&addr->refcnt, 1);

	ordernum = get_random_u32();
	lastnum = ordernum & 0xFFFFF;
retry:
	ordernum = (ordernum + 1) & 0xFFFFF;
	sprintf(addr->name->sun_path + 1, "%05x", ordernum);

	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
	unix_table_double_lock(net, old_hash, new_hash);

	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
		unix_table_double_unlock(net, old_hash, new_hash);

		/* __unix_find_socket_byname() may take a long time if many
		 * names are already in use.
		 */
		cond_resched();

		if (ordernum == lastnum) {
			/* Give up if all names seem to be in use. */
			err = -ENOSPC;
			unix_release_addr(addr);
			goto out;
		}

		goto retry;
	}

	__unix_set_addr_hash(net, sk, addr, new_hash);
	unix_table_double_unlock(net, old_hash, new_hash);
	err = 0;

out:	mutex_unlock(&u->bindlock);
	return err;
}
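
/* Userspace sketch (illustrative): passing only the address family
 * triggers autobind, which picks a free abstract name of five hex digits
 * (the "%05x" above):
 *
 *	struct sockaddr_un sun = { .sun_family = AF_UNIX };
 *	bind(fd, (struct sockaddr *)&sun, sizeof(sa_family_t));
 *
 * getsockname() then reports an abstract name such as "\000812ab".
 */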
static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
			 int addr_len)
{
	umode_t mode = S_IFSOCK |
	       (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
	unsigned int new_hash, old_hash = sk->sk_hash;
	struct unix_sock *u = unix_sk(sk);
	struct net *net = sock_net(sk);
	struct user_namespace *ns; // barf...
	struct unix_address *addr;
	struct dentry *dentry;
	struct path parent;
	int err;

	unix_mkname_bsd(sunaddr, addr_len);
	addr_len = strlen(sunaddr->sun_path) +
		offsetof(struct sockaddr_un, sun_path) + 1;

	addr = unix_create_addr(sunaddr, addr_len);
	if (!addr)
		return -ENOMEM;

	/*
	 * Get the parent directory, calculate the hash for the last
	 * component.
	 */
	dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
	if (IS_ERR(dentry)) {
		err = PTR_ERR(dentry);
		goto out;
	}

	/*
	 * All right, let's create it.
	 */
	ns = mnt_user_ns(parent.mnt);
	err = security_path_mknod(&parent, dentry, mode, 0);
	if (!err)
		err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0);
	if (err)
		goto out_path;
	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out_unlink;
	if (u->addr)
		goto out_unlock;

	new_hash = unix_bsd_hash(d_backing_inode(dentry));
	unix_table_double_lock(net, old_hash, new_hash);
	u->path.mnt = mntget(parent.mnt);
	u->path.dentry = dget(dentry);
	__unix_set_addr_hash(net, sk, addr, new_hash);
	unix_table_double_unlock(net, old_hash, new_hash);
	unix_insert_bsd_socket(sk);
	mutex_unlock(&u->bindlock);
	done_path_create(&parent, dentry);
	return 0;

out_unlock:
	mutex_unlock(&u->bindlock);
	err = -EINVAL;
out_unlink:
	/* failed after successful mknod? unlink what we'd created... */
	vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL);
out_path:
	done_path_create(&parent, dentry);
out:
	unix_release_addr(addr);
	return err == -EEXIST ? -EADDRINUSE : err;
}
static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
			      int addr_len)
{
	unsigned int new_hash, old_hash = sk->sk_hash;
	struct unix_sock *u = unix_sk(sk);
	struct net *net = sock_net(sk);
	struct unix_address *addr;
	int err;

	addr = unix_create_addr(sunaddr, addr_len);
	if (!addr)
		return -ENOMEM;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out;

	if (u->addr) {
		err = -EINVAL;
		goto out_mutex;
	}

	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
	unix_table_double_lock(net, old_hash, new_hash);

	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
		goto out_spin;

	__unix_set_addr_hash(net, sk, addr, new_hash);
	unix_table_double_unlock(net, old_hash, new_hash);
	mutex_unlock(&u->bindlock);
	return 0;

out_spin:
	unix_table_double_unlock(net, old_hash, new_hash);
	err = -EADDRINUSE;
out_mutex:
	mutex_unlock(&u->bindlock);
out:
	unix_release_addr(addr);
	return err;
}
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	int err;

	if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
	    sunaddr->sun_family == AF_UNIX)
		return unix_autobind(sk);

	err = unix_validate_addr(sunaddr, addr_len);
	if (err)
		return err;

	if (sunaddr->sun_path[0])
		err = unix_bind_bsd(sk, sunaddr, addr_len);
	else
		err = unix_bind_abstract(sk, sunaddr, addr_len);

	return err;
}
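
/* Userspace sketch (illustrative): the first byte of sun_path selects
 * the namespace dispatched on above:
 *
 *	strcpy(sun.sun_path, "/tmp/sock");	filesystem -> unix_bind_bsd()
 *	sun.sun_path[0] = '\0';			abstract   -> unix_bind_abstract()
 */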
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}

	if (sk1 > sk2)
		swap(sk1, sk2);

	unix_state_lock(sk1);
	unix_state_lock_nested(sk2, U_LOCK_SECOND);
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}

	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *sk = sock->sk;
	struct sock *other;
	int err;

	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_validate_addr(sunaddr, alen);
		if (err)
			goto out;

		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr) {
			err = unix_autobind(sk);
			if (err)
				goto out;
		}

restart:
		other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type);
		if (IS_ERR(other)) {
			err = PTR_ERR(other);
			goto out;
		}

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

		sk->sk_state = other->sk_state = TCP_ESTABLISHED;
	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);

		unix_peer(sk) = other;
		if (!other)
			sk->sk_state = TCP_CLOSE;
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}

	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}
static long unix_wait_for_peer(struct sock *other, long timeo)
	__releases(&unix_sk(other)->lock)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full_lockless(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct net *net = sock_net(sk);
	struct sk_buff *skb = NULL;
	long timeo;
	int err;
	int st;

	err = unix_validate_addr(sunaddr, addr_len);
	if (err)
		goto out;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
		err = unix_autobind(sk);
		if (err)
			goto out;
	}

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we allocate after the state is locked,
	   we will have to recheck everything again anyway.
	 */

	/* create new sock for complete connection */
	newsk = unix_create1(net, NULL, 0, sock->type);
	if (IS_ERR(newsk)) {
		err = PTR_ERR(newsk);
		newsk = NULL;
		goto out;
	}

	err = -ENOMEM;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type);
	if (IS_ERR(other)) {
		err = PTR_ERR(other);
		other = NULL;
		goto out;
	}

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   This is a tricky place. We need to grab our state lock and cannot
	   drop the lock on the peer. It is dangerous because deadlock is
	   possible. The connect-to-self case and a simultaneous
	   attempt to connect are eliminated by checking the socket
	   state. other is TCP_LISTEN; if sk is TCP_LISTEN we
	   check this before attempting to grab the lock.

	   Well, and we have to recheck the state after the socket is locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk, U_LOCK_SECOND);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Quickly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under its lock.  Insertion into the
	 * hash chain we'd found it in had been done in an
	 * earlier critical area protected by the chain's lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire().  IOW, the same warranties
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	refcount_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	init_peercred(ska);
	init_peercred(skb);

	ska->sk_state = TCP_ESTABLISHED;
	skb->sk_state = TCP_ESTABLISHED;
	socka->state  = SS_CONNECTED;
	sockb->state  = SS_CONNECTED;
	return 0;
}
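
/* Userspace sketch (illustrative): socketpair() reaches this function
 * with two fresh, unbound sockets and hands them back already connected:
 *
 *	int sv[2];
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *	write(sv[0], "hi", 2);		readable on sv[1]
 */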
static void unix_sock_inherit_flags(const struct socket *old,
				    struct socket *new)
{
	if (test_bit(SOCK_PASSCRED, &old->flags))
		set_bit(SOCK_PASSCRED, &new->flags);
	if (test_bit(SOCK_PASSSEC, &old->flags))
		set_bit(SOCK_PASSSEC, &new->flags);
}
static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
		       bool kern)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
				&err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}
static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_address *addr;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	addr = smp_load_acquire(&unix_sk(sk)->addr);
	if (!addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		err = offsetof(struct sockaddr_un, sun_path);
	} else {
		err = addr->len;
		memcpy(sunaddr, addr->name, addr->len);
	}
	sock_put(sk);
out:
	return err;
}
static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->fp = scm_fp_dup(UNIXCB(skb).fp);

	/*
	 * Garbage collection of unix sockets starts by selecting a set of
	 * candidate sockets which have references only from being in flight
	 * (total_refs == inflight_refs).  This condition is checked once during
	 * the candidate collection phase, and candidates are marked as such, so
	 * that non-candidates can later be ignored.  While inflight_refs is
	 * protected by unix_gc_lock, total_refs (file count) is not, hence this
	 * is an instantaneous decision.
	 *
	 * Once a candidate, however, the socket must not be reinstalled into a
	 * file descriptor while the garbage collection is in progress.
	 *
	 * If the above conditions are met, then the directed graph of
	 * candidates (*) does not change while unix_gc_lock is held.
	 *
	 * Any operation that changes the file count through file descriptors
	 * (dup, close, sendmsg) does not change the graph since candidates are
	 * not installed in fds.
	 *
	 * Dequeuing a candidate via recvmsg would install it into an fd, but
	 * that takes unix_gc_lock to decrement the inflight count, so it's
	 * serialized with garbage collection.
	 *
	 * MSG_PEEK is special in that it does not change the inflight count,
	 * yet does install the socket into an fd.  The following lock/unlock
	 * pair is to ensure serialization with garbage collection.  It must be
	 * done between incrementing the file count and installing the file into
	 * an fd.
	 *
	 * If garbage collection starts after the barrier provided by the
	 * lock/unlock, then it will see the elevated refcount and not mark this
	 * as a candidate.  If a garbage collection is already in progress
	 * before the file count was incremented, then the lock/unlock pair will
	 * ensure that garbage collection is finished before progressing to
	 * installing the fd.
	 *
	 * (*) A -> B where B is on the queue of A or B is on the queue of C
	 * which is on the queue of listening socket A.
	 */
	spin_lock(&unix_gc_lock);
	spin_unlock(&unix_gc_lock);
}
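
/* Userspace sketch (illustrative): MSG_PEEK on a message carrying
 * SCM_RIGHTS installs duplicates of the passed fds while leaving the
 * message queued, which is exactly why the lock/unlock barrier above is
 * needed:
 *
 *	recvmsg(fd, &msg, MSG_PEEK);	fds installed, message stays queued
 *	recvmsg(fd, &msg, 0);		fds installed again, message consumed
 */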
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid  = get_pid(scm->pid);
	UNIXCB(skb).uid = scm->creds.uid;
	UNIXCB(skb).gid = scm->creds.gid;
	UNIXCB(skb).fp = NULL;
	unix_get_secdata(scm, skb);
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	skb->destructor = unix_destruct_scm;
	return err;
}
static bool unix_passcred_enabled(const struct socket *sock,
				  const struct sock *other)
{
	return test_bit(SOCK_PASSCRED, &sock->flags) ||
	       !other->sk_socket ||
	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
}

/*
 * Some apps rely on write() giving SCM_CREDENTIALS.
 * We include credentials if the source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	if (UNIXCB(skb).pid)
		return;
	if (unix_passcred_enabled(sock, other)) {
		UNIXCB(skb).pid  = get_pid(task_tgid(current));
		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
	}
}
static int maybe_init_creds(struct scm_cookie *scm,
			    struct socket *socket,
			    const struct sock *other)
{
	int err;
	struct msghdr msg = { .msg_controllen = 0 };

	err = scm_send(socket, &msg, scm, false);
	if (err)
		return err;

	if (unix_passcred_enabled(socket, other)) {
		scm->pid = get_pid(task_tgid(current));
		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
	}
	return err;
}
static bool unix_skb_scm_eq(struct sk_buff *skb,
			    struct scm_cookie *scm)
{
	return UNIXCB(skb).pid == scm->pid &&
	       uid_eq(UNIXCB(skb).uid, scm->creds.uid) &&
	       gid_eq(UNIXCB(skb).gid, scm->creds.gid) &&
	       unix_secdata_eq(scm, skb);
}

static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
{
	struct scm_fp_list *fp = UNIXCB(skb).fp;
	struct unix_sock *u = unix_sk(sk);

	if (unlikely(fp && fp->count))
		atomic_add(fp->count, &u->scm_stat.nr_fds);
}

static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
{
	struct scm_fp_list *fp = UNIXCB(skb).fp;
	struct unix_sock *u = unix_sk(sk);

	if (unlikely(fp && fp->count))
		atomic_sub(fp->count, &u->scm_stat.nr_fds);
}
/*
 *	Send AF_UNIX data.
 */

static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
			      size_t len)
{
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
	struct sock *sk = sock->sk, *other = NULL;
	struct unix_sock *u = unix_sk(sk);
	struct scm_cookie scm;
	struct sk_buff *skb;
	int data_len = 0;
	int sk_locked;
	long timeo;
	int err;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_validate_addr(sunaddr, msg->msg_namelen);
		if (err)
			goto out;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
		err = unix_autobind(sk);
		if (err)
			goto out;
	}

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	if (len > SKB_MAX_ALLOC) {
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);
		data_len = PAGE_ALIGN(data_len);

		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
	}

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err,
				   PAGE_ALLOC_COSTLY_ORDER);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(&scm, skb, true);
	if (err < 0)
		goto out_free;

	skb_put(skb, len - data_len);
	skb->data_len = data_len;
	skb->len = len;
	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen,
					sk->sk_type);
		if (IS_ERR(other)) {
			err = PTR_ERR(other);
			other = NULL;
			goto out_free;
		}
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	sk_locked = 0;
	unix_state_lock(other);
restart_locked:
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (unlikely(sock_flag(other, SOCK_DEAD))) {
		/*
		 *	Check with 1003.1g - what should
		 *	datagram error
		 */
		unix_state_unlock(other);
		sock_put(other);

		if (!sk_locked)
			unix_state_lock(sk);

		err = 0;
		if (sk->sk_type == SOCK_SEQPACKET) {
			/* We are here only when racing with unix_release_sock()
			 * clearing @other. Never change state to TCP_CLOSE
			 * unlike SOCK_DGRAM wants.
			 */
			unix_state_unlock(sk);
			err = -EPIPE;
		} else if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_dgram_peer_wake_disconnect_wakeup(sk, other);

			sk->sk_state = TCP_CLOSE;
			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* other == sk && unix_peer(other) != sk if
	 * - unix_peer(sk) == NULL, destination address bound to sk
	 * - unix_peer(sk) == sk by time of get but disconnected before lock
	 */
	if (other != sk &&
	    unlikely(unix_peer(other) != sk &&
	    unix_recvq_full_lockless(other))) {
		if (timeo) {
			timeo = unix_wait_for_peer(other, timeo);

			err = sock_intr_errno(timeo);
			if (signal_pending(current))
				goto out_free;

			goto restart;
		}

		if (!sk_locked) {
			unix_state_unlock(other);
			unix_state_double_lock(sk, other);
		}

		if (unix_peer(sk) != other ||
		    unix_dgram_peer_wake_me(sk, other)) {
			err = -EAGAIN;
			sk_locked = 1;
			goto out_unlock;
		}

		if (!sk_locked) {
			sk_locked = 1;
			goto restart_locked;
		}
	}

	if (unlikely(sk_locked))
		unix_state_unlock(sk);

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	maybe_add_creds(skb, sock, other);
	scm_stat_add(other, skb);
	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	scm_destroy(&scm);
	return len;

out_unlock:
	if (sk_locked)
		unix_state_unlock(sk);
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(&scm);
	return err;
}
/* We use paged skbs for stream sockets, and limit occupancy to 32768
 * bytes, and a minimum of a full page.
 */
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other,
		     struct scm_cookie *scm, bool fds_sent)
{
	struct unix_sock *ousk = unix_sk(other);
	struct sk_buff *skb;
	int err = 0;

	skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);

	if (!skb)
		return err;

	err = unix_scm_to_skb(scm, skb, !fds_sent);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}
	skb_put(skb, 1);
	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);

	if (err) {
		kfree_skb(skb);
		return err;
	}

	unix_state_lock(other);

	if (sock_flag(other, SOCK_DEAD) ||
	    (other->sk_shutdown & RCV_SHUTDOWN)) {
		unix_state_unlock(other);
		kfree_skb(skb);
		return -EPIPE;
	}

	maybe_add_creds(skb, sock, other);
	skb_get(skb);

	if (ousk->oob_skb)
		consume_skb(ousk->oob_skb);

	WRITE_ONCE(ousk->oob_skb, skb);

	scm_stat_add(other, skb);
	skb_queue_tail(&other->sk_receive_queue, skb);
	sk_send_sigurg(other);
	unix_state_unlock(other);
	other->sk_data_ready(other);

	return err;
}
#endif
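
/* Userspace sketch (illustrative): the single out-of-band byte queued
 * above is announced via SIGURG/EPOLLPRI and fetched with MSG_OOB:
 *
 *	send(c, "x", 1, MSG_OOB);
 *	recv(s, buf, 1, MSG_OOB);	returns the OOB byte
 */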
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
			       size_t len)
{
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie scm;
	bool fds_sent = false;
	int data_len;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags & MSG_OOB) {
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
		if (len)
			len--;
		else
#endif
			goto out_err;
	}

	if (msg->msg_namelen) {
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		size = len - sent;

		/* Keep two messages in the pipe so it schedules better */
		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);

		/* allow fallback to order-0 allocations */
		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);

		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));

		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));

		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
					   msg->msg_flags & MSG_DONTWAIT, &err,
					   get_order(UNIX_SKB_FRAGS_SZ));
		if (!skb)
			goto out_err;

		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(&scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		fds_sent = true;

		skb_put(skb, size - data_len);
		skb->data_len = data_len;
		skb->len = size;
		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		scm_stat_add(other, skb);
		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other);
		sent += size;
	}

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	if (msg->msg_flags & MSG_OOB) {
		err = queue_oob(sock, msg, other, &scm, fds_sent);
		if (err)
			goto out_err;
		sent++;
	}
#endif

	scm_destroy(&scm);

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(&scm);
	return sent ? : err;
}
static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
				    int offset, size_t size, int flags)
{
	int err;
	bool send_sigpipe = false;
	bool init_scm = true;
	struct scm_cookie scm;
	struct sock *other, *sk = socket->sk;
	struct sk_buff *skb, *newskb = NULL, *tail = NULL;

	if (flags & MSG_OOB)
		return -EOPNOTSUPP;

	other = unix_peer(sk);
	if (!other || sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (false) {
alloc_skb:
		spin_unlock(&other->sk_receive_queue.lock);
		unix_state_unlock(other);
		mutex_unlock(&unix_sk(other)->iolock);
		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
					      &err, 0);
		if (!newskb)
			goto err;
	}

	/* we must acquire iolock as we modify already present
	 * skbs in the sk_receive_queue and mess with skb->len
	 */
	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
	if (err) {
		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
		goto err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_unlock;
	}

	unix_state_lock(other);

	if (sock_flag(other, SOCK_DEAD) ||
	    other->sk_shutdown & RCV_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_state_unlock;
	}

	if (init_scm) {
		err = maybe_init_creds(&scm, socket, other);
		if (err)
			goto err_state_unlock;
		init_scm = false;
	}

	/* skb != NULL means we already have one to append to and
	 * tail == skb means we just allocated it for that purpose
	 */
	spin_lock(&other->sk_receive_queue.lock);
	skb = skb_peek_tail(&other->sk_receive_queue);
	if (tail && tail == skb) {
		skb = newskb;
	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
		if (newskb) {
			skb = newskb;
		} else {
			tail = skb;
			goto alloc_skb;
		}
	} else if (newskb) {
		/* this is fast path, we don't necessarily need to
		 * call to kfree_skb even though with newskb == NULL
		 * this - does no harm
		 */
		consume_skb(newskb);
		newskb = NULL;
	}

	if (skb_append_pagefrags(skb, page, offset, size)) {
		tail = skb;
		goto alloc_skb;
	}

	skb->len += size;
	skb->data_len += size;
	skb->truesize += size;
	refcount_add(size, &sk->sk_wmem_alloc);

	if (newskb) {
		unix_scm_to_skb(&scm, skb, false);
		__skb_queue_tail(&other->sk_receive_queue, newskb);
	}

	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	mutex_unlock(&unix_sk(other)->iolock);

	other->sk_data_ready(other);
	scm_destroy(&scm);
	return size;

err_state_unlock:
	unix_state_unlock(other);
err_unlock:
	mutex_unlock(&unix_sk(other)->iolock);
err:
	kfree_skb(newskb);
	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	if (!init_scm)
		scm_destroy(&scm);
	return err;
}
static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
				  size_t len)
{
	int err;
	struct sock *sk = sock->sk;

	err = sock_error(sk);
	if (err)
		return err;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (msg->msg_namelen)
		msg->msg_namelen = 0;

	return unix_dgram_sendmsg(sock, msg, len);
}

static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
				  size_t size, int flags)
{
	struct sock *sk = sock->sk;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	return unix_dgram_recvmsg(sock, msg, size, flags);
}
static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
{
	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);

	if (addr) {
		msg->msg_namelen = addr->len;
		memcpy(msg->msg_name, addr->name, addr->len);
	}
}
int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
			 int flags)
{
	struct scm_cookie scm;
	struct socket *sock = sk->sk_socket;
	struct unix_sock *u = unix_sk(sk);
	struct sk_buff *skb, *last;
	long timeo;
	int skip;
	int err;

	err = -EOPNOTSUPP;
	if (flags & MSG_OOB)
		goto out;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		mutex_lock(&u->iolock);

		skip = sk_peek_offset(sk, flags);
		skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
					      &skip, &err, &last);
		if (skb) {
			if (!(flags & MSG_PEEK))
				scm_stat_del(sk, skb);
			break;
		}

		mutex_unlock(&u->iolock);

		if (err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
					      &err, &timeo, last));

	if (!skb) { /* implies iolock unlocked */
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out;
	}

	if (wq_has_sleeper(&u->peer_wait))
		wake_up_interruptible_sync_poll(&u->peer_wait,
						EPOLLOUT | EPOLLWRNORM |
						EPOLLWRBAND);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	if (size > skb->len - skip)
		size = skb->len - skip;
	else if (size < skb->len - skip)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_msg(skb, skip, msg, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	memset(&scm, 0, sizeof(scm));

	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
	unix_set_secdata(&scm, skb);

	if (!(flags & MSG_PEEK)) {
		if (UNIXCB(skb).fp)
			unix_detach_fds(&scm, skb);

		sk_peek_offset_bwd(sk, skb->len);
	} else {
		/* It is questionable: on PEEK we could:
		   - do not return fds - good, but too simple 8)
		   - return fds, and do not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose it for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!
		*/

		sk_peek_offset_fwd(sk, size);

		if (UNIXCB(skb).fp)
			unix_peek_fds(&scm, skb);
	}
	err = (flags & MSG_TRUNC) ? skb->len - skip : size;

	scm_recv(sock, msg, &scm, flags);

out_free:
	skb_free_datagram(sk, skb);
	mutex_unlock(&u->iolock);
out:
	return err;
}
static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
			      int flags)
{
	struct sock *sk = sock->sk;

#ifdef CONFIG_BPF_SYSCALL
	const struct proto *prot = READ_ONCE(sk->sk_prot);

	if (prot != &unix_dgram_proto)
		return prot->recvmsg(sk, msg, size, flags, NULL);
#endif
	return __unix_dgram_recvmsg(sk, msg, size, flags);
}
static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
	struct unix_sock *u = unix_sk(sk);
	struct sk_buff *skb;
	int err;

	mutex_lock(&u->iolock);
	skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
	mutex_unlock(&u->iolock);
	if (!skb)
		return err;

	return recv_actor(sk, skb);
}
/*
 *	Sleep until more data has arrived. But check for races..
 */
static long unix_stream_data_wait(struct sock *sk, long timeo,
				  struct sk_buff *last, unsigned int last_len,
				  bool freezable)
{
	unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE;
	struct sk_buff *tail;
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, state);

		tail = skb_peek_tail(&sk->sk_receive_queue);
		if (tail != last ||
		    (tail && tail->len != last_len) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
		unix_state_unlock(sk);
		timeo = schedule_timeout(timeo);
		unix_state_lock(sk);

		if (sock_flag(sk, SOCK_DEAD))
			break;

		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	}

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
	return timeo;
}
static unsigned int unix_skb_len(const struct sk_buff *skb)
{
	return skb->len - UNIXCB(skb).consumed;
}

struct unix_stream_read_state {
	int (*recv_actor)(struct sk_buff *, int, int,
			  struct unix_stream_read_state *);
	struct socket *socket;
	struct msghdr *msg;
	struct pipe_inode_info *pipe;
	size_t size;
	int flags;
	unsigned int splice_flags;
};
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
static int unix_stream_recv_urg(struct unix_stream_read_state *state)
{
	struct socket *sock = state->socket;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int chunk = 1;
	struct sk_buff *oob_skb;

	mutex_lock(&u->iolock);
	unix_state_lock(sk);

	if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
		unix_state_unlock(sk);
		mutex_unlock(&u->iolock);
		return -EINVAL;
	}

	oob_skb = u->oob_skb;

	if (!(state->flags & MSG_PEEK))
		WRITE_ONCE(u->oob_skb, NULL);
	else
		skb_get(oob_skb);
	unix_state_unlock(sk);

	chunk = state->recv_actor(oob_skb, 0, chunk, state);

	if (!(state->flags & MSG_PEEK))
		UNIXCB(oob_skb).consumed += 1;

	consume_skb(oob_skb);

	mutex_unlock(&u->iolock);

	if (chunk < 0)
		return -EFAULT;

	state->msg->msg_flags |= MSG_OOB;
	return 1;
}
static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
				  int flags, int copied)
{
	struct unix_sock *u = unix_sk(sk);

	if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
		skb_unlink(skb, &sk->sk_receive_queue);
		consume_skb(skb);
		skb = NULL;
	} else {
		if (skb == u->oob_skb) {
			if (copied) {
				skb = NULL;
			} else if (sock_flag(sk, SOCK_URGINLINE)) {
				if (!(flags & MSG_PEEK)) {
					WRITE_ONCE(u->oob_skb, NULL);
					consume_skb(skb);
				}
			} else if (flags & MSG_PEEK) {
				skb = NULL;
			} else {
				skb_unlink(skb, &sk->sk_receive_queue);
				WRITE_ONCE(u->oob_skb, NULL);
				if (!WARN_ON_ONCE(skb_unref(skb)))
					kfree_skb(skb);
				skb = skb_peek(&sk->sk_receive_queue);
			}
		}
	}
	return skb;
}
#endif
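
/* Illustrative userspace sketch (not built here): AF_UNIX stream sockets
 * carry a single byte of out-of-band data, tracked by u->oob_skb above.
 * Variable names are local to the example.
 *
 *	int sv[2];
 *	char c;
 *
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *	send(sv[0], "a", 1, MSG_OOB);	// becomes the OOB byte
 *	recv(sv[1], &c, 1, MSG_OOB);	// fetched via unix_stream_recv_urg()
 */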
static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
	if (unlikely(sk->sk_state != TCP_ESTABLISHED))
		return -ENOTCONN;

	return unix_read_skb(sk, recv_actor);
}
static int unix_stream_read_generic(struct unix_stream_read_state *state,
				    bool freezable)
{
	struct scm_cookie scm;
	struct socket *sock = state->socket;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int copied = 0;
	int flags = state->flags;
	int noblock = flags & MSG_DONTWAIT;
	bool check_creds = false;
	int target;
	int err = 0;
	long timeo;
	int skip;
	size_t size = state->size;
	unsigned int last_len;

	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
		err = -EINVAL;
		goto out;
	}

	if (unlikely(flags & MSG_OOB)) {
		err = -EOPNOTSUPP;
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
		err = unix_stream_recv_urg(state);
#endif
		goto out;
	}

	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, noblock);

	memset(&scm, 0, sizeof(scm));

	/* Lock the socket to prevent queue disordering
	 * while we sleep in memcpy_to_msg().
	 */
	mutex_lock(&u->iolock);

	skip = max(sk_peek_offset(sk, flags), 0);

	do {
		int chunk;
		bool drop_skb;
		struct sk_buff *skb, *last;

redo:
		unix_state_lock(sk);
		if (sock_flag(sk, SOCK_DEAD)) {
			err = -ECONNRESET;
			goto unlock;
		}
		last = skb = skb_peek(&sk->sk_receive_queue);
		last_len = last ? last->len : 0;

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
		if (skb) {
			skb = manage_oob(skb, sk, flags, copied);
			if (!skb && copied) {
				unix_state_unlock(sk);
				break;
			}
		}
#endif
again:
		if (skb == NULL) {
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			if (!timeo) {
				err = -EAGAIN;
				break;
			}

			mutex_unlock(&u->iolock);

			timeo = unix_stream_data_wait(sk, timeo, last,
						      last_len, freezable);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				scm_destroy(&scm);
				goto out;
			}

			mutex_lock(&u->iolock);
			goto redo;
unlock:
			unix_state_unlock(sk);
			break;
		}

		while (skip >= unix_skb_len(skb)) {
			skip -= unix_skb_len(skb);
			last = skb;
			last_len = skb->len;
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (!skb)
				goto again;
		}

		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (!unix_skb_scm_eq(skb, &scm))
				break;
		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
			/* Copy credentials */
			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
			unix_set_secdata(&scm, skb);
			check_creds = true;
		}

		/* Copy address just once */
		if (state->msg && state->msg->msg_name) {
			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
					 state->msg->msg_name);
			unix_copy_addr(state->msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
		skb_get(skb);
		chunk = state->recv_actor(skb, skip, chunk, state);
		drop_skb = !unix_skb_len(skb);
		/* skb is only safe to use if !drop_skb */
		consume_skb(skb);
		if (chunk < 0) {
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		if (drop_skb) {
			/* the skb was touched by a concurrent reader;
			 * we should not expect anything from this skb
			 * anymore and assume it invalid - we can be
			 * sure it was dropped from the socket queue
			 *
			 * let's report a short read
			 */
			err = 0;
			break;
		}

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			UNIXCB(skb).consumed += chunk;

			sk_peek_offset_bwd(sk, chunk);

			if (UNIXCB(skb).fp) {
				scm_stat_del(sk, skb);
				unix_detach_fds(&scm, skb);
			}

			if (unix_skb_len(skb))
				break;

			skb_unlink(skb, &sk->sk_receive_queue);
			consume_skb(skb);

			if (scm.fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				unix_peek_fds(&scm, skb);

			sk_peek_offset_fwd(sk, chunk);

			if (scm.fp)
				break;

			skip = 0;
			last = skb;
			last_len = skb->len;
			unix_state_lock(sk);
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (skb)
				goto again;
			unix_state_unlock(sk);
			break;
		}
	} while (size);

	mutex_unlock(&u->iolock);
	if (state->msg)
		scm_recv(sock, state->msg, &scm, flags);
	else
		scm_destroy(&scm);
out:
	return copied ? : err;
}
static int unix_stream_read_actor(struct sk_buff *skb,
				  int skip, int chunk,
				  struct unix_stream_read_state *state)
{
	int ret;

	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
				    state->msg, chunk);
	return ret ?: chunk;
}
int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
			  size_t size, int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_read_actor,
		.socket = sk->sk_socket,
		.msg = msg,
		.size = size,
		.flags = flags
	};

	return unix_stream_read_generic(&state, true);
}
static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
			       size_t size, int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_read_actor,
		.socket = sock,
		.msg = msg,
		.size = size,
		.flags = flags
	};

#ifdef CONFIG_BPF_SYSCALL
	struct sock *sk = sock->sk;
	const struct proto *prot = READ_ONCE(sk->sk_prot);

	if (prot != &unix_stream_proto)
		return prot->recvmsg(sk, msg, size, flags, NULL);
#endif

	return unix_stream_read_generic(&state, true);
}
static int unix_stream_splice_actor(struct sk_buff *skb,
				    int skip, int chunk,
				    struct unix_stream_read_state *state)
{
	return skb_splice_bits(skb, state->socket->sk,
			       UNIXCB(skb).consumed + skip,
			       state->pipe, chunk, state->splice_flags);
}
static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
				       struct pipe_inode_info *pipe,
				       size_t size, unsigned int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_splice_actor,
		.socket = sock,
		.pipe = pipe,
		.size = size,
		.splice_flags = flags,
	};

	if (unlikely(*ppos))
		return -ESPIPE;

	if (sock->file->f_flags & O_NONBLOCK ||
	    flags & SPLICE_F_NONBLOCK)
		state.flags = MSG_DONTWAIT;

	return unix_stream_read_generic(&state, false);
}
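
/* Illustrative userspace sketch (not built here): splicing queued stream
 * data into a pipe without a copy through userspace.  "fd" is assumed to
 * be a connected AF_UNIX stream socket; per the *ppos check above, the
 * socket-side offset must be NULL.
 *
 *	int p[2];
 *
 *	pipe(p);
 *	splice(fd, NULL, p[1], NULL, 4096, SPLICE_F_NONBLOCK);
 */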
static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

	if (mode < SHUT_RD || mode > SHUT_RDWR)
		return -EINVAL;
	/* This maps:
	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
	 */
	++mode;

	unix_state_lock(sk);
	WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
	other = unix_peer(sk);
	if (other)
		sock_hold(other);
	unix_state_unlock(sk);
	sk->sk_state_change(sk);

	if (other &&
	    (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
		int peer_mode = 0;
		const struct proto *prot = READ_ONCE(other->sk_prot);

		if (prot->unhash)
			prot->unhash(other);
		if (mode & RCV_SHUTDOWN)
			peer_mode |= SEND_SHUTDOWN;
		if (mode & SEND_SHUTDOWN)
			peer_mode |= RCV_SHUTDOWN;
		unix_state_lock(other);
		WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
		unix_state_unlock(other);
		other->sk_state_change(other);
		if (peer_mode == SHUTDOWN_MASK)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
		else if (peer_mode & RCV_SHUTDOWN)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
	}
	if (other)
		sock_put(other);

	return 0;
}
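
/* Illustrative userspace sketch (not built here): SHUT_WR on one end
 * shows up as EOF (read() == 0) on the peer, because the mirroring logic
 * above sets the peer's RCV_SHUTDOWN bit.
 *
 *	int sv[2];
 *	char c;
 *
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *	shutdown(sv[0], SHUT_WR);
 *	read(sv[1], &c, 1);	// returns 0: peer sees EOF
 */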
long unix_inq_len(struct sock *sk)
{
	struct sk_buff *skb;
	long amount = 0;

	if (sk->sk_state == TCP_LISTEN)
		return -EINVAL;

	spin_lock(&sk->sk_receive_queue.lock);
	if (sk->sk_type == SOCK_STREAM ||
	    sk->sk_type == SOCK_SEQPACKET) {
		skb_queue_walk(&sk->sk_receive_queue, skb)
			amount += unix_skb_len(skb);
	} else {
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	return amount;
}
EXPORT_SYMBOL_GPL(unix_inq_len);

long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);
static int unix_open_file(struct sock *sk)
{
	struct path path;
	struct file *f;
	int fd;

	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	if (!smp_load_acquire(&unix_sk(sk)->addr))
		return -ENOENT;

	path = unix_sk(sk)->path;
	if (!path.dentry)
		return -ENOENT;

	path_get(&path);

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		goto out;

	f = dentry_open(&path, O_PATH, current_cred());
	if (IS_ERR(f)) {
		put_unused_fd(fd);
		fd = PTR_ERR(f);
		goto out;
	}

	fd_install(fd, f);
out:
	path_put(&path);

	return fd;
}
static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	long amount = 0;
	int err;

	switch (cmd) {
	case SIOCOUTQ:
		amount = unix_outq_len(sk);
		err = put_user(amount, (int __user *)arg);
		break;
	case SIOCINQ:
		amount = unix_inq_len(sk);
		if (amount < 0)
			err = amount;
		else
			err = put_user(amount, (int __user *)arg);
		break;
	case SIOCUNIXFILE:
		err = unix_open_file(sk);
		break;
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	case SIOCATMARK:
		{
			struct sk_buff *skb;
			int answ = 0;

			skb = skb_peek(&sk->sk_receive_queue);
			if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
				answ = 1;
			err = put_user(answ, (int __user *)arg);
		}
		break;
#endif
	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}
#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
}
#endif
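
/* Illustrative userspace sketch (not built here) of the ioctls handled
 * above, on a socket fd "fd": SIOCINQ/SIOCOUTQ report queued bytes,
 * SIOCATMARK tests whether the next byte is the OOB mark, and
 * SIOCUNIXFILE (CAP_NET_ADMIN only) returns the bound socket inode as a
 * new O_PATH file descriptor.
 *
 *	int inq, outq, atmark, pathfd;
 *
 *	ioctl(fd, SIOCINQ, &inq);
 *	ioctl(fd, SIOCOUTQ, &outq);
 *	ioctl(fd, SIOCATMARK, &atmark);
 *	pathfd = ioctl(fd, SIOCUNIXFILE);
 */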
static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	__poll_t mask;
	u8 shutdown;

	sock_poll_wait(file, sock, wait);
	mask = 0;
	shutdown = READ_ONCE(sk->sk_shutdown);

	/* exceptional events? */
	if (sk->sk_err)
		mask |= EPOLLERR;
	if (shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;
	if (shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (sk_is_readable(sk))
		mask |= EPOLLIN | EPOLLRDNORM;
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	if (READ_ONCE(unix_sk(sk)->oob_skb))
		mask |= EPOLLPRI;
#endif

	/* Connection-based need to check for termination and startup */
	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
	    sk->sk_state == TCP_CLOSE)
		mask |= EPOLLHUP;

	/*
	 * we set writable also when the other side has shut down the
	 * connection. This prevents stuck sockets.
	 */
	if (unix_writable(sk))
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;

	return mask;
}
static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int writable;
	__poll_t mask;
	u8 shutdown;

	sock_poll_wait(file, sock, wait);
	mask = 0;
	shutdown = READ_ONCE(sk->sk_shutdown);

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
		mask |= EPOLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

	if (shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (sk_is_readable(sk))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= EPOLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
		return mask;

	writable = unix_writable(sk);
	if (writable) {
		unix_state_lock(sk);

		other = unix_peer(sk);
		if (other && unix_peer(other) != sk &&
		    unix_recvq_full_lockless(other) &&
		    unix_dgram_peer_wake_me(sk, other))
			writable = 0;

		unix_state_unlock(sk);
	}

	if (writable)
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
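
/* Illustrative userspace sketch (not built here): the mask computed
 * above is what poll(2)/epoll(7) report.  A datagram sender whose peer's
 * receive queue is full loses EPOLLOUT until the peer drains it.
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };
 *
 *	poll(&pfd, 1, -1);
 *	if (pfd.revents & POLLHUP)
 *		;	// both directions shut down (SHUTDOWN_MASK)
 */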
#ifdef CONFIG_PROC_FS

#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
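
/* A worked example of the encoding above, assuming a 64-bit long and
 * UNIX_HASH_BITS == 8: BUCKET_SPACE is 64 - 9 - 1 = 54, so a seq_file
 * position packs as (bucket << 54) | offset.  E.g. bucket 3, offset 2:
 *
 *	loff_t pos = set_bucket_offset(3, 2);	// (3UL << 54) | 2
 *
 *	get_bucket(pos);	// == 3
 *	get_offset(pos);	// == 2
 */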
static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
{
	unsigned long offset = get_offset(*pos);
	unsigned long bucket = get_bucket(*pos);
	unsigned long count = 0;
	struct sock *sk;

	for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]);
	     sk; sk = sk_next(sk)) {
		if (++count == offset)
			break;
	}

	return sk;
}
static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
{
	unsigned long bucket = get_bucket(*pos);
	struct net *net = seq_file_net(seq);
	struct sock *sk;

	while (bucket < UNIX_HASH_SIZE) {
		spin_lock(&net->unx.table.locks[bucket]);

		sk = unix_from_bucket(seq, pos);
		if (sk)
			return sk;

		spin_unlock(&net->unx.table.locks[bucket]);

		*pos = set_bucket_offset(++bucket, 1);
	}

	return NULL;
}
static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
				  loff_t *pos)
{
	unsigned long bucket = get_bucket(*pos);

	for (sk = sk_next(sk); sk; sk = sk_next(sk))
		return sk;

	spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]);

	*pos = set_bucket_offset(++bucket, 1);

	return unix_get_first(seq, pos);
}
static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (!*pos)
		return SEQ_START_TOKEN;

	return unix_get_first(seq, pos);
}

static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;

	if (v == SEQ_START_TOKEN)
		return unix_get_first(seq, pos);

	return unix_get_next(seq, v, pos);
}
static void unix_seq_stop(struct seq_file *seq, void *v)
{
	struct sock *sk = v;

	if (sk)
		spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]);
}
static int unix_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);

		unix_state_lock(s);

		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			refcount_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {	/* under a hash table lock here */
			int i, len;

			seq_putc(seq, ' ');

			i = 0;
			len = u->addr->len -
				offsetof(struct sockaddr_un, sun_path);
			if (u->addr->name->sun_path[0]) {
				len--;
			} else {
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i] ?:
					 '@');
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}
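
/* Representative /proc/net/unix output produced by the format above
 * (values illustrative only; "Num" is %pK-hashed unless kptr_restrict
 * allows real pointers):
 *
 *	Num               RefCount Protocol Flags    Type St Inode Path
 *	0000000000000000: 00000002 00000000 00010000 0001 01 23456 /run/example.sock
 *	0000000000000000: 00000002 00000000 00000000 0002 01 23457 @abstract-name
 *
 * Flags shows __SO_ACCEPTCON (0x00010000) for listeners; a leading '@'
 * marks an abstract (non-filesystem) address.
 */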
static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};
#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
struct bpf_unix_iter_state {
	struct seq_net_private p;
	unsigned int cur_sk;
	unsigned int end_sk;
	unsigned int max_sk;
	struct sock **batch;
	bool st_bucket_done;
};

struct bpf_iter__unix {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct unix_sock *, unix_sk);
	uid_t uid __aligned(8);
};
static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
			      struct unix_sock *unix_sk, uid_t uid)
{
	struct bpf_iter__unix ctx;

	meta->seq_num--;  /* skip SEQ_START_TOKEN */
	ctx.meta = meta;
	ctx.unix_sk = unix_sk;
	ctx.uid = uid;
	return bpf_iter_run_prog(prog, &ctx);
}
static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
{
	struct bpf_unix_iter_state *iter = seq->private;
	unsigned int expected = 1;
	struct sock *sk;

	sock_hold(start_sk);
	iter->batch[iter->end_sk++] = start_sk;

	for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
		if (iter->end_sk < iter->max_sk) {
			sock_hold(sk);
			iter->batch[iter->end_sk++] = sk;
		}

		expected++;
	}

	spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]);

	return expected;
}
static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
{
	while (iter->cur_sk < iter->end_sk)
		sock_put(iter->batch[iter->cur_sk++]);
}
static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
				       unsigned int new_batch_sz)
{
	struct sock **new_batch;

	new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
			     GFP_USER | __GFP_NOWARN);
	if (!new_batch)
		return -ENOMEM;

	bpf_iter_unix_put_batch(iter);
	kvfree(iter->batch);
	iter->batch = new_batch;
	iter->max_sk = new_batch_sz;

	return 0;
}
static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
					loff_t *pos)
{
	struct bpf_unix_iter_state *iter = seq->private;
	unsigned int expected;
	bool resized = false;
	struct sock *sk;

	if (iter->st_bucket_done)
		*pos = set_bucket_offset(get_bucket(*pos) + 1, 1);

again:
	/* Get a new batch */
	iter->cur_sk = 0;
	iter->end_sk = 0;

	sk = unix_get_first(seq, pos);
	if (!sk)
		return NULL; /* Done */

	expected = bpf_iter_unix_hold_batch(seq, sk);

	if (iter->end_sk == expected) {
		iter->st_bucket_done = true;
		return sk;
	}

	if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) {
		resized = true;
		goto again;
	}

	return sk;
}
static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (!*pos)
		return SEQ_START_TOKEN;

	/* bpf iter does not support lseek, so it always
	 * continues from where it was stop()-ped.
	 */
	return bpf_iter_unix_batch(seq, pos);
}
static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_unix_iter_state *iter = seq->private;
	struct sock *sk;

	/* Whenever seq_next() is called, the iter->cur_sk is
	 * done with seq_show(), so advance to the next sk in
	 * the batch.
	 */
	if (iter->cur_sk < iter->end_sk)
		sock_put(iter->batch[iter->cur_sk++]);

	++*pos;

	if (iter->cur_sk < iter->end_sk)
		sk = iter->batch[iter->cur_sk];
	else
		sk = bpf_iter_unix_batch(seq, pos);

	return sk;
}
static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
{
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;
	struct sock *sk = v;
	uid_t uid;
	bool slow;
	int ret;

	if (v == SEQ_START_TOKEN)
		return 0;

	slow = lock_sock_fast(sk);

	if (unlikely(sk_unhashed(sk))) {
		ret = SEQ_SKIP;
		goto unlock;
	}

	uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, false);
	ret = unix_prog_seq_show(prog, &meta, v, uid);
unlock:
	unlock_sock_fast(sk, slow);
	return ret;
}
static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
{
	struct bpf_unix_iter_state *iter = seq->private;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;

	if (!v) {
		meta.seq = seq;
		prog = bpf_iter_get_info(&meta, true);
		if (prog)
			(void)unix_prog_seq_show(prog, &meta, v, 0);
	}

	if (iter->cur_sk < iter->end_sk)
		bpf_iter_unix_put_batch(iter);
}
static const struct seq_operations bpf_iter_unix_seq_ops = {
	.start	= bpf_iter_unix_seq_start,
	.next	= bpf_iter_unix_seq_next,
	.stop	= bpf_iter_unix_seq_stop,
	.show	= bpf_iter_unix_seq_show,
};
#endif
static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};
static int __net_init unix_net_init(struct net *net)
{
	int i;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
			     sizeof(struct seq_net_private)))
		goto err_sysctl;
#endif

	net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE,
					      sizeof(spinlock_t), GFP_KERNEL);
	if (!net->unx.table.locks)
		goto err_proc;

	net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE,
						sizeof(struct hlist_head),
						GFP_KERNEL);
	if (!net->unx.table.buckets)
		goto free_locks;

	for (i = 0; i < UNIX_HASH_SIZE; i++) {
		spin_lock_init(&net->unx.table.locks[i]);
		INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
	}

	return 0;

free_locks:
	kvfree(net->unx.table.locks);
err_proc:
#ifdef CONFIG_PROC_FS
	remove_proc_entry("unix", net->proc_net);
err_sysctl:
#endif
	unix_sysctl_unregister(net);
out:
	return -ENOMEM;
}
static void __net_exit unix_net_exit(struct net *net)
{
	kvfree(net->unx.table.buckets);
	kvfree(net->unx.table.locks);
	unix_sysctl_unregister(net);
	remove_proc_entry("unix", net->proc_net);
}

static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};
#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
		     struct unix_sock *unix_sk, uid_t uid)

#define INIT_BATCH_SZ 16

static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
{
	struct bpf_unix_iter_state *iter = priv_data;
	int err;

	err = bpf_iter_init_seq_net(priv_data, aux);
	if (err)
		return err;

	err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
	if (err) {
		bpf_iter_fini_seq_net(priv_data);
		return err;
	}

	return 0;
}

static void bpf_iter_fini_unix(void *priv_data)
{
	struct bpf_unix_iter_state *iter = priv_data;

	bpf_iter_fini_seq_net(priv_data);
	kvfree(iter->batch);
}

static const struct bpf_iter_seq_info unix_seq_info = {
	.seq_ops		= &bpf_iter_unix_seq_ops,
	.init_seq_private	= bpf_iter_init_unix,
	.fini_seq_private	= bpf_iter_fini_unix,
	.seq_priv_size		= sizeof(struct bpf_unix_iter_state),
};

static const struct bpf_func_proto *
bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
			     const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_setsockopt:
		return &bpf_sk_setsockopt_proto;
	case BPF_FUNC_getsockopt:
		return &bpf_sk_getsockopt_proto;
	default:
		return NULL;
	}
}

static struct bpf_iter_reg unix_reg_info = {
	.target			= "unix",
	.ctx_arg_info_size	= 1,
	.ctx_arg_info		= {
		{ offsetof(struct bpf_iter__unix, unix_sk),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.get_func_proto		= bpf_iter_unix_get_func_proto,
	.seq_info		= &unix_seq_info,
};

static void __init bpf_iter_register(void)
{
	unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
	if (bpf_iter_reg_target(&unix_reg_info))
		pr_warn("Warning: could not register bpf iterator unix\n");
}
#endif
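
/* A minimal sketch (not built here) of a BPF iterator program that walks
 * these sockets; the section name and context layout follow the
 * bpf_iter__unix definition above, and the program/function names are
 * local to the example.
 *
 *	SEC("iter/unix")
 *	int dump_unix(struct bpf_iter__unix *ctx)
 *	{
 *		struct unix_sock *unix_sk = ctx->unix_sk;
 *
 *		if (!unix_sk)
 *			return 0;
 *
 *		BPF_SEQ_PRINTF(ctx->meta->seq, "uid=%u\n", ctx->uid);
 *		return 0;
 *	}
 *
 * Userspace attaches and reads it via the bpf_iter link API (e.g.
 * "bpftool iter pin").
 */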
static int __init af_unix_init(void)
{
	int i, rc = -1;

	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));

	for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
		spin_lock_init(&bsd_socket_locks[i]);
		INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
	}

	rc = proto_register(&unix_dgram_proto, 1);
	if (rc != 0) {
		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
		goto out;
	}

	rc = proto_register(&unix_stream_proto, 1);
	if (rc != 0) {
		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
		proto_unregister(&unix_dgram_proto);
		goto out;
	}

	sock_register(&unix_family_ops);
	register_pernet_subsys(&unix_net_ops);
	unix_bpf_build_proto();

#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
	bpf_iter_register();
#endif

out:
	return rc;
}

static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_dgram_proto);
	proto_unregister(&unix_stream_proto);
	unregister_pernet_subsys(&unix_net_ops);
}
/* Earlier than device_initcall() so that other drivers invoking
 * request_module() don't end up in a loop when modprobe tries
 * to use a UNIX socket. But later than subsys_initcall() because
 * we depend on stuff initialised there.
 */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);