1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * NET4: Implementation of BSD Unix domain sockets.
5 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
8 * Linus Torvalds : Assorted bug cures.
9 * Niibe Yutaka : async I/O support.
10 * Carsten Paeth : PF_UNIX check, address fixes.
11 * Alan Cox : Limit size of allocated blocks.
12 * Alan Cox : Fixed the stupid socketpair bug.
13 * Alan Cox : BSD compatibility fine tuning.
14 * Alan Cox : Fixed a bug in connect when interrupted.
15 * Alan Cox : Sorted out a proper draft version of
16 * file descriptor passing hacked up from
18 * Marty Leisner : Fixes to fd passing
19 * Nick Nevin : recvmsg bugfix.
20 * Alan Cox : Started proper garbage collector
21 * Heiko EiBfeldt : Missing verify_area check
22 * Alan Cox : Started POSIXisms
23 * Andreas Schwab : Replace inode by dentry for proper
25 * Kirk Petersen : Made this a module
26 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
28 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
29 * by above two patches.
30 * Andrea Arcangeli : If possible we block in connect(2)
31 * if the max backlog of the listen socket
32 * has been reached. This won't break
33 * old apps and it will avoid huge amount
34 * of socks hashed (this for unix_gc()
35 * performances reasons).
36 * Security fix that limits the max
37 * number of socks to 2*max_files and
38 * the number of skb queueable in the
40 * Artur Skawina : Hash function optimizations
41 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
42 * Malcolm Beattie : Set peercred for socketpair
43 * Michal Ostrowski : Module initialization cleanup.
44 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
45 * the core infrastructure is doing that
46 * for all net proto families now (2.5.69+)
48 * Known differences from reference BSD that was tested:
51 * ECONNREFUSED is not returned from one end of a connected() socket to the
52 * other the moment one end closes.
53 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
54 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
56 * accept() returns a path name even if the connecting socket has closed
57 * in the meantime (BSD loses the path and gives up).
58 * accept() returns 0 length path for an unbound connector. BSD returns 16
59 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
61 * BSD af_unix apparently has connect forgetting to block properly.
62 * (need to check this with the POSIX spec in detail)
64 * Differences from 2.0.0-11-... (ANK)
65 * Bug fixes and improvements.
66 * - client shutdown killed server socket.
67 * - removed all useless cli/sti pairs.
69 * Semantic changes/extensions.
70 * - generic control message passing.
71 * - SCM_CREDENTIALS control message.
72 * - "Abstract" (not FS based) socket bindings.
73 * Abstract names are sequences of bytes (not zero terminated)
74 * started by 0, so that this name space does not intersect
78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
80 #include <linux/module.h>
81 #include <linux/kernel.h>
82 #include <linux/signal.h>
83 #include <linux/sched/signal.h>
84 #include <linux/errno.h>
85 #include <linux/string.h>
86 #include <linux/stat.h>
87 #include <linux/dcache.h>
88 #include <linux/namei.h>
89 #include <linux/socket.h>
91 #include <linux/fcntl.h>
92 #include <linux/termios.h>
93 #include <linux/sockios.h>
94 #include <linux/net.h>
97 #include <linux/slab.h>
98 #include <linux/uaccess.h>
99 #include <linux/skbuff.h>
100 #include <linux/netdevice.h>
101 #include <net/net_namespace.h>
102 #include <net/sock.h>
103 #include <net/tcp_states.h>
104 #include <net/af_unix.h>
105 #include <linux/proc_fs.h>
106 #include <linux/seq_file.h>
108 #include <linux/init.h>
109 #include <linux/poll.h>
110 #include <linux/rtnetlink.h>
111 #include <linux/mount.h>
112 #include <net/checksum.h>
113 #include <linux/security.h>
114 #include <linux/freezer.h>
115 #include <linux/file.h>
119 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
120 EXPORT_SYMBOL_GPL(unix_socket_table);
121 DEFINE_SPINLOCK(unix_table_lock);
122 EXPORT_SYMBOL_GPL(unix_table_lock);
123 static atomic_long_t unix_nr_socks;
126 static struct hlist_head *unix_sockets_unbound(void *addr)
128 unsigned long hash = (unsigned long)addr;
132 hash %= UNIX_HASH_SIZE;
133 return &unix_socket_table[UNIX_HASH_SIZE + hash];
136 #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
138 #ifdef CONFIG_SECURITY_NETWORK
139 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
141 UNIXCB(skb).secid = scm->secid;
144 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
146 scm->secid = UNIXCB(skb).secid;
149 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
151 return (scm->secid == UNIXCB(skb).secid);
154 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
157 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
160 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
164 #endif /* CONFIG_SECURITY_NETWORK */
167 * SMP locking strategy:
168 * hash table is protected with spinlock unix_table_lock
169 * each socket state is protected by separate spin lock.
172 static inline unsigned int unix_hash_fold(__wsum n)
174 unsigned int hash = (__force unsigned int)csum_fold(n);
177 return hash&(UNIX_HASH_SIZE-1);
180 #define unix_peer(sk) (unix_sk(sk)->peer)
182 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
184 return unix_peer(osk) == sk;
187 static inline int unix_may_send(struct sock *sk, struct sock *osk)
189 return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
192 static inline int unix_recvq_full(const struct sock *sk)
194 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
197 static inline int unix_recvq_full_lockless(const struct sock *sk)
199 return skb_queue_len_lockless(&sk->sk_receive_queue) >
200 READ_ONCE(sk->sk_max_ack_backlog);
/* Return s's peer with an extra reference taken under the state lock,
 * or NULL if unconnected.  Caller must sock_put() the result.
 */
struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);
216 static inline void unix_release_addr(struct unix_address *addr)
218 if (refcount_dec_and_test(&addr->refcnt))
223 * Check unix socket name:
224 * - should be not zero length.
225 * - if started by not zero, should be NULL terminated (FS object)
226 * - if started by zero, it is abstract name.
229 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
233 if (len <= sizeof(short) || len > sizeof(*sunaddr))
235 if (!sunaddr || sunaddr->sun_family != AF_UNIX)
237 if (sunaddr->sun_path[0]) {
239 * This may look like an off by one error but it is a bit more
240 * subtle. 108 is the longest valid AF_UNIX path for a binding.
241 * sun_path[108] doesn't as such exist. However in kernel space
242 * we are guaranteed that it is a valid memory location in our
243 * kernel address buffer.
245 ((char *)sunaddr)[len] = 0;
246 len = strlen(sunaddr->sun_path)+1+sizeof(short);
250 *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
254 static void __unix_remove_socket(struct sock *sk)
256 sk_del_node_init(sk);
259 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
261 WARN_ON(!sk_unhashed(sk));
262 sk_add_node(sk, list);
265 static inline void unix_remove_socket(struct sock *sk)
267 spin_lock(&unix_table_lock);
268 __unix_remove_socket(sk);
269 spin_unlock(&unix_table_lock);
272 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
274 spin_lock(&unix_table_lock);
275 __unix_insert_socket(list, sk);
276 spin_unlock(&unix_table_lock);
279 static struct sock *__unix_find_socket_byname(struct net *net,
280 struct sockaddr_un *sunname,
281 int len, int type, unsigned int hash)
285 sk_for_each(s, &unix_socket_table[hash ^ type]) {
286 struct unix_sock *u = unix_sk(s);
288 if (!net_eq(sock_net(s), net))
291 if (u->addr->len == len &&
292 !memcmp(u->addr->name, sunname, len))
300 static inline struct sock *unix_find_socket_byname(struct net *net,
301 struct sockaddr_un *sunname,
307 spin_lock(&unix_table_lock);
308 s = __unix_find_socket_byname(net, sunname, len, type, hash);
311 spin_unlock(&unix_table_lock);
315 static struct sock *unix_find_socket_byinode(struct inode *i)
319 spin_lock(&unix_table_lock);
321 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
322 struct dentry *dentry = unix_sk(s)->path.dentry;
324 if (dentry && d_backing_inode(dentry) == i) {
331 spin_unlock(&unix_table_lock);
335 /* Support code for asymmetrically connected dgram sockets
337 * If a datagram socket is connected to a socket not itself connected
338 * to the first socket (eg, /dev/log), clients may only enqueue more
339 * messages if the present receive queue of the server socket is not
340 * "too large". This means there's a second writeability condition
341 * poll and sendmsg need to test. The dgram recv code will do a wake
342 * up on the peer_wait wait queue of a socket upon reception of a
343 * datagram which needs to be propagated to sleeping would-be writers
344 * since these might not have sent anything so far. This can't be
345 * accomplished via poll_wait because the lifetime of the server
346 * socket might be less than that of its clients if these break their
347 * association with it or if the server socket is closed while clients
348 * are still connected to it and there's no way to inform "a polling
349 * implementation" that it should let go of a certain wait queue
351 * In order to propagate a wake up, a wait_queue_entry_t of the client
352 * socket is enqueued on the peer_wait queue of the server socket
353 * whose wake function does a wake_up on the ordinary client socket
354 * wait queue. This connection is established whenever a write (or
355 * poll for write) hit the flow control condition and broken when the
356 * association to the server socket is dissolved or after a wake up
360 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
364 wait_queue_head_t *u_sleep;
366 u = container_of(q, struct unix_sock, peer_wake);
368 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
370 u->peer_wake.private = NULL;
372 /* relaying can only happen while the wq still exists */
373 u_sleep = sk_sleep(&u->sk);
375 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
380 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
382 struct unix_sock *u, *u_other;
386 u_other = unix_sk(other);
388 spin_lock(&u_other->peer_wait.lock);
390 if (!u->peer_wake.private) {
391 u->peer_wake.private = other;
392 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
397 spin_unlock(&u_other->peer_wait.lock);
401 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
404 struct unix_sock *u, *u_other;
407 u_other = unix_sk(other);
408 spin_lock(&u_other->peer_wait.lock);
410 if (u->peer_wake.private == other) {
411 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
412 u->peer_wake.private = NULL;
415 spin_unlock(&u_other->peer_wait.lock);
418 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
421 unix_dgram_peer_wake_disconnect(sk, other);
422 wake_up_interruptible_poll(sk_sleep(sk),
429 * - unix_peer(sk) == other
430 * - association is stable
432 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
436 connected = unix_dgram_peer_wake_connect(sk, other);
438 /* If other is SOCK_DEAD, we want to make sure we signal
439 * POLLOUT, such that a subsequent write() can get a
440 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
441 * to other and its full, we will hang waiting for POLLOUT.
443 if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
447 unix_dgram_peer_wake_disconnect(sk, other);
452 static int unix_writable(const struct sock *sk)
454 return sk->sk_state != TCP_LISTEN &&
455 (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
458 static void unix_write_space(struct sock *sk)
460 struct socket_wq *wq;
463 if (unix_writable(sk)) {
464 wq = rcu_dereference(sk->sk_wq);
465 if (skwq_has_sleeper(wq))
466 wake_up_interruptible_sync_poll(&wq->wait,
467 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
468 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
473 /* When dgram socket disconnects (or changes its peer), we clear its receive
474 * queue of packets arrived from previous peer. First, it allows to do
475 * flow control based only on wmem_alloc; second, sk connected to peer
476 * may receive messages only from that peer. */
477 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
479 if (!skb_queue_empty(&sk->sk_receive_queue)) {
480 skb_queue_purge(&sk->sk_receive_queue);
481 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
483 /* If one link of bidirectional dgram pipe is disconnected,
484 * we signal error. Messages are lost. Do not make this,
485 * when peer was not connected to us.
487 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
488 other->sk_err = ECONNRESET;
489 other->sk_error_report(other);
494 static void unix_sock_destructor(struct sock *sk)
496 struct unix_sock *u = unix_sk(sk);
498 skb_queue_purge(&sk->sk_receive_queue);
500 WARN_ON(refcount_read(&sk->sk_wmem_alloc));
501 WARN_ON(!sk_unhashed(sk));
502 WARN_ON(sk->sk_socket);
503 if (!sock_flag(sk, SOCK_DEAD)) {
504 pr_info("Attempt to release alive unix socket: %p\n", sk);
509 unix_release_addr(u->addr);
511 atomic_long_dec(&unix_nr_socks);
513 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
515 #ifdef UNIX_REFCNT_DEBUG
516 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
517 atomic_long_read(&unix_nr_socks));
521 static void unix_release_sock(struct sock *sk, int embrion)
523 struct unix_sock *u = unix_sk(sk);
529 unix_remove_socket(sk);
534 sk->sk_shutdown = SHUTDOWN_MASK;
536 u->path.dentry = NULL;
538 state = sk->sk_state;
539 sk->sk_state = TCP_CLOSE;
541 skpair = unix_peer(sk);
542 unix_peer(sk) = NULL;
544 unix_state_unlock(sk);
546 wake_up_interruptible_all(&u->peer_wait);
548 if (skpair != NULL) {
549 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
550 unix_state_lock(skpair);
552 skpair->sk_shutdown = SHUTDOWN_MASK;
553 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
554 skpair->sk_err = ECONNRESET;
555 unix_state_unlock(skpair);
556 skpair->sk_state_change(skpair);
557 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
560 unix_dgram_peer_wake_disconnect(sk, skpair);
561 sock_put(skpair); /* It may now die */
564 /* Try to flush out this socket. Throw out buffers at least */
566 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
567 if (state == TCP_LISTEN)
568 unix_release_sock(skb->sk, 1);
569 /* passed fds are erased in the kfree_skb hook */
570 UNIXCB(skb).consumed = skb->len;
579 /* ---- Socket is dead now and most probably destroyed ---- */
582 * Fixme: BSD difference: In BSD all sockets connected to us get
583 * ECONNRESET and we die on the spot. In Linux we behave
584 * like files and pipes do and wait for the last
587 * Can't we simply set sock->err?
589 * What the above comment does talk about? --ANK(980817)
592 if (unix_tot_inflight)
593 unix_gc(); /* Garbage collect fds */
596 static void init_peercred(struct sock *sk)
598 const struct cred *old_cred;
601 spin_lock(&sk->sk_peer_lock);
602 old_pid = sk->sk_peer_pid;
603 old_cred = sk->sk_peer_cred;
604 sk->sk_peer_pid = get_pid(task_tgid(current));
605 sk->sk_peer_cred = get_current_cred();
606 spin_unlock(&sk->sk_peer_lock);
612 static void copy_peercred(struct sock *sk, struct sock *peersk)
614 const struct cred *old_cred;
618 spin_lock(&sk->sk_peer_lock);
619 spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
621 spin_lock(&peersk->sk_peer_lock);
622 spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
624 old_pid = sk->sk_peer_pid;
625 old_cred = sk->sk_peer_cred;
626 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
627 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
629 spin_unlock(&sk->sk_peer_lock);
630 spin_unlock(&peersk->sk_peer_lock);
636 static int unix_listen(struct socket *sock, int backlog)
639 struct sock *sk = sock->sk;
640 struct unix_sock *u = unix_sk(sk);
641 struct pid *old_pid = NULL;
644 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
645 goto out; /* Only stream/seqpacket sockets accept */
648 goto out; /* No listens on an unbound socket */
650 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
652 if (backlog > sk->sk_max_ack_backlog)
653 wake_up_interruptible_all(&u->peer_wait);
654 sk->sk_max_ack_backlog = backlog;
655 sk->sk_state = TCP_LISTEN;
656 /* set credentials so connect can copy them */
661 unix_state_unlock(sk);
667 static int unix_release(struct socket *);
668 static int unix_bind(struct socket *, struct sockaddr *, int);
669 static int unix_stream_connect(struct socket *, struct sockaddr *,
670 int addr_len, int flags);
671 static int unix_socketpair(struct socket *, struct socket *);
672 static int unix_accept(struct socket *, struct socket *, int, bool);
673 static int unix_getname(struct socket *, struct sockaddr *, int);
674 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
675 static __poll_t unix_dgram_poll(struct file *, struct socket *,
677 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
679 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
681 static int unix_shutdown(struct socket *, int);
682 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
683 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
684 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
685 size_t size, int flags);
686 static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
687 struct pipe_inode_info *, size_t size,
689 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
690 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
691 static int unix_dgram_connect(struct socket *, struct sockaddr *,
693 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
694 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
697 static int unix_set_peek_off(struct sock *sk, int val)
699 struct unix_sock *u = unix_sk(sk);
701 if (mutex_lock_interruptible(&u->iolock))
704 sk->sk_peek_off = val;
705 mutex_unlock(&u->iolock);
711 static const struct proto_ops unix_stream_ops = {
713 .owner = THIS_MODULE,
714 .release = unix_release,
716 .connect = unix_stream_connect,
717 .socketpair = unix_socketpair,
718 .accept = unix_accept,
719 .getname = unix_getname,
723 .compat_ioctl = unix_compat_ioctl,
725 .listen = unix_listen,
726 .shutdown = unix_shutdown,
727 .setsockopt = sock_no_setsockopt,
728 .getsockopt = sock_no_getsockopt,
729 .sendmsg = unix_stream_sendmsg,
730 .recvmsg = unix_stream_recvmsg,
731 .mmap = sock_no_mmap,
732 .sendpage = unix_stream_sendpage,
733 .splice_read = unix_stream_splice_read,
734 .set_peek_off = unix_set_peek_off,
737 static const struct proto_ops unix_dgram_ops = {
739 .owner = THIS_MODULE,
740 .release = unix_release,
742 .connect = unix_dgram_connect,
743 .socketpair = unix_socketpair,
744 .accept = sock_no_accept,
745 .getname = unix_getname,
746 .poll = unix_dgram_poll,
749 .compat_ioctl = unix_compat_ioctl,
751 .listen = sock_no_listen,
752 .shutdown = unix_shutdown,
753 .setsockopt = sock_no_setsockopt,
754 .getsockopt = sock_no_getsockopt,
755 .sendmsg = unix_dgram_sendmsg,
756 .recvmsg = unix_dgram_recvmsg,
757 .mmap = sock_no_mmap,
758 .sendpage = sock_no_sendpage,
759 .set_peek_off = unix_set_peek_off,
762 static const struct proto_ops unix_seqpacket_ops = {
764 .owner = THIS_MODULE,
765 .release = unix_release,
767 .connect = unix_stream_connect,
768 .socketpair = unix_socketpair,
769 .accept = unix_accept,
770 .getname = unix_getname,
771 .poll = unix_dgram_poll,
774 .compat_ioctl = unix_compat_ioctl,
776 .listen = unix_listen,
777 .shutdown = unix_shutdown,
778 .setsockopt = sock_no_setsockopt,
779 .getsockopt = sock_no_getsockopt,
780 .sendmsg = unix_seqpacket_sendmsg,
781 .recvmsg = unix_seqpacket_recvmsg,
782 .mmap = sock_no_mmap,
783 .sendpage = sock_no_sendpage,
784 .set_peek_off = unix_set_peek_off,
787 static struct proto unix_proto = {
789 .owner = THIS_MODULE,
790 .obj_size = sizeof(struct unix_sock),
793 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
795 struct sock *sk = NULL;
798 atomic_long_inc(&unix_nr_socks);
799 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
802 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
806 sock_init_data(sock, sk);
808 sk->sk_allocation = GFP_KERNEL_ACCOUNT;
809 sk->sk_write_space = unix_write_space;
810 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
811 sk->sk_destruct = unix_sock_destructor;
813 u->path.dentry = NULL;
815 spin_lock_init(&u->lock);
816 atomic_long_set(&u->inflight, 0);
817 INIT_LIST_HEAD(&u->link);
818 mutex_init(&u->iolock); /* single task reading lock */
819 mutex_init(&u->bindlock); /* single task binding lock */
820 init_waitqueue_head(&u->peer_wait);
821 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
822 unix_insert_socket(unix_sockets_unbound(sk), sk);
825 atomic_long_dec(&unix_nr_socks);
828 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
834 static int unix_create(struct net *net, struct socket *sock, int protocol,
837 if (protocol && protocol != PF_UNIX)
838 return -EPROTONOSUPPORT;
840 sock->state = SS_UNCONNECTED;
842 switch (sock->type) {
844 sock->ops = &unix_stream_ops;
847 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
851 sock->type = SOCK_DGRAM;
854 sock->ops = &unix_dgram_ops;
857 sock->ops = &unix_seqpacket_ops;
860 return -ESOCKTNOSUPPORT;
863 return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
866 static int unix_release(struct socket *sock)
868 struct sock *sk = sock->sk;
873 unix_release_sock(sk, 0);
879 static int unix_autobind(struct socket *sock)
881 struct sock *sk = sock->sk;
882 struct net *net = sock_net(sk);
883 struct unix_sock *u = unix_sk(sk);
884 static u32 ordernum = 1;
885 struct unix_address *addr;
887 unsigned int retries = 0;
889 err = mutex_lock_interruptible(&u->bindlock);
898 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
902 addr->name->sun_family = AF_UNIX;
903 refcount_set(&addr->refcnt, 1);
906 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
907 addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
909 spin_lock(&unix_table_lock);
910 ordernum = (ordernum+1)&0xFFFFF;
912 if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
914 spin_unlock(&unix_table_lock);
916 * __unix_find_socket_byname() may take long time if many names
917 * are already in use.
920 /* Give up if all names seems to be in use. */
921 if (retries++ == 0xFFFFF) {
928 addr->hash ^= sk->sk_type;
930 __unix_remove_socket(sk);
931 smp_store_release(&u->addr, addr);
932 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
933 spin_unlock(&unix_table_lock);
936 out: mutex_unlock(&u->bindlock);
/* Resolve a target socket for connect/sendto.  Filesystem names go
 * through a path lookup (requiring write permission on the inode) to
 * unix_find_socket_byinode(); abstract names are looked up by name hash.
 * Returns the socket with a reference held, or NULL with *error set
 * (-ECONNREFUSED, -EPROTOTYPE, or the path-lookup error).
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = d_backing_inode(path.dentry);
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
995 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
997 struct dentry *dentry;
1001 * Get the parent directory, calculate the hash for last
1004 dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
1005 err = PTR_ERR(dentry);
1010 * All right, let's create it.
1012 err = security_path_mknod(&path, dentry, mode, 0);
1014 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
1016 res->mnt = mntget(path.mnt);
1017 res->dentry = dget(dentry);
1020 done_path_create(&path, dentry);
1024 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1026 struct sock *sk = sock->sk;
1027 struct net *net = sock_net(sk);
1028 struct unix_sock *u = unix_sk(sk);
1029 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1030 char *sun_path = sunaddr->sun_path;
1033 struct unix_address *addr;
1034 struct hlist_head *list;
1035 struct path path = { };
1038 if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
1039 sunaddr->sun_family != AF_UNIX)
1042 if (addr_len == sizeof(short)) {
1043 err = unix_autobind(sock);
1047 err = unix_mkname(sunaddr, addr_len, &hash);
1053 umode_t mode = S_IFSOCK |
1054 (SOCK_INODE(sock)->i_mode & ~current_umask());
1055 err = unix_mknod(sun_path, mode, &path);
1063 err = mutex_lock_interruptible(&u->bindlock);
1072 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1076 memcpy(addr->name, sunaddr, addr_len);
1077 addr->len = addr_len;
1078 addr->hash = hash ^ sk->sk_type;
1079 refcount_set(&addr->refcnt, 1);
1082 addr->hash = UNIX_HASH_SIZE;
1083 hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1084 spin_lock(&unix_table_lock);
1086 list = &unix_socket_table[hash];
1088 spin_lock(&unix_table_lock);
1090 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1091 sk->sk_type, hash)) {
1092 unix_release_addr(addr);
1096 list = &unix_socket_table[addr->hash];
1100 __unix_remove_socket(sk);
1101 smp_store_release(&u->addr, addr);
1102 __unix_insert_socket(list, sk);
1105 spin_unlock(&unix_table_lock);
1107 mutex_unlock(&u->bindlock);
/* Lock two socket state locks in address order to avoid ABBA deadlock;
 * handles sk1 == sk2 and a NULL sk2 (single lock).
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}
/* Release the locks taken by unix_state_double_lock(). */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}
1140 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1141 int alen, int flags)
1143 struct sock *sk = sock->sk;
1144 struct net *net = sock_net(sk);
1145 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1151 if (alen < offsetofend(struct sockaddr, sa_family))
1154 if (addr->sa_family != AF_UNSPEC) {
1155 err = unix_mkname(sunaddr, alen, &hash);
1160 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1161 !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1165 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1169 unix_state_double_lock(sk, other);
1171 /* Apparently VFS overslept socket death. Retry. */
1172 if (sock_flag(other, SOCK_DEAD)) {
1173 unix_state_double_unlock(sk, other);
1179 if (!unix_may_send(sk, other))
1182 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1188 * 1003.1g breaking connected state with AF_UNSPEC
1191 unix_state_double_lock(sk, other);
1195 * If it was connected, reconnect.
1197 if (unix_peer(sk)) {
1198 struct sock *old_peer = unix_peer(sk);
1199 unix_peer(sk) = other;
1200 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1202 unix_state_double_unlock(sk, other);
1204 if (other != old_peer)
1205 unix_dgram_disconnected(sk, old_peer);
1208 unix_peer(sk) = other;
1209 unix_state_double_unlock(sk, other);
1214 unix_state_double_unlock(sk, other);
1220 static long unix_wait_for_peer(struct sock *other, long timeo)
1222 struct unix_sock *u = unix_sk(other);
1226 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1228 sched = !sock_flag(other, SOCK_DEAD) &&
1229 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1230 unix_recvq_full(other);
1232 unix_state_unlock(other);
1235 timeo = schedule_timeout(timeo);
1237 finish_wait(&u->peer_wait, &wait);
/* connect(2) for stream/seqpacket sockets: allocate the embryo sock and
 * notification skb up front, find the listener, block (or -EAGAIN) while
 * its backlog is full, then link the embryo to the connecting socket and
 * queue it on the listener's receive queue for accept().
 *
 * NOTE(review): this dump is missing many original source lines (braces,
 * gotos, the switch on sk->sk_state, error paths); the visible lines are
 * kept byte-identical below.  Restore from the upstream file rather than
 * reconstructing by hand.
 */
1241 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1242 int addr_len, int flags)
1244 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1245 struct sock *sk = sock->sk;
1246 struct net *net = sock_net(sk);
1247 struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1248 struct sock *newsk = NULL;
1249 struct sock *other = NULL;
1250 struct sk_buff *skb = NULL;
1256 err = unix_mkname(sunaddr, addr_len, &hash);
/* SOCK_PASSCRED requires a bound local address: autobind if needed */
1261 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1262 (err = unix_autobind(sock)) != 0)
1265 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1267 /* First of all allocate resources.
1268 If we will make it after state is locked,
1269 we will have to recheck all again in any case.
1274 /* create new sock for complete connection */
1275 newsk = unix_create1(sock_net(sk), NULL, 0);
1279 /* Allocate skb for sending to listening sock */
1280 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1285 /* Find listening sock. */
1286 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1290 /* Latch state of peer */
1291 unix_state_lock(other);
1293 /* Apparently VFS overslept socket death. Retry. */
1294 if (sock_flag(other, SOCK_DEAD)) {
1295 unix_state_unlock(other);
1300 err = -ECONNREFUSED;
1301 if (other->sk_state != TCP_LISTEN)
1303 if (other->sk_shutdown & RCV_SHUTDOWN)
/* Listener backlog full: wait for accept() to drain it (or -EAGAIN) */
1306 if (unix_recvq_full(other)) {
1311 timeo = unix_wait_for_peer(other, timeo);
1313 err = sock_intr_errno(timeo);
1314 if (signal_pending(current))
1322 It is tricky place. We need to grab our state lock and cannot
1323 drop lock on peer. It is dangerous because deadlock is
1324 possible. Connect to self case and simultaneous
1325 attempt to connect are eliminated by checking socket
1326 state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1327 check this before attempt to grab lock.
1329 Well, and we have to recheck the state after socket locked.
1335 /* This is ok... continue with connect */
1337 case TCP_ESTABLISHED:
1338 /* Socket is already connected */
1346 unix_state_lock_nested(sk);
1348 if (sk->sk_state != st) {
1349 unix_state_unlock(sk);
1350 unix_state_unlock(other);
1355 err = security_unix_stream_connect(sk, other, newsk);
1357 unix_state_unlock(sk);
1361 /* The way is open! Fastly set all the necessary fields... */
1364 unix_peer(newsk) = sk;
1365 newsk->sk_state = TCP_ESTABLISHED;
1366 newsk->sk_type = sk->sk_type;
1367 init_peercred(newsk);
1368 newu = unix_sk(newsk);
1369 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1370 otheru = unix_sk(other);
1372 /* copy address information from listening to new sock
1374 * The contents of *(otheru->addr) and otheru->path
1375 * are seen fully set up here, since we have found
1376 * otheru in hash under unix_table_lock. Insertion
1377 * into the hash chain we'd found it in had been done
1378 * in an earlier critical area protected by unix_table_lock,
1379 * the same one where we'd set *(otheru->addr) contents,
1380 * as well as otheru->path and otheru->addr itself.
1382 * Using smp_store_release() here to set newu->addr
1383 * is enough to make those stores, as well as stores
1384 * to newu->path visible to anyone who gets newu->addr
1385 * by smp_load_acquire(). IOW, the same warranties
1386 * as for unix_sock instances bound in unix_bind() or
1387 * in unix_autobind().
1389 if (otheru->path.dentry) {
1390 path_get(&otheru->path);
1391 newu->path = otheru->path;
1393 refcount_inc(&otheru->addr->refcnt);
1394 smp_store_release(&newu->addr, otheru->addr);
1396 /* Set credentials */
1397 copy_peercred(sk, other);
1399 sock->state = SS_CONNECTED;
1400 sk->sk_state = TCP_ESTABLISHED;
1403 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
1404 unix_peer(sk) = newsk;
1406 unix_state_unlock(sk);
1408 /* take ten and send info to listening sock */
1409 spin_lock(&other->sk_receive_queue.lock);
1410 __skb_queue_tail(&other->sk_receive_queue, skb);
1411 spin_unlock(&other->sk_receive_queue.lock);
1412 unix_state_unlock(other);
1413 other->sk_data_ready(other);
/* error path: drop the half-built embryo */
1419 unix_state_unlock(other);
1424 unix_release_sock(newsk, 0);
1430 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1432 struct sock *ska = socka->sk, *skb = sockb->sk;
1434 /* Join our sockets back to back */
1437 unix_peer(ska) = skb;
1438 unix_peer(skb) = ska;
1442 if (ska->sk_type != SOCK_DGRAM) {
1443 ska->sk_state = TCP_ESTABLISHED;
1444 skb->sk_state = TCP_ESTABLISHED;
1445 socka->state = SS_CONNECTED;
1446 sockb->state = SS_CONNECTED;
1451 static void unix_sock_inherit_flags(const struct socket *old,
1454 if (test_bit(SOCK_PASSCRED, &old->flags))
1455 set_bit(SOCK_PASSCRED, &new->flags);
1456 if (test_bit(SOCK_PASSSEC, &old->flags))
1457 set_bit(SOCK_PASSSEC, &new->flags);
1460 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1463 struct sock *sk = sock->sk;
1465 struct sk_buff *skb;
1469 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1473 if (sk->sk_state != TCP_LISTEN)
1476 /* If socket state is TCP_LISTEN it cannot change (for now...),
1477 * so that no locks are necessary.
1480 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1482 /* This means receive shutdown. */
1489 skb_free_datagram(sk, skb);
1490 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1492 /* attach accepted sock to socket */
1493 unix_state_lock(tsk);
1494 newsock->state = SS_CONNECTED;
1495 unix_sock_inherit_flags(sock, newsock);
1496 sock_graft(tsk, newsock);
1497 unix_state_unlock(tsk);
1505 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1507 struct sock *sk = sock->sk;
1508 struct unix_address *addr;
1509 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1513 sk = unix_peer_get(sk);
1523 addr = smp_load_acquire(&unix_sk(sk)->addr);
1525 sunaddr->sun_family = AF_UNIX;
1526 sunaddr->sun_path[0] = 0;
1527 err = sizeof(short);
1530 memcpy(sunaddr, addr->name, addr->len);
1537 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
1539 scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1542 * Garbage collection of unix sockets starts by selecting a set of
1543 * candidate sockets which have reference only from being in flight
1544 * (total_refs == inflight_refs). This condition is checked once during
1545 * the candidate collection phase, and candidates are marked as such, so
1546 * that non-candidates can later be ignored. While inflight_refs is
1547 * protected by unix_gc_lock, total_refs (file count) is not, hence this
1548 * is an instantaneous decision.
1550 * Once a candidate, however, the socket must not be reinstalled into a
1551 * file descriptor while the garbage collection is in progress.
1553 * If the above conditions are met, then the directed graph of
1554 * candidates (*) does not change while unix_gc_lock is held.
1556 * Any operations that changes the file count through file descriptors
1557 * (dup, close, sendmsg) does not change the graph since candidates are
1558 * not installed in fds.
1560 * Dequeing a candidate via recvmsg would install it into an fd, but
1561 * that takes unix_gc_lock to decrement the inflight count, so it's
1562 * serialized with garbage collection.
1564 * MSG_PEEK is special in that it does not change the inflight count,
1565 * yet does install the socket into an fd. The following lock/unlock
1566 * pair is to ensure serialization with garbage collection. It must be
1567 * done between incrementing the file count and installing the file into
1570 * If garbage collection starts after the barrier provided by the
1571 * lock/unlock, then it will see the elevated refcount and not mark this
1572 * as a candidate. If a garbage collection is already in progress
1573 * before the file count was incremented, then the lock/unlock pair will
1574 * ensure that garbage collection is finished before progressing to
1575 * installing the fd.
1577 * (*) A -> B where B is on the queue of A or B is on the queue of C
1578 * which is on the queue of listening socket A.
1580 spin_lock(&unix_gc_lock);
1581 spin_unlock(&unix_gc_lock);
1584 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1588 UNIXCB(skb).pid = get_pid(scm->pid);
1589 UNIXCB(skb).uid = scm->creds.uid;
1590 UNIXCB(skb).gid = scm->creds.gid;
1591 UNIXCB(skb).fp = NULL;
1592 unix_get_secdata(scm, skb);
1593 if (scm->fp && send_fds)
1594 err = unix_attach_fds(scm, skb);
1596 skb->destructor = unix_destruct_scm;
1600 static bool unix_passcred_enabled(const struct socket *sock,
1601 const struct sock *other)
1603 return test_bit(SOCK_PASSCRED, &sock->flags) ||
1604 !other->sk_socket ||
1605 test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1609 * Some apps rely on write() giving SCM_CREDENTIALS
1610 * We include credentials if source or destination socket
1611 * asserted SOCK_PASSCRED.
1613 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1614 const struct sock *other)
1616 if (UNIXCB(skb).pid)
1618 if (unix_passcred_enabled(sock, other)) {
1619 UNIXCB(skb).pid = get_pid(task_tgid(current));
1620 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1624 static int maybe_init_creds(struct scm_cookie *scm,
1625 struct socket *socket,
1626 const struct sock *other)
1629 struct msghdr msg = { .msg_controllen = 0 };
1631 err = scm_send(socket, &msg, scm, false);
1635 if (unix_passcred_enabled(socket, other)) {
1636 scm->pid = get_pid(task_tgid(current));
1637 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1642 static bool unix_skb_scm_eq(struct sk_buff *skb,
1643 struct scm_cookie *scm)
1645 const struct unix_skb_parms *u = &UNIXCB(skb);
1647 return u->pid == scm->pid &&
1648 uid_eq(u->uid, scm->creds.uid) &&
1649 gid_eq(u->gid, scm->creds.gid) &&
1650 unix_secdata_eq(scm, skb);
/*
 *	Send AF_UNIX data.
 */

static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
			      size_t len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	struct sk_buff *skb;
	struct scm_cookie scm;

	/* Pull SCM_RIGHTS / credentials out of the control message first. */
	err = scm_send(sock, msg, &scm, false);

	/* Datagram sockets have no out-of-band data. */
	if (msg->msg_flags&MSG_OOB)

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
	/* No explicit destination: fall back to the connected peer. */
	other = unix_peer_get(sk);

	/* Sending creds from an unbound socket: autobind it first. */
	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)

	if (len > sk->sk_sndbuf - 32)

	if (len > SKB_MAX_ALLOC) {
		/* Overflow beyond SKB_MAX_ALLOC goes into page frags. */
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);
		data_len = PAGE_ALIGN(data_len);

		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err,
				   PAGE_ALLOC_COSTLY_ORDER);

	err = unix_scm_to_skb(&scm, skb, true);

	skb_put(skb, len - data_len);
	skb->data_len = data_len;

	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

	if (sunaddr == NULL)

	/* Look the destination up by address in the hash table. */
	other = unix_find_other(net, sunaddr, namelen, sk->sk_type,

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */

	unix_state_lock(other);

	if (!unix_may_send(sk, other))

	if (unlikely(sock_flag(other, SOCK_DEAD))) {
		/*
		 *	Check with 1003.1g - what should
		 */
		unix_state_unlock(other);

	unix_state_lock(sk);

	/* Our peer died under us: drop the stale peer link and report
	 * ECONNREFUSED, like a refused connection.
	 */
	if (unix_peer(sk) == other) {
		unix_peer(sk) = NULL;
		unix_dgram_peer_wake_disconnect_wakeup(sk, other);

		unix_state_unlock(sk);

		unix_dgram_disconnected(sk, other);

		err = -ECONNREFUSED;

		unix_state_unlock(sk);

	if (other->sk_shutdown & RCV_SHUTDOWN)

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);

	/* other == sk && unix_peer(other) != sk if
	 * - unix_peer(sk) == NULL, destination address bound to sk
	 * - unix_peer(sk) == sk by time of get but disconnected before lock
	 */
	    unlikely(unix_peer(other) != sk &&
	    unix_recvq_full_lockless(other))) {

	/* Receiver queue full and we may block: wait for room. */
	timeo = unix_wait_for_peer(other, timeo);

	err = sock_intr_errno(timeo);
	if (signal_pending(current))

	unix_state_unlock(other);
	unix_state_double_lock(sk, other);

	/* Peer changed or its queue is still full: retry from the top
	 * with both locks held.
	 */
	if (unix_peer(sk) != other ||
	    unix_dgram_peer_wake_me(sk, other)) {

	goto restart_locked;

	if (unlikely(sk_locked))
		unix_state_unlock(sk);

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	maybe_add_creds(skb, sock, other);
	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	/* Tell the receiver there is data to read. */
	other->sk_data_ready(other);

	unix_state_unlock(sk);
	unix_state_unlock(other);
/* We use paged skbs for stream sockets, and limit occupancy to 32768
 * bytes, and a minimum of a full page.
 */
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))

static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
			       size_t len)
{
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	struct sk_buff *skb;
	struct scm_cookie scm;
	bool fds_sent = false;

	/* Capture control-message state (fds, creds) up front. */
	err = scm_send(sock, msg, &scm, false);

	/* No out-of-band data on AF_UNIX streams. */
	if (msg->msg_flags&MSG_OOB)

	/* Stream sockets are connection-oriented: an address is an error. */
	if (msg->msg_namelen) {
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;

	other = unix_peer(sk);

	if (sk->sk_shutdown & SEND_SHUTDOWN)

	/* Chop the payload into skb-sized chunks and queue each on the
	 * peer's receive queue.
	 */
	while (sent < len) {

		/* Keep two messages in the pipe so it schedules better */
		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);

		/* allow fallback to order-0 allocations */
		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);

		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));

		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));

		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
					   msg->msg_flags & MSG_DONTWAIT, &err,
					   get_order(UNIX_SKB_FRAGS_SZ));

		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(&scm, skb, !fds_sent);

		skb_put(skb, size - data_len);
		skb->data_len = data_len;

		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);

		unix_state_lock(other);

		/* Peer gone or stopped reading: bail out (EPIPE path). */
		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))

		maybe_add_creds(skb, sock, other);
		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other);

	unix_state_unlock(other);

	/* Raise SIGPIPE only if nothing was sent and not MSG_NOSIGNAL. */
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);

	/* Partial writes report the byte count, not the error. */
	return sent ? : err;
/* sendpage() for stream sockets: append a page fragment to the last skb on
 * the peer's receive queue when it came from the same writer, otherwise
 * queue a fresh zero-length skb to carry the fragment.
 */
static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
				    int offset, size_t size, int flags)
{
	bool send_sigpipe = false;
	bool init_scm = true;
	struct scm_cookie scm;
	struct sock *other, *sk = socket->sk;
	struct sk_buff *skb, *newskb = NULL, *tail = NULL;

	if (flags & MSG_OOB)

	other = unix_peer(sk);
	if (!other || sk->sk_state != TCP_ESTABLISHED)

	/* Allocation may sleep: drop the locks while we do it. */
	unix_state_unlock(other);
	mutex_unlock(&unix_sk(other)->iolock);
	newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,

	/* we must acquire iolock as we modify already present
	 * skbs in the sk_receive_queue and mess with skb->len
	 */
	err = mutex_lock_interruptible(&unix_sk(other)->iolock);

	err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;

	if (sk->sk_shutdown & SEND_SHUTDOWN) {
		send_sigpipe = true;

	unix_state_lock(other);

	if (sock_flag(other, SOCK_DEAD) ||
	    other->sk_shutdown & RCV_SHUTDOWN) {
		send_sigpipe = true;
		goto err_state_unlock;

	err = maybe_init_creds(&scm, socket, other);

	goto err_state_unlock;

	/* Decide whether to glue onto the queue tail or use a new skb. */
	skb = skb_peek_tail(&other->sk_receive_queue);
	if (tail && tail == skb) {
	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
	} else if (newskb) {
		/* this is fast path, we don't necessarily need to
		 * call to kfree_skb even though with newskb == NULL
		 * this - does no harm
		 */
		consume_skb(newskb);

	if (skb_append_pagefrags(skb, page, offset, size)) {

	/* Account the appended bytes against the sender's write memory. */
	skb->data_len += size;
	skb->truesize += size;
	refcount_add(size, &sk->sk_wmem_alloc);

	err = unix_scm_to_skb(&scm, skb, false);

	goto err_state_unlock;
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, newskb);
	spin_unlock(&other->sk_receive_queue.lock);

	unix_state_unlock(other);
	mutex_unlock(&unix_sk(other)->iolock);

	other->sk_data_ready(other);

	unix_state_unlock(other);

	mutex_unlock(&unix_sk(other)->iolock);

	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
2079 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2083 struct sock *sk = sock->sk;
2085 err = sock_error(sk);
2089 if (sk->sk_state != TCP_ESTABLISHED)
2092 if (msg->msg_namelen)
2093 msg->msg_namelen = 0;
2095 return unix_dgram_sendmsg(sock, msg, len);
2098 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2099 size_t size, int flags)
2101 struct sock *sk = sock->sk;
2103 if (sk->sk_state != TCP_ESTABLISHED)
2106 return unix_dgram_recvmsg(sock, msg, size, flags);
2109 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2111 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2114 msg->msg_namelen = addr->len;
2115 memcpy(msg->msg_name, addr->name, addr->len);
static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
			      size_t size, int flags)
{
	struct scm_cookie scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sk_buff *skb, *last;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	/* iolock serializes concurrent receivers and keeps the peek
	 * offset coherent.
	 */
	mutex_lock(&u->iolock);

	skip = sk_peek_offset(sk, flags);
	skb = __skb_try_recv_datagram(sk, flags, NULL, &skip, &err,

	mutex_unlock(&u->iolock);

	!__skb_wait_for_more_packets(sk, &err, &timeo, last));

	if (!skb) { /* implies iolock unlocked */
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
		unix_state_unlock(sk);

	/* A datagram was consumed: a sender blocked on a full queue may
	 * proceed now.
	 */
	if (wq_has_sleeper(&u->peer_wait))
		wake_up_interruptible_sync_poll(&u->peer_wait,
						EPOLLOUT | EPOLLWRNORM |

	unix_copy_addr(msg, skb->sk);

	/* Clamp to the datagram size; a short read sets MSG_TRUNC. */
	if (size > skb->len - skip)
		size = skb->len - skip;
	else if (size < skb->len - skip)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_msg(skb, skip, msg, size);

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	memset(&scm, 0, sizeof(scm));

	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
	unix_set_secdata(&scm, skb);

	if (!(flags & MSG_PEEK)) {
		unix_detach_fds(&scm, skb);

		sk_peek_offset_bwd(sk, skb->len);

	/* It is questionable: on PEEK we could:
	   - do not return fds - good, but too simple 8)
	   - return fds, and do not return them on read (old strategy,
	   - clone fds (I chose it for now, it is the most universal

	   POSIX 1003.1g does not actually define this clearly
	   at all. POSIX 1003.1g doesn't define a lot of things

		sk_peek_offset_fwd(sk, size);

		unix_peek_fds(&scm, skb);

	/* With MSG_TRUNC report the real datagram length, else the copy. */
	err = (flags & MSG_TRUNC) ? skb->len - skip : size;

	scm_recv(sock, msg, &scm, flags);

	skb_free_datagram(sk, skb);
	mutex_unlock(&u->iolock);
/*
 *	Sleep until more data has arrived. But check for races..
 */
static long unix_stream_data_wait(struct sock *sk, long timeo,
				  struct sk_buff *last, unsigned int last_len,
				  bool freezable)
{
	struct sk_buff *tail;

	unix_state_lock(sk);

	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

	/* Wake conditions: queue tail changed, error/shutdown, pending
	 * signal or timeout expiry.
	 */
	tail = skb_peek_tail(&sk->sk_receive_queue);
	(tail && tail->len != last_len) ||
	(sk->sk_shutdown & RCV_SHUTDOWN) ||
	signal_pending(current) ||

	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	unix_state_unlock(sk);

	/* A freezable sleep lets system suspend proceed while blocked. */
	timeo = freezable_schedule_timeout(timeo);

	timeo = schedule_timeout(timeo);
	unix_state_lock(sk);

	if (sock_flag(sk, SOCK_DEAD))

	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
2265 static unsigned int unix_skb_len(const struct sk_buff *skb)
2267 return skb->len - UNIXCB(skb).consumed;
/* Shared state for the generic stream receive path and its two front ends
 * (recvmsg and splice); recv_actor copies one chunk to the destination
 * (user iovec or pipe) and returns the byte count or a negative errno.
 */
struct unix_stream_read_state {
	int (*recv_actor)(struct sk_buff *, int, int,
			  struct unix_stream_read_state *);
	struct socket *socket;		/* socket being read */
	struct pipe_inode_info *pipe;	/* splice destination (splice only) */
	unsigned int splice_flags;	/* SPLICE_F_* flags (splice only) */
/* Core stream receive loop shared by recvmsg and splice. */
static int unix_stream_read_generic(struct unix_stream_read_state *state,
	struct scm_cookie scm;
	struct socket *sock = state->socket;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int flags = state->flags;
	int noblock = flags & MSG_DONTWAIT;
	bool check_creds = false;
	size_t size = state->size;
	unsigned int last_len;

	/* Reading from a not-yet-connected stream socket is an error. */
	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {

	if (unlikely(flags & MSG_OOB)) {

	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, noblock);

	memset(&scm, 0, sizeof(scm));

	/* Lock the socket to prevent queue disordering
	 * while sleeps in memcpy_tomsg
	 */
	mutex_lock(&u->iolock);

	skip = max(sk_peek_offset(sk, flags), 0);

	struct sk_buff *skb, *last;

	unix_state_lock(sk);
	if (sock_flag(sk, SOCK_DEAD)) {

	last = skb = skb_peek(&sk->sk_receive_queue);
	last_len = last ? last->len : 0;

	/* Queue empty: stop once the low-water target has been met. */
	if (copied >= target)

	/*
	 *	POSIX 1003.1g mandates this order.
	 */
	err = sock_error(sk);

	if (sk->sk_shutdown & RCV_SHUTDOWN)

	unix_state_unlock(sk);

	/* Drop iolock while sleeping so writers can make progress. */
	mutex_unlock(&u->iolock);

	timeo = unix_stream_data_wait(sk, timeo, last,
				      last_len, freezable);

	if (signal_pending(current)) {
		err = sock_intr_errno(timeo);

	mutex_lock(&u->iolock);

	unix_state_unlock(sk);

	/* Consume the peek offset by stepping over fully skipped skbs. */
	while (skip >= unix_skb_len(skb)) {
		skip -= unix_skb_len(skb);
		last_len = skb->len;
		skb = skb_peek_next(skb, &sk->sk_receive_queue);

	unix_state_unlock(sk);

	/* Never glue messages from different writers */
	if (!unix_skb_scm_eq(skb, &scm))
	} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
		/* Copy credentials */
		scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
		unix_set_secdata(&scm, skb);

	/* Copy address just once */
	if (state->msg && state->msg->msg_name) {
		DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
				 state->msg->msg_name);
		unix_copy_addr(state->msg, skb->sk);

	chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);

	/* Hand the chunk to recvmsg/splice; the actor may sleep. */
	chunk = state->recv_actor(skb, skip, chunk, state);
	drop_skb = !unix_skb_len(skb);
	/* skb is only safe to use if !drop_skb */

	/* the skb was touched by a concurrent reader;
	 * we should not expect anything from this skb
	 * anymore and assume it invalid - we can be
	 * sure it was dropped from the socket queue
	 *
	 * let's report a short read
	 */

	/* Mark read part of skb as used */
	if (!(flags & MSG_PEEK)) {
		UNIXCB(skb).consumed += chunk;

		sk_peek_offset_bwd(sk, chunk);

		unix_detach_fds(&scm, skb);

		if (unix_skb_len(skb))

		skb_unlink(skb, &sk->sk_receive_queue);

		/* It is questionable, see note in unix_dgram_recvmsg.
		 */
		unix_peek_fds(&scm, skb);

		sk_peek_offset_fwd(sk, chunk);

		last_len = skb->len;
		unix_state_lock(sk);
		skb = skb_peek_next(skb, &sk->sk_receive_queue);

		unix_state_unlock(sk);

	mutex_unlock(&u->iolock);

	scm_recv(sock, state->msg, &scm, flags);

	/* Partial reads report the byte count, not the error. */
	return copied ? : err;
2478 static int unix_stream_read_actor(struct sk_buff *skb,
2479 int skip, int chunk,
2480 struct unix_stream_read_state *state)
2484 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2486 return ret ?: chunk;
2489 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2490 size_t size, int flags)
2492 struct unix_stream_read_state state = {
2493 .recv_actor = unix_stream_read_actor,
2500 return unix_stream_read_generic(&state, true);
2503 static int unix_stream_splice_actor(struct sk_buff *skb,
2504 int skip, int chunk,
2505 struct unix_stream_read_state *state)
2507 return skb_splice_bits(skb, state->socket->sk,
2508 UNIXCB(skb).consumed + skip,
2509 state->pipe, chunk, state->splice_flags);
2512 static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
2513 struct pipe_inode_info *pipe,
2514 size_t size, unsigned int flags)
2516 struct unix_stream_read_state state = {
2517 .recv_actor = unix_stream_splice_actor,
2521 .splice_flags = flags,
2524 if (unlikely(*ppos))
2527 if (sock->file->f_flags & O_NONBLOCK ||
2528 flags & SPLICE_F_NONBLOCK)
2529 state.flags = MSG_DONTWAIT;
2531 return unix_stream_read_generic(&state, false);
static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;

	if (mode < SHUT_RD || mode > SHUT_RDWR)

	/* Map the SHUT_* constants onto the internal shutdown bit mask:
	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
	 */

	unix_state_lock(sk);
	sk->sk_shutdown |= mode;
	other = unix_peer(sk);

	unix_state_unlock(sk);
	sk->sk_state_change(sk);

	/* Connection-oriented sockets mirror the shutdown onto the peer,
	 * with the directions swapped (our RCV is the peer's SEND).
	 */
	(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {

	if (mode&RCV_SHUTDOWN)
		peer_mode |= SEND_SHUTDOWN;
	if (mode&SEND_SHUTDOWN)
		peer_mode |= RCV_SHUTDOWN;
	unix_state_lock(other);
	other->sk_shutdown |= peer_mode;
	unix_state_unlock(other);
	other->sk_state_change(other);
	/* Wake poll()ers on the peer with the appropriate event. */
	if (peer_mode == SHUTDOWN_MASK)
		sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
	else if (peer_mode & RCV_SHUTDOWN)
		sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2580 long unix_inq_len(struct sock *sk)
2582 struct sk_buff *skb;
2585 if (sk->sk_state == TCP_LISTEN)
2588 spin_lock(&sk->sk_receive_queue.lock);
2589 if (sk->sk_type == SOCK_STREAM ||
2590 sk->sk_type == SOCK_SEQPACKET) {
2591 skb_queue_walk(&sk->sk_receive_queue, skb)
2592 amount += unix_skb_len(skb);
2594 skb = skb_peek(&sk->sk_receive_queue);
2598 spin_unlock(&sk->sk_receive_queue.lock);
2602 EXPORT_SYMBOL_GPL(unix_inq_len);
/* Bytes queued for output (SIOCOUTQ): data sent but not yet consumed by
 * the peer, as tracked by the socket's write-memory accounting.
 */
long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2610 static int unix_open_file(struct sock *sk)
2616 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2619 if (!smp_load_acquire(&unix_sk(sk)->addr))
2622 path = unix_sk(sk)->path;
2628 fd = get_unused_fd_flags(O_CLOEXEC);
2632 f = dentry_open(&path, O_PATH, current_cred());
2646 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2648 struct sock *sk = sock->sk;
2654 amount = unix_outq_len(sk);
2655 err = put_user(amount, (int __user *)arg);
2658 amount = unix_inq_len(sk);
2662 err = put_user(amount, (int __user *)arg);
2665 err = unix_open_file(sk);
#ifdef CONFIG_COMPAT
/* 32-bit compat ioctl: all our ioctls take a pointer-sized argument, so
 * converting via compat_ptr() and delegating is sufficient.
 */
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
}
#endif
2681 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2683 struct sock *sk = sock->sk;
2686 sock_poll_wait(file, sock, wait);
2689 /* exceptional events? */
2692 if (sk->sk_shutdown == SHUTDOWN_MASK)
2694 if (sk->sk_shutdown & RCV_SHUTDOWN)
2695 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2698 if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2699 mask |= EPOLLIN | EPOLLRDNORM;
2701 /* Connection-based need to check for termination and startup */
2702 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2703 sk->sk_state == TCP_CLOSE)
2707 * we set writable also when the other side has shut down the
2708 * connection. This prevents stuck sockets.
2710 if (unix_writable(sk))
2711 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
/* poll() for datagram (and pre-connection seqpacket) sockets; writability
 * additionally depends on the connected peer's receive queue.
 */
static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
	struct sock *sk = sock->sk, *other;
	unsigned int writable;

	sock_poll_wait(file, sock, wait);

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
		(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))

	writable = unix_writable(sk);

	unix_state_lock(sk);

	/* A dgram sender is not writable while the peer's receive queue
	 * is full; unix_dgram_peer_wake_me() also registers us on the
	 * peer's wait queue for a wakeup when room appears.
	 */
	other = unix_peer(sk);
	if (other && unix_peer(other) != sk &&
	    unix_recvq_full_lockless(other) &&
	    unix_dgram_peer_wake_me(sk, other))

	unix_state_unlock(sk);

	mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;

	sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
#ifdef CONFIG_PROC_FS

/* The /proc/net/unix seq_file iterator packs its position (loff_t *pos)
 * as: hash-bucket index in the high bits, 1-based offset within that
 * bucket in the low BUCKET_SPACE bits.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
/* Return the socket at the offset encoded in *pos within its bucket,
 * counting only sockets belonging to this seq_file's netns; NULL when the
 * bucket is exhausted.  Caller holds unix_table_lock.
 */
static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
{
	unsigned long offset = get_offset(*pos);
	unsigned long bucket = get_bucket(*pos);
	unsigned long count = 0;

	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
		/* Skip sockets from other network namespaces. */
		if (sock_net(sk) != seq_file_net(seq))
		if (++count == offset)
/* Advance the iterator: first continue within the current bucket, then
 * scan subsequent buckets (offset restarts at 1) until a socket in this
 * netns is found or the table is exhausted.
 */
static struct sock *unix_next_socket(struct seq_file *seq,
	unsigned long bucket;

	while (sk > (struct sock *)SEQ_START_TOKEN) {
		if (sock_net(sk) == seq_file_net(seq))

	sk = unix_from_bucket(seq, pos);

	/* Move on to the next bucket. */
	bucket = get_bucket(*pos) + 1;
	*pos = set_bucket_offset(bucket, 1);
	} while (bucket < ARRAY_SIZE(unix_socket_table));
2826 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2827 __acquires(unix_table_lock)
2829 spin_lock(&unix_table_lock);
2832 return SEQ_START_TOKEN;
2834 if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2837 return unix_next_socket(seq, NULL, pos);
2840 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2843 return unix_next_socket(seq, v, pos);
2846 static void unix_seq_stop(struct seq_file *seq, void *v)
2847 __releases(unix_table_lock)
2849 spin_unlock(&unix_table_lock);
/* seq_file .show: emit one /proc/net/unix row (or the header line). */
static int unix_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num RefCount Protocol Flags Type St "
		struct unix_sock *u = unix_sk(s);

		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			   refcount_read(&s->sk_refcnt),
			   s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			   /* Map sk_state to the BSD SS_* values userspace expects;
			    * connection-oriented and dgram sockets map differently.
			    */
			   (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			   (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),

		if (u->addr) {	/* under unix_table_lock here */

			/* Skip the sun_family prefix of the stored address. */
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))

			/* Abstract names may contain NULs; print them as '@'. */
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i] ?:

		unix_state_unlock(s);
		seq_putc(seq, '\n');
2897 static const struct seq_operations unix_seq_ops = {
2898 .start = unix_seq_start,
2899 .next = unix_seq_next,
2900 .stop = unix_seq_stop,
2901 .show = unix_seq_show,
2905 static const struct net_proto_family unix_family_ops = {
2907 .create = unix_create,
2908 .owner = THIS_MODULE,
2912 static int __net_init unix_net_init(struct net *net)
2914 int error = -ENOMEM;
2916 net->unx.sysctl_max_dgram_qlen = 10;
2917 if (unix_sysctl_register(net))
2920 #ifdef CONFIG_PROC_FS
2921 if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2922 sizeof(struct seq_net_private))) {
2923 unix_sysctl_unregister(net);
2932 static void __net_exit unix_net_exit(struct net *net)
2934 unix_sysctl_unregister(net);
2935 remove_proc_entry("unix", net->proc_net);
2938 static struct pernet_operations unix_net_ops = {
2939 .init = unix_net_init,
2940 .exit = unix_net_exit,
2943 static int __init af_unix_init(void)
2947 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2949 rc = proto_register(&unix_proto, 1);
2951 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2955 sock_register(&unix_family_ops);
2956 register_pernet_subsys(&unix_net_ops);
2961 static void __exit af_unix_exit(void)
2963 sock_unregister(PF_UNIX);
2964 proto_unregister(&unix_proto);
2965 unregister_pernet_subsys(&unix_net_ops);
/* Earlier than device_initcall() so that other drivers invoking
 * request_module() don't end up in a loop when modprobe tries
 * to use a UNIX socket. But later than subsys_initcall() because
 * we depend on stuff initialised there.
 */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);