2 * NET4: Implementation of BSD Unix domain sockets.
4 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
12 * Linus Torvalds : Assorted bug cures.
13 * Niibe Yutaka : async I/O support.
14 * Carsten Paeth : PF_UNIX check, address fixes.
15 * Alan Cox : Limit size of allocated blocks.
16 * Alan Cox : Fixed the stupid socketpair bug.
17 * Alan Cox : BSD compatibility fine tuning.
18 * Alan Cox : Fixed a bug in connect when interrupted.
19 * Alan Cox : Sorted out a proper draft version of
20 * file descriptor passing hacked up from
22 * Marty Leisner : Fixes to fd passing
23 * Nick Nevin : recvmsg bugfix.
24 * Alan Cox : Started proper garbage collector
25 * Heiko EiBfeldt : Missing verify_area check
26 * Alan Cox : Started POSIXisms
27 * Andreas Schwab : Replace inode by dentry for proper
29 * Kirk Petersen : Made this a module
30 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
32 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
33 * by the above two patches.
34 * Andrea Arcangeli : If possible we block in connect(2)
35 * if the max backlog of the listen socket
36 * has been reached. This won't break
37 * old apps and it will avoid huge amount
38 * of socks hashed (this for unix_gc()
39 * performance reasons).
40 * Security fix that limits the max
41 * number of socks to 2*max_files and
42 * the number of skb queueable in the
44 * Artur Skawina : Hash function optimizations
45 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
46 * Malcolm Beattie : Set peercred for socketpair
47 * Michal Ostrowski : Module initialization cleanup.
48 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
49 * the core infrastructure is doing that
50 * for all net proto families now (2.5.69+)
53 * Known differences from reference BSD that was tested:
56 * ECONNREFUSED is not returned from one end of a connected() socket to the
57 * other the moment one end closes.
58 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
59 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
61 * accept() returns a path name even if the connecting socket has closed
62 * in the meantime (BSD loses the path and gives up).
63 * accept() returns 0 length path for an unbound connector. BSD returns 16
64 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
66 * BSD af_unix apparently has connect forgetting to block properly.
67 * (need to check this with the POSIX spec in detail)
69 * Differences from 2.0.0-11-... (ANK)
70 * Bug fixes and improvements.
71 * - client shutdown killed server socket.
72 * - removed all useless cli/sti pairs.
74 * Semantic changes/extensions.
75 * - generic control message passing.
76 * - SCM_CREDENTIALS control message.
77 * - "Abstract" (not FS based) socket bindings.
78 * Abstract names are sequences of bytes (not zero terminated)
79 * started by 0, so that this name space does not intersect
83 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
85 #include <linux/module.h>
86 #include <linux/kernel.h>
87 #include <linux/signal.h>
88 #include <linux/sched.h>
89 #include <linux/errno.h>
90 #include <linux/string.h>
91 #include <linux/stat.h>
92 #include <linux/dcache.h>
93 #include <linux/namei.h>
94 #include <linux/socket.h>
96 #include <linux/fcntl.h>
97 #include <linux/termios.h>
98 #include <linux/sockios.h>
99 #include <linux/net.h>
100 #include <linux/in.h>
101 #include <linux/fs.h>
102 #include <linux/slab.h>
103 #include <asm/uaccess.h>
104 #include <linux/skbuff.h>
105 #include <linux/netdevice.h>
106 #include <net/net_namespace.h>
107 #include <net/sock.h>
108 #include <net/tcp_states.h>
109 #include <net/af_unix.h>
110 #include <linux/proc_fs.h>
111 #include <linux/seq_file.h>
113 #include <linux/init.h>
114 #include <linux/poll.h>
115 #include <linux/rtnetlink.h>
116 #include <linux/mount.h>
117 #include <net/checksum.h>
118 #include <linux/security.h>
119 #include <linux/freezer.h>
123 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
124 EXPORT_SYMBOL_GPL(unix_socket_table);
125 DEFINE_SPINLOCK(unix_table_lock);
126 EXPORT_SYMBOL_GPL(unix_table_lock);
127 static atomic_long_t unix_nr_socks;
130 static struct hlist_head *unix_sockets_unbound(void *addr)
132 unsigned long hash = (unsigned long)addr;
136 hash %= UNIX_HASH_SIZE;
137 return &unix_socket_table[UNIX_HASH_SIZE + hash];
140 #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
142 #ifdef CONFIG_SECURITY_NETWORK
143 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
145 UNIXCB(skb).secid = scm->secid;
148 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
150 scm->secid = UNIXCB(skb).secid;
153 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
155 return (scm->secid == UNIXCB(skb).secid);
158 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
161 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
164 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
168 #endif /* CONFIG_SECURITY_NETWORK */
171 * SMP locking strategy:
172 * hash table is protected with spinlock unix_table_lock
173 * each socket state is protected by separate spin lock.
176 static inline unsigned int unix_hash_fold(__wsum n)
178 unsigned int hash = (__force unsigned int)csum_fold(n);
181 return hash&(UNIX_HASH_SIZE-1);
184 #define unix_peer(sk) (unix_sk(sk)->peer)
186 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
188 return unix_peer(osk) == sk;
191 static inline int unix_may_send(struct sock *sk, struct sock *osk)
193 return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
196 static inline int unix_recvq_full(const struct sock *sk)
198 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
201 static inline int unix_recvq_full_lockless(const struct sock *sk)
203 return skb_queue_len_lockless(&sk->sk_receive_queue) >
204 READ_ONCE(sk->sk_max_ack_backlog);
/* Return the peer of @s with a reference held, or NULL if unconnected.
 * Caller must sock_put() the result.
 */
struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);
220 static inline void unix_release_addr(struct unix_address *addr)
222 if (atomic_dec_and_test(&addr->refcnt))
227 * Check unix socket name:
228 * - should be not zero length.
229 * - if started by not zero, should be NULL terminated (FS object)
230 * - if started by zero, it is abstract name.
233 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
237 if (len <= sizeof(short) || len > sizeof(*sunaddr))
239 if (!sunaddr || sunaddr->sun_family != AF_UNIX)
241 if (sunaddr->sun_path[0]) {
243 * This may look like an off by one error but it is a bit more
244 * subtle. 108 is the longest valid AF_UNIX path for a binding.
245 * sun_path[108] doesn't as such exist. However in kernel space
246 * we are guaranteed that it is a valid memory location in our
247 * kernel address buffer.
249 ((char *)sunaddr)[len] = 0;
250 len = strlen(sunaddr->sun_path)+1+sizeof(short);
254 *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
258 static void __unix_remove_socket(struct sock *sk)
260 sk_del_node_init(sk);
263 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
265 WARN_ON(!sk_unhashed(sk));
266 sk_add_node(sk, list);
269 static inline void unix_remove_socket(struct sock *sk)
271 spin_lock(&unix_table_lock);
272 __unix_remove_socket(sk);
273 spin_unlock(&unix_table_lock);
276 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
278 spin_lock(&unix_table_lock);
279 __unix_insert_socket(list, sk);
280 spin_unlock(&unix_table_lock);
283 static struct sock *__unix_find_socket_byname(struct net *net,
284 struct sockaddr_un *sunname,
285 int len, int type, unsigned int hash)
289 sk_for_each(s, &unix_socket_table[hash ^ type]) {
290 struct unix_sock *u = unix_sk(s);
292 if (!net_eq(sock_net(s), net))
295 if (u->addr->len == len &&
296 !memcmp(u->addr->name, sunname, len))
304 static inline struct sock *unix_find_socket_byname(struct net *net,
305 struct sockaddr_un *sunname,
311 spin_lock(&unix_table_lock);
312 s = __unix_find_socket_byname(net, sunname, len, type, hash);
315 spin_unlock(&unix_table_lock);
319 static struct sock *unix_find_socket_byinode(struct inode *i)
323 spin_lock(&unix_table_lock);
325 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
326 struct dentry *dentry = unix_sk(s)->path.dentry;
328 if (dentry && d_real_inode(dentry) == i) {
335 spin_unlock(&unix_table_lock);
339 /* Support code for asymmetrically connected dgram sockets
341 * If a datagram socket is connected to a socket not itself connected
342 * to the first socket (eg, /dev/log), clients may only enqueue more
343 * messages if the present receive queue of the server socket is not
344 * "too large". This means there's a second writeability condition
345 * poll and sendmsg need to test. The dgram recv code will do a wake
346 * up on the peer_wait wait queue of a socket upon reception of a
347 * datagram which needs to be propagated to sleeping would-be writers
348 * since these might not have sent anything so far. This can't be
349 * accomplished via poll_wait because the lifetime of the server
350 * socket might be less than that of its clients if these break their
351 * association with it or if the server socket is closed while clients
352 * are still connected to it and there's no way to inform "a polling
353 * implementation" that it should let go of a certain wait queue
355 * In order to propagate a wake up, a wait_queue_t of the client
356 * socket is enqueued on the peer_wait queue of the server socket
357 * whose wake function does a wake_up on the ordinary client socket
358 * wait queue. This connection is established whenever a write (or
359 * poll for write) hit the flow control condition and broken when the
360 * association to the server socket is dissolved or after a wake up
364 static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
368 wait_queue_head_t *u_sleep;
370 u = container_of(q, struct unix_sock, peer_wake);
372 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
374 u->peer_wake.private = NULL;
376 /* relaying can only happen while the wq still exists */
377 u_sleep = sk_sleep(&u->sk);
379 wake_up_interruptible_poll(u_sleep, key);
384 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
386 struct unix_sock *u, *u_other;
390 u_other = unix_sk(other);
392 spin_lock(&u_other->peer_wait.lock);
394 if (!u->peer_wake.private) {
395 u->peer_wake.private = other;
396 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
401 spin_unlock(&u_other->peer_wait.lock);
405 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
408 struct unix_sock *u, *u_other;
411 u_other = unix_sk(other);
412 spin_lock(&u_other->peer_wait.lock);
414 if (u->peer_wake.private == other) {
415 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
416 u->peer_wake.private = NULL;
419 spin_unlock(&u_other->peer_wait.lock);
422 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
425 unix_dgram_peer_wake_disconnect(sk, other);
426 wake_up_interruptible_poll(sk_sleep(sk),
/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 *
 * Returns 1 (and leaves the wake association armed) if @other's queue is
 * still full and the caller should sleep; 0 otherwise.
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	if (unix_recvq_full(other))
		return 1;

	/* Queue drained between connect and check: undo our enqueue. */
	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}
451 static int unix_writable(const struct sock *sk)
453 return sk->sk_state != TCP_LISTEN &&
454 (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
457 static void unix_write_space(struct sock *sk)
459 struct socket_wq *wq;
462 if (unix_writable(sk)) {
463 wq = rcu_dereference(sk->sk_wq);
464 if (skwq_has_sleeper(wq))
465 wake_up_interruptible_sync_poll(&wq->wait,
466 POLLOUT | POLLWRNORM | POLLWRBAND);
467 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
472 /* When dgram socket disconnects (or changes its peer), we clear its receive
473 * queue of packets arrived from previous peer. First, it allows to do
474 * flow control based only on wmem_alloc; second, sk connected to peer
475 * may receive messages only from that peer. */
476 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
478 if (!skb_queue_empty(&sk->sk_receive_queue)) {
479 skb_queue_purge(&sk->sk_receive_queue);
480 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
482 /* If one link of bidirectional dgram pipe is disconnected,
483 * we signal error. Messages are lost. Do not make this,
484 * when peer was not connected to us.
486 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
487 other->sk_err = ECONNRESET;
488 other->sk_error_report(other);
493 static void unix_sock_destructor(struct sock *sk)
495 struct unix_sock *u = unix_sk(sk);
497 skb_queue_purge(&sk->sk_receive_queue);
499 WARN_ON(atomic_read(&sk->sk_wmem_alloc));
500 WARN_ON(!sk_unhashed(sk));
501 WARN_ON(sk->sk_socket);
502 if (!sock_flag(sk, SOCK_DEAD)) {
503 pr_info("Attempt to release alive unix socket: %p\n", sk);
508 unix_release_addr(u->addr);
510 atomic_long_dec(&unix_nr_socks);
512 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
514 #ifdef UNIX_REFCNT_DEBUG
515 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
516 atomic_long_read(&unix_nr_socks));
520 static void unix_release_sock(struct sock *sk, int embrion)
522 struct unix_sock *u = unix_sk(sk);
528 unix_remove_socket(sk);
533 sk->sk_shutdown = SHUTDOWN_MASK;
535 u->path.dentry = NULL;
537 state = sk->sk_state;
538 sk->sk_state = TCP_CLOSE;
540 skpair = unix_peer(sk);
541 unix_peer(sk) = NULL;
543 unix_state_unlock(sk);
545 wake_up_interruptible_all(&u->peer_wait);
547 if (skpair != NULL) {
548 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
549 unix_state_lock(skpair);
551 skpair->sk_shutdown = SHUTDOWN_MASK;
552 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
553 skpair->sk_err = ECONNRESET;
554 unix_state_unlock(skpair);
555 skpair->sk_state_change(skpair);
556 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
559 unix_dgram_peer_wake_disconnect(sk, skpair);
560 sock_put(skpair); /* It may now die */
563 /* Try to flush out this socket. Throw out buffers at least */
565 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
566 if (state == TCP_LISTEN)
567 unix_release_sock(skb->sk, 1);
568 /* passed fds are erased in the kfree_skb hook */
569 UNIXCB(skb).consumed = skb->len;
578 /* ---- Socket is dead now and most probably destroyed ---- */
581 * Fixme: BSD difference: In BSD all sockets connected to us get
582 * ECONNRESET and we die on the spot. In Linux we behave
583 * like files and pipes do and wait for the last
586 * Can't we simply set sock->err?
588 * What the above comment does talk about? --ANK(980817)
591 if (unix_tot_inflight)
592 unix_gc(); /* Garbage collect fds */
595 static void init_peercred(struct sock *sk)
597 const struct cred *old_cred;
600 spin_lock(&sk->sk_peer_lock);
601 old_pid = sk->sk_peer_pid;
602 old_cred = sk->sk_peer_cred;
603 sk->sk_peer_pid = get_pid(task_tgid(current));
604 sk->sk_peer_cred = get_current_cred();
605 spin_unlock(&sk->sk_peer_lock);
611 static void copy_peercred(struct sock *sk, struct sock *peersk)
613 const struct cred *old_cred;
617 spin_lock(&sk->sk_peer_lock);
618 spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
620 spin_lock(&peersk->sk_peer_lock);
621 spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
623 old_pid = sk->sk_peer_pid;
624 old_cred = sk->sk_peer_cred;
625 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
626 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
628 spin_unlock(&sk->sk_peer_lock);
629 spin_unlock(&peersk->sk_peer_lock);
635 static int unix_listen(struct socket *sock, int backlog)
638 struct sock *sk = sock->sk;
639 struct unix_sock *u = unix_sk(sk);
640 struct pid *old_pid = NULL;
643 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
644 goto out; /* Only stream/seqpacket sockets accept */
647 goto out; /* No listens on an unbound socket */
649 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
651 if (backlog > sk->sk_max_ack_backlog)
652 wake_up_interruptible_all(&u->peer_wait);
653 sk->sk_max_ack_backlog = backlog;
654 sk->sk_state = TCP_LISTEN;
655 /* set credentials so connect can copy them */
660 unix_state_unlock(sk);
666 static int unix_release(struct socket *);
667 static int unix_bind(struct socket *, struct sockaddr *, int);
668 static int unix_stream_connect(struct socket *, struct sockaddr *,
669 int addr_len, int flags);
670 static int unix_socketpair(struct socket *, struct socket *);
671 static int unix_accept(struct socket *, struct socket *, int);
672 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
673 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
674 static unsigned int unix_dgram_poll(struct file *, struct socket *,
676 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
677 static int unix_shutdown(struct socket *, int);
678 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
679 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
680 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
681 size_t size, int flags);
682 static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
683 struct pipe_inode_info *, size_t size,
685 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
686 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
687 static int unix_dgram_connect(struct socket *, struct sockaddr *,
689 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
690 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
693 static int unix_set_peek_off(struct sock *sk, int val)
695 struct unix_sock *u = unix_sk(sk);
697 if (mutex_lock_interruptible(&u->iolock))
700 sk->sk_peek_off = val;
701 mutex_unlock(&u->iolock);
707 static const struct proto_ops unix_stream_ops = {
709 .owner = THIS_MODULE,
710 .release = unix_release,
712 .connect = unix_stream_connect,
713 .socketpair = unix_socketpair,
714 .accept = unix_accept,
715 .getname = unix_getname,
718 .listen = unix_listen,
719 .shutdown = unix_shutdown,
720 .setsockopt = sock_no_setsockopt,
721 .getsockopt = sock_no_getsockopt,
722 .sendmsg = unix_stream_sendmsg,
723 .recvmsg = unix_stream_recvmsg,
724 .mmap = sock_no_mmap,
725 .sendpage = unix_stream_sendpage,
726 .splice_read = unix_stream_splice_read,
727 .set_peek_off = unix_set_peek_off,
730 static const struct proto_ops unix_dgram_ops = {
732 .owner = THIS_MODULE,
733 .release = unix_release,
735 .connect = unix_dgram_connect,
736 .socketpair = unix_socketpair,
737 .accept = sock_no_accept,
738 .getname = unix_getname,
739 .poll = unix_dgram_poll,
741 .listen = sock_no_listen,
742 .shutdown = unix_shutdown,
743 .setsockopt = sock_no_setsockopt,
744 .getsockopt = sock_no_getsockopt,
745 .sendmsg = unix_dgram_sendmsg,
746 .recvmsg = unix_dgram_recvmsg,
747 .mmap = sock_no_mmap,
748 .sendpage = sock_no_sendpage,
749 .set_peek_off = unix_set_peek_off,
752 static const struct proto_ops unix_seqpacket_ops = {
754 .owner = THIS_MODULE,
755 .release = unix_release,
757 .connect = unix_stream_connect,
758 .socketpair = unix_socketpair,
759 .accept = unix_accept,
760 .getname = unix_getname,
761 .poll = unix_dgram_poll,
763 .listen = unix_listen,
764 .shutdown = unix_shutdown,
765 .setsockopt = sock_no_setsockopt,
766 .getsockopt = sock_no_getsockopt,
767 .sendmsg = unix_seqpacket_sendmsg,
768 .recvmsg = unix_seqpacket_recvmsg,
769 .mmap = sock_no_mmap,
770 .sendpage = sock_no_sendpage,
771 .set_peek_off = unix_set_peek_off,
774 static struct proto unix_proto = {
776 .owner = THIS_MODULE,
777 .obj_size = sizeof(struct unix_sock),
781 * AF_UNIX sockets do not interact with hardware, hence they
782 * dont trigger interrupts - so it's safe for them to have
783 * bh-unsafe locking for their sk_receive_queue.lock. Split off
784 * this special lock-class by reinitializing the spinlock key:
786 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
788 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
790 struct sock *sk = NULL;
793 atomic_long_inc(&unix_nr_socks);
794 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
797 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
801 sock_init_data(sock, sk);
802 lockdep_set_class(&sk->sk_receive_queue.lock,
803 &af_unix_sk_receive_queue_lock_key);
805 sk->sk_allocation = GFP_KERNEL_ACCOUNT;
806 sk->sk_write_space = unix_write_space;
807 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
808 sk->sk_destruct = unix_sock_destructor;
810 u->path.dentry = NULL;
812 spin_lock_init(&u->lock);
813 atomic_long_set(&u->inflight, 0);
814 INIT_LIST_HEAD(&u->link);
815 mutex_init(&u->iolock); /* single task reading lock */
816 mutex_init(&u->bindlock); /* single task binding lock */
817 init_waitqueue_head(&u->peer_wait);
818 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
819 unix_insert_socket(unix_sockets_unbound(sk), sk);
822 atomic_long_dec(&unix_nr_socks);
825 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
831 static int unix_create(struct net *net, struct socket *sock, int protocol,
834 if (protocol && protocol != PF_UNIX)
835 return -EPROTONOSUPPORT;
837 sock->state = SS_UNCONNECTED;
839 switch (sock->type) {
841 sock->ops = &unix_stream_ops;
844 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
848 sock->type = SOCK_DGRAM;
850 sock->ops = &unix_dgram_ops;
853 sock->ops = &unix_seqpacket_ops;
856 return -ESOCKTNOSUPPORT;
859 return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
862 static int unix_release(struct socket *sock)
864 struct sock *sk = sock->sk;
869 unix_release_sock(sk, 0);
875 static int unix_autobind(struct socket *sock)
877 struct sock *sk = sock->sk;
878 struct net *net = sock_net(sk);
879 struct unix_sock *u = unix_sk(sk);
880 static u32 ordernum = 1;
881 struct unix_address *addr;
883 unsigned int retries = 0;
885 err = mutex_lock_interruptible(&u->bindlock);
894 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
898 addr->name->sun_family = AF_UNIX;
899 atomic_set(&addr->refcnt, 1);
902 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
903 addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
905 spin_lock(&unix_table_lock);
906 ordernum = (ordernum+1)&0xFFFFF;
908 if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
910 spin_unlock(&unix_table_lock);
912 * __unix_find_socket_byname() may take long time if many names
913 * are already in use.
916 /* Give up if all names seems to be in use. */
917 if (retries++ == 0xFFFFF) {
924 addr->hash ^= sk->sk_type;
926 __unix_remove_socket(sk);
927 smp_store_release(&u->addr, addr);
928 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
929 spin_unlock(&unix_table_lock);
932 out: mutex_unlock(&u->bindlock);
/* Resolve a sockaddr_un to the target socket: filesystem lookup for path
 * names (with write-permission check), hash lookup for abstract names.
 * Returns the socket with a reference held, or NULL with *error set.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = d_real_inode(path.dentry);
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
991 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
993 struct dentry *dentry;
997 * Get the parent directory, calculate the hash for last
1000 dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
1001 err = PTR_ERR(dentry);
1006 * All right, let's create it.
1008 err = security_path_mknod(&path, dentry, mode, 0);
1010 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
1012 res->mnt = mntget(path.mnt);
1013 res->dentry = dget(dentry);
1016 done_path_create(&path, dentry);
1020 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1022 struct sock *sk = sock->sk;
1023 struct net *net = sock_net(sk);
1024 struct unix_sock *u = unix_sk(sk);
1025 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1026 char *sun_path = sunaddr->sun_path;
1029 struct unix_address *addr;
1030 struct hlist_head *list;
1031 struct path path = { NULL, NULL };
1034 if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
1035 sunaddr->sun_family != AF_UNIX)
1038 if (addr_len == sizeof(short)) {
1039 err = unix_autobind(sock);
1043 err = unix_mkname(sunaddr, addr_len, &hash);
1049 umode_t mode = S_IFSOCK |
1050 (SOCK_INODE(sock)->i_mode & ~current_umask());
1051 err = unix_mknod(sun_path, mode, &path);
1059 err = mutex_lock_interruptible(&u->bindlock);
1068 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1072 memcpy(addr->name, sunaddr, addr_len);
1073 addr->len = addr_len;
1074 addr->hash = hash ^ sk->sk_type;
1075 atomic_set(&addr->refcnt, 1);
1078 addr->hash = UNIX_HASH_SIZE;
1079 hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1080 spin_lock(&unix_table_lock);
1082 list = &unix_socket_table[hash];
1084 spin_lock(&unix_table_lock);
1086 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1087 sk->sk_type, hash)) {
1088 unix_release_addr(addr);
1092 list = &unix_socket_table[addr->hash];
1096 __unix_remove_socket(sk);
1097 smp_store_release(&u->addr, addr);
1098 __unix_insert_socket(list, sk);
1101 spin_unlock(&unix_table_lock);
1103 mutex_unlock(&u->bindlock);
/* Lock two socket state locks in address order to avoid ABBA deadlock;
 * handles @sk2 == NULL or @sk1 == @sk2 by locking only @sk1.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}
/* Counterpart of unix_state_double_lock(); unlock order is irrelevant. */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}
1136 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1137 int alen, int flags)
1139 struct sock *sk = sock->sk;
1140 struct net *net = sock_net(sk);
1141 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1147 if (alen < offsetofend(struct sockaddr, sa_family))
1150 if (addr->sa_family != AF_UNSPEC) {
1151 err = unix_mkname(sunaddr, alen, &hash);
1156 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1157 !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1161 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1165 unix_state_double_lock(sk, other);
1167 /* Apparently VFS overslept socket death. Retry. */
1168 if (sock_flag(other, SOCK_DEAD)) {
1169 unix_state_double_unlock(sk, other);
1175 if (!unix_may_send(sk, other))
1178 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1184 * 1003.1g breaking connected state with AF_UNSPEC
1187 unix_state_double_lock(sk, other);
1191 * If it was connected, reconnect.
1193 if (unix_peer(sk)) {
1194 struct sock *old_peer = unix_peer(sk);
1195 unix_peer(sk) = other;
1196 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1198 unix_state_double_unlock(sk, other);
1200 if (other != old_peer)
1201 unix_dgram_disconnected(sk, old_peer);
1204 unix_peer(sk) = other;
1205 unix_state_double_unlock(sk, other);
1210 unix_state_double_unlock(sk, other);
1216 static long unix_wait_for_peer(struct sock *other, long timeo)
1218 struct unix_sock *u = unix_sk(other);
1222 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1224 sched = !sock_flag(other, SOCK_DEAD) &&
1225 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1226 unix_recvq_full(other);
1228 unix_state_unlock(other);
1231 timeo = schedule_timeout(timeo);
1233 finish_wait(&u->peer_wait, &wait);
1237 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1238 int addr_len, int flags)
1240 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1241 struct sock *sk = sock->sk;
1242 struct net *net = sock_net(sk);
1243 struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1244 struct sock *newsk = NULL;
1245 struct sock *other = NULL;
1246 struct sk_buff *skb = NULL;
1252 err = unix_mkname(sunaddr, addr_len, &hash);
1257 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1258 (err = unix_autobind(sock)) != 0)
1261 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1263 /* First of all allocate resources.
1264 If we will make it after state is locked,
1265 we will have to recheck all again in any case.
1270 /* create new sock for complete connection */
1271 newsk = unix_create1(sock_net(sk), NULL, 0);
1275 /* Allocate skb for sending to listening sock */
1276 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1281 /* Find listening sock. */
1282 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1286 /* Latch state of peer */
1287 unix_state_lock(other);
1289 /* Apparently VFS overslept socket death. Retry. */
1290 if (sock_flag(other, SOCK_DEAD)) {
1291 unix_state_unlock(other);
1296 err = -ECONNREFUSED;
1297 if (other->sk_state != TCP_LISTEN)
1299 if (other->sk_shutdown & RCV_SHUTDOWN)
1302 if (unix_recvq_full(other)) {
1307 timeo = unix_wait_for_peer(other, timeo);
1309 err = sock_intr_errno(timeo);
1310 if (signal_pending(current))
1318 It is tricky place. We need to grab our state lock and cannot
1319 drop lock on peer. It is dangerous because deadlock is
1320 possible. Connect to self case and simultaneous
1321 attempt to connect are eliminated by checking socket
1322 state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1323 check this before attempt to grab lock.
1325 Well, and we have to recheck the state after socket locked.
1331 /* This is ok... continue with connect */
1333 case TCP_ESTABLISHED:
1334 /* Socket is already connected */
1342 unix_state_lock_nested(sk);
1344 if (sk->sk_state != st) {
1345 unix_state_unlock(sk);
1346 unix_state_unlock(other);
1351 err = security_unix_stream_connect(sk, other, newsk);
1353 unix_state_unlock(sk);
1357 /* The way is open! Fastly set all the necessary fields... */
1360 unix_peer(newsk) = sk;
1361 newsk->sk_state = TCP_ESTABLISHED;
1362 newsk->sk_type = sk->sk_type;
1363 init_peercred(newsk);
1364 newu = unix_sk(newsk);
1365 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1366 otheru = unix_sk(other);
1368 /* copy address information from listening to new sock
1370 * The contents of *(otheru->addr) and otheru->path
1371 * are seen fully set up here, since we have found
1372 * otheru in hash under unix_table_lock. Insertion
1373 * into the hash chain we'd found it in had been done
1374 * in an earlier critical area protected by unix_table_lock,
1375 * the same one where we'd set *(otheru->addr) contents,
1376 * as well as otheru->path and otheru->addr itself.
1378 * Using smp_store_release() here to set newu->addr
1379 * is enough to make those stores, as well as stores
1380 * to newu->path visible to anyone who gets newu->addr
1381 * by smp_load_acquire(). IOW, the same warranties
1382 * as for unix_sock instances bound in unix_bind() or
1383 * in unix_autobind().
1385 if (otheru->path.dentry) {
1386 path_get(&otheru->path);
1387 newu->path = otheru->path;
1389 atomic_inc(&otheru->addr->refcnt);
1390 smp_store_release(&newu->addr, otheru->addr);
1392 /* Set credentials */
1393 copy_peercred(sk, other);
1395 sock->state = SS_CONNECTED;
1396 sk->sk_state = TCP_ESTABLISHED;
1399 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
1400 unix_peer(sk) = newsk;
1402 unix_state_unlock(sk);
1404 /* take ten and send info to listening sock */
1405 spin_lock(&other->sk_receive_queue.lock);
1406 __skb_queue_tail(&other->sk_receive_queue, skb);
1407 spin_unlock(&other->sk_receive_queue.lock);
1408 unix_state_unlock(other);
1409 other->sk_data_ready(other);
1415 unix_state_unlock(other);
1420 unix_release_sock(newsk, 0);
1426 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1428 struct sock *ska = socka->sk, *skb = sockb->sk;
1430 /* Join our sockets back to back */
1433 unix_peer(ska) = skb;
1434 unix_peer(skb) = ska;
1438 if (ska->sk_type != SOCK_DGRAM) {
1439 ska->sk_state = TCP_ESTABLISHED;
1440 skb->sk_state = TCP_ESTABLISHED;
1441 socka->state = SS_CONNECTED;
1442 sockb->state = SS_CONNECTED;
1447 static void unix_sock_inherit_flags(const struct socket *old,
1450 if (test_bit(SOCK_PASSCRED, &old->flags))
1451 set_bit(SOCK_PASSCRED, &new->flags);
1452 if (test_bit(SOCK_PASSSEC, &old->flags))
1453 set_bit(SOCK_PASSSEC, &new->flags);
1456 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1458 struct sock *sk = sock->sk;
1460 struct sk_buff *skb;
1464 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1468 if (sk->sk_state != TCP_LISTEN)
1471 /* If socket state is TCP_LISTEN it cannot change (for now...),
1472 * so that no locks are necessary.
1475 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1477 /* This means receive shutdown. */
1484 skb_free_datagram(sk, skb);
1485 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1487 /* attach accepted sock to socket */
1488 unix_state_lock(tsk);
1489 newsock->state = SS_CONNECTED;
1490 unix_sock_inherit_flags(sock, newsock);
1491 sock_graft(tsk, newsock);
1492 unix_state_unlock(tsk);
1500 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1502 struct sock *sk = sock->sk;
1503 struct unix_address *addr;
1504 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1508 sk = unix_peer_get(sk);
1518 addr = smp_load_acquire(&unix_sk(sk)->addr);
1520 sunaddr->sun_family = AF_UNIX;
1521 sunaddr->sun_path[0] = 0;
1522 *uaddr_len = sizeof(short);
1524 *uaddr_len = addr->len;
1525 memcpy(sunaddr, addr->name, *uaddr_len);
1532 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
1534 scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1537 * Garbage collection of unix sockets starts by selecting a set of
1538 * candidate sockets which have reference only from being in flight
1539 * (total_refs == inflight_refs). This condition is checked once during
1540 * the candidate collection phase, and candidates are marked as such, so
1541 * that non-candidates can later be ignored. While inflight_refs is
1542 * protected by unix_gc_lock, total_refs (file count) is not, hence this
1543 * is an instantaneous decision.
1545 * Once a candidate, however, the socket must not be reinstalled into a
1546 * file descriptor while the garbage collection is in progress.
1548 * If the above conditions are met, then the directed graph of
1549 * candidates (*) does not change while unix_gc_lock is held.
1551 * Any operations that changes the file count through file descriptors
1552 * (dup, close, sendmsg) does not change the graph since candidates are
1553 * not installed in fds.
1555 * Dequeing a candidate via recvmsg would install it into an fd, but
1556 * that takes unix_gc_lock to decrement the inflight count, so it's
1557 * serialized with garbage collection.
1559 * MSG_PEEK is special in that it does not change the inflight count,
1560 * yet does install the socket into an fd. The following lock/unlock
1561 * pair is to ensure serialization with garbage collection. It must be
1562 * done between incrementing the file count and installing the file into
1565 * If garbage collection starts after the barrier provided by the
1566 * lock/unlock, then it will see the elevated refcount and not mark this
1567 * as a candidate. If a garbage collection is already in progress
1568 * before the file count was incremented, then the lock/unlock pair will
1569 * ensure that garbage collection is finished before progressing to
1570 * installing the fd.
1572 * (*) A -> B where B is on the queue of A or B is on the queue of C
1573 * which is on the queue of listening socket A.
1575 spin_lock(&unix_gc_lock);
1576 spin_unlock(&unix_gc_lock);
1579 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1583 UNIXCB(skb).pid = get_pid(scm->pid);
1584 UNIXCB(skb).uid = scm->creds.uid;
1585 UNIXCB(skb).gid = scm->creds.gid;
1586 UNIXCB(skb).fp = NULL;
1587 unix_get_secdata(scm, skb);
1588 if (scm->fp && send_fds)
1589 err = unix_attach_fds(scm, skb);
1591 skb->destructor = unix_destruct_scm;
1595 static bool unix_passcred_enabled(const struct socket *sock,
1596 const struct sock *other)
1598 return test_bit(SOCK_PASSCRED, &sock->flags) ||
1599 !other->sk_socket ||
1600 test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1604 * Some apps rely on write() giving SCM_CREDENTIALS
1605 * We include credentials if source or destination socket
1606 * asserted SOCK_PASSCRED.
1608 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1609 const struct sock *other)
1611 if (UNIXCB(skb).pid)
1613 if (unix_passcred_enabled(sock, other)) {
1614 UNIXCB(skb).pid = get_pid(task_tgid(current));
1615 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1619 static int maybe_init_creds(struct scm_cookie *scm,
1620 struct socket *socket,
1621 const struct sock *other)
1624 struct msghdr msg = { .msg_controllen = 0 };
1626 err = scm_send(socket, &msg, scm, false);
1630 if (unix_passcred_enabled(socket, other)) {
1631 scm->pid = get_pid(task_tgid(current));
1632 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1637 static bool unix_skb_scm_eq(struct sk_buff *skb,
1638 struct scm_cookie *scm)
1640 const struct unix_skb_parms *u = &UNIXCB(skb);
1642 return u->pid == scm->pid &&
1643 uid_eq(u->uid, scm->creds.uid) &&
1644 gid_eq(u->gid, scm->creds.gid) &&
1645 unix_secdata_eq(scm, skb);
1649 * Send AF_UNIX data.
1652 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1655 struct sock *sk = sock->sk;
1656 struct net *net = sock_net(sk);
1657 struct unix_sock *u = unix_sk(sk);
1658 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1659 struct sock *other = NULL;
1660 int namelen = 0; /* fake GCC */
1663 struct sk_buff *skb;
1665 struct scm_cookie scm;
1671 err = scm_send(sock, msg, &scm, false);
1676 if (msg->msg_flags&MSG_OOB)
1679 if (msg->msg_namelen) {
1680 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1687 other = unix_peer_get(sk);
1692 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1693 && (err = unix_autobind(sock)) != 0)
1697 if (len > sk->sk_sndbuf - 32)
1700 if (len > SKB_MAX_ALLOC) {
1701 data_len = min_t(size_t,
1702 len - SKB_MAX_ALLOC,
1703 MAX_SKB_FRAGS * PAGE_SIZE);
1704 data_len = PAGE_ALIGN(data_len);
1706 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1709 skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1710 msg->msg_flags & MSG_DONTWAIT, &err,
1711 PAGE_ALLOC_COSTLY_ORDER);
1715 err = unix_scm_to_skb(&scm, skb, true);
1718 max_level = err + 1;
1720 skb_put(skb, len - data_len);
1721 skb->data_len = data_len;
1723 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1727 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1732 if (sunaddr == NULL)
1735 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1741 if (sk_filter(other, skb) < 0) {
1742 /* Toss the packet but do not return any error to the sender */
1748 unix_state_lock(other);
1751 if (!unix_may_send(sk, other))
1754 if (unlikely(sock_flag(other, SOCK_DEAD))) {
1756 * Check with 1003.1g - what should
1759 unix_state_unlock(other);
1763 unix_state_lock(sk);
1766 if (unix_peer(sk) == other) {
1767 unix_peer(sk) = NULL;
1768 unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1770 unix_state_unlock(sk);
1772 unix_dgram_disconnected(sk, other);
1774 err = -ECONNREFUSED;
1776 unix_state_unlock(sk);
1786 if (other->sk_shutdown & RCV_SHUTDOWN)
1789 if (sk->sk_type != SOCK_SEQPACKET) {
1790 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1795 /* other == sk && unix_peer(other) != sk if
1796 * - unix_peer(sk) == NULL, destination address bound to sk
1797 * - unix_peer(sk) == sk by time of get but disconnected before lock
1800 unlikely(unix_peer(other) != sk &&
1801 unix_recvq_full_lockless(other))) {
1803 timeo = unix_wait_for_peer(other, timeo);
1805 err = sock_intr_errno(timeo);
1806 if (signal_pending(current))
1813 unix_state_unlock(other);
1814 unix_state_double_lock(sk, other);
1817 if (unix_peer(sk) != other ||
1818 unix_dgram_peer_wake_me(sk, other)) {
1826 goto restart_locked;
1830 if (unlikely(sk_locked))
1831 unix_state_unlock(sk);
1833 if (sock_flag(other, SOCK_RCVTSTAMP))
1834 __net_timestamp(skb);
1835 maybe_add_creds(skb, sock, other);
1836 skb_queue_tail(&other->sk_receive_queue, skb);
1837 if (max_level > unix_sk(other)->recursion_level)
1838 unix_sk(other)->recursion_level = max_level;
1839 unix_state_unlock(other);
1840 other->sk_data_ready(other);
1847 unix_state_unlock(sk);
1848 unix_state_unlock(other);
1858 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1859 * bytes, and a minimun of a full page.
1861 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1863 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1866 struct sock *sk = sock->sk;
1867 struct sock *other = NULL;
1869 struct sk_buff *skb;
1871 struct scm_cookie scm;
1872 bool fds_sent = false;
1877 err = scm_send(sock, msg, &scm, false);
1882 if (msg->msg_flags&MSG_OOB)
1885 if (msg->msg_namelen) {
1886 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1890 other = unix_peer(sk);
1895 if (sk->sk_shutdown & SEND_SHUTDOWN)
1898 while (sent < len) {
1901 /* Keep two messages in the pipe so it schedules better */
1902 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1904 /* allow fallback to order-0 allocations */
1905 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1907 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1909 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1911 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1912 msg->msg_flags & MSG_DONTWAIT, &err,
1913 get_order(UNIX_SKB_FRAGS_SZ));
1917 /* Only send the fds in the first buffer */
1918 err = unix_scm_to_skb(&scm, skb, !fds_sent);
1923 max_level = err + 1;
1926 skb_put(skb, size - data_len);
1927 skb->data_len = data_len;
1929 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1935 unix_state_lock(other);
1937 if (sock_flag(other, SOCK_DEAD) ||
1938 (other->sk_shutdown & RCV_SHUTDOWN))
1941 maybe_add_creds(skb, sock, other);
1942 skb_queue_tail(&other->sk_receive_queue, skb);
1943 if (max_level > unix_sk(other)->recursion_level)
1944 unix_sk(other)->recursion_level = max_level;
1945 unix_state_unlock(other);
1946 other->sk_data_ready(other);
1955 unix_state_unlock(other);
1958 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1959 send_sig(SIGPIPE, current, 0);
1963 return sent ? : err;
1966 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1967 int offset, size_t size, int flags)
1970 bool send_sigpipe = false;
1971 bool init_scm = true;
1972 struct scm_cookie scm;
1973 struct sock *other, *sk = socket->sk;
1974 struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1976 if (flags & MSG_OOB)
1979 other = unix_peer(sk);
1980 if (!other || sk->sk_state != TCP_ESTABLISHED)
1985 unix_state_unlock(other);
1986 mutex_unlock(&unix_sk(other)->iolock);
1987 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1993 /* we must acquire iolock as we modify already present
1994 * skbs in the sk_receive_queue and mess with skb->len
1996 err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1998 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
2002 if (sk->sk_shutdown & SEND_SHUTDOWN) {
2004 send_sigpipe = true;
2008 unix_state_lock(other);
2010 if (sock_flag(other, SOCK_DEAD) ||
2011 other->sk_shutdown & RCV_SHUTDOWN) {
2013 send_sigpipe = true;
2014 goto err_state_unlock;
2018 err = maybe_init_creds(&scm, socket, other);
2020 goto err_state_unlock;
2024 skb = skb_peek_tail(&other->sk_receive_queue);
2025 if (tail && tail == skb) {
2027 } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2034 } else if (newskb) {
2035 /* this is fast path, we don't necessarily need to
2036 * call to kfree_skb even though with newskb == NULL
2037 * this - does no harm
2039 consume_skb(newskb);
2043 if (skb_append_pagefrags(skb, page, offset, size)) {
2049 skb->data_len += size;
2050 skb->truesize += size;
2051 atomic_add(size, &sk->sk_wmem_alloc);
2054 err = unix_scm_to_skb(&scm, skb, false);
2056 goto err_state_unlock;
2057 spin_lock(&other->sk_receive_queue.lock);
2058 __skb_queue_tail(&other->sk_receive_queue, newskb);
2059 spin_unlock(&other->sk_receive_queue.lock);
2062 unix_state_unlock(other);
2063 mutex_unlock(&unix_sk(other)->iolock);
2065 other->sk_data_ready(other);
2070 unix_state_unlock(other);
2072 mutex_unlock(&unix_sk(other)->iolock);
2075 if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2076 send_sig(SIGPIPE, current, 0);
2082 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2086 struct sock *sk = sock->sk;
2088 err = sock_error(sk);
2092 if (sk->sk_state != TCP_ESTABLISHED)
2095 if (msg->msg_namelen)
2096 msg->msg_namelen = 0;
2098 return unix_dgram_sendmsg(sock, msg, len);
2101 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2102 size_t size, int flags)
2104 struct sock *sk = sock->sk;
2106 if (sk->sk_state != TCP_ESTABLISHED)
2109 return unix_dgram_recvmsg(sock, msg, size, flags);
2112 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2114 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2117 msg->msg_namelen = addr->len;
2118 memcpy(msg->msg_name, addr->name, addr->len);
2122 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2123 size_t size, int flags)
2125 struct scm_cookie scm;
2126 struct sock *sk = sock->sk;
2127 struct unix_sock *u = unix_sk(sk);
2128 struct sk_buff *skb, *last;
2137 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2140 mutex_lock(&u->iolock);
2142 skip = sk_peek_offset(sk, flags);
2143 skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err,
2148 mutex_unlock(&u->iolock);
2153 !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2155 if (!skb) { /* implies iolock unlocked */
2156 unix_state_lock(sk);
2157 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2158 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2159 (sk->sk_shutdown & RCV_SHUTDOWN))
2161 unix_state_unlock(sk);
2165 if (wq_has_sleeper(&u->peer_wait))
2166 wake_up_interruptible_sync_poll(&u->peer_wait,
2167 POLLOUT | POLLWRNORM |
2171 unix_copy_addr(msg, skb->sk);
2173 if (size > skb->len - skip)
2174 size = skb->len - skip;
2175 else if (size < skb->len - skip)
2176 msg->msg_flags |= MSG_TRUNC;
2178 err = skb_copy_datagram_msg(skb, skip, msg, size);
2182 if (sock_flag(sk, SOCK_RCVTSTAMP))
2183 __sock_recv_timestamp(msg, sk, skb);
2185 memset(&scm, 0, sizeof(scm));
2187 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2188 unix_set_secdata(&scm, skb);
2190 if (!(flags & MSG_PEEK)) {
2192 unix_detach_fds(&scm, skb);
2194 sk_peek_offset_bwd(sk, skb->len);
2196 /* It is questionable: on PEEK we could:
2197 - do not return fds - good, but too simple 8)
2198 - return fds, and do not return them on read (old strategy,
2200 - clone fds (I chose it for now, it is the most universal
2203 POSIX 1003.1g does not actually define this clearly
2204 at all. POSIX 1003.1g doesn't define a lot of things
2209 sk_peek_offset_fwd(sk, size);
2212 unix_peek_fds(&scm, skb);
2214 err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2216 scm_recv(sock, msg, &scm, flags);
2219 skb_free_datagram(sk, skb);
2220 mutex_unlock(&u->iolock);
2226 * Sleep until more data has arrived. But check for races..
2228 static long unix_stream_data_wait(struct sock *sk, long timeo,
2229 struct sk_buff *last, unsigned int last_len,
2232 struct sk_buff *tail;
2235 unix_state_lock(sk);
2238 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2240 tail = skb_peek_tail(&sk->sk_receive_queue);
2242 (tail && tail->len != last_len) ||
2244 (sk->sk_shutdown & RCV_SHUTDOWN) ||
2245 signal_pending(current) ||
2249 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2250 unix_state_unlock(sk);
2252 timeo = freezable_schedule_timeout(timeo);
2254 timeo = schedule_timeout(timeo);
2255 unix_state_lock(sk);
2257 if (sock_flag(sk, SOCK_DEAD))
2260 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2263 finish_wait(sk_sleep(sk), &wait);
2264 unix_state_unlock(sk);
2268 static unsigned int unix_skb_len(const struct sk_buff *skb)
2270 return skb->len - UNIXCB(skb).consumed;
2273 struct unix_stream_read_state {
2274 int (*recv_actor)(struct sk_buff *, int, int,
2275 struct unix_stream_read_state *);
2276 struct socket *socket;
2278 struct pipe_inode_info *pipe;
2281 unsigned int splice_flags;
2284 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2287 struct scm_cookie scm;
2288 struct socket *sock = state->socket;
2289 struct sock *sk = sock->sk;
2290 struct unix_sock *u = unix_sk(sk);
2292 int flags = state->flags;
2293 int noblock = flags & MSG_DONTWAIT;
2294 bool check_creds = false;
2299 size_t size = state->size;
2300 unsigned int last_len;
2302 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2307 if (unlikely(flags & MSG_OOB)) {
2312 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2313 timeo = sock_rcvtimeo(sk, noblock);
2315 memset(&scm, 0, sizeof(scm));
2317 /* Lock the socket to prevent queue disordering
2318 * while sleeps in memcpy_tomsg
2320 mutex_lock(&u->iolock);
2322 if (flags & MSG_PEEK)
2323 skip = sk_peek_offset(sk, flags);
2330 struct sk_buff *skb, *last;
2333 unix_state_lock(sk);
2334 if (sock_flag(sk, SOCK_DEAD)) {
2338 last = skb = skb_peek(&sk->sk_receive_queue);
2339 last_len = last ? last->len : 0;
2342 unix_sk(sk)->recursion_level = 0;
2343 if (copied >= target)
2347 * POSIX 1003.1g mandates this order.
2350 err = sock_error(sk);
2353 if (sk->sk_shutdown & RCV_SHUTDOWN)
2356 unix_state_unlock(sk);
2362 mutex_unlock(&u->iolock);
2364 timeo = unix_stream_data_wait(sk, timeo, last,
2365 last_len, freezable);
2367 if (signal_pending(current)) {
2368 err = sock_intr_errno(timeo);
2373 mutex_lock(&u->iolock);
2376 unix_state_unlock(sk);
2380 while (skip >= unix_skb_len(skb)) {
2381 skip -= unix_skb_len(skb);
2383 last_len = skb->len;
2384 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2389 unix_state_unlock(sk);
2392 /* Never glue messages from different writers */
2393 if (!unix_skb_scm_eq(skb, &scm))
2395 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2396 /* Copy credentials */
2397 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2398 unix_set_secdata(&scm, skb);
2402 /* Copy address just once */
2403 if (state->msg && state->msg->msg_name) {
2404 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2405 state->msg->msg_name);
2406 unix_copy_addr(state->msg, skb->sk);
2410 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2412 chunk = state->recv_actor(skb, skip, chunk, state);
2413 drop_skb = !unix_skb_len(skb);
2414 /* skb is only safe to use if !drop_skb */
2425 /* the skb was touched by a concurrent reader;
2426 * we should not expect anything from this skb
2427 * anymore and assume it invalid - we can be
2428 * sure it was dropped from the socket queue
2430 * let's report a short read
2436 /* Mark read part of skb as used */
2437 if (!(flags & MSG_PEEK)) {
2438 UNIXCB(skb).consumed += chunk;
2440 sk_peek_offset_bwd(sk, chunk);
2443 unix_detach_fds(&scm, skb);
2445 if (unix_skb_len(skb))
2448 skb_unlink(skb, &sk->sk_receive_queue);
2454 /* It is questionable, see note in unix_dgram_recvmsg.
2457 unix_peek_fds(&scm, skb);
2459 sk_peek_offset_fwd(sk, chunk);
2466 last_len = skb->len;
2467 unix_state_lock(sk);
2468 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2471 unix_state_unlock(sk);
2476 mutex_unlock(&u->iolock);
2478 scm_recv(sock, state->msg, &scm, flags);
2482 return copied ? : err;
2485 static int unix_stream_read_actor(struct sk_buff *skb,
2486 int skip, int chunk,
2487 struct unix_stream_read_state *state)
2491 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2493 return ret ?: chunk;
2496 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2497 size_t size, int flags)
2499 struct unix_stream_read_state state = {
2500 .recv_actor = unix_stream_read_actor,
2507 return unix_stream_read_generic(&state, true);
2510 static int unix_stream_splice_actor(struct sk_buff *skb,
2511 int skip, int chunk,
2512 struct unix_stream_read_state *state)
2514 return skb_splice_bits(skb, state->socket->sk,
2515 UNIXCB(skb).consumed + skip,
2516 state->pipe, chunk, state->splice_flags);
2519 static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
2520 struct pipe_inode_info *pipe,
2521 size_t size, unsigned int flags)
2523 struct unix_stream_read_state state = {
2524 .recv_actor = unix_stream_splice_actor,
2528 .splice_flags = flags,
2531 if (unlikely(*ppos))
2534 if (sock->file->f_flags & O_NONBLOCK ||
2535 flags & SPLICE_F_NONBLOCK)
2536 state.flags = MSG_DONTWAIT;
2538 return unix_stream_read_generic(&state, false);
2541 static int unix_shutdown(struct socket *sock, int mode)
2543 struct sock *sk = sock->sk;
2546 if (mode < SHUT_RD || mode > SHUT_RDWR)
2549 * SHUT_RD (0) -> RCV_SHUTDOWN (1)
2550 * SHUT_WR (1) -> SEND_SHUTDOWN (2)
2551 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2555 unix_state_lock(sk);
2556 sk->sk_shutdown |= mode;
2557 other = unix_peer(sk);
2560 unix_state_unlock(sk);
2561 sk->sk_state_change(sk);
2564 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2568 if (mode&RCV_SHUTDOWN)
2569 peer_mode |= SEND_SHUTDOWN;
2570 if (mode&SEND_SHUTDOWN)
2571 peer_mode |= RCV_SHUTDOWN;
2572 unix_state_lock(other);
2573 other->sk_shutdown |= peer_mode;
2574 unix_state_unlock(other);
2575 other->sk_state_change(other);
2576 if (peer_mode == SHUTDOWN_MASK)
2577 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2578 else if (peer_mode & RCV_SHUTDOWN)
2579 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2587 long unix_inq_len(struct sock *sk)
2589 struct sk_buff *skb;
2592 if (sk->sk_state == TCP_LISTEN)
2595 spin_lock(&sk->sk_receive_queue.lock);
2596 if (sk->sk_type == SOCK_STREAM ||
2597 sk->sk_type == SOCK_SEQPACKET) {
2598 skb_queue_walk(&sk->sk_receive_queue, skb)
2599 amount += unix_skb_len(skb);
2601 skb = skb_peek(&sk->sk_receive_queue);
2605 spin_unlock(&sk->sk_receive_queue.lock);
2609 EXPORT_SYMBOL_GPL(unix_inq_len);
2611 long unix_outq_len(struct sock *sk)
2613 return sk_wmem_alloc_get(sk);
2615 EXPORT_SYMBOL_GPL(unix_outq_len);
2617 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2619 struct sock *sk = sock->sk;
2625 amount = unix_outq_len(sk);
2626 err = put_user(amount, (int __user *)arg);
2629 amount = unix_inq_len(sk);
2633 err = put_user(amount, (int __user *)arg);
2642 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2644 struct sock *sk = sock->sk;
2647 sock_poll_wait(file, sk_sleep(sk), wait);
2650 /* exceptional events? */
2653 if (sk->sk_shutdown == SHUTDOWN_MASK)
2655 if (sk->sk_shutdown & RCV_SHUTDOWN)
2656 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2659 if (!skb_queue_empty(&sk->sk_receive_queue))
2660 mask |= POLLIN | POLLRDNORM;
2662 /* Connection-based need to check for termination and startup */
2663 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2664 sk->sk_state == TCP_CLOSE)
2668 * we set writable also when the other side has shut down the
2669 * connection. This prevents stuck sockets.
2671 if (unix_writable(sk))
2672 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2677 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2680 struct sock *sk = sock->sk, *other;
2681 unsigned int mask, writable;
2683 sock_poll_wait(file, sk_sleep(sk), wait);
2686 /* exceptional events? */
2687 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2689 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
2691 if (sk->sk_shutdown & RCV_SHUTDOWN)
2692 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2693 if (sk->sk_shutdown == SHUTDOWN_MASK)
2697 if (!skb_queue_empty(&sk->sk_receive_queue))
2698 mask |= POLLIN | POLLRDNORM;
2700 /* Connection-based need to check for termination and startup */
2701 if (sk->sk_type == SOCK_SEQPACKET) {
2702 if (sk->sk_state == TCP_CLOSE)
2704 /* connection hasn't started yet? */
2705 if (sk->sk_state == TCP_SYN_SENT)
2709 /* No write status requested, avoid expensive OUT tests. */
2710 if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2713 writable = unix_writable(sk);
2715 unix_state_lock(sk);
2717 other = unix_peer(sk);
2718 if (other && unix_peer(other) != sk &&
2719 unix_recvq_full_lockless(other) &&
2720 unix_dgram_peer_wake_me(sk, other))
2723 unix_state_unlock(sk);
2727 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2729 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2734 #ifdef CONFIG_PROC_FS
2736 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2738 #define get_bucket(x) ((x) >> BUCKET_SPACE)
2739 #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2740 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
2742 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2744 unsigned long offset = get_offset(*pos);
2745 unsigned long bucket = get_bucket(*pos);
2747 unsigned long count = 0;
2749 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2750 if (sock_net(sk) != seq_file_net(seq))
2752 if (++count == offset)
2759 static struct sock *unix_next_socket(struct seq_file *seq,
2763 unsigned long bucket;
2765 while (sk > (struct sock *)SEQ_START_TOKEN) {
2769 if (sock_net(sk) == seq_file_net(seq))
2774 sk = unix_from_bucket(seq, pos);
2779 bucket = get_bucket(*pos) + 1;
2780 *pos = set_bucket_offset(bucket, 1);
2781 } while (bucket < ARRAY_SIZE(unix_socket_table));
2786 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2787 __acquires(unix_table_lock)
2789 spin_lock(&unix_table_lock);
2792 return SEQ_START_TOKEN;
2794 if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2797 return unix_next_socket(seq, NULL, pos);
2800 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2803 return unix_next_socket(seq, v, pos);
2806 static void unix_seq_stop(struct seq_file *seq, void *v)
2807 __releases(unix_table_lock)
2809 spin_unlock(&unix_table_lock);
2812 static int unix_seq_show(struct seq_file *seq, void *v)
2815 if (v == SEQ_START_TOKEN)
2816 seq_puts(seq, "Num RefCount Protocol Flags Type St "
2820 struct unix_sock *u = unix_sk(s);
2823 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2825 atomic_read(&s->sk_refcnt),
2827 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2830 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2831 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2834 if (u->addr) { // under unix_table_lock here
2839 len = u->addr->len - sizeof(short);
2840 if (!UNIX_ABSTRACT(s))
2846 for ( ; i < len; i++)
2847 seq_putc(seq, u->addr->name->sun_path[i] ?:
2850 unix_state_unlock(s);
2851 seq_putc(seq, '\n');
2857 static const struct seq_operations unix_seq_ops = {
2858 .start = unix_seq_start,
2859 .next = unix_seq_next,
2860 .stop = unix_seq_stop,
2861 .show = unix_seq_show,
2864 static int unix_seq_open(struct inode *inode, struct file *file)
2866 return seq_open_net(inode, file, &unix_seq_ops,
2867 sizeof(struct seq_net_private));
2870 static const struct file_operations unix_seq_fops = {
2871 .owner = THIS_MODULE,
2872 .open = unix_seq_open,
2874 .llseek = seq_lseek,
2875 .release = seq_release_net,
2880 static const struct net_proto_family unix_family_ops = {
2882 .create = unix_create,
2883 .owner = THIS_MODULE,
2887 static int __net_init unix_net_init(struct net *net)
2889 int error = -ENOMEM;
2891 net->unx.sysctl_max_dgram_qlen = 10;
2892 if (unix_sysctl_register(net))
2895 #ifdef CONFIG_PROC_FS
2896 if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
2897 unix_sysctl_unregister(net);
2906 static void __net_exit unix_net_exit(struct net *net)
2908 unix_sysctl_unregister(net);
2909 remove_proc_entry("unix", net->proc_net);
2912 static struct pernet_operations unix_net_ops = {
2913 .init = unix_net_init,
2914 .exit = unix_net_exit,
2917 static int __init af_unix_init(void)
2921 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2923 rc = proto_register(&unix_proto, 1);
2925 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2929 sock_register(&unix_family_ops);
2930 register_pernet_subsys(&unix_net_ops);
2935 static void __exit af_unix_exit(void)
2937 sock_unregister(PF_UNIX);
2938 proto_unregister(&unix_proto);
2939 unregister_pernet_subsys(&unix_net_ops);
2942 /* Earlier than device_initcall() so that other drivers invoking
2943 request_module() don't end up in a loop when modprobe tries
2944 to use a UNIX socket. But later than subsys_initcall() because
2945 we depend on stuff initialised there */
2946 fs_initcall(af_unix_init);
2947 module_exit(af_unix_exit);
2949 MODULE_LICENSE("GPL");
2950 MODULE_ALIAS_NETPROTO(PF_UNIX);