net/unix/af_unix.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * NET4:        Implementation of BSD Unix domain sockets.
   4  *
   5  * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
   6  *
   7  * Fixes:
   8  *              Linus Torvalds  :       Assorted bug cures.
   9  *              Niibe Yutaka    :       async I/O support.
  10  *              Carsten Paeth   :       PF_UNIX check, address fixes.
  11  *              Alan Cox        :       Limit size of allocated blocks.
  12  *              Alan Cox        :       Fixed the stupid socketpair bug.
  13  *              Alan Cox        :       BSD compatibility fine tuning.
  14  *              Alan Cox        :       Fixed a bug in connect when interrupted.
  15  *              Alan Cox        :       Sorted out a proper draft version of
  16  *                                      file descriptor passing hacked up from
  17  *                                      Mike Shaver's work.
  18  *              Marty Leisner   :       Fixes to fd passing
  19  *              Nick Nevin      :       recvmsg bugfix.
  20  *              Alan Cox        :       Started proper garbage collector
  21  *              Heiko EiBfeldt  :       Missing verify_area check
  22  *              Alan Cox        :       Started POSIXisms
  23  *              Andreas Schwab  :       Replace inode by dentry for proper
  24  *                                      reference counting
  25  *              Kirk Petersen   :       Made this a module
  26  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  27  *                                      Lots of bug fixes.
  28  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
  29  *                                      by the above two patches.
  30  *           Andrea Arcangeli   :       If possible we block in connect(2)
  31  *                                      if the max backlog of the listen socket
  32  *                                      has been reached. This won't break
  33  *                                      old apps and it will avoid a huge amount
  34  *                                      of socks hashed (this is for unix_gc()
  35  *                                      performance reasons).
  36  *                                      Security fix that limits the max
  37  *                                      number of socks to 2*max_files and
  38  *                                      the number of skb queueable in the
  39  *                                      dgram receiver.
  40  *              Artur Skawina   :       Hash function optimizations
  41  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  42  *            Malcolm Beattie   :       Set peercred for socketpair
  43  *           Michal Ostrowski   :       Module initialization cleanup.
  44  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  45  *                                      the core infrastructure is doing that
  46  *                                      for all net proto families now (2.5.69+)
  47  *
  48  * Known differences from reference BSD that was tested:
  49  *
  50  *      [TO FIX]
  51  *      ECONNREFUSED is not returned from one end of a connected() socket to the
  52  *              other the moment one end closes.
  53  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  54  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  55  *      [NOT TO FIX]
  56  *      accept() returns a path name even if the connecting socket has closed
  57  *              in the meantime (BSD loses the path and gives up).
  58  *      accept() returns 0 length path for an unbound connector. BSD returns 16
  59  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  60  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  61  *      BSD af_unix apparently has connect forgetting to block properly.
  62  *              (need to check this with the POSIX spec in detail)
  63  *
  64  * Differences from 2.0.0-11-... (ANK)
  65  *      Bug fixes and improvements.
  66  *              - client shutdown killed server socket.
  67  *              - removed all useless cli/sti pairs.
  68  *
  69  *      Semantic changes/extensions.
  70  *              - generic control message passing.
  71  *              - SCM_CREDENTIALS control message.
  72  *              - "Abstract" (not FS based) socket bindings.
  73  *                Abstract names are sequences of bytes (not zero terminated)
  74  *                started by 0, so that this name space does not intersect
  75  *                with BSD names.
  76  */
  77
  78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  79
  80 #include <linux/module.h>
  81 #include <linux/kernel.h>
  82 #include <linux/signal.h>
  83 #include <linux/sched/signal.h>
  84 #include <linux/errno.h>
  85 #include <linux/string.h>
  86 #include <linux/stat.h>
  87 #include <linux/dcache.h>
  88 #include <linux/namei.h>
  89 #include <linux/socket.h>
  90 #include <linux/un.h>
  91 #include <linux/fcntl.h>
  92 #include <linux/termios.h>
  93 #include <linux/sockios.h>
  94 #include <linux/net.h>
  95 #include <linux/in.h>
  96 #include <linux/fs.h>
  97 #include <linux/slab.h>
  98 #include <linux/uaccess.h>
  99 #include <linux/skbuff.h>
 100 #include <linux/netdevice.h>
 101 #include <net/net_namespace.h>
 102 #include <net/sock.h>
 103 #include <net/tcp_states.h>
 104 #include <net/af_unix.h>
 105 #include <linux/proc_fs.h>
 106 #include <linux/seq_file.h>
 107 #include <net/scm.h>
 108 #include <linux/init.h>
 109 #include <linux/poll.h>
 110 #include <linux/rtnetlink.h>
 111 #include <linux/mount.h>
 112 #include <net/checksum.h>
 113 #include <linux/security.h>
 114 #include <linux/freezer.h>
 115 #include <linux/file.h>
 116 #include <linux/btf_ids.h>
 117
 118 #include "scm.h"
 119
 120 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
 121 EXPORT_SYMBOL_GPL(unix_socket_table);
 122 DEFINE_SPINLOCK(unix_table_lock);
 123 EXPORT_SYMBOL_GPL(unix_table_lock);
 124 static atomic_long_t unix_nr_socks;
 125
 126
 127 static struct hlist_head *unix_sockets_unbound(void *addr)
 128 {
 129         unsigned long hash = (unsigned long)addr;
 130
 131         hash ^= hash >> 16;
 132         hash ^= hash >> 8;
 133         hash %= UNIX_HASH_SIZE;
 134         return &unix_socket_table[UNIX_HASH_SIZE + hash];
 135 }
 136
 137 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
 138
 139 #ifdef CONFIG_SECURITY_NETWORK
 140 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 141 {
 142         UNIXCB(skb).secid = scm->secid;
 143 }
 144
 145 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 146 {
 147         scm->secid = UNIXCB(skb).secid;
 148 }
 149
 150 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 151 {
 152         return (scm->secid == UNIXCB(skb).secid);
 153 }
 154 #else
 155 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 156 { }
 157
 158 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 159 { }
 160
 161 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 162 {
 163         return true;
 164 }
 165 #endif /* CONFIG_SECURITY_NETWORK */
 166
 167 /*
 168  *  SMP locking strategy:
 169  *    hash table is protected with spinlock unix_table_lock
 170  *    each socket state is protected by separate spin lock.
 171  */
 172
 173 static inline unsigned int unix_hash_fold(__wsum n)
 174 {
 175         unsigned int hash = (__force unsigned int)csum_fold(n);
 176
 177         hash ^= hash>>8;
 178         return hash&(UNIX_HASH_SIZE-1);
 179 }
 180
 181 #define unix_peer(sk) (unix_sk(sk)->peer)
 182
 183 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 184 {
 185         return unix_peer(osk) == sk;
 186 }
 187
 188 static inline int unix_may_send(struct sock *sk, struct sock *osk)
 189 {
 190         return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 191 }
 192
 193 static inline int unix_recvq_full(const struct sock *sk)
 194 {
 195         return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 196 }
 197
 198 static inline int unix_recvq_full_lockless(const struct sock *sk)
 199 {
 200         return skb_queue_len_lockless(&sk->sk_receive_queue) >
 201                 READ_ONCE(sk->sk_max_ack_backlog);
 202 }
 203
 204 struct sock *unix_peer_get(struct sock *s)
 205 {
 206         struct sock *peer;
 207
 208         unix_state_lock(s);
 209         peer = unix_peer(s);
 210         if (peer)
 211                 sock_hold(peer);
 212         unix_state_unlock(s);
 213         return peer;
 214 }
 215 EXPORT_SYMBOL_GPL(unix_peer_get);
 216
 217 static inline void unix_release_addr(struct unix_address *addr)
 218 {
 219         if (refcount_dec_and_test(&addr->refcnt))
 220                 kfree(addr);
 221 }
 222
 223 /*
 224  *      Check unix socket name:
 225  *              - should be not zero length.
 226  *              - if started by not zero, should be NULL terminated (FS object)
 227  *              - if started by zero, it is abstract name.
 228  */
 229
 230 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
 231 {
 232         *hashp = 0;
 233
 234         if (len <= sizeof(short) || len > sizeof(*sunaddr))
 235                 return -EINVAL;
 236         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 237                 return -EINVAL;
 238         if (sunaddr->sun_path[0]) {
 239                 /*
 240                  * This may look like an off by one error but it is a bit more
 241                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
 242                  * sun_path[108] doesn't as such exist.  However in kernel space
 243                  * we are guaranteed that it is a valid memory location in our
 244                  * kernel address buffer.
 245                  */
 246                 ((char *)sunaddr)[len] = 0;
 247                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
 248                 return len;
 249         }
 250
 251         *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
 252         return len;
 253 }
 254
 255 static void __unix_remove_socket(struct sock *sk)
 256 {
 257         sk_del_node_init(sk);
 258 }
 259
 260 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
 261 {
 262         WARN_ON(!sk_unhashed(sk));
 263         sk_add_node(sk, list);
 264 }
 265
 266 static void __unix_set_addr(struct sock *sk, struct unix_address *addr,
 267                             unsigned hash)
 268 {
 269         __unix_remove_socket(sk);
 270         smp_store_release(&unix_sk(sk)->addr, addr);
 271         __unix_insert_socket(&unix_socket_table[hash], sk);
 272 }
 273
 274 static inline void unix_remove_socket(struct sock *sk)
 275 {
 276         spin_lock(&unix_table_lock);
 277         __unix_remove_socket(sk);
 278         spin_unlock(&unix_table_lock);
 279 }
 280
 281 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
 282 {
 283         spin_lock(&unix_table_lock);
 284         __unix_insert_socket(list, sk);
 285         spin_unlock(&unix_table_lock);
 286 }
 287
 288 static struct sock *__unix_find_socket_byname(struct net *net,
 289                                               struct sockaddr_un *sunname,
 290                                               int len, unsigned int hash)
 291 {
 292         struct sock *s;
 293
 294         sk_for_each(s, &unix_socket_table[hash]) {
 295                 struct unix_sock *u = unix_sk(s);
 296
 297                 if (!net_eq(sock_net(s), net))
 298                         continue;
 299
 300                 if (u->addr->len == len &&
 301                     !memcmp(u->addr->name, sunname, len))
 302                         return s;
 303         }
 304         return NULL;
 305 }
 306
 307 static inline struct sock *unix_find_socket_byname(struct net *net,
 308                                                    struct sockaddr_un *sunname,
 309                                                    int len, unsigned int hash)
 310 {
 311         struct sock *s;
 312
 313         spin_lock(&unix_table_lock);
 314         s = __unix_find_socket_byname(net, sunname, len, hash);
 315         if (s)
 316                 sock_hold(s);
 317         spin_unlock(&unix_table_lock);
 318         return s;
 319 }
 320
 321 static struct sock *unix_find_socket_byinode(struct inode *i)
 322 {
 323         struct sock *s;
 324
 325         spin_lock(&unix_table_lock);
 326         sk_for_each(s,
 327                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 328                 struct dentry *dentry = unix_sk(s)->path.dentry;
 329
 330                 if (dentry && d_backing_inode(dentry) == i) {
 331                         sock_hold(s);
 332                         goto found;
 333                 }
 334         }
 335         s = NULL;
 336 found:
 337         spin_unlock(&unix_table_lock);
 338         return s;
 339 }
 340
 341 /* Support code for asymmetrically connected dgram sockets
 342  *
 343  * If a datagram socket is connected to a socket not itself connected
 344  * to the first socket (eg, /dev/log), clients may only enqueue more
 345  * messages if the present receive queue of the server socket is not
 346  * "too large". This means there's a second writeability condition
 347  * poll and sendmsg need to test. The dgram recv code will do a wake
 348  * up on the peer_wait wait queue of a socket upon reception of a
 349  * datagram which needs to be propagated to sleeping would-be writers
 350  * since these might not have sent anything so far. This can't be
 351  * accomplished via poll_wait because the lifetime of the server
 352  * socket might be less than that of its clients if these break their
 353  * association with it or if the server socket is closed while clients
 354  * are still connected to it and there's no way to inform "a polling
 355  * implementation" that it should let go of a certain wait queue.
 356  *
 357  * In order to propagate a wake up, a wait_queue_entry_t of the client
 358  * socket is enqueued on the peer_wait queue of the server socket
 359  * whose wake function does a wake_up on the ordinary client socket
 360  * wait queue. This connection is established whenever a write (or
 361  * poll for write) hits the flow control condition and is broken when the
 362  * association to the server socket is dissolved or after a wake up
 363  * was relayed.
 364  */
 365
 366 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
 367                                       void *key)
 368 {
 369         struct unix_sock *u;
 370         wait_queue_head_t *u_sleep;
 371
 372         u = container_of(q, struct unix_sock, peer_wake);
 373
 374         __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
 375                             q);
 376         u->peer_wake.private = NULL;
 377
 378         /* relaying can only happen while the wq still exists */
 379         u_sleep = sk_sleep(&u->sk);
 380         if (u_sleep)
 381                 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
 382
 383         return 0;
 384 }
 385
 386 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
 387 {
 388         struct unix_sock *u, *u_other;
 389         int rc;
 390
 391         u = unix_sk(sk);
 392         u_other = unix_sk(other);
 393         rc = 0;
 394         spin_lock(&u_other->peer_wait.lock);
 395
 396         if (!u->peer_wake.private) {
 397                 u->peer_wake.private = other;
 398                 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
 399
 400                 rc = 1;
 401         }
 402
 403         spin_unlock(&u_other->peer_wait.lock);
 404         return rc;
 405 }
 406
 407 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
 408                                             struct sock *other)
 409 {
 410         struct unix_sock *u, *u_other;
 411
 412         u = unix_sk(sk);
 413         u_other = unix_sk(other);
 414         spin_lock(&u_other->peer_wait.lock);
 415
 416         if (u->peer_wake.private == other) {
 417                 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
 418                 u->peer_wake.private = NULL;
 419         }
 420
 421         spin_unlock(&u_other->peer_wait.lock);
 422 }
 423
 424 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
 425                                                    struct sock *other)
 426 {
 427         unix_dgram_peer_wake_disconnect(sk, other);
 428         wake_up_interruptible_poll(sk_sleep(sk),
 429                                    EPOLLOUT |
 430                                    EPOLLWRNORM |
 431                                    EPOLLWRBAND);
 432 }
 433
 434 /* preconditions:
 435  *      - unix_peer(sk) == other
 436  *      - association is stable
 437  */
 438 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 439 {
 440         int connected;
 441
 442         connected = unix_dgram_peer_wake_connect(sk, other);
 443
 444         /* If other is SOCK_DEAD, we want to make sure we signal
 445          * POLLOUT, such that a subsequent write() can get a
 446          * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
 447          * to other and its full, we will hang waiting for POLLOUT.
 448          */
 449         if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
 450                 return 1;
 451
 452         if (connected)
 453                 unix_dgram_peer_wake_disconnect(sk, other);
 454
 455         return 0;
 456 }
 457
 458 static int unix_writable(const struct sock *sk)
 459 {
 460         return sk->sk_state != TCP_LISTEN &&
 461                (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 462 }
 463
 464 static void unix_write_space(struct sock *sk)
 465 {
 466         struct socket_wq *wq;
 467
 468         rcu_read_lock();
 469         if (unix_writable(sk)) {
 470                 wq = rcu_dereference(sk->sk_wq);
 471                 if (skwq_has_sleeper(wq))
 472                         wake_up_interruptible_sync_poll(&wq->wait,
 473                                 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
 474                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 475         }
 476         rcu_read_unlock();
 477 }
 478
 479 /* When dgram socket disconnects (or changes its peer), we clear its receive
 480  * queue of packets arrived from previous peer. First, it allows to do
 481  * flow control based only on wmem_alloc; second, sk connected to peer
 482  * may receive messages only from that peer. */
 483 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 484 {
 485         if (!skb_queue_empty(&sk->sk_receive_queue)) {
 486                 skb_queue_purge(&sk->sk_receive_queue);
 487                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 488
 489                 /* If one link of bidirectional dgram pipe is disconnected,
 490                  * we signal error. Messages are lost. Do not make this,
 491                  * when peer was not connected to us.
 492                  */
 493                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 494                         other->sk_err = ECONNRESET;
 495                         sk_error_report(other);
 496                 }
 497         }
 498         other->sk_state = TCP_CLOSE;
 499 }
 500
 501 static void unix_sock_destructor(struct sock *sk)
 502 {
 503         struct unix_sock *u = unix_sk(sk);
 504
 505         skb_queue_purge(&sk->sk_receive_queue);
 506
 507         WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 508         WARN_ON(!sk_unhashed(sk));
 509         WARN_ON(sk->sk_socket);
 510         if (!sock_flag(sk, SOCK_DEAD)) {
 511                 pr_info("Attempt to release alive unix socket: %p\n", sk);
 512                 return;
 513         }
 514
 515         if (u->addr)
 516                 unix_release_addr(u->addr);
 517
 518         atomic_long_dec(&unix_nr_socks);
 519         local_bh_disable();
 520         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 521         local_bh_enable();
 522 #ifdef UNIX_REFCNT_DEBUG
 523         pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
 524                 atomic_long_read(&unix_nr_socks));
 525 #endif
 526 }
 527
 528 static void unix_release_sock(struct sock *sk, int embrion)
 529 {
 530         struct unix_sock *u = unix_sk(sk);
 531         struct path path;
 532         struct sock *skpair;
 533         struct sk_buff *skb;
 534         int state;
 535
 536         unix_remove_socket(sk);
 537
 538         /* Clear state */
 539         unix_state_lock(sk);
 540         sock_orphan(sk);
 541         WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
 542         path         = u->path;
 543         u->path.dentry = NULL;
 544         u->path.mnt = NULL;
 545         state = sk->sk_state;
 546         sk->sk_state = TCP_CLOSE;
 547
 548         skpair = unix_peer(sk);
 549         unix_peer(sk) = NULL;
 550
 551         unix_state_unlock(sk);
 552
 553 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
 554         if (u->oob_skb) {
 555                 kfree_skb(u->oob_skb);
 556                 u->oob_skb = NULL;
 557         }
 558 #endif
 559
 560         wake_up_interruptible_all(&u->peer_wait);
 561
 562         if (skpair != NULL) {
 563                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
 564                         unix_state_lock(skpair);
 565                         /* No more writes */
 566                         WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
 567                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
 568                                 skpair->sk_err = ECONNRESET;
 569                         unix_state_unlock(skpair);
 570                         skpair->sk_state_change(skpair);
 571                         sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
 572                 }
 573
 574                 unix_dgram_peer_wake_disconnect(sk, skpair);
 575                 sock_put(skpair); /* It may now die */
 576         }
 577
 578         /* Try to flush out this socket. Throw out buffers at least */
 579
 580         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 581                 if (state == TCP_LISTEN)
 582                         unix_release_sock(skb->sk, 1);
 583                 /* passed fds are erased in the kfree_skb hook        */
 584                 UNIXCB(skb).consumed = skb->len;
 585                 kfree_skb(skb);
 586         }
 587
 588         if (path.dentry)
 589                 path_put(&path);
 590
 591         sock_put(sk);
 592
 593         /* ---- Socket is dead now and most probably destroyed ---- */
 594
 595         /*
 596          * Fixme: BSD difference: In BSD all sockets connected to us get
 597          *        ECONNRESET and we die on the spot. In Linux we behave
 598          *        like files and pipes do and wait for the last
 599          *        dereference.
 600          *
 601          * Can't we simply set sock->err?
 602          *
 603          *        What the above comment does talk about? --ANK(980817)
 604          */
 605
 606         if (READ_ONCE(unix_tot_inflight))
 607                 unix_gc();              /* Garbage collect fds */
 608 }
 609
 610 static void init_peercred(struct sock *sk)
 611 {
 612         const struct cred *old_cred;
 613         struct pid *old_pid;
 614
 615         spin_lock(&sk->sk_peer_lock);
 616         old_pid = sk->sk_peer_pid;
 617         old_cred = sk->sk_peer_cred;
 618         sk->sk_peer_pid  = get_pid(task_tgid(current));
 619         sk->sk_peer_cred = get_current_cred();
 620         spin_unlock(&sk->sk_peer_lock);
 621
 622         put_pid(old_pid);
 623         put_cred(old_cred);
 624 }
 625
 626 static void copy_peercred(struct sock *sk, struct sock *peersk)
 627 {
 628         const struct cred *old_cred;
 629         struct pid *old_pid;
 630
 631         if (sk < peersk) {
 632                 spin_lock(&sk->sk_peer_lock);
 633                 spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
 634         } else {
 635                 spin_lock(&peersk->sk_peer_lock);
 636                 spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
 637         }
 638         old_pid = sk->sk_peer_pid;
 639         old_cred = sk->sk_peer_cred;
 640         sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
 641         sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
 642
 643         spin_unlock(&sk->sk_peer_lock);
 644         spin_unlock(&peersk->sk_peer_lock);
 645
 646         put_pid(old_pid);
 647         put_cred(old_cred);
 648 }
 649
 650 static int unix_listen(struct socket *sock, int backlog)
 651 {
 652         int err;
 653         struct sock *sk = sock->sk;
 654         struct unix_sock *u = unix_sk(sk);
 655
 656         err = -EOPNOTSUPP;
 657         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 658                 goto out;       /* Only stream/seqpacket sockets accept */
 659         err = -EINVAL;
 660         if (!u->addr)
 661                 goto out;       /* No listens on an unbound socket */
 662         unix_state_lock(sk);
 663         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 664                 goto out_unlock;
 665         if (backlog > sk->sk_max_ack_backlog)
 666                 wake_up_interruptible_all(&u->peer_wait);
 667         sk->sk_max_ack_backlog  = backlog;
 668         sk->sk_state            = TCP_LISTEN;
 669         /* set credentials so connect can copy them */
 670         init_peercred(sk);
 671         err = 0;
 672
 673 out_unlock:
 674         unix_state_unlock(sk);
 675 out:
 676         return err;
 677 }
 678
 679 static int unix_release(struct socket *);
 680 static int unix_bind(struct socket *, struct sockaddr *, int);
 681 static int unix_stream_connect(struct socket *, struct sockaddr *,
 682                                int addr_len, int flags);
 683 static int unix_socketpair(struct socket *, struct socket *);
 684 static int unix_accept(struct socket *, struct socket *, int, bool);
 685 static int unix_getname(struct socket *, struct sockaddr *, int);
 686 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
 687 static __poll_t unix_dgram_poll(struct file *, struct socket *,
 688                                     poll_table *);
 689 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 690 #ifdef CONFIG_COMPAT
 691 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 692 #endif
 693 static int unix_shutdown(struct socket *, int);
 694 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
 695 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
 696 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
 697                                     size_t size, int flags);
 698 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
 699                                        struct pipe_inode_info *, size_t size,
 700                                        unsigned int flags);
 701 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
 702 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
 703 static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
 704                           sk_read_actor_t recv_actor);
 705 static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
 706                                  sk_read_actor_t recv_actor);
 707 static int unix_dgram_connect(struct socket *, struct sockaddr *,
 708                               int, int);
 709 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
 710 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
 711                                   int);
 712
 713 static int unix_set_peek_off(struct sock *sk, int val)
 714 {
 715         struct unix_sock *u = unix_sk(sk);
 716
 717         if (mutex_lock_interruptible(&u->iolock))
 718                 return -EINTR;
 719
 720         WRITE_ONCE(sk->sk_peek_off, val);
 721         mutex_unlock(&u->iolock);
 722
 723         return 0;
 724 }
 725
 726 #ifdef CONFIG_PROC_FS
 727 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
 728 {
 729         struct sock *sk = sock->sk;
 730         struct unix_sock *u;
 731
 732         if (sk) {
 733                 u = unix_sk(sock->sk);
 734                 seq_printf(m, "scm_fds: %u\n",
 735                            atomic_read(&u->scm_stat.nr_fds));
 736         }
 737 }
 738 #else
 739 #define unix_show_fdinfo NULL
 740 #endif
 741
 742 static const struct proto_ops unix_stream_ops = {
 743         .family =       PF_UNIX,
 744         .owner =        THIS_MODULE,
 745         .release =      unix_release,
 746         .bind =         unix_bind,
 747         .connect =      unix_stream_connect,
 748         .socketpair =   unix_socketpair,
 749         .accept =       unix_accept,
 750         .getname =      unix_getname,
 751         .poll =         unix_poll,
 752         .ioctl =        unix_ioctl,
 753 #ifdef CONFIG_COMPAT
 754         .compat_ioctl = unix_compat_ioctl,
 755 #endif
 756         .listen =       unix_listen,
 757         .shutdown =     unix_shutdown,
 758         .sendmsg =      unix_stream_sendmsg,
 759         .recvmsg =      unix_stream_recvmsg,
 760         .read_sock =    unix_stream_read_sock,
 761         .mmap =         sock_no_mmap,
 762         .sendpage =     unix_stream_sendpage,
 763         .splice_read =  unix_stream_splice_read,
 764         .set_peek_off = unix_set_peek_off,
 765         .show_fdinfo =  unix_show_fdinfo,
 766 };
 767
 768 static const struct proto_ops unix_dgram_ops = {
 769         .family =       PF_UNIX,
 770         .owner =        THIS_MODULE,
 771         .release =      unix_release,
 772         .bind =         unix_bind,
 773         .connect =      unix_dgram_connect,
 774         .socketpair =   unix_socketpair,
 775         .accept =       sock_no_accept,
 776         .getname =      unix_getname,
 777         .poll =         unix_dgram_poll,
 778         .ioctl =        unix_ioctl,
 779 #ifdef CONFIG_COMPAT
 780         .compat_ioctl = unix_compat_ioctl,
 781 #endif
 782         .listen =       sock_no_listen,
 783         .shutdown =     unix_shutdown,
 784         .sendmsg =      unix_dgram_sendmsg,
 785         .read_sock =    unix_read_sock,
 786         .recvmsg =      unix_dgram_recvmsg,
 787         .mmap =         sock_no_mmap,
 788         .sendpage =     sock_no_sendpage,
 789         .set_peek_off = unix_set_peek_off,
 790         .show_fdinfo =  unix_show_fdinfo,
 791 };
 792
 793 static const struct proto_ops unix_seqpacket_ops = {
 794         .family =       PF_UNIX,
 795         .owner =        THIS_MODULE,
 796         .release =      unix_release,
 797         .bind =         unix_bind,
 798         .connect =      unix_stream_connect,
 799         .socketpair =   unix_socketpair,
 800         .accept =       unix_accept,
 801         .getname =      unix_getname,
 802         .poll =         unix_dgram_poll,
 803         .ioctl =        unix_ioctl,
 804 #ifdef CONFIG_COMPAT
 805         .compat_ioctl = unix_compat_ioctl,
 806 #endif
 807         .listen =       unix_listen,
 808         .shutdown =     unix_shutdown,
 809         .sendmsg =      unix_seqpacket_sendmsg,
 810         .recvmsg =      unix_seqpacket_recvmsg,
 811         .mmap =         sock_no_mmap,
 812         .sendpage =     sock_no_sendpage,
 813         .set_peek_off = unix_set_peek_off,
 814         .show_fdinfo =  unix_show_fdinfo,
 815 };
 816
 817 static void unix_close(struct sock *sk, long timeout)
 818 {
 819         /* Nothing to do here, unix socket does not need a ->close().
 820          * This is merely for sockmap.
 821          */
 822 }
 823
 824 static void unix_unhash(struct sock *sk)
 825 {
 826         /* Nothing to do here, unix socket does not need a ->unhash().
 827          * This is merely for sockmap.
 828          */
 829 }
 830
 831 struct proto unix_dgram_proto = {
 832         .name                   = "UNIX",
 833         .owner                  = THIS_MODULE,
 834         .obj_size               = sizeof(struct unix_sock),
 835         .close                  = unix_close,
 836 #ifdef CONFIG_BPF_SYSCALL
 837         .psock_update_sk_prot   = unix_dgram_bpf_update_proto,
 838 #endif
 839 };
 840
 841 struct proto unix_stream_proto = {
 842         .name                   = "UNIX-STREAM",
 843         .owner                  = THIS_MODULE,
 844         .obj_size               = sizeof(struct unix_sock),
 845         .close                  = unix_close,
 846         .unhash                 = unix_unhash,
 847 #ifdef CONFIG_BPF_SYSCALL
 848         .psock_update_sk_prot   = unix_stream_bpf_update_proto,
 849 #endif
 850 };
 851
 852 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
 853 {
 854         struct unix_sock *u;
 855         struct sock *sk;
 856         int err;
 857
 858         atomic_long_inc(&unix_nr_socks);
 859         if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
 860                 err = -ENFILE;
 861                 goto err;
 862         }
 863
 864         if (type == SOCK_STREAM)
 865                 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
 866         else /*dgram and  seqpacket */
 867                 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
 868
 869         if (!sk) {
 870                 err = -ENOMEM;
 871                 goto err;
 872         }
 873
 874         sock_init_data(sock, sk);
 875
 876         sk->sk_allocation       = GFP_KERNEL_ACCOUNT;
 877         sk->sk_write_space      = unix_write_space;
 878         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
 879         sk->sk_destruct         = unix_sock_destructor;
 880         u         = unix_sk(sk);
 881         u->path.dentry = NULL;
 882         u->path.mnt = NULL;
 883         spin_lock_init(&u->lock);
 884         atomic_long_set(&u->inflight, 0);
 885         INIT_LIST_HEAD(&u->link);
 886         mutex_init(&u->iolock); /* single task reading lock */
 887         mutex_init(&u->bindlock); /* single task binding lock */
 888         init_waitqueue_head(&u->peer_wait);
 889         init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 890         memset(&u->scm_stat, 0, sizeof(struct scm_stat));
 891         unix_insert_socket(unix_sockets_unbound(sk), sk);
 892
 893         local_bh_disable();
 894         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 895         local_bh_enable();
 896
 897         return sk;
 898
 899 err:
 900         atomic_long_dec(&unix_nr_socks);
 901         return ERR_PTR(err);
 902 }
 903
 904 static int unix_create(struct net *net, struct socket *sock, int protocol,
 905                        int kern)
 906 {
 907         struct sock *sk;
 908
 909         if (protocol && protocol != PF_UNIX)
 910                 return -EPROTONOSUPPORT;
 911
 912         sock->state = SS_UNCONNECTED;
 913
 914         switch (sock->type) {
 915         case SOCK_STREAM:
 916                 sock->ops = &unix_stream_ops;
 917                 break;
 918                 /*
 919                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 920                  *      nothing uses it.
 921                  */
 922         case SOCK_RAW:
 923                 sock->type = SOCK_DGRAM;
 924                 fallthrough;
 925         case SOCK_DGRAM:
 926                 sock->ops = &unix_dgram_ops;
 927                 break;
 928         case SOCK_SEQPACKET:
 929                 sock->ops = &unix_seqpacket_ops;
 930                 break;
 931         default:
 932                 return -ESOCKTNOSUPPORT;
 933         }
 934
 935         sk = unix_create1(net, sock, kern, sock->type);
 936         if (IS_ERR(sk))
 937                 return PTR_ERR(sk);
 938
 939         return 0;
 940 }
 941
 942 static int unix_release(struct socket *sock)
 943 {
 944         struct sock *sk = sock->sk;
 945
 946         if (!sk)
 947                 return 0;
 948
 949         sk->sk_prot->close(sk, 0);
 950         unix_release_sock(sk, 0);
 951         sock->sk = NULL;
 952
 953         return 0;
 954 }
 955
 956 static int unix_autobind(struct socket *sock)
 957 {
 958         struct sock *sk = sock->sk;
 959         struct net *net = sock_net(sk);
 960         struct unix_sock *u = unix_sk(sk);
 961         static u32 ordernum = 1;
 962         struct unix_address *addr;
 963         int err;
 964         unsigned int retries = 0;
 965
 966         err = mutex_lock_interruptible(&u->bindlock);
 967         if (err)
 968                 return err;
 969
 970         if (u->addr)
 971                 goto out;
 972
 973         err = -ENOMEM;
 974         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 975         if (!addr)
 976                 goto out;
 977
 978         addr->name->sun_family = AF_UNIX;
 979         refcount_set(&addr->refcnt, 1);
 980
 981 retry:
 982         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 983         addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
 984         addr->hash ^= sk->sk_type;
 985
 986         spin_lock(&unix_table_lock);
 987         ordernum = (ordernum+1)&0xFFFFF;
 988
 989         if (__unix_find_socket_byname(net, addr->name, addr->len, addr->hash)) {
 990                 spin_unlock(&unix_table_lock);
 991                 /*
 992                  * __unix_find_socket_byname() may take long time if many names
 993                  * are already in use.
 994                  */
 995                 cond_resched();
 996                 /* Give up if all names seems to be in use. */
 997                 if (retries++ == 0xFFFFF) {
 998                         err = -ENOSPC;
 999                         kfree(addr);
1000                         goto out;
1001                 }
1002                 goto retry;
1003         }
1004
1005         __unix_set_addr(sk, addr, addr->hash);
1006         spin_unlock(&unix_table_lock);
1007         err = 0;
1008
1009 out:    mutex_unlock(&u->bindlock);
1010         return err;
1011 }
1012
/*
 * Look up the socket that @sunname refers to.
 *
 * A name whose sun_path starts with a zero byte is searched in the name
 * table (hash key: @type ^ @hash).  Any other name is resolved as a
 * filesystem path, which must be writable by the caller and refer to a
 * socket inode bound to a unix socket of type @type.
 *
 * Returns the socket found (with a reference the caller must drop), or
 * NULL with *@error set:
 *   - the kern_path()/path_permission() error for filesystem names,
 *   - -ECONNREFUSED if nothing is bound to the name,
 *   - -EPROTOTYPE if a filesystem socket exists but has another type.
 * *@error is left untouched on success.
 */
static struct sock *unix_find_other(struct net *net,
                                    struct sockaddr_un *sunname, int len,
                                    int type, unsigned int hash, int *error)
{
        struct inode *inode;
        struct path path;
        struct sock *u;
        int err = 0;

        if (!sunname->sun_path[0]) {
                /* Name-table lookup. */
                u = unix_find_socket_byname(net, sunname, len, type ^ hash);
                if (!u) {
                        err = -ECONNREFUSED;
                        goto fail;
                }
                if (unix_sk(u)->path.dentry)
                        touch_atime(&unix_sk(u)->path);
                return u;
        }

        /* Filesystem lookup. */
        err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
        if (err)
                goto fail;

        inode = d_backing_inode(path.dentry);
        err = path_permission(&path, MAY_WRITE);
        if (err)
                goto put_fail;

        err = -ECONNREFUSED;
        if (!S_ISSOCK(inode->i_mode))
                goto put_fail;

        u = unix_find_socket_byinode(inode);
        if (!u)
                goto put_fail;

        /* Only a lookup that will succeed updates the access time. */
        if (u->sk_type == type)
                touch_atime(&path);

        path_put(&path);

        if (u->sk_type != type) {
                sock_put(u);
                err = -EPROTOTYPE;
                goto fail;
        }
        return u;

put_fail:
        path_put(&path);
fail:
        *error = err;
        return NULL;
}
1067
/*
 * Bind @sk to the filesystem pathname in @addr->name->sun_path by creating
 * an S_IFSOCK node at that path.
 *
 * The node is created before u->bindlock is taken and before the "already
 * bound" check, so any failure after a successful mknod has to unlink the
 * freshly created node again.
 *
 * On success the sock takes its own references on the mount and dentry
 * (mntget()/dget()), stores them in u->path and installs @addr with
 * __unix_set_addr().  @addr is not freed here on failure; that is left to
 * the caller.
 *
 * Returns 0 on success, the error from kern_path_create(),
 * security_path_mknod(), vfs_mknod() or mutex_lock_interruptible(), or
 * -EINVAL if the socket already has an address.
 */
static int unix_bind_bsd(struct sock *sk, struct unix_address *addr)
{
        struct unix_sock *u = unix_sk(sk);
        /* Socket node mode: the socket inode's mode bits minus the umask. */
        umode_t mode = S_IFSOCK |
               (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
        struct user_namespace *ns; // barf...
        struct path parent;
        struct dentry *dentry;
        unsigned int hash;
        int err;

        /*
         * Get the parent directory, calculate the hash for last
         * component.
         */
        dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
        if (IS_ERR(dentry))
                return PTR_ERR(dentry);
        ns = mnt_user_ns(parent.mnt);

        /*
         * All right, let's create it.
         */
        err = security_path_mknod(&parent, dentry, mode, 0);
        if (!err)
                err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0);
        if (err)
                goto out;
        err = mutex_lock_interruptible(&u->bindlock);
        if (err)
                goto out_unlink;
        /* Already bound: out_unlock turns this into -EINVAL. */
        if (u->addr)
                goto out_unlock;

        /*
         * NOTE(review): UNIX_HASH_SIZE lies outside the bucket range used
         * below (0 .. UNIX_HASH_SIZE - 1); presumably it marks the address
         * as filesystem-bound -- confirm against the lookup code.
         */
        addr->hash = UNIX_HASH_SIZE;
        /* The table bucket is derived from the new node's inode number. */
        hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
        spin_lock(&unix_table_lock);
        u->path.mnt = mntget(parent.mnt);
        u->path.dentry = dget(dentry);
        __unix_set_addr(sk, addr, hash);
        spin_unlock(&unix_table_lock);
        mutex_unlock(&u->bindlock);
        done_path_create(&parent, dentry);
        return 0;

out_unlock:
        mutex_unlock(&u->bindlock);
        err = -EINVAL;
out_unlink:
        /* failed after successful mknod?  unlink what we'd created... */
        vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL);
out:
        done_path_create(&parent, dentry);
        return err;
}
1123
1124 static int unix_bind_abstract(struct sock *sk, struct unix_address *addr)
1125 {
1126         struct unix_sock *u = unix_sk(sk);
1127         int err;
1128
1129         err = mutex_lock_interruptible(&u->bindlock);
1130         if (err)
1131                 return err;
1132
1133         if (u->addr) {
1134                 mutex_unlock(&u->bindlock);
1135                 return -EINVAL;
1136         }
1137
1138         spin_lock(&unix_table_lock);
1139         if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len,
1140                                       addr->hash)) {
1141                 spin_unlock(&unix_table_lock);
1142                 mutex_unlock(&u->bindlock);
1143                 return -EADDRINUSE;
1144         }
1145         __unix_set_addr(sk, addr, addr->hash);
1146         spin_unlock(&unix_table_lock);
1147         mutex_unlock(&u->bindlock);
1148         return 0;
1149 }
1150
1151 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1152 {
1153         struct sock *sk = sock->sk;
1154         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1155         char *sun_path = sunaddr->sun_path;
1156         int err;
1157         unsigned int hash;
1158         struct unix_address *addr;
1159
1160         if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
1161             sunaddr->sun_family != AF_UNIX)
1162                 return -EINVAL;
1163
1164         if (addr_len == sizeof(short))
1165                 return unix_autobind(sock);
1166
1167         err = unix_mkname(sunaddr, addr_len, &hash);
1168         if (err < 0)
1169                 return err;
1170         addr_len = err;
1171         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1172         if (!addr)
1173                 return -ENOMEM;
1174
1175         memcpy(addr->name, sunaddr, addr_len);
1176         addr->len = addr_len;
1177         addr->hash = hash ^ sk->sk_type;
1178         refcount_set(&addr->refcnt, 1);
1179
1180         if (sun_path[0])
1181                 err = unix_bind_bsd(sk, addr);
1182         else
1183                 err = unix_bind_abstract(sk, addr);
1184         if (err)
1185                 unix_release_addr(addr);
1186         return err == -EEXIST ? -EADDRINUSE : err;
1187 }
1188
/*
 * Take the state locks of @sk1 and @sk2.  When @sk2 is NULL or the same
 * sock as @sk1, only @sk1 is locked.  Otherwise the sock at the lower
 * address is locked first and the other one with the nested variant, so
 * the order does not depend on which argument is which.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
        struct sock *first, *second;

        if (!sk2 || unlikely(sk1 == sk2)) {
                unix_state_lock(sk1);
                return;
        }

        if (sk1 < sk2) {
                first = sk1;
                second = sk2;
        } else {
                first = sk2;
                second = sk1;
        }

        unix_state_lock(first);
        unix_state_lock_nested(second);
}
1203
/*
 * Counterpart of unix_state_double_lock(): always unlocks @sk1, and @sk2
 * too when it is a distinct, non-NULL sock.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
        unix_state_unlock(sk1);

        if (sk2 && likely(sk1 != sk2))
                unix_state_unlock(sk2);
}
1213
/*
 * ->connect() for datagram sockets: set, replace or clear the peer of @sock.
 *
 * With sa_family == AF_UNSPEC the current peer (if any) is dropped.
 * Otherwise the destination is resolved with unix_find_other() and, after
 * the unix_may_send() and security checks pass, becomes the new peer.
 *
 * Returns 0 on success, -EINVAL if @alen does not cover sa_family, the
 * error from unix_mkname(), unix_autobind() or unix_find_other(), -EPERM
 * if unix_may_send() refuses, or the error from security_unix_may_send().
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
                              int alen, int flags)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
        struct sock *other;
        unsigned int hash;
        int err;

        err = -EINVAL;
        if (alen < offsetofend(struct sockaddr, sa_family))
                goto out;

        if (addr->sa_family != AF_UNSPEC) {
                err = unix_mkname(sunaddr, alen, &hash);
                if (err < 0)
                        goto out;
                alen = err;

                /* With SOCK_PASSCRED set, an unbound socket is autobound first. */
                if (test_bit(SOCK_PASSCRED, &sock->flags) &&
                    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
                        goto out;

restart:
                /* On success we hold a reference on other. */
                other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
                if (!other)
                        goto out;

                unix_state_double_lock(sk, other);

                /* Apparently VFS overslept socket death. Retry. */
                if (sock_flag(other, SOCK_DEAD)) {
                        unix_state_double_unlock(sk, other);
                        sock_put(other);
                        goto restart;
                }

                err = -EPERM;
                if (!unix_may_send(sk, other))
                        goto out_unlock;

                err = security_unix_may_send(sk->sk_socket, other->sk_socket);
                if (err)
                        goto out_unlock;

                /* Both ends are marked established. */
                sk->sk_state = other->sk_state = TCP_ESTABLISHED;
        } else {
                /*
                 *      1003.1g breaking connected state with AF_UNSPEC
                 */
                other = NULL;
                /* With a NULL second argument this locks sk only. */
                unix_state_double_lock(sk, other);
        }

        /*
         * If it was connected, reconnect.
         */
        if (unix_peer(sk)) {
                struct sock *old_peer = unix_peer(sk);

                unix_peer(sk) = other;
                if (!other)
                        sk->sk_state = TCP_CLOSE;
                unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

                unix_state_double_unlock(sk, other);

                /* Done after the state locks have been dropped. */
                if (other != old_peer)
                        unix_dgram_disconnected(sk, old_peer);
                /* Drop the reference that came with the old peer pointer. */
                sock_put(old_peer);
        } else {
                /* The lookup reference (if any) now belongs to unix_peer(sk). */
                unix_peer(sk) = other;
                unix_state_double_unlock(sk, other);
        }

        return 0;

out_unlock:
        unix_state_double_unlock(sk, other);
        sock_put(other);
out:
        return err;
}
1298
1299 static long unix_wait_for_peer(struct sock *other, long timeo)
1300         __releases(&unix_sk(other)->lock)
1301 {
1302         struct unix_sock *u = unix_sk(other);
1303         int sched;
1304         DEFINE_WAIT(wait);
1305
1306         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1307
1308         sched = !sock_flag(other, SOCK_DEAD) &&
1309                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1310                 unix_recvq_full_lockless(other);
1311
1312         unix_state_unlock(other);
1313
1314         if (sched)
1315                 timeo = schedule_timeout(timeo);
1316
1317         finish_wait(&u->peer_wait, &wait);
1318         return timeo;
1319 }
1320
/*
 * ->connect() for connection-oriented AF_UNIX sockets (installed in
 * unix_seqpacket_ops; presumably also in the stream ops table, which is
 * defined outside this part of the file).
 *
 * The sock for the accepting side (newsk) and a one-byte skb charged to
 * it are allocated up front.  The listener named by @uaddr is then looked
 * up and locked, our own state lock is taken nested inside it, sk and
 * newsk are linked as peers, and the skb is queued on the listener's
 * receive queue, where unix_accept() picks newsk up via skb->sk.
 *
 * Returns 0 on success or a negative errno:
 *   - the error from unix_mkname(), unix_autobind(), unix_create1(),
 *     unix_find_other() or security_unix_stream_connect(),
 *   - -ENOMEM if the skb cannot be allocated,
 *   - -ECONNREFUSED if the target is not listening or has RCV_SHUTDOWN set,
 *   - -EAGAIN if the listener's queue is full and the send timeout is zero,
 *   - sock_intr_errno() of the remaining timeout if a signal is pending
 *     after waiting,
 *   - -EISCONN if sk is already established, -EINVAL for any other state
 *     than TCP_CLOSE.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
                               int addr_len, int flags)
{
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk), *newu, *otheru;
        struct sock *newsk = NULL;
        struct sock *other = NULL;
        struct sk_buff *skb = NULL;
        unsigned int hash;
        int st;
        int err;
        long timeo;

        err = unix_mkname(sunaddr, addr_len, &hash);
        if (err < 0)
                goto out;
        addr_len = err;

        /* With SOCK_PASSCRED set, an unbound socket is autobound first. */
        if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
            (err = unix_autobind(sock)) != 0)
                goto out;

        timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

        /* First of all allocate resources.
           If we will make it after state is locked,
           we will have to recheck all again in any case.
         */

        /* create new sock for complete connection */
        newsk = unix_create1(sock_net(sk), NULL, 0, sock->type);
        if (IS_ERR(newsk)) {
                err = PTR_ERR(newsk);
                /* Keep the out: path from releasing an error pointer. */
                newsk = NULL;
                goto out;
        }

        err = -ENOMEM;

        /* Allocate skb for sending to listening sock */
        skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
        if (skb == NULL)
                goto out;

restart:
        /*  Find listening sock. */
        other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
        if (!other)
                goto out;

        /* Latch state of peer */
        unix_state_lock(other);

        /* Apparently VFS overslept socket death. Retry. */
        if (sock_flag(other, SOCK_DEAD)) {
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = -ECONNREFUSED;
        if (other->sk_state != TCP_LISTEN)
                goto out_unlock;
        if (other->sk_shutdown & RCV_SHUTDOWN)
                goto out_unlock;

        if (unix_recvq_full(other)) {
                err = -EAGAIN;
                if (!timeo)
                        goto out_unlock;

                /* Returns with other's state lock released. */
                timeo = unix_wait_for_peer(other, timeo);

                err = sock_intr_errno(timeo);
                /* The lock is already dropped, hence out: and not out_unlock:. */
                if (signal_pending(current))
                        goto out;
                sock_put(other);
                goto restart;
        }

        /* Latch our state.

           It is tricky place. We need to grab our state lock and cannot
           drop lock on peer. It is dangerous because deadlock is
           possible. Connect to self case and simultaneous
           attempt to connect are eliminated by checking socket
           state. other is TCP_LISTEN, if sk is TCP_LISTEN we
           check this before attempt to grab lock.

           Well, and we have to recheck the state after socket locked.
         */
        st = sk->sk_state;

        switch (st) {
        case TCP_CLOSE:
                /* This is ok... continue with connect */
                break;
        case TCP_ESTABLISHED:
                /* Socket is already connected */
                err = -EISCONN;
                goto out_unlock;
        default:
                err = -EINVAL;
                goto out_unlock;
        }

        unix_state_lock_nested(sk);

        /* Our state changed between the unlocked read and the lock: retry. */
        if (sk->sk_state != st) {
                unix_state_unlock(sk);
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = security_unix_stream_connect(sk, other, newsk);
        if (err) {
                unix_state_unlock(sk);
                goto out_unlock;
        }

        /* The way is open! Fastly set all the necessary fields... */

        /* Reference on sk that backs unix_peer(newsk). */
        sock_hold(sk);
        unix_peer(newsk)        = sk;
        newsk->sk_state         = TCP_ESTABLISHED;
        newsk->sk_type          = sk->sk_type;
        init_peercred(newsk);
        newu = unix_sk(newsk);
        RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
        otheru = unix_sk(other);

        /* copy address information from listening to new sock
         *
         * The contents of *(otheru->addr) and otheru->path
         * are seen fully set up here, since we have found
         * otheru in hash under unix_table_lock.  Insertion
         * into the hash chain we'd found it in had been done
         * in an earlier critical area protected by unix_table_lock,
         * the same one where we'd set *(otheru->addr) contents,
         * as well as otheru->path and otheru->addr itself.
         *
         * Using smp_store_release() here to set newu->addr
         * is enough to make those stores, as well as stores
         * to newu->path visible to anyone who gets newu->addr
         * by smp_load_acquire().  IOW, the same warranties
         * as for unix_sock instances bound in unix_bind() or
         * in unix_autobind().
         */
        if (otheru->path.dentry) {
                path_get(&otheru->path);
                newu->path = otheru->path;
        }
        refcount_inc(&otheru->addr->refcnt);
        smp_store_release(&newu->addr, otheru->addr);

        /* Set credentials */
        copy_peercred(sk, other);

        sock->state     = SS_CONNECTED;
        sk->sk_state    = TCP_ESTABLISHED;
        /* Reference on newsk that backs unix_peer(sk). */
        sock_hold(newsk);

        smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
        unix_peer(sk)   = newsk;

        unix_state_unlock(sk);

        /* take ten and send info to listening sock */
        spin_lock(&other->sk_receive_queue.lock);
        __skb_queue_tail(&other->sk_receive_queue, skb);
        spin_unlock(&other->sk_receive_queue.lock);
        unix_state_unlock(other);
        other->sk_data_ready(other);
        sock_put(other);
        return 0;

out_unlock:
        if (other)
                unix_state_unlock(other);

out:
        /* Common cleanup: skb, newsk and other may each still be NULL here. */
        kfree_skb(skb);
        if (newsk)
                unix_release_sock(newsk, 0);
        if (other)
                sock_put(other);
        return err;
}
1512
1513 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1514 {
1515         struct sock *ska = socka->sk, *skb = sockb->sk;
1516
1517         /* Join our sockets back to back */
1518         sock_hold(ska);
1519         sock_hold(skb);
1520         unix_peer(ska) = skb;
1521         unix_peer(skb) = ska;
1522         init_peercred(ska);
1523         init_peercred(skb);
1524
1525         ska->sk_state = TCP_ESTABLISHED;
1526         skb->sk_state = TCP_ESTABLISHED;
1527         socka->state  = SS_CONNECTED;
1528         sockb->state  = SS_CONNECTED;
1529         return 0;
1530 }
1531
1532 static void unix_sock_inherit_flags(const struct socket *old,
1533                                     struct socket *new)
1534 {
1535         if (test_bit(SOCK_PASSCRED, &old->flags))
1536                 set_bit(SOCK_PASSCRED, &new->flags);
1537         if (test_bit(SOCK_PASSSEC, &old->flags))
1538                 set_bit(SOCK_PASSSEC, &new->flags);
1539 }
1540
1541 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1542                        bool kern)
1543 {
1544         struct sock *sk = sock->sk;
1545         struct sock *tsk;
1546         struct sk_buff *skb;
1547         int err;
1548
1549         err = -EOPNOTSUPP;
1550         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1551                 goto out;
1552
1553         err = -EINVAL;
1554         if (sk->sk_state != TCP_LISTEN)
1555                 goto out;
1556
1557         /* If socket state is TCP_LISTEN it cannot change (for now...),
1558          * so that no locks are necessary.
1559          */
1560
1561         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1562         if (!skb) {
1563                 /* This means receive shutdown. */
1564                 if (err == 0)
1565                         err = -EINVAL;
1566                 goto out;
1567         }
1568
1569         tsk = skb->sk;
1570         skb_free_datagram(sk, skb);
1571         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1572
1573         /* attach accepted sock to socket */
1574         unix_state_lock(tsk);
1575         newsock->state = SS_CONNECTED;
1576         unix_sock_inherit_flags(sock, newsock);
1577         sock_graft(tsk, newsock);
1578         unix_state_unlock(tsk);
1579         return 0;
1580
1581 out:
1582         return err;
1583 }
1584
1585
1586 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1587 {
1588         struct sock *sk = sock->sk;
1589         struct unix_address *addr;
1590         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1591         int err = 0;
1592
1593         if (peer) {
1594                 sk = unix_peer_get(sk);
1595
1596                 err = -ENOTCONN;
1597                 if (!sk)
1598                         goto out;
1599                 err = 0;
1600         } else {
1601                 sock_hold(sk);
1602         }
1603
1604         addr = smp_load_acquire(&unix_sk(sk)->addr);
1605         if (!addr) {
1606                 sunaddr->sun_family = AF_UNIX;
1607                 sunaddr->sun_path[0] = 0;
1608                 err = sizeof(short);
1609         } else {
1610                 err = addr->len;
1611                 memcpy(sunaddr, addr->name, addr->len);
1612         }
1613         sock_put(sk);
1614 out:
1615         return err;
1616 }
1617
/*
 * Duplicate the file list attached to @skb into @scm->fp without
 * detaching it from the skb, then pass through unix_gc_lock once as a
 * barrier against a concurrently running garbage collection (see below).
 */
static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
        scm->fp = scm_fp_dup(UNIXCB(skb).fp);

        /*
         * Garbage collection of unix sockets starts by selecting a set of
         * candidate sockets which have reference only from being in flight
         * (total_refs == inflight_refs).  This condition is checked once during
         * the candidate collection phase, and candidates are marked as such, so
         * that non-candidates can later be ignored.  While inflight_refs is
         * protected by unix_gc_lock, total_refs (file count) is not, hence this
         * is an instantaneous decision.
         *
         * Once a candidate, however, the socket must not be reinstalled into a
         * file descriptor while the garbage collection is in progress.
         *
         * If the above conditions are met, then the directed graph of
         * candidates (*) does not change while unix_gc_lock is held.
         *
         * Any operation that changes the file count through file descriptors
         * (dup, close, sendmsg) does not change the graph since candidates are
         * not installed in fds.
         *
         * Dequeuing a candidate via recvmsg would install it into an fd, but
         * that takes unix_gc_lock to decrement the inflight count, so it's
         * serialized with garbage collection.
         *
         * MSG_PEEK is special in that it does not change the inflight count,
         * yet does install the socket into an fd.  The following lock/unlock
         * pair is to ensure serialization with garbage collection.  It must be
         * done between incrementing the file count and installing the file into
         * an fd.
         *
         * If garbage collection starts after the barrier provided by the
         * lock/unlock, then it will see the elevated refcount and not mark this
         * as a candidate.  If a garbage collection is already in progress
         * before the file count was incremented, then the lock/unlock pair will
         * ensure that garbage collection is finished before progressing to
         * installing the fd.
         *
         * (*) A -> B where B is on the queue of A or B is on the queue of C
         * which is on the queue of listening socket A.
         */
        /* Empty critical section on purpose: only the serialization matters. */
        spin_lock(&unix_gc_lock);
        spin_unlock(&unix_gc_lock);
}
1664
1665 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1666 {
1667         int err = 0;
1668
1669         UNIXCB(skb).pid  = get_pid(scm->pid);
1670         UNIXCB(skb).uid = scm->creds.uid;
1671         UNIXCB(skb).gid = scm->creds.gid;
1672         UNIXCB(skb).fp = NULL;
1673         unix_get_secdata(scm, skb);
1674         if (scm->fp && send_fds)
1675                 err = unix_attach_fds(scm, skb);
1676
1677         skb->destructor = unix_destruct_scm;
1678         return err;
1679 }
1680
1681 static bool unix_passcred_enabled(const struct socket *sock,
1682                                   const struct sock *other)
1683 {
1684         return test_bit(SOCK_PASSCRED, &sock->flags) ||
1685                !other->sk_socket ||
1686                test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1687 }
1688
1689 /*
1690  * Some apps rely on write() giving SCM_CREDENTIALS
1691  * We include credentials if source or destination socket
1692  * asserted SOCK_PASSCRED.
      *
      * Does nothing when @skb already carries a pid.  Otherwise, if
      * unix_passcred_enabled() says so, stores a referenced pid of the
      * current thread group plus the current uid/gid in the skb.
1693  */
1694 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1695                             const struct sock *other)
1696 {
1697         if (UNIXCB(skb).pid)
1698                 return;
1699         if (unix_passcred_enabled(sock, other)) {
1700                 UNIXCB(skb).pid  = get_pid(task_tgid(current));
1701                 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1702         }
1703 }
1704
     /*
      * Initialise @scm for a send that has no control message of its own
      * (used by unix_stream_sendpage()).
      *
      * scm_send() is called with an empty msghdr (msg_controllen == 0); on
      * success, and if unix_passcred_enabled() holds for @socket/@other, the
      * cookie is filled with a referenced pid of the current thread group and
      * the current uid/gid.
      *
      * Returns 0 on success or the error returned by scm_send().
      */
1705 static int maybe_init_creds(struct scm_cookie *scm,
1706                             struct socket *socket,
1707                             const struct sock *other)
1708 {
1709         int err;
1710         struct msghdr msg = { .msg_controllen = 0 };
1711
1712         err = scm_send(socket, &msg, scm, false);
1713         if (err)
1714                 return err;
1715
1716         if (unix_passcred_enabled(socket, other)) {
1717                 scm->pid = get_pid(task_tgid(current));
1718                 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1719         }
1720         return err;
1721 }
1722
     /*
      * Return true when the credentials stored in @skb match @scm: same pid
      * pointer, equal uid and gid, and unix_secdata_eq() reports equal
      * security data.  unix_stream_sendpage() uses this to decide whether it
      * may append to the skb at the tail of the receive queue.
      */
1723 static bool unix_skb_scm_eq(struct sk_buff *skb,
1724                             struct scm_cookie *scm)
1725 {
1726         const struct unix_skb_parms *u = &UNIXCB(skb);
1727
1728         return u->pid == scm->pid &&
1729                uid_eq(u->uid, scm->creds.uid) &&
1730                gid_eq(u->gid, scm->creds.gid) &&
1731                unix_secdata_eq(scm, skb);
1732 }
1733
     /*
      * Account the file descriptors attached to @skb on the receiving socket
      * @sk: adds fp->count to scm_stat.nr_fds.  No-op when the skb carries no
      * fd list or an empty one.
      */
1734 static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1735 {
1736         struct scm_fp_list *fp = UNIXCB(skb).fp;
1737         struct unix_sock *u = unix_sk(sk);
1738
1739         if (unlikely(fp && fp->count))
1740                 atomic_add(fp->count, &u->scm_stat.nr_fds);
1741 }
1742
     /*
      * Counterpart of scm_stat_add(): subtracts the number of file
      * descriptors attached to @skb from @sk's scm_stat.nr_fds.  No-op when
      * the skb carries no fd list or an empty one.
      */
1743 static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1744 {
1745         struct scm_fp_list *fp = UNIXCB(skb).fp;
1746         struct unix_sock *u = unix_sk(sk);
1747
1748         if (unlikely(fp && fp->count))
1749                 atomic_sub(fp->count, &u->scm_stat.nr_fds);
1750 }
1751
1752 /*
1753  *      Send AF_UNIX data.
      *
      *      Datagram send path; unix_seqpacket_sendmsg() also calls it.
      *
      *      The destination is either the address in msg->msg_name (resolved
      *      with unix_mkname()/unix_find_other()) or, when no name is given,
      *      the connected peer (-ENOTCONN if there is none).  The whole
      *      message goes into one skb, which is queued on the destination's
      *      receive queue under the destination's state lock.
      *
      *      Returns @len on success (also when the destination's socket
      *      filter drops the packet) or a negative errno.  MSG_OOB is
      *      rejected with -EOPNOTSUPP.
1754  */
1755
1756 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1757                               size_t len)
1758 {
1759         struct sock *sk = sock->sk;
1760         struct net *net = sock_net(sk);
1761         struct unix_sock *u = unix_sk(sk);
1762         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1763         struct sock *other = NULL;
1764         int namelen = 0; /* fake GCC */
1765         int err;
1766         unsigned int hash;
1767         struct sk_buff *skb;
1768         long timeo;
1769         struct scm_cookie scm;
1770         int data_len = 0;
1771         int sk_locked;
1772
1773         wait_for_unix_gc();
1774         err = scm_send(sock, msg, &scm, false);
1775         if (err < 0)
1776                 return err;
1777
1778         err = -EOPNOTSUPP;
1779         if (msg->msg_flags&MSG_OOB)
1780                 goto out;
1781
1782         if (msg->msg_namelen) {
1783                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1784                 if (err < 0)
1785                         goto out;
1786                 namelen = err;
1787         } else {
1788                 sunaddr = NULL;
1789                 err = -ENOTCONN;
1790                 other = unix_peer_get(sk);
1791                 if (!other)
1792                         goto out;
1793         }
1794
             /* A credential-passing sender without an address gets autobound. */
1795         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1796             && (err = unix_autobind(sock)) != 0)
1797                 goto out;
1798
1799         err = -EMSGSIZE;
1800         if (len > sk->sk_sndbuf - 32)
1801                 goto out;
1802
             /*
              * Anything beyond SKB_MAX_ALLOC goes into page fragments, capped
              * at MAX_SKB_FRAGS pages and rounded up to whole pages.
              */
1803         if (len > SKB_MAX_ALLOC) {
1804                 data_len = min_t(size_t,
1805                                  len - SKB_MAX_ALLOC,
1806                                  MAX_SKB_FRAGS * PAGE_SIZE);
1807                 data_len = PAGE_ALIGN(data_len);
1808
1809                 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1810         }
1811
1812         skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1813                                    msg->msg_flags & MSG_DONTWAIT, &err,
1814                                    PAGE_ALLOC_COSTLY_ORDER);
1815         if (skb == NULL)
1816                 goto out;
1817
1818         err = unix_scm_to_skb(&scm, skb, true);
1819         if (err < 0)
1820                 goto out_free;
1821
1822         skb_put(skb, len - data_len);
1823         skb->data_len = data_len;
1824         skb->len = len;
1825         err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1826         if (err)
1827                 goto out_free;
1828
1829         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1830
             /*
              * Re-entered with other == NULL after a dead destination was
              * dropped, or with other still set after waiting for queue space.
              */
1831 restart:
1832         if (!other) {
1833                 err = -ECONNRESET;
1834                 if (sunaddr == NULL)
1835                         goto out_free;
1836
1837                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1838                                         hash, &err);
1839                 if (other == NULL)
1840                         goto out_free;
1841         }
1842
1843         if (sk_filter(other, skb) < 0) {
1844                 /* Toss the packet but do not return any error to the sender */
1845                 err = len;
1846                 goto out_free;
1847         }
1848
             /*
              * sk_locked records whether sk's state lock is held in addition
              * to other's; the exit paths use it to know what to unlock.
              */
1849         sk_locked = 0;
1850         unix_state_lock(other);
1851 restart_locked:
1852         err = -EPERM;
1853         if (!unix_may_send(sk, other))
1854                 goto out_unlock;
1855
1856         if (unlikely(sock_flag(other, SOCK_DEAD))) {
1857                 /*
1858                  *      Check with 1003.1g - what should
1859                  *      datagram error
1860                  */
1861                 unix_state_unlock(other);
1862                 sock_put(other);
1863
1864                 if (!sk_locked)
1865                         unix_state_lock(sk);
1866
1867                 err = 0;
1868                 if (sk->sk_type == SOCK_SEQPACKET) {
1869                         /* We are here only when racing with unix_release_sock()
1870                          * is clearing @other. Never change state to TCP_CLOSE
1871                          * unlike SOCK_DGRAM wants.
1872                          */
1873                         unix_state_unlock(sk);
1874                         err = -EPIPE;
1875                 } else if (unix_peer(sk) == other) {
1876                         unix_peer(sk) = NULL;
1877                         unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1878
1879                         sk->sk_state = TCP_CLOSE;
1880                         unix_state_unlock(sk);
1881
1882                         unix_dgram_disconnected(sk, other);
                             /* second put: the reference unix_peer(sk) held */
1883                         sock_put(other);
1884                         err = -ECONNREFUSED;
1885                 } else {
1886                         unix_state_unlock(sk);
1887                 }
1888
                     /*
                      * err == 0 here means the dead socket was not our peer:
                      * look the address up again.
                      */
1889                 other = NULL;
1890                 if (err)
1891                         goto out_free;
1892                 goto restart;
1893         }
1894
1895         err = -EPIPE;
1896         if (other->sk_shutdown & RCV_SHUTDOWN)
1897                 goto out_unlock;
1898
1899         if (sk->sk_type != SOCK_SEQPACKET) {
1900                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1901                 if (err)
1902                         goto out_unlock;
1903         }
1904
1905         /* other == sk && unix_peer(other) != sk if
1906          * - unix_peer(sk) == NULL, destination address bound to sk
1907          * - unix_peer(sk) == sk by time of get but disconnected before lock
1908          */
1909         if (other != sk &&
1910             unlikely(unix_peer(other) != sk &&
1911             unix_recvq_full_lockless(other))) {
1912                 if (timeo) {
                             /*
                              * NOTE(review): the restart path takes
                              * unix_state_lock(other) again, so
                              * unix_wait_for_peer() presumably returns with
                              * that lock released - confirm.
                              */
1913                         timeo = unix_wait_for_peer(other, timeo);
1914
1915                         err = sock_intr_errno(timeo);
1916                         if (signal_pending(current))
1917                                 goto out_free;
1918
1919                         goto restart;
1920                 }
1921
                     /*
                      * Non-blocking and the queue is full: take both state
                      * locks, then recheck everything from restart_locked.
                      */
1922                 if (!sk_locked) {
1923                         unix_state_unlock(other);
1924                         unix_state_double_lock(sk, other);
1925                 }
1926
1927                 if (unix_peer(sk) != other ||
1928                     unix_dgram_peer_wake_me(sk, other)) {
1929                         err = -EAGAIN;
1930                         sk_locked = 1;
1931                         goto out_unlock;
1932                 }
1933
1934                 if (!sk_locked) {
1935                         sk_locked = 1;
1936                         goto restart_locked;
1937                 }
1938         }
1939
1940         if (unlikely(sk_locked))
1941                 unix_state_unlock(sk);
1942
1943         if (sock_flag(other, SOCK_RCVTSTAMP))
1944                 __net_timestamp(skb);
1945         maybe_add_creds(skb, sock, other);
1946         scm_stat_add(other, skb);
1947         skb_queue_tail(&other->sk_receive_queue, skb);
1948         unix_state_unlock(other);
1949         other->sk_data_ready(other);
1950         sock_put(other);
1951         scm_destroy(&scm);
1952         return len;
1953
1954 out_unlock:
1955         if (sk_locked)
1956                 unix_state_unlock(sk);
1957         unix_state_unlock(other);
1958 out_free:
1959         kfree_skb(skb);
1960 out:
1961         if (other)
1962                 sock_put(other);
1963         scm_destroy(&scm);
1964         return err;
1965 }
1966
1967 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1968  * bytes, and a minimum of a full page.
      *
      * unix_stream_sendmsg() uses this both to cap the size of each skb
      * (SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ) and, through get_order(), as the
      * order argument passed to sock_alloc_send_pskb().
1969  */
1970 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1971
1972 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
     /*
      * Queue one out-of-band byte, taken from the iterator of @msg, on @other.
      *
      * A 1-byte skb is allocated against the sending socket and filled with
      * the credentials from @scm; the fds in @scm are attached only if they
      * were not already sent (@fds_sent is false).
      *
      * Under @other's state lock: fails with -EPIPE if @other is dead or has
      * RCV_SHUTDOWN set.  Otherwise the skb gets an extra reference
      * (skb_get()) for the ousk->oob_skb pointer, a previously recorded
      * oob_skb has its reference dropped, and the new skb is appended to
      * @other's receive queue.  sk_send_sigurg() and sk_data_ready() notify
      * the receiver.
      *
      * Returns 0 on success or a negative errno.  The skb is freed on every
      * failure path after allocation.
      */
1973 static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other,
1974                      struct scm_cookie *scm, bool fds_sent)
1975 {
1976         struct unix_sock *ousk = unix_sk(other);
1977         struct sk_buff *skb;
1978         int err = 0;
1979
1980         skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
1981
1982         if (!skb)
1983                 return err;
1984
1985         err = unix_scm_to_skb(scm, skb, !fds_sent);
1986         if (err < 0) {
1987                 kfree_skb(skb);
1988                 return err;
1989         }
1990         skb_put(skb, 1);
1991         err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
1992
1993         if (err) {
1994                 kfree_skb(skb);
1995                 return err;
1996         }
1997
1998         unix_state_lock(other);
1999
2000         if (sock_flag(other, SOCK_DEAD) ||
2001             (other->sk_shutdown & RCV_SHUTDOWN)) {
2002                 unix_state_unlock(other);
2003                 kfree_skb(skb);
2004                 return -EPIPE;
2005         }
2006
2007         maybe_add_creds(skb, sock, other);
             /* extra reference owned by ousk->oob_skb */
2008         skb_get(skb);
2009
             /* only the most recent OOB skb is tracked; release the old one */
2010         if (ousk->oob_skb)
2011                 consume_skb(ousk->oob_skb);
2012
2013         WRITE_ONCE(ousk->oob_skb, skb);
2014
2015         scm_stat_add(other, skb);
2016         skb_queue_tail(&other->sk_receive_queue, skb);
2017         sk_send_sigurg(other);
2018         unix_state_unlock(other);
2019         other->sk_data_ready(other);
2020
2021         return err;
2022 }
2023 #endif
2024
     /*
      * Stream send path.
      *
      * The payload is cut into skbs no larger than
      * min((sk_sndbuf >> 1) - 64, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ); each
      * is queued on the peer's receive queue under the peer's state lock.
      * File descriptors from the control message go only into the first skb.
      *
      * With MSG_OOB and CONFIG_AF_UNIX_OOB, the last byte of a non-empty
      * message is held back from the loop and queued through queue_oob();
      * otherwise MSG_OOB fails with -EOPNOTSUPP.
      *
      * A destination address is not accepted: -EISCONN if the socket is in
      * TCP_ESTABLISHED, -EOPNOTSUPP otherwise.  Without a peer the result is
      * -ENOTCONN.
      *
      * Returns the number of bytes queued if that is non-zero, otherwise a
      * negative errno.  On a pipe error with nothing sent, SIGPIPE is raised
      * unless MSG_NOSIGNAL is set.
      */
2025 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
2026                                size_t len)
2027 {
2028         struct sock *sk = sock->sk;
2029         struct sock *other = NULL;
2030         int err, size;
2031         struct sk_buff *skb;
2032         int sent = 0;
2033         struct scm_cookie scm;
2034         bool fds_sent = false;
2035         int data_len;
2036
2037         wait_for_unix_gc();
2038         err = scm_send(sock, msg, &scm, false);
2039         if (err < 0)
2040                 return err;
2041
2042         err = -EOPNOTSUPP;
             /*
              * With OOB support compiled in, the goto below is the else branch
              * of "if (len)"; without it, the goto is unconditional.
              */
2043         if (msg->msg_flags & MSG_OOB) {
2044 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2045                 if (len)
2046                         len--;
2047                 else
2048 #endif
2049                         goto out_err;
2050         }
2051
2052         if (msg->msg_namelen) {
2053                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
2054                 goto out_err;
2055         } else {
2056                 err = -ENOTCONN;
2057                 other = unix_peer(sk);
2058                 if (!other)
2059                         goto out_err;
2060         }
2061
2062         if (sk->sk_shutdown & SEND_SHUTDOWN)
2063                 goto pipe_err;
2064
2065         while (sent < len) {
2066                 size = len - sent;
2067
2068                 /* Keep two messages in the pipe so it schedules better */
2069                 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
2070
2071                 /* allow fallback to order-0 allocations */
2072                 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
2073
                     /* bytes that do not fit the linear head go to page frags */
2074                 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
2075
2076                 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
2077
2078                 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
2079                                            msg->msg_flags & MSG_DONTWAIT, &err,
2080                                            get_order(UNIX_SKB_FRAGS_SZ));
2081                 if (!skb)
2082                         goto out_err;
2083
2084                 /* Only send the fds in the first buffer */
2085                 err = unix_scm_to_skb(&scm, skb, !fds_sent);
2086                 if (err < 0) {
2087                         kfree_skb(skb);
2088                         goto out_err;
2089                 }
2090                 fds_sent = true;
2091
2092                 skb_put(skb, size - data_len);
2093                 skb->data_len = data_len;
2094                 skb->len = size;
2095                 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
2096                 if (err) {
2097                         kfree_skb(skb);
2098                         goto out_err;
2099                 }
2100
2101                 unix_state_lock(other);
2102
2103                 if (sock_flag(other, SOCK_DEAD) ||
2104                     (other->sk_shutdown & RCV_SHUTDOWN))
2105                         goto pipe_err_free;
2106
2107                 maybe_add_creds(skb, sock, other);
2108                 scm_stat_add(other, skb);
2109                 skb_queue_tail(&other->sk_receive_queue, skb);
2110                 unix_state_unlock(other);
2111                 other->sk_data_ready(other);
2112                 sent += size;
2113         }
2114
2115 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
             /* the byte held back above is sent as the OOB byte */
2116         if (msg->msg_flags & MSG_OOB) {
2117                 err = queue_oob(sock, msg, other, &scm, fds_sent);
2118                 if (err)
2119                         goto out_err;
2120                 sent++;
2121         }
2122 #endif
2123
2124         scm_destroy(&scm);
2125
2126         return sent;
2127
2128 pipe_err_free:
2129         unix_state_unlock(other);
2130         kfree_skb(skb);
2131 pipe_err:
2132         if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
2133                 send_sig(SIGPIPE, current, 0);
2134         err = -EPIPE;
2135 out_err:
2136         scm_destroy(&scm);
             /* a partial send reports the byte count rather than the error */
2137         return sent ? : err;
2138 }
2139
     /*
      * Send @size bytes of @page starting at @offset on a connected stream
      * socket by attaching the page as a fragment to an skb on the peer's
      * receive queue.
      *
      * The fragment is appended to the skb currently at the tail of the
      * peer's queue when that skb carries the same credentials as the sender
      * (unix_skb_scm_eq()); otherwise an empty skb is allocated and queued.
      * The peer's iolock, the peer's state lock and the receive-queue lock
      * are taken in that order; all three are dropped before an skb is
      * allocated and then re-acquired.
      *
      * Returns @size on success or a negative errno: -EOPNOTSUPP for
      * MSG_OOB, -ENOTCONN without a peer or outside TCP_ESTABLISHED, -EPIPE
      * (plus SIGPIPE unless MSG_NOSIGNAL) when either side is shut down.
      */
2140 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
2141                                     int offset, size_t size, int flags)
2142 {
2143         int err;
2144         bool send_sigpipe = false;
2145         bool init_scm = true;
2146         struct scm_cookie scm;
2147         struct sock *other, *sk = socket->sk;
2148         struct sk_buff *skb, *newskb = NULL, *tail = NULL;
2149
2150         if (flags & MSG_OOB)
2151                 return -EOPNOTSUPP;
2152
2153         other = unix_peer(sk);
2154         if (!other || sk->sk_state != TCP_ESTABLISHED)
2155                 return -ENOTCONN;
2156
             /*
              * Never entered from the top; reached only through
              * "goto alloc_skb" with all three locks held.  It drops them,
              * allocates an empty skb and falls through to take them again.
              */
2157         if (false) {
2158 alloc_skb:
2159                 spin_unlock(&other->sk_receive_queue.lock);
2160                 unix_state_unlock(other);
2161                 mutex_unlock(&unix_sk(other)->iolock);
2162                 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
2163                                               &err, 0);
2164                 if (!newskb)
2165                         goto err;
2166         }
2167
2168         /* we must acquire iolock as we modify already present
2169          * skbs in the sk_receive_queue and mess with skb->len
2170          */
2171         err = mutex_lock_interruptible(&unix_sk(other)->iolock);
2172         if (err) {
2173                 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
2174                 goto err;
2175         }
2176
2177         if (sk->sk_shutdown & SEND_SHUTDOWN) {
2178                 err = -EPIPE;
2179                 send_sigpipe = true;
2180                 goto err_unlock;
2181         }
2182
2183         unix_state_lock(other);
2184
2185         if (sock_flag(other, SOCK_DEAD) ||
2186             other->sk_shutdown & RCV_SHUTDOWN) {
2187                 err = -EPIPE;
2188                 send_sigpipe = true;
2189                 goto err_state_unlock;
2190         }
2191
             /* build the scm cookie once, even if we loop via alloc_skb */
2192         if (init_scm) {
2193                 err = maybe_init_creds(&scm, socket, other);
2194                 if (err)
2195                         goto err_state_unlock;
2196                 init_scm = false;
2197         }
2198
2199         spin_lock(&other->sk_receive_queue.lock);
2200         skb = skb_peek_tail(&other->sk_receive_queue);
             /*
              * "tail" is the queue tail that was found unusable before the
              * locks were dropped; if it is still the tail, use the new skb.
              */
2201         if (tail && tail == skb) {
2202                 skb = newskb;
2203         } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2204                 if (newskb) {
2205                         skb = newskb;
2206                 } else {
2207                         tail = skb;
2208                         goto alloc_skb;
2209                 }
2210         } else if (newskb) {
2211                 /* this is fast path, we don't necessarily need to
2212                  * call to kfree_skb even though with newskb == NULL
2213                  * this - does no harm
2214                  */
2215                 consume_skb(newskb);
2216                 newskb = NULL;
2217         }
2218
             /* a non-zero return means the page could not be added to skb */
2219         if (skb_append_pagefrags(skb, page, offset, size)) {
2220                 tail = skb;
2221                 goto alloc_skb;
2222         }
2223
2224         skb->len += size;
2225         skb->data_len += size;
2226         skb->truesize += size;
             /* account the appended bytes in the sender's sk_wmem_alloc */
2227         refcount_add(size, &sk->sk_wmem_alloc);
2228
2229         if (newskb) {
                     /*
                      * skb == newskb here.  NOTE(review): the return value is
                      * ignored; with send_fds == false unix_scm_to_skb()
                      * never calls unix_attach_fds() and returns 0.
                      */
2230                 unix_scm_to_skb(&scm, skb, false);
2231                 __skb_queue_tail(&other->sk_receive_queue, newskb);
2232         }
2233
2234         spin_unlock(&other->sk_receive_queue.lock);
2235         unix_state_unlock(other);
2236         mutex_unlock(&unix_sk(other)->iolock);
2237
2238         other->sk_data_ready(other);
2239         scm_destroy(&scm);
2240         return size;
2241
2242 err_state_unlock:
2243         unix_state_unlock(other);
2244 err_unlock:
2245         mutex_unlock(&unix_sk(other)->iolock);
2246 err:
2247         kfree_skb(newskb);
2248         if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2249                 send_sig(SIGPIPE, current, 0);
             /* scm is only valid once maybe_init_creds() has succeeded */
2250         if (!init_scm)
2251                 scm_destroy(&scm);
2252         return err;
2253 }
2254
     /*
      * SOCK_SEQPACKET send: a thin wrapper around unix_dgram_sendmsg().
      *
      * Returns a pending socket error first (sock_error()), then -ENOTCONN
      * unless the socket is in TCP_ESTABLISHED.  Any destination address in
      * @msg is ignored by zeroing msg_namelen before delegating.
      */
2255 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2256                                   size_t len)
2257 {
2258         int err;
2259         struct sock *sk = sock->sk;
2260
2261         err = sock_error(sk);
2262         if (err)
2263                 return err;
2264
2265         if (sk->sk_state != TCP_ESTABLISHED)
2266                 return -ENOTCONN;
2267
2268         if (msg->msg_namelen)
2269                 msg->msg_namelen = 0;
2270
2271         return unix_dgram_sendmsg(sock, msg, len);
2272 }
2273
     /*
      * SOCK_SEQPACKET receive: fails with -ENOTCONN unless the socket is in
      * TCP_ESTABLISHED, otherwise delegates to unix_dgram_recvmsg().
      */
2274 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2275                                   size_t size, int flags)
2276 {
2277         struct sock *sk = sock->sk;
2278
2279         if (sk->sk_state != TCP_ESTABLISHED)
2280                 return -ENOTCONN;
2281
2282         return unix_dgram_recvmsg(sock, msg, size, flags);
2283 }
2284
     /*
      * Copy the address of @sk into msg->msg_name and set msg->msg_namelen to
      * its length.  @msg is left untouched when @sk has no address.  The
      * address pointer is read with smp_load_acquire().  The caller must
      * have checked that msg->msg_name is usable; it is not tested here.
      */
2285 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2286 {
2287         struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2288
2289         if (addr) {
2290                 msg->msg_namelen = addr->len;
2291                 memcpy(msg->msg_name, addr->name, addr->len);
2292         }
2293 }
2294
     /*
      * Receive one datagram from @sk into @msg.
      *
      * Takes the socket's iolock and tries to dequeue (or peek at) an skb; if
      * none is available and the attempt failed with -EAGAIN, it waits for
      * more packets until the receive timeout runs out.  On success the
      * iolock stays held until the skb has been copied out and released.
      *
      * The sender's address is copied when msg->msg_name is set, and
      * MSG_TRUNC is raised in msg->msg_flags when the datagram is longer
      * than @size.  Credentials and security data are handed to scm_recv().
      * Attached fds are detached on a normal read and cloned
      * (unix_peek_fds()) on MSG_PEEK.
      *
      * Returns the number of bytes copied, or the remaining length of the
      * datagram when MSG_TRUNC is set in @flags, or a negative errno.
      * MSG_OOB yields -EOPNOTSUPP.  A SOCK_SEQPACKET socket with
      * RCV_SHUTDOWN returns 0 instead of -EAGAIN.
      */
2295 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
2296                          int flags)
2297 {
2298         struct scm_cookie scm;
2299         struct socket *sock = sk->sk_socket;
2300         struct unix_sock *u = unix_sk(sk);
2301         struct sk_buff *skb, *last;
2302         long timeo;
2303         int skip;
2304         int err;
2305
2306         err = -EOPNOTSUPP;
2307         if (flags&MSG_OOB)
2308                 goto out;
2309
2310         timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2311
2312         do {
2313                 mutex_lock(&u->iolock);
2314
2315                 skip = sk_peek_offset(sk, flags);
2316                 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2317                                               &skip, &err, &last);
2318                 if (skb) {
2319                         if (!(flags & MSG_PEEK))
2320                                 scm_stat_del(sk, skb);
                             /* leave the loop with iolock still held */
2321                         break;
2322                 }
2323
2324                 mutex_unlock(&u->iolock);
2325
2326                 if (err != -EAGAIN)
2327                         break;
2328         } while (timeo &&
2329                  !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2330                                               &err, &timeo, last));
2331
2332         if (!skb) { /* implies iolock unlocked */
2333                 unix_state_lock(sk);
2334                 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2335                 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2336                     (sk->sk_shutdown & RCV_SHUTDOWN))
2337                         err = 0;
2338                 unix_state_unlock(sk);
2339                 goto out;
2340         }
2341
             /* an skb left the queue: wake tasks sleeping on peer_wait */
2342         if (wq_has_sleeper(&u->peer_wait))
2343                 wake_up_interruptible_sync_poll(&u->peer_wait,
2344                                                 EPOLLOUT | EPOLLWRNORM |
2345                                                 EPOLLWRBAND);
2346
2347         if (msg->msg_name)
2348                 unix_copy_addr(msg, skb->sk);
2349
2350         if (size > skb->len - skip)
2351                 size = skb->len - skip;
2352         else if (size < skb->len - skip)
2353                 msg->msg_flags |= MSG_TRUNC;
2354
2355         err = skb_copy_datagram_msg(skb, skip, msg, size);
2356         if (err)
2357                 goto out_free;
2358
2359         if (sock_flag(sk, SOCK_RCVTSTAMP))
2360                 __sock_recv_timestamp(msg, sk, skb);
2361
2362         memset(&scm, 0, sizeof(scm));
2363
2364         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2365         unix_set_secdata(&scm, skb);
2366
2367         if (!(flags & MSG_PEEK)) {
2368                 if (UNIXCB(skb).fp)
2369                         unix_detach_fds(&scm, skb);
2370
2371                 sk_peek_offset_bwd(sk, skb->len);
2372         } else {
2373                 /* It is questionable: on PEEK we could:
2374                    - do not return fds - good, but too simple 8)
2375                    - return fds, and do not return them on read (old strategy,
2376                      apparently wrong)
2377                    - clone fds (I chose it for now, it is the most universal
2378                      solution)
2379
2380                    POSIX 1003.1g does not actually define this clearly
2381                    at all. POSIX 1003.1g doesn't define a lot of things
2382                    clearly however!
2383
2384                 */
2385
2386                 sk_peek_offset_fwd(sk, size);
2387
2388                 if (UNIXCB(skb).fp)
2389                         unix_peek_fds(&scm, skb);
2390         }
             /* MSG_TRUNC in @flags asks for the real length, not the copied one */
2391         err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2392
2393         scm_recv(sock, msg, &scm, flags);
2394
2395 out_free:
2396         skb_free_datagram(sk, skb);
2397         mutex_unlock(&u->iolock);
2398 out:
2399         return err;
2400 }
2401
     /*
      * recvmsg entry point for datagram sockets.
      *
      * With CONFIG_BPF_SYSCALL, a socket whose sk_prot is no longer
      * unix_dgram_proto has its receive dispatched to that proto's recvmsg(),
      * with MSG_DONTWAIT split out of @flags into the separate non-blocking
      * argument.  Otherwise the work is done by __unix_dgram_recvmsg().
      */
2402 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2403                               int flags)
2404 {
2405         struct sock *sk = sock->sk;
2406
2407 #ifdef CONFIG_BPF_SYSCALL
2408         const struct proto *prot = READ_ONCE(sk->sk_prot);
2409
2410         if (prot != &unix_dgram_proto)
2411                 return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
2412                                             flags & ~MSG_DONTWAIT, NULL);
2413 #endif
2414         return __unix_dgram_recvmsg(sk, msg, size, flags);
2415 }
2416
     /*
      * Feed queued skbs of @sk to @recv_actor one at a time.
      *
      * Each iteration dequeues one skb under the socket's iolock, hands the
      * whole skb (offset 0, length skb->len) to @recv_actor and frees it.
      * The loop ends when the actor returns <= 0 or desc->count reaches zero.
      *
      * Returns the accumulated byte count; if the actor fails before
      * anything was accumulated, its return value is passed through.
      *
      * NOTE(review): when skb_recv_datagram() yields no skb the function
      * returns err directly, even if earlier iterations already copied
      * data - confirm callers expect that.
      */
2417 static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
2418                           sk_read_actor_t recv_actor)
2419 {
2420         int copied = 0;
2421
2422         while (1) {
2423                 struct unix_sock *u = unix_sk(sk);
2424                 struct sk_buff *skb;
2425                 int used, err;
2426
2427                 mutex_lock(&u->iolock);
                     /* presumably flags = 0, noblock = 1 - TODO confirm */
2428                 skb = skb_recv_datagram(sk, 0, 1, &err);
2429                 mutex_unlock(&u->iolock);
2430                 if (!skb)
2431                         return err;
2432
2433                 used = recv_actor(desc, skb, 0, skb->len);
2434                 if (used <= 0) {
2435                         if (!copied)
2436                                 copied = used;
2437                         kfree_skb(skb);
2438                         break;
2439                 } else if (used <= skb->len) {
2440                         copied += used;
2441                 }
2442
2443                 kfree_skb(skb);
2444                 if (!desc->count)
2445                         break;
2446         }
2447
2448         return copied;
2449 }
2450
2451 /*
2452  *      Sleep until more data has arrived. But check for races..
      *
      *      @last and @last_len describe the tail of the receive queue as
      *      the caller last saw it.  The wait ends as soon as the tail skb
      *      or its length differs from that, the socket has an error, has
      *      RCV_SHUTDOWN set or is dead, a signal is pending, or @timeo has
      *      run out.  @freezable selects freezable_schedule_timeout() over
      *      schedule_timeout().
      *
      *      The state lock is held while the conditions are checked and
      *      released while sleeping.  Returns the remaining timeout.
2453  */
2454 static long unix_stream_data_wait(struct sock *sk, long timeo,
2455                                   struct sk_buff *last, unsigned int last_len,
2456                                   bool freezable)
2457 {
2458         struct sk_buff *tail;
2459         DEFINE_WAIT(wait);
2460
2461         unix_state_lock(sk);
2462
2463         for (;;) {
2464                 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2465
2466                 tail = skb_peek_tail(&sk->sk_receive_queue);
2467                 if (tail != last ||
2468                     (tail && tail->len != last_len) ||
2469                     sk->sk_err ||
2470                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
2471                     signal_pending(current) ||
2472                     !timeo)
2473                         break;
2474
2475                 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2476                 unix_state_unlock(sk);
2477                 if (freezable)
2478                         timeo = freezable_schedule_timeout(timeo);
2479                 else
2480                         timeo = schedule_timeout(timeo);
2481                 unix_state_lock(sk);
2482
                     /* on a dead socket, leave without clearing the bit */
2483                 if (sock_flag(sk, SOCK_DEAD))
2484                         break;
2485
2486                 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2487         }
2488
2489         finish_wait(sk_sleep(sk), &wait);
2490         unix_state_unlock(sk);
2491         return timeo;
2492 }
2493
     /*
      * Number of bytes in @skb that have not been consumed yet: the skb
      * length minus the "consumed" counter kept in its AF_UNIX control block.
      */
2494 static unsigned int unix_skb_len(const struct sk_buff *skb)
2495 {
2496         return skb->len - UNIXCB(skb).consumed;
2497 }
2498
     /*
      * Parameters of one stream read, passed to unix_stream_read_generic()
      * and on to the recv_actor callback.
      *
      * recv_actor is called as recv_actor(skb, offset, length, state), as in
      * unix_stream_recv_urg(), which treats a negative return as failure.
      * socket, msg, size and flags describe the request.  pipe and
      * splice_flags are not used in this part of the file - presumably they
      * serve the splice read path; confirm against its caller.
      */
2499 struct unix_stream_read_state {
2500         int (*recv_actor)(struct sk_buff *, int, int,
2501                           struct unix_stream_read_state *);
2502         struct socket *socket;
2503         struct msghdr *msg;
2504         struct pipe_inode_info *pipe;
2505         size_t size;
2506         int flags;
2507         unsigned int splice_flags;
2508 };
2509
2510 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
     /*
      * Receive the pending out-of-band byte (recv with MSG_OOB).
      *
      * Fails with -EINVAL when SOCK_URGINLINE is set or no OOB skb is
      * recorded.  Otherwise one byte of u->oob_skb is passed to the
      * recv_actor.  Unless MSG_PEEK is set, u->oob_skb is cleared under the
      * state lock, the byte is marked consumed and the reference held
      * through oob_skb is dropped.  The iolock is held throughout.
      *
      * Returns 1 with MSG_OOB set in msg->msg_flags on success, or -EFAULT
      * when the actor reports an error.
      */
2511 static int unix_stream_recv_urg(struct unix_stream_read_state *state)
2512 {
2513         struct socket *sock = state->socket;
2514         struct sock *sk = sock->sk;
2515         struct unix_sock *u = unix_sk(sk);
2516         int chunk = 1;
2517         struct sk_buff *oob_skb;
2518
2519         mutex_lock(&u->iolock);
2520         unix_state_lock(sk);
2521
2522         if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
2523                 unix_state_unlock(sk);
2524                 mutex_unlock(&u->iolock);
2525                 return -EINVAL;
2526         }
2527
2528         oob_skb = u->oob_skb;
2529
2530         if (!(state->flags & MSG_PEEK))
2531                 WRITE_ONCE(u->oob_skb, NULL);
2532
2533         unix_state_unlock(sk);
2534
2535         chunk = state->recv_actor(oob_skb, 0, chunk, state);
2536
             /* done even if the actor failed: the OOB byte is used up */
2537         if (!(state->flags & MSG_PEEK)) {
2538                 UNIXCB(oob_skb).consumed += 1;
2539                 kfree_skb(oob_skb);
2540         }
2541
2542         mutex_unlock(&u->iolock);
2543
2544         if (chunk < 0)
2545                 return -EFAULT;
2546
2547         state->msg->msg_flags |= MSG_OOB;
2548         return 1;
2549 }
2550
     /*
      * Decide what a normal (non-MSG_OOB) stream read does with @skb, the
      * skb at the head of @sk's receive queue.  @copied is the number of
      * bytes the current read has already copied.
      *
      * - A fully consumed skb is unlinked and released unless peeking;
      *   NULL is returned.
      * - If @skb is the recorded OOB skb:
      *     - with data already copied, NULL is returned so the read stops
      *       in front of the OOB byte;
      *     - with SOCK_URGINLINE, the skb is returned for inline reading
      *       and, unless peeking, the oob_skb pointer is cleared and its
      *       reference dropped;
      *     - otherwise, unless peeking, the skb is unlinked and released
      *       and the new head of the queue (possibly NULL) is returned.
      * - In every other case @skb is returned unchanged.
      *
      * The visible caller, unix_stream_read_generic(), calls this with the
      * socket's state lock held.
      */
2551 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
2552                                   int flags, int copied)
2553 {
2554         struct unix_sock *u = unix_sk(sk);
2555
2556         if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
2557                 skb_unlink(skb, &sk->sk_receive_queue);
2558                 consume_skb(skb);
2559                 skb = NULL;
2560         } else {
2561                 if (skb == u->oob_skb) {
2562                         if (copied) {
2563                                 skb = NULL;
2564                         } else if (sock_flag(sk, SOCK_URGINLINE)) {
2565                                 if (!(flags & MSG_PEEK)) {
2566                                         WRITE_ONCE(u->oob_skb, NULL);
2567                                         consume_skb(skb);
2568                                 }
2569                         } else if (!(flags & MSG_PEEK)) {
2570                                 skb_unlink(skb, &sk->sk_receive_queue);
2571                                 consume_skb(skb);
2572                                 skb = skb_peek(&sk->sk_receive_queue);
2573                         }
2574                 }
2575         }
2576         return skb;
2577 }
2578 #endif
2579
     /*
      * Stream-socket front end for unix_read_sock(): fails with -ENOTCONN
      * unless the socket is in TCP_ESTABLISHED, otherwise delegates.
      */
2580 static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
2581                                  sk_read_actor_t recv_actor)
2582 {
2583         if (unlikely(sk->sk_state != TCP_ESTABLISHED))
2584                 return -ENOTCONN;
2585
2586         return unix_read_sock(sk, desc, recv_actor);
2587 }
2588
/*
 * Common receive loop used by unix_stream_recvmsg(), __unix_stream_recvmsg()
 * and unix_stream_splice_read().
 *
 * @state:     describes the request: the socket, the receive flags, the
 *             number of bytes wanted, an optional msghdr and the recv_actor
 *             callback that moves the bytes of one skb to their destination.
 * @freezable: passed through unchanged to unix_stream_data_wait().
 *
 * The receive queue is walked with u->iolock held; the state lock is taken
 * only while the queue itself is inspected and is dropped before
 * state->recv_actor runs.
 *
 * Returns the value of 'copied' when it is non-zero (a byte count, or
 * -EFAULT when the very first recv_actor call failed), otherwise 'err'
 * (0 or a negative errno).
 */
static int unix_stream_read_generic(struct unix_stream_read_state *state,
                                    bool freezable)
{
        struct scm_cookie scm;
        struct socket *sock = state->socket;
        struct sock *sk = sock->sk;
        struct unix_sock *u = unix_sk(sk);
        int copied = 0;
        int flags = state->flags;
        int noblock = flags & MSG_DONTWAIT;
        bool check_creds = false;
        int target;
        int err = 0;
        long timeo;
        int skip;
        size_t size = state->size;
        unsigned int last_len;

        if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
                err = -EINVAL;
                goto out;
        }

        /* MSG_OOB is served by unix_stream_recv_urg() when OOB support is
         * built in; otherwise the request fails with -EOPNOTSUPP.
         */
        if (unlikely(flags & MSG_OOB)) {
                err = -EOPNOTSUPP;
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
                err = unix_stream_recv_urg(state);
#endif
                goto out;
        }

        target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
        timeo = sock_rcvtimeo(sk, noblock);

        memset(&scm, 0, sizeof(scm));

        /* Lock the socket to prevent queue disordering
         * while sleeps in memcpy_tomsg
         */
        mutex_lock(&u->iolock);

        /* Bytes at the front of the queue to step over before copying;
         * a negative sk_peek_offset() result is treated as zero.
         */
        skip = max(sk_peek_offset(sk, flags), 0);

        do {
                int chunk;
                bool drop_skb;
                struct sk_buff *skb, *last;

                /* (Re)start from the head of the queue under the state lock. */
redo:
                unix_state_lock(sk);
                if (sock_flag(sk, SOCK_DEAD)) {
                        err = -ECONNRESET;
                        goto unlock;
                }
                /* 'last'/'last_len' are handed to unix_stream_data_wait()
                 * if we end up sleeping.
                 */
                last = skb = skb_peek(&sk->sk_receive_queue);
                last_len = last ? last->len : 0;

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
                if (skb) {
                        skb = manage_oob(skb, sk, flags, copied);
                        if (!skb) {
                                unix_state_unlock(sk);
                                /* Stop with what we have, or look at the
                                 * queue again if nothing was copied yet.
                                 */
                                if (copied)
                                        break;
                                goto redo;
                        }
                }
#endif
again:
                /* No (more) skb to read from: finish or wait for data. */
                if (skb == NULL) {
                        if (copied >= target)
                                goto unlock;

                        /*
                         *      POSIX 1003.1g mandates this order.
                         */

                        err = sock_error(sk);
                        if (err)
                                goto unlock;
                        if (sk->sk_shutdown & RCV_SHUTDOWN)
                                goto unlock;

                        unix_state_unlock(sk);
                        if (!timeo) {
                                err = -EAGAIN;
                                break;
                        }

                        /* iolock is released for the duration of the wait. */
                        mutex_unlock(&u->iolock);

                        timeo = unix_stream_data_wait(sk, timeo, last,
                                                      last_len, freezable);

                        /* iolock is not held here, hence the direct jump to
                         * 'out' after releasing the scm cookie.
                         */
                        if (signal_pending(current)) {
                                err = sock_intr_errno(timeo);
                                scm_destroy(&scm);
                                goto out;
                        }

                        mutex_lock(&u->iolock);
                        goto redo;
unlock:
                        unix_state_unlock(sk);
                        break;
                }

                /* Step over whole skbs covered by the remaining skip count. */
                while (skip >= unix_skb_len(skb)) {
                        skip -= unix_skb_len(skb);
                        last = skb;
                        last_len = skb->len;
                        skb = skb_peek_next(skb, &sk->sk_receive_queue);
                        if (!skb)
                                goto again;
                }

                unix_state_unlock(sk);

                if (check_creds) {
                        /* Never glue messages from different writers */
                        if (!unix_skb_scm_eq(skb, &scm))
                                break;
                } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
                        /* Copy credentials */
                        scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
                        unix_set_secdata(&scm, skb);
                        check_creds = true;
                }

                /* Copy address just once */
                /* NOTE(review): 'sunaddr' is local to this block, so clearing
                 * it does not prevent unix_copy_addr() from running again on
                 * a later pass through the loop.
                 */
                if (state->msg && state->msg->msg_name) {
                        DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
                                         state->msg->msg_name);
                        unix_copy_addr(state->msg, skb->sk);
                        sunaddr = NULL;
                }

                chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
                /* Hold an extra reference while recv_actor runs without the
                 * state lock; it is dropped again right after the call.
                 */
                skb_get(skb);
                chunk = state->recv_actor(skb, skip, chunk, state);
                drop_skb = !unix_skb_len(skb);
                /* skb is only safe to use if !drop_skb */
                consume_skb(skb);
                /* recv_actor failed: report -EFAULT only if nothing was
                 * copied before, otherwise return the partial count.
                 */
                if (chunk < 0) {
                        if (copied == 0)
                                copied = -EFAULT;
                        break;
                }
                copied += chunk;
                size -= chunk;

                if (drop_skb) {
                        /* the skb was touched by a concurrent reader;
                         * we should not expect anything from this skb
                         * anymore and assume it invalid - we can be
                         * sure it was dropped from the socket queue
                         *
                         * let's report a short read
                         */
                        err = 0;
                        break;
                }

                /* Mark read part of skb as used */
                if (!(flags & MSG_PEEK)) {
                        UNIXCB(skb).consumed += chunk;

                        sk_peek_offset_bwd(sk, chunk);

                        if (UNIXCB(skb).fp) {
                                scm_stat_del(sk, skb);
                                unix_detach_fds(&scm, skb);
                        }

                        /* skb still has unread bytes: the request is done. */
                        if (unix_skb_len(skb))
                                break;

                        skb_unlink(skb, &sk->sk_receive_queue);
                        consume_skb(skb);

                        /* Stop once file descriptors have been picked up. */
                        if (scm.fp)
                                break;
                } else {
                        /* It is questionable, see note in unix_dgram_recvmsg.
                         */
                        if (UNIXCB(skb).fp)
                                unix_peek_fds(&scm, skb);

                        sk_peek_offset_fwd(sk, chunk);

                        if (UNIXCB(skb).fp)
                                break;

                        /* Peeking leaves the skb queued, so continue with
                         * its successor instead of the queue head.
                         */
                        skip = 0;
                        last = skb;
                        last_len = skb->len;
                        unix_state_lock(sk);
                        skb = skb_peek_next(skb, &sk->sk_receive_queue);
                        if (skb)
                                goto again;
                        unix_state_unlock(sk);
                        break;
                }
        } while (size);

        mutex_unlock(&u->iolock);
        /* Hand the collected control data to the msghdr, or release it when
         * there is no msghdr to receive it.
         */
        if (state->msg)
                scm_recv(sock, state->msg, &scm, flags);
        else
                scm_destroy(&scm);
out:
        return copied ? : err;
}
2802
2803 static int unix_stream_read_actor(struct sk_buff *skb,
2804                                   int skip, int chunk,
2805                                   struct unix_stream_read_state *state)
2806 {
2807         int ret;
2808
2809         ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2810                                     state->msg, chunk);
2811         return ret ?: chunk;
2812 }
2813
2814 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
2815                           size_t size, int flags)
2816 {
2817         struct unix_stream_read_state state = {
2818                 .recv_actor = unix_stream_read_actor,
2819                 .socket = sk->sk_socket,
2820                 .msg = msg,
2821                 .size = size,
2822                 .flags = flags
2823         };
2824
2825         return unix_stream_read_generic(&state, true);
2826 }
2827
2828 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2829                                size_t size, int flags)
2830 {
2831         struct unix_stream_read_state state = {
2832                 .recv_actor = unix_stream_read_actor,
2833                 .socket = sock,
2834                 .msg = msg,
2835                 .size = size,
2836                 .flags = flags
2837         };
2838
2839 #ifdef CONFIG_BPF_SYSCALL
2840         struct sock *sk = sock->sk;
2841         const struct proto *prot = READ_ONCE(sk->sk_prot);
2842
2843         if (prot != &unix_stream_proto)
2844                 return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
2845                                             flags & ~MSG_DONTWAIT, NULL);
2846 #endif
2847         return unix_stream_read_generic(&state, true);
2848 }
2849
2850 static int unix_stream_splice_actor(struct sk_buff *skb,
2851                                     int skip, int chunk,
2852                                     struct unix_stream_read_state *state)
2853 {
2854         return skb_splice_bits(skb, state->socket->sk,
2855                                UNIXCB(skb).consumed + skip,
2856                                state->pipe, chunk, state->splice_flags);
2857 }
2858
2859 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2860                                        struct pipe_inode_info *pipe,
2861                                        size_t size, unsigned int flags)
2862 {
2863         struct unix_stream_read_state state = {
2864                 .recv_actor = unix_stream_splice_actor,
2865                 .socket = sock,
2866                 .pipe = pipe,
2867                 .size = size,
2868                 .splice_flags = flags,
2869         };
2870
2871         if (unlikely(*ppos))
2872                 return -ESPIPE;
2873
2874         if (sock->file->f_flags & O_NONBLOCK ||
2875             flags & SPLICE_F_NONBLOCK)
2876                 state.flags = MSG_DONTWAIT;
2877
2878         return unix_stream_read_generic(&state, false);
2879 }
2880
2881 static int unix_shutdown(struct socket *sock, int mode)
2882 {
2883         struct sock *sk = sock->sk;
2884         struct sock *other;
2885
2886         if (mode < SHUT_RD || mode > SHUT_RDWR)
2887                 return -EINVAL;
2888         /* This maps:
2889          * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2890          * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2891          * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2892          */
2893         ++mode;
2894
2895         unix_state_lock(sk);
2896         WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
2897         other = unix_peer(sk);
2898         if (other)
2899                 sock_hold(other);
2900         unix_state_unlock(sk);
2901         sk->sk_state_change(sk);
2902
2903         if (other &&
2904                 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2905
2906                 int peer_mode = 0;
2907                 const struct proto *prot = READ_ONCE(other->sk_prot);
2908
2909                 if (prot->unhash)
2910                         prot->unhash(other);
2911                 if (mode&RCV_SHUTDOWN)
2912                         peer_mode |= SEND_SHUTDOWN;
2913                 if (mode&SEND_SHUTDOWN)
2914                         peer_mode |= RCV_SHUTDOWN;
2915                 unix_state_lock(other);
2916                 WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
2917                 unix_state_unlock(other);
2918                 other->sk_state_change(other);
2919                 if (peer_mode == SHUTDOWN_MASK)
2920                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2921                 else if (peer_mode & RCV_SHUTDOWN)
2922                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2923         }
2924         if (other)
2925                 sock_put(other);
2926
2927         return 0;
2928 }
2929
2930 long unix_inq_len(struct sock *sk)
2931 {
2932         struct sk_buff *skb;
2933         long amount = 0;
2934
2935         if (sk->sk_state == TCP_LISTEN)
2936                 return -EINVAL;
2937
2938         spin_lock(&sk->sk_receive_queue.lock);
2939         if (sk->sk_type == SOCK_STREAM ||
2940             sk->sk_type == SOCK_SEQPACKET) {
2941                 skb_queue_walk(&sk->sk_receive_queue, skb)
2942                         amount += unix_skb_len(skb);
2943         } else {
2944                 skb = skb_peek(&sk->sk_receive_queue);
2945                 if (skb)
2946                         amount = skb->len;
2947         }
2948         spin_unlock(&sk->sk_receive_queue.lock);
2949
2950         return amount;
2951 }
2952 EXPORT_SYMBOL_GPL(unix_inq_len);
2953
/* Send-side counterpart of unix_inq_len(): reports sk_wmem_alloc_get(). */
long unix_outq_len(struct sock *sk)
{
        long queued = sk_wmem_alloc_get(sk);

        return queued;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2959
2960 static int unix_open_file(struct sock *sk)
2961 {
2962         struct path path;
2963         struct file *f;
2964         int fd;
2965
2966         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2967                 return -EPERM;
2968
2969         if (!smp_load_acquire(&unix_sk(sk)->addr))
2970                 return -ENOENT;
2971
2972         path = unix_sk(sk)->path;
2973         if (!path.dentry)
2974                 return -ENOENT;
2975
2976         path_get(&path);
2977
2978         fd = get_unused_fd_flags(O_CLOEXEC);
2979         if (fd < 0)
2980                 goto out;
2981
2982         f = dentry_open(&path, O_PATH, current_cred());
2983         if (IS_ERR(f)) {
2984                 put_unused_fd(fd);
2985                 fd = PTR_ERR(f);
2986                 goto out;
2987         }
2988
2989         fd_install(fd, f);
2990 out:
2991         path_put(&path);
2992
2993         return fd;
2994 }
2995
2996 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2997 {
2998         struct sock *sk = sock->sk;
2999         long amount = 0;
3000         int err;
3001
3002         switch (cmd) {
3003         case SIOCOUTQ:
3004                 amount = unix_outq_len(sk);
3005                 err = put_user(amount, (int __user *)arg);
3006                 break;
3007         case SIOCINQ:
3008                 amount = unix_inq_len(sk);
3009                 if (amount < 0)
3010                         err = amount;
3011                 else
3012                         err = put_user(amount, (int __user *)arg);
3013                 break;
3014         case SIOCUNIXFILE:
3015                 err = unix_open_file(sk);
3016                 break;
3017 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3018         case SIOCATMARK:
3019                 {
3020                         struct sk_buff *skb;
3021                         int answ = 0;
3022
3023                         skb = skb_peek(&sk->sk_receive_queue);
3024                         if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
3025                                 answ = 1;
3026                         err = put_user(answ, (int __user *)arg);
3027                 }
3028                 break;
3029 #endif
3030         default:
3031                 err = -ENOIOCTLCMD;
3032                 break;
3033         }
3034         return err;
3035 }
3036
3037 #ifdef CONFIG_COMPAT
3038 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3039 {
3040         return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
3041 }
3042 #endif
3043
3044 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
3045 {
3046         struct sock *sk = sock->sk;
3047         __poll_t mask;
3048         u8 shutdown;
3049
3050         sock_poll_wait(file, sock, wait);
3051         mask = 0;
3052         shutdown = READ_ONCE(sk->sk_shutdown);
3053
3054         /* exceptional events? */
3055         if (sk->sk_err)
3056                 mask |= EPOLLERR;
3057         if (shutdown == SHUTDOWN_MASK)
3058                 mask |= EPOLLHUP;
3059         if (shutdown & RCV_SHUTDOWN)
3060                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3061
3062         /* readable? */
3063         if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3064                 mask |= EPOLLIN | EPOLLRDNORM;
3065         if (sk_is_readable(sk))
3066                 mask |= EPOLLIN | EPOLLRDNORM;
3067 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3068         if (READ_ONCE(unix_sk(sk)->oob_skb))
3069                 mask |= EPOLLPRI;
3070 #endif
3071
3072         /* Connection-based need to check for termination and startup */
3073         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
3074             sk->sk_state == TCP_CLOSE)
3075                 mask |= EPOLLHUP;
3076
3077         /*
3078          * we set writable also when the other side has shut down the
3079          * connection. This prevents stuck sockets.
3080          */
3081         if (unix_writable(sk))
3082                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3083
3084         return mask;
3085 }
3086
3087 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
3088                                     poll_table *wait)
3089 {
3090         struct sock *sk = sock->sk, *other;
3091         unsigned int writable;
3092         __poll_t mask;
3093         u8 shutdown;
3094
3095         sock_poll_wait(file, sock, wait);
3096         mask = 0;
3097         shutdown = READ_ONCE(sk->sk_shutdown);
3098
3099         /* exceptional events? */
3100         if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
3101                 mask |= EPOLLERR |
3102                         (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
3103
3104         if (shutdown & RCV_SHUTDOWN)
3105                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3106         if (shutdown == SHUTDOWN_MASK)
3107                 mask |= EPOLLHUP;
3108
3109         /* readable? */
3110         if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3111                 mask |= EPOLLIN | EPOLLRDNORM;
3112         if (sk_is_readable(sk))
3113                 mask |= EPOLLIN | EPOLLRDNORM;
3114
3115         /* Connection-based need to check for termination and startup */
3116         if (sk->sk_type == SOCK_SEQPACKET) {
3117                 if (sk->sk_state == TCP_CLOSE)
3118                         mask |= EPOLLHUP;
3119                 /* connection hasn't started yet? */
3120                 if (sk->sk_state == TCP_SYN_SENT)
3121                         return mask;
3122         }
3123
3124         /* No write status requested, avoid expensive OUT tests. */
3125         if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
3126                 return mask;
3127
3128         writable = unix_writable(sk);
3129         if (writable) {
3130                 unix_state_lock(sk);
3131
3132                 other = unix_peer(sk);
3133                 if (other && unix_peer(other) != sk &&
3134                     unix_recvq_full_lockless(other) &&
3135                     unix_dgram_peer_wake_me(sk, other))
3136                         writable = 0;
3137
3138                 unix_state_unlock(sk);
3139         }
3140
3141         if (writable)
3142                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3143         else
3144                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
3145
3146         return mask;
3147 }
3148
3149 #ifdef CONFIG_PROC_FS
3150
/*
 * A seq_file position for the "unix" proc entry encodes two values in one
 * word: the hash bucket in the bits above BUCKET_SPACE and the offset of the
 * socket inside that bucket in the low BUCKET_SPACE bits.  The iterator code
 * below uses offsets that start at 1.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

/* Extract the bucket, extract the offset, and combine both again. */
#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
3156
3157 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
3158 {
3159         unsigned long offset = get_offset(*pos);
3160         unsigned long bucket = get_bucket(*pos);
3161         struct sock *sk;
3162         unsigned long count = 0;
3163
3164         for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
3165                 if (sock_net(sk) != seq_file_net(seq))
3166                         continue;
3167                 if (++count == offset)
3168                         break;
3169         }
3170
3171         return sk;
3172 }
3173
3174 static struct sock *unix_next_socket(struct seq_file *seq,
3175                                      struct sock *sk,
3176                                      loff_t *pos)
3177 {
3178         unsigned long bucket;
3179
3180         while (sk > (struct sock *)SEQ_START_TOKEN) {
3181                 sk = sk_next(sk);
3182                 if (!sk)
3183                         goto next_bucket;
3184                 if (sock_net(sk) == seq_file_net(seq))
3185                         return sk;
3186         }
3187
3188         do {
3189                 sk = unix_from_bucket(seq, pos);
3190                 if (sk)
3191                         return sk;
3192
3193 next_bucket:
3194                 bucket = get_bucket(*pos) + 1;
3195                 *pos = set_bucket_offset(bucket, 1);
3196         } while (bucket < ARRAY_SIZE(unix_socket_table));
3197
3198         return NULL;
3199 }
3200
3201 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
3202         __acquires(unix_table_lock)
3203 {
3204         spin_lock(&unix_table_lock);
3205
3206         if (!*pos)
3207                 return SEQ_START_TOKEN;
3208
3209         if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
3210                 return NULL;
3211
3212         return unix_next_socket(seq, NULL, pos);
3213 }
3214
3215 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3216 {
3217         ++*pos;
3218         return unix_next_socket(seq, v, pos);
3219 }
3220
3221 static void unix_seq_stop(struct seq_file *seq, void *v)
3222         __releases(unix_table_lock)
3223 {
3224         spin_unlock(&unix_table_lock);
3225 }
3226
3227 static int unix_seq_show(struct seq_file *seq, void *v)
3228 {
3229
3230         if (v == SEQ_START_TOKEN)
3231                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
3232                          "Inode Path\n");
3233         else {
3234                 struct sock *s = v;
3235                 struct unix_sock *u = unix_sk(s);
3236                 unix_state_lock(s);
3237
3238                 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
3239                         s,
3240                         refcount_read(&s->sk_refcnt),
3241                         0,
3242                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
3243                         s->sk_type,
3244                         s->sk_socket ?
3245                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
3246                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
3247                         sock_i_ino(s));
3248
3249                 if (u->addr) {  // under unix_table_lock here
3250                         int i, len;
3251                         seq_putc(seq, ' ');
3252
3253                         i = 0;
3254                         len = u->addr->len - sizeof(short);
3255                         if (!UNIX_ABSTRACT(s))
3256                                 len--;
3257                         else {
3258                                 seq_putc(seq, '@');
3259                                 i++;
3260                         }
3261                         for ( ; i < len; i++)
3262                                 seq_putc(seq, u->addr->name->sun_path[i] ?:
3263                                          '@');
3264                 }
3265                 unix_state_unlock(s);
3266                 seq_putc(seq, '\n');
3267         }
3268
3269         return 0;
3270 }
3271
/* seq_file operations registered for the "unix" proc entry in unix_net_init(). */
static const struct seq_operations unix_seq_ops = {
        .start  = unix_seq_start,
        .next   = unix_seq_next,
        .stop   = unix_seq_stop,
        .show   = unix_seq_show,
};
3278
3279 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
/* Context filled in by unix_prog_seq_show() and passed to the BPF program. */
struct bpf_iter__unix {
        __bpf_md_ptr(struct bpf_iter_meta *, meta);
        __bpf_md_ptr(struct unix_sock *, unix_sk);
        uid_t uid __aligned(8);
};
3285
3286 static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
3287                               struct unix_sock *unix_sk, uid_t uid)
3288 {
3289         struct bpf_iter__unix ctx;
3290
3291         meta->seq_num--;  /* skip SEQ_START_TOKEN */
3292         ctx.meta = meta;
3293         ctx.unix_sk = unix_sk;
3294         ctx.uid = uid;
3295         return bpf_iter_run_prog(prog, &ctx);
3296 }
3297
3298 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
3299 {
3300         struct bpf_iter_meta meta;
3301         struct bpf_prog *prog;
3302         struct sock *sk = v;
3303         uid_t uid;
3304
3305         if (v == SEQ_START_TOKEN)
3306                 return 0;
3307
3308         uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
3309         meta.seq = seq;
3310         prog = bpf_iter_get_info(&meta, false);
3311         return unix_prog_seq_show(prog, &meta, v, uid);
3312 }
3313
3314 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
3315 {
3316         struct bpf_iter_meta meta;
3317         struct bpf_prog *prog;
3318
3319         if (!v) {
3320                 meta.seq = seq;
3321                 prog = bpf_iter_get_info(&meta, true);
3322                 if (prog)
3323                         (void)unix_prog_seq_show(prog, &meta, v, 0);
3324         }
3325
3326         unix_seq_stop(seq, v);
3327 }
3328
/*
 * seq_file operations for the BPF iterator: same walk as unix_seq_ops, but
 * with the BPF-aware ->show and ->stop callbacks.
 */
static const struct seq_operations bpf_iter_unix_seq_ops = {
        .start  = unix_seq_start,
        .next   = unix_seq_next,
        .stop   = bpf_iter_unix_seq_stop,
        .show   = bpf_iter_unix_seq_show,
};
3335 #endif
3336 #endif
3337
/* Protocol family descriptor passed to sock_register() in af_unix_init(). */
static const struct net_proto_family unix_family_ops = {
        .family = PF_UNIX,
        .create = unix_create,
        .owner  = THIS_MODULE,
};
3343
3344
3345 static int __net_init unix_net_init(struct net *net)
3346 {
3347         int error = -ENOMEM;
3348
3349         net->unx.sysctl_max_dgram_qlen = 10;
3350         if (unix_sysctl_register(net))
3351                 goto out;
3352
3353 #ifdef CONFIG_PROC_FS
3354         if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
3355                         sizeof(struct seq_net_private))) {
3356                 unix_sysctl_unregister(net);
3357                 goto out;
3358         }
3359 #endif
3360         error = 0;
3361 out:
3362         return error;
3363 }
3364
3365 static void __net_exit unix_net_exit(struct net *net)
3366 {
3367         unix_sysctl_unregister(net);
3368         remove_proc_entry("unix", net->proc_net);
3369 }
3370
/* Per-network-namespace hooks, registered from af_unix_init(). */
static struct pernet_operations unix_net_ops = {
        .init = unix_net_init,
        .exit = unix_net_exit,
};
3375
3376 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
/* Declares the "unix" BPF iterator function with its context arguments. */
DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
                     struct unix_sock *unix_sk, uid_t uid)

/* seq_file plumbing for the iterator; private data is a seq_net_private. */
static const struct bpf_iter_seq_info unix_seq_info = {
        .seq_ops                = &bpf_iter_unix_seq_ops,
        .init_seq_private       = bpf_iter_init_seq_net,
        .fini_seq_private       = bpf_iter_fini_seq_net,
        .seq_priv_size          = sizeof(struct seq_net_private),
};

/*
 * Registration record for the "unix" iterator target.  The single
 * ctx_arg_info entry describes the unix_sk context field as a BTF pointer
 * that may be NULL; its btf_id is filled in by bpf_iter_register().
 */
static struct bpf_iter_reg unix_reg_info = {
        .target                 = "unix",
        .ctx_arg_info_size      = 1,
        .ctx_arg_info           = {
                { offsetof(struct bpf_iter__unix, unix_sk),
                  PTR_TO_BTF_ID_OR_NULL },
        },
        .seq_info               = &unix_seq_info,
};
3396
3397 static void __init bpf_iter_register(void)
3398 {
3399         unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
3400         if (bpf_iter_reg_target(&unix_reg_info))
3401                 pr_warn("Warning: could not register bpf iterator unix\n");
3402 }
3403 #endif
3404
3405 static int __init af_unix_init(void)
3406 {
3407         int rc = -1;
3408
3409         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
3410
3411         rc = proto_register(&unix_dgram_proto, 1);
3412         if (rc != 0) {
3413                 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3414                 goto out;
3415         }
3416
3417         rc = proto_register(&unix_stream_proto, 1);
3418         if (rc != 0) {
3419                 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3420                 proto_unregister(&unix_dgram_proto);
3421                 goto out;
3422         }
3423
3424         sock_register(&unix_family_ops);
3425         register_pernet_subsys(&unix_net_ops);
3426         unix_bpf_build_proto();
3427
3428 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3429         bpf_iter_register();
3430 #endif
3431
3432 out:
3433         return rc;
3434 }
3435
3436 static void __exit af_unix_exit(void)
3437 {
3438         sock_unregister(PF_UNIX);
3439         proto_unregister(&unix_dgram_proto);
3440         proto_unregister(&unix_stream_proto);
3441         unregister_pernet_subsys(&unix_net_ops);
3442 }
3443
3444 /* Earlier than device_initcall() so that other drivers invoking
3445    request_module() don't end up in a loop when modprobe tries
3446    to use a UNIX socket. But later than subsys_initcall() because
3447    we depend on stuff initialised there */
3448 fs_initcall(af_unix_init);
3449 module_exit(af_unix_exit);
3450
3451 MODULE_LICENSE("GPL");
3452 MODULE_ALIAS_NETPROTO(PF_UNIX);