arm64: dts: qcom: sm8550: add TRNG node
[linux-modified.git] / ipc / shm.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * linux/ipc/shm.c
4  * Copyright (C) 1992, 1993 Krishna Balasubramanian
5  *       Many improvements/fixes by Bruno Haible.
6  * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
7  * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
8  *
9  * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
10  * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
11  * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
12  * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
13  * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
14  * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
15  * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
16  *
17  * support for audit of ipc object properties and permission changes
18  * Dustin Kirkland <dustin.kirkland@us.ibm.com>
19  *
20  * namespaces support
21  * OpenVZ, SWsoft Inc.
22  * Pavel Emelianov <xemul@openvz.org>
23  *
24  * Better ipc lock (kern_ipc_perm.lock) handling
25  * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013.
26  */
27
28 #include <linux/slab.h>
29 #include <linux/mm.h>
30 #include <linux/hugetlb.h>
31 #include <linux/shm.h>
32 #include <linux/init.h>
33 #include <linux/file.h>
34 #include <linux/mman.h>
35 #include <linux/shmem_fs.h>
36 #include <linux/security.h>
37 #include <linux/syscalls.h>
38 #include <linux/audit.h>
39 #include <linux/capability.h>
40 #include <linux/ptrace.h>
41 #include <linux/seq_file.h>
42 #include <linux/rwsem.h>
43 #include <linux/nsproxy.h>
44 #include <linux/mount.h>
45 #include <linux/ipc_namespace.h>
46 #include <linux/rhashtable.h>
47
48 #include <linux/uaccess.h>
49
50 #include "util.h"
51
52 struct shmid_kernel /* private to the kernel */
53 {
54         struct kern_ipc_perm    shm_perm;
55         struct file             *shm_file;
56         unsigned long           shm_nattch;
57         unsigned long           shm_segsz;
58         time64_t                shm_atim;
59         time64_t                shm_dtim;
60         time64_t                shm_ctim;
61         struct pid              *shm_cprid;
62         struct pid              *shm_lprid;
63         struct ucounts          *mlock_ucounts;
64
65         /*
66          * The task created the shm object, for
67          * task_lock(shp->shm_creator)
68          */
69         struct task_struct      *shm_creator;
70
71         /*
72          * List by creator. task_lock(->shm_creator) required for read/write.
73          * If list_empty(), then the creator is dead already.
74          */
75         struct list_head        shm_clist;
76         struct ipc_namespace    *ns;
77 } __randomize_layout;
78
/* Flags kept in the upper byte of shm_perm.mode */
#define SHM_DEST	01000	/* destroy the segment once the last attach goes away */
#define SHM_LOCKED	02000	/* segment must not be swapped out */
82
/*
 * Per-file private data of the wrapper file used for a shm mapping
 * (stored in file->private_data, see the shm_file_data() accessor).
 */
struct shm_file_data {
	int id;						/* ipc id used to look the segment up */
	struct ipc_namespace *ns;			/* namespace the id belongs to */
	struct file *file;				/* underlying file the operations are forwarded to */
	const struct vm_operations_struct *vm_ops;	/* vm_ops of the underlying mapping, saved by shm_mmap() */
};
89
90 #define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data))
91
92 static const struct file_operations shm_file_operations;
93 static const struct vm_operations_struct shm_vm_ops;
94
95 #define shm_ids(ns)     ((ns)->ids[IPC_SHM_IDS])
96
97 #define shm_unlock(shp)                 \
98         ipc_unlock(&(shp)->shm_perm)
99
100 static int newseg(struct ipc_namespace *, struct ipc_params *);
101 static void shm_open(struct vm_area_struct *vma);
102 static void shm_close(struct vm_area_struct *vma);
103 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp);
104 #ifdef CONFIG_PROC_FS
105 static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
106 #endif
107
108 void shm_init_ns(struct ipc_namespace *ns)
109 {
110         ns->shm_ctlmax = SHMMAX;
111         ns->shm_ctlall = SHMALL;
112         ns->shm_ctlmni = SHMMNI;
113         ns->shm_rmid_forced = 0;
114         ns->shm_tot = 0;
115         ipc_init_ids(&shm_ids(ns));
116 }
117
118 /*
119  * Called with shm_ids.rwsem (writer) and the shp structure locked.
120  * Only shm_ids.rwsem remains locked on exit.
121  */
122 static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
123 {
124         struct shmid_kernel *shp;
125
126         shp = container_of(ipcp, struct shmid_kernel, shm_perm);
127         WARN_ON(ns != shp->ns);
128
129         if (shp->shm_nattch) {
130                 shp->shm_perm.mode |= SHM_DEST;
131                 /* Do not find it any more */
132                 ipc_set_key_private(&shm_ids(ns), &shp->shm_perm);
133                 shm_unlock(shp);
134         } else
135                 shm_destroy(ns, shp);
136 }
137
138 #ifdef CONFIG_IPC_NS
139 void shm_exit_ns(struct ipc_namespace *ns)
140 {
141         free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
142         idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr);
143         rhashtable_destroy(&ns->ids[IPC_SHM_IDS].key_ht);
144 }
145 #endif
146
147 static int __init ipc_ns_init(void)
148 {
149         shm_init_ns(&init_ipc_ns);
150         return 0;
151 }
152
153 pure_initcall(ipc_ns_init);
154
155 void __init shm_init(void)
156 {
157         ipc_init_proc_interface("sysvipc/shm",
158 #if BITS_PER_LONG <= 32
159                                 "       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime        rss       swap\n",
160 #else
161                                 "       key      shmid perms                  size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime                   rss                  swap\n",
162 #endif
163                                 IPC_SHM_IDS, sysvipc_shm_proc_show);
164 }
165
166 static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id)
167 {
168         struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&shm_ids(ns), id);
169
170         if (IS_ERR(ipcp))
171                 return ERR_CAST(ipcp);
172
173         return container_of(ipcp, struct shmid_kernel, shm_perm);
174 }
175
176 static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id)
177 {
178         struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id);
179
180         if (IS_ERR(ipcp))
181                 return ERR_CAST(ipcp);
182
183         return container_of(ipcp, struct shmid_kernel, shm_perm);
184 }
185
186 /*
187  * shm_lock_(check_) routines are called in the paths where the rwsem
188  * is not necessarily held.
189  */
190 static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
191 {
192         struct kern_ipc_perm *ipcp;
193
194         rcu_read_lock();
195         ipcp = ipc_obtain_object_idr(&shm_ids(ns), id);
196         if (IS_ERR(ipcp))
197                 goto err;
198
199         ipc_lock_object(ipcp);
200         /*
201          * ipc_rmid() may have already freed the ID while ipc_lock_object()
202          * was spinning: here verify that the structure is still valid.
203          * Upon races with RMID, return -EIDRM, thus indicating that
204          * the ID points to a removed identifier.
205          */
206         if (ipc_valid_object(ipcp)) {
207                 /* return a locked ipc object upon success */
208                 return container_of(ipcp, struct shmid_kernel, shm_perm);
209         }
210
211         ipc_unlock_object(ipcp);
212         ipcp = ERR_PTR(-EIDRM);
213 err:
214         rcu_read_unlock();
215         /*
216          * Callers of shm_lock() must validate the status of the returned ipc
217          * object pointer and error out as appropriate.
218          */
219         return ERR_CAST(ipcp);
220 }
221
222 static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
223 {
224         rcu_read_lock();
225         ipc_lock_object(&ipcp->shm_perm);
226 }
227
228 static void shm_rcu_free(struct rcu_head *head)
229 {
230         struct kern_ipc_perm *ptr = container_of(head, struct kern_ipc_perm,
231                                                         rcu);
232         struct shmid_kernel *shp = container_of(ptr, struct shmid_kernel,
233                                                         shm_perm);
234         security_shm_free(&shp->shm_perm);
235         kfree(shp);
236 }
237
238 /*
239  * It has to be called with shp locked.
240  * It must be called before ipc_rmid()
241  */
242 static inline void shm_clist_rm(struct shmid_kernel *shp)
243 {
244         struct task_struct *creator;
245
246         /* ensure that shm_creator does not disappear */
247         rcu_read_lock();
248
249         /*
250          * A concurrent exit_shm may do a list_del_init() as well.
251          * Just do nothing if exit_shm already did the work
252          */
253         if (!list_empty(&shp->shm_clist)) {
254                 /*
255                  * shp->shm_creator is guaranteed to be valid *only*
256                  * if shp->shm_clist is not empty.
257                  */
258                 creator = shp->shm_creator;
259
260                 task_lock(creator);
261                 /*
262                  * list_del_init() is a nop if the entry was already removed
263                  * from the list.
264                  */
265                 list_del_init(&shp->shm_clist);
266                 task_unlock(creator);
267         }
268         rcu_read_unlock();
269 }
270
271 static inline void shm_rmid(struct shmid_kernel *s)
272 {
273         shm_clist_rm(s);
274         ipc_rmid(&shm_ids(s->ns), &s->shm_perm);
275 }
276
277
278 static int __shm_open(struct shm_file_data *sfd)
279 {
280         struct shmid_kernel *shp;
281
282         shp = shm_lock(sfd->ns, sfd->id);
283
284         if (IS_ERR(shp))
285                 return PTR_ERR(shp);
286
287         if (shp->shm_file != sfd->file) {
288                 /* ID was reused */
289                 shm_unlock(shp);
290                 return -EINVAL;
291         }
292
293         shp->shm_atim = ktime_get_real_seconds();
294         ipc_update_pid(&shp->shm_lprid, task_tgid(current));
295         shp->shm_nattch++;
296         shm_unlock(shp);
297         return 0;
298 }
299
300 /* This is called by fork, once for every shm attach. */
301 static void shm_open(struct vm_area_struct *vma)
302 {
303         struct file *file = vma->vm_file;
304         struct shm_file_data *sfd = shm_file_data(file);
305         int err;
306
307         /* Always call underlying open if present */
308         if (sfd->vm_ops->open)
309                 sfd->vm_ops->open(vma);
310
311         err = __shm_open(sfd);
312         /*
313          * We raced in the idr lookup or with shm_destroy().
314          * Either way, the ID is busted.
315          */
316         WARN_ON_ONCE(err);
317 }
318
319 /*
320  * shm_destroy - free the struct shmid_kernel
321  *
322  * @ns: namespace
323  * @shp: struct to free
324  *
325  * It has to be called with shp and shm_ids.rwsem (writer) locked,
326  * but returns with shp unlocked and freed.
327  */
328 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
329 {
330         struct file *shm_file;
331
332         shm_file = shp->shm_file;
333         shp->shm_file = NULL;
334         ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
335         shm_rmid(shp);
336         shm_unlock(shp);
337         if (!is_file_hugepages(shm_file))
338                 shmem_lock(shm_file, 0, shp->mlock_ucounts);
339         fput(shm_file);
340         ipc_update_pid(&shp->shm_cprid, NULL);
341         ipc_update_pid(&shp->shm_lprid, NULL);
342         ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
343 }
344
345 /*
346  * shm_may_destroy - identifies whether shm segment should be destroyed now
347  *
348  * Returns true if and only if there are no active users of the segment and
349  * one of the following is true:
350  *
351  * 1) shmctl(id, IPC_RMID, NULL) was called for this shp
352  *
353  * 2) sysctl kernel.shm_rmid_forced is set to 1.
354  */
355 static bool shm_may_destroy(struct shmid_kernel *shp)
356 {
357         return (shp->shm_nattch == 0) &&
358                (shp->ns->shm_rmid_forced ||
359                 (shp->shm_perm.mode & SHM_DEST));
360 }
361
362 /*
363  * remove the attach descriptor vma.
364  * free memory for segment if it is marked destroyed.
365  * The descriptor has already been removed from the current->mm->mmap list
366  * and will later be kfree()d.
367  */
368 static void __shm_close(struct shm_file_data *sfd)
369 {
370         struct shmid_kernel *shp;
371         struct ipc_namespace *ns = sfd->ns;
372
373         down_write(&shm_ids(ns).rwsem);
374         /* remove from the list of attaches of the shm segment */
375         shp = shm_lock(ns, sfd->id);
376
377         /*
378          * We raced in the idr lookup or with shm_destroy().
379          * Either way, the ID is busted.
380          */
381         if (WARN_ON_ONCE(IS_ERR(shp)))
382                 goto done; /* no-op */
383
384         ipc_update_pid(&shp->shm_lprid, task_tgid(current));
385         shp->shm_dtim = ktime_get_real_seconds();
386         shp->shm_nattch--;
387         if (shm_may_destroy(shp))
388                 shm_destroy(ns, shp);
389         else
390                 shm_unlock(shp);
391 done:
392         up_write(&shm_ids(ns).rwsem);
393 }
394
395 static void shm_close(struct vm_area_struct *vma)
396 {
397         struct file *file = vma->vm_file;
398         struct shm_file_data *sfd = shm_file_data(file);
399
400         /* Always call underlying close if present */
401         if (sfd->vm_ops->close)
402                 sfd->vm_ops->close(vma);
403
404         __shm_close(sfd);
405 }
406
407 /* Called with ns->shm_ids(ns).rwsem locked */
408 static int shm_try_destroy_orphaned(int id, void *p, void *data)
409 {
410         struct ipc_namespace *ns = data;
411         struct kern_ipc_perm *ipcp = p;
412         struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);
413
414         /*
415          * We want to destroy segments without users and with already
416          * exit'ed originating process.
417          *
418          * As shp->* are changed under rwsem, it's safe to skip shp locking.
419          */
420         if (!list_empty(&shp->shm_clist))
421                 return 0;
422
423         if (shm_may_destroy(shp)) {
424                 shm_lock_by_ptr(shp);
425                 shm_destroy(ns, shp);
426         }
427         return 0;
428 }
429
430 void shm_destroy_orphaned(struct ipc_namespace *ns)
431 {
432         down_write(&shm_ids(ns).rwsem);
433         if (shm_ids(ns).in_use)
434                 idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
435         up_write(&shm_ids(ns).rwsem);
436 }
437
438 /* Locking assumes this will only be called with task == current */
439 void exit_shm(struct task_struct *task)
440 {
441         for (;;) {
442                 struct shmid_kernel *shp;
443                 struct ipc_namespace *ns;
444
445                 task_lock(task);
446
447                 if (list_empty(&task->sysvshm.shm_clist)) {
448                         task_unlock(task);
449                         break;
450                 }
451
452                 shp = list_first_entry(&task->sysvshm.shm_clist, struct shmid_kernel,
453                                 shm_clist);
454
455                 /*
456                  * 1) Get pointer to the ipc namespace. It is worth to say
457                  * that this pointer is guaranteed to be valid because
458                  * shp lifetime is always shorter than namespace lifetime
459                  * in which shp lives.
460                  * We taken task_lock it means that shp won't be freed.
461                  */
462                 ns = shp->ns;
463
464                 /*
465                  * 2) If kernel.shm_rmid_forced is not set then only keep track of
466                  * which shmids are orphaned, so that a later set of the sysctl
467                  * can clean them up.
468                  */
469                 if (!ns->shm_rmid_forced)
470                         goto unlink_continue;
471
472                 /*
473                  * 3) get a reference to the namespace.
474                  *    The refcount could be already 0. If it is 0, then
475                  *    the shm objects will be free by free_ipc_work().
476                  */
477                 ns = get_ipc_ns_not_zero(ns);
478                 if (!ns) {
479 unlink_continue:
480                         list_del_init(&shp->shm_clist);
481                         task_unlock(task);
482                         continue;
483                 }
484
485                 /*
486                  * 4) get a reference to shp.
487                  *   This cannot fail: shm_clist_rm() is called before
488                  *   ipc_rmid(), thus the refcount cannot be 0.
489                  */
490                 WARN_ON(!ipc_rcu_getref(&shp->shm_perm));
491
492                 /*
493                  * 5) unlink the shm segment from the list of segments
494                  *    created by current.
495                  *    This must be done last. After unlinking,
496                  *    only the refcounts obtained above prevent IPC_RMID
497                  *    from destroying the segment or the namespace.
498                  */
499                 list_del_init(&shp->shm_clist);
500
501                 task_unlock(task);
502
503                 /*
504                  * 6) we have all references
505                  *    Thus lock & if needed destroy shp.
506                  */
507                 down_write(&shm_ids(ns).rwsem);
508                 shm_lock_by_ptr(shp);
509                 /*
510                  * rcu_read_lock was implicitly taken in shm_lock_by_ptr, it's
511                  * safe to call ipc_rcu_putref here
512                  */
513                 ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
514
515                 if (ipc_valid_object(&shp->shm_perm)) {
516                         if (shm_may_destroy(shp))
517                                 shm_destroy(ns, shp);
518                         else
519                                 shm_unlock(shp);
520                 } else {
521                         /*
522                          * Someone else deleted the shp from namespace
523                          * idr/kht while we have waited.
524                          * Just unlock and continue.
525                          */
526                         shm_unlock(shp);
527                 }
528
529                 up_write(&shm_ids(ns).rwsem);
530                 put_ipc_ns(ns); /* paired with get_ipc_ns_not_zero */
531         }
532 }
533
534 static vm_fault_t shm_fault(struct vm_fault *vmf)
535 {
536         struct file *file = vmf->vma->vm_file;
537         struct shm_file_data *sfd = shm_file_data(file);
538
539         return sfd->vm_ops->fault(vmf);
540 }
541
542 static int shm_may_split(struct vm_area_struct *vma, unsigned long addr)
543 {
544         struct file *file = vma->vm_file;
545         struct shm_file_data *sfd = shm_file_data(file);
546
547         if (sfd->vm_ops->may_split)
548                 return sfd->vm_ops->may_split(vma, addr);
549
550         return 0;
551 }
552
553 static unsigned long shm_pagesize(struct vm_area_struct *vma)
554 {
555         struct file *file = vma->vm_file;
556         struct shm_file_data *sfd = shm_file_data(file);
557
558         if (sfd->vm_ops->pagesize)
559                 return sfd->vm_ops->pagesize(vma);
560
561         return PAGE_SIZE;
562 }
563
564 #ifdef CONFIG_NUMA
565 static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
566 {
567         struct shm_file_data *sfd = shm_file_data(vma->vm_file);
568         int err = 0;
569
570         if (sfd->vm_ops->set_policy)
571                 err = sfd->vm_ops->set_policy(vma, mpol);
572         return err;
573 }
574
575 static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
576                                         unsigned long addr, pgoff_t *ilx)
577 {
578         struct shm_file_data *sfd = shm_file_data(vma->vm_file);
579         struct mempolicy *mpol = vma->vm_policy;
580
581         if (sfd->vm_ops->get_policy)
582                 mpol = sfd->vm_ops->get_policy(vma, addr, ilx);
583         return mpol;
584 }
585 #endif
586
587 static int shm_mmap(struct file *file, struct vm_area_struct *vma)
588 {
589         struct shm_file_data *sfd = shm_file_data(file);
590         int ret;
591
592         /*
593          * In case of remap_file_pages() emulation, the file can represent an
594          * IPC ID that was removed, and possibly even reused by another shm
595          * segment already.  Propagate this case as an error to caller.
596          */
597         ret = __shm_open(sfd);
598         if (ret)
599                 return ret;
600
601         ret = call_mmap(sfd->file, vma);
602         if (ret) {
603                 __shm_close(sfd);
604                 return ret;
605         }
606         sfd->vm_ops = vma->vm_ops;
607 #ifdef CONFIG_MMU
608         WARN_ON(!sfd->vm_ops->fault);
609 #endif
610         vma->vm_ops = &shm_vm_ops;
611         return 0;
612 }
613
614 static int shm_release(struct inode *ino, struct file *file)
615 {
616         struct shm_file_data *sfd = shm_file_data(file);
617
618         put_ipc_ns(sfd->ns);
619         fput(sfd->file);
620         shm_file_data(file) = NULL;
621         kfree(sfd);
622         return 0;
623 }
624
625 static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
626 {
627         struct shm_file_data *sfd = shm_file_data(file);
628
629         if (!sfd->file->f_op->fsync)
630                 return -EINVAL;
631         return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
632 }
633
634 static long shm_fallocate(struct file *file, int mode, loff_t offset,
635                           loff_t len)
636 {
637         struct shm_file_data *sfd = shm_file_data(file);
638
639         if (!sfd->file->f_op->fallocate)
640                 return -EOPNOTSUPP;
641         return sfd->file->f_op->fallocate(file, mode, offset, len);
642 }
643
644 static unsigned long shm_get_unmapped_area(struct file *file,
645         unsigned long addr, unsigned long len, unsigned long pgoff,
646         unsigned long flags)
647 {
648         struct shm_file_data *sfd = shm_file_data(file);
649
650         return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
651                                                 pgoff, flags);
652 }
653
654 static const struct file_operations shm_file_operations = {
655         .mmap           = shm_mmap,
656         .fsync          = shm_fsync,
657         .release        = shm_release,
658         .get_unmapped_area      = shm_get_unmapped_area,
659         .llseek         = noop_llseek,
660         .fallocate      = shm_fallocate,
661 };
662
663 /*
664  * shm_file_operations_huge is now identical to shm_file_operations,
665  * but we keep it distinct for the sake of is_file_shm_hugepages().
666  */
667 static const struct file_operations shm_file_operations_huge = {
668         .mmap           = shm_mmap,
669         .fsync          = shm_fsync,
670         .release        = shm_release,
671         .get_unmapped_area      = shm_get_unmapped_area,
672         .llseek         = noop_llseek,
673         .fallocate      = shm_fallocate,
674 };
675
676 bool is_file_shm_hugepages(struct file *file)
677 {
678         return file->f_op == &shm_file_operations_huge;
679 }
680
681 static const struct vm_operations_struct shm_vm_ops = {
682         .open   = shm_open,     /* callback for a new vm-area open */
683         .close  = shm_close,    /* callback for when the vm-area is released */
684         .fault  = shm_fault,
685         .may_split = shm_may_split,
686         .pagesize = shm_pagesize,
687 #if defined(CONFIG_NUMA)
688         .set_policy = shm_set_policy,
689         .get_policy = shm_get_policy,
690 #endif
691 };
692
693 /**
694  * newseg - Create a new shared memory segment
695  * @ns: namespace
696  * @params: ptr to the structure that contains key, size and shmflg
697  *
698  * Called with shm_ids.rwsem held as a writer.
699  */
700 static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
701 {
702         key_t key = params->key;
703         int shmflg = params->flg;
704         size_t size = params->u.size;
705         int error;
706         struct shmid_kernel *shp;
707         size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
708         struct file *file;
709         char name[13];
710         vm_flags_t acctflag = 0;
711
712         if (size < SHMMIN || size > ns->shm_ctlmax)
713                 return -EINVAL;
714
715         if (numpages << PAGE_SHIFT < size)
716                 return -ENOSPC;
717
718         if (ns->shm_tot + numpages < ns->shm_tot ||
719                         ns->shm_tot + numpages > ns->shm_ctlall)
720                 return -ENOSPC;
721
722         shp = kmalloc(sizeof(*shp), GFP_KERNEL_ACCOUNT);
723         if (unlikely(!shp))
724                 return -ENOMEM;
725
726         shp->shm_perm.key = key;
727         shp->shm_perm.mode = (shmflg & S_IRWXUGO);
728         shp->mlock_ucounts = NULL;
729
730         shp->shm_perm.security = NULL;
731         error = security_shm_alloc(&shp->shm_perm);
732         if (error) {
733                 kfree(shp);
734                 return error;
735         }
736
737         sprintf(name, "SYSV%08x", key);
738         if (shmflg & SHM_HUGETLB) {
739                 struct hstate *hs;
740                 size_t hugesize;
741
742                 hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
743                 if (!hs) {
744                         error = -EINVAL;
745                         goto no_file;
746                 }
747                 hugesize = ALIGN(size, huge_page_size(hs));
748
749                 /* hugetlb_file_setup applies strict accounting */
750                 if (shmflg & SHM_NORESERVE)
751                         acctflag = VM_NORESERVE;
752                 file = hugetlb_file_setup(name, hugesize, acctflag,
753                                 HUGETLB_SHMFS_INODE, (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
754         } else {
755                 /*
756                  * Do not allow no accounting for OVERCOMMIT_NEVER, even
757                  * if it's asked for.
758                  */
759                 if  ((shmflg & SHM_NORESERVE) &&
760                                 sysctl_overcommit_memory != OVERCOMMIT_NEVER)
761                         acctflag = VM_NORESERVE;
762                 file = shmem_kernel_file_setup(name, size, acctflag);
763         }
764         error = PTR_ERR(file);
765         if (IS_ERR(file))
766                 goto no_file;
767
768         shp->shm_cprid = get_pid(task_tgid(current));
769         shp->shm_lprid = NULL;
770         shp->shm_atim = shp->shm_dtim = 0;
771         shp->shm_ctim = ktime_get_real_seconds();
772         shp->shm_segsz = size;
773         shp->shm_nattch = 0;
774         shp->shm_file = file;
775         shp->shm_creator = current;
776
777         /* ipc_addid() locks shp upon success. */
778         error = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
779         if (error < 0)
780                 goto no_id;
781
782         shp->ns = ns;
783
784         task_lock(current);
785         list_add(&shp->shm_clist, &current->sysvshm.shm_clist);
786         task_unlock(current);
787
788         /*
789          * shmid gets reported as "inode#" in /proc/pid/maps.
790          * proc-ps tools use this. Changing this will break them.
791          */
792         file_inode(file)->i_ino = shp->shm_perm.id;
793
794         ns->shm_tot += numpages;
795         error = shp->shm_perm.id;
796
797         ipc_unlock_object(&shp->shm_perm);
798         rcu_read_unlock();
799         return error;
800
801 no_id:
802         ipc_update_pid(&shp->shm_cprid, NULL);
803         ipc_update_pid(&shp->shm_lprid, NULL);
804         fput(file);
805         ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
806         return error;
807 no_file:
808         call_rcu(&shp->shm_perm.rcu, shm_rcu_free);
809         return error;
810 }
811
812 /*
813  * Called with shm_ids.rwsem and ipcp locked.
814  */
815 static int shm_more_checks(struct kern_ipc_perm *ipcp, struct ipc_params *params)
816 {
817         struct shmid_kernel *shp;
818
819         shp = container_of(ipcp, struct shmid_kernel, shm_perm);
820         if (shp->shm_segsz < params->u.size)
821                 return -EINVAL;
822
823         return 0;
824 }
825
826 long ksys_shmget(key_t key, size_t size, int shmflg)
827 {
828         struct ipc_namespace *ns;
829         static const struct ipc_ops shm_ops = {
830                 .getnew = newseg,
831                 .associate = security_shm_associate,
832                 .more_checks = shm_more_checks,
833         };
834         struct ipc_params shm_params;
835
836         ns = current->nsproxy->ipc_ns;
837
838         shm_params.key = key;
839         shm_params.flg = shmflg;
840         shm_params.u.size = size;
841
842         return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
843 }
844
845 SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
846 {
847         return ksys_shmget(key, size, shmflg);
848 }
849
850 static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
851 {
852         switch (version) {
853         case IPC_64:
854                 return copy_to_user(buf, in, sizeof(*in));
855         case IPC_OLD:
856             {
857                 struct shmid_ds out;
858
859                 memset(&out, 0, sizeof(out));
860                 ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
861                 out.shm_segsz   = in->shm_segsz;
862                 out.shm_atime   = in->shm_atime;
863                 out.shm_dtime   = in->shm_dtime;
864                 out.shm_ctime   = in->shm_ctime;
865                 out.shm_cpid    = in->shm_cpid;
866                 out.shm_lpid    = in->shm_lpid;
867                 out.shm_nattch  = in->shm_nattch;
868
869                 return copy_to_user(buf, &out, sizeof(out));
870             }
871         default:
872                 return -EINVAL;
873         }
874 }
875
876 static inline unsigned long
877 copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
878 {
879         switch (version) {
880         case IPC_64:
881                 if (copy_from_user(out, buf, sizeof(*out)))
882                         return -EFAULT;
883                 return 0;
884         case IPC_OLD:
885             {
886                 struct shmid_ds tbuf_old;
887
888                 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
889                         return -EFAULT;
890
891                 out->shm_perm.uid       = tbuf_old.shm_perm.uid;
892                 out->shm_perm.gid       = tbuf_old.shm_perm.gid;
893                 out->shm_perm.mode      = tbuf_old.shm_perm.mode;
894
895                 return 0;
896             }
897         default:
898                 return -EINVAL;
899         }
900 }
901
902 static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
903 {
904         switch (version) {
905         case IPC_64:
906                 return copy_to_user(buf, in, sizeof(*in));
907         case IPC_OLD:
908             {
909                 struct shminfo out;
910
911                 if (in->shmmax > INT_MAX)
912                         out.shmmax = INT_MAX;
913                 else
914                         out.shmmax = (int)in->shmmax;
915
916                 out.shmmin      = in->shmmin;
917                 out.shmmni      = in->shmmni;
918                 out.shmseg      = in->shmseg;
919                 out.shmall      = in->shmall;
920
921                 return copy_to_user(buf, &out, sizeof(out));
922             }
923         default:
924                 return -EINVAL;
925         }
926 }
927
928 /*
929  * Calculate and add used RSS and swap pages of a shm.
930  * Called with shm_ids.rwsem held as a reader
931  */
932 static void shm_add_rss_swap(struct shmid_kernel *shp,
933         unsigned long *rss_add, unsigned long *swp_add)
934 {
935         struct inode *inode;
936
937         inode = file_inode(shp->shm_file);
938
939         if (is_file_hugepages(shp->shm_file)) {
940                 struct address_space *mapping = inode->i_mapping;
941                 struct hstate *h = hstate_file(shp->shm_file);
942                 *rss_add += pages_per_huge_page(h) * mapping->nrpages;
943         } else {
944 #ifdef CONFIG_SHMEM
945                 struct shmem_inode_info *info = SHMEM_I(inode);
946
947                 spin_lock_irq(&info->lock);
948                 *rss_add += inode->i_mapping->nrpages;
949                 *swp_add += info->swapped;
950                 spin_unlock_irq(&info->lock);
951 #else
952                 *rss_add += inode->i_mapping->nrpages;
953 #endif
954         }
955 }
956
957 /*
958  * Called with shm_ids.rwsem held as a reader
959  */
960 static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
961                 unsigned long *swp)
962 {
963         int next_id;
964         int total, in_use;
965
966         *rss = 0;
967         *swp = 0;
968
969         in_use = shm_ids(ns).in_use;
970
971         for (total = 0, next_id = 0; total < in_use; next_id++) {
972                 struct kern_ipc_perm *ipc;
973                 struct shmid_kernel *shp;
974
975                 ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id);
976                 if (ipc == NULL)
977                         continue;
978                 shp = container_of(ipc, struct shmid_kernel, shm_perm);
979
980                 shm_add_rss_swap(shp, rss, swp);
981
982                 total++;
983         }
984 }
985
986 /*
987  * This function handles some shmctl commands which require the rwsem
988  * to be held in write mode.
989  * NOTE: no locks must be held, the rwsem is taken inside this function.
990  */
991 static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
992                        struct shmid64_ds *shmid64)
993 {
994         struct kern_ipc_perm *ipcp;
995         struct shmid_kernel *shp;
996         int err;
997
998         down_write(&shm_ids(ns).rwsem);
999         rcu_read_lock();
1000
1001         ipcp = ipcctl_obtain_check(ns, &shm_ids(ns), shmid, cmd,
1002                                       &shmid64->shm_perm, 0);
1003         if (IS_ERR(ipcp)) {
1004                 err = PTR_ERR(ipcp);
1005                 goto out_unlock1;
1006         }
1007
1008         shp = container_of(ipcp, struct shmid_kernel, shm_perm);
1009
1010         err = security_shm_shmctl(&shp->shm_perm, cmd);
1011         if (err)
1012                 goto out_unlock1;
1013
1014         switch (cmd) {
1015         case IPC_RMID:
1016                 ipc_lock_object(&shp->shm_perm);
1017                 /* do_shm_rmid unlocks the ipc object and rcu */
1018                 do_shm_rmid(ns, ipcp);
1019                 goto out_up;
1020         case IPC_SET:
1021                 ipc_lock_object(&shp->shm_perm);
1022                 err = ipc_update_perm(&shmid64->shm_perm, ipcp);
1023                 if (err)
1024                         goto out_unlock0;
1025                 shp->shm_ctim = ktime_get_real_seconds();
1026                 break;
1027         default:
1028                 err = -EINVAL;
1029                 goto out_unlock1;
1030         }
1031
1032 out_unlock0:
1033         ipc_unlock_object(&shp->shm_perm);
1034 out_unlock1:
1035         rcu_read_unlock();
1036 out_up:
1037         up_write(&shm_ids(ns).rwsem);
1038         return err;
1039 }
1040
1041 static int shmctl_ipc_info(struct ipc_namespace *ns,
1042                            struct shminfo64 *shminfo)
1043 {
1044         int err = security_shm_shmctl(NULL, IPC_INFO);
1045         if (!err) {
1046                 memset(shminfo, 0, sizeof(*shminfo));
1047                 shminfo->shmmni = shminfo->shmseg = ns->shm_ctlmni;
1048                 shminfo->shmmax = ns->shm_ctlmax;
1049                 shminfo->shmall = ns->shm_ctlall;
1050                 shminfo->shmmin = SHMMIN;
1051                 down_read(&shm_ids(ns).rwsem);
1052                 err = ipc_get_maxidx(&shm_ids(ns));
1053                 up_read(&shm_ids(ns).rwsem);
1054                 if (err < 0)
1055                         err = 0;
1056         }
1057         return err;
1058 }
1059
1060 static int shmctl_shm_info(struct ipc_namespace *ns,
1061                            struct shm_info *shm_info)
1062 {
1063         int err = security_shm_shmctl(NULL, SHM_INFO);
1064         if (!err) {
1065                 memset(shm_info, 0, sizeof(*shm_info));
1066                 down_read(&shm_ids(ns).rwsem);
1067                 shm_info->used_ids = shm_ids(ns).in_use;
1068                 shm_get_stat(ns, &shm_info->shm_rss, &shm_info->shm_swp);
1069                 shm_info->shm_tot = ns->shm_tot;
1070                 shm_info->swap_attempts = 0;
1071                 shm_info->swap_successes = 0;
1072                 err = ipc_get_maxidx(&shm_ids(ns));
1073                 up_read(&shm_ids(ns).rwsem);
1074                 if (err < 0)
1075                         err = 0;
1076         }
1077         return err;
1078 }
1079
1080 static int shmctl_stat(struct ipc_namespace *ns, int shmid,
1081                         int cmd, struct shmid64_ds *tbuf)
1082 {
1083         struct shmid_kernel *shp;
1084         int err;
1085
1086         memset(tbuf, 0, sizeof(*tbuf));
1087
1088         rcu_read_lock();
1089         if (cmd == SHM_STAT || cmd == SHM_STAT_ANY) {
1090                 shp = shm_obtain_object(ns, shmid);
1091                 if (IS_ERR(shp)) {
1092                         err = PTR_ERR(shp);
1093                         goto out_unlock;
1094                 }
1095         } else { /* IPC_STAT */
1096                 shp = shm_obtain_object_check(ns, shmid);
1097                 if (IS_ERR(shp)) {
1098                         err = PTR_ERR(shp);
1099                         goto out_unlock;
1100                 }
1101         }
1102
1103         /*
1104          * Semantically SHM_STAT_ANY ought to be identical to
1105          * that functionality provided by the /proc/sysvipc/
1106          * interface. As such, only audit these calls and
1107          * do not do traditional S_IRUGO permission checks on
1108          * the ipc object.
1109          */
1110         if (cmd == SHM_STAT_ANY)
1111                 audit_ipc_obj(&shp->shm_perm);
1112         else {
1113                 err = -EACCES;
1114                 if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
1115                         goto out_unlock;
1116         }
1117
1118         err = security_shm_shmctl(&shp->shm_perm, cmd);
1119         if (err)
1120                 goto out_unlock;
1121
1122         ipc_lock_object(&shp->shm_perm);
1123
1124         if (!ipc_valid_object(&shp->shm_perm)) {
1125                 ipc_unlock_object(&shp->shm_perm);
1126                 err = -EIDRM;
1127                 goto out_unlock;
1128         }
1129
1130         kernel_to_ipc64_perm(&shp->shm_perm, &tbuf->shm_perm);
1131         tbuf->shm_segsz = shp->shm_segsz;
1132         tbuf->shm_atime = shp->shm_atim;
1133         tbuf->shm_dtime = shp->shm_dtim;
1134         tbuf->shm_ctime = shp->shm_ctim;
1135 #ifndef CONFIG_64BIT
1136         tbuf->shm_atime_high = shp->shm_atim >> 32;
1137         tbuf->shm_dtime_high = shp->shm_dtim >> 32;
1138         tbuf->shm_ctime_high = shp->shm_ctim >> 32;
1139 #endif
1140         tbuf->shm_cpid  = pid_vnr(shp->shm_cprid);
1141         tbuf->shm_lpid  = pid_vnr(shp->shm_lprid);
1142         tbuf->shm_nattch = shp->shm_nattch;
1143
1144         if (cmd == IPC_STAT) {
1145                 /*
1146                  * As defined in SUS:
1147                  * Return 0 on success
1148                  */
1149                 err = 0;
1150         } else {
1151                 /*
1152                  * SHM_STAT and SHM_STAT_ANY (both Linux specific)
1153                  * Return the full id, including the sequence number
1154                  */
1155                 err = shp->shm_perm.id;
1156         }
1157
1158         ipc_unlock_object(&shp->shm_perm);
1159 out_unlock:
1160         rcu_read_unlock();
1161         return err;
1162 }
1163
1164 static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd)
1165 {
1166         struct shmid_kernel *shp;
1167         struct file *shm_file;
1168         int err;
1169
1170         rcu_read_lock();
1171         shp = shm_obtain_object_check(ns, shmid);
1172         if (IS_ERR(shp)) {
1173                 err = PTR_ERR(shp);
1174                 goto out_unlock1;
1175         }
1176
1177         audit_ipc_obj(&(shp->shm_perm));
1178         err = security_shm_shmctl(&shp->shm_perm, cmd);
1179         if (err)
1180                 goto out_unlock1;
1181
1182         ipc_lock_object(&shp->shm_perm);
1183
1184         /* check if shm_destroy() is tearing down shp */
1185         if (!ipc_valid_object(&shp->shm_perm)) {
1186                 err = -EIDRM;
1187                 goto out_unlock0;
1188         }
1189
1190         if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
1191                 kuid_t euid = current_euid();
1192
1193                 if (!uid_eq(euid, shp->shm_perm.uid) &&
1194                     !uid_eq(euid, shp->shm_perm.cuid)) {
1195                         err = -EPERM;
1196                         goto out_unlock0;
1197                 }
1198                 if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) {
1199                         err = -EPERM;
1200                         goto out_unlock0;
1201                 }
1202         }
1203
1204         shm_file = shp->shm_file;
1205         if (is_file_hugepages(shm_file))
1206                 goto out_unlock0;
1207
1208         if (cmd == SHM_LOCK) {
1209                 struct ucounts *ucounts = current_ucounts();
1210
1211                 err = shmem_lock(shm_file, 1, ucounts);
1212                 if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
1213                         shp->shm_perm.mode |= SHM_LOCKED;
1214                         shp->mlock_ucounts = ucounts;
1215                 }
1216                 goto out_unlock0;
1217         }
1218
1219         /* SHM_UNLOCK */
1220         if (!(shp->shm_perm.mode & SHM_LOCKED))
1221                 goto out_unlock0;
1222         shmem_lock(shm_file, 0, shp->mlock_ucounts);
1223         shp->shm_perm.mode &= ~SHM_LOCKED;
1224         shp->mlock_ucounts = NULL;
1225         get_file(shm_file);
1226         ipc_unlock_object(&shp->shm_perm);
1227         rcu_read_unlock();
1228         shmem_unlock_mapping(shm_file->f_mapping);
1229
1230         fput(shm_file);
1231         return err;
1232
1233 out_unlock0:
1234         ipc_unlock_object(&shp->shm_perm);
1235 out_unlock1:
1236         rcu_read_unlock();
1237         return err;
1238 }
1239
1240 static long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf, int version)
1241 {
1242         int err;
1243         struct ipc_namespace *ns;
1244         struct shmid64_ds sem64;
1245
1246         if (cmd < 0 || shmid < 0)
1247                 return -EINVAL;
1248
1249         ns = current->nsproxy->ipc_ns;
1250
1251         switch (cmd) {
1252         case IPC_INFO: {
1253                 struct shminfo64 shminfo;
1254                 err = shmctl_ipc_info(ns, &shminfo);
1255                 if (err < 0)
1256                         return err;
1257                 if (copy_shminfo_to_user(buf, &shminfo, version))
1258                         err = -EFAULT;
1259                 return err;
1260         }
1261         case SHM_INFO: {
1262                 struct shm_info shm_info;
1263                 err = shmctl_shm_info(ns, &shm_info);
1264                 if (err < 0)
1265                         return err;
1266                 if (copy_to_user(buf, &shm_info, sizeof(shm_info)))
1267                         err = -EFAULT;
1268                 return err;
1269         }
1270         case SHM_STAT:
1271         case SHM_STAT_ANY:
1272         case IPC_STAT: {
1273                 err = shmctl_stat(ns, shmid, cmd, &sem64);
1274                 if (err < 0)
1275                         return err;
1276                 if (copy_shmid_to_user(buf, &sem64, version))
1277                         err = -EFAULT;
1278                 return err;
1279         }
1280         case IPC_SET:
1281                 if (copy_shmid_from_user(&sem64, buf, version))
1282                         return -EFAULT;
1283                 fallthrough;
1284         case IPC_RMID:
1285                 return shmctl_down(ns, shmid, cmd, &sem64);
1286         case SHM_LOCK:
1287         case SHM_UNLOCK:
1288                 return shmctl_do_lock(ns, shmid, cmd);
1289         default:
1290                 return -EINVAL;
1291         }
1292 }
1293
1294 SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
1295 {
1296         return ksys_shmctl(shmid, cmd, buf, IPC_64);
1297 }
1298
1299 #ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION
1300 long ksys_old_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
1301 {
1302         int version = ipc_parse_version(&cmd);
1303
1304         return ksys_shmctl(shmid, cmd, buf, version);
1305 }
1306
1307 SYSCALL_DEFINE3(old_shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
1308 {
1309         return ksys_old_shmctl(shmid, cmd, buf);
1310 }
1311 #endif
1312
1313 #ifdef CONFIG_COMPAT
1314
1315 struct compat_shmid_ds {
1316         struct compat_ipc_perm shm_perm;
1317         int shm_segsz;
1318         old_time32_t shm_atime;
1319         old_time32_t shm_dtime;
1320         old_time32_t shm_ctime;
1321         compat_ipc_pid_t shm_cpid;
1322         compat_ipc_pid_t shm_lpid;
1323         unsigned short shm_nattch;
1324         unsigned short shm_unused;
1325         compat_uptr_t shm_unused2;
1326         compat_uptr_t shm_unused3;
1327 };
1328
1329 struct compat_shminfo64 {
1330         compat_ulong_t shmmax;
1331         compat_ulong_t shmmin;
1332         compat_ulong_t shmmni;
1333         compat_ulong_t shmseg;
1334         compat_ulong_t shmall;
1335         compat_ulong_t __unused1;
1336         compat_ulong_t __unused2;
1337         compat_ulong_t __unused3;
1338         compat_ulong_t __unused4;
1339 };
1340
1341 struct compat_shm_info {
1342         compat_int_t used_ids;
1343         compat_ulong_t shm_tot, shm_rss, shm_swp;
1344         compat_ulong_t swap_attempts, swap_successes;
1345 };
1346
1347 static int copy_compat_shminfo_to_user(void __user *buf, struct shminfo64 *in,
1348                                         int version)
1349 {
1350         if (in->shmmax > INT_MAX)
1351                 in->shmmax = INT_MAX;
1352         if (version == IPC_64) {
1353                 struct compat_shminfo64 info;
1354                 memset(&info, 0, sizeof(info));
1355                 info.shmmax = in->shmmax;
1356                 info.shmmin = in->shmmin;
1357                 info.shmmni = in->shmmni;
1358                 info.shmseg = in->shmseg;
1359                 info.shmall = in->shmall;
1360                 return copy_to_user(buf, &info, sizeof(info));
1361         } else {
1362                 struct shminfo info;
1363                 memset(&info, 0, sizeof(info));
1364                 info.shmmax = in->shmmax;
1365                 info.shmmin = in->shmmin;
1366                 info.shmmni = in->shmmni;
1367                 info.shmseg = in->shmseg;
1368                 info.shmall = in->shmall;
1369                 return copy_to_user(buf, &info, sizeof(info));
1370         }
1371 }
1372
1373 static int put_compat_shm_info(struct shm_info *ip,
1374                                 struct compat_shm_info __user *uip)
1375 {
1376         struct compat_shm_info info;
1377
1378         memset(&info, 0, sizeof(info));
1379         info.used_ids = ip->used_ids;
1380         info.shm_tot = ip->shm_tot;
1381         info.shm_rss = ip->shm_rss;
1382         info.shm_swp = ip->shm_swp;
1383         info.swap_attempts = ip->swap_attempts;
1384         info.swap_successes = ip->swap_successes;
1385         return copy_to_user(uip, &info, sizeof(info));
1386 }
1387
1388 static int copy_compat_shmid_to_user(void __user *buf, struct shmid64_ds *in,
1389                                         int version)
1390 {
1391         if (version == IPC_64) {
1392                 struct compat_shmid64_ds v;
1393                 memset(&v, 0, sizeof(v));
1394                 to_compat_ipc64_perm(&v.shm_perm, &in->shm_perm);
1395                 v.shm_atime      = lower_32_bits(in->shm_atime);
1396                 v.shm_atime_high = upper_32_bits(in->shm_atime);
1397                 v.shm_dtime      = lower_32_bits(in->shm_dtime);
1398                 v.shm_dtime_high = upper_32_bits(in->shm_dtime);
1399                 v.shm_ctime      = lower_32_bits(in->shm_ctime);
1400                 v.shm_ctime_high = upper_32_bits(in->shm_ctime);
1401                 v.shm_segsz = in->shm_segsz;
1402                 v.shm_nattch = in->shm_nattch;
1403                 v.shm_cpid = in->shm_cpid;
1404                 v.shm_lpid = in->shm_lpid;
1405                 return copy_to_user(buf, &v, sizeof(v));
1406         } else {
1407                 struct compat_shmid_ds v;
1408                 memset(&v, 0, sizeof(v));
1409                 to_compat_ipc_perm(&v.shm_perm, &in->shm_perm);
1410                 v.shm_perm.key = in->shm_perm.key;
1411                 v.shm_atime = in->shm_atime;
1412                 v.shm_dtime = in->shm_dtime;
1413                 v.shm_ctime = in->shm_ctime;
1414                 v.shm_segsz = in->shm_segsz;
1415                 v.shm_nattch = in->shm_nattch;
1416                 v.shm_cpid = in->shm_cpid;
1417                 v.shm_lpid = in->shm_lpid;
1418                 return copy_to_user(buf, &v, sizeof(v));
1419         }
1420 }
1421
1422 static int copy_compat_shmid_from_user(struct shmid64_ds *out, void __user *buf,
1423                                         int version)
1424 {
1425         memset(out, 0, sizeof(*out));
1426         if (version == IPC_64) {
1427                 struct compat_shmid64_ds __user *p = buf;
1428                 return get_compat_ipc64_perm(&out->shm_perm, &p->shm_perm);
1429         } else {
1430                 struct compat_shmid_ds __user *p = buf;
1431                 return get_compat_ipc_perm(&out->shm_perm, &p->shm_perm);
1432         }
1433 }
1434
1435 static long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr, int version)
1436 {
1437         struct ipc_namespace *ns;
1438         struct shmid64_ds sem64;
1439         int err;
1440
1441         ns = current->nsproxy->ipc_ns;
1442
1443         if (cmd < 0 || shmid < 0)
1444                 return -EINVAL;
1445
1446         switch (cmd) {
1447         case IPC_INFO: {
1448                 struct shminfo64 shminfo;
1449                 err = shmctl_ipc_info(ns, &shminfo);
1450                 if (err < 0)
1451                         return err;
1452                 if (copy_compat_shminfo_to_user(uptr, &shminfo, version))
1453                         err = -EFAULT;
1454                 return err;
1455         }
1456         case SHM_INFO: {
1457                 struct shm_info shm_info;
1458                 err = shmctl_shm_info(ns, &shm_info);
1459                 if (err < 0)
1460                         return err;
1461                 if (put_compat_shm_info(&shm_info, uptr))
1462                         err = -EFAULT;
1463                 return err;
1464         }
1465         case IPC_STAT:
1466         case SHM_STAT_ANY:
1467         case SHM_STAT:
1468                 err = shmctl_stat(ns, shmid, cmd, &sem64);
1469                 if (err < 0)
1470                         return err;
1471                 if (copy_compat_shmid_to_user(uptr, &sem64, version))
1472                         err = -EFAULT;
1473                 return err;
1474
1475         case IPC_SET:
1476                 if (copy_compat_shmid_from_user(&sem64, uptr, version))
1477                         return -EFAULT;
1478                 fallthrough;
1479         case IPC_RMID:
1480                 return shmctl_down(ns, shmid, cmd, &sem64);
1481         case SHM_LOCK:
1482         case SHM_UNLOCK:
1483                 return shmctl_do_lock(ns, shmid, cmd);
1484         default:
1485                 return -EINVAL;
1486         }
1487         return err;
1488 }
1489
1490 COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr)
1491 {
1492         return compat_ksys_shmctl(shmid, cmd, uptr, IPC_64);
1493 }
1494
1495 #ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION
1496 long compat_ksys_old_shmctl(int shmid, int cmd, void __user *uptr)
1497 {
1498         int version = compat_ipc_parse_version(&cmd);
1499
1500         return compat_ksys_shmctl(shmid, cmd, uptr, version);
1501 }
1502
1503 COMPAT_SYSCALL_DEFINE3(old_shmctl, int, shmid, int, cmd, void __user *, uptr)
1504 {
1505         return compat_ksys_old_shmctl(shmid, cmd, uptr);
1506 }
1507 #endif
1508 #endif
1509
1510 /*
1511  * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
1512  *
1513  * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
1514  * "raddr" thing points to kernel space, and there has to be a wrapper around
1515  * this.
1516  */
1517 long do_shmat(int shmid, char __user *shmaddr, int shmflg,
1518               ulong *raddr, unsigned long shmlba)
1519 {
1520         struct shmid_kernel *shp;
1521         unsigned long addr = (unsigned long)shmaddr;
1522         unsigned long size;
1523         struct file *file, *base;
1524         int    err;
1525         unsigned long flags = MAP_SHARED;
1526         unsigned long prot;
1527         int acc_mode;
1528         struct ipc_namespace *ns;
1529         struct shm_file_data *sfd;
1530         int f_flags;
1531         unsigned long populate = 0;
1532
1533         err = -EINVAL;
1534         if (shmid < 0)
1535                 goto out;
1536
1537         if (addr) {
1538                 if (addr & (shmlba - 1)) {
1539                         if (shmflg & SHM_RND) {
1540                                 addr &= ~(shmlba - 1);  /* round down */
1541
1542                                 /*
1543                                  * Ensure that the round-down is non-nil
1544                                  * when remapping. This can happen for
1545                                  * cases when addr < shmlba.
1546                                  */
1547                                 if (!addr && (shmflg & SHM_REMAP))
1548                                         goto out;
1549                         } else
1550 #ifndef __ARCH_FORCE_SHMLBA
1551                                 if (addr & ~PAGE_MASK)
1552 #endif
1553                                         goto out;
1554                 }
1555
1556                 flags |= MAP_FIXED;
1557         } else if ((shmflg & SHM_REMAP))
1558                 goto out;
1559
1560         if (shmflg & SHM_RDONLY) {
1561                 prot = PROT_READ;
1562                 acc_mode = S_IRUGO;
1563                 f_flags = O_RDONLY;
1564         } else {
1565                 prot = PROT_READ | PROT_WRITE;
1566                 acc_mode = S_IRUGO | S_IWUGO;
1567                 f_flags = O_RDWR;
1568         }
1569         if (shmflg & SHM_EXEC) {
1570                 prot |= PROT_EXEC;
1571                 acc_mode |= S_IXUGO;
1572         }
1573
1574         /*
1575          * We cannot rely on the fs check since SYSV IPC does have an
1576          * additional creator id...
1577          */
1578         ns = current->nsproxy->ipc_ns;
1579         rcu_read_lock();
1580         shp = shm_obtain_object_check(ns, shmid);
1581         if (IS_ERR(shp)) {
1582                 err = PTR_ERR(shp);
1583                 goto out_unlock;
1584         }
1585
1586         err = -EACCES;
1587         if (ipcperms(ns, &shp->shm_perm, acc_mode))
1588                 goto out_unlock;
1589
1590         err = security_shm_shmat(&shp->shm_perm, shmaddr, shmflg);
1591         if (err)
1592                 goto out_unlock;
1593
1594         ipc_lock_object(&shp->shm_perm);
1595
1596         /* check if shm_destroy() is tearing down shp */
1597         if (!ipc_valid_object(&shp->shm_perm)) {
1598                 ipc_unlock_object(&shp->shm_perm);
1599                 err = -EIDRM;
1600                 goto out_unlock;
1601         }
1602
1603         /*
1604          * We need to take a reference to the real shm file to prevent the
1605          * pointer from becoming stale in cases where the lifetime of the outer
1606          * file extends beyond that of the shm segment.  It's not usually
1607          * possible, but it can happen during remap_file_pages() emulation as
1608          * that unmaps the memory, then does ->mmap() via file reference only.
1609          * We'll deny the ->mmap() if the shm segment was since removed, but to
1610          * detect shm ID reuse we need to compare the file pointers.
1611          */
1612         base = get_file(shp->shm_file);
1613         shp->shm_nattch++;
1614         size = i_size_read(file_inode(base));
1615         ipc_unlock_object(&shp->shm_perm);
1616         rcu_read_unlock();
1617
1618         err = -ENOMEM;
1619         sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
1620         if (!sfd) {
1621                 fput(base);
1622                 goto out_nattch;
1623         }
1624
1625         file = alloc_file_clone(base, f_flags,
1626                           is_file_hugepages(base) ?
1627                                 &shm_file_operations_huge :
1628                                 &shm_file_operations);
1629         err = PTR_ERR(file);
1630         if (IS_ERR(file)) {
1631                 kfree(sfd);
1632                 fput(base);
1633                 goto out_nattch;
1634         }
1635
1636         sfd->id = shp->shm_perm.id;
1637         sfd->ns = get_ipc_ns(ns);
1638         sfd->file = base;
1639         sfd->vm_ops = NULL;
1640         file->private_data = sfd;
1641
1642         err = security_mmap_file(file, prot, flags);
1643         if (err)
1644                 goto out_fput;
1645
1646         if (mmap_write_lock_killable(current->mm)) {
1647                 err = -EINTR;
1648                 goto out_fput;
1649         }
1650
1651         if (addr && !(shmflg & SHM_REMAP)) {
1652                 err = -EINVAL;
1653                 if (addr + size < addr)
1654                         goto invalid;
1655
1656                 if (find_vma_intersection(current->mm, addr, addr + size))
1657                         goto invalid;
1658         }
1659
1660         addr = do_mmap(file, addr, size, prot, flags, 0, 0, &populate, NULL);
1661         *raddr = addr;
1662         err = 0;
1663         if (IS_ERR_VALUE(addr))
1664                 err = (long)addr;
1665 invalid:
1666         mmap_write_unlock(current->mm);
1667         if (populate)
1668                 mm_populate(addr, populate);
1669
1670 out_fput:
1671         fput(file);
1672
1673 out_nattch:
1674         down_write(&shm_ids(ns).rwsem);
1675         shp = shm_lock(ns, shmid);
1676         shp->shm_nattch--;
1677
1678         if (shm_may_destroy(shp))
1679                 shm_destroy(ns, shp);
1680         else
1681                 shm_unlock(shp);
1682         up_write(&shm_ids(ns).rwsem);
1683         return err;
1684
1685 out_unlock:
1686         rcu_read_unlock();
1687 out:
1688         return err;
1689 }
1690
1691 SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
1692 {
1693         unsigned long ret;
1694         long err;
1695
1696         err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA);
1697         if (err)
1698                 return err;
1699         force_successful_syscall_return();
1700         return (long)ret;
1701 }
1702
1703 #ifdef CONFIG_COMPAT
1704
1705 #ifndef COMPAT_SHMLBA
1706 #define COMPAT_SHMLBA   SHMLBA
1707 #endif
1708
1709 COMPAT_SYSCALL_DEFINE3(shmat, int, shmid, compat_uptr_t, shmaddr, int, shmflg)
1710 {
1711         unsigned long ret;
1712         long err;
1713
1714         err = do_shmat(shmid, compat_ptr(shmaddr), shmflg, &ret, COMPAT_SHMLBA);
1715         if (err)
1716                 return err;
1717         force_successful_syscall_return();
1718         return (long)ret;
1719 }
1720 #endif
1721
1722 /*
1723  * detach and kill segment if marked destroyed.
1724  * The work is done in shm_close.
1725  */
1726 long ksys_shmdt(char __user *shmaddr)
1727 {
1728         struct mm_struct *mm = current->mm;
1729         struct vm_area_struct *vma;
1730         unsigned long addr = (unsigned long)shmaddr;
1731         int retval = -EINVAL;
1732 #ifdef CONFIG_MMU
1733         loff_t size = 0;
1734         struct file *file;
1735         VMA_ITERATOR(vmi, mm, addr);
1736 #endif
1737
1738         if (addr & ~PAGE_MASK)
1739                 return retval;
1740
1741         if (mmap_write_lock_killable(mm))
1742                 return -EINTR;
1743
1744         /*
1745          * This function tries to be smart and unmap shm segments that
1746          * were modified by partial mlock or munmap calls:
1747          * - It first determines the size of the shm segment that should be
1748          *   unmapped: It searches for a vma that is backed by shm and that
1749          *   started at address shmaddr. It records it's size and then unmaps
1750          *   it.
1751          * - Then it unmaps all shm vmas that started at shmaddr and that
1752          *   are within the initially determined size and that are from the
1753          *   same shm segment from which we determined the size.
1754          * Errors from do_munmap are ignored: the function only fails if
1755          * it's called with invalid parameters or if it's called to unmap
1756          * a part of a vma. Both calls in this function are for full vmas,
1757          * the parameters are directly copied from the vma itself and always
1758          * valid - therefore do_munmap cannot fail. (famous last words?)
1759          */
1760         /*
1761          * If it had been mremap()'d, the starting address would not
1762          * match the usual checks anyway. So assume all vma's are
1763          * above the starting address given.
1764          */
1765
1766 #ifdef CONFIG_MMU
1767         for_each_vma(vmi, vma) {
1768                 /*
1769                  * Check if the starting address would match, i.e. it's
1770                  * a fragment created by mprotect() and/or munmap(), or it
1771                  * otherwise it starts at this address with no hassles.
1772                  */
1773                 if ((vma->vm_ops == &shm_vm_ops) &&
1774                         (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {
1775
1776                         /*
1777                          * Record the file of the shm segment being
1778                          * unmapped.  With mremap(), someone could place
1779                          * page from another segment but with equal offsets
1780                          * in the range we are unmapping.
1781                          */
1782                         file = vma->vm_file;
1783                         size = i_size_read(file_inode(vma->vm_file));
1784                         do_vma_munmap(&vmi, vma, vma->vm_start, vma->vm_end,
1785                                       NULL, false);
1786                         /*
1787                          * We discovered the size of the shm segment, so
1788                          * break out of here and fall through to the next
1789                          * loop that uses the size information to stop
1790                          * searching for matching vma's.
1791                          */
1792                         retval = 0;
1793                         vma = vma_next(&vmi);
1794                         break;
1795                 }
1796         }
1797
1798         /*
1799          * We need look no further than the maximum address a fragment
1800          * could possibly have landed at. Also cast things to loff_t to
1801          * prevent overflows and make comparisons vs. equal-width types.
1802          */
1803         size = PAGE_ALIGN(size);
1804         while (vma && (loff_t)(vma->vm_end - addr) <= size) {
1805                 /* finding a matching vma now does not alter retval */
1806                 if ((vma->vm_ops == &shm_vm_ops) &&
1807                     ((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) &&
1808                     (vma->vm_file == file)) {
1809                         do_vma_munmap(&vmi, vma, vma->vm_start, vma->vm_end,
1810                                       NULL, false);
1811                 }
1812
1813                 vma = vma_next(&vmi);
1814         }
1815
1816 #else   /* CONFIG_MMU */
1817         vma = vma_lookup(mm, addr);
1818         /* under NOMMU conditions, the exact address to be destroyed must be
1819          * given
1820          */
1821         if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
1822                 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
1823                 retval = 0;
1824         }
1825
1826 #endif
1827
1828         mmap_write_unlock(mm);
1829         return retval;
1830 }
1831
1832 SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
1833 {
1834         return ksys_shmdt(shmaddr);
1835 }
1836
1837 #ifdef CONFIG_PROC_FS
1838 static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
1839 {
1840         struct pid_namespace *pid_ns = ipc_seq_pid_ns(s);
1841         struct user_namespace *user_ns = seq_user_ns(s);
1842         struct kern_ipc_perm *ipcp = it;
1843         struct shmid_kernel *shp;
1844         unsigned long rss = 0, swp = 0;
1845
1846         shp = container_of(ipcp, struct shmid_kernel, shm_perm);
1847         shm_add_rss_swap(shp, &rss, &swp);
1848
1849 #if BITS_PER_LONG <= 32
1850 #define SIZE_SPEC "%10lu"
1851 #else
1852 #define SIZE_SPEC "%21lu"
1853 #endif
1854
1855         seq_printf(s,
1856                    "%10d %10d  %4o " SIZE_SPEC " %5u %5u  "
1857                    "%5lu %5u %5u %5u %5u %10llu %10llu %10llu "
1858                    SIZE_SPEC " " SIZE_SPEC "\n",
1859                    shp->shm_perm.key,
1860                    shp->shm_perm.id,
1861                    shp->shm_perm.mode,
1862                    shp->shm_segsz,
1863                    pid_nr_ns(shp->shm_cprid, pid_ns),
1864                    pid_nr_ns(shp->shm_lprid, pid_ns),
1865                    shp->shm_nattch,
1866                    from_kuid_munged(user_ns, shp->shm_perm.uid),
1867                    from_kgid_munged(user_ns, shp->shm_perm.gid),
1868                    from_kuid_munged(user_ns, shp->shm_perm.cuid),
1869                    from_kgid_munged(user_ns, shp->shm_perm.cgid),
1870                    shp->shm_atim,
1871                    shp->shm_dtim,
1872                    shp->shm_ctim,
1873                    rss * PAGE_SIZE,
1874                    swp * PAGE_SIZE);
1875
1876         return 0;
1877 }
1878 #endif