fs/notify/fanotify/fanotify_user.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 #include <linux/fanotify.h>
   3 #include <linux/fcntl.h>
   4 #include <linux/file.h>
   5 #include <linux/fs.h>
   6 #include <linux/anon_inodes.h>
   7 #include <linux/fsnotify_backend.h>
   8 #include <linux/init.h>
   9 #include <linux/mount.h>
  10 #include <linux/namei.h>
  11 #include <linux/poll.h>
  12 #include <linux/security.h>
  13 #include <linux/syscalls.h>
  14 #include <linux/slab.h>
  15 #include <linux/types.h>
  16 #include <linux/uaccess.h>
  17 #include <linux/compat.h>
  18 #include <linux/sched/signal.h>
  19 #include <linux/memcontrol.h>
  20 #include <linux/statfs.h>
  21 #include <linux/exportfs.h>
  22
  23 #include <asm/ioctls.h>
  24
  25 #include "../../mount.h"
  26 #include "../fdinfo.h"
  27 #include "fanotify.h"
  28
  29 #define FANOTIFY_DEFAULT_MAX_EVENTS     16384
  30 #define FANOTIFY_DEFAULT_MAX_MARKS      8192
  31 #define FANOTIFY_DEFAULT_MAX_LISTENERS  128
  32
  33 /*
  34  * All flags that may be specified in parameter event_f_flags of fanotify_init.
  35  *
  36  * Internal and external open flags are stored together in field f_flags of
  37  * struct file. Only external open flags shall be allowed in event_f_flags.
  38  * Internal flags like FMODE_NONOTIFY, FMODE_EXEC, FMODE_NOCMTIME shall be
  39  * excluded.
  40  */
  41 #define FANOTIFY_INIT_ALL_EVENT_F_BITS                          ( \
  42                 O_ACCMODE       | O_APPEND      | O_NONBLOCK    | \
  43                 __O_SYNC        | O_DSYNC       | O_CLOEXEC     | \
  44                 O_LARGEFILE     | O_NOATIME     )
  45
  46 extern const struct fsnotify_ops fanotify_fsnotify_ops;
  47
  48 struct kmem_cache *fanotify_mark_cache __read_mostly;
  49 struct kmem_cache *fanotify_fid_event_cachep __read_mostly;
  50 struct kmem_cache *fanotify_path_event_cachep __read_mostly;
  51 struct kmem_cache *fanotify_perm_event_cachep __read_mostly;
  52
  53 #define FANOTIFY_EVENT_ALIGN 4
  54 #define FANOTIFY_INFO_HDR_LEN \
  55         (sizeof(struct fanotify_event_info_fid) + sizeof(struct file_handle))
  56
  57 static int fanotify_fid_info_len(int fh_len, int name_len)
  58 {
  59         int info_len = fh_len;
  60
  61         if (name_len)
  62                 info_len += name_len + 1;
  63
  64         return roundup(FANOTIFY_INFO_HDR_LEN + info_len, FANOTIFY_EVENT_ALIGN);
  65 }
  66
  67 static int fanotify_event_info_len(unsigned int fid_mode,
  68                                    struct fanotify_event *event)
  69 {
  70         struct fanotify_info *info = fanotify_event_info(event);
  71         int dir_fh_len = fanotify_event_dir_fh_len(event);
  72         int fh_len = fanotify_event_object_fh_len(event);
  73         int info_len = 0;
  74         int dot_len = 0;
  75
  76         if (dir_fh_len) {
  77                 info_len += fanotify_fid_info_len(dir_fh_len, info->name_len);
  78         } else if ((fid_mode & FAN_REPORT_NAME) && (event->mask & FAN_ONDIR)) {
  79                 /*
  80                  * With group flag FAN_REPORT_NAME, if name was not recorded in
  81                  * event on a directory, we will report the name ".".
  82                  */
  83                 dot_len = 1;
  84         }
  85
  86         if (fh_len)
  87                 info_len += fanotify_fid_info_len(fh_len, dot_len);
  88
  89         return info_len;
  90 }
  91
  92 /*
  93  * Get an fanotify notification event if one exists and is small
  94  * enough to fit in "count". Return an error pointer if the count
  95  * is not large enough. When permission event is dequeued, its state is
  96  * updated accordingly.
  97  */
  98 static struct fanotify_event *get_one_event(struct fsnotify_group *group,
  99                                             size_t count)
 100 {
 101         size_t event_size = FAN_EVENT_METADATA_LEN;
 102         struct fanotify_event *event = NULL;
 103         unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
 104
 105         pr_debug("%s: group=%p count=%zd\n", __func__, group, count);
 106
 107         spin_lock(&group->notification_lock);
 108         if (fsnotify_notify_queue_is_empty(group))
 109                 goto out;
 110
 111         if (fid_mode) {
 112                 event_size += fanotify_event_info_len(fid_mode,
 113                         FANOTIFY_E(fsnotify_peek_first_event(group)));
 114         }
 115
 116         if (event_size > count) {
 117                 event = ERR_PTR(-EINVAL);
 118                 goto out;
 119         }
 120         event = FANOTIFY_E(fsnotify_remove_first_event(group));
 121         if (fanotify_is_perm_event(event->mask))
 122                 FANOTIFY_PERM(event)->state = FAN_EVENT_REPORTED;
 123 out:
 124         spin_unlock(&group->notification_lock);
 125         return event;
 126 }
 127
 128 static int create_fd(struct fsnotify_group *group, struct path *path,
 129                      struct file **file)
 130 {
 131         int client_fd;
 132         struct file *new_file;
 133
 134         client_fd = get_unused_fd_flags(group->fanotify_data.f_flags);
 135         if (client_fd < 0)
 136                 return client_fd;
 137
 138         /*
 139          * we need a new file handle for the userspace program so it can read even if it was
 140          * originally opened O_WRONLY.
 141          */
 142         new_file = dentry_open(path,
 143                                group->fanotify_data.f_flags | FMODE_NONOTIFY,
 144                                current_cred());
 145         if (IS_ERR(new_file)) {
 146                 /*
 147                  * we still send an event even if we can't open the file.  this
 148                  * can happen when say tasks are gone and we try to open their
 149                  * /proc files or we try to open a WRONLY file like in sysfs
 150                  * we just send the errno to userspace since there isn't much
 151                  * else we can do.
 152                  */
 153                 put_unused_fd(client_fd);
 154                 client_fd = PTR_ERR(new_file);
 155         } else {
 156                 *file = new_file;
 157         }
 158
 159         return client_fd;
 160 }
 161
 162 /*
 163  * Finish processing of permission event by setting it to ANSWERED state and
 164  * drop group->notification_lock.
 165  */
 166 static void finish_permission_event(struct fsnotify_group *group,
 167                                     struct fanotify_perm_event *event,
 168                                     unsigned int response)
 169                                     __releases(&group->notification_lock)
 170 {
 171         bool destroy = false;
 172
 173         assert_spin_locked(&group->notification_lock);
 174         event->response = response;
 175         if (event->state == FAN_EVENT_CANCELED)
 176                 destroy = true;
 177         else
 178                 event->state = FAN_EVENT_ANSWERED;
 179         spin_unlock(&group->notification_lock);
 180         if (destroy)
 181                 fsnotify_destroy_event(group, &event->fae.fse);
 182 }
 183
 184 static int process_access_response(struct fsnotify_group *group,
 185                                    struct fanotify_response *response_struct)
 186 {
 187         struct fanotify_perm_event *event;
 188         int fd = response_struct->fd;
 189         int response = response_struct->response;
 190
 191         pr_debug("%s: group=%p fd=%d response=%d\n", __func__, group,
 192                  fd, response);
 193         /*
 194          * make sure the response is valid, if invalid we do nothing and either
 195          * userspace can send a valid response or we will clean it up after the
 196          * timeout
 197          */
 198         switch (response & ~FAN_AUDIT) {
 199         case FAN_ALLOW:
 200         case FAN_DENY:
 201                 break;
 202         default:
 203                 return -EINVAL;
 204         }
 205
 206         if (fd < 0)
 207                 return -EINVAL;
 208
 209         if ((response & FAN_AUDIT) && !FAN_GROUP_FLAG(group, FAN_ENABLE_AUDIT))
 210                 return -EINVAL;
 211
 212         spin_lock(&group->notification_lock);
 213         list_for_each_entry(event, &group->fanotify_data.access_list,
 214                             fae.fse.list) {
 215                 if (event->fd != fd)
 216                         continue;
 217
 218                 list_del_init(&event->fae.fse.list);
 219                 finish_permission_event(group, event, response);
 220                 wake_up(&group->fanotify_data.access_waitq);
 221                 return 0;
 222         }
 223         spin_unlock(&group->notification_lock);
 224
 225         return -ENOENT;
 226 }
 227
 228 static int copy_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh,
 229                              int info_type, const char *name, size_t name_len,
 230                              char __user *buf, size_t count)
 231 {
 232         struct fanotify_event_info_fid info = { };
 233         struct file_handle handle = { };
 234         unsigned char bounce[FANOTIFY_INLINE_FH_LEN], *fh_buf;
 235         size_t fh_len = fh ? fh->len : 0;
 236         size_t info_len = fanotify_fid_info_len(fh_len, name_len);
 237         size_t len = info_len;
 238
 239         pr_debug("%s: fh_len=%zu name_len=%zu, info_len=%zu, count=%zu\n",
 240                  __func__, fh_len, name_len, info_len, count);
 241
 242         if (!fh_len)
 243                 return 0;
 244
 245         if (WARN_ON_ONCE(len < sizeof(info) || len > count))
 246                 return -EFAULT;
 247
 248         /*
 249          * Copy event info fid header followed by variable sized file handle
 250          * and optionally followed by variable sized filename.
 251          */
 252         switch (info_type) {
 253         case FAN_EVENT_INFO_TYPE_FID:
 254         case FAN_EVENT_INFO_TYPE_DFID:
 255                 if (WARN_ON_ONCE(name_len))
 256                         return -EFAULT;
 257                 break;
 258         case FAN_EVENT_INFO_TYPE_DFID_NAME:
 259                 if (WARN_ON_ONCE(!name || !name_len))
 260                         return -EFAULT;
 261                 break;
 262         default:
 263                 return -EFAULT;
 264         }
 265
 266         info.hdr.info_type = info_type;
 267         info.hdr.len = len;
 268         info.fsid = *fsid;
 269         if (copy_to_user(buf, &info, sizeof(info)))
 270                 return -EFAULT;
 271
 272         buf += sizeof(info);
 273         len -= sizeof(info);
 274         if (WARN_ON_ONCE(len < sizeof(handle)))
 275                 return -EFAULT;
 276
 277         handle.handle_type = fh->type;
 278         handle.handle_bytes = fh_len;
 279         if (copy_to_user(buf, &handle, sizeof(handle)))
 280                 return -EFAULT;
 281
 282         buf += sizeof(handle);
 283         len -= sizeof(handle);
 284         if (WARN_ON_ONCE(len < fh_len))
 285                 return -EFAULT;
 286
 287         /*
 288          * For an inline fh and inline file name, copy through stack to exclude
 289          * the copy from usercopy hardening protections.
 290          */
 291         fh_buf = fanotify_fh_buf(fh);
 292         if (fh_len <= FANOTIFY_INLINE_FH_LEN) {
 293                 memcpy(bounce, fh_buf, fh_len);
 294                 fh_buf = bounce;
 295         }
 296         if (copy_to_user(buf, fh_buf, fh_len))
 297                 return -EFAULT;
 298
 299         buf += fh_len;
 300         len -= fh_len;
 301
 302         if (name_len) {
 303                 /* Copy the filename with terminating null */
 304                 name_len++;
 305                 if (WARN_ON_ONCE(len < name_len))
 306                         return -EFAULT;
 307
 308                 if (copy_to_user(buf, name, name_len))
 309                         return -EFAULT;
 310
 311                 buf += name_len;
 312                 len -= name_len;
 313         }
 314
 315         /* Pad with 0's */
 316         WARN_ON_ONCE(len < 0 || len >= FANOTIFY_EVENT_ALIGN);
 317         if (len > 0 && clear_user(buf, len))
 318                 return -EFAULT;
 319
 320         return info_len;
 321 }
 322
 323 static ssize_t copy_event_to_user(struct fsnotify_group *group,
 324                                   struct fanotify_event *event,
 325                                   char __user *buf, size_t count)
 326 {
 327         struct fanotify_event_metadata metadata;
 328         struct path *path = fanotify_event_path(event);
 329         struct fanotify_info *info = fanotify_event_info(event);
 330         unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
 331         struct file *f = NULL;
 332         int ret, fd = FAN_NOFD;
 333         int info_type = 0;
 334
 335         pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 336
 337         metadata.event_len = FAN_EVENT_METADATA_LEN +
 338                                 fanotify_event_info_len(fid_mode, event);
 339         metadata.metadata_len = FAN_EVENT_METADATA_LEN;
 340         metadata.vers = FANOTIFY_METADATA_VERSION;
 341         metadata.reserved = 0;
 342         metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS;
 343         metadata.pid = pid_vnr(event->pid);
 344
 345         if (path && path->mnt && path->dentry) {
 346                 fd = create_fd(group, path, &f);
 347                 if (fd < 0)
 348                         return fd;
 349         }
 350         metadata.fd = fd;
 351
 352         ret = -EFAULT;
 353         /*
 354          * Sanity check copy size in case get_one_event() and
 355          * event_len sizes ever get out of sync.
 356          */
 357         if (WARN_ON_ONCE(metadata.event_len > count))
 358                 goto out_close_fd;
 359
 360         if (copy_to_user(buf, &metadata, FAN_EVENT_METADATA_LEN))
 361                 goto out_close_fd;
 362
 363         buf += FAN_EVENT_METADATA_LEN;
 364         count -= FAN_EVENT_METADATA_LEN;
 365
 366         if (fanotify_is_perm_event(event->mask))
 367                 FANOTIFY_PERM(event)->fd = fd;
 368
 369         /* Event info records order is: dir fid + name, child fid */
 370         if (fanotify_event_dir_fh_len(event)) {
 371                 info_type = info->name_len ? FAN_EVENT_INFO_TYPE_DFID_NAME :
 372                                              FAN_EVENT_INFO_TYPE_DFID;
 373                 ret = copy_info_to_user(fanotify_event_fsid(event),
 374                                         fanotify_info_dir_fh(info),
 375                                         info_type, fanotify_info_name(info),
 376                                         info->name_len, buf, count);
 377                 if (ret < 0)
 378                         goto out_close_fd;
 379
 380                 buf += ret;
 381                 count -= ret;
 382         }
 383
 384         if (fanotify_event_object_fh_len(event)) {
 385                 const char *dot = NULL;
 386                 int dot_len = 0;
 387
 388                 if (fid_mode == FAN_REPORT_FID || info_type) {
 389                         /*
 390                          * With only group flag FAN_REPORT_FID only type FID is
 391                          * reported. Second info record type is always FID.
 392                          */
 393                         info_type = FAN_EVENT_INFO_TYPE_FID;
 394                 } else if ((fid_mode & FAN_REPORT_NAME) &&
 395                            (event->mask & FAN_ONDIR)) {
 396                         /*
 397                          * With group flag FAN_REPORT_NAME, if name was not
 398                          * recorded in an event on a directory, report the
 399                          * name "." with info type DFID_NAME.
 400                          */
 401                         info_type = FAN_EVENT_INFO_TYPE_DFID_NAME;
 402                         dot = ".";
 403                         dot_len = 1;
 404                 } else if ((event->mask & ALL_FSNOTIFY_DIRENT_EVENTS) ||
 405                            (event->mask & FAN_ONDIR)) {
 406                         /*
 407                          * With group flag FAN_REPORT_DIR_FID, a single info
 408                          * record has type DFID for directory entry modification
 409                          * event and for event on a directory.
 410                          */
 411                         info_type = FAN_EVENT_INFO_TYPE_DFID;
 412                 } else {
 413                         /*
 414                          * With group flags FAN_REPORT_DIR_FID|FAN_REPORT_FID,
 415                          * a single info record has type FID for event on a
 416                          * non-directory, when there is no directory to report.
 417                          * For example, on FAN_DELETE_SELF event.
 418                          */
 419                         info_type = FAN_EVENT_INFO_TYPE_FID;
 420                 }
 421
 422                 ret = copy_info_to_user(fanotify_event_fsid(event),
 423                                         fanotify_event_object_fh(event),
 424                                         info_type, dot, dot_len, buf, count);
 425                 if (ret < 0)
 426                         goto out_close_fd;
 427
 428                 buf += ret;
 429                 count -= ret;
 430         }
 431
 432         if (f)
 433                 fd_install(fd, f);
 434
 435         return metadata.event_len;
 436
 437 out_close_fd:
 438         if (fd != FAN_NOFD) {
 439                 put_unused_fd(fd);
 440                 fput(f);
 441         }
 442         return ret;
 443 }
 444
 445 /* fanotify userspace file descriptor functions */
 446 static __poll_t fanotify_poll(struct file *file, poll_table *wait)
 447 {
 448         struct fsnotify_group *group = file->private_data;
 449         __poll_t ret = 0;
 450
 451         poll_wait(file, &group->notification_waitq, wait);
 452         spin_lock(&group->notification_lock);
 453         if (!fsnotify_notify_queue_is_empty(group))
 454                 ret = EPOLLIN | EPOLLRDNORM;
 455         spin_unlock(&group->notification_lock);
 456
 457         return ret;
 458 }
 459
 460 static ssize_t fanotify_read(struct file *file, char __user *buf,
 461                              size_t count, loff_t *pos)
 462 {
 463         struct fsnotify_group *group;
 464         struct fanotify_event *event;
 465         char __user *start;
 466         int ret;
 467         DEFINE_WAIT_FUNC(wait, woken_wake_function);
 468
 469         start = buf;
 470         group = file->private_data;
 471
 472         pr_debug("%s: group=%p\n", __func__, group);
 473
 474         add_wait_queue(&group->notification_waitq, &wait);
 475         while (1) {
 476                 /*
 477                  * User can supply arbitrarily large buffer. Avoid softlockups
 478                  * in case there are lots of available events.
 479                  */
 480                 cond_resched();
 481                 event = get_one_event(group, count);
 482                 if (IS_ERR(event)) {
 483                         ret = PTR_ERR(event);
 484                         break;
 485                 }
 486
 487                 if (!event) {
 488                         ret = -EAGAIN;
 489                         if (file->f_flags & O_NONBLOCK)
 490                                 break;
 491
 492                         ret = -ERESTARTSYS;
 493                         if (signal_pending(current))
 494                                 break;
 495
 496                         if (start != buf)
 497                                 break;
 498
 499                         wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
 500                         continue;
 501                 }
 502
 503                 ret = copy_event_to_user(group, event, buf, count);
 504                 if (unlikely(ret == -EOPENSTALE)) {
 505                         /*
 506                          * We cannot report events with stale fd so drop it.
 507                          * Setting ret to 0 will continue the event loop and
 508                          * do the right thing if there are no more events to
 509                          * read (i.e. return bytes read, -EAGAIN or wait).
 510                          */
 511                         ret = 0;
 512                 }
 513
 514                 /*
 515                  * Permission events get queued to wait for response.  Other
 516                  * events can be destroyed now.
 517                  */
 518                 if (!fanotify_is_perm_event(event->mask)) {
 519                         fsnotify_destroy_event(group, &event->fse);
 520                 } else {
 521                         if (ret <= 0) {
 522                                 spin_lock(&group->notification_lock);
 523                                 finish_permission_event(group,
 524                                         FANOTIFY_PERM(event), FAN_DENY);
 525                                 wake_up(&group->fanotify_data.access_waitq);
 526                         } else {
 527                                 spin_lock(&group->notification_lock);
 528                                 list_add_tail(&event->fse.list,
 529                                         &group->fanotify_data.access_list);
 530                                 spin_unlock(&group->notification_lock);
 531                         }
 532                 }
 533                 if (ret < 0)
 534                         break;
 535                 buf += ret;
 536                 count -= ret;
 537         }
 538         remove_wait_queue(&group->notification_waitq, &wait);
 539
 540         if (start != buf && ret != -EFAULT)
 541                 ret = buf - start;
 542         return ret;
 543 }
 544
 545 static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
 546 {
 547         struct fanotify_response response = { .fd = -1, .response = -1 };
 548         struct fsnotify_group *group;
 549         int ret;
 550
 551         if (!IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS))
 552                 return -EINVAL;
 553
 554         group = file->private_data;
 555
 556         if (count < sizeof(response))
 557                 return -EINVAL;
 558
 559         count = sizeof(response);
 560
 561         pr_debug("%s: group=%p count=%zu\n", __func__, group, count);
 562
 563         if (copy_from_user(&response, buf, count))
 564                 return -EFAULT;
 565
 566         ret = process_access_response(group, &response);
 567         if (ret < 0)
 568                 count = ret;
 569
 570         return count;
 571 }
 572
 573 static int fanotify_release(struct inode *ignored, struct file *file)
 574 {
 575         struct fsnotify_group *group = file->private_data;
 576
 577         /*
 578          * Stop new events from arriving in the notification queue. since
 579          * userspace cannot use fanotify fd anymore, no event can enter or
 580          * leave access_list by now either.
 581          */
 582         fsnotify_group_stop_queueing(group);
 583
 584         /*
 585          * Process all permission events on access_list and notification queue
 586          * and simulate reply from userspace.
 587          */
 588         spin_lock(&group->notification_lock);
 589         while (!list_empty(&group->fanotify_data.access_list)) {
 590                 struct fanotify_perm_event *event;
 591
 592                 event = list_first_entry(&group->fanotify_data.access_list,
 593                                 struct fanotify_perm_event, fae.fse.list);
 594                 list_del_init(&event->fae.fse.list);
 595                 finish_permission_event(group, event, FAN_ALLOW);
 596                 spin_lock(&group->notification_lock);
 597         }
 598
 599         /*
 600          * Destroy all non-permission events. For permission events just
 601          * dequeue them and set the response. They will be freed once the
 602          * response is consumed and fanotify_get_response() returns.
 603          */
 604         while (!fsnotify_notify_queue_is_empty(group)) {
 605                 struct fanotify_event *event;
 606
 607                 event = FANOTIFY_E(fsnotify_remove_first_event(group));
 608                 if (!(event->mask & FANOTIFY_PERM_EVENTS)) {
 609                         spin_unlock(&group->notification_lock);
 610                         fsnotify_destroy_event(group, &event->fse);
 611                 } else {
 612                         finish_permission_event(group, FANOTIFY_PERM(event),
 613                                                 FAN_ALLOW);
 614                 }
 615                 spin_lock(&group->notification_lock);
 616         }
 617         spin_unlock(&group->notification_lock);
 618
 619         /* Response for all permission events it set, wakeup waiters */
 620         wake_up(&group->fanotify_data.access_waitq);
 621
 622         /* matches the fanotify_init->fsnotify_alloc_group */
 623         fsnotify_destroy_group(group);
 624
 625         return 0;
 626 }
 627
 628 static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 629 {
 630         struct fsnotify_group *group;
 631         struct fsnotify_event *fsn_event;
 632         void __user *p;
 633         int ret = -ENOTTY;
 634         size_t send_len = 0;
 635
 636         group = file->private_data;
 637
 638         p = (void __user *) arg;
 639
 640         switch (cmd) {
 641         case FIONREAD:
 642                 spin_lock(&group->notification_lock);
 643                 list_for_each_entry(fsn_event, &group->notification_list, list)
 644                         send_len += FAN_EVENT_METADATA_LEN;
 645                 spin_unlock(&group->notification_lock);
 646                 ret = put_user(send_len, (int __user *) p);
 647                 break;
 648         }
 649
 650         return ret;
 651 }
 652
 653 static const struct file_operations fanotify_fops = {
 654         .show_fdinfo    = fanotify_show_fdinfo,
 655         .poll           = fanotify_poll,
 656         .read           = fanotify_read,
 657         .write          = fanotify_write,
 658         .fasync         = NULL,
 659         .release        = fanotify_release,
 660         .unlocked_ioctl = fanotify_ioctl,
 661         .compat_ioctl   = compat_ptr_ioctl,
 662         .llseek         = noop_llseek,
 663 };
 664
 665 static int fanotify_find_path(int dfd, const char __user *filename,
 666                               struct path *path, unsigned int flags, __u64 mask,
 667                               unsigned int obj_type)
 668 {
 669         int ret;
 670
 671         pr_debug("%s: dfd=%d filename=%p flags=%x\n", __func__,
 672                  dfd, filename, flags);
 673
 674         if (filename == NULL) {
 675                 struct fd f = fdget(dfd);
 676
 677                 ret = -EBADF;
 678                 if (!f.file)
 679                         goto out;
 680
 681                 ret = -ENOTDIR;
 682                 if ((flags & FAN_MARK_ONLYDIR) &&
 683                     !(S_ISDIR(file_inode(f.file)->i_mode))) {
 684                         fdput(f);
 685                         goto out;
 686                 }
 687
 688                 *path = f.file->f_path;
 689                 path_get(path);
 690                 fdput(f);
 691         } else {
 692                 unsigned int lookup_flags = 0;
 693
 694                 if (!(flags & FAN_MARK_DONT_FOLLOW))
 695                         lookup_flags |= LOOKUP_FOLLOW;
 696                 if (flags & FAN_MARK_ONLYDIR)
 697                         lookup_flags |= LOOKUP_DIRECTORY;
 698
 699                 ret = user_path_at(dfd, filename, lookup_flags, path);
 700                 if (ret)
 701                         goto out;
 702         }
 703
 704         /* you can only watch an inode if you have read permissions on it */
 705         ret = inode_permission(path->dentry->d_inode, MAY_READ);
 706         if (ret) {
 707                 path_put(path);
 708                 goto out;
 709         }
 710
 711         ret = security_path_notify(path, mask, obj_type);
 712         if (ret)
 713                 path_put(path);
 714
 715 out:
 716         return ret;
 717 }
 718
 719 static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
 720                                             __u32 mask, unsigned int flags,
 721                                             __u32 umask, int *destroy)
 722 {
 723         __u32 oldmask = 0;
 724
 725         /* umask bits cannot be removed by user */
 726         mask &= ~umask;
 727         spin_lock(&fsn_mark->lock);
 728         if (!(flags & FAN_MARK_IGNORED_MASK)) {
 729                 oldmask = fsn_mark->mask;
 730                 fsn_mark->mask &= ~mask;
 731         } else {
 732                 fsn_mark->ignored_mask &= ~mask;
 733         }
 734         /*
 735          * We need to keep the mark around even if remaining mask cannot
 736          * result in any events (e.g. mask == FAN_ONDIR) to support incremenal
 737          * changes to the mask.
 738          * Destroy mark when only umask bits remain.
 739          */
 740         *destroy = !((fsn_mark->mask | fsn_mark->ignored_mask) & ~umask);
 741         spin_unlock(&fsn_mark->lock);
 742
 743         return mask & oldmask;
 744 }
 745
 746 static int fanotify_remove_mark(struct fsnotify_group *group,
 747                                 fsnotify_connp_t *connp, __u32 mask,
 748                                 unsigned int flags, __u32 umask)
 749 {
 750         struct fsnotify_mark *fsn_mark = NULL;
 751         __u32 removed;
 752         int destroy_mark;
 753
 754         mutex_lock(&group->mark_mutex);
 755         fsn_mark = fsnotify_find_mark(connp, group);
 756         if (!fsn_mark) {
 757                 mutex_unlock(&group->mark_mutex);
 758                 return -ENOENT;
 759         }
 760
 761         removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags,
 762                                                  umask, &destroy_mark);
 763         if (removed & fsnotify_conn_mask(fsn_mark->connector))
 764                 fsnotify_recalc_mask(fsn_mark->connector);
 765         if (destroy_mark)
 766                 fsnotify_detach_mark(fsn_mark);
 767         mutex_unlock(&group->mark_mutex);
 768         if (destroy_mark)
 769                 fsnotify_free_mark(fsn_mark);
 770
 771         /* matches the fsnotify_find_mark() */
 772         fsnotify_put_mark(fsn_mark);
 773         return 0;
 774 }
 775
 776 static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
 777                                          struct vfsmount *mnt, __u32 mask,
 778                                          unsigned int flags, __u32 umask)
 779 {
 780         return fanotify_remove_mark(group, &real_mount(mnt)->mnt_fsnotify_marks,
 781                                     mask, flags, umask);
 782 }
 783
 784 static int fanotify_remove_sb_mark(struct fsnotify_group *group,
 785                                    struct super_block *sb, __u32 mask,
 786                                    unsigned int flags, __u32 umask)
 787 {
 788         return fanotify_remove_mark(group, &sb->s_fsnotify_marks, mask,
 789                                     flags, umask);
 790 }
 791
 792 static int fanotify_remove_inode_mark(struct fsnotify_group *group,
 793                                       struct inode *inode, __u32 mask,
 794                                       unsigned int flags, __u32 umask)
 795 {
 796         return fanotify_remove_mark(group, &inode->i_fsnotify_marks, mask,
 797                                     flags, umask);
 798 }
 799
 800 static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
 801                                        __u32 mask,
 802                                        unsigned int flags)
 803 {
 804         __u32 oldmask = -1;
 805
 806         spin_lock(&fsn_mark->lock);
 807         if (!(flags & FAN_MARK_IGNORED_MASK)) {
 808                 oldmask = fsn_mark->mask;
 809                 fsn_mark->mask |= mask;
 810         } else {
 811                 fsn_mark->ignored_mask |= mask;
 812                 if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
 813                         fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
 814         }
 815         spin_unlock(&fsn_mark->lock);
 816
 817         return mask & ~oldmask;
 818 }
 819
 820 static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
 821                                                    fsnotify_connp_t *connp,
 822                                                    unsigned int type,
 823                                                    __kernel_fsid_t *fsid)
 824 {
 825         struct fsnotify_mark *mark;
 826         int ret;
 827
 828         if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
 829                 return ERR_PTR(-ENOSPC);
 830
 831         mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
 832         if (!mark)
 833                 return ERR_PTR(-ENOMEM);
 834
 835         fsnotify_init_mark(mark, group);
 836         ret = fsnotify_add_mark_locked(mark, connp, type, 0, fsid);
 837         if (ret) {
 838                 fsnotify_put_mark(mark);
 839                 return ERR_PTR(ret);
 840         }
 841
 842         return mark;
 843 }
 844
 845
 846 static int fanotify_add_mark(struct fsnotify_group *group,
 847                              fsnotify_connp_t *connp, unsigned int type,
 848                              __u32 mask, unsigned int flags,
 849                              __kernel_fsid_t *fsid)
 850 {
 851         struct fsnotify_mark *fsn_mark;
 852         __u32 added;
 853
 854         mutex_lock(&group->mark_mutex);
 855         fsn_mark = fsnotify_find_mark(connp, group);
 856         if (!fsn_mark) {
 857                 fsn_mark = fanotify_add_new_mark(group, connp, type, fsid);
 858                 if (IS_ERR(fsn_mark)) {
 859                         mutex_unlock(&group->mark_mutex);
 860                         return PTR_ERR(fsn_mark);
 861                 }
 862         }
 863         added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
 864         if (added & ~fsnotify_conn_mask(fsn_mark->connector))
 865                 fsnotify_recalc_mask(fsn_mark->connector);
 866         mutex_unlock(&group->mark_mutex);
 867
 868         fsnotify_put_mark(fsn_mark);
 869         return 0;
 870 }
 871
 872 static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
 873                                       struct vfsmount *mnt, __u32 mask,
 874                                       unsigned int flags, __kernel_fsid_t *fsid)
 875 {
 876         return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks,
 877                                  FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags, fsid);
 878 }
 879
 880 static int fanotify_add_sb_mark(struct fsnotify_group *group,
 881                                 struct super_block *sb, __u32 mask,
 882                                 unsigned int flags, __kernel_fsid_t *fsid)
 883 {
 884         return fanotify_add_mark(group, &sb->s_fsnotify_marks,
 885                                  FSNOTIFY_OBJ_TYPE_SB, mask, flags, fsid);
 886 }
 887
 888 static int fanotify_add_inode_mark(struct fsnotify_group *group,
 889                                    struct inode *inode, __u32 mask,
 890                                    unsigned int flags, __kernel_fsid_t *fsid)
 891 {
 892         pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
 893
 894         /*
 895          * If some other task has this inode open for write we should not add
 896          * an ignored mark, unless that ignored mark is supposed to survive
 897          * modification changes anyway.
 898          */
 899         if ((flags & FAN_MARK_IGNORED_MASK) &&
 900             !(flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
 901             inode_is_open_for_write(inode))
 902                 return 0;
 903
 904         return fanotify_add_mark(group, &inode->i_fsnotify_marks,
 905                                  FSNOTIFY_OBJ_TYPE_INODE, mask, flags, fsid);
 906 }
 907
 908 static struct fsnotify_event *fanotify_alloc_overflow_event(void)
 909 {
 910         struct fanotify_event *oevent;
 911
 912         oevent = kmalloc(sizeof(*oevent), GFP_KERNEL_ACCOUNT);
 913         if (!oevent)
 914                 return NULL;
 915
 916         fanotify_init_event(oevent, 0, FS_Q_OVERFLOW);
 917         oevent->type = FANOTIFY_EVENT_TYPE_OVERFLOW;
 918
 919         return &oevent->fse;
 920 }
 921
 922 /* fanotify syscalls */
 923 SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 924 {
 925         struct fsnotify_group *group;
 926         int f_flags, fd;
 927         struct user_struct *user;
 928         unsigned int fid_mode = flags & FANOTIFY_FID_BITS;
 929         unsigned int class = flags & FANOTIFY_CLASS_BITS;
 930
 931         pr_debug("%s: flags=%x event_f_flags=%x\n",
 932                  __func__, flags, event_f_flags);
 933
 934         if (!capable(CAP_SYS_ADMIN))
 935                 return -EPERM;
 936
 937 #ifdef CONFIG_AUDITSYSCALL
 938         if (flags & ~(FANOTIFY_INIT_FLAGS | FAN_ENABLE_AUDIT))
 939 #else
 940         if (flags & ~FANOTIFY_INIT_FLAGS)
 941 #endif
 942                 return -EINVAL;
 943
 944         if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS)
 945                 return -EINVAL;
 946
 947         switch (event_f_flags & O_ACCMODE) {
 948         case O_RDONLY:
 949         case O_RDWR:
 950         case O_WRONLY:
 951                 break;
 952         default:
 953                 return -EINVAL;
 954         }
 955
 956         if (fid_mode && class != FAN_CLASS_NOTIF)
 957                 return -EINVAL;
 958
 959         /*
 960          * Child name is reported with parent fid so requires dir fid.
 961          * We can report both child fid and dir fid with or without name.
 962          */
 963         if ((fid_mode & FAN_REPORT_NAME) && !(fid_mode & FAN_REPORT_DIR_FID))
 964                 return -EINVAL;
 965
 966         user = get_current_user();
 967         if (atomic_read(&user->fanotify_listeners) > FANOTIFY_DEFAULT_MAX_LISTENERS) {
 968                 free_uid(user);
 969                 return -EMFILE;
 970         }
 971
 972         f_flags = O_RDWR | FMODE_NONOTIFY;
 973         if (flags & FAN_CLOEXEC)
 974                 f_flags |= O_CLOEXEC;
 975         if (flags & FAN_NONBLOCK)
 976                 f_flags |= O_NONBLOCK;
 977
 978         /* fsnotify_alloc_group takes a ref.  Dropped in fanotify_release */
 979         group = fsnotify_alloc_group(&fanotify_fsnotify_ops);
 980         if (IS_ERR(group)) {
 981                 free_uid(user);
 982                 return PTR_ERR(group);
 983         }
 984
 985         group->fanotify_data.user = user;
 986         group->fanotify_data.flags = flags;
 987         atomic_inc(&user->fanotify_listeners);
 988         group->memcg = get_mem_cgroup_from_mm(current->mm);
 989
 990         group->overflow_event = fanotify_alloc_overflow_event();
 991         if (unlikely(!group->overflow_event)) {
 992                 fd = -ENOMEM;
 993                 goto out_destroy_group;
 994         }
 995
 996         if (force_o_largefile())
 997                 event_f_flags |= O_LARGEFILE;
 998         group->fanotify_data.f_flags = event_f_flags;
 999         init_waitqueue_head(&group->fanotify_data.access_waitq);
1000         INIT_LIST_HEAD(&group->fanotify_data.access_list);
1001         switch (class) {
1002         case FAN_CLASS_NOTIF:
1003                 group->priority = FS_PRIO_0;
1004                 break;
1005         case FAN_CLASS_CONTENT:
1006                 group->priority = FS_PRIO_1;
1007                 break;
1008         case FAN_CLASS_PRE_CONTENT:
1009                 group->priority = FS_PRIO_2;
1010                 break;
1011         default:
1012                 fd = -EINVAL;
1013                 goto out_destroy_group;
1014         }
1015
1016         if (flags & FAN_UNLIMITED_QUEUE) {
1017                 fd = -EPERM;
1018                 if (!capable(CAP_SYS_ADMIN))
1019                         goto out_destroy_group;
1020                 group->max_events = UINT_MAX;
1021         } else {
1022                 group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS;
1023         }
1024
1025         if (flags & FAN_UNLIMITED_MARKS) {
1026                 fd = -EPERM;
1027                 if (!capable(CAP_SYS_ADMIN))
1028                         goto out_destroy_group;
1029                 group->fanotify_data.max_marks = UINT_MAX;
1030         } else {
1031                 group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS;
1032         }
1033
1034         if (flags & FAN_ENABLE_AUDIT) {
1035                 fd = -EPERM;
1036                 if (!capable(CAP_AUDIT_WRITE))
1037                         goto out_destroy_group;
1038         }
1039
1040         fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
1041         if (fd < 0)
1042                 goto out_destroy_group;
1043
1044         return fd;
1045
1046 out_destroy_group:
1047         fsnotify_destroy_group(group);
1048         return fd;
1049 }
1050
1051 /* Check if filesystem can encode a unique fid */
1052 static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid)
1053 {
1054         __kernel_fsid_t root_fsid;
1055         int err;
1056
1057         /*
1058          * Make sure path is not in filesystem with zero fsid (e.g. tmpfs).
1059          */
1060         err = vfs_get_fsid(path->dentry, fsid);
1061         if (err)
1062                 return err;
1063
1064         if (!fsid->val[0] && !fsid->val[1])
1065                 return -ENODEV;
1066
1067         /*
1068          * Make sure path is not inside a filesystem subvolume (e.g. btrfs)
1069          * which uses a different fsid than sb root.
1070          */
1071         err = vfs_get_fsid(path->dentry->d_sb->s_root, &root_fsid);
1072         if (err)
1073                 return err;
1074
1075         if (root_fsid.val[0] != fsid->val[0] ||
1076             root_fsid.val[1] != fsid->val[1])
1077                 return -EXDEV;
1078
1079         /*
1080          * We need to make sure that the file system supports at least
1081          * encoding a file handle so user can use name_to_handle_at() to
1082          * compare fid returned with event to the file handle of watched
1083          * objects. However, name_to_handle_at() requires that the
1084          * filesystem also supports decoding file handles.
1085          */
1086         if (!path->dentry->d_sb->s_export_op ||
1087             !path->dentry->d_sb->s_export_op->fh_to_dentry)
1088                 return -EOPNOTSUPP;
1089
1090         return 0;
1091 }
1092
1093 static int fanotify_events_supported(struct path *path, __u64 mask,
1094                                      unsigned int flags)
1095 {
1096         unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
1097
1098         /*
1099          * Some filesystems such as 'proc' acquire unusual locks when opening
1100          * files. For them fanotify permission events have high chances of
1101          * deadlocking the system - open done when reporting fanotify event
1102          * blocks on this "unusual" lock while another process holding the lock
1103          * waits for fanotify permission event to be answered. Just disallow
1104          * permission events for such filesystems.
1105          */
1106         if (mask & FANOTIFY_PERM_EVENTS &&
1107             path->mnt->mnt_sb->s_type->fs_flags & FS_DISALLOW_NOTIFY_PERM)
1108                 return -EINVAL;
1109
1110         /*
1111          * mount and sb marks are not allowed on kernel internal pseudo fs,
1112          * like pipe_mnt, because that would subscribe to events on all the
1113          * anonynous pipes in the system.
1114          *
1115          * SB_NOUSER covers all of the internal pseudo fs whose objects are not
1116          * exposed to user's mount namespace, but there are other SB_KERNMOUNT
1117          * fs, like nsfs, debugfs, for which the value of allowing sb and mount
1118          * mark is questionable. For now we leave them alone.
1119          */
1120         if (mark_type != FAN_MARK_INODE &&
1121             path->mnt->mnt_sb->s_flags & SB_NOUSER)
1122                 return -EINVAL;
1123
1124         return 0;
1125 }
1126
1127 static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
1128                             int dfd, const char  __user *pathname)
1129 {
1130         struct inode *inode = NULL;
1131         struct vfsmount *mnt = NULL;
1132         struct fsnotify_group *group;
1133         struct fd f;
1134         struct path path;
1135         __kernel_fsid_t __fsid, *fsid = NULL;
1136         u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
1137         unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
1138         bool ignored = flags & FAN_MARK_IGNORED_MASK;
1139         unsigned int obj_type, fid_mode;
1140         u32 umask = 0;
1141         int ret;
1142
1143         pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n",
1144                  __func__, fanotify_fd, flags, dfd, pathname, mask);
1145
1146         /* we only use the lower 32 bits as of right now. */
1147         if (mask & ((__u64)0xffffffff << 32))
1148                 return -EINVAL;
1149
1150         if (flags & ~FANOTIFY_MARK_FLAGS)
1151                 return -EINVAL;
1152
1153         switch (mark_type) {
1154         case FAN_MARK_INODE:
1155                 obj_type = FSNOTIFY_OBJ_TYPE_INODE;
1156                 break;
1157         case FAN_MARK_MOUNT:
1158                 obj_type = FSNOTIFY_OBJ_TYPE_VFSMOUNT;
1159                 break;
1160         case FAN_MARK_FILESYSTEM:
1161                 obj_type = FSNOTIFY_OBJ_TYPE_SB;
1162                 break;
1163         default:
1164                 return -EINVAL;
1165         }
1166
1167         switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
1168         case FAN_MARK_ADD:
1169         case FAN_MARK_REMOVE:
1170                 if (!mask)
1171                         return -EINVAL;
1172                 break;
1173         case FAN_MARK_FLUSH:
1174                 if (flags & ~(FANOTIFY_MARK_TYPE_BITS | FAN_MARK_FLUSH))
1175                         return -EINVAL;
1176                 break;
1177         default:
1178                 return -EINVAL;
1179         }
1180
1181         if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS))
1182                 valid_mask |= FANOTIFY_PERM_EVENTS;
1183
1184         if (mask & ~valid_mask)
1185                 return -EINVAL;
1186
1187         /* Event flags (ONDIR, ON_CHILD) are meaningless in ignored mask */
1188         if (ignored)
1189                 mask &= ~FANOTIFY_EVENT_FLAGS;
1190
1191         f = fdget(fanotify_fd);
1192         if (unlikely(!f.file))
1193                 return -EBADF;
1194
1195         /* verify that this is indeed an fanotify instance */
1196         ret = -EINVAL;
1197         if (unlikely(f.file->f_op != &fanotify_fops))
1198                 goto fput_and_out;
1199         group = f.file->private_data;
1200
1201         /*
1202          * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF.  These are not
1203          * allowed to set permissions events.
1204          */
1205         ret = -EINVAL;
1206         if (mask & FANOTIFY_PERM_EVENTS &&
1207             group->priority == FS_PRIO_0)
1208                 goto fput_and_out;
1209
1210         /*
1211          * Events with data type inode do not carry enough information to report
1212          * event->fd, so we do not allow setting a mask for inode events unless
1213          * group supports reporting fid.
1214          * inode events are not supported on a mount mark, because they do not
1215          * carry enough information (i.e. path) to be filtered by mount point.
1216          */
1217         fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
1218         if (mask & FANOTIFY_INODE_EVENTS &&
1219             (!fid_mode || mark_type == FAN_MARK_MOUNT))
1220                 goto fput_and_out;
1221
1222         if (flags & FAN_MARK_FLUSH) {
1223                 ret = 0;
1224                 if (mark_type == FAN_MARK_MOUNT)
1225                         fsnotify_clear_vfsmount_marks_by_group(group);
1226                 else if (mark_type == FAN_MARK_FILESYSTEM)
1227                         fsnotify_clear_sb_marks_by_group(group);
1228                 else
1229                         fsnotify_clear_inode_marks_by_group(group);
1230                 goto fput_and_out;
1231         }
1232
1233         ret = fanotify_find_path(dfd, pathname, &path, flags,
1234                         (mask & ALL_FSNOTIFY_EVENTS), obj_type);
1235         if (ret)
1236                 goto fput_and_out;
1237
1238         if (flags & FAN_MARK_ADD) {
1239                 ret = fanotify_events_supported(&path, mask, flags);
1240                 if (ret)
1241                         goto path_put_and_out;
1242         }
1243
1244         if (fid_mode) {
1245                 ret = fanotify_test_fid(&path, &__fsid);
1246                 if (ret)
1247                         goto path_put_and_out;
1248
1249                 fsid = &__fsid;
1250         }
1251
1252         /* inode held in place by reference to path; group by fget on fd */
1253         if (mark_type == FAN_MARK_INODE)
1254                 inode = path.dentry->d_inode;
1255         else
1256                 mnt = path.mnt;
1257
1258         /* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */
1259         if (mnt || !S_ISDIR(inode->i_mode)) {
1260                 mask &= ~FAN_EVENT_ON_CHILD;
1261                 umask = FAN_EVENT_ON_CHILD;
1262                 /*
1263                  * If group needs to report parent fid, register for getting
1264                  * events with parent/name info for non-directory.
1265                  */
1266                 if ((fid_mode & FAN_REPORT_DIR_FID) &&
1267                     (flags & FAN_MARK_ADD) && !ignored)
1268                         mask |= FAN_EVENT_ON_CHILD;
1269         }
1270
1271         /* create/update an inode mark */
1272         switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
1273         case FAN_MARK_ADD:
1274                 if (mark_type == FAN_MARK_MOUNT)
1275                         ret = fanotify_add_vfsmount_mark(group, mnt, mask,
1276                                                          flags, fsid);
1277                 else if (mark_type == FAN_MARK_FILESYSTEM)
1278                         ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask,
1279                                                    flags, fsid);
1280                 else
1281                         ret = fanotify_add_inode_mark(group, inode, mask,
1282                                                       flags, fsid);
1283                 break;
1284         case FAN_MARK_REMOVE:
1285                 if (mark_type == FAN_MARK_MOUNT)
1286                         ret = fanotify_remove_vfsmount_mark(group, mnt, mask,
1287                                                             flags, umask);
1288                 else if (mark_type == FAN_MARK_FILESYSTEM)
1289                         ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask,
1290                                                       flags, umask);
1291                 else
1292                         ret = fanotify_remove_inode_mark(group, inode, mask,
1293                                                          flags, umask);
1294                 break;
1295         default:
1296                 ret = -EINVAL;
1297         }
1298
1299 path_put_and_out:
1300         path_put(&path);
1301 fput_and_out:
1302         fdput(f);
1303         return ret;
1304 }
1305
1306 #ifndef CONFIG_ARCH_SPLIT_ARG64
1307 SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
1308                               __u64, mask, int, dfd,
1309                               const char  __user *, pathname)
1310 {
1311         return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname);
1312 }
1313 #endif
1314
#if defined(CONFIG_ARCH_SPLIT_ARG64) || defined(CONFIG_COMPAT)
/*
 * fanotify_mark() entry point for CONFIG_ARCH_SPLIT_ARG64 and CONFIG_COMPAT
 * builds: the mask is declared with SC_ARG64() and turned back into a single
 * __u64 with SC_VAL64() before do_fanotify_mark() is called.
 */
SYSCALL32_DEFINE6(fanotify_mark,
		  int, fanotify_fd, unsigned int, flags,
		  SC_ARG64(mask), int, dfd,
		  const char  __user *, pathname)
{
	__u64 mask64 = SC_VAL64(__u64, mask);

	return do_fanotify_mark(fanotify_fd, flags, mask64, dfd, pathname);
}
#endif
1325
1326 /*
1327  * fanotify_user_setup - Our initialization function.  Note that we cannot return
1328  * error because we have compiled-in VFS hooks.  So an (unlikely) failure here
1329  * must result in panic().
1330  */
1331 static int __init fanotify_user_setup(void)
1332 {
1333         BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 10);
1334         BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9);
1335
1336         fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
1337                                          SLAB_PANIC|SLAB_ACCOUNT);
1338         fanotify_fid_event_cachep = KMEM_CACHE(fanotify_fid_event,
1339                                                SLAB_PANIC);
1340         fanotify_path_event_cachep = KMEM_CACHE(fanotify_path_event,
1341                                                 SLAB_PANIC);
1342         if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) {
1343                 fanotify_perm_event_cachep =
1344                         KMEM_CACHE(fanotify_perm_event, SLAB_PANIC);
1345         }
1346
1347         return 0;
1348 }
1349 device_initcall(fanotify_user_setup);