// SPDX-License-Identifier: GPL-2.0
/*
 * linux/drivers/staging/erofs/utils.c
 *
 * Copyright (C) 2018 HUAWEI, Inc.
 *             http://www.huawei.com/
 * Created by Gao Xiang <gaoxiang25@huawei.com>
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file COPYING in the main directory of the Linux
 * distribution for more details.
 */

#include "internal.h"
#include <linux/pagevec.h>

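/*
 * Grab a page for internal use: reuse a page from the caller's free page
 * pool when one is available, otherwise fall back to the page allocator.
 * __GFP_NOFAIL is set, so this never returns NULL.
 */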
struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
{
        struct page *page;

        if (!list_empty(pool)) {
                page = lru_to_page(pool);
                list_del(&page->lru);
        } else {
                page = alloc_pages(gfp | __GFP_NOFAIL, 0);
        }
        return page;
}

/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;

#ifdef CONFIG_EROFS_FS_ZIP

/* radix_tree and the future XArray both don't use tagptr_t yet */
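/*
 * Look up the workgroup at @index under RCU and try to take a reference
 * on it. If the workgroup is found frozen (about to be reclaimed), retry
 * until the freezer either unfreezes it or removes it from the tree.
 * The exceptional-entry bit of the stored pointer is returned via @tag.
 */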
struct erofs_workgroup *erofs_find_workgroup(
        struct super_block *sb, pgoff_t index, bool *tag)
{
        struct erofs_sb_info *sbi = EROFS_SB(sb);
        struct erofs_workgroup *grp;
        int oldcount;

repeat:
        rcu_read_lock();
        grp = radix_tree_lookup(&sbi->workstn_tree, index);
        if (grp != NULL) {
                *tag = radix_tree_exceptional_entry(grp);
                grp = (void *)((unsigned long)grp &
                        ~RADIX_TREE_EXCEPTIONAL_ENTRY);

                if (erofs_workgroup_get(grp, &oldcount)) {
                        /* prefer to relax rcu read side */
                        rcu_read_unlock();
                        goto repeat;
                }

                /* decrease the shrink count bumped by erofs_workgroup_put */
                if (unlikely(oldcount == 1))
                        atomic_long_dec(&erofs_global_shrink_cnt);
                DBG_BUGON(index != grp->index);
        }
        rcu_read_unlock();
        return grp;
}

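/*
 * Insert a freshly initialized workgroup (refcount must be exactly 1)
 * into the per-superblock radix tree so that it can be found by
 * erofs_find_workgroup(). Returns 0 on success or a negative errno
 * (e.g. -EEXIST if another workgroup already occupies the slot).
 */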
int erofs_register_workgroup(struct super_block *sb,
                             struct erofs_workgroup *grp,
                             bool tag)
{
        struct erofs_sb_info *sbi;
        int err;

        /* grp shouldn't be broken or used before */
        if (unlikely(atomic_read(&grp->refcount) != 1)) {
                DBG_BUGON(1);
                return -EINVAL;
        }

        err = radix_tree_preload(GFP_NOFS);
        if (err)
                return err;

        sbi = EROFS_SB(sb);
        erofs_workstn_lock(sbi);

        if (tag)
                grp = (void *)((unsigned long)grp |
                        1UL << RADIX_TREE_EXCEPTIONAL_SHIFT);

        /*
         * Bump up the reference count before making this workgroup
         * visible to other users in order to avoid potential UAF,
         * since lookups are not serialized by erofs_workstn_lock.
         */
        __erofs_workgroup_get(grp);

        err = radix_tree_insert(&sbi->workstn_tree,
                                grp->index, grp);
        if (unlikely(err))
                /*
                 * it's safe to decrease since the workgroup isn't visible
                 * and refcount >= 2 (cannot be frozen).
                 */
                __erofs_workgroup_put(grp);

        erofs_workstn_unlock(sbi);
        radix_tree_preload_end();
        return err;
}

extern void erofs_workgroup_free_rcu(struct erofs_workgroup *grp);

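/* free the workgroup after an RCU grace period and drop the shrink count */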
static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
        atomic_long_dec(&erofs_global_shrink_cnt);
        erofs_workgroup_free_rcu(grp);
}

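/*
 * Drop one reference on @grp. When the refcount falls to 1, only the
 * workstation (radix tree) still tracks the workgroup, so it is counted
 * as reclaimable; when it reaches 0, the workgroup is freed.
 */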
int erofs_workgroup_put(struct erofs_workgroup *grp)
{
        int count = atomic_dec_return(&grp->refcount);

        if (count == 1)
                atomic_long_inc(&erofs_global_shrink_cnt);
        else if (!count)
                __erofs_workgroup_free(grp);
        return count;
}

#ifdef EROFS_FS_HAS_MANAGED_CACHE
/* for cache-managed case, customized reclaim paths exist */
static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
{
        erofs_workgroup_unfreeze(grp, 0);
        __erofs_workgroup_free(grp);
}

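/*
 * Try to reclaim a single workgroup: freeze it so that concurrent
 * lookups fail, drop its cached pages, then delete it from the radix
 * tree. Returns false without reclaiming if the workgroup is still busy.
 */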
bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
                                    struct erofs_workgroup *grp,
                                    bool cleanup)
{
        void *entry;

        /*
         * With managed cache enabled, the refcount of a workgroup
         * itself can be negative (frozen), so there is no guarantee
         * that every refcount is positive.
         */
        if (!erofs_workgroup_try_to_freeze(grp, 1))
                return false;

        /*
         * Note that all cached pages should be unlinked before the
         * workgroup is deleted from the radix tree. Otherwise, some
         * cached pages of an orphaned old workgroup could still be
         * linked after the new one becomes available.
         */
        if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
                erofs_workgroup_unfreeze(grp, 1);
                return false;
        }

        /*
         * The deletion cannot fail after the workgroup is frozen;
         * however, in order to catch unexpected race conditions, add
         * a DBG_BUGON to observe this in advance.
         */
        entry = radix_tree_delete(&sbi->workstn_tree, grp->index);
        DBG_BUGON((void *)((unsigned long)entry &
                           ~RADIX_TREE_EXCEPTIONAL_ENTRY) != grp);

        /*
         * If managed cache is enabled, the last reference is the one
         * held by the workstation (radix tree) itself.
         */
        erofs_workgroup_unfreeze_final(grp);
        return true;
}

#else
/* for nocache case, no customized reclaim path at all */
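/*
 * Without managed cache, a workgroup whose refcount has dropped to 1 is
 * referenced only by the radix tree, so it can simply be deleted and put.
 */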
bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
                                    struct erofs_workgroup *grp,
                                    bool cleanup)
{
        int cnt = atomic_read(&grp->refcount);
        void *entry;

        DBG_BUGON(cnt <= 0);
        DBG_BUGON(cleanup && cnt != 1);

        if (cnt > 1)
                return false;

        entry = radix_tree_delete(&sbi->workstn_tree, grp->index);
        DBG_BUGON((void *)((unsigned long)entry &
                           ~RADIX_TREE_EXCEPTIONAL_ENTRY) != grp);

        /* (rarely) could be grabbed again when freeing */
        erofs_workgroup_put(grp);
        return true;
}

#endif

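/*
 * Walk the workstation radix tree in batches of up to PAGEVEC_SIZE
 * entries and try to release at most @nr_shrink workgroups. Returns the
 * number of workgroups actually freed.
 */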
unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
                                       unsigned long nr_shrink,
                                       bool cleanup)
{
        pgoff_t first_index = 0;
        void *batch[PAGEVEC_SIZE];
        unsigned freed = 0;

        int i, found;
repeat:
        erofs_workstn_lock(sbi);

        found = radix_tree_gang_lookup(&sbi->workstn_tree,
                batch, first_index, PAGEVEC_SIZE);

        for (i = 0; i < found; ++i) {
                struct erofs_workgroup *grp = (void *)
                        ((unsigned long)batch[i] &
                                ~RADIX_TREE_EXCEPTIONAL_ENTRY);

                first_index = grp->index + 1;

                /* try to shrink each valid workgroup */
                if (!erofs_try_to_release_workgroup(sbi, grp, cleanup))
                        continue;

                ++freed;
                if (unlikely(!--nr_shrink))
                        break;
        }
        erofs_workstn_unlock(sbi);

        if (i && nr_shrink)
                goto repeat;
        return freed;
}

#endif

/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);

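/* add a newly mounted instance to the global list walked by the shrinker */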
void erofs_register_super(struct super_block *sb)
{
        struct erofs_sb_info *sbi = EROFS_SB(sb);

        mutex_init(&sbi->umount_mutex);

        spin_lock(&erofs_sb_list_lock);
        list_add(&sbi->list, &erofs_sb_list);
        spin_unlock(&erofs_sb_list_lock);
}

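/* remove an instance from the shrinker list when it is unmounted */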
void erofs_unregister_super(struct super_block *sb)
{
        spin_lock(&erofs_sb_list_lock);
        list_del(&EROFS_SB(sb)->list);
        spin_unlock(&erofs_sb_list_lock);
}

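/* report the number of reclaimable workgroups across all mounted instances */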
unsigned long erofs_shrink_count(struct shrinker *shrink,
                                 struct shrink_control *sc)
{
        return atomic_long_read(&erofs_global_shrink_cnt);
}

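/*
 * Scan the mounted superblocks round-robin and shrink their workstations
 * until sc->nr_to_scan objects have been freed. A superblock already
 * visited in this run (same shrinker_run_no) stops the walk, and each
 * processed one is moved to the list tail for fairness.
 */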
unsigned long erofs_shrink_scan(struct shrinker *shrink,
                                struct shrink_control *sc)
{
        struct erofs_sb_info *sbi;
        struct list_head *p;

        unsigned long nr = sc->nr_to_scan;
        unsigned int run_no;
        unsigned long freed = 0;

        spin_lock(&erofs_sb_list_lock);
        do
                run_no = ++shrinker_run_no;
        while (run_no == 0);

        /* Iterate over all mounted superblocks and try to shrink them */
        p = erofs_sb_list.next;
        while (p != &erofs_sb_list) {
                sbi = list_entry(p, struct erofs_sb_info, list);

                /*
                 * We move the ones we do to the end of the list, so we stop
                 * when we see one we have already done.
                 */
                if (sbi->shrinker_run_no == run_no)
                        break;

                if (!mutex_trylock(&sbi->umount_mutex)) {
                        p = p->next;
                        continue;
                }

                spin_unlock(&erofs_sb_list_lock);
                sbi->shrinker_run_no = run_no;

#ifdef CONFIG_EROFS_FS_ZIP
                freed += erofs_shrink_workstation(sbi, nr - freed, false);
#endif

                spin_lock(&erofs_sb_list_lock);
                /* Get the next list element before we move this one */
                p = p->next;

                /*
                 * Move this one to the end of the list to provide some
                 * fairness.
                 */
                list_move_tail(&sbi->list, &erofs_sb_list);
                mutex_unlock(&sbi->umount_mutex);

                if (freed >= nr)
                        break;
        }
        spin_unlock(&erofs_sb_list_lock);
        return freed;
}