page_pool/helpers.h

   1 /* SPDX-License-Identifier: GPL-2.0
   2  *
   3  * page_pool/helpers.h
   4  *      Author: Jesper Dangaard Brouer <netoptimizer@brouer.com>
   5  *      Copyright (C) 2016 Red Hat, Inc.
   6  */
   7
   8 /**
   9  * DOC: page_pool allocator
  10  *
  11  * The page_pool allocator is optimized for recycling pages or page fragments
  12  * used by skb packets and xdp frames.
  13  *
  14  * Basic use involves replacing any alloc_pages() calls with page_pool_alloc(),
  15  * which allocates memory with or without page splitting depending on the
  16  * requested memory size.
  17  *
  18  * If the driver knows that it always requires full pages or its allocations are
  19  * always smaller than half a page, it can use one of the more specific API
  20  * calls:
  21  *
  22  * 1. page_pool_alloc_pages(): allocate memory without page splitting when the
  23  * driver knows that the memory it needs is always bigger than half of the page
  24  * allocated from page pool. There is no cache line dirtying for 'struct page'
  25  * when a page is recycled back to the page pool.
  26  *
  27  * 2. page_pool_alloc_frag(): allocate memory with page splitting when the
  28  * driver knows that the memory it needs is always smaller than or equal to half
  29  * of the page allocated from page pool. Page splitting enables memory saving
  30  * and thus avoids TLB/cache miss for data access, but there is also some cost
  31  * to implement page splitting, mainly some cache line dirtying/bouncing for
  32  * 'struct page' and atomic operation for page->pp_frag_count.
  33  *
  34  * The API keeps track of in-flight pages in order to let API users know when
  35  * it is safe to free a page_pool object. The API users must call
  36  * page_pool_put_page() or page_pool_free_va() to free the page_pool object, or
  37  * attach the page_pool object to a page_pool-aware object like skbs marked with
  38  * skb_mark_for_recycle().
  39  *
  40  * page_pool_put_page() may be called multiple times on the same page if a page
  41  * is split into multiple fragments. For the last fragment, it will either
  42  * recycle the page, or in case of page->_refcount > 1, it will release the DMA
  43  * mapping and in-flight state accounting.
  44  *
  45  * dma_sync_single_range_for_device() is only called for the last fragment when
  46  * page_pool is created with PP_FLAG_DMA_SYNC_DEV flag, so it depends on the
  47  * last freed fragment to do the sync_for_device operation for all fragments in
  48  * the same page when a page is split. The API user must set up pool->p.max_len
  49  * and pool->p.offset correctly and ensure that page_pool_put_page() is called
  50  * with dma_sync_size being -1 for fragment API.
  51  */
  52 #ifndef _NET_PAGE_POOL_HELPERS_H
  53 #define _NET_PAGE_POOL_HELPERS_H
  54
  55 #include <net/page_pool/types.h>
  56
  57 #ifdef CONFIG_PAGE_POOL_STATS
  58 int page_pool_ethtool_stats_get_count(void);
  59 u8 *page_pool_ethtool_stats_get_strings(u8 *data);
  60 u64 *page_pool_ethtool_stats_get(u64 *data, void *stats);
  61
  62 /*
  63  * Drivers that wish to harvest page pool stats and report them to users
  64  * (perhaps via ethtool, debugfs, or another mechanism) can allocate a
  65  * struct page_pool_stats call page_pool_get_stats to get stats for the specified pool.
  66  */
  67 bool page_pool_get_stats(struct page_pool *pool,
  68                          struct page_pool_stats *stats);
  69 #else
  70 static inline int page_pool_ethtool_stats_get_count(void)
  71 {
  72         return 0;
  73 }
  74
  75 static inline u8 *page_pool_ethtool_stats_get_strings(u8 *data)
  76 {
  77         return data;
  78 }
  79
  80 static inline u64 *page_pool_ethtool_stats_get(u64 *data, void *stats)
  81 {
  82         return data;
  83 }
  84 #endif
  85
  86 /**
  87  * page_pool_dev_alloc_pages() - allocate a page.
  88  * @pool:       pool from which to allocate
  89  *
  90  * Get a page from the page allocator or page_pool caches.
  91  */
  92 static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
  93 {
  94         gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
  95
  96         return page_pool_alloc_pages(pool, gfp);
  97 }
  98
  99 /**
 100  * page_pool_dev_alloc_frag() - allocate a page fragment.
 101  * @pool: pool from which to allocate
 102  * @offset: offset to the allocated page
 103  * @size: requested size
 104  *
 105  * Get a page fragment from the page allocator or page_pool caches.
 106  *
 107  * Return:
 108  * Return allocated page fragment, otherwise return NULL.
 109  */
 110 static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
 111                                                     unsigned int *offset,
 112                                                     unsigned int size)
 113 {
 114         gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
 115
 116         return page_pool_alloc_frag(pool, offset, size, gfp);
 117 }
 118
 119 static inline struct page *page_pool_alloc(struct page_pool *pool,
 120                                            unsigned int *offset,
 121                                            unsigned int *size, gfp_t gfp)
 122 {
 123         unsigned int max_size = PAGE_SIZE << pool->p.order;
 124         struct page *page;
 125
 126         if ((*size << 1) > max_size) {
 127                 *size = max_size;
 128                 *offset = 0;
 129                 return page_pool_alloc_pages(pool, gfp);
 130         }
 131
 132         page = page_pool_alloc_frag(pool, offset, *size, gfp);
 133         if (unlikely(!page))
 134                 return NULL;
 135
 136         /* There is very likely not enough space for another fragment, so append
 137          * the remaining size to the current fragment to avoid truesize
 138          * underestimate problem.
 139          */
 140         if (pool->frag_offset + *size > max_size) {
 141                 *size = max_size - *offset;
 142                 pool->frag_offset = max_size;
 143         }
 144
 145         return page;
 146 }
 147
 148 /**
 149  * page_pool_dev_alloc() - allocate a page or a page fragment.
 150  * @pool: pool from which to allocate
 151  * @offset: offset to the allocated page
 152  * @size: in as the requested size, out as the allocated size
 153  *
 154  * Get a page or a page fragment from the page allocator or page_pool caches
 155  * depending on the requested size in order to allocate memory with least memory
 156  * utilization and performance penalty.
 157  *
 158  * Return:
 159  * Return allocated page or page fragment, otherwise return NULL.
 160  */
 161 static inline struct page *page_pool_dev_alloc(struct page_pool *pool,
 162                                                unsigned int *offset,
 163                                                unsigned int *size)
 164 {
 165         gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
 166
 167         return page_pool_alloc(pool, offset, size, gfp);
 168 }
 169
 170 static inline void *page_pool_alloc_va(struct page_pool *pool,
 171                                        unsigned int *size, gfp_t gfp)
 172 {
 173         unsigned int offset;
 174         struct page *page;
 175
 176         /* Mask off __GFP_HIGHMEM to ensure we can use page_address() */
 177         page = page_pool_alloc(pool, &offset, size, gfp & ~__GFP_HIGHMEM);
 178         if (unlikely(!page))
 179                 return NULL;
 180
 181         return page_address(page) + offset;
 182 }
 183
 184 /**
 185  * page_pool_dev_alloc_va() - allocate a page or a page fragment and return its
 186  *                            va.
 187  * @pool: pool from which to allocate
 188  * @size: in as the requested size, out as the allocated size
 189  *
 190  * This is just a thin wrapper around the page_pool_alloc() API, and
 191  * it returns va of the allocated page or page fragment.
 192  *
 193  * Return:
 194  * Return the va for the allocated page or page fragment, otherwise return NULL.
 195  */
 196 static inline void *page_pool_dev_alloc_va(struct page_pool *pool,
 197                                            unsigned int *size)
 198 {
 199         gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
 200
 201         return page_pool_alloc_va(pool, size, gfp);
 202 }
 203
 204 /**
 205  * page_pool_get_dma_dir() - Retrieve the stored DMA direction.
 206  * @pool:       pool from which page was allocated
 207  *
 208  * Get the stored dma direction. A driver might decide to store this locally
 209  * and avoid the extra cache line from page_pool to determine the direction.
 210  */
 211 static
 212 inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool)
 213 {
 214         return pool->p.dma_dir;
 215 }
 216
 217 /* pp_frag_count represents the number of writers who can update the page
 218  * either by updating skb->data or via DMA mappings for the device.
 219  * We can't rely on the page refcnt for that as we don't know who might be
 220  * holding page references and we can't reliably destroy or sync DMA mappings
 221  * of the fragments.
 222  *
 223  * When pp_frag_count reaches 0 we can either recycle the page if the page
 224  * refcnt is 1 or return it back to the memory allocator and destroy any
 225  * mappings we have.
 226  */
 227 static inline void page_pool_fragment_page(struct page *page, long nr)
 228 {
 229         atomic_long_set(&page->pp_frag_count, nr);
 230 }
 231
 232 static inline long page_pool_defrag_page(struct page *page, long nr)
 233 {
 234         long ret;
 235
 236         /* If nr == pp_frag_count then we have cleared all remaining
 237          * references to the page:
 238          * 1. 'n == 1': no need to actually overwrite it.
 239          * 2. 'n != 1': overwrite it with one, which is the rare case
 240          *              for pp_frag_count draining.
 241          *
 242          * The main advantage to doing this is that not only we avoid a atomic
 243          * update, as an atomic_read is generally a much cheaper operation than
 244          * an atomic update, especially when dealing with a page that may be
 245          * partitioned into only 2 or 3 pieces; but also unify the pp_frag_count
 246          * handling by ensuring all pages have partitioned into only 1 piece
 247          * initially, and only overwrite it when the page is partitioned into
 248          * more than one piece.
 249          */
 250         if (atomic_long_read(&page->pp_frag_count) == nr) {
 251                 /* As we have ensured nr is always one for constant case using
 252                  * the BUILD_BUG_ON(), only need to handle the non-constant case
 253                  * here for pp_frag_count draining, which is a rare case.
 254                  */
 255                 BUILD_BUG_ON(__builtin_constant_p(nr) && nr != 1);
 256                 if (!__builtin_constant_p(nr))
 257                         atomic_long_set(&page->pp_frag_count, 1);
 258
 259                 return 0;
 260         }
 261
 262         ret = atomic_long_sub_return(nr, &page->pp_frag_count);
 263         WARN_ON(ret < 0);
 264
 265         /* We are the last user here too, reset pp_frag_count back to 1 to
 266          * ensure all pages have been partitioned into 1 piece initially,
 267          * this should be the rare case when the last two fragment users call
 268          * page_pool_defrag_page() currently.
 269          */
 270         if (unlikely(!ret))
 271                 atomic_long_set(&page->pp_frag_count, 1);
 272
 273         return ret;
 274 }
 275
 276 static inline bool page_pool_is_last_frag(struct page *page)
 277 {
 278         /* If page_pool_defrag_page() returns 0, we were the last user */
 279         return page_pool_defrag_page(page, 1) == 0;
 280 }
 281
 282 /**
 283  * page_pool_put_page() - release a reference to a page pool page
 284  * @pool:       pool from which page was allocated
 285  * @page:       page to release a reference on
 286  * @dma_sync_size: how much of the page may have been touched by the device
 287  * @allow_direct: released by the consumer, allow lockless caching
 288  *
 289  * The outcome of this depends on the page refcnt. If the driver bumps
 290  * the refcnt > 1 this will unmap the page. If the page refcnt is 1
 291  * the allocator owns the page and will try to recycle it in one of the pool
 292  * caches. If PP_FLAG_DMA_SYNC_DEV is set, the page will be synced for_device
 293  * using dma_sync_single_range_for_device().
 294  */
 295 static inline void page_pool_put_page(struct page_pool *pool,
 296                                       struct page *page,
 297                                       unsigned int dma_sync_size,
 298                                       bool allow_direct)
 299 {
 300         /* When page_pool isn't compiled-in, net/core/xdp.c doesn't
 301          * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
 302          */
 303 #ifdef CONFIG_PAGE_POOL
 304         if (!page_pool_is_last_frag(page))
 305                 return;
 306
 307         page_pool_put_defragged_page(pool, page, dma_sync_size, allow_direct);
 308 #endif
 309 }
 310
 311 /**
 312  * page_pool_put_full_page() - release a reference on a page pool page
 313  * @pool:       pool from which page was allocated
 314  * @page:       page to release a reference on
 315  * @allow_direct: released by the consumer, allow lockless caching
 316  *
 317  * Similar to page_pool_put_page(), but will DMA sync the entire memory area
 318  * as configured in &page_pool_params.max_len.
 319  */
 320 static inline void page_pool_put_full_page(struct page_pool *pool,
 321                                            struct page *page, bool allow_direct)
 322 {
 323         page_pool_put_page(pool, page, -1, allow_direct);
 324 }
 325
 326 /**
 327  * page_pool_recycle_direct() - release a reference on a page pool page
 328  * @pool:       pool from which page was allocated
 329  * @page:       page to release a reference on
 330  *
 331  * Similar to page_pool_put_full_page() but caller must guarantee safe context
 332  * (e.g NAPI), since it will recycle the page directly into the pool fast cache.
 333  */
 334 static inline void page_pool_recycle_direct(struct page_pool *pool,
 335                                             struct page *page)
 336 {
 337         page_pool_put_full_page(pool, page, true);
 338 }
 339
 /* True when dma_addr_t is wider than unsigned long. */
 #define PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA     \
                 (sizeof(dma_addr_t) > sizeof(unsigned long))
 342
 343 /**
 344  * page_pool_free_va() - free a va into the page_pool
 345  * @pool: pool from which va was allocated
 346  * @va: va to be freed
 347  * @allow_direct: freed by the consumer, allow lockless caching
 348  *
 349  * Free a va allocated from page_pool_allo_va().
 350  */
 351 static inline void page_pool_free_va(struct page_pool *pool, void *va,
 352                                      bool allow_direct)
 353 {
 354         page_pool_put_page(pool, virt_to_head_page(va), -1, allow_direct);
 355 }
 356
 357 /**
 358  * page_pool_get_dma_addr() - Retrieve the stored DMA address.
 359  * @page:       page allocated from a page pool
 360  *
 361  * Fetch the DMA address of the page. The page pool to which the page belongs
 362  * must had been created with PP_FLAG_DMA_MAP.
 363  */
 364 static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
 365 {
 366         dma_addr_t ret = page->dma_addr;
 367
 368         if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA)
 369                 ret <<= PAGE_SHIFT;
 370
 371         return ret;
 372 }
 373
 374 static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
 375 {
 376         if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) {
 377                 page->dma_addr = addr >> PAGE_SHIFT;
 378
 379                 /* We assume page alignment to shave off bottom bits,
 380                  * if this "compression" doesn't work we need to drop.
 381                  */
 382                 return addr != (dma_addr_t)page->dma_addr << PAGE_SHIFT;
 383         }
 384
 385         page->dma_addr = addr;
 386         return false;
 387 }
 388
 389 static inline bool page_pool_put(struct page_pool *pool)
 390 {
 391         return refcount_dec_and_test(&pool->user_cnt);
 392 }
 393
 394 static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid)
 395 {
 396         if (unlikely(pool->p.nid != new_nid))
 397                 page_pool_update_nid(pool, new_nid);
 398 }
 399
 400 #endif /* _NET_PAGE_POOL_HELPERS_H */