GNU Linux-libre 4.19.211-gnu1
net/xdp/xdp_umem.c
// SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

#include "xdp_umem.h"
#include "xsk_queue.h"

#define XDP_UMEM_MIN_CHUNK_SIZE 2048

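/* Add a socket that has a TX ring to the umem's socket list; the list is
 * walked by the zero-copy TX path. RX-only sockets are not tracked here.
 */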
void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
        unsigned long flags;

        if (!xs->tx)
                return;

        spin_lock_irqsave(&umem->xsk_list_lock, flags);
        list_add_rcu(&xs->list, &umem->xsk_list);
        spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
}

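/* Remove a TX-capable socket from the umem's socket list. */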
void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
        unsigned long flags;

        if (!xs->tx)
                return;

        spin_lock_irqsave(&umem->xsk_list_lock, flags);
        list_del_rcu(&xs->list);
        spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
}

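/* Ask the driver whether a umem is already bound to this queue. Returns 0
 * if none (or if the driver has no ndo_bpf), a positive value if a umem is
 * already set up, or a negative errno from the driver.
 */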
int xdp_umem_query(struct net_device *dev, u16 queue_id)
{
        struct netdev_bpf bpf;

        ASSERT_RTNL();

        memset(&bpf, 0, sizeof(bpf));
        bpf.command = XDP_QUERY_XSK_UMEM;
        bpf.xsk.queue_id = queue_id;

        if (!dev->netdev_ops->ndo_bpf)
                return 0;
        return dev->netdev_ops->ndo_bpf(dev, &bpf) ?: !!bpf.xsk.umem;
}

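/* Try to bind the umem to a device and queue for zero-copy. XDP_COPY forces
 * copy mode and skips the driver entirely; XDP_ZEROCOPY turns a missing
 * driver capability or an already busy queue into an error instead of a
 * silent fallback to copy mode.
 */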
int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
                        u32 queue_id, u16 flags)
{
        bool force_zc, force_copy;
        struct netdev_bpf bpf;
        int err;

        force_zc = flags & XDP_ZEROCOPY;
        force_copy = flags & XDP_COPY;

        if (force_zc && force_copy)
                return -EINVAL;

        if (force_copy)
                return 0;

        if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_async_xmit)
                return force_zc ? -EOPNOTSUPP : 0; /* fail or fallback */

        bpf.command = XDP_QUERY_XSK_UMEM;

        rtnl_lock();
        err = xdp_umem_query(dev, queue_id);
        if (err) {
                err = err < 0 ? -EOPNOTSUPP : -EBUSY;
                goto err_rtnl_unlock;
        }

        bpf.command = XDP_SETUP_XSK_UMEM;
        bpf.xsk.umem = umem;
        bpf.xsk.queue_id = queue_id;

        err = dev->netdev_ops->ndo_bpf(dev, &bpf);
        if (err)
                goto err_rtnl_unlock;
        rtnl_unlock();

        dev_hold(dev);
        umem->dev = dev;
        umem->queue_id = queue_id;
        umem->zc = true;
        return 0;

err_rtnl_unlock:
        rtnl_unlock();
        return force_zc ? err : 0; /* fail or fallback */
}

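/* Tell the driver to tear down the umem binding for the queue and drop the
 * device reference taken in xdp_umem_assign_dev().
 */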
static void xdp_umem_clear_dev(struct xdp_umem *umem)
{
        struct netdev_bpf bpf;
        int err;

        if (umem->dev) {
                bpf.command = XDP_SETUP_XSK_UMEM;
                bpf.xsk.umem = NULL;
                bpf.xsk.queue_id = umem->queue_id;

                rtnl_lock();
                err = umem->dev->netdev_ops->ndo_bpf(umem->dev, &bpf);
                rtnl_unlock();

                if (err)
                        WARN(1, "failed to disable umem!\n");

                dev_put(umem->dev);
                umem->dev = NULL;
        }
}

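/* Mark the user pages dirty, drop the references taken by
 * get_user_pages() and free the page pointer array.
 */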
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
        unsigned int i;

        for (i = 0; i < umem->npgs; i++) {
                struct page *page = umem->pgs[i];

                set_page_dirty_lock(page);
                put_page(page);
        }

        kfree(umem->pgs);
        umem->pgs = NULL;
}

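/* Return the pinned pages to the user's RLIMIT_MEMLOCK accounting. */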
static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
        if (umem->user) {
                atomic_long_sub(umem->npgs, &umem->user->locked_vm);
                free_uid(umem->user);
        }
}

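/* Final teardown: detach from the device, destroy the fill and completion
 * queues, unpin and unaccount the user pages, and free the umem itself.
 */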
static void xdp_umem_release(struct xdp_umem *umem)
{
        xdp_umem_clear_dev(umem);

        if (umem->fq) {
                xskq_destroy(umem->fq);
                umem->fq = NULL;
        }

        if (umem->cq) {
                xskq_destroy(umem->cq);
                umem->cq = NULL;
        }

        xdp_umem_unpin_pages(umem);

        kfree(umem->pages);
        umem->pages = NULL;

        xdp_umem_unaccount_pages(umem);
        kfree(umem);
}

static void xdp_umem_release_deferred(struct work_struct *work)
{
        struct xdp_umem *umem = container_of(work, struct xdp_umem, work);

        xdp_umem_release(umem);
}

void xdp_get_umem(struct xdp_umem *umem)
{
        refcount_inc(&umem->users);
}

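/* Drop a reference. The final release is deferred to a workqueue, since it
 * takes the rtnl lock and may call into the driver.
 */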
void xdp_put_umem(struct xdp_umem *umem)
{
        if (!umem)
                return;

        if (refcount_dec_and_test(&umem->users)) {
                INIT_WORK(&umem->work, xdp_umem_release_deferred);
                schedule_work(&umem->work);
        }
}

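/* Pin the umem's user pages in memory with get_user_pages(). On a partial
 * pin, the already-pinned pages are released and -ENOMEM is returned.
 */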
static int xdp_umem_pin_pages(struct xdp_umem *umem)
{
        unsigned int gup_flags = FOLL_WRITE;
        long npgs;
        int err;

        umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs),
                            GFP_KERNEL | __GFP_NOWARN);
        if (!umem->pgs)
                return -ENOMEM;

        down_write(&current->mm->mmap_sem);
        npgs = get_user_pages(umem->address, umem->npgs,
                              gup_flags, &umem->pgs[0], NULL);
        up_write(&current->mm->mmap_sem);

        if (npgs != umem->npgs) {
                if (npgs >= 0) {
                        umem->npgs = npgs;
                        err = -ENOMEM;
                        goto out_pin;
                }
                err = npgs;
                goto out_pgs;
        }
        return 0;

out_pin:
        xdp_umem_unpin_pages(umem);
out_pgs:
        kfree(umem->pgs);
        umem->pgs = NULL;
        return err;
}

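/* Charge the pinned pages against the user's RLIMIT_MEMLOCK, unless the
 * caller has CAP_IPC_LOCK.
 */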
static int xdp_umem_account_pages(struct xdp_umem *umem)
{
        unsigned long lock_limit, new_npgs, old_npgs;

        if (capable(CAP_IPC_LOCK))
                return 0;

        lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
        umem->user = get_uid(current_user());

        do {
                old_npgs = atomic_long_read(&umem->user->locked_vm);
                new_npgs = old_npgs + umem->npgs;
                if (new_npgs > lock_limit) {
                        free_uid(umem->user);
                        umem->user = NULL;
                        return -ENOBUFS;
                }
        } while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
                                     new_npgs) != old_npgs);
        return 0;
}

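/* Validate the registration request from user space and set up the umem:
 * chunk size and headroom checks, memlock accounting, page pinning and the
 * kernel-side page address array.
 */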
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
        u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
        u64 npgs, addr = mr->addr, size = mr->len;
        unsigned int chunks, chunks_per_page;
        int err, i;

        if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
                /* Strictly speaking we could support this, if:
                 * - huge pages, or
                 * - using an IOMMU, or
                 * - making sure the memory area is consecutive
                 * but for now, we simply say "computer says no".
                 */
                return -EINVAL;
        }

        if (!is_power_of_2(chunk_size))
                return -EINVAL;

        if (!PAGE_ALIGNED(addr)) {
                /* The memory area has to be page size aligned. For
                 * simplicity, this restriction might be lifted in the
                 * future.
                 */
                return -EINVAL;
        }

        if ((addr + size) < addr)
                return -EINVAL;

        npgs = div_u64(size, PAGE_SIZE);
        if (npgs > U32_MAX)
                return -EINVAL;

        chunks = (unsigned int)div_u64(size, chunk_size);
        if (chunks == 0)
                return -EINVAL;

        chunks_per_page = PAGE_SIZE / chunk_size;
        if (chunks < chunks_per_page || chunks % chunks_per_page)
                return -EINVAL;

        headroom = ALIGN(headroom, 64);

        if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
                return -EINVAL;

        umem->address = (unsigned long)addr;
        umem->props.chunk_mask = ~((u64)chunk_size - 1);
        umem->props.size = size;
        umem->headroom = headroom;
        umem->chunk_size_nohr = chunk_size - headroom;
        umem->npgs = (u32)npgs;
        umem->pgs = NULL;
        umem->user = NULL;
        INIT_LIST_HEAD(&umem->xsk_list);
        spin_lock_init(&umem->xsk_list_lock);

        refcount_set(&umem->users, 1);

        err = xdp_umem_account_pages(umem);
        if (err)
                return err;

        err = xdp_umem_pin_pages(umem);
        if (err)
                goto out_account;

        umem->pages = kcalloc(umem->npgs, sizeof(*umem->pages), GFP_KERNEL);
        if (!umem->pages) {
                err = -ENOMEM;
                goto out_pin;
        }

        for (i = 0; i < umem->npgs; i++)
                umem->pages[i].addr = page_address(umem->pgs[i]);

        return 0;

out_pin:
        xdp_umem_unpin_pages(umem);
out_account:
        xdp_umem_unaccount_pages(umem);
        return err;
}

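/* Allocate a umem and register the user memory described by @mr. Returns a
 * valid pointer or an ERR_PTR() on failure.
 */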
struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
        struct xdp_umem *umem;
        int err;

        umem = kzalloc(sizeof(*umem), GFP_KERNEL);
        if (!umem)
                return ERR_PTR(-ENOMEM);

        err = xdp_umem_reg(umem, mr);
        if (err) {
                kfree(umem);
                return ERR_PTR(err);
        }

        return umem;
}

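/* Both a fill ring and a completion ring must be set up before the umem can
 * be bound to a device.
 */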
bool xdp_umem_validate_queues(struct xdp_umem *umem)
{
        return umem->fq && umem->cq;
}