2 * Copyright (c) 2016, Mellanox Technologies inc. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #include <linux/file.h>
34 #include <linux/anon_inodes.h>
35 #include <rdma/ib_verbs.h>
36 #include <rdma/uverbs_types.h>
37 #include <linux/rcupdate.h>
38 #include <rdma/uverbs_ioctl.h>
39 #include <rdma/rdma_user_ioctl.h>
41 #include "core_priv.h"
42 #include "rdma_core.h"
/*
 * uverbs_ns_idx - split the namespace index out of an id.
 * @id: in/out; the bits selected by UVERBS_ID_NS_MASK are read and then
 *      cleared, so only the non-namespace part of the id is left behind.
 * @ns_count: number of namespaces. NOTE(review): its use is not visible in
 *            this excerpt; presumably the index is range checked against it.
 *
 * The index is (*id & UVERBS_ID_NS_MASK) >> UVERBS_ID_NS_SHIFT.
 */
44 int uverbs_ns_idx(u16 *id, unsigned int ns_count)
46 int ret = (*id & UVERBS_ID_NS_MASK) >> UVERBS_ID_NS_SHIFT;
51 *id &= ~UVERBS_ID_NS_MASK;
/*
 * uverbs_get_object - find the spec of an object id in ibdev->specs_root.
 *
 * uverbs_ns_idx() selects the bucket and strips the namespace bits from the
 * id; the remaining id is compared against the bucket's num_objects and then
 * used as an index into its objects[] array.
 *
 * NOTE(review): the rest of the parameter list and the statements executed
 * when the namespace lookup fails or the id is out of range are not visible
 * in this excerpt.
 */
55 const struct uverbs_object_spec *uverbs_get_object(const struct ib_device *ibdev,
58 const struct uverbs_root_spec *object_hash = ibdev->specs_root;
59 const struct uverbs_object_spec_hash *objects;
60 int ret = uverbs_ns_idx(&object, object_hash->num_buckets);
65 objects = object_hash->object_buckets[ret];
67 if (object >= objects->num_objects)
70 return objects->objects[object];
/*
 * uverbs_get_method - find the spec of a method id within an object spec.
 *
 * Mirrors uverbs_get_object(): uverbs_ns_idx() selects one of the object's
 * method_buckets and strips the namespace bits, then the remaining id is
 * compared against num_methods and used as an index into methods[].
 *
 * NOTE(review): the rest of the parameter list and the failure branches are
 * not visible in this excerpt.
 */
73 const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object,
76 const struct uverbs_method_spec_hash *methods;
77 int ret = uverbs_ns_idx(&method, object->num_buckets);
82 methods = object->method_buckets[ret];
83 if (method >= methods->num_methods)
86 return methods->methods[method];
/* Take an additional reference on @uobject (kref_get on uobject->ref). */
89 void uverbs_uobject_get(struct ib_uobject *uobject)
91 kref_get(&uobject->ref);
/*
 * kref release callback for an ib_uobject (handed to kref_put() by
 * uverbs_uobject_put()). The containing ib_uobject is recovered from the
 * embedded kref; when its type class sets needs_kfree_rcu the memory is
 * released through kfree_rcu().
 *
 * NOTE(review): the branch taken when needs_kfree_rcu is false is not visible
 * in this excerpt.
 */
94 static void uverbs_uobject_free(struct kref *ref)
96 struct ib_uobject *uobj =
97 container_of(ref, struct ib_uobject, ref);
99 if (uobj->type->type_class->needs_kfree_rcu)
100 kfree_rcu(uobj, rcu);
/*
 * Drop one reference on @uobject. uverbs_uobject_free() is the release
 * callback invoked by kref_put() when the count reaches zero.
 */
105 void uverbs_uobject_put(struct ib_uobject *uobject)
107 kref_put(&uobject->ref, uverbs_uobject_free);
110 static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive)
113 * When a shared access is required, we use a positive counter. Each
114 * shared access request checks that the value != -1 and increments it.
115 * Exclusive access is required for operations like write or destroy.
116 * In exclusive access mode, we check that the counter is zero (nobody
117 * claimed this object) and we set it to -1. Releasing a shared access
118 * lock is done simply by decreasing the counter. As for exclusive
119 * access locks, since only a single one of them is allowed
120 * concurrently, setting the counter to zero is enough for releasing
124 return __atomic_add_unless(&uobj->usecnt, 1, -1) == -1 ?
127 /* lock is either WRITE or DESTROY - should be exclusive */
128 return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
/*
 * alloc_uobj - allocate and minimally initialise a uobject.
 * @context: ucontext stored in the new uobject.
 * @type: object type; type->obj_size is the allocation size.
 *
 * The memory comes zeroed from kzalloc(GFP_KERNEL). The visible
 * initialisation sets the context, resets usecnt to 0 (unlocked) and
 * initialises the kref.
 *
 * Return: ERR_PTR(-ENOMEM) on one path (presumably when kzalloc() fails).
 * NOTE(review): the failure test, any further field setup and the success
 * return are not visible in this excerpt.
 */
131 static struct ib_uobject *alloc_uobj(struct ib_ucontext *context,
132 const struct uverbs_obj_type *type)
134 struct ib_uobject *uobj = kzalloc(type->obj_size, GFP_KERNEL);
137 return ERR_PTR(-ENOMEM);
139 * user_handle should be filled by the handler,
140 * The object is added to the list in the commit stage.
142 uobj->context = context;
144 atomic_set(&uobj->usecnt, 0);
145 kref_init(&uobj->ref);
/*
 * idr_add_uobj - reserve an idr slot for @uobj in its ufile.
 *
 * idr_preload(GFP_KERNEL) is called before taking ufile->idr_lock so that
 * the allocation under the spinlock can use GFP_NOWAIT. The slot is created
 * with a NULL pointer, in the range starting at 0 and bounded by
 * min(U32_MAX - 1, INT_MAX).
 *
 * Return: 0 on success, or the negative value returned by idr_alloc().
 * NOTE(review): the statements that record the allocated id and end the
 * preload section are not visible in this excerpt.
 */
150 static int idr_add_uobj(struct ib_uobject *uobj)
154 idr_preload(GFP_KERNEL);
155 spin_lock(&uobj->context->ufile->idr_lock);
158 * We start with allocating an idr pointing to NULL. This represents an
159 * object which isn't initialized yet. We'll replace it later on with
160 * the real object once we commit.
162 ret = idr_alloc(&uobj->context->ufile->idr, NULL, 0,
163 min_t(unsigned long, U32_MAX - 1, INT_MAX), GFP_NOWAIT);
167 spin_unlock(&uobj->context->ufile->idr_lock);
170 return ret < 0 ? ret : 0;
174 * It only removes it from the uobjects list, uverbs_uobject_put() is still
/*
 * Remove the entry for uobj->id from the ufile idr while holding
 * ufile->idr_lock. No reference on the uobject is dropped here.
 */
177 static void uverbs_idr_remove_uobj(struct ib_uobject *uobj)
179 spin_lock(&uobj->context->ufile->idr_lock);
180 idr_remove(&uobj->context->ufile->idr, uobj->id);
181 spin_unlock(&uobj->context->ufile->idr_lock);
184 /* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
/*
 * lookup_get callback for idr based uobjects: find @id in
 * ucontext->ufile->idr and take a reference on the result.
 *
 * ERR_PTR(-ENOENT) is produced when kref_get_unless_zero() fails, i.e. the
 * object was found but its reference count had already dropped to zero, and
 * on one earlier path (presumably when idr_find() returns NULL).
 *
 * NOTE(review): the rcu read-side calls mentioned by the inline comments,
 * the tests guarding the error assignments and the final return are not
 * visible in this excerpt. @type and @exclusive are not used by any visible
 * statement.
 */
185 static struct ib_uobject *lookup_get_idr_uobject(const struct uverbs_obj_type *type,
186 struct ib_ucontext *ucontext,
187 int id, bool exclusive)
189 struct ib_uobject *uobj;
192 /* object won't be released as we're protected in rcu */
193 uobj = idr_find(&ucontext->ufile->idr, id);
195 uobj = ERR_PTR(-ENOENT);
200 * The idr_find is guaranteed to return a pointer to something that
201 * isn't freed yet, or NULL, as the free after idr_remove goes through
202 * kfree_rcu(). However the object may still have been released and
203 * kfree() could be called at any time.
205 if (!kref_get_unless_zero(&uobj->ref))
206 uobj = ERR_PTR(-ENOENT);
213 static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *type,
214 struct ib_ucontext *ucontext,
215 int id, bool exclusive)
218 struct ib_uobject *uobject;
219 const struct uverbs_obj_fd_type *fd_type =
220 container_of(type, struct uverbs_obj_fd_type, type);
223 return ERR_PTR(-EOPNOTSUPP);
227 return ERR_PTR(-EBADF);
229 uobject = f->private_data;
231 * fget(id) ensures we are not currently running uverbs_close_fd,
232 * and the caller is expected to ensure that uverbs_close_fd is never
233 * done while a call to lookup is possible.
235 if (f->f_op != fd_type->fops) {
237 return ERR_PTR(-EBADF);
240 uverbs_uobject_get(uobject);
/*
 * rdma_lookup_get_uobject - look up an existing uobject and lock it.
 * @type: expected object type; also supplies the type class callbacks.
 * @ucontext: context the id belongs to.
 * @id: handle passed to the type class lookup_get() callback.
 * @exclusive: true to take the exclusive lock, false for a shared one.
 *
 * The object returned by lookup_get() is checked against @type and then
 * locked with uverbs_try_lock_object(). A WARN fires when
 * ucontext->cleanup_reason is set at that point. The trailing lookup_put()
 * and uverbs_uobject_put() calls undo the lookup.
 *
 * NOTE(review): the error tests, labels and return statements are not
 * visible in this excerpt; the last two calls are presumably the error
 * unwind path.
 */
244 struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
245 struct ib_ucontext *ucontext,
246 int id, bool exclusive)
248 struct ib_uobject *uobj;
251 uobj = type->type_class->lookup_get(type, ucontext, id, exclusive);
255 if (uobj->type != type) {
260 ret = uverbs_try_lock_object(uobj, exclusive);
262 WARN(ucontext->cleanup_reason,
263 "ib_uverbs: Trying to lookup_get while cleanup context\n");
269 uobj->type->type_class->lookup_put(uobj, exclusive);
270 uverbs_uobject_put(uobj);
/*
 * alloc_begin callback for idr based uobjects. Visible steps, in order:
 *   1. alloc_uobj() creates the uobject,
 *   2. idr_add_uobj() reserves an idr slot for it,
 *   3. ib_rdmacg_try_charge() charges one RDMACG_RESOURCE_HCA_OBJECT to the
 *      device of @ucontext.
 * The final uverbs_idr_remove_uobj() and uverbs_uobject_put() calls undo
 * steps 2 and 1.
 *
 * NOTE(review): the error tests, labels and returns are not visible in this
 * excerpt; the last two calls are presumably the failure unwind path.
 */
274 static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *type,
275 struct ib_ucontext *ucontext)
278 struct ib_uobject *uobj;
280 uobj = alloc_uobj(ucontext, type);
284 ret = idr_add_uobj(uobj);
288 ret = ib_rdmacg_try_charge(&uobj->cg_obj, ucontext->device,
289 RDMACG_RESOURCE_HCA_OBJECT);
296 uverbs_idr_remove_uobj(uobj);
298 uverbs_uobject_put(uobj);
/*
 * alloc_begin callback for fd based uobjects.
 *
 * Reserves an unused fd number (O_CLOEXEC), allocates the uobject with
 * alloc_uobj() and creates an anonymous inode file named fd_type->name.
 * The reserved fd is released with put_unused_fd() on the two visible
 * failure paths, and the uobject reference is dropped as well when the file
 * could not be created.
 *
 * On success the fd number is kept in uobj.id, the struct file in
 * uobj.object and the ufile pointer in the ib_uobject_file wrapper; the list
 * head is initialised and a reference is taken on the ufile.
 *
 * NOTE(review): the remaining anon_inode_getfile() arguments, the error
 * tests and the return statements are not visible in this excerpt.
 */
302 static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *type,
303 struct ib_ucontext *ucontext)
305 const struct uverbs_obj_fd_type *fd_type =
306 container_of(type, struct uverbs_obj_fd_type, type);
308 struct ib_uobject *uobj;
309 struct ib_uobject_file *uobj_file;
312 new_fd = get_unused_fd_flags(O_CLOEXEC);
314 return ERR_PTR(new_fd);
316 uobj = alloc_uobj(ucontext, type);
318 put_unused_fd(new_fd);
322 uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
323 filp = anon_inode_getfile(fd_type->name,
328 put_unused_fd(new_fd);
329 uverbs_uobject_put(uobj);
333 uobj_file->uobj.id = new_fd;
334 uobj_file->uobj.object = filp;
335 uobj_file->ufile = ucontext->ufile;
336 INIT_LIST_HEAD(&uobj->list);
337 kref_get(&uobj_file->ufile->ref);
/*
 * Start allocating a uobject of @type in @ucontext by dispatching to the
 * alloc_begin callback of the type class; its result is returned unchanged.
 */
342 struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
343 struct ib_ucontext *ucontext)
345 return type->type_class->alloc_begin(type, ucontext);
/*
 * Link @uobject into the uobjects list of its context. The list is modified
 * with context->uobjects_lock held.
 */
348 static void uverbs_uobject_add(struct ib_uobject *uobject)
350 mutex_lock(&uobject->context->uobjects_lock);
351 list_add(&uobject->list, &uobject->context->uobjects);
352 mutex_unlock(&uobject->context->uobjects_lock);
/*
 * remove_commit callback for idr based uobjects.
 *
 * Calls the destroy_object() hook of the idr type first. A non-zero result
 * is only treated specially when @why is RDMA_REMOVE_DESTROY (see the inline
 * comment). Otherwise the cgroup charge for one RDMACG_RESOURCE_HCA_OBJECT
 * is released and the id is removed from the idr.
 *
 * NOTE(review): the statement under the RDMA_REMOVE_DESTROY test and the
 * final return are not visible in this excerpt; presumably the error is
 * returned before any teardown happens.
 */
355 static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
356 enum rdma_remove_reason why)
358 const struct uverbs_obj_idr_type *idr_type =
359 container_of(uobj->type, struct uverbs_obj_idr_type,
361 int ret = idr_type->destroy_object(uobj, why);
364 * We can only fail gracefully if the user requested to destroy the
365 * object. In the rest of the cases, just remove whatever you can.
367 if (why == RDMA_REMOVE_DESTROY && ret)
370 ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
371 RDMACG_RESOURCE_HCA_OBJECT);
372 uverbs_idr_remove_uobj(uobj);
/*
 * alloc_abort callback for fd based uobjects; also called from
 * remove_commit_fd_uobject() for RDMA_REMOVE_DURING_CLEANUP.
 *
 * NOTE(review): only the local setup is visible in this excerpt (the
 * ib_uobject_file wrapper, the struct file kept in uobj->object and the fd
 * number kept in uobj.id); the statements that release them are not shown.
 */
377 static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
379 struct ib_uobject_file *uobj_file =
380 container_of(uobj, struct ib_uobject_file, uobj);
381 struct file *filp = uobj->object;
382 int id = uobj_file->uobj.id;
384 /* Unsuccessful NEW */
/*
 * remove_commit callback for fd based uobjects.
 *
 * Calls the context_closed() hook of the fd type first. A non-zero result is
 * only treated specially when @why is RDMA_REMOVE_DESTROY. For
 * RDMA_REMOVE_DURING_CLEANUP the object is torn down through
 * alloc_abort_fd_uobject(). The context pointer of the uobject is cleared,
 * which _uverbs_close_fd() checks before attempting a removal.
 *
 * NOTE(review): the statement under the RDMA_REMOVE_DESTROY test, the end of
 * the DURING_CLEANUP branch and the final return are not visible in this
 * excerpt.
 */
389 static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj,
390 enum rdma_remove_reason why)
392 const struct uverbs_obj_fd_type *fd_type =
393 container_of(uobj->type, struct uverbs_obj_fd_type, type);
394 struct ib_uobject_file *uobj_file =
395 container_of(uobj, struct ib_uobject_file, uobj);
396 int ret = fd_type->context_closed(uobj_file, why);
398 if (why == RDMA_REMOVE_DESTROY && ret)
401 if (why == RDMA_REMOVE_DURING_CLEANUP) {
402 alloc_abort_fd_uobject(uobj);
406 uobj_file->uobj.context = NULL;
/*
 * Debug check, compiled in only with CONFIG_LOCKDEP, that the usecnt of
 * @uobj matches the lock the caller claims to hold: one check warns unless
 * usecnt is -1, the other warns unless usecnt is positive.
 *
 * NOTE(review): the test on @exclusive that selects between the two checks
 * is not visible in this excerpt; presumably -1 is expected for exclusive
 * access and a positive count for shared access.
 */
410 static void assert_uverbs_usecnt(struct ib_uobject *uobj, bool exclusive)
412 #ifdef CONFIG_LOCKDEP
414 WARN_ON(atomic_read(&uobj->usecnt) != -1);
416 WARN_ON(atomic_read(&uobj->usecnt) <= 0);
/*
 * Common removal path: run the remove_commit callback of the type class and
 * finish the bookkeeping.
 *
 * If the callback fails for RDMA_REMOVE_DESTROY the object is kept: its
 * usecnt is reset to 0 (releasing the exclusive lock) and lookup_put() is
 * called. The other visible statements unlink the uobject from the context
 * list under uobjects_lock and drop the reference taken at creation.
 *
 * The context pointer is read before remove_commit runs and the local copy
 * is used afterwards (remove_commit_fd_uobject clears uobj->context).
 *
 * NOTE(review): the line separating the failure branch from the unlink and
 * put statements, and the return, are not visible in this excerpt.
 */
420 static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj,
421 enum rdma_remove_reason why)
424 struct ib_ucontext *ucontext = uobj->context;
426 ret = uobj->type->type_class->remove_commit(uobj, why);
427 if (ret && why == RDMA_REMOVE_DESTROY) {
428 /* We couldn't remove the object, so just unlock the uobject */
429 atomic_set(&uobj->usecnt, 0);
430 uobj->type->type_class->lookup_put(uobj, true);
432 mutex_lock(&ucontext->uobjects_lock);
433 list_del(&uobj->list);
434 mutex_unlock(&ucontext->uobjects_lock);
435 /* put the ref we took when we created the object */
436 uverbs_uobject_put(uobj);
442 /* This is called only for user requested DESTROY reasons */
/*
 * rdma_remove_commit_uobject - destroy a uobject on user request.
 *
 * Drops the reference taken by the preceding lookup_get, then tries to take
 * cleanup_rwsem for reading; failure to get it means context cleanup is
 * running and triggers a WARN. With the semaphore held, the exclusive lock
 * is asserted and the object is removed with RDMA_REMOVE_DESTROY.
 *
 * NOTE(review): the value returned when the trylock fails and the final
 * return are not visible in this excerpt.
 */
443 int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj)
446 struct ib_ucontext *ucontext = uobj->context;
448 /* put the ref count we took at lookup_get */
449 uverbs_uobject_put(uobj);
450 /* Cleanup is running. Calling this should have been impossible */
451 if (!down_read_trylock(&ucontext->cleanup_rwsem)) {
452 WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
455 assert_uverbs_usecnt(uobj, true);
456 ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY);
458 up_read(&ucontext->cleanup_rwsem);
/*
 * remove_commit callback installed in null_obj_type.
 * NOTE(review): the body is not visible in this excerpt.
 */
462 static int null_obj_type_class_remove_commit(struct ib_uobject *uobj,
463 enum rdma_remove_reason why)
/*
 * Replacement type that rdma_explicit_destroy() assigns to a uobject after
 * calling the remove_commit callback of its real type. Its type class
 * supplies null_obj_type_class_remove_commit() and frees through kfree_rcu
 * (needs_kfree_rcu is true).
 */
468 static const struct uverbs_obj_type null_obj_type = {
469 .type_class = &((const struct uverbs_obj_type_class){
470 .remove_commit = null_obj_type_class_remove_commit,
472 .needs_kfree_rcu = true}),
/*
 * rdma_explicit_destroy - run the remove_commit of a uobject immediately.
 *
 * Takes cleanup_rwsem for reading (a failed trylock means context cleanup is
 * running and triggers a WARN), asserts that the caller holds the exclusive
 * lock and calls the remove_commit callback of the type class with
 * RDMA_REMOVE_DESTROY. The type of the uobject is then switched to
 * null_obj_type.
 *
 * NOTE(review): the handling of a remove_commit failure, the value returned
 * when the trylock fails and the final return are not visible in this
 * excerpt; presumably the type is only replaced on success.
 */
475 int rdma_explicit_destroy(struct ib_uobject *uobject)
478 struct ib_ucontext *ucontext = uobject->context;
480 /* Cleanup is running. Calling this should have been impossible */
481 if (!down_read_trylock(&ucontext->cleanup_rwsem)) {
482 WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
485 assert_uverbs_usecnt(uobject, true);
486 ret = uobject->type->type_class->remove_commit(uobject,
487 RDMA_REMOVE_DESTROY);
491 uobject->type = &null_obj_type;
494 up_read(&ucontext->cleanup_rwsem);
/*
 * alloc_commit callback for idr based uobjects: add the uobject to the list
 * of its context, then, under ufile->idr_lock, call idr_replace() on the
 * ufile idr. The result of idr_replace() is wrapped in WARN_ON.
 *
 * NOTE(review): the remaining idr_replace() arguments are not visible in
 * this excerpt.
 */
498 static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
500 uverbs_uobject_add(uobj);
501 spin_lock(&uobj->context->ufile->idr_lock);
503 * We already allocated this IDR with a NULL object, so
504 * this shouldn't fail.
506 WARN_ON(idr_replace(&uobj->context->ufile->idr,
508 spin_unlock(&uobj->context->ufile->idr_lock);
/*
 * alloc_commit callback for fd based uobjects: add the uobject to the list
 * of its context, bind the struct file kept in uobj->object to the fd number
 * kept in uobj.id with fd_install(), clear the id and take an extra
 * reference on the uobject.
 */
511 static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
513 struct ib_uobject_file *uobj_file =
514 container_of(uobj, struct ib_uobject_file, uobj);
516 uverbs_uobject_add(&uobj_file->uobj);
517 fd_install(uobj_file->uobj.id, uobj->object);
518 /* This shouldn't be used anymore. Use the file object instead */
519 uobj_file->uobj.id = 0;
520 /* Get another reference as we export this to the fops */
521 uverbs_uobject_get(&uobj_file->uobj);
/*
 * rdma_alloc_commit_uobject - publish a uobject created by alloc_begin.
 *
 * Normal path: with cleanup_rwsem held for reading, call the alloc_commit
 * callback of the type class and release the semaphore.
 *
 * If the semaphore cannot be taken, context cleanup is running: a WARN is
 * emitted and the object is torn down through remove_commit with
 * RDMA_REMOVE_DURING_CLEANUP; pr_warn reports a failure of that teardown.
 *
 * NOTE(review): the test guarding pr_warn, its remaining argument and the
 * return statements are not visible in this excerpt.
 */
524 int rdma_alloc_commit_uobject(struct ib_uobject *uobj)
526 /* Cleanup is running. Calling this should have been impossible */
527 if (!down_read_trylock(&uobj->context->cleanup_rwsem)) {
530 WARN(true, "ib_uverbs: Cleanup is running while allocating an uobject\n");
531 ret = uobj->type->type_class->remove_commit(uobj,
532 RDMA_REMOVE_DURING_CLEANUP);
534 pr_warn("ib_uverbs: cleanup of idr object %d failed\n",
539 uobj->type->type_class->alloc_commit(uobj);
540 up_read(&uobj->context->cleanup_rwsem);
/*
 * alloc_abort callback for idr based uobjects: remove the reserved id from
 * the idr, release the cgroup charge for one RDMACG_RESOURCE_HCA_OBJECT and
 * drop a reference on the uobject.
 */
545 static void alloc_abort_idr_uobject(struct ib_uobject *uobj)
547 uverbs_idr_remove_uobj(uobj);
548 ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
549 RDMACG_RESOURCE_HCA_OBJECT);
550 uverbs_uobject_put(uobj);
/*
 * Abort an allocation started by rdma_alloc_begin_uobject() by dispatching
 * to the alloc_abort callback of the type class.
 */
553 void rdma_alloc_abort_uobject(struct ib_uobject *uobj)
555 uobj->type->type_class->alloc_abort(uobj);
/*
 * lookup_put callback of uverbs_idr_class.
 * NOTE(review): no statement of the body is visible in this excerpt.
 */
558 static void lookup_put_idr_uobject(struct ib_uobject *uobj, bool exclusive)
/*
 * lookup_put callback of uverbs_fd_class. Operates on the struct file kept
 * in uobj->object.
 * NOTE(review): the statement following the inline comment is not visible
 * in this excerpt; presumably it drops the file reference taken by the
 * lookup.
 */
562 static void lookup_put_fd_uobject(struct ib_uobject *uobj, bool exclusive)
564 struct file *filp = uobj->object;
567 /* This indirectly calls uverbs_close_fd and frees the object */
/*
 * rdma_lookup_put_uobject - release a uobject obtained by
 * rdma_lookup_get_uobject().
 * @uobj: the object to release.
 * @exclusive: must match the mode used at lookup time.
 *
 * Asserts the expected usecnt state, calls the lookup_put callback of the
 * type class, unlocks the object (usecnt decremented for shared access,
 * reset to 0 for exclusive access) and drops a reference on the uobject.
 *
 * NOTE(review): the test on @exclusive that selects between atomic_dec and
 * atomic_set is not visible in this excerpt.
 */
571 void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive)
573 assert_uverbs_usecnt(uobj, exclusive);
574 uobj->type->type_class->lookup_put(uobj, exclusive);
576 * In order to unlock an object, either decrease its usecnt for
577 * read access or zero it in case of exclusive access. See
578 * uverbs_try_lock_object for locking schema information.
581 atomic_dec(&uobj->usecnt);
583 atomic_set(&uobj->usecnt, 0);
585 uverbs_uobject_put(uobj);
588 const struct uverbs_obj_type_class uverbs_idr_class = {
589 .alloc_begin = alloc_begin_idr_uobject,
590 .lookup_get = lookup_get_idr_uobject,
591 .alloc_commit = alloc_commit_idr_uobject,
592 .alloc_abort = alloc_abort_idr_uobject,
593 .lookup_put = lookup_put_idr_uobject,
594 .remove_commit = remove_commit_idr_uobject,
596 * When we destroy an object, we first just lock it for WRITE and
597 * actually DESTROY it in the finalize stage. So, the problematic
598 * scenario is when we just started the finalize stage of the
599 * destruction (nothing was executed yet). Now, the other thread
600 * fetched the object for READ access, but it didn't lock it yet.
601 * The DESTROY thread continues and starts destroying the object.
602 * When the other thread continues - without the RCU, it would
603 * access freed memory. However, the rcu_read_lock delays the free
604 * until the rcu_read_lock of the READ operation quits. Since the
605 * exclusive lock of the object is still taken by the DESTROY flow, the
606 * READ operation will get -EBUSY and it'll just bail out.
608 .needs_kfree_rcu = true,
/*
 * Remove the uobject behind a closing fd, serialised by
 * ufile->cleanup_mutex.
 *
 * Nothing is removed when the uobject has no context any more or when
 * cleanup_rwsem cannot be taken for reading (see the inline comment).
 * Otherwise the object is removed with RDMA_REMOVE_CLOSE. The context
 * pointer is saved in a local before the removal and that copy is used to
 * release the semaphore afterwards.
 *
 * NOTE(review): the jump taken on the early-out condition and the test
 * guarding pr_warn are not visible in this excerpt.
 */
611 static void _uverbs_close_fd(struct ib_uobject_file *uobj_file)
613 struct ib_ucontext *ucontext;
614 struct ib_uverbs_file *ufile = uobj_file->ufile;
617 mutex_lock(&uobj_file->ufile->cleanup_mutex);
619 /* uobject was either already cleaned up or is cleaned up right now anyway */
620 if (!uobj_file->uobj.context ||
621 !down_read_trylock(&uobj_file->uobj.context->cleanup_rwsem))
624 ucontext = uobj_file->uobj.context;
625 ret = _rdma_remove_commit_uobject(&uobj_file->uobj, RDMA_REMOVE_CLOSE);
626 up_read(&ucontext->cleanup_rwsem);
628 pr_warn("uverbs: unable to clean up uobject file in uverbs_close_fd.\n");
630 mutex_unlock(&ufile->cleanup_mutex);
/*
 * uverbs_close_fd - tear down the uobject stored in f->private_data.
 *
 * Removes the uobject via _uverbs_close_fd(), drops a reference on it and
 * then drops a reference on the owning ufile (released through
 * ib_uverbs_release_file). The address of the ufile kref is taken before
 * the uobject reference is dropped, so the last step does not read
 * uobj_file again.
 */
633 void uverbs_close_fd(struct file *f)
635 struct ib_uobject_file *uobj_file = f->private_data;
636 struct kref *uverbs_file_ref = &uobj_file->ufile->ref;
638 _uverbs_close_fd(uobj_file);
639 uverbs_uobject_put(&uobj_file->uobj);
640 kref_put(uverbs_file_ref, ib_uverbs_release_file);
643 void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed)
645 enum rdma_remove_reason reason = device_removed ?
646 RDMA_REMOVE_DRIVER_REMOVE : RDMA_REMOVE_CLOSE;
647 unsigned int cur_order = 0;
649 ucontext->cleanup_reason = reason;
651 * Waits for all remove_commit and alloc_commit to finish. Logically, we
652 * want to hold this forever as the context is going to be destroyed,
653 * but we'll release it since it causes a "held lock freed" BUG message.
655 down_write(&ucontext->cleanup_rwsem);
657 while (!list_empty(&ucontext->uobjects)) {
658 struct ib_uobject *obj, *next_obj;
659 unsigned int next_order = UINT_MAX;
662 * This shouldn't run while executing other commands on this
663 * context. Thus, the only thing we should take care of is
664 * releasing a FD while traversing this list. The FD could be
665 * closed and released from the _release fop of this FD.
666 * In order to mitigate this, we add a lock.
667 * We take and release the lock per order traversal in order
668 * to let other threads (which might still use the FDs) chance
671 mutex_lock(&ucontext->uobjects_lock);
672 list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects,
674 if (obj->type->destroy_order == cur_order) {
678 * if we hit this WARN_ON, that means we are
679 * racing with a lookup_get.
681 WARN_ON(uverbs_try_lock_object(obj, true));
682 ret = obj->type->type_class->remove_commit(obj,
684 list_del(&obj->list);
686 pr_warn("ib_uverbs: failed to remove uobject id %d order %u\n",
688 /* put the ref we took when we created the object */
689 uverbs_uobject_put(obj);
691 next_order = min(next_order,
692 obj->type->destroy_order);
695 mutex_unlock(&ucontext->uobjects_lock);
696 cur_order = next_order;
698 up_write(&ucontext->cleanup_rwsem);
/*
 * Initialise the uobject bookkeeping of a new ucontext: no cleanup reason,
 * an empty uobjects list with its mutex, and the cleanup rwsem.
 */
701 void uverbs_initialize_ucontext(struct ib_ucontext *ucontext)
703 ucontext->cleanup_reason = 0;
704 mutex_init(&ucontext->uobjects_lock);
705 INIT_LIST_HEAD(&ucontext->uobjects);
706 init_rwsem(&ucontext->cleanup_rwsem);
/*
 * Type class for fd based uobjects: wires the *_fd_uobject callbacks defined
 * in this file into the generic alloc/lookup/remove entry points. Objects
 * of this class are not freed through kfree_rcu (needs_kfree_rcu is false).
 */
709 const struct uverbs_obj_type_class uverbs_fd_class = {
710 .alloc_begin = alloc_begin_fd_uobject,
711 .lookup_get = lookup_get_fd_uobject,
712 .alloc_commit = alloc_commit_fd_uobject,
713 .alloc_abort = alloc_abort_fd_uobject,
714 .lookup_put = lookup_put_fd_uobject,
715 .remove_commit = remove_commit_fd_uobject,
716 .needs_kfree_rcu = false,
/*
 * uverbs_get_uobject_from_context - obtain a uobject for the given access.
 *
 *   UVERBS_ACCESS_READ              shared lookup of an existing object
 *   UVERBS_ACCESS_WRITE / _DESTROY  exclusive lookup of an existing object
 *   UVERBS_ACCESS_NEW               start allocating a new object
 *
 * Return: the result of the lookup or allocation, or ERR_PTR(-EOPNOTSUPP)
 * on the remaining path (presumably the default case).
 * NOTE(review): the last parameter and the switch statement line are not
 * visible in this excerpt.
 */
719 struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_obj_type *type_attrs,
720 struct ib_ucontext *ucontext,
721 enum uverbs_obj_access access,
725 case UVERBS_ACCESS_READ:
726 return rdma_lookup_get_uobject(type_attrs, ucontext, id, false);
727 case UVERBS_ACCESS_DESTROY:
728 case UVERBS_ACCESS_WRITE:
729 return rdma_lookup_get_uobject(type_attrs, ucontext, id, true);
730 case UVERBS_ACCESS_NEW:
731 return rdma_alloc_begin_uobject(type_attrs, ucontext);
734 return ERR_PTR(-EOPNOTSUPP);
/*
 * uverbs_finalize_object - finish the access started by
 * uverbs_get_uobject_from_context().
 *
 *   UVERBS_ACCESS_READ     release the shared lookup
 *   UVERBS_ACCESS_WRITE    release the exclusive lookup
 *   UVERBS_ACCESS_DESTROY  either rdma_remove_commit_uobject() or only
 *                          release the exclusive lookup
 *   UVERBS_ACCESS_NEW      either rdma_alloc_commit_uobject() or
 *                          rdma_alloc_abort_uobject()
 *
 * NOTE(review): the last parameter and the tests choosing between the two
 * alternatives for DESTROY and NEW are not visible in this excerpt;
 * presumably the parameter says whether the handler succeeded.
 */
738 int uverbs_finalize_object(struct ib_uobject *uobj,
739 enum uverbs_obj_access access,
745 * refcounts should be handled at the object level and not at the
746 * uobject level. Refcounts of the objects themselves are done in
751 case UVERBS_ACCESS_READ:
752 rdma_lookup_put_uobject(uobj, false);
754 case UVERBS_ACCESS_WRITE:
755 rdma_lookup_put_uobject(uobj, true);
757 case UVERBS_ACCESS_DESTROY:
759 ret = rdma_remove_commit_uobject(uobj);
761 rdma_lookup_put_uobject(uobj, true);
763 case UVERBS_ACCESS_NEW:
765 ret = rdma_alloc_commit_uobject(uobj);
767 rdma_alloc_abort_uobject(uobj);
777 int uverbs_finalize_objects(struct uverbs_attr_bundle *attrs_bundle,
778 struct uverbs_attr_spec_hash * const *spec_hash,
785 for (i = 0; i < num; i++) {
786 struct uverbs_attr_bundle_hash *curr_bundle =
787 &attrs_bundle->hash[i];
788 const struct uverbs_attr_spec_hash *curr_spec_bucket =
792 for (j = 0; j < curr_bundle->num_attrs; j++) {
793 struct uverbs_attr *attr;
794 const struct uverbs_attr_spec *spec;
796 if (!uverbs_attr_is_valid_in_hash(curr_bundle, j))
799 attr = &curr_bundle->attrs[j];
800 spec = &curr_spec_bucket->attrs[j];
802 if (spec->type == UVERBS_ATTR_TYPE_IDR ||
803 spec->type == UVERBS_ATTR_TYPE_FD) {
806 current_ret = uverbs_finalize_object(attr->obj_attr.uobject,