// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
 */

#include <rdma/ib_user_verbs.h>
#include <rdma/ib_verbs.h>
#include <rdma/uverbs_types.h>
#include <rdma/uverbs_ioctl.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/uverbs_std_types.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
#include "devx.h"
#include "qp.h"
#include <linux/xarray.h>

#define UVERBS_MODULE_NAME mlx5_ib
#include <rdma/uverbs_named_ioctl.h>

static void dispatch_event_fd(struct list_head *fd_list, const void *data);

enum devx_obj_flags {
        DEVX_OBJ_FLAGS_INDIRECT_MKEY = 1 << 0,
        DEVX_OBJ_FLAGS_DCT = 1 << 1,
        DEVX_OBJ_FLAGS_CQ = 1 << 2,
};

struct devx_async_data {
        struct mlx5_ib_dev *mdev;
        struct list_head list;
        struct devx_async_cmd_event_file *ev_file;
        struct mlx5_async_work cb_work;
        u16 cmd_out_len;
        /* must be last field in this structure */
        struct mlx5_ib_uapi_devx_async_cmd_hdr hdr;
};

struct devx_async_event_data {
        struct list_head list; /* headed in ev_file->event_list */
        struct mlx5_ib_uapi_devx_async_event_hdr hdr;
};

/* first level XA value data structure */
struct devx_event {
        struct xarray object_ids; /* second XA level, Key = object id */
        struct list_head unaffiliated_list;
};

/* second level XA value data structure */
struct devx_obj_event {
        struct rcu_head rcu;
        struct list_head obj_sub_list;
};

struct devx_event_subscription {
        struct list_head file_list; /* headed in ev_file->
                                     * subscribed_events_list
                                     */
        struct list_head xa_list; /* headed in devx_event->unaffiliated_list or
                                   * devx_obj_event->obj_sub_list
                                   */
        struct list_head obj_list; /* headed in devx_object */
        struct list_head event_list; /* headed in ev_file->event_list or in
                                      * temp list via subscription
                                      */

        u8 is_cleaned:1;
        u32 xa_key_level1;
        u32 xa_key_level2;
        struct rcu_head rcu;
        u64 cookie;
        struct devx_async_event_file *ev_file;
        struct eventfd_ctx *eventfd;
};

struct devx_async_event_file {
        struct ib_uobject uobj;
        /* Head of events that are subscribed to this FD */
        struct list_head subscribed_events_list;
        spinlock_t lock;
        wait_queue_head_t poll_wait;
        struct list_head event_list;
        struct mlx5_ib_dev *dev;
        u8 omit_data:1;
        u8 is_overflow_err:1;
        u8 is_destroyed:1;
};

struct devx_umem {
        struct mlx5_core_dev            *mdev;
        struct ib_umem                  *umem;
        u32                             page_offset;
        int                             page_shift;
        int                             ncont;
        u32                             dinlen;
        u32                             dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)];
};

struct devx_umem_reg_cmd {
        void                            *in;
        u32                             inlen;
        u32                             out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
};

static struct mlx5_ib_ucontext *
devx_ufile2uctx(const struct uverbs_attr_bundle *attrs)
{
        return to_mucontext(ib_uverbs_get_ucontext(attrs));
}

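/*
 * Allocate a firmware user context (UCTX) and return its UID. Commands
 * stamped with this UID are validated by firmware against the capabilities
 * granted here (raw TX / internal device resources), which depend on the
 * caller holding CAP_NET_RAW / CAP_SYS_RAWIO. Returns a negative errno on
 * failure.
 */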
int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
{
        u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0};
        u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
        void *uctx;
        int err;
        u16 uid;
        u32 cap = 0;

        /* 0 means not supported */
        if (!MLX5_CAP_GEN(dev->mdev, log_max_uctx))
                return -EINVAL;

        uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx);
        if (is_user && capable(CAP_NET_RAW) &&
            (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX))
                cap |= MLX5_UCTX_CAP_RAW_TX;
        if (is_user && capable(CAP_SYS_RAWIO) &&
            (MLX5_CAP_GEN(dev->mdev, uctx_cap) &
             MLX5_UCTX_CAP_INTERNAL_DEV_RES))
                cap |= MLX5_UCTX_CAP_INTERNAL_DEV_RES;

        MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX);
        MLX5_SET(uctx, uctx, cap, cap);

        err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
        if (err)
                return err;

        uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
        return uid;
}

void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid)
{
        u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {0};
        u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};

        MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX);
        MLX5_SET(destroy_uctx_in, in, uid, uid);

        mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
}

static bool is_legacy_unaffiliated_event_num(u16 event_num)
{
        switch (event_num) {
        case MLX5_EVENT_TYPE_PORT_CHANGE:
                return true;
        default:
                return false;
        }
}

static bool is_legacy_obj_event_num(u16 event_num)
{
        switch (event_num) {
        case MLX5_EVENT_TYPE_PATH_MIG:
        case MLX5_EVENT_TYPE_COMM_EST:
        case MLX5_EVENT_TYPE_SQ_DRAINED:
        case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
        case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
        case MLX5_EVENT_TYPE_CQ_ERROR:
        case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
        case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
        case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
        case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
        case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
        case MLX5_EVENT_TYPE_DCT_DRAINED:
        case MLX5_EVENT_TYPE_COMP:
        case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
        case MLX5_EVENT_TYPE_XRQ_ERROR:
                return true;
        default:
                return false;
        }
}

static u16 get_legacy_obj_type(u16 opcode)
{
        switch (opcode) {
        case MLX5_CMD_OP_CREATE_RQ:
                return MLX5_EVENT_QUEUE_TYPE_RQ;
        case MLX5_CMD_OP_CREATE_QP:
                return MLX5_EVENT_QUEUE_TYPE_QP;
        case MLX5_CMD_OP_CREATE_SQ:
                return MLX5_EVENT_QUEUE_TYPE_SQ;
        case MLX5_CMD_OP_CREATE_DCT:
                return MLX5_EVENT_QUEUE_TYPE_DCT;
        default:
                return 0;
        }
}

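/*
 * Derive the object type for an event from the creation opcode that was
 * encoded into the upper bits of obj_id (see get_enc_obj_id()).
 */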
static u16 get_dec_obj_type(struct devx_obj *obj, u16 event_num)
{
        u16 opcode;

        opcode = (obj->obj_id >> 32) & 0xffff;

        if (is_legacy_obj_event_num(event_num))
                return get_legacy_obj_type(opcode);

        switch (opcode) {
        case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
                return (obj->obj_id >> 48);
        case MLX5_CMD_OP_CREATE_RQ:
                return MLX5_OBJ_TYPE_RQ;
        case MLX5_CMD_OP_CREATE_QP:
                return MLX5_OBJ_TYPE_QP;
        case MLX5_CMD_OP_CREATE_SQ:
                return MLX5_OBJ_TYPE_SQ;
        case MLX5_CMD_OP_CREATE_DCT:
                return MLX5_OBJ_TYPE_DCT;
        case MLX5_CMD_OP_CREATE_TIR:
                return MLX5_OBJ_TYPE_TIR;
        case MLX5_CMD_OP_CREATE_TIS:
                return MLX5_OBJ_TYPE_TIS;
        case MLX5_CMD_OP_CREATE_PSV:
                return MLX5_OBJ_TYPE_PSV;
        case MLX5_CMD_OP_CREATE_MKEY:
                return MLX5_OBJ_TYPE_MKEY;
        case MLX5_CMD_OP_CREATE_RMP:
                return MLX5_OBJ_TYPE_RMP;
        case MLX5_CMD_OP_CREATE_XRC_SRQ:
                return MLX5_OBJ_TYPE_XRC_SRQ;
        case MLX5_CMD_OP_CREATE_XRQ:
                return MLX5_OBJ_TYPE_XRQ;
        case MLX5_CMD_OP_CREATE_RQT:
                return MLX5_OBJ_TYPE_RQT;
        case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
                return MLX5_OBJ_TYPE_FLOW_COUNTER;
        case MLX5_CMD_OP_CREATE_CQ:
                return MLX5_OBJ_TYPE_CQ;
        default:
                return 0;
        }
}

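/* Extract the object type carried in the EQE for an affiliated event. */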
static u16 get_event_obj_type(unsigned long event_type, struct mlx5_eqe *eqe)
{
        switch (event_type) {
        case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
        case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
        case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
        case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
        case MLX5_EVENT_TYPE_PATH_MIG:
        case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
        case MLX5_EVENT_TYPE_COMM_EST:
        case MLX5_EVENT_TYPE_SQ_DRAINED:
        case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
        case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
                return eqe->data.qp_srq.type;
        case MLX5_EVENT_TYPE_CQ_ERROR:
        case MLX5_EVENT_TYPE_XRQ_ERROR:
                return 0;
        case MLX5_EVENT_TYPE_DCT_DRAINED:
        case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
                return MLX5_EVENT_QUEUE_TYPE_DCT;
        default:
                return MLX5_GET(affiliated_event_header, &eqe->data, obj_type);
        }
}

static u32 get_dec_obj_id(u64 obj_id)
{
        return (obj_id & 0xffffffff);
}

/*
 * As the obj_id in the firmware is not globally unique, the object type
 * must be considered when checking for a valid object id.
 * For that, the opcode of the creator command is encoded as part of the obj_id.
 */
static u64 get_enc_obj_id(u32 opcode, u32 obj_id)
{
        return ((u64)opcode << 32) | obj_id;
}

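/*
 * Map a modify/query/arm command to the obj_id that its matching create
 * command would have produced, so that the command can later be validated
 * against the object it is issued on. Returns 0 for unsupported opcodes.
 */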
static u64 devx_get_obj_id(const void *in)
{
        u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
        u64 obj_id;

        switch (opcode) {
        case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
        case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_GENERAL_OBJECT |
                                        MLX5_GET(general_obj_in_cmd_hdr, in,
                                                 obj_type) << 16,
                                        MLX5_GET(general_obj_in_cmd_hdr, in,
                                                 obj_id));
                break;
        case MLX5_CMD_OP_QUERY_MKEY:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_MKEY,
                                        MLX5_GET(query_mkey_in, in,
                                                 mkey_index));
                break;
        case MLX5_CMD_OP_QUERY_CQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
                                        MLX5_GET(query_cq_in, in, cqn));
                break;
        case MLX5_CMD_OP_MODIFY_CQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
                                        MLX5_GET(modify_cq_in, in, cqn));
                break;
        case MLX5_CMD_OP_QUERY_SQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
                                        MLX5_GET(query_sq_in, in, sqn));
                break;
        case MLX5_CMD_OP_MODIFY_SQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
                                        MLX5_GET(modify_sq_in, in, sqn));
                break;
        case MLX5_CMD_OP_QUERY_RQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
                                        MLX5_GET(query_rq_in, in, rqn));
                break;
        case MLX5_CMD_OP_MODIFY_RQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
                                        MLX5_GET(modify_rq_in, in, rqn));
                break;
        case MLX5_CMD_OP_QUERY_RMP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP,
                                        MLX5_GET(query_rmp_in, in, rmpn));
                break;
        case MLX5_CMD_OP_MODIFY_RMP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP,
                                        MLX5_GET(modify_rmp_in, in, rmpn));
                break;
        case MLX5_CMD_OP_QUERY_RQT:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
                                        MLX5_GET(query_rqt_in, in, rqtn));
                break;
        case MLX5_CMD_OP_MODIFY_RQT:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
                                        MLX5_GET(modify_rqt_in, in, rqtn));
                break;
        case MLX5_CMD_OP_QUERY_TIR:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
                                        MLX5_GET(query_tir_in, in, tirn));
                break;
        case MLX5_CMD_OP_MODIFY_TIR:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
                                        MLX5_GET(modify_tir_in, in, tirn));
                break;
        case MLX5_CMD_OP_QUERY_TIS:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
                                        MLX5_GET(query_tis_in, in, tisn));
                break;
        case MLX5_CMD_OP_MODIFY_TIS:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
                                        MLX5_GET(modify_tis_in, in, tisn));
                break;
        case MLX5_CMD_OP_QUERY_FLOW_TABLE:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE,
                                        MLX5_GET(query_flow_table_in, in,
                                                 table_id));
                break;
        case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE,
                                        MLX5_GET(modify_flow_table_in, in,
                                                 table_id));
                break;
        case MLX5_CMD_OP_QUERY_FLOW_GROUP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_GROUP,
                                        MLX5_GET(query_flow_group_in, in,
                                                 group_id));
                break;
        case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY,
                                        MLX5_GET(query_fte_in, in,
                                                 flow_index));
                break;
        case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY,
                                        MLX5_GET(set_fte_in, in, flow_index));
                break;
        case MLX5_CMD_OP_QUERY_Q_COUNTER:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_Q_COUNTER,
                                        MLX5_GET(query_q_counter_in, in,
                                                 counter_set_id));
                break;
        case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_FLOW_COUNTER,
                                        MLX5_GET(query_flow_counter_in, in,
                                                 flow_counter_id));
                break;
        case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT,
                                        MLX5_GET(general_obj_in_cmd_hdr, in,
                                                 obj_id));
                break;
        case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT,
                                        MLX5_GET(query_scheduling_element_in,
                                                 in, scheduling_element_id));
                break;
        case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT,
                                        MLX5_GET(modify_scheduling_element_in,
                                                 in, scheduling_element_id));
                break;
        case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT,
                                        MLX5_GET(add_vxlan_udp_dport_in, in,
                                                 vxlan_udp_port));
                break;
        case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY,
                                        MLX5_GET(query_l2_table_entry_in, in,
                                                 table_index));
                break;
        case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY,
                                        MLX5_GET(set_l2_table_entry_in, in,
                                                 table_index));
                break;
        case MLX5_CMD_OP_QUERY_QP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                        MLX5_GET(query_qp_in, in, qpn));
                break;
        case MLX5_CMD_OP_RST2INIT_QP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                        MLX5_GET(rst2init_qp_in, in, qpn));
                break;
        case MLX5_CMD_OP_INIT2INIT_QP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                        MLX5_GET(init2init_qp_in, in, qpn));
                break;
        case MLX5_CMD_OP_INIT2RTR_QP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                        MLX5_GET(init2rtr_qp_in, in, qpn));
                break;
        case MLX5_CMD_OP_RTR2RTS_QP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                        MLX5_GET(rtr2rts_qp_in, in, qpn));
                break;
        case MLX5_CMD_OP_RTS2RTS_QP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                        MLX5_GET(rts2rts_qp_in, in, qpn));
                break;
        case MLX5_CMD_OP_SQERR2RTS_QP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                        MLX5_GET(sqerr2rts_qp_in, in, qpn));
                break;
        case MLX5_CMD_OP_2ERR_QP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                        MLX5_GET(qp_2err_in, in, qpn));
                break;
        case MLX5_CMD_OP_2RST_QP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                        MLX5_GET(qp_2rst_in, in, qpn));
                break;
        case MLX5_CMD_OP_QUERY_DCT:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
                                        MLX5_GET(query_dct_in, in, dctn));
                break;
        case MLX5_CMD_OP_QUERY_XRQ:
        case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
        case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
                                        MLX5_GET(query_xrq_in, in, xrqn));
                break;
        case MLX5_CMD_OP_QUERY_XRC_SRQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ,
                                        MLX5_GET(query_xrc_srq_in, in,
                                                 xrc_srqn));
                break;
        case MLX5_CMD_OP_ARM_XRC_SRQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ,
                                        MLX5_GET(arm_xrc_srq_in, in, xrc_srqn));
                break;
        case MLX5_CMD_OP_QUERY_SRQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SRQ,
                                        MLX5_GET(query_srq_in, in, srqn));
                break;
        case MLX5_CMD_OP_ARM_RQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
                                        MLX5_GET(arm_rq_in, in, srq_number));
                break;
        case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
                                        MLX5_GET(drain_dct_in, in, dctn));
                break;
        case MLX5_CMD_OP_ARM_XRQ:
        case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
        case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
        case MLX5_CMD_OP_MODIFY_XRQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
                                        MLX5_GET(arm_xrq_in, in, xrqn));
                break;
        case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT:
                obj_id = get_enc_obj_id
                                (MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT,
                                 MLX5_GET(query_packet_reformat_context_in,
                                          in, packet_reformat_id));
                break;
        default:
                obj_id = 0;
        }

        return obj_id;
}

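/*
 * Check that the command in 'in' really targets the object behind 'uobj',
 * by re-encoding the uobject's type and number and comparing the result
 * with the obj_id derived from the command itself.
 */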
static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs,
                                 struct ib_uobject *uobj, const void *in)
{
        struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
        u64 obj_id = devx_get_obj_id(in);

        if (!obj_id)
                return false;

        switch (uobj_get_object_id(uobj)) {
        case UVERBS_OBJECT_CQ:
                return get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
                                      to_mcq(uobj->object)->mcq.cqn) ==
                                      obj_id;

        case UVERBS_OBJECT_SRQ:
        {
                struct mlx5_core_srq *srq = &(to_msrq(uobj->object)->msrq);
                u16 opcode;

                switch (srq->common.res) {
                case MLX5_RES_XSRQ:
                        opcode = MLX5_CMD_OP_CREATE_XRC_SRQ;
                        break;
                case MLX5_RES_XRQ:
                        opcode = MLX5_CMD_OP_CREATE_XRQ;
                        break;
                default:
                        if (!dev->mdev->issi)
                                opcode = MLX5_CMD_OP_CREATE_SRQ;
                        else
                                opcode = MLX5_CMD_OP_CREATE_RMP;
                }

                return get_enc_obj_id(opcode,
                                      to_msrq(uobj->object)->msrq.srqn) ==
                                      obj_id;
        }

        case UVERBS_OBJECT_QP:
        {
                struct mlx5_ib_qp *qp = to_mqp(uobj->object);

                if (qp->type == IB_QPT_RAW_PACKET ||
                    (qp->flags & IB_QP_CREATE_SOURCE_QPN)) {
                        struct mlx5_ib_raw_packet_qp *raw_packet_qp =
                                                         &qp->raw_packet_qp;
                        struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
                        struct mlx5_ib_sq *sq = &raw_packet_qp->sq;

                        return (get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
                                               rq->base.mqp.qpn) == obj_id ||
                                get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
                                               sq->base.mqp.qpn) == obj_id ||
                                get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
                                               rq->tirn) == obj_id ||
                                get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
                                               sq->tisn) == obj_id);
                }

                if (qp->type == MLX5_IB_QPT_DCT)
                        return get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
                                              qp->dct.mdct.mqp.qpn) == obj_id;
                return get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                      qp->ibqp.qp_num) == obj_id;
        }

        case UVERBS_OBJECT_WQ:
                return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
                                      to_mrwq(uobj->object)->core_qp.qpn) ==
                                      obj_id;

        case UVERBS_OBJECT_RWQ_IND_TBL:
                return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
                                      to_mrwq_ind_table(uobj->object)->rqtn) ==
                                      obj_id;

        case MLX5_IB_OBJECT_DEVX_OBJ:
                return ((struct devx_obj *)uobj->object)->obj_id == obj_id;

        default:
                return false;
        }
}

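/*
 * Mark the umem handles in a create/modify mailbox as valid so that
 * firmware accepts user-memory (umem) backed buffers for the command.
 */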
static void devx_set_umem_valid(const void *in)
{
        u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);

        switch (opcode) {
        case MLX5_CMD_OP_CREATE_MKEY:
                MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
                break;
        case MLX5_CMD_OP_CREATE_CQ:
        {
                void *cqc;

                MLX5_SET(create_cq_in, in, cq_umem_valid, 1);
                cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
                MLX5_SET(cqc, cqc, dbr_umem_valid, 1);
                break;
        }
        case MLX5_CMD_OP_CREATE_QP:
        {
                void *qpc;

                qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
                MLX5_SET(qpc, qpc, dbr_umem_valid, 1);
                MLX5_SET(create_qp_in, in, wq_umem_valid, 1);
                break;
        }

        case MLX5_CMD_OP_CREATE_RQ:
        {
                void *rqc, *wq;

                rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
                wq  = MLX5_ADDR_OF(rqc, rqc, wq);
                MLX5_SET(wq, wq, dbr_umem_valid, 1);
                MLX5_SET(wq, wq, wq_umem_valid, 1);
                break;
        }

        case MLX5_CMD_OP_CREATE_SQ:
        {
                void *sqc, *wq;

                sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
                wq = MLX5_ADDR_OF(sqc, sqc, wq);
                MLX5_SET(wq, wq, dbr_umem_valid, 1);
                MLX5_SET(wq, wq, wq_umem_valid, 1);
                break;
        }

        case MLX5_CMD_OP_MODIFY_CQ:
                MLX5_SET(modify_cq_in, in, cq_umem_valid, 1);
                break;

        case MLX5_CMD_OP_CREATE_RMP:
        {
                void *rmpc, *wq;

                rmpc = MLX5_ADDR_OF(create_rmp_in, in, ctx);
                wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
                MLX5_SET(wq, wq, dbr_umem_valid, 1);
                MLX5_SET(wq, wq, wq_umem_valid, 1);
                break;
        }

        case MLX5_CMD_OP_CREATE_XRQ:
        {
                void *xrqc, *wq;

                xrqc = MLX5_ADDR_OF(create_xrq_in, in, xrq_context);
                wq = MLX5_ADDR_OF(xrqc, xrqc, wq);
                MLX5_SET(wq, wq, dbr_umem_valid, 1);
                MLX5_SET(wq, wq, wq_umem_valid, 1);
                break;
        }

        case MLX5_CMD_OP_CREATE_XRC_SRQ:
        {
                void *xrc_srqc;

                MLX5_SET(create_xrc_srq_in, in, xrc_srq_umem_valid, 1);
                xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, in,
                                        xrc_srq_context_entry);
                MLX5_SET(xrc_srqc, xrc_srqc, dbr_umem_valid, 1);
                break;
        }

        default:
                return;
        }
}

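/* Return true if 'in' carries a command that creates a destroyable object. */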
static bool devx_is_obj_create_cmd(const void *in, u16 *opcode)
{
        *opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);

        switch (*opcode) {
        case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
        case MLX5_CMD_OP_CREATE_MKEY:
        case MLX5_CMD_OP_CREATE_CQ:
        case MLX5_CMD_OP_ALLOC_PD:
        case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
        case MLX5_CMD_OP_CREATE_RMP:
        case MLX5_CMD_OP_CREATE_SQ:
        case MLX5_CMD_OP_CREATE_RQ:
        case MLX5_CMD_OP_CREATE_RQT:
        case MLX5_CMD_OP_CREATE_TIR:
        case MLX5_CMD_OP_CREATE_TIS:
        case MLX5_CMD_OP_ALLOC_Q_COUNTER:
        case MLX5_CMD_OP_CREATE_FLOW_TABLE:
        case MLX5_CMD_OP_CREATE_FLOW_GROUP:
        case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
        case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
        case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
        case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
        case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
        case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
        case MLX5_CMD_OP_CREATE_QP:
        case MLX5_CMD_OP_CREATE_SRQ:
        case MLX5_CMD_OP_CREATE_XRC_SRQ:
        case MLX5_CMD_OP_CREATE_DCT:
        case MLX5_CMD_OP_CREATE_XRQ:
        case MLX5_CMD_OP_ATTACH_TO_MCG:
        case MLX5_CMD_OP_ALLOC_XRCD:
                return true;
        case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
        {
                u16 op_mod = MLX5_GET(set_fte_in, in, op_mod);

                if (op_mod == 0)
                        return true;
                return false;
        }
        case MLX5_CMD_OP_CREATE_PSV:
        {
                u8 num_psv = MLX5_GET(create_psv_in, in, num_psv);

                if (num_psv == 1)
                        return true;
                return false;
        }
        default:
                return false;
        }
}

static bool devx_is_obj_modify_cmd(const void *in)
{
        u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);

        switch (opcode) {
        case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
        case MLX5_CMD_OP_MODIFY_CQ:
        case MLX5_CMD_OP_MODIFY_RMP:
        case MLX5_CMD_OP_MODIFY_SQ:
        case MLX5_CMD_OP_MODIFY_RQ:
        case MLX5_CMD_OP_MODIFY_RQT:
        case MLX5_CMD_OP_MODIFY_TIR:
        case MLX5_CMD_OP_MODIFY_TIS:
        case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
        case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
        case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
        case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
        case MLX5_CMD_OP_RST2INIT_QP:
        case MLX5_CMD_OP_INIT2RTR_QP:
        case MLX5_CMD_OP_INIT2INIT_QP:
        case MLX5_CMD_OP_RTR2RTS_QP:
        case MLX5_CMD_OP_RTS2RTS_QP:
        case MLX5_CMD_OP_SQERR2RTS_QP:
        case MLX5_CMD_OP_2ERR_QP:
        case MLX5_CMD_OP_2RST_QP:
        case MLX5_CMD_OP_ARM_XRC_SRQ:
        case MLX5_CMD_OP_ARM_RQ:
        case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
        case MLX5_CMD_OP_ARM_XRQ:
        case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
        case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
        case MLX5_CMD_OP_MODIFY_XRQ:
                return true;
        case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
        {
                u16 op_mod = MLX5_GET(set_fte_in, in, op_mod);

                if (op_mod == 1)
                        return true;
                return false;
        }
        default:
                return false;
        }
}

static bool devx_is_obj_query_cmd(const void *in)
{
        u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);

        switch (opcode) {
        case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
        case MLX5_CMD_OP_QUERY_MKEY:
        case MLX5_CMD_OP_QUERY_CQ:
        case MLX5_CMD_OP_QUERY_RMP:
        case MLX5_CMD_OP_QUERY_SQ:
        case MLX5_CMD_OP_QUERY_RQ:
        case MLX5_CMD_OP_QUERY_RQT:
        case MLX5_CMD_OP_QUERY_TIR:
        case MLX5_CMD_OP_QUERY_TIS:
        case MLX5_CMD_OP_QUERY_Q_COUNTER:
        case MLX5_CMD_OP_QUERY_FLOW_TABLE:
        case MLX5_CMD_OP_QUERY_FLOW_GROUP:
        case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
        case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
        case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
        case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
        case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
        case MLX5_CMD_OP_QUERY_QP:
        case MLX5_CMD_OP_QUERY_SRQ:
        case MLX5_CMD_OP_QUERY_XRC_SRQ:
        case MLX5_CMD_OP_QUERY_DCT:
        case MLX5_CMD_OP_QUERY_XRQ:
        case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
        case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS:
        case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT:
                return true;
        default:
                return false;
        }
}

static bool devx_is_whitelist_cmd(void *in)
{
        u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);

        switch (opcode) {
        case MLX5_CMD_OP_QUERY_HCA_CAP:
        case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
        case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
                return true;
        default:
                return false;
        }
}

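/*
 * Resolve the UID to stamp into the command: whitelisted (read-only)
 * commands may fall back to the device's whitelist UID when the context
 * has no DEVX UID of its own; all other commands require a DEVX UID.
 */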
static int devx_get_uid(struct mlx5_ib_ucontext *c, void *cmd_in)
{
        if (devx_is_whitelist_cmd(cmd_in)) {
                struct mlx5_ib_dev *dev;

                if (c->devx_uid)
                        return c->devx_uid;

                dev = to_mdev(c->ibucontext.device);
                if (dev->devx_whitelist_uid)
                        return dev->devx_whitelist_uid;

                return -EOPNOTSUPP;
        }

        if (!c->devx_uid)
                return -EINVAL;

        return c->devx_uid;
}

static bool devx_is_general_cmd(void *in, struct mlx5_ib_dev *dev)
{
        u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);

        /* Pass all cmds for vhca_tunnel as general, tracking is done in FW */
        if ((MLX5_CAP_GEN_64(dev->mdev, vhca_tunnel_commands) &&
             MLX5_GET(general_obj_in_cmd_hdr, in, vhca_tunnel_id)) ||
            (opcode >= MLX5_CMD_OP_GENERAL_START &&
             opcode < MLX5_CMD_OP_GENERAL_END))
                return true;

        switch (opcode) {
        case MLX5_CMD_OP_QUERY_HCA_CAP:
        case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
        case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
        case MLX5_CMD_OP_QUERY_VPORT_STATE:
        case MLX5_CMD_OP_QUERY_ADAPTER:
        case MLX5_CMD_OP_QUERY_ISSI:
        case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT:
        case MLX5_CMD_OP_QUERY_ROCE_ADDRESS:
        case MLX5_CMD_OP_QUERY_VNIC_ENV:
        case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
        case MLX5_CMD_OP_GET_DROPPED_PACKET_LOG:
        case MLX5_CMD_OP_NOP:
        case MLX5_CMD_OP_QUERY_CONG_STATUS:
        case MLX5_CMD_OP_QUERY_CONG_PARAMS:
        case MLX5_CMD_OP_QUERY_CONG_STATISTICS:
        case MLX5_CMD_OP_QUERY_LAG:
                return true;
        default:
                return false;
        }
}

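/* Translate a user completion vector index into the device EQ number. */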
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(
        struct uverbs_attr_bundle *attrs)
{
        struct mlx5_ib_ucontext *c;
        struct mlx5_ib_dev *dev;
        int user_vector;
        int dev_eqn;
        int err;

        if (uverbs_copy_from(&user_vector, attrs,
                             MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC))
                return -EFAULT;

        c = devx_ufile2uctx(attrs);
        if (IS_ERR(c))
                return PTR_ERR(c);
        dev = to_mdev(c->ibucontext.device);

        err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn);
        if (err < 0)
                return err;

        if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
                           &dev_eqn, sizeof(dev_eqn)))
                return -EFAULT;

        return 0;
}

/*
 * Security note:
 * The hardware protection mechanism works like this: each device object that
 * is subject to UAR doorbells (QP/SQ/CQ) gets a UAR ID (called uar_page in
 * the device specification manual) upon its creation. Then upon doorbell,
 * hardware fetches the object context for which the doorbell was rung, and
 * validates that the UAR through which the DB was rung matches the UAR ID
 * of the object.
 * If there is no match, the doorbell is silently ignored by the hardware. Of
 * course, the user cannot ring a doorbell on a UAR that was not mapped to it.
 * Now in devx, as the devx kernel does not manipulate the QP/SQ/CQ command
 * mailboxes (except tagging them with UID), we expose to the user its UAR
 * ID, so it can embed it in these objects in the expected specification
 * format. So the only thing the user can do is hurt itself by creating a
 * QP/SQ/CQ with a UAR ID other than its own, and then in this case other
 * users may ring a doorbell on its objects.
 * The consequence of that will be that another user can schedule a QP/SQ
 * of the buggy user for execution (just insert it into the hardware schedule
 * queue or arm its CQ for event generation); no further harm is expected.
 */
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(
        struct uverbs_attr_bundle *attrs)
{
        struct mlx5_ib_ucontext *c;
        struct mlx5_ib_dev *dev;
        u32 user_idx;
        s32 dev_idx;

        c = devx_ufile2uctx(attrs);
        if (IS_ERR(c))
                return PTR_ERR(c);
        dev = to_mdev(c->ibucontext.device);

        if (uverbs_copy_from(&user_idx, attrs,
                             MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX))
                return -EFAULT;

        dev_idx = bfregn_to_uar_index(dev, &c->bfregi, user_idx, true);
        if (dev_idx < 0)
                return dev_idx;

        if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
                           &dev_idx, sizeof(dev_idx)))
                return -EFAULT;

        return 0;
}

static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
        struct uverbs_attr_bundle *attrs)
{
        struct mlx5_ib_ucontext *c;
        struct mlx5_ib_dev *dev;
        void *cmd_in = uverbs_attr_get_alloced_ptr(
                attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN);
        int cmd_out_len = uverbs_attr_get_len(attrs,
                                        MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT);
        void *cmd_out;
        int err;
        int uid;

        c = devx_ufile2uctx(attrs);
        if (IS_ERR(c))
                return PTR_ERR(c);
        dev = to_mdev(c->ibucontext.device);

        uid = devx_get_uid(c, cmd_in);
        if (uid < 0)
                return uid;

        /* Only a whitelist of general HCA commands is allowed for this method. */
        if (!devx_is_general_cmd(cmd_in, dev))
                return -EINVAL;

        cmd_out = uverbs_zalloc(attrs, cmd_out_len);
        if (IS_ERR(cmd_out))
                return PTR_ERR(cmd_out);

        MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
        err = mlx5_cmd_exec(dev->mdev, cmd_in,
                            uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN),
                            cmd_out, cmd_out_len);
        if (err)
                return err;

        return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, cmd_out,
                              cmd_out_len);
}

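/*
 * Pre-build the destroy command that matches a just-created object into
 * 'din', so teardown can be issued later without the original mailbox.
 */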
static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
                                       u32 *dinlen,
                                       u32 *obj_id)
{
        u16 obj_type = MLX5_GET(general_obj_in_cmd_hdr, in, obj_type);
        u16 uid = MLX5_GET(general_obj_in_cmd_hdr, in, uid);

        *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
        *dinlen = MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr);

        MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id);
        MLX5_SET(general_obj_in_cmd_hdr, din, uid, uid);

        switch (MLX5_GET(general_obj_in_cmd_hdr, in, opcode)) {
        case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
                MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type);
                break;

        case MLX5_CMD_OP_CREATE_UMEM:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DESTROY_UMEM);
                break;
        case MLX5_CMD_OP_CREATE_MKEY:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY);
                break;
        case MLX5_CMD_OP_CREATE_CQ:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
                break;
        case MLX5_CMD_OP_ALLOC_PD:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_PD);
                break;
        case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
                break;
        case MLX5_CMD_OP_CREATE_RMP:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RMP);
                break;
        case MLX5_CMD_OP_CREATE_SQ:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SQ);
                break;
        case MLX5_CMD_OP_CREATE_RQ:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQ);
                break;
        case MLX5_CMD_OP_CREATE_RQT:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQT);
                break;
        case MLX5_CMD_OP_CREATE_TIR:
                *obj_id = MLX5_GET(create_tir_out, out, tirn);
                MLX5_SET(destroy_tir_in, din, opcode, MLX5_CMD_OP_DESTROY_TIR);
                MLX5_SET(destroy_tir_in, din, tirn, *obj_id);
                break;
        case MLX5_CMD_OP_CREATE_TIS:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIS);
                break;
        case MLX5_CMD_OP_ALLOC_Q_COUNTER:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DEALLOC_Q_COUNTER);
                break;
        case MLX5_CMD_OP_CREATE_FLOW_TABLE:
                *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_table_in);
                *obj_id = MLX5_GET(create_flow_table_out, out, table_id);
                MLX5_SET(destroy_flow_table_in, din, other_vport,
                         MLX5_GET(create_flow_table_in,  in, other_vport));
                MLX5_SET(destroy_flow_table_in, din, vport_number,
                         MLX5_GET(create_flow_table_in,  in, vport_number));
                MLX5_SET(destroy_flow_table_in, din, table_type,
                         MLX5_GET(create_flow_table_in,  in, table_type));
                MLX5_SET(destroy_flow_table_in, din, table_id, *obj_id);
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DESTROY_FLOW_TABLE);
                break;
        case MLX5_CMD_OP_CREATE_FLOW_GROUP:
                *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_group_in);
                *obj_id = MLX5_GET(create_flow_group_out, out, group_id);
                MLX5_SET(destroy_flow_group_in, din, other_vport,
                         MLX5_GET(create_flow_group_in, in, other_vport));
                MLX5_SET(destroy_flow_group_in, din, vport_number,
                         MLX5_GET(create_flow_group_in, in, vport_number));
                MLX5_SET(destroy_flow_group_in, din, table_type,
                         MLX5_GET(create_flow_group_in, in, table_type));
                MLX5_SET(destroy_flow_group_in, din, table_id,
                         MLX5_GET(create_flow_group_in, in, table_id));
                MLX5_SET(destroy_flow_group_in, din, group_id, *obj_id);
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DESTROY_FLOW_GROUP);
                break;
        case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
                *dinlen = MLX5_ST_SZ_BYTES(delete_fte_in);
                *obj_id = MLX5_GET(set_fte_in, in, flow_index);
                MLX5_SET(delete_fte_in, din, other_vport,
                         MLX5_GET(set_fte_in,  in, other_vport));
                MLX5_SET(delete_fte_in, din, vport_number,
                         MLX5_GET(set_fte_in, in, vport_number));
                MLX5_SET(delete_fte_in, din, table_type,
                         MLX5_GET(set_fte_in, in, table_type));
                MLX5_SET(delete_fte_in, din, table_id,
                         MLX5_GET(set_fte_in, in, table_id));
                MLX5_SET(delete_fte_in, din, flow_index, *obj_id);
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
                break;
        case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
                break;
        case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT);
                break;
        case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT);
                break;
        case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
                *dinlen = MLX5_ST_SZ_BYTES(destroy_scheduling_element_in);
                *obj_id = MLX5_GET(create_scheduling_element_out, out,
                                   scheduling_element_id);
                MLX5_SET(destroy_scheduling_element_in, din,
                         scheduling_hierarchy,
                         MLX5_GET(create_scheduling_element_in, in,
                                  scheduling_hierarchy));
                MLX5_SET(destroy_scheduling_element_in, din,
                         scheduling_element_id, *obj_id);
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT);
                break;
        case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
                *dinlen = MLX5_ST_SZ_BYTES(delete_vxlan_udp_dport_in);
                *obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port);
                MLX5_SET(delete_vxlan_udp_dport_in, din, vxlan_udp_port, *obj_id);
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
                break;
        case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
                *dinlen = MLX5_ST_SZ_BYTES(delete_l2_table_entry_in);
                *obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index);
                MLX5_SET(delete_l2_table_entry_in, din, table_index, *obj_id);
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
                break;
        case MLX5_CMD_OP_CREATE_QP:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_QP);
                break;
        case MLX5_CMD_OP_CREATE_SRQ:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SRQ);
                break;
        case MLX5_CMD_OP_CREATE_XRC_SRQ:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DESTROY_XRC_SRQ);
                break;
        case MLX5_CMD_OP_CREATE_DCT:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_DCT);
                break;
        case MLX5_CMD_OP_CREATE_XRQ:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_XRQ);
                break;
        case MLX5_CMD_OP_ATTACH_TO_MCG:
                *dinlen = MLX5_ST_SZ_BYTES(detach_from_mcg_in);
                MLX5_SET(detach_from_mcg_in, din, qpn,
                         MLX5_GET(attach_to_mcg_in, in, qpn));
                memcpy(MLX5_ADDR_OF(detach_from_mcg_in, din, multicast_gid),
                       MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid),
                       MLX5_FLD_SZ_BYTES(attach_to_mcg_in, multicast_gid));
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
                break;
        case MLX5_CMD_OP_ALLOC_XRCD:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
                break;
        case MLX5_CMD_OP_CREATE_PSV:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DESTROY_PSV);
                MLX5_SET(destroy_psv_in, din, psvn,
                         MLX5_GET(create_psv_out, out, psv0_index));
                break;
        default:
                /* The entry must match one of the devx_is_obj_create_cmd() opcodes */
                WARN_ON(true);
                break;
        }
}

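/*
 * Fill the devx mkey from the create_mkey mailboxes and register it in
 * the ODP mkey xarray so that page faults can resolve this indirect mkey.
 */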
static int devx_handle_mkey_indirect(struct devx_obj *obj,
                                     struct mlx5_ib_dev *dev,
                                     void *in, void *out)
{
        struct mlx5_ib_devx_mr *devx_mr = &obj->devx_mr;
        struct mlx5_core_mkey *mkey;
        void *mkc;
        u8 key;

        mkey = &devx_mr->mmkey;
        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
        key = MLX5_GET(mkc, mkc, mkey_7_0);
        mkey->key = mlx5_idx_to_mkey(
                        MLX5_GET(create_mkey_out, out, mkey_index)) | key;
        mkey->type = MLX5_MKEY_INDIRECT_DEVX;
        mkey->iova = MLX5_GET64(mkc, mkc, start_addr);
        mkey->size = MLX5_GET64(mkc, mkc, len);
        mkey->pd = MLX5_GET(mkc, mkc, pd);
        devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size);

        return xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mkey->key), mkey,
                               GFP_KERNEL));
}

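/*
 * Vet a create_mkey command: indirect (KLM/KSM) mkeys are deferred to ODP
 * handling, while direct mkeys get their umem handle marked as valid.
 */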
static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
                                   struct devx_obj *obj,
                                   void *in, int in_len)
{
        int min_len = MLX5_BYTE_OFF(create_mkey_in, memory_key_mkey_entry) +
                        MLX5_FLD_SZ_BYTES(create_mkey_in,
                        memory_key_mkey_entry);
        void *mkc;
        u8 access_mode;

        if (in_len < min_len)
                return -EINVAL;

        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

        access_mode = MLX5_GET(mkc, mkc, access_mode_1_0);
        access_mode |= MLX5_GET(mkc, mkc, access_mode_4_2) << 2;

        if (access_mode == MLX5_MKC_ACCESS_MODE_KLMS ||
                access_mode == MLX5_MKC_ACCESS_MODE_KSM) {
                if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
                        obj->flags |= DEVX_OBJ_FLAGS_INDIRECT_MKEY;
                return 0;
        }

        MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
        return 0;
}

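/*
 * Unlink a subscription from the event tables; the level-2 XA entry is
 * dropped once the object's subscription list becomes empty.
 */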
static void devx_cleanup_subscription(struct mlx5_ib_dev *dev,
                                      struct devx_event_subscription *sub)
{
        struct devx_event *event;
        struct devx_obj_event *xa_val_level2;

        if (sub->is_cleaned)
                return;

        sub->is_cleaned = 1;
        list_del_rcu(&sub->xa_list);

        if (list_empty(&sub->obj_list))
                return;

        list_del_rcu(&sub->obj_list);
        /* if the level-2 entry for this object has no more subscriptions, drop it */
1271         event = xa_load(&dev->devx_event_table.event_xa,
1272                         sub->xa_key_level1);
1273         WARN_ON(!event);
1274
1275         xa_val_level2 = xa_load(&event->object_ids, sub->xa_key_level2);
1276         if (list_empty(&xa_val_level2->obj_sub_list)) {
1277                 xa_erase(&event->object_ids,
1278                          sub->xa_key_level2);
1279                 kfree_rcu(xa_val_level2, rcu);
1280         }
1281 }
1282
1283 static int devx_obj_cleanup(struct ib_uobject *uobject,
1284                             enum rdma_remove_reason why,
1285                             struct uverbs_attr_bundle *attrs)
1286 {
1287         u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
1288         struct mlx5_devx_event_table *devx_event_table;
1289         struct devx_obj *obj = uobject->object;
1290         struct devx_event_subscription *sub_entry, *tmp;
1291         struct mlx5_ib_dev *dev;
1292         int ret;
1293
1294         dev = mlx5_udata_to_mdev(&attrs->driver_udata);
1295         if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
                /*
                 * pagefault_single_data_segment() issues commands against
                 * the mmkey; we must wait for those to finish before freeing
                 * the mkey, as another allocation could get the same mkey
                 * number.
                 */
                xa_erase(&obj->ib_dev->odp_mkeys,
                         mlx5_base_mkey(obj->devx_mr.mmkey.key));
                synchronize_srcu(&dev->odp_srcu);
        }

        if (obj->flags & DEVX_OBJ_FLAGS_DCT)
                ret = mlx5_core_destroy_dct(obj->ib_dev, &obj->core_dct);
        else if (obj->flags & DEVX_OBJ_FLAGS_CQ)
                ret = mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq);
        else
                ret = mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox,
                                    obj->dinlen, out, sizeof(out));
        if (ib_is_destroy_retryable(ret, why, uobject))
                return ret;

        devx_event_table = &dev->devx_event_table;

        mutex_lock(&devx_event_table->event_xa_lock);
        list_for_each_entry_safe(sub_entry, tmp, &obj->event_sub, obj_list)
                devx_cleanup_subscription(dev, sub_entry);
        mutex_unlock(&devx_event_table->event_xa_lock);

        kfree(obj);
        return ret;
}

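/*
 * Completion handler for CQs created through DEVX: look up the
 * subscribers of the CQ completion event (level-1 key = event type,
 * level-2 key = CQN) under RCU and dispatch the EQE to them.
 */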
static void devx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
{
        struct devx_obj *obj = container_of(mcq, struct devx_obj, core_cq);
        struct mlx5_devx_event_table *table;
        struct devx_event *event;
        struct devx_obj_event *obj_event;
        u32 obj_id = mcq->cqn;

        table = &obj->ib_dev->devx_event_table;
        rcu_read_lock();
        event = xa_load(&table->event_xa, MLX5_EVENT_TYPE_COMP);
        if (!event)
                goto out;

        obj_event = xa_load(&event->object_ids, obj_id);
        if (!obj_event)
                goto out;

        dispatch_event_fd(&obj_event->obj_sub_list, eqe);
out:
        rcu_read_unlock();
}

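/*
 * Create a firmware object from a raw, user-built command (reached from
 * userspace through the DEVX ioctl uAPI, e.g. rdma-core's
 * mlx5dv_devx_obj_create()). DCT and CQ creation are routed through the
 * core helpers so that their events keep working, and the matching
 * destroy command is built up front so the object can always be torn
 * down from devx_obj_cleanup().
 */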
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
        struct uverbs_attr_bundle *attrs)
{
        void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
        int cmd_out_len = uverbs_attr_get_len(attrs,
                                        MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT);
        int cmd_in_len = uverbs_attr_get_len(attrs,
                                        MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
        void *cmd_out;
        struct ib_uobject *uobj = uverbs_attr_get_uobject(
                attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE);
        struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
                &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
        struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
        u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
        struct devx_obj *obj;
        u16 obj_type = 0;
        int err;
        int uid;
        u32 obj_id;
        u16 opcode;

        if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
                return -EINVAL;

        uid = devx_get_uid(c, cmd_in);
        if (uid < 0)
                return uid;

        if (!devx_is_obj_create_cmd(cmd_in, &opcode))
                return -EINVAL;

        cmd_out = uverbs_zalloc(attrs, cmd_out_len);
        if (IS_ERR(cmd_out))
                return PTR_ERR(cmd_out);

        obj = kzalloc(sizeof(struct devx_obj), GFP_KERNEL);
        if (!obj)
                return -ENOMEM;

        MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
        if (opcode == MLX5_CMD_OP_CREATE_MKEY) {
                err = devx_handle_mkey_create(dev, obj, cmd_in, cmd_in_len);
                if (err)
                        goto obj_free;
        } else {
                devx_set_umem_valid(cmd_in);
        }

        if (opcode == MLX5_CMD_OP_CREATE_DCT) {
                obj->flags |= DEVX_OBJ_FLAGS_DCT;
                err = mlx5_core_create_dct(dev, &obj->core_dct, cmd_in,
                                           cmd_in_len, cmd_out, cmd_out_len);
        } else if (opcode == MLX5_CMD_OP_CREATE_CQ) {
                obj->flags |= DEVX_OBJ_FLAGS_CQ;
                obj->core_cq.comp = devx_cq_comp;
                err = mlx5_core_create_cq(dev->mdev, &obj->core_cq,
                                          cmd_in, cmd_in_len, cmd_out,
                                          cmd_out_len);
        } else {
                err = mlx5_cmd_exec(dev->mdev, cmd_in,
                                    cmd_in_len,
                                    cmd_out, cmd_out_len);
        }

        if (err)
                goto obj_free;

        if (opcode == MLX5_CMD_OP_ALLOC_FLOW_COUNTER) {
                u8 bulk = MLX5_GET(alloc_flow_counter_in,
                                   cmd_in,
                                   flow_counter_bulk);
                obj->flow_counter_bulk_size = 128UL * bulk;
        }

        uobj->object = obj;
        INIT_LIST_HEAD(&obj->event_sub);
        obj->ib_dev = dev;
        devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen,
                                   &obj_id);
        WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));

        err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len);
        if (err)
                goto obj_destroy;

        if (opcode == MLX5_CMD_OP_CREATE_GENERAL_OBJECT)
                obj_type = MLX5_GET(general_obj_in_cmd_hdr, cmd_in, obj_type);
        obj->obj_id = get_enc_obj_id(opcode | obj_type << 16, obj_id);

        if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
                err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out);
                if (err)
                        goto obj_destroy;
        }
        return 0;

obj_destroy:
        if (obj->flags & DEVX_OBJ_FLAGS_DCT)
                mlx5_core_destroy_dct(obj->ib_dev, &obj->core_dct);
        else if (obj->flags & DEVX_OBJ_FLAGS_CQ)
                mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq);
        else
                mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox, obj->dinlen, out,
                              sizeof(out));
obj_free:
        kfree(obj);
        return err;
}

static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
        struct uverbs_attr_bundle *attrs)
{
        void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN);
        int cmd_out_len = uverbs_attr_get_len(attrs,
                                        MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT);
        struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
                                                          MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE);
        struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
                &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
        struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
        void *cmd_out;
        int err;
        int uid;

        if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
                return -EINVAL;

        uid = devx_get_uid(c, cmd_in);
        if (uid < 0)
                return uid;

        if (!devx_is_obj_modify_cmd(cmd_in))
                return -EINVAL;

        if (!devx_is_valid_obj_id(attrs, uobj, cmd_in))
                return -EINVAL;

        cmd_out = uverbs_zalloc(attrs, cmd_out_len);
        if (IS_ERR(cmd_out))
                return PTR_ERR(cmd_out);

        MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
        devx_set_umem_valid(cmd_in);

        err = mlx5_cmd_exec(mdev->mdev, cmd_in,
                            uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN),
                            cmd_out, cmd_out_len);
        if (err)
                return err;

        return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
                              cmd_out, cmd_out_len);
}

static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
        struct uverbs_attr_bundle *attrs)
{
        void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN);
        int cmd_out_len = uverbs_attr_get_len(attrs,
                                              MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT);
        struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
                                                          MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE);
        struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
                &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
        void *cmd_out;
        int err;
        int uid;
        struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);

        if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
                return -EINVAL;

        uid = devx_get_uid(c, cmd_in);
        if (uid < 0)
                return uid;

        if (!devx_is_obj_query_cmd(cmd_in))
                return -EINVAL;

        if (!devx_is_valid_obj_id(attrs, uobj, cmd_in))
                return -EINVAL;

        cmd_out = uverbs_zalloc(attrs, cmd_out_len);
        if (IS_ERR(cmd_out))
                return PTR_ERR(cmd_out);

        MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
        err = mlx5_cmd_exec(mdev->mdev, cmd_in,
                            uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN),
                            cmd_out, cmd_out_len);
        if (err)
                return err;

        return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
                              cmd_out, cmd_out_len);
}

struct devx_async_event_queue {
        spinlock_t              lock;
        wait_queue_head_t       poll_wait;
        struct list_head        event_list;
        atomic_t                bytes_in_use;
        u8                      is_destroyed:1;
};

struct devx_async_cmd_event_file {
        struct ib_uobject               uobj;
        struct devx_async_event_queue   ev_queue;
        struct mlx5_async_ctx           async_ctx;
};

static void devx_init_event_queue(struct devx_async_event_queue *ev_queue)
{
        spin_lock_init(&ev_queue->lock);
        INIT_LIST_HEAD(&ev_queue->event_list);
        init_waitqueue_head(&ev_queue->poll_wait);
        atomic_set(&ev_queue->bytes_in_use, 0);
        ev_queue->is_destroyed = 0;
}

static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)(
        struct uverbs_attr_bundle *attrs)
{
        struct devx_async_cmd_event_file *ev_file;

        struct ib_uobject *uobj = uverbs_attr_get_uobject(
                attrs, MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE);
        struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);

        ev_file = container_of(uobj, struct devx_async_cmd_event_file,
                               uobj);
        devx_init_event_queue(&ev_file->ev_queue);
        mlx5_cmd_init_async_ctx(mdev->mdev, &ev_file->async_ctx);
        return 0;
}

static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC)(
        struct uverbs_attr_bundle *attrs)
{
        struct ib_uobject *uobj = uverbs_attr_get_uobject(
                attrs, MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE);
        struct devx_async_event_file *ev_file;
        struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
                &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
        struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
        u32 flags;
        int err;

        err = uverbs_get_flags32(&flags, attrs,
                MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS,
                MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA);

        if (err)
                return err;

        ev_file = container_of(uobj, struct devx_async_event_file,
                               uobj);
        spin_lock_init(&ev_file->lock);
        INIT_LIST_HEAD(&ev_file->event_list);
        init_waitqueue_head(&ev_file->poll_wait);
        if (flags & MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA)
                ev_file->omit_data = 1;
        INIT_LIST_HEAD(&ev_file->subscribed_events_list);
        ev_file->dev = dev;
        get_device(&dev->ib_dev.dev);
        return 0;
}

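/*
 * Async command completion callback: queue the finished command on the
 * event file's queue and wake up any reader polling on it.
 */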
static void devx_query_callback(int status, struct mlx5_async_work *context)
{
        struct devx_async_data *async_data =
                container_of(context, struct devx_async_data, cb_work);
        struct devx_async_cmd_event_file *ev_file = async_data->ev_file;
        struct devx_async_event_queue *ev_queue = &ev_file->ev_queue;
        unsigned long flags;

        /*
         * Note that if the struct devx_async_cmd_event_file uobj begins to be
         * destroyed it will block at mlx5_cmd_cleanup_async_ctx() until this
         * routine returns, ensuring that it always remains valid here.
         */
        spin_lock_irqsave(&ev_queue->lock, flags);
        list_add_tail(&async_data->list, &ev_queue->event_list);
        spin_unlock_irqrestore(&ev_queue->lock, flags);

        wake_up_interruptible(&ev_queue->poll_wait);
}

#define MAX_ASYNC_BYTES_IN_USE (1024 * 1024) /* 1MB */

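/*
 * Issue an object query asynchronously. The caller supplies a wr_id
 * cookie and an async command FD; the completion is later read back
 * from that FD as a struct mlx5_ib_uapi_devx_async_cmd_hdr followed by
 * the command output. bytes_in_use caps the memory a single FD may have
 * outstanding at MAX_ASYNC_BYTES_IN_USE.
 */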
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)(
        struct uverbs_attr_bundle *attrs)
{
        void *cmd_in = uverbs_attr_get_alloced_ptr(attrs,
                                MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN);
        struct ib_uobject *uobj = uverbs_attr_get_uobject(
                                attrs,
                                MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_HANDLE);
        u16 cmd_out_len;
        struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
                &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
        struct ib_uobject *fd_uobj;
        int err;
        int uid;
        struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
        struct devx_async_cmd_event_file *ev_file;
        struct devx_async_data *async_data;

        if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
                return -EINVAL;

        uid = devx_get_uid(c, cmd_in);
        if (uid < 0)
                return uid;

        if (!devx_is_obj_query_cmd(cmd_in))
                return -EINVAL;

        err = uverbs_get_const(&cmd_out_len, attrs,
                               MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN);
        if (err)
                return err;

        if (!devx_is_valid_obj_id(attrs, uobj, cmd_in))
                return -EINVAL;

        fd_uobj = uverbs_attr_get_uobject(attrs,
                                MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD);
        if (IS_ERR(fd_uobj))
                return PTR_ERR(fd_uobj);

        ev_file = container_of(fd_uobj, struct devx_async_cmd_event_file,
                               uobj);

        if (atomic_add_return(cmd_out_len, &ev_file->ev_queue.bytes_in_use) >
                        MAX_ASYNC_BYTES_IN_USE) {
                atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use);
                return -EAGAIN;
        }

        async_data = kvzalloc(struct_size(async_data, hdr.out_data,
                                          cmd_out_len), GFP_KERNEL);
        if (!async_data) {
                err = -ENOMEM;
                goto sub_bytes;
        }

        err = uverbs_copy_from(&async_data->hdr.wr_id, attrs,
                               MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID);
        if (err)
                goto free_async;

        async_data->cmd_out_len = cmd_out_len;
        async_data->mdev = mdev;
        async_data->ev_file = ev_file;

        MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
        err = mlx5_cmd_exec_cb(&ev_file->async_ctx, cmd_in,
                    uverbs_attr_get_len(attrs,
                                MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN),
                    async_data->hdr.out_data,
                    async_data->cmd_out_len,
                    devx_query_callback, &async_data->cb_work);

        if (err)
                goto free_async;

        return 0;

free_async:
        kvfree(async_data);
sub_bytes:
        atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use);
        return err;
}

static void
subscribe_event_xa_dealloc(struct mlx5_devx_event_table *devx_event_table,
                           u32 key_level1,
                           bool is_level2,
                           u32 key_level2)
{
        struct devx_event *event;
        struct devx_obj_event *xa_val_level2;

        /* Level 1 is valid for future use, no need to free */
        if (!is_level2)
                return;

        event = xa_load(&devx_event_table->event_xa, key_level1);
        WARN_ON(!event);

        xa_val_level2 = xa_load(&event->object_ids,
                                key_level2);
        if (list_empty(&xa_val_level2->obj_sub_list)) {
                xa_erase(&event->object_ids,
                         key_level2);
                kfree_rcu(xa_val_level2, rcu);
        }
}

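/*
 * Make sure the two-level event xarray has entries for this
 * subscription: level 1 is keyed by the event type (combined with the
 * object type for affiliated events), level 2 by the object id. Entries
 * are created on demand, and level-1 entries are never freed here.
 */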
static int
subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
                         u32 key_level1,
                         bool is_level2,
                         u32 key_level2)
{
        struct devx_obj_event *obj_event;
        struct devx_event *event;
        int err;

        event = xa_load(&devx_event_table->event_xa, key_level1);
        if (!event) {
                event = kzalloc(sizeof(*event), GFP_KERNEL);
                if (!event)
                        return -ENOMEM;

                INIT_LIST_HEAD(&event->unaffiliated_list);
                xa_init(&event->object_ids);

                err = xa_insert(&devx_event_table->event_xa,
                                key_level1,
                                event,
                                GFP_KERNEL);
                if (err) {
                        kfree(event);
                        return err;
                }
        }

        if (!is_level2)
                return 0;

        obj_event = xa_load(&event->object_ids, key_level2);
        if (!obj_event) {
                obj_event = kzalloc(sizeof(*obj_event), GFP_KERNEL);
                if (!obj_event)
                        /* Level1 is valid for future use, no need to free */
                        return -ENOMEM;

                err = xa_insert(&event->object_ids,
                                key_level2,
                                obj_event,
                                GFP_KERNEL);
                if (err) {
                        kfree(obj_event);
                        return err;
                }
                INIT_LIST_HEAD(&obj_event->obj_sub_list);
        }

        return 0;
}

static bool is_valid_events_legacy(int num_events, u16 *event_type_num_list,
                                   struct devx_obj *obj)
{
        int i;

        for (i = 0; i < num_events; i++) {
                if (obj) {
                        if (!is_legacy_obj_event_num(event_type_num_list[i]))
                                return false;
                } else if (!is_legacy_unaffiliated_event_num(
                                event_type_num_list[i])) {
                        return false;
                }
        }

        return true;
}

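/*
 * Validate the requested event numbers against the device's advertised
 * user_affiliated/user_unaffiliated event bitmasks, falling back to the
 * legacy hard-coded lists on devices without event_cap. Event number 0
 * (CQ completion) is always allowed for affiliated subscriptions.
 */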
#define MAX_SUPP_EVENT_NUM 255
static bool is_valid_events(struct mlx5_core_dev *dev,
                            int num_events, u16 *event_type_num_list,
                            struct devx_obj *obj)
{
        __be64 *aff_events;
        __be64 *unaff_events;
        int mask_entry;
        int mask_bit;
        int i;

        if (MLX5_CAP_GEN(dev, event_cap)) {
                aff_events = MLX5_CAP_DEV_EVENT(dev,
                                                user_affiliated_events);
                unaff_events = MLX5_CAP_DEV_EVENT(dev,
                                                  user_unaffiliated_events);
        } else {
                return is_valid_events_legacy(num_events, event_type_num_list,
                                              obj);
        }

        for (i = 0; i < num_events; i++) {
                if (event_type_num_list[i] > MAX_SUPP_EVENT_NUM)
                        return false;

                mask_entry = event_type_num_list[i] / 64;
                mask_bit = event_type_num_list[i] % 64;

                if (obj) {
                        /* CQ completion */
                        if (event_type_num_list[i] == 0)
                                continue;

                        if (!(be64_to_cpu(aff_events[mask_entry]) &
                                        (1ull << mask_bit)))
                                return false;

                        continue;
                }

                if (!(be64_to_cpu(unaff_events[mask_entry]) &
                                (1ull << mask_bit)))
                        return false;
        }

        return true;
}

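/*
 * Subscribe an event FD (or an eventfd, when FD_NUM is supplied) to a
 * list of event types, optionally affiliated with a DEVX object. All
 * allocations and xarray insertions are performed first, under
 * event_xa_lock, so that the final list insertions cannot fail halfway
 * through.
 */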
#define MAX_NUM_EVENTS 16
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
        struct uverbs_attr_bundle *attrs)
{
        struct ib_uobject *devx_uobj = uverbs_attr_get_uobject(
                                attrs,
                                MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE);
        struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
                &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
        struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
        struct ib_uobject *fd_uobj;
        struct devx_obj *obj = NULL;
        struct devx_async_event_file *ev_file;
        struct mlx5_devx_event_table *devx_event_table = &dev->devx_event_table;
        u16 *event_type_num_list;
        struct devx_event_subscription *event_sub, *tmp_sub;
        struct list_head sub_list;
        int redirect_fd;
        bool use_eventfd = false;
        int num_events;
        int num_alloc_xa_entries = 0;
        u16 obj_type = 0;
        u64 cookie = 0;
        u32 obj_id = 0;
        int err;
        int i;

        if (!c->devx_uid)
                return -EINVAL;

        if (!IS_ERR(devx_uobj)) {
                obj = (struct devx_obj *)devx_uobj->object;
                if (obj)
                        obj_id = get_dec_obj_id(obj->obj_id);
        }

        fd_uobj = uverbs_attr_get_uobject(attrs,
                                MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE);
        if (IS_ERR(fd_uobj))
                return PTR_ERR(fd_uobj);

        ev_file = container_of(fd_uobj, struct devx_async_event_file,
                               uobj);

        if (uverbs_attr_is_valid(attrs,
                                 MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM)) {
                err = uverbs_copy_from(&redirect_fd, attrs,
                               MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM);
                if (err)
                        return err;

                use_eventfd = true;
        }

        if (uverbs_attr_is_valid(attrs,
                                 MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE)) {
                if (use_eventfd)
                        return -EINVAL;

                err = uverbs_copy_from(&cookie, attrs,
                                MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE);
                if (err)
                        return err;
        }

        num_events = uverbs_attr_ptr_get_array_size(
                attrs, MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
                sizeof(u16));

        if (num_events < 0)
                return num_events;

        if (num_events > MAX_NUM_EVENTS)
                return -EINVAL;

        event_type_num_list = uverbs_attr_get_alloced_ptr(attrs,
                        MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST);

        if (!is_valid_events(dev->mdev, num_events, event_type_num_list, obj))
                return -EINVAL;

        INIT_LIST_HEAD(&sub_list);

        /* Protect against concurrent subscriptions to the same XA entries so
         * that both can succeed
         */
        mutex_lock(&devx_event_table->event_xa_lock);
        for (i = 0; i < num_events; i++) {
                u32 key_level1;

                if (obj)
                        obj_type = get_dec_obj_type(obj,
                                                    event_type_num_list[i]);
                key_level1 = event_type_num_list[i] | obj_type << 16;

                err = subscribe_event_xa_alloc(devx_event_table,
                                               key_level1,
                                               obj,
                                               obj_id);
                if (err)
                        goto err;

                num_alloc_xa_entries++;
                event_sub = kzalloc(sizeof(*event_sub), GFP_KERNEL);
                if (!event_sub) {
                        err = -ENOMEM;
                        goto err;
                }

                list_add_tail(&event_sub->event_list, &sub_list);
                uverbs_uobject_get(&ev_file->uobj);
                if (use_eventfd) {
                        event_sub->eventfd =
                                eventfd_ctx_fdget(redirect_fd);

                        if (IS_ERR(event_sub->eventfd)) {
                                err = PTR_ERR(event_sub->eventfd);
                                event_sub->eventfd = NULL;
                                goto err;
                        }
                }

                event_sub->cookie = cookie;
                event_sub->ev_file = ev_file;
                /* May be needed upon cleanup of the devx object/subscription */
                event_sub->xa_key_level1 = key_level1;
                event_sub->xa_key_level2 = obj_id;
                INIT_LIST_HEAD(&event_sub->obj_list);
        }

        /* Once all the allocations and the XA data insertions are done we
         * can go ahead and add all the subscriptions to the relevant lists
         * without concern of a failure.
         */
        list_for_each_entry_safe(event_sub, tmp_sub, &sub_list, event_list) {
                struct devx_event *event;
                struct devx_obj_event *obj_event;

                list_del_init(&event_sub->event_list);

                spin_lock_irq(&ev_file->lock);
                list_add_tail_rcu(&event_sub->file_list,
                                  &ev_file->subscribed_events_list);
                spin_unlock_irq(&ev_file->lock);

                event = xa_load(&devx_event_table->event_xa,
                                event_sub->xa_key_level1);
                WARN_ON(!event);

                if (!obj) {
                        list_add_tail_rcu(&event_sub->xa_list,
                                          &event->unaffiliated_list);
                        continue;
                }

                obj_event = xa_load(&event->object_ids, obj_id);
                WARN_ON(!obj_event);
                list_add_tail_rcu(&event_sub->xa_list,
                                  &obj_event->obj_sub_list);
                list_add_tail_rcu(&event_sub->obj_list,
                                  &obj->event_sub);
        }

        mutex_unlock(&devx_event_table->event_xa_lock);
        return 0;

err:
        list_for_each_entry_safe(event_sub, tmp_sub, &sub_list, event_list) {
                list_del(&event_sub->event_list);

                subscribe_event_xa_dealloc(devx_event_table,
                                           event_sub->xa_key_level1,
                                           obj,
                                           obj_id);

                if (event_sub->eventfd)
                        eventfd_ctx_put(event_sub->eventfd);
                uverbs_uobject_put(&event_sub->ev_file->uobj);
                kfree(event_sub);
        }

        mutex_unlock(&devx_event_table->event_xa_lock);
        return err;
}

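/*
 * Pin the user memory behind a DEVX umem and work out how it can be
 * mapped: mlx5_ib_cont_pages() picks the best page shift for the pinned
 * pages, and the offset of the start address within the first page is
 * kept for the firmware command.
 */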
static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
                         struct uverbs_attr_bundle *attrs,
                         struct devx_umem *obj)
{
        u64 addr;
        size_t size;
        u32 access;
        int npages;
        int err;
        u32 page_mask;

        if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) ||
            uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN))
                return -EFAULT;

        err = uverbs_get_flags32(&access, attrs,
                                 MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
                                 IB_ACCESS_LOCAL_WRITE |
                                 IB_ACCESS_REMOTE_WRITE |
                                 IB_ACCESS_REMOTE_READ);
        if (err)
                return err;

        err = ib_check_mr_access(access);
        if (err)
                return err;

        obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access);
        if (IS_ERR(obj->umem))
                return PTR_ERR(obj->umem);

        mlx5_ib_cont_pages(obj->umem, obj->umem->address,
                           MLX5_MKEY_PAGE_SHIFT_MASK, &npages,
                           &obj->page_shift, &obj->ncont, NULL);

        if (!npages) {
                ib_umem_release(obj->umem);
                return -EINVAL;
        }

        page_mask = (1 << obj->page_shift) - 1;
        obj->page_offset = obj->umem->address & page_mask;

        return 0;
}

static int devx_umem_reg_cmd_alloc(struct uverbs_attr_bundle *attrs,
                                   struct devx_umem *obj,
                                   struct devx_umem_reg_cmd *cmd)
{
        cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) +
                    (MLX5_ST_SZ_BYTES(mtt) * obj->ncont);
        cmd->in = uverbs_zalloc(attrs, cmd->inlen);
        return PTR_ERR_OR_ZERO(cmd->in);
}

static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev,
                                    struct devx_umem *obj,
                                    struct devx_umem_reg_cmd *cmd)
{
        void *umem;
        __be64 *mtt;

        umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem);
        mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt);

        MLX5_SET(create_umem_in, cmd->in, opcode, MLX5_CMD_OP_CREATE_UMEM);
        MLX5_SET64(umem, umem, num_of_mtt, obj->ncont);
        MLX5_SET(umem, umem, log_page_size, obj->page_shift -
                                            MLX5_ADAPTER_PAGE_SHIFT);
        MLX5_SET(umem, umem, page_offset, obj->page_offset);
        mlx5_ib_populate_pas(dev, obj->umem, obj->page_shift, mtt,
                             (obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) |
                             MLX5_IB_MTT_READ);
}

static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
        struct uverbs_attr_bundle *attrs)
{
        struct devx_umem_reg_cmd cmd;
        struct devx_umem *obj;
        struct ib_uobject *uobj = uverbs_attr_get_uobject(
                attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE);
        u32 obj_id;
        struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
                &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
        struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
        int err;

        if (!c->devx_uid)
                return -EINVAL;

        obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL);
        if (!obj)
                return -ENOMEM;

        err = devx_umem_get(dev, &c->ibucontext, attrs, obj);
        if (err)
                goto err_obj_free;

        err = devx_umem_reg_cmd_alloc(attrs, obj, &cmd);
        if (err)
                goto err_umem_release;

        devx_umem_reg_cmd_build(dev, obj, &cmd);

        MLX5_SET(create_umem_in, cmd.in, uid, c->devx_uid);
        err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out,
                            sizeof(cmd.out));
        if (err)
                goto err_umem_release;

        obj->mdev = dev->mdev;
        uobj->object = obj;
        devx_obj_build_destroy_cmd(cmd.in, cmd.out, obj->dinbox, &obj->dinlen, &obj_id);
        uverbs_finalize_uobj_create(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE);

        err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, &obj_id,
                             sizeof(obj_id));
        return err;

err_umem_release:
        ib_umem_release(obj->umem);
err_obj_free:
        kfree(obj);
        return err;
}

static int devx_umem_cleanup(struct ib_uobject *uobject,
                             enum rdma_remove_reason why,
                             struct uverbs_attr_bundle *attrs)
{
        struct devx_umem *obj = uobject->object;
        u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
        int err;

        err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
        if (ib_is_destroy_retryable(err, why, uobject))
                return err;

        ib_umem_release(obj->umem);
        kfree(obj);
        return 0;
}

static bool is_unaffiliated_event(struct mlx5_core_dev *dev,
                                  unsigned long event_type)
{
        __be64 *unaff_events;
        int mask_entry;
        int mask_bit;

        if (!MLX5_CAP_GEN(dev, event_cap))
                return is_legacy_unaffiliated_event_num(event_type);

        unaff_events = MLX5_CAP_DEV_EVENT(dev,
                                          user_unaffiliated_events);
        WARN_ON(event_type > MAX_SUPP_EVENT_NUM);

        mask_entry = event_type / 64;
        mask_bit = event_type % 64;

        if (!(be64_to_cpu(unaff_events[mask_entry]) & (1ull << mask_bit)))
                return false;

        return true;
}

static u32 devx_get_obj_id_from_event(unsigned long event_type, void *data)
{
        struct mlx5_eqe *eqe = data;
        u32 obj_id = 0;

        switch (event_type) {
        case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
        case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
        case MLX5_EVENT_TYPE_PATH_MIG:
        case MLX5_EVENT_TYPE_COMM_EST:
        case MLX5_EVENT_TYPE_SQ_DRAINED:
        case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
        case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
        case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
        case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
        case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
                obj_id = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
                break;
        case MLX5_EVENT_TYPE_XRQ_ERROR:
                obj_id = be32_to_cpu(eqe->data.xrq_err.type_xrqn) & 0xffffff;
                break;
        case MLX5_EVENT_TYPE_DCT_DRAINED:
        case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
                obj_id = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
                break;
        case MLX5_EVENT_TYPE_CQ_ERROR:
                obj_id = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
                break;
        default:
                obj_id = MLX5_GET(affiliated_event_header, &eqe->data, obj_id);
                break;
        }

        return obj_id;
}

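/*
 * Queue one event on a subscriber's event file. In omit_data mode only
 * the subscription cookie is queued (and a subscription that is already
 * queued is not queued twice); otherwise the EQE is copied into a newly
 * allocated entry, and an allocation failure is reported to the next
 * reader as an overflow.
 */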
static int deliver_event(struct devx_event_subscription *event_sub,
                         const void *data)
{
        struct devx_async_event_file *ev_file;
        struct devx_async_event_data *event_data;
        unsigned long flags;

        ev_file = event_sub->ev_file;

        if (ev_file->omit_data) {
                spin_lock_irqsave(&ev_file->lock, flags);
                if (!list_empty(&event_sub->event_list) ||
                    ev_file->is_destroyed) {
                        spin_unlock_irqrestore(&ev_file->lock, flags);
                        return 0;
                }

                list_add_tail(&event_sub->event_list, &ev_file->event_list);
                spin_unlock_irqrestore(&ev_file->lock, flags);
                wake_up_interruptible(&ev_file->poll_wait);
                return 0;
        }

        event_data = kzalloc(sizeof(*event_data) + sizeof(struct mlx5_eqe),
                             GFP_ATOMIC);
        if (!event_data) {
                spin_lock_irqsave(&ev_file->lock, flags);
                ev_file->is_overflow_err = 1;
                spin_unlock_irqrestore(&ev_file->lock, flags);
                return -ENOMEM;
        }

        event_data->hdr.cookie = event_sub->cookie;
        memcpy(event_data->hdr.out_data, data, sizeof(struct mlx5_eqe));

        spin_lock_irqsave(&ev_file->lock, flags);
        if (!ev_file->is_destroyed)
                list_add_tail(&event_data->list, &ev_file->event_list);
        else
                kfree(event_data);
        spin_unlock_irqrestore(&ev_file->lock, flags);
        wake_up_interruptible(&ev_file->poll_wait);

        return 0;
}

static void dispatch_event_fd(struct list_head *fd_list,
                              const void *data)
{
        struct devx_event_subscription *item;

        list_for_each_entry_rcu(item, fd_list, xa_list) {
                if (item->eventfd)
                        eventfd_signal(item->eventfd, 1);
                else
                        deliver_event(item, data);
        }
}

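/*
 * EQ notifier: route a hardware event to its subscribers. Unaffiliated
 * events go to the per-type list; affiliated events are further looked
 * up by the object id extracted from the EQE. Dispatch runs under RCU,
 * matching the RCU-deferred freeing of subscriptions.
 */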
static int devx_event_notifier(struct notifier_block *nb,
                               unsigned long event_type, void *data)
{
        struct mlx5_devx_event_table *table;
        struct mlx5_ib_dev *dev;
        struct devx_event *event;
        struct devx_obj_event *obj_event;
        u16 obj_type = 0;
        bool is_unaffiliated;
        u32 obj_id;

        /* Explicitly filter out kernel events that may occur frequently */
        if (event_type == MLX5_EVENT_TYPE_CMD ||
            event_type == MLX5_EVENT_TYPE_PAGE_REQUEST)
                return NOTIFY_OK;

        table = container_of(nb, struct mlx5_devx_event_table, devx_nb.nb);
        dev = container_of(table, struct mlx5_ib_dev, devx_event_table);
        is_unaffiliated = is_unaffiliated_event(dev->mdev, event_type);

        if (!is_unaffiliated)
                obj_type = get_event_obj_type(event_type, data);

        rcu_read_lock();
        event = xa_load(&table->event_xa, event_type | (obj_type << 16));
        if (!event) {
                rcu_read_unlock();
                return NOTIFY_DONE;
        }

        if (is_unaffiliated) {
                dispatch_event_fd(&event->unaffiliated_list, data);
                rcu_read_unlock();
                return NOTIFY_OK;
        }

        obj_id = devx_get_obj_id_from_event(event_type, data);
        obj_event = xa_load(&event->object_ids, obj_id);
        if (!obj_event) {
                rcu_read_unlock();
                return NOTIFY_DONE;
        }

        dispatch_event_fd(&obj_event->obj_sub_list, data);

        rcu_read_unlock();
        return NOTIFY_OK;
}

int mlx5_ib_devx_init(struct mlx5_ib_dev *dev)
{
        struct mlx5_devx_event_table *table = &dev->devx_event_table;
        int uid;

        uid = mlx5_ib_devx_create(dev, false);
        if (uid > 0) {
                dev->devx_whitelist_uid = uid;
                xa_init(&table->event_xa);
                mutex_init(&table->event_xa_lock);
                MLX5_NB_INIT(&table->devx_nb, devx_event_notifier, NOTIFY_ANY);
                mlx5_eq_notifier_register(dev->mdev, &table->devx_nb);
        }

        return 0;
}

void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev)
{
        struct mlx5_devx_event_table *table = &dev->devx_event_table;
        struct devx_event_subscription *sub, *tmp;
        struct devx_event *event;
        void *entry;
        unsigned long id;

        if (dev->devx_whitelist_uid) {
                mlx5_eq_notifier_unregister(dev->mdev, &table->devx_nb);
                mutex_lock(&dev->devx_event_table.event_xa_lock);
                xa_for_each(&table->event_xa, id, entry) {
                        event = entry;
                        list_for_each_entry_safe(
                                sub, tmp, &event->unaffiliated_list, xa_list)
                                devx_cleanup_subscription(dev, sub);
                        kfree(entry);
                }
                mutex_unlock(&dev->devx_event_table.event_xa_lock);
                xa_destroy(&table->event_xa);

                mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
        }
}

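/*
 * read() on the async command FD: block (unless O_NONBLOCK) until a
 * completed command is queued, then copy out the header followed by the
 * command output. The user buffer must be large enough for the whole
 * event, otherwise -ENOSPC is returned.
 */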
static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
                                         size_t count, loff_t *pos)
{
        struct devx_async_cmd_event_file *comp_ev_file = filp->private_data;
        struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
        struct devx_async_data *event;
        int ret = 0;
        size_t eventsz;

        spin_lock_irq(&ev_queue->lock);

        while (list_empty(&ev_queue->event_list)) {
                spin_unlock_irq(&ev_queue->lock);

                if (filp->f_flags & O_NONBLOCK)
                        return -EAGAIN;

                if (wait_event_interruptible(
                            ev_queue->poll_wait,
                            (!list_empty(&ev_queue->event_list) ||
                             ev_queue->is_destroyed))) {
                        return -ERESTARTSYS;
                }

                spin_lock_irq(&ev_queue->lock);
                if (ev_queue->is_destroyed) {
                        spin_unlock_irq(&ev_queue->lock);
                        return -EIO;
                }
        }

        event = list_entry(ev_queue->event_list.next,
                           struct devx_async_data, list);
        eventsz = event->cmd_out_len +
                        sizeof(struct mlx5_ib_uapi_devx_async_cmd_hdr);

        if (eventsz > count) {
                spin_unlock_irq(&ev_queue->lock);
                return -ENOSPC;
        }

        list_del(ev_queue->event_list.next);
        spin_unlock_irq(&ev_queue->lock);

        if (copy_to_user(buf, &event->hdr, eventsz))
                ret = -EFAULT;
        else
                ret = eventsz;

        atomic_sub(event->cmd_out_len, &ev_queue->bytes_in_use);
        kvfree(event);
        return ret;
}

static __poll_t devx_async_cmd_event_poll(struct file *filp,
                                          struct poll_table_struct *wait)
{
        struct devx_async_cmd_event_file *comp_ev_file = filp->private_data;
        struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
        __poll_t pollflags = 0;

        poll_wait(filp, &ev_queue->poll_wait, wait);

        spin_lock_irq(&ev_queue->lock);
        if (ev_queue->is_destroyed)
                pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
        else if (!list_empty(&ev_queue->event_list))
                pollflags = EPOLLIN | EPOLLRDNORM;
        spin_unlock_irq(&ev_queue->lock);

        return pollflags;
}

static const struct file_operations devx_async_cmd_event_fops = {
        .owner   = THIS_MODULE,
        .read    = devx_async_cmd_event_read,
        .poll    = devx_async_cmd_event_poll,
        .release = uverbs_uobject_fd_release,
        .llseek  = no_llseek,
};

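/*
 * read() on the event FD. In omit_data mode each queued entry is the
 * subscription itself and only its cookie is returned; otherwise a full
 * header plus EQE is copied out and the queued entry is freed.
 */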
static ssize_t devx_async_event_read(struct file *filp, char __user *buf,
                                     size_t count, loff_t *pos)
{
        struct devx_async_event_file *ev_file = filp->private_data;
        struct devx_event_subscription *event_sub;
        struct devx_async_event_data *event;
        int ret = 0;
        size_t eventsz;
        bool omit_data;
        void *event_data;

        omit_data = ev_file->omit_data;

        spin_lock_irq(&ev_file->lock);

        if (ev_file->is_overflow_err) {
                ev_file->is_overflow_err = 0;
                spin_unlock_irq(&ev_file->lock);
                return -EOVERFLOW;
        }

        while (list_empty(&ev_file->event_list)) {
                spin_unlock_irq(&ev_file->lock);

                if (filp->f_flags & O_NONBLOCK)
                        return -EAGAIN;

                if (wait_event_interruptible(ev_file->poll_wait,
                            (!list_empty(&ev_file->event_list) ||
                             ev_file->is_destroyed))) {
                        return -ERESTARTSYS;
                }

                spin_lock_irq(&ev_file->lock);
                if (ev_file->is_destroyed) {
                        spin_unlock_irq(&ev_file->lock);
                        return -EIO;
                }
        }

        if (omit_data) {
                event_sub = list_first_entry(&ev_file->event_list,
                                        struct devx_event_subscription,
                                        event_list);
                eventsz = sizeof(event_sub->cookie);
                event_data = &event_sub->cookie;
        } else {
                event = list_first_entry(&ev_file->event_list,
                                      struct devx_async_event_data, list);
                eventsz = sizeof(struct mlx5_eqe) +
                        sizeof(struct mlx5_ib_uapi_devx_async_event_hdr);
                event_data = &event->hdr;
        }

        if (eventsz > count) {
                spin_unlock_irq(&ev_file->lock);
                return -EINVAL;
        }

        if (omit_data)
                list_del_init(&event_sub->event_list);
        else
                list_del(&event->list);

        spin_unlock_irq(&ev_file->lock);

        if (copy_to_user(buf, event_data, eventsz))
                /* This points to an application issue, not a kernel concern */
                ret = -EFAULT;
        else
                ret = eventsz;

        if (!omit_data)
                kfree(event);
        return ret;
}

static __poll_t devx_async_event_poll(struct file *filp,
                                      struct poll_table_struct *wait)
{
        struct devx_async_event_file *ev_file = filp->private_data;
        __poll_t pollflags = 0;

        poll_wait(filp, &ev_file->poll_wait, wait);

        spin_lock_irq(&ev_file->lock);
        if (ev_file->is_destroyed)
                pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
        else if (!list_empty(&ev_file->event_list))
                pollflags = EPOLLIN | EPOLLRDNORM;
        spin_unlock_irq(&ev_file->lock);

        return pollflags;
}

static void devx_free_subscription(struct rcu_head *rcu)
{
        struct devx_event_subscription *event_sub =
                container_of(rcu, struct devx_event_subscription, rcu);

        if (event_sub->eventfd)
                eventfd_ctx_put(event_sub->eventfd);
        uverbs_uobject_put(&event_sub->ev_file->uobj);
        kfree(event_sub);
}

static const struct file_operations devx_async_event_fops = {
        .owner   = THIS_MODULE,
        .read    = devx_async_event_read,
        .poll    = devx_async_event_poll,
        .release = uverbs_uobject_fd_release,
        .llseek  = no_llseek,
};

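/*
 * Tear down an async command FD: mark the queue destroyed and wake up
 * waiters, let mlx5_cmd_cleanup_async_ctx() wait for all in-flight
 * async commands to complete, then free whatever completions are still
 * queued.
 */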
static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj,
                                             enum rdma_remove_reason why)
{
        struct devx_async_cmd_event_file *comp_ev_file =
                container_of(uobj, struct devx_async_cmd_event_file,
                             uobj);
        struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
        struct devx_async_data *entry, *tmp;

        spin_lock_irq(&ev_queue->lock);
        ev_queue->is_destroyed = 1;
        spin_unlock_irq(&ev_queue->lock);
        wake_up_interruptible(&ev_queue->poll_wait);

        mlx5_cmd_cleanup_async_ctx(&comp_ev_file->async_ctx);

        spin_lock_irq(&comp_ev_file->ev_queue.lock);
        list_for_each_entry_safe(entry, tmp,
                                 &comp_ev_file->ev_queue.event_list, list) {
                list_del(&entry->list);
                kvfree(entry);
        }
        spin_unlock_irq(&comp_ev_file->ev_queue.lock);
        return 0;
}

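/*
 * Tear down an event FD: mark it destroyed, drop any queued events and
 * unlink every subscription tied to this FD. Freeing each subscription
 * is deferred by an RCU grace period, since event dispatch may still be
 * walking the lists under rcu_read_lock().
 */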
2632 static int devx_async_event_destroy_uobj(struct ib_uobject *uobj,
2633                                          enum rdma_remove_reason why)
2634 {
2635         struct devx_async_event_file *ev_file =
2636                 container_of(uobj, struct devx_async_event_file,
2637                              uobj);
2638         struct devx_event_subscription *event_sub, *event_sub_tmp;
2639         struct mlx5_ib_dev *dev = ev_file->dev;
2640
2641         spin_lock_irq(&ev_file->lock);
2642         ev_file->is_destroyed = 1;
2643
2644         /* free the pending events allocation */
2645         if (ev_file->omit_data) {
2646                 struct devx_event_subscription *event_sub, *tmp;
2647
2648                 list_for_each_entry_safe(event_sub, tmp, &ev_file->event_list,
2649                                          event_list)
2650                         list_del_init(&event_sub->event_list);
2651
2652         } else {
2653                 struct devx_async_event_data *entry, *tmp;
2654
2655                 list_for_each_entry_safe(entry, tmp, &ev_file->event_list,
2656                                          list) {
2657                         list_del(&entry->list);
2658                         kfree(entry);
2659                 }
2660         }
2661
2662         spin_unlock_irq(&ev_file->lock);
2663         wake_up_interruptible(&ev_file->poll_wait);
2664
2665         mutex_lock(&dev->devx_event_table.event_xa_lock);
2666         /* delete the subscriptions which are related to this FD */
2667         list_for_each_entry_safe(event_sub, event_sub_tmp,
2668                                  &ev_file->subscribed_events_list, file_list) {
2669                 devx_cleanup_subscription(dev, event_sub);
2670                 list_del_rcu(&event_sub->file_list);
2671                 /* subscription may not be used by the read API any more */
2672                 call_rcu(&event_sub->rcu, devx_free_subscription);
2673         }
2674         mutex_unlock(&dev->devx_event_table.event_xa_lock);
2675
2676         put_device(&dev->ib_dev.dev);
2677         return 0;
2678 };
2679
2680 DECLARE_UVERBS_NAMED_METHOD(
2681         MLX5_IB_METHOD_DEVX_UMEM_REG,
2682         UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE,
2683                         MLX5_IB_OBJECT_DEVX_UMEM,
2684                         UVERBS_ACCESS_NEW,
2685                         UA_MANDATORY),
2686         UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR,
2687                            UVERBS_ATTR_TYPE(u64),
2688                            UA_MANDATORY),
2689         UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN,
2690                            UVERBS_ATTR_TYPE(u64),
2691                            UA_MANDATORY),
2692         UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
2693                              enum ib_access_flags),
2694         UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
2695                             UVERBS_ATTR_TYPE(u32),
2696                             UA_MANDATORY));
2697
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
        MLX5_IB_METHOD_DEVX_UMEM_DEREG,
        UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE,
                        MLX5_IB_OBJECT_DEVX_UMEM,
                        UVERBS_ACCESS_DESTROY,
                        UA_MANDATORY));

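/*
 * A minimal user-space sketch for the two umem methods above,
 * assuming rdma-core's mlx5dv wrappers (not part of this file):
 *
 *      umem = mlx5dv_devx_umem_reg(ctx, buf, len, IBV_ACCESS_LOCAL_WRITE);
 *      ... reference umem->umem_id from PRM command buffers ...
 *      mlx5dv_devx_umem_dereg(umem);
 */
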
DECLARE_UVERBS_NAMED_METHOD(
        MLX5_IB_METHOD_DEVX_QUERY_EQN,
        UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC,
                           UVERBS_ATTR_TYPE(u32),
                           UA_MANDATORY),
        UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
                            UVERBS_ATTR_TYPE(u32),
                            UA_MANDATORY));

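/*
 * QUERY_EQN translates a completion vector number, as seen by this
 * context, into the device EQ number that a DEVX CQ-create command
 * must carry.  User space typically reaches it through rdma-core's
 * mlx5dv_devx_query_eqn(ctx, vector, &eqn) (assumed wrapper, not
 * defined here).
 */
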
DECLARE_UVERBS_NAMED_METHOD(
        MLX5_IB_METHOD_DEVX_QUERY_UAR,
        UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX,
                           UVERBS_ATTR_TYPE(u32),
                           UA_MANDATORY),
        UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
                            UVERBS_ATTR_TYPE(u32),
                            UA_MANDATORY));

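/*
 * QUERY_UAR similarly maps a UAR index owned by this context to the
 * device-level UAR index needed when building PRM commands.
 */
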
DECLARE_UVERBS_NAMED_METHOD(
        MLX5_IB_METHOD_DEVX_OTHER,
        UVERBS_ATTR_PTR_IN(
                MLX5_IB_ATTR_DEVX_OTHER_CMD_IN,
                UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
                UA_MANDATORY,
                UA_ALLOC_AND_COPY),
        UVERBS_ATTR_PTR_OUT(
                MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT,
                UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
                UA_MANDATORY));

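/*
 * DEVX_OTHER is the pass-through path for general firmware commands
 * that are not tied to a DEVX object.  The usual user-space entry
 * point is rdma-core's mlx5dv_devx_general_cmd(ctx, in, inlen, out,
 * outlen) (assumed wrapper, not defined in this file).
 */
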
DECLARE_UVERBS_NAMED_METHOD(
        MLX5_IB_METHOD_DEVX_OBJ_CREATE,
        UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE,
                        MLX5_IB_OBJECT_DEVX_OBJ,
                        UVERBS_ACCESS_NEW,
                        UA_MANDATORY),
        UVERBS_ATTR_PTR_IN(
                MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN,
                UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
                UA_MANDATORY,
                UA_ALLOC_AND_COPY),
        UVERBS_ATTR_PTR_OUT(
                MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
                UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
                UA_MANDATORY));

DECLARE_UVERBS_NAMED_METHOD_DESTROY(
        MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
        UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE,
                        MLX5_IB_OBJECT_DEVX_OBJ,
                        UVERBS_ACCESS_DESTROY,
                        UA_MANDATORY));

DECLARE_UVERBS_NAMED_METHOD(
        MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
        UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE,
                        UVERBS_IDR_ANY_OBJECT,
                        UVERBS_ACCESS_WRITE,
                        UA_MANDATORY),
        UVERBS_ATTR_PTR_IN(
                MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
                UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
                UA_MANDATORY,
                UA_ALLOC_AND_COPY),
        UVERBS_ATTR_PTR_OUT(
                MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
                UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
                UA_MANDATORY));

DECLARE_UVERBS_NAMED_METHOD(
        MLX5_IB_METHOD_DEVX_OBJ_QUERY,
        UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
                        UVERBS_IDR_ANY_OBJECT,
                        UVERBS_ACCESS_READ,
                        UA_MANDATORY),
        UVERBS_ATTR_PTR_IN(
                MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
                UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
                UA_MANDATORY,
                UA_ALLOC_AND_COPY),
        UVERBS_ATTR_PTR_OUT(
                MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
                UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
                UA_MANDATORY));

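/*
 * The object lifecycle as seen from user space, assuming the
 * standard rdma-core wrappers (sketch, not part of this file):
 *
 *      obj = mlx5dv_devx_obj_create(ctx, in, inlen, out, outlen);
 *      mlx5dv_devx_obj_query(obj, in, inlen, out, outlen);
 *      mlx5dv_devx_obj_modify(obj, in, inlen, out, outlen);
 *      mlx5dv_devx_obj_destroy(obj);
 *
 * in/out are PRM-format command buffers.  Note that MODIFY and QUERY
 * take UVERBS_IDR_ANY_OBJECT, so they can also target objects that
 * were created through the regular verbs API, not only DEVX handles.
 */
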
DECLARE_UVERBS_NAMED_METHOD(
        MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY,
        UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
                        UVERBS_IDR_ANY_OBJECT,
                        UVERBS_ACCESS_READ,
                        UA_MANDATORY),
        UVERBS_ATTR_PTR_IN(
                MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
                UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
                UA_MANDATORY,
                UA_ALLOC_AND_COPY),
        UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN,
                u16, UA_MANDATORY),
        UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD,
                MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
                UVERBS_ACCESS_READ,
                UA_MANDATORY),
        UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID,
                UVERBS_ATTR_TYPE(u64),
                UA_MANDATORY));

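/*
 * ASYNC_QUERY queues the query on an async command FD instead of
 * blocking; the completion is read back tagged with the caller's
 * wr_id.  A user-space sketch, assuming the rdma-core wrappers:
 *
 *      comp = mlx5dv_devx_create_cmd_comp(ctx);
 *      mlx5dv_devx_obj_query_async(obj, in, inlen, outlen, wr_id, comp);
 *      mlx5dv_devx_get_async_cmd_comp(comp, resp, resp_len);
 */
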
DECLARE_UVERBS_NAMED_METHOD(
        MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT,
        UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE,
                MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
                UVERBS_ACCESS_READ,
                UA_MANDATORY),
        UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE,
                MLX5_IB_OBJECT_DEVX_OBJ,
                UVERBS_ACCESS_READ,
                UA_OPTIONAL),
        UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
                UVERBS_ATTR_MIN_SIZE(sizeof(u16)),
                UA_MANDATORY,
                UA_ALLOC_AND_COPY),
        UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE,
                UVERBS_ATTR_TYPE(u64),
                UA_OPTIONAL),
        UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM,
                UVERBS_ATTR_TYPE(u32),
                UA_OPTIONAL));

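/*
 * SUBSCRIBE_EVENT attaches an event FD to a list of firmware event
 * numbers, optionally scoped to a single object and tagged with a
 * cookie.  Sketch, assuming the rdma-core wrappers:
 *
 *      ch = mlx5dv_devx_create_event_channel(ctx, flags);
 *      mlx5dv_devx_subscribe_devx_event(ch, obj, sizeof(events),
 *                                       events, cookie);
 *      mlx5dv_devx_get_event(ch, &hdr, sizeof(hdr));
 */
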
DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
                              &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER),
                              &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR),
                              &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN),
                              &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT));

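/*
 * MLX5_IB_OBJECT_DEVX is a "global" object: its methods do not
 * operate on an object instance.  The IDR- and FD-backed types below,
 * in contrast, get a cleanup/destroy callback that uverbs invokes on
 * handle destruction or context teardown.
 */
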
DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
                            UVERBS_TYPE_ALLOC_IDR(devx_obj_cleanup),
                            &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE),
                            &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY),
                            &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY),
                            &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY),
                            &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY));

DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM,
                            UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup),
                            &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG),
                            &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG));

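/*
 * The two remaining types are FD-backed: UVERBS_TYPE_ALLOC_FD hands
 * the user an anonymous-inode file with the given fops and name, and
 * the destroy_uobj callbacks defined earlier in this file run when
 * the uobject is torn down.
 */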
DECLARE_UVERBS_NAMED_METHOD(
        MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC,
        UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE,
                        MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
                        UVERBS_ACCESS_NEW,
                        UA_MANDATORY));

DECLARE_UVERBS_NAMED_OBJECT(
        MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
        UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_cmd_event_file),
                             devx_async_cmd_event_destroy_uobj,
                             &devx_async_cmd_event_fops, "[devx_async_cmd]",
                             O_RDONLY),
        &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC));

DECLARE_UVERBS_NAMED_METHOD(
        MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC,
        UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE,
                        MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
                        UVERBS_ACCESS_NEW,
                        UA_MANDATORY),
        UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS,
                        enum mlx5_ib_uapi_devx_create_event_channel_flags,
                        UA_MANDATORY));

DECLARE_UVERBS_NAMED_OBJECT(
        MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
        UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_event_file),
                             devx_async_event_destroy_uobj,
                             &devx_async_event_fops, "[devx_async_event]",
                             O_RDONLY),
        &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC));

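/* DEVX is exposed only when firmware supports user contexts. */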
static bool devx_is_supported(struct ib_device *device)
{
        struct mlx5_ib_dev *dev = to_mdev(device);

        return MLX5_CAP_GEN(dev->mdev, log_max_uctx);
}

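/*
 * Chain all DEVX object trees into the mlx5_ib uAPI; each tree is
 * advertised to user space only when devx_is_supported() holds.
 */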
const struct uapi_definition mlx5_ib_devx_defs[] = {
        UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
                MLX5_IB_OBJECT_DEVX,
                UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
        UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
                MLX5_IB_OBJECT_DEVX_OBJ,
                UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
        UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
                MLX5_IB_OBJECT_DEVX_UMEM,
                UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
        UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
                MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
                UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
        UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
                MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
                UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
        {},
};