// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
 */

#include <rdma/ib_user_verbs.h>
#include <rdma/ib_verbs.h>
#include <rdma/uverbs_types.h>
#include <rdma/uverbs_ioctl.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/uverbs_std_types.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
#include "devx.h"
#include "qp.h"
#include <linux/xarray.h>

#define UVERBS_MODULE_NAME mlx5_ib
#include <rdma/uverbs_named_ioctl.h>

static void dispatch_event_fd(struct list_head *fd_list, const void *data);

enum devx_obj_flags {
        DEVX_OBJ_FLAGS_INDIRECT_MKEY = 1 << 0,
        DEVX_OBJ_FLAGS_DCT = 1 << 1,
        DEVX_OBJ_FLAGS_CQ = 1 << 2,
};

struct devx_async_data {
        struct mlx5_ib_dev *mdev;
        struct list_head list;
        struct devx_async_cmd_event_file *ev_file;
        struct mlx5_async_work cb_work;
        u16 cmd_out_len;
        /* must be last field in this structure */
        struct mlx5_ib_uapi_devx_async_cmd_hdr hdr;
};

struct devx_async_event_data {
        struct list_head list; /* headed in ev_file->event_list */
        struct mlx5_ib_uapi_devx_async_event_hdr hdr;
};

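/*
 * Event dispatch bookkeeping, in short: the per-device event table
 * (mlx5_devx_event_table.event_xa) is a two-level xarray. Level 1 is keyed
 * by the event number and holds a devx_event; level 2
 * (devx_event->object_ids) is keyed by the object id and holds a
 * devx_obj_event. Subscriptions hang off either the unaffiliated_list or an
 * obj_sub_list and are dispatched to the subscribed event FDs under RCU.
 */
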
/* first level XA value data structure */
struct devx_event {
        struct xarray object_ids; /* second XA level, Key = object id */
        struct list_head unaffiliated_list;
};

/* second level XA value data structure */
struct devx_obj_event {
        struct rcu_head rcu;
        struct list_head obj_sub_list;
};

struct devx_event_subscription {
        struct list_head file_list; /* headed in ev_file->
                                     * subscribed_events_list
                                     */
        struct list_head xa_list; /* headed in devx_event->unaffiliated_list or
                                   * devx_obj_event->obj_sub_list
                                   */
        struct list_head obj_list; /* headed in devx_object */
        struct list_head event_list; /* headed in ev_file->event_list or in
                                      * temp list via subscription
                                      */

        u8 is_cleaned:1;
        u32 xa_key_level1;
        u32 xa_key_level2;
        struct rcu_head rcu;
        u64 cookie;
        struct devx_async_event_file *ev_file;
        struct eventfd_ctx *eventfd;
};

struct devx_async_event_file {
        struct ib_uobject uobj;
        /* Head of events that are subscribed to this FD */
        struct list_head subscribed_events_list;
        spinlock_t lock;
        wait_queue_head_t poll_wait;
        struct list_head event_list;
        struct mlx5_ib_dev *dev;
        u8 omit_data:1;
        u8 is_overflow_err:1;
        u8 is_destroyed:1;
};

struct devx_umem {
        struct mlx5_core_dev            *mdev;
        struct ib_umem                  *umem;
        u32                             page_offset;
        int                             page_shift;
        int                             ncont;
        u32                             dinlen;
        u32                             dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)];
};

struct devx_umem_reg_cmd {
        void                            *in;
        u32                             inlen;
        u32                             out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
};

static struct mlx5_ib_ucontext *
devx_ufile2uctx(const struct uverbs_attr_bundle *attrs)
{
        return to_mucontext(ib_uverbs_get_ucontext(attrs));
}

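/*
 * Create a firmware user context (UCTX) and return its uid (> 0) on
 * success, or a negative errno. For user contexts, the RAW_TX and
 * INTERNAL_DEV_RES capabilities are granted only when the caller holds
 * CAP_NET_RAW / CAP_SYS_RAWIO respectively and the device reports the
 * matching uctx_cap bit.
 */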
int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
{
        u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0};
        u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
        void *uctx;
        int err;
        u16 uid;
        u32 cap = 0;

        /* 0 means not supported */
        if (!MLX5_CAP_GEN(dev->mdev, log_max_uctx))
                return -EINVAL;

        uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx);
        if (is_user && capable(CAP_NET_RAW) &&
            (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX))
                cap |= MLX5_UCTX_CAP_RAW_TX;
        if (is_user && capable(CAP_SYS_RAWIO) &&
            (MLX5_CAP_GEN(dev->mdev, uctx_cap) &
             MLX5_UCTX_CAP_INTERNAL_DEV_RES))
                cap |= MLX5_UCTX_CAP_INTERNAL_DEV_RES;

        MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX);
        MLX5_SET(uctx, uctx, cap, cap);

        err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
        if (err)
                return err;

        uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
        return uid;
}

void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid)
{
        u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {0};
        u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};

        MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX);
        MLX5_SET(destroy_uctx_in, in, uid, uid);

        mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
}

static bool is_legacy_unaffiliated_event_num(u16 event_num)
{
        switch (event_num) {
        case MLX5_EVENT_TYPE_PORT_CHANGE:
                return true;
        default:
                return false;
        }
}

static bool is_legacy_obj_event_num(u16 event_num)
{
        switch (event_num) {
        case MLX5_EVENT_TYPE_PATH_MIG:
        case MLX5_EVENT_TYPE_COMM_EST:
        case MLX5_EVENT_TYPE_SQ_DRAINED:
        case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
        case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
        case MLX5_EVENT_TYPE_CQ_ERROR:
        case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
        case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
        case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
        case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
        case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
        case MLX5_EVENT_TYPE_DCT_DRAINED:
        case MLX5_EVENT_TYPE_COMP:
        case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
        case MLX5_EVENT_TYPE_XRQ_ERROR:
                return true;
        default:
                return false;
        }
}

static u16 get_legacy_obj_type(u16 opcode)
{
        switch (opcode) {
        case MLX5_CMD_OP_CREATE_RQ:
                return MLX5_EVENT_QUEUE_TYPE_RQ;
        case MLX5_CMD_OP_CREATE_QP:
                return MLX5_EVENT_QUEUE_TYPE_QP;
        case MLX5_CMD_OP_CREATE_SQ:
                return MLX5_EVENT_QUEUE_TYPE_SQ;
        case MLX5_CMD_OP_CREATE_DCT:
                return MLX5_EVENT_QUEUE_TYPE_DCT;
        default:
                return 0;
        }
}

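/*
 * Layout of the encoded 64-bit object id used below (see get_enc_obj_id()):
 *   bits [31:0]  - the firmware object id
 *   bits [47:32] - the opcode of the creating command
 *   bits [63:48] - the object type, for general objects only
 */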
static u16 get_dec_obj_type(struct devx_obj *obj, u16 event_num)
{
        u16 opcode;

        opcode = (obj->obj_id >> 32) & 0xffff;

        if (is_legacy_obj_event_num(event_num))
                return get_legacy_obj_type(opcode);

        switch (opcode) {
        case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
                return (obj->obj_id >> 48);
        case MLX5_CMD_OP_CREATE_RQ:
                return MLX5_OBJ_TYPE_RQ;
        case MLX5_CMD_OP_CREATE_QP:
                return MLX5_OBJ_TYPE_QP;
        case MLX5_CMD_OP_CREATE_SQ:
                return MLX5_OBJ_TYPE_SQ;
        case MLX5_CMD_OP_CREATE_DCT:
                return MLX5_OBJ_TYPE_DCT;
        case MLX5_CMD_OP_CREATE_TIR:
                return MLX5_OBJ_TYPE_TIR;
        case MLX5_CMD_OP_CREATE_TIS:
                return MLX5_OBJ_TYPE_TIS;
        case MLX5_CMD_OP_CREATE_PSV:
                return MLX5_OBJ_TYPE_PSV;
        case MLX5_OBJ_TYPE_MKEY:
                return MLX5_OBJ_TYPE_MKEY;
        case MLX5_CMD_OP_CREATE_RMP:
                return MLX5_OBJ_TYPE_RMP;
        case MLX5_CMD_OP_CREATE_XRC_SRQ:
                return MLX5_OBJ_TYPE_XRC_SRQ;
        case MLX5_CMD_OP_CREATE_XRQ:
                return MLX5_OBJ_TYPE_XRQ;
        case MLX5_CMD_OP_CREATE_RQT:
                return MLX5_OBJ_TYPE_RQT;
        case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
                return MLX5_OBJ_TYPE_FLOW_COUNTER;
        case MLX5_CMD_OP_CREATE_CQ:
                return MLX5_OBJ_TYPE_CQ;
        default:
                return 0;
        }
}

static u16 get_event_obj_type(unsigned long event_type, struct mlx5_eqe *eqe)
{
        switch (event_type) {
        case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
        case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
        case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
        case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
        case MLX5_EVENT_TYPE_PATH_MIG:
        case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
        case MLX5_EVENT_TYPE_COMM_EST:
        case MLX5_EVENT_TYPE_SQ_DRAINED:
        case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
        case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
                return eqe->data.qp_srq.type;
        case MLX5_EVENT_TYPE_CQ_ERROR:
        case MLX5_EVENT_TYPE_XRQ_ERROR:
                return 0;
        case MLX5_EVENT_TYPE_DCT_DRAINED:
        case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
                return MLX5_EVENT_QUEUE_TYPE_DCT;
        default:
                return MLX5_GET(affiliated_event_header, &eqe->data, obj_type);
        }
}

static u32 get_dec_obj_id(u64 obj_id)
{
        return (obj_id & 0xffffffff);
}

/*
 * As the obj_id in the firmware is not globally unique the object type
 * must be considered upon checking for a valid object id.
 * For that the opcode of the creator command is encoded as part of the obj_id.
 */
static u64 get_enc_obj_id(u32 opcode, u32 obj_id)
{
        return ((u64)opcode << 32) | obj_id;
}

static u64 devx_get_obj_id(const void *in)
{
        u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
        u64 obj_id;

        switch (opcode) {
        case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
        case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_GENERAL_OBJECT |
                                        MLX5_GET(general_obj_in_cmd_hdr, in,
                                                 obj_type) << 16,
                                        MLX5_GET(general_obj_in_cmd_hdr, in,
                                                 obj_id));
                break;
        case MLX5_CMD_OP_QUERY_MKEY:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_MKEY,
                                        MLX5_GET(query_mkey_in, in,
                                                 mkey_index));
                break;
        case MLX5_CMD_OP_QUERY_CQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
                                        MLX5_GET(query_cq_in, in, cqn));
                break;
        case MLX5_CMD_OP_MODIFY_CQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
                                        MLX5_GET(modify_cq_in, in, cqn));
                break;
        case MLX5_CMD_OP_QUERY_SQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
                                        MLX5_GET(query_sq_in, in, sqn));
                break;
        case MLX5_CMD_OP_MODIFY_SQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
                                        MLX5_GET(modify_sq_in, in, sqn));
                break;
        case MLX5_CMD_OP_QUERY_RQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
                                        MLX5_GET(query_rq_in, in, rqn));
                break;
        case MLX5_CMD_OP_MODIFY_RQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
                                        MLX5_GET(modify_rq_in, in, rqn));
                break;
        case MLX5_CMD_OP_QUERY_RMP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP,
                                        MLX5_GET(query_rmp_in, in, rmpn));
                break;
        case MLX5_CMD_OP_MODIFY_RMP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP,
                                        MLX5_GET(modify_rmp_in, in, rmpn));
                break;
        case MLX5_CMD_OP_QUERY_RQT:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
                                        MLX5_GET(query_rqt_in, in, rqtn));
                break;
        case MLX5_CMD_OP_MODIFY_RQT:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
                                        MLX5_GET(modify_rqt_in, in, rqtn));
                break;
        case MLX5_CMD_OP_QUERY_TIR:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
                                        MLX5_GET(query_tir_in, in, tirn));
                break;
        case MLX5_CMD_OP_MODIFY_TIR:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
                                        MLX5_GET(modify_tir_in, in, tirn));
                break;
        case MLX5_CMD_OP_QUERY_TIS:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
                                        MLX5_GET(query_tis_in, in, tisn));
                break;
        case MLX5_CMD_OP_MODIFY_TIS:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
                                        MLX5_GET(modify_tis_in, in, tisn));
                break;
        case MLX5_CMD_OP_QUERY_FLOW_TABLE:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE,
                                        MLX5_GET(query_flow_table_in, in,
                                                 table_id));
                break;
        case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE,
                                        MLX5_GET(modify_flow_table_in, in,
                                                 table_id));
                break;
        case MLX5_CMD_OP_QUERY_FLOW_GROUP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_GROUP,
                                        MLX5_GET(query_flow_group_in, in,
                                                 group_id));
                break;
        case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY,
                                        MLX5_GET(query_fte_in, in,
                                                 flow_index));
                break;
        case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY,
                                        MLX5_GET(set_fte_in, in, flow_index));
                break;
        case MLX5_CMD_OP_QUERY_Q_COUNTER:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_Q_COUNTER,
                                        MLX5_GET(query_q_counter_in, in,
                                                 counter_set_id));
                break;
        case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_FLOW_COUNTER,
                                        MLX5_GET(query_flow_counter_in, in,
                                                 flow_counter_id));
                break;
        case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT,
                                        MLX5_GET(general_obj_in_cmd_hdr, in,
                                                 obj_id));
                break;
        case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT,
                                        MLX5_GET(query_scheduling_element_in,
                                                 in, scheduling_element_id));
                break;
        case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT,
                                        MLX5_GET(modify_scheduling_element_in,
                                                 in, scheduling_element_id));
                break;
        case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT,
                                        MLX5_GET(add_vxlan_udp_dport_in, in,
                                                 vxlan_udp_port));
                break;
        case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY,
                                        MLX5_GET(query_l2_table_entry_in, in,
                                                 table_index));
                break;
        case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY,
                                        MLX5_GET(set_l2_table_entry_in, in,
                                                 table_index));
                break;
        case MLX5_CMD_OP_QUERY_QP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                        MLX5_GET(query_qp_in, in, qpn));
                break;
        case MLX5_CMD_OP_RST2INIT_QP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                        MLX5_GET(rst2init_qp_in, in, qpn));
                break;
        case MLX5_CMD_OP_INIT2INIT_QP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                        MLX5_GET(init2init_qp_in, in, qpn));
                break;
        case MLX5_CMD_OP_INIT2RTR_QP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                        MLX5_GET(init2rtr_qp_in, in, qpn));
                break;
        case MLX5_CMD_OP_RTR2RTS_QP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                        MLX5_GET(rtr2rts_qp_in, in, qpn));
                break;
        case MLX5_CMD_OP_RTS2RTS_QP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                        MLX5_GET(rts2rts_qp_in, in, qpn));
                break;
        case MLX5_CMD_OP_SQERR2RTS_QP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                        MLX5_GET(sqerr2rts_qp_in, in, qpn));
                break;
        case MLX5_CMD_OP_2ERR_QP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                        MLX5_GET(qp_2err_in, in, qpn));
                break;
        case MLX5_CMD_OP_2RST_QP:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                        MLX5_GET(qp_2rst_in, in, qpn));
                break;
        case MLX5_CMD_OP_QUERY_DCT:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
                                        MLX5_GET(query_dct_in, in, dctn));
                break;
        case MLX5_CMD_OP_QUERY_XRQ:
        case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
        case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
                                        MLX5_GET(query_xrq_in, in, xrqn));
                break;
        case MLX5_CMD_OP_QUERY_XRC_SRQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ,
                                        MLX5_GET(query_xrc_srq_in, in,
                                                 xrc_srqn));
                break;
        case MLX5_CMD_OP_ARM_XRC_SRQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ,
                                        MLX5_GET(arm_xrc_srq_in, in, xrc_srqn));
                break;
        case MLX5_CMD_OP_QUERY_SRQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SRQ,
                                        MLX5_GET(query_srq_in, in, srqn));
                break;
        case MLX5_CMD_OP_ARM_RQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
                                        MLX5_GET(arm_rq_in, in, srq_number));
                break;
        case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
                                        MLX5_GET(drain_dct_in, in, dctn));
                break;
        case MLX5_CMD_OP_ARM_XRQ:
        case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
        case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
        case MLX5_CMD_OP_MODIFY_XRQ:
                obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
                                        MLX5_GET(arm_xrq_in, in, xrqn));
                break;
        case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT:
                obj_id = get_enc_obj_id
                                (MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT,
                                 MLX5_GET(query_packet_reformat_context_in,
                                          in, packet_reformat_id));
                break;
        default:
                obj_id = 0;
        }

        return obj_id;
}

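/*
 * Check that the object id carried in a modify/query command matches the
 * uobject the command is issued against. For bulk flow counters any id in
 * the range [obj_id, obj_id + flow_counter_bulk_size) is accepted.
 */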
static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs,
                                 struct ib_uobject *uobj, const void *in)
{
        struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
        u64 obj_id = devx_get_obj_id(in);

        if (!obj_id)
                return false;

        switch (uobj_get_object_id(uobj)) {
        case UVERBS_OBJECT_CQ:
                return get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
                                      to_mcq(uobj->object)->mcq.cqn) ==
                                      obj_id;

        case UVERBS_OBJECT_SRQ:
        {
                struct mlx5_core_srq *srq = &(to_msrq(uobj->object)->msrq);
                u16 opcode;

                switch (srq->common.res) {
                case MLX5_RES_XSRQ:
                        opcode = MLX5_CMD_OP_CREATE_XRC_SRQ;
                        break;
                case MLX5_RES_XRQ:
                        opcode = MLX5_CMD_OP_CREATE_XRQ;
                        break;
                default:
                        if (!dev->mdev->issi)
                                opcode = MLX5_CMD_OP_CREATE_SRQ;
                        else
                                opcode = MLX5_CMD_OP_CREATE_RMP;
                }

                return get_enc_obj_id(opcode,
                                      to_msrq(uobj->object)->msrq.srqn) ==
                                      obj_id;
        }

        case UVERBS_OBJECT_QP:
        {
                struct mlx5_ib_qp *qp = to_mqp(uobj->object);

                if (qp->type == IB_QPT_RAW_PACKET ||
                    (qp->flags & IB_QP_CREATE_SOURCE_QPN)) {
                        struct mlx5_ib_raw_packet_qp *raw_packet_qp =
                                                         &qp->raw_packet_qp;
                        struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
                        struct mlx5_ib_sq *sq = &raw_packet_qp->sq;

                        return (get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
                                               rq->base.mqp.qpn) == obj_id ||
                                get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
                                               sq->base.mqp.qpn) == obj_id ||
                                get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
                                               rq->tirn) == obj_id ||
                                get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
                                               sq->tisn) == obj_id);
                }

                if (qp->type == MLX5_IB_QPT_DCT)
                        return get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
                                              qp->dct.mdct.mqp.qpn) == obj_id;
                return get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
                                      qp->ibqp.qp_num) == obj_id;
        }

        case UVERBS_OBJECT_WQ:
                return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
                                      to_mrwq(uobj->object)->core_qp.qpn) ==
                                      obj_id;

        case UVERBS_OBJECT_RWQ_IND_TBL:
                return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
                                      to_mrwq_ind_table(uobj->object)->rqtn) ==
                                      obj_id;

        case MLX5_IB_OBJECT_DEVX_OBJ:
        {
                u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
                struct devx_obj *devx_uobj = uobj->object;

                if (opcode == MLX5_CMD_OP_QUERY_FLOW_COUNTER &&
                    devx_uobj->flow_counter_bulk_size) {
                        u64 end;

                        end = devx_uobj->obj_id +
                                devx_uobj->flow_counter_bulk_size;
                        return devx_uobj->obj_id <= obj_id && end > obj_id;
                }

                return devx_uobj->obj_id == obj_id;
        }

        default:
                return false;
        }
}

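/*
 * Mark the umem handles carried in a create command as valid, so the
 * firmware treats the command's umem/dbr umem fields as meaningful.
 * Commands that do not reference a umem are left untouched.
 */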
static void devx_set_umem_valid(const void *in)
{
        u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);

        switch (opcode) {
        case MLX5_CMD_OP_CREATE_MKEY:
                MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
                break;
        case MLX5_CMD_OP_CREATE_CQ:
        {
                void *cqc;

                MLX5_SET(create_cq_in, in, cq_umem_valid, 1);
                cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
                MLX5_SET(cqc, cqc, dbr_umem_valid, 1);
                break;
        }
        case MLX5_CMD_OP_CREATE_QP:
        {
                void *qpc;

                qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
                MLX5_SET(qpc, qpc, dbr_umem_valid, 1);
                MLX5_SET(create_qp_in, in, wq_umem_valid, 1);
                break;
        }

        case MLX5_CMD_OP_CREATE_RQ:
        {
                void *rqc, *wq;

                rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
                wq  = MLX5_ADDR_OF(rqc, rqc, wq);
                MLX5_SET(wq, wq, dbr_umem_valid, 1);
                MLX5_SET(wq, wq, wq_umem_valid, 1);
                break;
        }

        case MLX5_CMD_OP_CREATE_SQ:
        {
                void *sqc, *wq;

                sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
                wq = MLX5_ADDR_OF(sqc, sqc, wq);
                MLX5_SET(wq, wq, dbr_umem_valid, 1);
                MLX5_SET(wq, wq, wq_umem_valid, 1);
                break;
        }

        case MLX5_CMD_OP_MODIFY_CQ:
                MLX5_SET(modify_cq_in, in, cq_umem_valid, 1);
                break;

        case MLX5_CMD_OP_CREATE_RMP:
        {
                void *rmpc, *wq;

                rmpc = MLX5_ADDR_OF(create_rmp_in, in, ctx);
                wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
                MLX5_SET(wq, wq, dbr_umem_valid, 1);
                MLX5_SET(wq, wq, wq_umem_valid, 1);
                break;
        }

        case MLX5_CMD_OP_CREATE_XRQ:
        {
                void *xrqc, *wq;

                xrqc = MLX5_ADDR_OF(create_xrq_in, in, xrq_context);
                wq = MLX5_ADDR_OF(xrqc, xrqc, wq);
                MLX5_SET(wq, wq, dbr_umem_valid, 1);
                MLX5_SET(wq, wq, wq_umem_valid, 1);
                break;
        }

        case MLX5_CMD_OP_CREATE_XRC_SRQ:
        {
                void *xrc_srqc;

                MLX5_SET(create_xrc_srq_in, in, xrc_srq_umem_valid, 1);
                xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, in,
                                        xrc_srq_context_entry);
                MLX5_SET(xrc_srqc, xrc_srqc, dbr_umem_valid, 1);
                break;
        }

        default:
                return;
        }
}

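/*
 * Return true if the command creates (or allocates) a firmware object.
 * Every opcode accepted here must have a matching destroy command in
 * devx_obj_build_destroy_cmd().
 */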
static bool devx_is_obj_create_cmd(const void *in, u16 *opcode)
{
        *opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);

        switch (*opcode) {
        case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
        case MLX5_CMD_OP_CREATE_MKEY:
        case MLX5_CMD_OP_CREATE_CQ:
        case MLX5_CMD_OP_ALLOC_PD:
        case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
        case MLX5_CMD_OP_CREATE_RMP:
        case MLX5_CMD_OP_CREATE_SQ:
        case MLX5_CMD_OP_CREATE_RQ:
        case MLX5_CMD_OP_CREATE_RQT:
        case MLX5_CMD_OP_CREATE_TIR:
        case MLX5_CMD_OP_CREATE_TIS:
        case MLX5_CMD_OP_ALLOC_Q_COUNTER:
        case MLX5_CMD_OP_CREATE_FLOW_TABLE:
        case MLX5_CMD_OP_CREATE_FLOW_GROUP:
        case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
        case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
        case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
        case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
        case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
        case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
        case MLX5_CMD_OP_CREATE_QP:
        case MLX5_CMD_OP_CREATE_SRQ:
        case MLX5_CMD_OP_CREATE_XRC_SRQ:
        case MLX5_CMD_OP_CREATE_DCT:
        case MLX5_CMD_OP_CREATE_XRQ:
        case MLX5_CMD_OP_ATTACH_TO_MCG:
        case MLX5_CMD_OP_ALLOC_XRCD:
                return true;
        case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
        {
                u16 op_mod = MLX5_GET(set_fte_in, in, op_mod);
                if (op_mod == 0)
                        return true;
                return false;
        }
        case MLX5_CMD_OP_CREATE_PSV:
        {
                u8 num_psv = MLX5_GET(create_psv_in, in, num_psv);

                if (num_psv == 1)
                        return true;
                return false;
        }
        default:
                return false;
        }
}

static bool devx_is_obj_modify_cmd(const void *in)
{
        u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);

        switch (opcode) {
        case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
        case MLX5_CMD_OP_MODIFY_CQ:
        case MLX5_CMD_OP_MODIFY_RMP:
        case MLX5_CMD_OP_MODIFY_SQ:
        case MLX5_CMD_OP_MODIFY_RQ:
        case MLX5_CMD_OP_MODIFY_RQT:
        case MLX5_CMD_OP_MODIFY_TIR:
        case MLX5_CMD_OP_MODIFY_TIS:
        case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
        case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
        case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
        case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
        case MLX5_CMD_OP_RST2INIT_QP:
        case MLX5_CMD_OP_INIT2RTR_QP:
        case MLX5_CMD_OP_INIT2INIT_QP:
        case MLX5_CMD_OP_RTR2RTS_QP:
        case MLX5_CMD_OP_RTS2RTS_QP:
        case MLX5_CMD_OP_SQERR2RTS_QP:
        case MLX5_CMD_OP_2ERR_QP:
        case MLX5_CMD_OP_2RST_QP:
        case MLX5_CMD_OP_ARM_XRC_SRQ:
        case MLX5_CMD_OP_ARM_RQ:
        case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
        case MLX5_CMD_OP_ARM_XRQ:
        case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
        case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
        case MLX5_CMD_OP_MODIFY_XRQ:
                return true;
        case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
        {
                u16 op_mod = MLX5_GET(set_fte_in, in, op_mod);

                if (op_mod == 1)
                        return true;
                return false;
        }
        default:
                return false;
        }
}

static bool devx_is_obj_query_cmd(const void *in)
{
        u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);

        switch (opcode) {
        case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
        case MLX5_CMD_OP_QUERY_MKEY:
        case MLX5_CMD_OP_QUERY_CQ:
        case MLX5_CMD_OP_QUERY_RMP:
        case MLX5_CMD_OP_QUERY_SQ:
        case MLX5_CMD_OP_QUERY_RQ:
        case MLX5_CMD_OP_QUERY_RQT:
        case MLX5_CMD_OP_QUERY_TIR:
        case MLX5_CMD_OP_QUERY_TIS:
        case MLX5_CMD_OP_QUERY_Q_COUNTER:
        case MLX5_CMD_OP_QUERY_FLOW_TABLE:
        case MLX5_CMD_OP_QUERY_FLOW_GROUP:
        case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
        case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
        case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
        case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
        case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
        case MLX5_CMD_OP_QUERY_QP:
        case MLX5_CMD_OP_QUERY_SRQ:
        case MLX5_CMD_OP_QUERY_XRC_SRQ:
        case MLX5_CMD_OP_QUERY_DCT:
        case MLX5_CMD_OP_QUERY_XRQ:
        case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
        case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS:
        case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT:
                return true;
        default:
                return false;
        }
}

static bool devx_is_whitelist_cmd(void *in)
{
        u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);

        switch (opcode) {
        case MLX5_CMD_OP_QUERY_HCA_CAP:
        case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
        case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
                return true;
        default:
                return false;
        }
}

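/*
 * Resolve the uid to issue the command with: whitelisted commands may fall
 * back to the per-device whitelist uid when the context has no devx uid of
 * its own; all other commands require a devx uid.
 */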
static int devx_get_uid(struct mlx5_ib_ucontext *c, void *cmd_in)
{
        if (devx_is_whitelist_cmd(cmd_in)) {
                struct mlx5_ib_dev *dev;

                if (c->devx_uid)
                        return c->devx_uid;

                dev = to_mdev(c->ibucontext.device);
                if (dev->devx_whitelist_uid)
                        return dev->devx_whitelist_uid;

                return -EOPNOTSUPP;
        }

        if (!c->devx_uid)
                return -EINVAL;

        return c->devx_uid;
}

static bool devx_is_general_cmd(void *in, struct mlx5_ib_dev *dev)
{
        u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);

        /* Pass all cmds for vhca_tunnel as general, tracking is done in FW */
        if ((MLX5_CAP_GEN_64(dev->mdev, vhca_tunnel_commands) &&
             MLX5_GET(general_obj_in_cmd_hdr, in, vhca_tunnel_id)) ||
            (opcode >= MLX5_CMD_OP_GENERAL_START &&
             opcode < MLX5_CMD_OP_GENERAL_END))
                return true;

        switch (opcode) {
        case MLX5_CMD_OP_QUERY_HCA_CAP:
        case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
        case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
        case MLX5_CMD_OP_QUERY_VPORT_STATE:
        case MLX5_CMD_OP_QUERY_ADAPTER:
        case MLX5_CMD_OP_QUERY_ISSI:
        case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT:
        case MLX5_CMD_OP_QUERY_ROCE_ADDRESS:
        case MLX5_CMD_OP_QUERY_VNIC_ENV:
        case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
        case MLX5_CMD_OP_GET_DROPPED_PACKET_LOG:
        case MLX5_CMD_OP_NOP:
        case MLX5_CMD_OP_QUERY_CONG_STATUS:
        case MLX5_CMD_OP_QUERY_CONG_PARAMS:
        case MLX5_CMD_OP_QUERY_CONG_STATISTICS:
        case MLX5_CMD_OP_QUERY_LAG:
                return true;
        default:
                return false;
        }
}

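/*
 * Translate a user completion vector index into the device EQ number, for
 * example so that userspace can fill the EQ field of a devx-created CQ
 * context (an illustrative use, not enforced here).
 */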
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(
        struct uverbs_attr_bundle *attrs)
{
        struct mlx5_ib_ucontext *c;
        struct mlx5_ib_dev *dev;
        int user_vector;
        int dev_eqn;
        int err;

        if (uverbs_copy_from(&user_vector, attrs,
                             MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC))
                return -EFAULT;

        c = devx_ufile2uctx(attrs);
        if (IS_ERR(c))
                return PTR_ERR(c);
        dev = to_mdev(c->ibucontext.device);

        err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn);
        if (err < 0)
                return err;

        if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
                           &dev_eqn, sizeof(dev_eqn)))
                return -EFAULT;

        return 0;
}

/*
 * Security note:
 * The hardware protection mechanism works like this: each device object that
 * is subject to UAR doorbells (QP/SQ/CQ) gets a UAR ID (called uar_page in
 * the device specification manual) upon its creation. Then, upon a doorbell,
 * hardware fetches the object context for which the doorbell was rung and
 * validates that the UAR through which the doorbell was rung matches the
 * UAR ID of the object.
 * If there is no match, the doorbell is silently ignored by the hardware.
 * Of course, the user cannot ring a doorbell on a UAR that was not mapped
 * to it.
 * Now in devx, as the devx kernel does not manipulate the QP/SQ/CQ command
 * mailboxes (except tagging them with UID), we expose the UAR ID to the
 * user so it can embed it in these objects in the expected specification
 * format. The only thing a user can do is hurt itself by creating a
 * QP/SQ/CQ with a UAR ID other than its own, in which case other users
 * may ring a doorbell on its objects.
 * The consequence is that another user can schedule a QP/SQ of the buggy
 * user for execution (just insert it into the hardware schedule queue or
 * arm its CQ for event generation); no further harm is expected.
 */
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(
        struct uverbs_attr_bundle *attrs)
{
        struct mlx5_ib_ucontext *c;
        struct mlx5_ib_dev *dev;
        u32 user_idx;
        s32 dev_idx;

        c = devx_ufile2uctx(attrs);
        if (IS_ERR(c))
                return PTR_ERR(c);
        dev = to_mdev(c->ibucontext.device);

        if (uverbs_copy_from(&user_idx, attrs,
                             MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX))
                return -EFAULT;

        dev_idx = bfregn_to_uar_index(dev, &c->bfregi, user_idx, true);
        if (dev_idx < 0)
                return dev_idx;

        if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
                           &dev_idx, sizeof(dev_idx)))
                return -EFAULT;

        return 0;
}

static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
        struct uverbs_attr_bundle *attrs)
{
        struct mlx5_ib_ucontext *c;
        struct mlx5_ib_dev *dev;
        void *cmd_in = uverbs_attr_get_alloced_ptr(
                attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN);
        int cmd_out_len = uverbs_attr_get_len(attrs,
                                        MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT);
        void *cmd_out;
        int err;
        int uid;

        c = devx_ufile2uctx(attrs);
        if (IS_ERR(c))
                return PTR_ERR(c);
        dev = to_mdev(c->ibucontext.device);

        uid = devx_get_uid(c, cmd_in);
        if (uid < 0)
                return uid;

        /* Only whitelisted general HCA commands are allowed for this method. */
        if (!devx_is_general_cmd(cmd_in, dev))
                return -EINVAL;

        cmd_out = uverbs_zalloc(attrs, cmd_out_len);
        if (IS_ERR(cmd_out))
                return PTR_ERR(cmd_out);

        MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
        err = mlx5_cmd_exec(dev->mdev, cmd_in,
                            uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN),
                            cmd_out, cmd_out_len);
        if (err)
                return err;

        return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, cmd_out,
                              cmd_out_len);
}

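/*
 * Derive the matching destroy/dealloc command ("din") from the create
 * command's input and output, so the object can be torn down later without
 * further user input. *dinlen defaults to the general command header size
 * and is enlarged for destroy commands that carry extra fields.
 */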
static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
                                       u32 *dinlen,
                                       u32 *obj_id)
{
        u16 obj_type = MLX5_GET(general_obj_in_cmd_hdr, in, obj_type);
        u16 uid = MLX5_GET(general_obj_in_cmd_hdr, in, uid);

        *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
        *dinlen = MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr);

        MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id);
        MLX5_SET(general_obj_in_cmd_hdr, din, uid, uid);

        switch (MLX5_GET(general_obj_in_cmd_hdr, in, opcode)) {
        case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
                MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type);
                break;

        case MLX5_CMD_OP_CREATE_UMEM:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DESTROY_UMEM);
                break;
        case MLX5_CMD_OP_CREATE_MKEY:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY);
                break;
        case MLX5_CMD_OP_CREATE_CQ:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
                break;
        case MLX5_CMD_OP_ALLOC_PD:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_PD);
                break;
        case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
                break;
        case MLX5_CMD_OP_CREATE_RMP:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RMP);
                break;
        case MLX5_CMD_OP_CREATE_SQ:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SQ);
                break;
        case MLX5_CMD_OP_CREATE_RQ:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQ);
                break;
        case MLX5_CMD_OP_CREATE_RQT:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQT);
                break;
        case MLX5_CMD_OP_CREATE_TIR:
                *obj_id = MLX5_GET(create_tir_out, out, tirn);
                MLX5_SET(destroy_tir_in, din, opcode, MLX5_CMD_OP_DESTROY_TIR);
                MLX5_SET(destroy_tir_in, din, tirn, *obj_id);
                break;
        case MLX5_CMD_OP_CREATE_TIS:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIS);
                break;
        case MLX5_CMD_OP_ALLOC_Q_COUNTER:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DEALLOC_Q_COUNTER);
                break;
        case MLX5_CMD_OP_CREATE_FLOW_TABLE:
                *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_table_in);
                *obj_id = MLX5_GET(create_flow_table_out, out, table_id);
                MLX5_SET(destroy_flow_table_in, din, other_vport,
                         MLX5_GET(create_flow_table_in,  in, other_vport));
                MLX5_SET(destroy_flow_table_in, din, vport_number,
                         MLX5_GET(create_flow_table_in,  in, vport_number));
                MLX5_SET(destroy_flow_table_in, din, table_type,
                         MLX5_GET(create_flow_table_in,  in, table_type));
                MLX5_SET(destroy_flow_table_in, din, table_id, *obj_id);
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DESTROY_FLOW_TABLE);
                break;
        case MLX5_CMD_OP_CREATE_FLOW_GROUP:
                *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_group_in);
                *obj_id = MLX5_GET(create_flow_group_out, out, group_id);
                MLX5_SET(destroy_flow_group_in, din, other_vport,
                         MLX5_GET(create_flow_group_in, in, other_vport));
                MLX5_SET(destroy_flow_group_in, din, vport_number,
                         MLX5_GET(create_flow_group_in, in, vport_number));
                MLX5_SET(destroy_flow_group_in, din, table_type,
                         MLX5_GET(create_flow_group_in, in, table_type));
                MLX5_SET(destroy_flow_group_in, din, table_id,
                         MLX5_GET(create_flow_group_in, in, table_id));
                MLX5_SET(destroy_flow_group_in, din, group_id, *obj_id);
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DESTROY_FLOW_GROUP);
                break;
        case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
                *dinlen = MLX5_ST_SZ_BYTES(delete_fte_in);
                *obj_id = MLX5_GET(set_fte_in, in, flow_index);
                MLX5_SET(delete_fte_in, din, other_vport,
                         MLX5_GET(set_fte_in,  in, other_vport));
                MLX5_SET(delete_fte_in, din, vport_number,
                         MLX5_GET(set_fte_in, in, vport_number));
                MLX5_SET(delete_fte_in, din, table_type,
                         MLX5_GET(set_fte_in, in, table_type));
                MLX5_SET(delete_fte_in, din, table_id,
                         MLX5_GET(set_fte_in, in, table_id));
                MLX5_SET(delete_fte_in, din, flow_index, *obj_id);
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
                break;
        case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
                break;
        case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT);
                break;
        case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT);
                break;
        case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
                *dinlen = MLX5_ST_SZ_BYTES(destroy_scheduling_element_in);
                *obj_id = MLX5_GET(create_scheduling_element_out, out,
                                   scheduling_element_id);
                MLX5_SET(destroy_scheduling_element_in, din,
                         scheduling_hierarchy,
                         MLX5_GET(create_scheduling_element_in, in,
                                  scheduling_hierarchy));
                MLX5_SET(destroy_scheduling_element_in, din,
                         scheduling_element_id, *obj_id);
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT);
                break;
        case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
                *dinlen = MLX5_ST_SZ_BYTES(delete_vxlan_udp_dport_in);
                *obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port);
                MLX5_SET(delete_vxlan_udp_dport_in, din, vxlan_udp_port, *obj_id);
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
                break;
        case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
                *dinlen = MLX5_ST_SZ_BYTES(delete_l2_table_entry_in);
                *obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index);
                MLX5_SET(delete_l2_table_entry_in, din, table_index, *obj_id);
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
                break;
        case MLX5_CMD_OP_CREATE_QP:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_QP);
                break;
        case MLX5_CMD_OP_CREATE_SRQ:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SRQ);
                break;
        case MLX5_CMD_OP_CREATE_XRC_SRQ:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DESTROY_XRC_SRQ);
                break;
        case MLX5_CMD_OP_CREATE_DCT:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_DCT);
                break;
        case MLX5_CMD_OP_CREATE_XRQ:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_XRQ);
                break;
        case MLX5_CMD_OP_ATTACH_TO_MCG:
                *dinlen = MLX5_ST_SZ_BYTES(detach_from_mcg_in);
                MLX5_SET(detach_from_mcg_in, din, qpn,
                         MLX5_GET(attach_to_mcg_in, in, qpn));
                memcpy(MLX5_ADDR_OF(detach_from_mcg_in, din, multicast_gid),
                       MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid),
                       MLX5_FLD_SZ_BYTES(attach_to_mcg_in, multicast_gid));
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
                break;
        case MLX5_CMD_OP_ALLOC_XRCD:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
                break;
        case MLX5_CMD_OP_CREATE_PSV:
                MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
                         MLX5_CMD_OP_DESTROY_PSV);
                MLX5_SET(destroy_psv_in, din, psvn,
                         MLX5_GET(create_psv_out, out, psv0_index));
                break;
        default:
                /* The entry must match one of the devx_is_obj_create_cmd() commands */
                WARN_ON(true);
                break;
        }
}

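/*
 * For an indirect mkey created through devx, capture the mkey attributes
 * and publish the mkey in dev->odp_mkeys so the ODP fault path can resolve
 * page faults against it.
 */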
static int devx_handle_mkey_indirect(struct devx_obj *obj,
                                     struct mlx5_ib_dev *dev,
                                     void *in, void *out)
{
        struct mlx5_ib_devx_mr *devx_mr = &obj->devx_mr;
        struct mlx5_core_mkey *mkey;
        void *mkc;
        u8 key;

        mkey = &devx_mr->mmkey;
        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
        key = MLX5_GET(mkc, mkc, mkey_7_0);
        mkey->key = mlx5_idx_to_mkey(
                        MLX5_GET(create_mkey_out, out, mkey_index)) | key;
        mkey->type = MLX5_MKEY_INDIRECT_DEVX;
        mkey->iova = MLX5_GET64(mkc, mkc, start_addr);
        mkey->size = MLX5_GET64(mkc, mkc, len);
        mkey->pd = MLX5_GET(mkc, mkc, pd);
        devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size);

        return xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mkey->key), mkey,
                               GFP_KERNEL));
}

static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
                                   struct devx_obj *obj,
                                   void *in, int in_len)
{
        int min_len = MLX5_BYTE_OFF(create_mkey_in, memory_key_mkey_entry) +
                        MLX5_FLD_SZ_BYTES(create_mkey_in,
                        memory_key_mkey_entry);
        void *mkc;
        u8 access_mode;

        if (in_len < min_len)
                return -EINVAL;

        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

        access_mode = MLX5_GET(mkc, mkc, access_mode_1_0);
        access_mode |= MLX5_GET(mkc, mkc, access_mode_4_2) << 2;

        if (access_mode == MLX5_MKC_ACCESS_MODE_KLMS ||
                access_mode == MLX5_MKC_ACCESS_MODE_KSM) {
                if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
                        obj->flags |= DEVX_OBJ_FLAGS_INDIRECT_MKEY;
                return 0;
        }

        MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
        return 0;
}

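/*
 * Unlink a subscription from the event table; callers serialize against
 * each other via event_xa_lock. When the last subscription on a level-2
 * entry goes away, the entry is erased and freed after an RCU grace period.
 */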
static void devx_cleanup_subscription(struct mlx5_ib_dev *dev,
                                      struct devx_event_subscription *sub)
{
        struct devx_event *event;
        struct devx_obj_event *xa_val_level2;

        if (sub->is_cleaned)
                return;

        sub->is_cleaned = 1;
        list_del_rcu(&sub->xa_list);

        if (list_empty(&sub->obj_list))
                return;

        list_del_rcu(&sub->obj_list);
        /* if no subscriptions are left on this object, erase the level-2 entry */
1285         event = xa_load(&dev->devx_event_table.event_xa,
1286                         sub->xa_key_level1);
1287         WARN_ON(!event);
1288
1289         xa_val_level2 = xa_load(&event->object_ids, sub->xa_key_level2);
1290         if (list_empty(&xa_val_level2->obj_sub_list)) {
1291                 xa_erase(&event->object_ids,
1292                          sub->xa_key_level2);
1293                 kfree_rcu(xa_val_level2, rcu);
1294         }
1295 }
1296
1297 static int devx_obj_cleanup(struct ib_uobject *uobject,
1298                             enum rdma_remove_reason why,
1299                             struct uverbs_attr_bundle *attrs)
1300 {
1301         u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
1302         struct mlx5_devx_event_table *devx_event_table;
1303         struct devx_obj *obj = uobject->object;
1304         struct devx_event_subscription *sub_entry, *tmp;
1305         struct mlx5_ib_dev *dev;
1306         int ret;
1307
1308         dev = mlx5_udata_to_mdev(&attrs->driver_udata);
1309         if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
1310                 /*
1311                  * pagefault_single_data_segment() issues commands against
1312                  * the mmkey; we must wait for those to stop before freeing
1313                  * the mkey, as another allocation could get the same mkey #.
1314                  */
1315                 xa_erase(&obj->ib_dev->odp_mkeys,
1316                          mlx5_base_mkey(obj->devx_mr.mmkey.key));
1317                 synchronize_srcu(&dev->odp_srcu);
1318         }
1319
1320         if (obj->flags & DEVX_OBJ_FLAGS_DCT)
1321                 ret = mlx5_core_destroy_dct(obj->ib_dev, &obj->core_dct);
1322         else if (obj->flags & DEVX_OBJ_FLAGS_CQ)
1323                 ret = mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq);
1324         else
1325                 ret = mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox,
1326                                     obj->dinlen, out, sizeof(out));
1327         if (ib_is_destroy_retryable(ret, why, uobject))
1328                 return ret;
1329
1330         devx_event_table = &dev->devx_event_table;
1331
1332         mutex_lock(&devx_event_table->event_xa_lock);
1333         list_for_each_entry_safe(sub_entry, tmp, &obj->event_sub, obj_list)
1334                 devx_cleanup_subscription(dev, sub_entry);
1335         mutex_unlock(&devx_event_table->event_xa_lock);
1336
1337         kfree(obj);
1338         return ret;
1339 }
1340
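/*
 * Completion handler for a DEVX-created CQ: look up the CQN in the two-level
 * event XA under RCU and dispatch the EQE to every subscribed event FD.
 */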
1341 static void devx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
1342 {
1343         struct devx_obj *obj = container_of(mcq, struct devx_obj, core_cq);
1344         struct mlx5_devx_event_table *table;
1345         struct devx_event *event;
1346         struct devx_obj_event *obj_event;
1347         u32 obj_id = mcq->cqn;
1348
1349         table = &obj->ib_dev->devx_event_table;
1350         rcu_read_lock();
1351         event = xa_load(&table->event_xa, MLX5_EVENT_TYPE_COMP);
1352         if (!event)
1353                 goto out;
1354
1355         obj_event = xa_load(&event->object_ids, obj_id);
1356         if (!obj_event)
1357                 goto out;
1358
1359         dispatch_event_fd(&obj_event->obj_sub_list, eqe);
1360 out:
1361         rcu_read_unlock();
1362 }
1363
1364 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
1365         struct uverbs_attr_bundle *attrs)
1366 {
1367         void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
1368         int cmd_out_len = uverbs_attr_get_len(attrs,
1369                                         MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT);
1370         int cmd_in_len = uverbs_attr_get_len(attrs,
1371                                         MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
1372         void *cmd_out;
1373         struct ib_uobject *uobj = uverbs_attr_get_uobject(
1374                 attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE);
1375         struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1376                 &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1377         struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
1378         u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
1379         struct devx_obj *obj;
1380         u16 obj_type = 0;
1381         int err;
1382         int uid;
1383         u32 obj_id;
1384         u16 opcode;
1385
1386         if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
1387                 return -EINVAL;
1388
1389         uid = devx_get_uid(c, cmd_in);
1390         if (uid < 0)
1391                 return uid;
1392
1393         if (!devx_is_obj_create_cmd(cmd_in, &opcode))
1394                 return -EINVAL;
1395
1396         cmd_out = uverbs_zalloc(attrs, cmd_out_len);
1397         if (IS_ERR(cmd_out))
1398                 return PTR_ERR(cmd_out);
1399
1400         obj = kzalloc(sizeof(struct devx_obj), GFP_KERNEL);
1401         if (!obj)
1402                 return -ENOMEM;
1403
1404         MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
1405         if (opcode == MLX5_CMD_OP_CREATE_MKEY) {
1406                 err = devx_handle_mkey_create(dev, obj, cmd_in, cmd_in_len);
1407                 if (err)
1408                         goto obj_free;
1409         } else {
1410                 devx_set_umem_valid(cmd_in);
1411         }
1412
1413         if (opcode == MLX5_CMD_OP_CREATE_DCT) {
1414                 obj->flags |= DEVX_OBJ_FLAGS_DCT;
1415                 err = mlx5_core_create_dct(dev, &obj->core_dct, cmd_in,
1416                                            cmd_in_len, cmd_out, cmd_out_len);
1417         } else if (opcode == MLX5_CMD_OP_CREATE_CQ) {
1418                 obj->flags |= DEVX_OBJ_FLAGS_CQ;
1419                 obj->core_cq.comp = devx_cq_comp;
1420                 err = mlx5_core_create_cq(dev->mdev, &obj->core_cq,
1421                                           cmd_in, cmd_in_len, cmd_out,
1422                                           cmd_out_len);
1423         } else {
1424                 err = mlx5_cmd_exec(dev->mdev, cmd_in,
1425                                     cmd_in_len,
1426                                     cmd_out, cmd_out_len);
1427         }
1428
1429         if (err)
1430                 goto obj_free;
1431
1432         if (opcode == MLX5_CMD_OP_ALLOC_FLOW_COUNTER) {
1433                 u32 bulk = MLX5_GET(alloc_flow_counter_in,
1434                                     cmd_in,
1435                                     flow_counter_bulk_log_size);
1436
1437                 if (bulk)
1438                         bulk = 1 << bulk;
1439                 else
1440                         bulk = 128UL * MLX5_GET(alloc_flow_counter_in,
1441                                                 cmd_in,
1442                                                 flow_counter_bulk);
1443                 obj->flow_counter_bulk_size = bulk;
1444         }
1445
1446         uobj->object = obj;
1447         INIT_LIST_HEAD(&obj->event_sub);
1448         obj->ib_dev = dev;
1449         devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen,
1450                                    &obj_id);
1451         WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));
1452
1453         err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len);
1454         if (err)
1455                 goto obj_destroy;
1456
1457         if (opcode == MLX5_CMD_OP_CREATE_GENERAL_OBJECT)
1458                 obj_type = MLX5_GET(general_obj_in_cmd_hdr, cmd_in, obj_type);
1459         obj->obj_id = get_enc_obj_id(opcode | obj_type << 16, obj_id);
1460
1461         if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
1462                 err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out);
1463                 if (err)
1464                         goto obj_destroy;
1465         }
1466         return 0;
1467
1468 obj_destroy:
1469         if (obj->flags & DEVX_OBJ_FLAGS_DCT)
1470                 mlx5_core_destroy_dct(obj->ib_dev, &obj->core_dct);
1471         else if (obj->flags & DEVX_OBJ_FLAGS_CQ)
1472                 mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq);
1473         else
1474                 mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox, obj->dinlen, out,
1475                               sizeof(out));
1476 obj_free:
1477         kfree(obj);
1478         return err;
1479 }
1480
1481 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
1482         struct uverbs_attr_bundle *attrs)
1483 {
1484         void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN);
1485         int cmd_out_len = uverbs_attr_get_len(attrs,
1486                                         MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT);
1487         struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
1488                                                           MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE);
1489         struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1490                 &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1491         struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
1492         void *cmd_out;
1493         int err;
1494         int uid;
1495
1496         if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
1497                 return -EINVAL;
1498
1499         uid = devx_get_uid(c, cmd_in);
1500         if (uid < 0)
1501                 return uid;
1502
1503         if (!devx_is_obj_modify_cmd(cmd_in))
1504                 return -EINVAL;
1505
1506         if (!devx_is_valid_obj_id(attrs, uobj, cmd_in))
1507                 return -EINVAL;
1508
1509         cmd_out = uverbs_zalloc(attrs, cmd_out_len);
1510         if (IS_ERR(cmd_out))
1511                 return PTR_ERR(cmd_out);
1512
1513         MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
1514         devx_set_umem_valid(cmd_in);
1515
1516         err = mlx5_cmd_exec(mdev->mdev, cmd_in,
1517                             uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN),
1518                             cmd_out, cmd_out_len);
1519         if (err)
1520                 return err;
1521
1522         return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
1523                               cmd_out, cmd_out_len);
1524 }
1525
1526 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
1527         struct uverbs_attr_bundle *attrs)
1528 {
1529         void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN);
1530         int cmd_out_len = uverbs_attr_get_len(attrs,
1531                                               MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT);
1532         struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
1533                                                           MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE);
1534         struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1535                 &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1536         void *cmd_out;
1537         int err;
1538         int uid;
1539         struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
1540
1541         if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
1542                 return -EINVAL;
1543
1544         uid = devx_get_uid(c, cmd_in);
1545         if (uid < 0)
1546                 return uid;
1547
1548         if (!devx_is_obj_query_cmd(cmd_in))
1549                 return -EINVAL;
1550
1551         if (!devx_is_valid_obj_id(attrs, uobj, cmd_in))
1552                 return -EINVAL;
1553
1554         cmd_out = uverbs_zalloc(attrs, cmd_out_len);
1555         if (IS_ERR(cmd_out))
1556                 return PTR_ERR(cmd_out);
1557
1558         MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
1559         err = mlx5_cmd_exec(mdev->mdev, cmd_in,
1560                             uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN),
1561                             cmd_out, cmd_out_len);
1562         if (err)
1563                 return err;
1564
1565         return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
1566                               cmd_out, cmd_out_len);
1567 }
1568
1569 struct devx_async_event_queue {
1570         spinlock_t              lock;
1571         wait_queue_head_t       poll_wait;
1572         struct list_head        event_list;
1573         atomic_t                bytes_in_use;
1574         u8                      is_destroyed:1;
1575 };
1576
1577 struct devx_async_cmd_event_file {
1578         struct ib_uobject               uobj;
1579         struct devx_async_event_queue   ev_queue;
1580         struct mlx5_async_ctx           async_ctx;
1581 };
1582
1583 static void devx_init_event_queue(struct devx_async_event_queue *ev_queue)
1584 {
1585         spin_lock_init(&ev_queue->lock);
1586         INIT_LIST_HEAD(&ev_queue->event_list);
1587         init_waitqueue_head(&ev_queue->poll_wait);
1588         atomic_set(&ev_queue->bytes_in_use, 0);
1589         ev_queue->is_destroyed = 0;
1590 }
1591
1592 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)(
1593         struct uverbs_attr_bundle *attrs)
1594 {
1595         struct devx_async_cmd_event_file *ev_file;
1596
1597         struct ib_uobject *uobj = uverbs_attr_get_uobject(
1598                 attrs, MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE);
1599         struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
1600
1601         ev_file = container_of(uobj, struct devx_async_cmd_event_file,
1602                                uobj);
1603         devx_init_event_queue(&ev_file->ev_queue);
1604         mlx5_cmd_init_async_ctx(mdev->mdev, &ev_file->async_ctx);
1605         return 0;
1606 }
1607
1608 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC)(
1609         struct uverbs_attr_bundle *attrs)
1610 {
1611         struct ib_uobject *uobj = uverbs_attr_get_uobject(
1612                 attrs, MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE);
1613         struct devx_async_event_file *ev_file;
1614         struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1615                 &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1616         struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
1617         u32 flags;
1618         int err;
1619
1620         err = uverbs_get_flags32(&flags, attrs,
1621                 MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS,
1622                 MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA);
1623
1624         if (err)
1625                 return err;
1626
1627         ev_file = container_of(uobj, struct devx_async_event_file,
1628                                uobj);
1629         spin_lock_init(&ev_file->lock);
1630         INIT_LIST_HEAD(&ev_file->event_list);
1631         init_waitqueue_head(&ev_file->poll_wait);
1632         if (flags & MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA)
1633                 ev_file->omit_data = 1;
1634         INIT_LIST_HEAD(&ev_file->subscribed_events_list);
1635         ev_file->dev = dev;
1636         get_device(&dev->ib_dev.dev);
1637         return 0;
1638 }
1639
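/*
 * Async command completion callback: queue the prepared devx_async_data on
 * the command event file and wake up any blocked or polling reader.
 */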
1640 static void devx_query_callback(int status, struct mlx5_async_work *context)
1641 {
1642         struct devx_async_data *async_data =
1643                 container_of(context, struct devx_async_data, cb_work);
1644         struct devx_async_cmd_event_file *ev_file = async_data->ev_file;
1645         struct devx_async_event_queue *ev_queue = &ev_file->ev_queue;
1646         unsigned long flags;
1647
1648         /*
1649          * Note that if the struct devx_async_cmd_event_file uobj begins to be
1650          * destroyed it will block at mlx5_cmd_cleanup_async_ctx() until this
1651          * routine returns, ensuring that it always remains valid here.
1652          */
1653         spin_lock_irqsave(&ev_queue->lock, flags);
1654         list_add_tail(&async_data->list, &ev_queue->event_list);
1655         spin_unlock_irqrestore(&ev_queue->lock, flags);
1656
1657         wake_up_interruptible(&ev_queue->poll_wait);
1658 }
1659
1660 #define MAX_ASYNC_BYTES_IN_USE (1024 * 1024) /* 1MB */
1661
1662 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)(
1663         struct uverbs_attr_bundle *attrs)
1664 {
1665         void *cmd_in = uverbs_attr_get_alloced_ptr(attrs,
1666                                 MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN);
1667         struct ib_uobject *uobj = uverbs_attr_get_uobject(
1668                                 attrs,
1669                                 MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_HANDLE);
1670         u16 cmd_out_len;
1671         struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1672                 &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1673         struct ib_uobject *fd_uobj;
1674         int err;
1675         int uid;
1676         struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
1677         struct devx_async_cmd_event_file *ev_file;
1678         struct devx_async_data *async_data;
1679
1680         if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
1681                 return -EINVAL;
1682
1683         uid = devx_get_uid(c, cmd_in);
1684         if (uid < 0)
1685                 return uid;
1686
1687         if (!devx_is_obj_query_cmd(cmd_in))
1688                 return -EINVAL;
1689
1690         err = uverbs_get_const(&cmd_out_len, attrs,
1691                                MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN);
1692         if (err)
1693                 return err;
1694
1695         if (!devx_is_valid_obj_id(attrs, uobj, cmd_in))
1696                 return -EINVAL;
1697
1698         fd_uobj = uverbs_attr_get_uobject(attrs,
1699                                 MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD);
1700         if (IS_ERR(fd_uobj))
1701                 return PTR_ERR(fd_uobj);
1702
1703         ev_file = container_of(fd_uobj, struct devx_async_cmd_event_file,
1704                                uobj);
1705
1706         if (atomic_add_return(cmd_out_len, &ev_file->ev_queue.bytes_in_use) >
1707                         MAX_ASYNC_BYTES_IN_USE) {
1708                 atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use);
1709                 return -EAGAIN;
1710         }
1711
1712         async_data = kvzalloc(struct_size(async_data, hdr.out_data,
1713                                           cmd_out_len), GFP_KERNEL);
1714         if (!async_data) {
1715                 err = -ENOMEM;
1716                 goto sub_bytes;
1717         }
1718
1719         err = uverbs_copy_from(&async_data->hdr.wr_id, attrs,
1720                                MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID);
1721         if (err)
1722                 goto free_async;
1723
1724         async_data->cmd_out_len = cmd_out_len;
1725         async_data->mdev = mdev;
1726         async_data->ev_file = ev_file;
1727
1728         MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
1729         err = mlx5_cmd_exec_cb(&ev_file->async_ctx, cmd_in,
1730                     uverbs_attr_get_len(attrs,
1731                                 MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN),
1732                     async_data->hdr.out_data,
1733                     async_data->cmd_out_len,
1734                     devx_query_callback, &async_data->cb_work);
1735
1736         if (err)
1737                 goto free_async;
1738
1739         return 0;
1740
1741 free_async:
1742         kvfree(async_data);
1743 sub_bytes:
1744         atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use);
1745         return err;
1746 }
1747
1748 static void
1749 subscribe_event_xa_dealloc(struct mlx5_devx_event_table *devx_event_table,
1750                            u32 key_level1,
1751                            bool is_level2,
1752                            u32 key_level2)
1753 {
1754         struct devx_event *event;
1755         struct devx_obj_event *xa_val_level2;
1756
1757         /* Level 1 is kept for future use, no need to free it */
1758         if (!is_level2)
1759                 return;
1760
1761         event = xa_load(&devx_event_table->event_xa, key_level1);
1762         WARN_ON(!event);
1763
1764         xa_val_level2 = xa_load(&event->object_ids,
1765                                 key_level2);
1766         if (list_empty(&xa_val_level2->obj_sub_list)) {
1767                 xa_erase(&event->object_ids,
1768                          key_level2);
1769                 kfree_rcu(xa_val_level2, rcu);
1770         }
1771 }
1772
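/*
 * Make sure the level-1 (event type) and, for affiliated events, the level-2
 * (object id) XA entries exist, allocating them on first use.
 */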
1773 static int
1774 subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
1775                          u32 key_level1,
1776                          bool is_level2,
1777                          u32 key_level2)
1778 {
1779         struct devx_obj_event *obj_event;
1780         struct devx_event *event;
1781         int err;
1782
1783         event = xa_load(&devx_event_table->event_xa, key_level1);
1784         if (!event) {
1785                 event = kzalloc(sizeof(*event), GFP_KERNEL);
1786                 if (!event)
1787                         return -ENOMEM;
1788
1789                 INIT_LIST_HEAD(&event->unaffiliated_list);
1790                 xa_init(&event->object_ids);
1791
1792                 err = xa_insert(&devx_event_table->event_xa,
1793                                 key_level1,
1794                                 event,
1795                                 GFP_KERNEL);
1796                 if (err) {
1797                         kfree(event);
1798                         return err;
1799                 }
1800         }
1801
1802         if (!is_level2)
1803                 return 0;
1804
1805         obj_event = xa_load(&event->object_ids, key_level2);
1806         if (!obj_event) {
1807                 obj_event = kzalloc(sizeof(*obj_event), GFP_KERNEL);
1808                 if (!obj_event)
1809                         /* Level 1 is kept for future use, no need to free */
1810                         return -ENOMEM;
1811
1812                 err = xa_insert(&event->object_ids,
1813                                 key_level2,
1814                                 obj_event,
1815                                 GFP_KERNEL);
1816                 if (err) {
1817                         kfree(obj_event);
1818                         return err;
1819                 }
1820                 INIT_LIST_HEAD(&obj_event->obj_sub_list);
1821         }
1822
1823         return 0;
1824 }
1825
1826 static bool is_valid_events_legacy(int num_events, u16 *event_type_num_list,
1827                                    struct devx_obj *obj)
1828 {
1829         int i;
1830
1831         for (i = 0; i < num_events; i++) {
1832                 if (obj) {
1833                         if (!is_legacy_obj_event_num(event_type_num_list[i]))
1834                                 return false;
1835                 } else if (!is_legacy_unaffiliated_event_num(
1836                                 event_type_num_list[i])) {
1837                         return false;
1838                 }
1839         }
1840
1841         return true;
1842 }
1843
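/*
 * Validate the requested event numbers against the firmware's affiliated or
 * unaffiliated user-event bitmasks, falling back to the legacy fixed lists
 * when the device lacks event_cap.
 */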
1844 #define MAX_SUPP_EVENT_NUM 255
1845 static bool is_valid_events(struct mlx5_core_dev *dev,
1846                             int num_events, u16 *event_type_num_list,
1847                             struct devx_obj *obj)
1848 {
1849         __be64 *aff_events;
1850         __be64 *unaff_events;
1851         int mask_entry;
1852         int mask_bit;
1853         int i;
1854
1855         if (MLX5_CAP_GEN(dev, event_cap)) {
1856                 aff_events = MLX5_CAP_DEV_EVENT(dev,
1857                                                 user_affiliated_events);
1858                 unaff_events = MLX5_CAP_DEV_EVENT(dev,
1859                                                   user_unaffiliated_events);
1860         } else {
1861                 return is_valid_events_legacy(num_events, event_type_num_list,
1862                                               obj);
1863         }
1864
1865         for (i = 0; i < num_events; i++) {
1866                 if (event_type_num_list[i] > MAX_SUPP_EVENT_NUM)
1867                         return false;
1868
1869                 mask_entry = event_type_num_list[i] / 64;
1870                 mask_bit = event_type_num_list[i] % 64;
1871
1872                 if (obj) {
1873                         /* CQ completion */
1874                         if (event_type_num_list[i] == 0)
1875                                 continue;
1876
1877                         if (!(be64_to_cpu(aff_events[mask_entry]) &
1878                                         (1ull << mask_bit)))
1879                                 return false;
1880
1881                         continue;
1882                 }
1883
1884                 if (!(be64_to_cpu(unaff_events[mask_entry]) &
1885                                 (1ull << mask_bit)))
1886                         return false;
1887         }
1888
1889         return true;
1890 }
1891
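/*
 * Subscribe an event FD (optionally redirected to an eventfd) to a list of
 * device events.  All XA entries and subscription objects are allocated
 * first under the table lock; only once nothing can fail any more are the
 * subscriptions linked into the live dispatch lists.
 */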
1892 #define MAX_NUM_EVENTS 16
1893 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
1894         struct uverbs_attr_bundle *attrs)
1895 {
1896         struct ib_uobject *devx_uobj = uverbs_attr_get_uobject(
1897                                 attrs,
1898                                 MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE);
1899         struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1900                 &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1901         struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
1902         struct ib_uobject *fd_uobj;
1903         struct devx_obj *obj = NULL;
1904         struct devx_async_event_file *ev_file;
1905         struct mlx5_devx_event_table *devx_event_table = &dev->devx_event_table;
1906         u16 *event_type_num_list;
1907         struct devx_event_subscription *event_sub, *tmp_sub;
1908         struct list_head sub_list;
1909         int redirect_fd;
1910         bool use_eventfd = false;
1911         int num_events;
1912         int num_alloc_xa_entries = 0;
1913         u16 obj_type = 0;
1914         u64 cookie = 0;
1915         u32 obj_id = 0;
1916         int err;
1917         int i;
1918
1919         if (!c->devx_uid)
1920                 return -EINVAL;
1921
1922         if (!IS_ERR(devx_uobj)) {
1923                 obj = (struct devx_obj *)devx_uobj->object;
1924                 if (obj)
1925                         obj_id = get_dec_obj_id(obj->obj_id);
1926         }
1927
1928         fd_uobj = uverbs_attr_get_uobject(attrs,
1929                                 MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE);
1930         if (IS_ERR(fd_uobj))
1931                 return PTR_ERR(fd_uobj);
1932
1933         ev_file = container_of(fd_uobj, struct devx_async_event_file,
1934                                uobj);
1935
1936         if (uverbs_attr_is_valid(attrs,
1937                                  MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM)) {
1938                 err = uverbs_copy_from(&redirect_fd, attrs,
1939                                MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM);
1940                 if (err)
1941                         return err;
1942
1943                 use_eventfd = true;
1944         }
1945
1946         if (uverbs_attr_is_valid(attrs,
1947                                  MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE)) {
1948                 if (use_eventfd)
1949                         return -EINVAL;
1950
1951                 err = uverbs_copy_from(&cookie, attrs,
1952                                 MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE);
1953                 if (err)
1954                         return err;
1955         }
1956
1957         num_events = uverbs_attr_ptr_get_array_size(
1958                 attrs, MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
1959                 sizeof(u16));
1960
1961         if (num_events < 0)
1962                 return num_events;
1963
1964         if (num_events > MAX_NUM_EVENTS)
1965                 return -EINVAL;
1966
1967         event_type_num_list = uverbs_attr_get_alloced_ptr(attrs,
1968                         MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST);
1969
1970         if (!is_valid_events(dev->mdev, num_events, event_type_num_list, obj))
1971                 return -EINVAL;
1972
1973         INIT_LIST_HEAD(&sub_list);
1974
1975         /* Protect against concurrent subscriptions to the same XA entries
1976          * so that all of them can succeed
1977          */
1978         mutex_lock(&devx_event_table->event_xa_lock);
1979         for (i = 0; i < num_events; i++) {
1980                 u32 key_level1;
1981
1982                 if (obj)
1983                         obj_type = get_dec_obj_type(obj,
1984                                                     event_type_num_list[i]);
1985                 key_level1 = event_type_num_list[i] | obj_type << 16;
1986
1987                 err = subscribe_event_xa_alloc(devx_event_table,
1988                                                key_level1,
1989                                                obj,
1990                                                obj_id);
1991                 if (err)
1992                         goto err;
1993
1994                 num_alloc_xa_entries++;
1995                 event_sub = kzalloc(sizeof(*event_sub), GFP_KERNEL);
1996                 if (!event_sub) {
1997                         err = -ENOMEM;
1998                         goto err;
1999                 }
2000
2001                 list_add_tail(&event_sub->event_list, &sub_list);
2002                 uverbs_uobject_get(&ev_file->uobj);
2003                 if (use_eventfd) {
2004                         event_sub->eventfd =
2005                                 eventfd_ctx_fdget(redirect_fd);
2006
2007                         if (IS_ERR(event_sub->eventfd)) {
2008                                 err = PTR_ERR(event_sub->eventfd);
2009                                 event_sub->eventfd = NULL;
2010                                 goto err;
2011                         }
2012                 }
2013
2014                 event_sub->cookie = cookie;
2015                 event_sub->ev_file = ev_file;
2016                 /* may be needed when cleaning up the devx object/subscription */
2017                 event_sub->xa_key_level1 = key_level1;
2018                 event_sub->xa_key_level2 = obj_id;
2019                 INIT_LIST_HEAD(&event_sub->obj_list);
2020         }
2021
2022         /* Once all the allocations and XA data insertions are done, we can
2023          * go ahead and add all the subscriptions to the relevant lists
2024          * without concern of a failure.
2025          */
2026         list_for_each_entry_safe(event_sub, tmp_sub, &sub_list, event_list) {
2027                 struct devx_event *event;
2028                 struct devx_obj_event *obj_event;
2029
2030                 list_del_init(&event_sub->event_list);
2031
2032                 spin_lock_irq(&ev_file->lock);
2033                 list_add_tail_rcu(&event_sub->file_list,
2034                                   &ev_file->subscribed_events_list);
2035                 spin_unlock_irq(&ev_file->lock);
2036
2037                 event = xa_load(&devx_event_table->event_xa,
2038                                 event_sub->xa_key_level1);
2039                 WARN_ON(!event);
2040
2041                 if (!obj) {
2042                         list_add_tail_rcu(&event_sub->xa_list,
2043                                           &event->unaffiliated_list);
2044                         continue;
2045                 }
2046
2047                 obj_event = xa_load(&event->object_ids, obj_id);
2048                 WARN_ON(!obj_event);
2049                 list_add_tail_rcu(&event_sub->xa_list,
2050                                   &obj_event->obj_sub_list);
2051                 list_add_tail_rcu(&event_sub->obj_list,
2052                                   &obj->event_sub);
2053         }
2054
2055         mutex_unlock(&devx_event_table->event_xa_lock);
2056         return 0;
2057
2058 err:
2059         list_for_each_entry_safe(event_sub, tmp_sub, &sub_list, event_list) {
2060                 list_del(&event_sub->event_list);
2061
2062                 subscribe_event_xa_dealloc(devx_event_table,
2063                                            event_sub->xa_key_level1,
2064                                            obj,
2065                                            obj_id);
2066
2067                 if (event_sub->eventfd)
2068                         eventfd_ctx_put(event_sub->eventfd);
2069                 uverbs_uobject_put(&event_sub->ev_file->uobj);
2070                 kfree(event_sub);
2071         }
2072
2073         mutex_unlock(&devx_event_table->event_xa_lock);
2074         return err;
2075 }
2076
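/*
 * Pin the user memory range described by the REG attributes and compute the
 * page shift, page count and start offset needed to build a firmware UMEM.
 */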
2077 static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
2078                          struct uverbs_attr_bundle *attrs,
2079                          struct devx_umem *obj)
2080 {
2081         u64 addr;
2082         size_t size;
2083         u32 access;
2084         int npages;
2085         int err;
2086         u32 page_mask;
2087
2088         if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) ||
2089             uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN))
2090                 return -EFAULT;
2091
2092         err = uverbs_get_flags32(&access, attrs,
2093                                  MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
2094                                  IB_ACCESS_LOCAL_WRITE |
2095                                  IB_ACCESS_REMOTE_WRITE |
2096                                  IB_ACCESS_REMOTE_READ);
2097         if (err)
2098                 return err;
2099
2100         err = ib_check_mr_access(access);
2101         if (err)
2102                 return err;
2103
2104         obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access);
2105         if (IS_ERR(obj->umem))
2106                 return PTR_ERR(obj->umem);
2107
2108         mlx5_ib_cont_pages(obj->umem, obj->umem->address,
2109                            MLX5_MKEY_PAGE_SHIFT_MASK, &npages,
2110                            &obj->page_shift, &obj->ncont, NULL);
2111
2112         if (!npages) {
2113                 ib_umem_release(obj->umem);
2114                 return -EINVAL;
2115         }
2116
2117         page_mask = (1 << obj->page_shift) - 1;
2118         obj->page_offset = obj->umem->address & page_mask;
2119
2120         return 0;
2121 }
2122
2123 static int devx_umem_reg_cmd_alloc(struct uverbs_attr_bundle *attrs,
2124                                    struct devx_umem *obj,
2125                                    struct devx_umem_reg_cmd *cmd)
2126 {
2127         cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) +
2128                     (MLX5_ST_SZ_BYTES(mtt) * obj->ncont);
2129         cmd->in = uverbs_zalloc(attrs, cmd->inlen);
2130         return PTR_ERR_OR_ZERO(cmd->in);
2131 }
2132
2133 static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev,
2134                                     struct devx_umem *obj,
2135                                     struct devx_umem_reg_cmd *cmd)
2136 {
2137         void *umem;
2138         __be64 *mtt;
2139
2140         umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem);
2141         mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt);
2142
2143         MLX5_SET(create_umem_in, cmd->in, opcode, MLX5_CMD_OP_CREATE_UMEM);
2144         MLX5_SET64(umem, umem, num_of_mtt, obj->ncont);
2145         MLX5_SET(umem, umem, log_page_size, obj->page_shift -
2146                                             MLX5_ADAPTER_PAGE_SHIFT);
2147         MLX5_SET(umem, umem, page_offset, obj->page_offset);
2148         mlx5_ib_populate_pas(dev, obj->umem, obj->page_shift, mtt,
2149                              (obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) |
2150                              MLX5_IB_MTT_READ);
2151 }
2152
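/*
 * Register user memory as a firmware UMEM object: pin the pages, build and
 * execute CREATE_UMEM, and return the new object id to userspace.
 */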
2153 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
2154         struct uverbs_attr_bundle *attrs)
2155 {
2156         struct devx_umem_reg_cmd cmd;
2157         struct devx_umem *obj;
2158         struct ib_uobject *uobj = uverbs_attr_get_uobject(
2159                 attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE);
2160         u32 obj_id;
2161         struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
2162                 &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
2163         struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
2164         int err;
2165
2166         if (!c->devx_uid)
2167                 return -EINVAL;
2168
2169         obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL);
2170         if (!obj)
2171                 return -ENOMEM;
2172
2173         err = devx_umem_get(dev, &c->ibucontext, attrs, obj);
2174         if (err)
2175                 goto err_obj_free;
2176
2177         err = devx_umem_reg_cmd_alloc(attrs, obj, &cmd);
2178         if (err)
2179                 goto err_umem_release;
2180
2181         devx_umem_reg_cmd_build(dev, obj, &cmd);
2182
2183         MLX5_SET(create_umem_in, cmd.in, uid, c->devx_uid);
2184         err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out,
2185                             sizeof(cmd.out));
2186         if (err)
2187                 goto err_umem_release;
2188
2189         obj->mdev = dev->mdev;
2190         uobj->object = obj;
2191         devx_obj_build_destroy_cmd(cmd.in, cmd.out, obj->dinbox, &obj->dinlen, &obj_id);
2192         uverbs_finalize_uobj_create(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE);
2193
2194         err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, &obj_id,
2195                              sizeof(obj_id));
2196         return err;
2197
2198 err_umem_release:
2199         ib_umem_release(obj->umem);
2200 err_obj_free:
2201         kfree(obj);
2202         return err;
2203 }
2204
2205 static int devx_umem_cleanup(struct ib_uobject *uobject,
2206                              enum rdma_remove_reason why,
2207                              struct uverbs_attr_bundle *attrs)
2208 {
2209         struct devx_umem *obj = uobject->object;
2210         u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
2211         int err;
2212
2213         err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
2214         if (ib_is_destroy_retryable(err, why, uobject))
2215                 return err;
2216
2217         ib_umem_release(obj->umem);
2218         kfree(obj);
2219         return 0;
2220 }
2221
2222 static bool is_unaffiliated_event(struct mlx5_core_dev *dev,
2223                                   unsigned long event_type)
2224 {
2225         __be64 *unaff_events;
2226         int mask_entry;
2227         int mask_bit;
2228
2229         if (!MLX5_CAP_GEN(dev, event_cap))
2230                 return is_legacy_unaffiliated_event_num(event_type);
2231
2232         unaff_events = MLX5_CAP_DEV_EVENT(dev,
2233                                           user_unaffiliated_events);
2234         WARN_ON(event_type > MAX_SUPP_EVENT_NUM);
2235
2236         mask_entry = event_type / 64;
2237         mask_bit = event_type % 64;
2238
2239         if (!(be64_to_cpu(unaff_events[mask_entry]) & (1ull << mask_bit)))
2240                 return false;
2241
2242         return true;
2243 }
2244
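/*
 * Extract the object number from an EQE according to the event type; generic
 * affiliated events carry it in the affiliated_event_header.
 */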
2245 static u32 devx_get_obj_id_from_event(unsigned long event_type, void *data)
2246 {
2247         struct mlx5_eqe *eqe = data;
2248         u32 obj_id = 0;
2249
2250         switch (event_type) {
2251         case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
2252         case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
2253         case MLX5_EVENT_TYPE_PATH_MIG:
2254         case MLX5_EVENT_TYPE_COMM_EST:
2255         case MLX5_EVENT_TYPE_SQ_DRAINED:
2256         case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
2257         case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
2258         case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
2259         case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
2260         case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
2261                 obj_id = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
2262                 break;
2263         case MLX5_EVENT_TYPE_XRQ_ERROR:
2264                 obj_id = be32_to_cpu(eqe->data.xrq_err.type_xrqn) & 0xffffff;
2265                 break;
2266         case MLX5_EVENT_TYPE_DCT_DRAINED:
2267         case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
2268                 obj_id = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
2269                 break;
2270         case MLX5_EVENT_TYPE_CQ_ERROR:
2271                 obj_id = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
2272                 break;
2273         default:
2274                 obj_id = MLX5_GET(affiliated_event_header, &eqe->data, obj_id);
2275                 break;
2276         }
2277
2278         return obj_id;
2279 }
2280
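/*
 * Queue a single event for one subscriber.  In omit_data mode only the
 * subscription itself is linked (readers get just the cookie); otherwise the
 * EQE is copied into a freshly allocated entry, and an allocation failure is
 * recorded as an overflow for the next read().
 */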
2281 static int deliver_event(struct devx_event_subscription *event_sub,
2282                          const void *data)
2283 {
2284         struct devx_async_event_file *ev_file;
2285         struct devx_async_event_data *event_data;
2286         unsigned long flags;
2287
2288         ev_file = event_sub->ev_file;
2289
2290         if (ev_file->omit_data) {
2291                 spin_lock_irqsave(&ev_file->lock, flags);
2292                 if (!list_empty(&event_sub->event_list) ||
2293                     ev_file->is_destroyed) {
2294                         spin_unlock_irqrestore(&ev_file->lock, flags);
2295                         return 0;
2296                 }
2297
2298                 list_add_tail(&event_sub->event_list, &ev_file->event_list);
2299                 spin_unlock_irqrestore(&ev_file->lock, flags);
2300                 wake_up_interruptible(&ev_file->poll_wait);
2301                 return 0;
2302         }
2303
2304         event_data = kzalloc(sizeof(*event_data) + sizeof(struct mlx5_eqe),
2305                              GFP_ATOMIC);
2306         if (!event_data) {
2307                 spin_lock_irqsave(&ev_file->lock, flags);
2308                 ev_file->is_overflow_err = 1;
2309                 spin_unlock_irqrestore(&ev_file->lock, flags);
2310                 return -ENOMEM;
2311         }
2312
2313         event_data->hdr.cookie = event_sub->cookie;
2314         memcpy(event_data->hdr.out_data, data, sizeof(struct mlx5_eqe));
2315
2316         spin_lock_irqsave(&ev_file->lock, flags);
2317         if (!ev_file->is_destroyed)
2318                 list_add_tail(&event_data->list, &ev_file->event_list);
2319         else
2320                 kfree(event_data);
2321         spin_unlock_irqrestore(&ev_file->lock, flags);
2322         wake_up_interruptible(&ev_file->poll_wait);
2323
2324         return 0;
2325 }
2326
2327 static void dispatch_event_fd(struct list_head *fd_list,
2328                               const void *data)
2329 {
2330         struct devx_event_subscription *item;
2331
2332         list_for_each_entry_rcu(item, fd_list, xa_list) {
2333                 if (item->eventfd)
2334                         eventfd_signal(item->eventfd, 1);
2335                 else
2336                         deliver_event(item, data);
2337         }
2338 }
2339
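/*
 * EQ notifier: map the event type (plus object type and id for affiliated
 * events) to its XA entry and fan the event out to all subscribers under RCU.
 */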
2340 static int devx_event_notifier(struct notifier_block *nb,
2341                                unsigned long event_type, void *data)
2342 {
2343         struct mlx5_devx_event_table *table;
2344         struct mlx5_ib_dev *dev;
2345         struct devx_event *event;
2346         struct devx_obj_event *obj_event;
2347         u16 obj_type = 0;
2348         bool is_unaffiliated;
2349         u32 obj_id;
2350
2351         /* Explicitly filter out kernel events which may occur frequently */
2352         if (event_type == MLX5_EVENT_TYPE_CMD ||
2353             event_type == MLX5_EVENT_TYPE_PAGE_REQUEST)
2354                 return NOTIFY_OK;
2355
2356         table = container_of(nb, struct mlx5_devx_event_table, devx_nb.nb);
2357         dev = container_of(table, struct mlx5_ib_dev, devx_event_table);
2358         is_unaffiliated = is_unaffiliated_event(dev->mdev, event_type);
2359
2360         if (!is_unaffiliated)
2361                 obj_type = get_event_obj_type(event_type, data);
2362
2363         rcu_read_lock();
2364         event = xa_load(&table->event_xa, event_type | (obj_type << 16));
2365         if (!event) {
2366                 rcu_read_unlock();
2367                 return NOTIFY_DONE;
2368         }
2369
2370         if (is_unaffiliated) {
2371                 dispatch_event_fd(&event->unaffiliated_list, data);
2372                 rcu_read_unlock();
2373                 return NOTIFY_OK;
2374         }
2375
2376         obj_id = devx_get_obj_id_from_event(event_type, data);
2377         obj_event = xa_load(&event->object_ids, obj_id);
2378         if (!obj_event) {
2379                 rcu_read_unlock();
2380                 return NOTIFY_DONE;
2381         }
2382
2383         dispatch_event_fd(&obj_event->obj_sub_list, data);
2384
2385         rcu_read_unlock();
2386         return NOTIFY_OK;
2387 }
2388
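/*
 * Allocate the devx whitelist UID; when the device grants one, initialize
 * the event table and register for EQ notifications.
 */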
2389 int mlx5_ib_devx_init(struct mlx5_ib_dev *dev)
2390 {
2391         struct mlx5_devx_event_table *table = &dev->devx_event_table;
2392         int uid;
2393
2394         uid = mlx5_ib_devx_create(dev, false);
2395         if (uid > 0) {
2396                 dev->devx_whitelist_uid = uid;
2397                 xa_init(&table->event_xa);
2398                 mutex_init(&table->event_xa_lock);
2399                 MLX5_NB_INIT(&table->devx_nb, devx_event_notifier, NOTIFY_ANY);
2400                 mlx5_eq_notifier_register(dev->mdev, &table->devx_nb);
2401         }
2402
2403         return 0;
2404 }
2405
2406 void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev)
2407 {
2408         struct mlx5_devx_event_table *table = &dev->devx_event_table;
2409         struct devx_event_subscription *sub, *tmp;
2410         struct devx_event *event;
2411         void *entry;
2412         unsigned long id;
2413
2414         if (dev->devx_whitelist_uid) {
2415                 mlx5_eq_notifier_unregister(dev->mdev, &table->devx_nb);
2416                 mutex_lock(&dev->devx_event_table.event_xa_lock);
2417                 xa_for_each(&table->event_xa, id, entry) {
2418                         event = entry;
2419                         list_for_each_entry_safe(
2420                                 sub, tmp, &event->unaffiliated_list, xa_list)
2421                                 devx_cleanup_subscription(dev, sub);
2422                         kfree(entry);
2423                 }
2424                 mutex_unlock(&dev->devx_event_table.event_xa_lock);
2425                 xa_destroy(&table->event_xa);
2426
2427                 mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
2428         }
2429 }
2430
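/*
 * read() for the async command FD: block (unless O_NONBLOCK) until a
 * completed command is queued, copy its header and output to userspace and
 * release its bytes_in_use budget.
 */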
2431 static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
2432                                          size_t count, loff_t *pos)
2433 {
2434         struct devx_async_cmd_event_file *comp_ev_file = filp->private_data;
2435         struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
2436         struct devx_async_data *event;
2437         int ret = 0;
2438         size_t eventsz;
2439
2440         spin_lock_irq(&ev_queue->lock);
2441
2442         while (list_empty(&ev_queue->event_list)) {
2443                 spin_unlock_irq(&ev_queue->lock);
2444
2445                 if (filp->f_flags & O_NONBLOCK)
2446                         return -EAGAIN;
2447
2448                 if (wait_event_interruptible(
2449                             ev_queue->poll_wait,
2450                             (!list_empty(&ev_queue->event_list) ||
2451                              ev_queue->is_destroyed))) {
2452                         return -ERESTARTSYS;
2453                 }
2454
2455                 spin_lock_irq(&ev_queue->lock);
2456                 if (ev_queue->is_destroyed) {
2457                         spin_unlock_irq(&ev_queue->lock);
2458                         return -EIO;
2459                 }
2460         }
2461
2462         event = list_entry(ev_queue->event_list.next,
2463                            struct devx_async_data, list);
2464         eventsz = event->cmd_out_len +
2465                         sizeof(struct mlx5_ib_uapi_devx_async_cmd_hdr);
2466
2467         if (eventsz > count) {
2468                 spin_unlock_irq(&ev_queue->lock);
2469                 return -ENOSPC;
2470         }
2471
2472         list_del(ev_queue->event_list.next);
2473         spin_unlock_irq(&ev_queue->lock);
2474
2475         if (copy_to_user(buf, &event->hdr, eventsz))
2476                 ret = -EFAULT;
2477         else
2478                 ret = eventsz;
2479
2480         atomic_sub(event->cmd_out_len, &ev_queue->bytes_in_use);
2481         kvfree(event);
2482         return ret;
2483 }
2484
2485 static __poll_t devx_async_cmd_event_poll(struct file *filp,
2486                                           struct poll_table_struct *wait)
2487 {
2488         struct devx_async_cmd_event_file *comp_ev_file = filp->private_data;
2489         struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
2490         __poll_t pollflags = 0;
2491
2492         poll_wait(filp, &ev_queue->poll_wait, wait);
2493
2494         spin_lock_irq(&ev_queue->lock);
2495         if (ev_queue->is_destroyed)
2496                 pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
2497         else if (!list_empty(&ev_queue->event_list))
2498                 pollflags = EPOLLIN | EPOLLRDNORM;
2499         spin_unlock_irq(&ev_queue->lock);
2500
2501         return pollflags;
2502 }
2503
2504 static const struct file_operations devx_async_cmd_event_fops = {
2505         .owner   = THIS_MODULE,
2506         .read    = devx_async_cmd_event_read,
2507         .poll    = devx_async_cmd_event_poll,
2508         .release = uverbs_uobject_fd_release,
2509         .llseek  = no_llseek,
2510 };
2511
2512 static ssize_t devx_async_event_read(struct file *filp, char __user *buf,
2513                                      size_t count, loff_t *pos)
2514 {
2515         struct devx_async_event_file *ev_file = filp->private_data;
2516         struct devx_event_subscription *event_sub;
2517         struct devx_async_event_data *event;
2518         int ret = 0;
2519         size_t eventsz;
2520         bool omit_data;
2521         void *event_data;
2522
2523         omit_data = ev_file->omit_data;
2524
2525         spin_lock_irq(&ev_file->lock);
2526
2527         if (ev_file->is_overflow_err) {
2528                 ev_file->is_overflow_err = 0;
2529                 spin_unlock_irq(&ev_file->lock);
2530                 return -EOVERFLOW;
2531         }
2532
2534         while (list_empty(&ev_file->event_list)) {
2535                 spin_unlock_irq(&ev_file->lock);
2536
2537                 if (filp->f_flags & O_NONBLOCK)
2538                         return -EAGAIN;
2539
2540                 if (wait_event_interruptible(ev_file->poll_wait,
2541                             (!list_empty(&ev_file->event_list) ||
2542                              ev_file->is_destroyed))) {
2543                         return -ERESTARTSYS;
2544                 }
2545
2546                 spin_lock_irq(&ev_file->lock);
2547                 if (ev_file->is_destroyed) {
2548                         spin_unlock_irq(&ev_file->lock);
2549                         return -EIO;
2550                 }
2551         }
2552
2553         if (omit_data) {
2554                 event_sub = list_first_entry(&ev_file->event_list,
2555                                         struct devx_event_subscription,
2556                                         event_list);
2557                 eventsz = sizeof(event_sub->cookie);
2558                 event_data = &event_sub->cookie;
2559         } else {
2560                 event = list_first_entry(&ev_file->event_list,
2561                                       struct devx_async_event_data, list);
2562                 eventsz = sizeof(struct mlx5_eqe) +
2563                         sizeof(struct mlx5_ib_uapi_devx_async_event_hdr);
2564                 event_data = &event->hdr;
2565         }
2566
2567         if (eventsz > count) {
2568                 spin_unlock_irq(&ev_file->lock);
2569                 return -EINVAL;
2570         }
2571
2572         if (omit_data)
2573                 list_del_init(&event_sub->event_list);
2574         else
2575                 list_del(&event->list);
2576
2577         spin_unlock_irq(&ev_file->lock);
2578
2579         if (copy_to_user(buf, event_data, eventsz))
2580                 /* This points to an application issue, not a kernel concern */
2581                 ret = -EFAULT;
2582         else
2583                 ret = eventsz;
2584
2585         if (!omit_data)
2586                 kfree(event);
2587         return ret;
2588 }
2589
2590 static __poll_t devx_async_event_poll(struct file *filp,
2591                                       struct poll_table_struct *wait)
2592 {
2593         struct devx_async_event_file *ev_file = filp->private_data;
2594         __poll_t pollflags = 0;
2595
2596         poll_wait(filp, &ev_file->poll_wait, wait);
2597
2598         spin_lock_irq(&ev_file->lock);
2599         if (ev_file->is_destroyed)
2600                 pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
2601         else if (!list_empty(&ev_file->event_list))
2602                 pollflags = EPOLLIN | EPOLLRDNORM;
2603         spin_unlock_irq(&ev_file->lock);
2604
2605         return pollflags;
2606 }
2607
2608 static void devx_free_subscription(struct rcu_head *rcu)
2609 {
2610         struct devx_event_subscription *event_sub =
2611                 container_of(rcu, struct devx_event_subscription, rcu);
2612
2613         if (event_sub->eventfd)
2614                 eventfd_ctx_put(event_sub->eventfd);
2615         uverbs_uobject_put(&event_sub->ev_file->uobj);
2616         kfree(event_sub);
2617 }
2618
2619 static const struct file_operations devx_async_event_fops = {
2620         .owner   = THIS_MODULE,
2621         .read    = devx_async_event_read,
2622         .poll    = devx_async_event_poll,
2623         .release = uverbs_uobject_fd_release,
2624         .llseek  = no_llseek,
2625 };
2626
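/*
 * Tear down an async command FD: mark the queue destroyed, wait for all
 * in-flight commands via mlx5_cmd_cleanup_async_ctx(), then free any
 * completions that were never read.
 */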
2627 static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj,
2628                                              enum rdma_remove_reason why)
2629 {
2630         struct devx_async_cmd_event_file *comp_ev_file =
2631                 container_of(uobj, struct devx_async_cmd_event_file,
2632                              uobj);
2633         struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
2634         struct devx_async_data *entry, *tmp;
2635
2636         spin_lock_irq(&ev_queue->lock);
2637         ev_queue->is_destroyed = 1;
2638         spin_unlock_irq(&ev_queue->lock);
2639         wake_up_interruptible(&ev_queue->poll_wait);
2640
2641         mlx5_cmd_cleanup_async_ctx(&comp_ev_file->async_ctx);
2642
2643         spin_lock_irq(&comp_ev_file->ev_queue.lock);
2644         list_for_each_entry_safe(entry, tmp,
2645                                  &comp_ev_file->ev_queue.event_list, list) {
2646                 list_del(&entry->list);
2647                 kvfree(entry);
2648         }
2649         spin_unlock_irq(&comp_ev_file->ev_queue.lock);
2650         return 0;
2651 }
2652
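/*
 * Tear down an event FD: mark it destroyed, drop queued events, and detach
 * all of its subscriptions, freeing each one after an RCU grace period.
 */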
2653 static int devx_async_event_destroy_uobj(struct ib_uobject *uobj,
2654                                          enum rdma_remove_reason why)
2655 {
2656         struct devx_async_event_file *ev_file =
2657                 container_of(uobj, struct devx_async_event_file,
2658                              uobj);
2659         struct devx_event_subscription *event_sub, *event_sub_tmp;
2660         struct mlx5_ib_dev *dev = ev_file->dev;
2661
2662         spin_lock_irq(&ev_file->lock);
2663         ev_file->is_destroyed = 1;
2664
2665         /* free the allocations for any pending events */
2666         if (ev_file->omit_data) {
2667                 struct devx_event_subscription *event_sub, *tmp;
2668
2669                 list_for_each_entry_safe(event_sub, tmp, &ev_file->event_list,
2670                                          event_list)
2671                         list_del_init(&event_sub->event_list);
2672
2673         } else {
2674                 struct devx_async_event_data *entry, *tmp;
2675
2676                 list_for_each_entry_safe(entry, tmp, &ev_file->event_list,
2677                                          list) {
2678                         list_del(&entry->list);
2679                         kfree(entry);
2680                 }
2681         }
2682
2683         spin_unlock_irq(&ev_file->lock);
2684         wake_up_interruptible(&ev_file->poll_wait);
2685
2686         mutex_lock(&dev->devx_event_table.event_xa_lock);
2687         /* delete the subscriptions which are related to this FD */
2688         list_for_each_entry_safe(event_sub, event_sub_tmp,
2689                                  &ev_file->subscribed_events_list, file_list) {
2690                 devx_cleanup_subscription(dev, event_sub);
2691                 list_del_rcu(&event_sub->file_list);
2692                 /* the read API may still use it under RCU; free after a grace period */
2693                 call_rcu(&event_sub->rcu, devx_free_subscription);
2694         }
2695         mutex_unlock(&dev->devx_event_table.event_xa_lock);
2696
2697         put_device(&dev->ib_dev.dev);
2698         return 0;
2699 }

DECLARE_UVERBS_NAMED_METHOD(
	MLX5_IB_METHOD_DEVX_UMEM_REG,
	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE,
			MLX5_IB_OBJECT_DEVX_UMEM,
			UVERBS_ACCESS_NEW,
			UA_MANDATORY),
	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR,
			   UVERBS_ATTR_TYPE(u64),
			   UA_MANDATORY),
	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN,
			   UVERBS_ATTR_TYPE(u64),
			   UA_MANDATORY),
	UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
			     enum ib_access_flags),
	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
			    UVERBS_ATTR_TYPE(u32),
			    UA_MANDATORY));

DECLARE_UVERBS_NAMED_METHOD_DESTROY(
	MLX5_IB_METHOD_DEVX_UMEM_DEREG,
	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE,
			MLX5_IB_OBJECT_DEVX_UMEM,
			UVERBS_ACCESS_DESTROY,
			UA_MANDATORY));
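
/*
 * Userspace usage sketch (not part of this driver): the two umem methods
 * above back rdma-core's mlx5dv wrappers. A minimal flow, assuming a
 * DEVX-enabled ibv_context in "ctx" and eliding error handling:
 *
 *	struct mlx5dv_devx_umem *umem;
 *
 *	umem = mlx5dv_devx_umem_reg(ctx, buf, len, IBV_ACCESS_LOCAL_WRITE);
 *	... use umem->umem_id inside object-creation commands ...
 *	mlx5dv_devx_umem_dereg(umem);
 */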

DECLARE_UVERBS_NAMED_METHOD(
	MLX5_IB_METHOD_DEVX_QUERY_EQN,
	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC,
			   UVERBS_ATTR_TYPE(u32),
			   UA_MANDATORY),
	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
			    UVERBS_ATTR_TYPE(u32),
			    UA_MANDATORY));
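
/*
 * Userspace usage sketch (not part of this driver): QUERY_EQN maps a
 * completion vector index to the device EQ number that a DEVX-created CQ
 * must reference. Through rdma-core, assuming a DEVX-enabled "ctx":
 *
 *	uint32_t eqn;
 *
 *	if (!mlx5dv_devx_query_eqn(ctx, vector, &eqn))
 *		... write eqn into the CQ creation command ...
 */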

DECLARE_UVERBS_NAMED_METHOD(
	MLX5_IB_METHOD_DEVX_QUERY_UAR,
	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX,
			   UVERBS_ATTR_TYPE(u32),
			   UA_MANDATORY),
	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
			    UVERBS_ATTR_TYPE(u32),
			    UA_MANDATORY));

DECLARE_UVERBS_NAMED_METHOD(
	MLX5_IB_METHOD_DEVX_OTHER,
	UVERBS_ATTR_PTR_IN(
		MLX5_IB_ATTR_DEVX_OTHER_CMD_IN,
		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
		UA_MANDATORY,
		UA_ALLOC_AND_COPY),
	UVERBS_ATTR_PTR_OUT(
		MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT,
		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
		UA_MANDATORY));
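
/*
 * Userspace usage sketch (not part of this driver): DEVX_OTHER carries a
 * general PRM command that is not bound to a kernel-tracked object. Through
 * rdma-core, with buffer sizes that are only illustrative:
 *
 *	uint32_t in[16] = {}, out[16] = {};
 *
 *	... fill the opcode and fields at their PRM-defined offsets ...
 *	mlx5dv_devx_general_cmd(ctx, in, sizeof(in), out, sizeof(out));
 */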

DECLARE_UVERBS_NAMED_METHOD(
	MLX5_IB_METHOD_DEVX_OBJ_CREATE,
	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE,
			MLX5_IB_OBJECT_DEVX_OBJ,
			UVERBS_ACCESS_NEW,
			UA_MANDATORY),
	UVERBS_ATTR_PTR_IN(
		MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN,
		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
		UA_MANDATORY,
		UA_ALLOC_AND_COPY),
	UVERBS_ATTR_PTR_OUT(
		MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
		UA_MANDATORY));
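
/*
 * Userspace usage sketch (not part of this driver): OBJ_CREATE hands back a
 * kernel-tracked handle, so the firmware object is reclaimed on process
 * teardown even if userspace never destroys it explicitly. Through
 * rdma-core:
 *
 *	struct mlx5dv_devx_obj *obj;
 *
 *	obj = mlx5dv_devx_obj_create(ctx, in, sizeof(in), out, sizeof(out));
 *	...
 *	mlx5dv_devx_obj_destroy(obj);
 */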

DECLARE_UVERBS_NAMED_METHOD_DESTROY(
	MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE,
			MLX5_IB_OBJECT_DEVX_OBJ,
			UVERBS_ACCESS_DESTROY,
			UA_MANDATORY));

DECLARE_UVERBS_NAMED_METHOD(
	MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE,
			UVERBS_IDR_ANY_OBJECT,
			UVERBS_ACCESS_READ,
			UA_MANDATORY),
	UVERBS_ATTR_PTR_IN(
		MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
		UA_MANDATORY,
		UA_ALLOC_AND_COPY),
	UVERBS_ATTR_PTR_OUT(
		MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
		UA_MANDATORY));

DECLARE_UVERBS_NAMED_METHOD(
	MLX5_IB_METHOD_DEVX_OBJ_QUERY,
	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
			UVERBS_IDR_ANY_OBJECT,
			UVERBS_ACCESS_READ,
			UA_MANDATORY),
	UVERBS_ATTR_PTR_IN(
		MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
		UA_MANDATORY,
		UA_ALLOC_AND_COPY),
	UVERBS_ATTR_PTR_OUT(
		MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
		UA_MANDATORY));
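
/*
 * Userspace usage sketch (not part of this driver): MODIFY and QUERY take
 * UVERBS_IDR_ANY_OBJECT, so they also apply to verbs-created resources
 * (e.g. a QP) provided the command matches the handle's object. Through
 * rdma-core, with the same in/out layout as the create path:
 *
 *	mlx5dv_devx_obj_query(obj, in, sizeof(in), out, sizeof(out));
 *	mlx5dv_devx_obj_modify(obj, in, sizeof(in), out, sizeof(out));
 */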

DECLARE_UVERBS_NAMED_METHOD(
	MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY,
	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
			UVERBS_IDR_ANY_OBJECT,
			UVERBS_ACCESS_READ,
			UA_MANDATORY),
	UVERBS_ATTR_PTR_IN(
		MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
		UA_MANDATORY,
		UA_ALLOC_AND_COPY),
	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN,
		u16, UA_MANDATORY),
	UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD,
		MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
		UVERBS_ACCESS_READ,
		UA_MANDATORY),
	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID,
		UVERBS_ATTR_TYPE(u64),
		UA_MANDATORY));
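
/*
 * Userspace usage sketch (not part of this driver): an async query is issued
 * against a command-completion FD and collected later by reading that FD.
 * Through rdma-core, eliding error handling ("out_len" is the expected
 * response size):
 *
 *	struct mlx5dv_devx_cmd_comp *comp;
 *	uint8_t resp[sizeof(struct mlx5dv_devx_async_cmd_hdr) + out_len];
 *
 *	comp = mlx5dv_devx_create_cmd_comp(ctx);
 *	mlx5dv_devx_obj_query_async(obj, in, sizeof(in), out_len, wr_id, comp);
 *	... poll comp->fd for readability ...
 *	mlx5dv_devx_get_async_cmd_comp(comp, (void *)resp, sizeof(resp));
 */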

DECLARE_UVERBS_NAMED_METHOD(
	MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT,
	UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE,
		MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
		UVERBS_ACCESS_READ,
		UA_MANDATORY),
	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE,
		MLX5_IB_OBJECT_DEVX_OBJ,
		UVERBS_ACCESS_READ,
		UA_OPTIONAL),
	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
		UVERBS_ATTR_MIN_SIZE(sizeof(u16)),
		UA_MANDATORY,
		UA_ALLOC_AND_COPY),
	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE,
		UVERBS_ATTR_TYPE(u64),
		UA_OPTIONAL),
	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM,
		UVERBS_ATTR_TYPE(u32),
		UA_OPTIONAL));
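
/*
 * Userspace usage sketch (not part of this driver): a subscription binds a
 * list of event type numbers, and optionally a DEVX object, to an event
 * channel FD together with a caller cookie. Through rdma-core, assuming an
 * already-created channel "ch":
 *
 *	uint16_t events[] = { event_num };
 *
 *	mlx5dv_devx_subscribe_devx_event(ch, obj, sizeof(events), events,
 *					 cookie);
 */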

DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
			      &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER),
			      &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR),
			      &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN),
			      &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT));

DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
			    UVERBS_TYPE_ALLOC_IDR(devx_obj_cleanup),
			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE),
			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY),
			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY),
			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY),
			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY));

DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM,
			    UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup),
			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG),
			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG));

DECLARE_UVERBS_NAMED_METHOD(
	MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC,
	UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE,
			MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
			UVERBS_ACCESS_NEW,
			UA_MANDATORY));

DECLARE_UVERBS_NAMED_OBJECT(
	MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
	UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_cmd_event_file),
			     devx_async_cmd_event_destroy_uobj,
			     &devx_async_cmd_event_fops, "[devx_async_cmd]",
			     O_RDONLY),
	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC));

DECLARE_UVERBS_NAMED_METHOD(
	MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC,
	UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE,
			MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
			UVERBS_ACCESS_NEW,
			UA_MANDATORY),
	UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS,
			enum mlx5_ib_uapi_devx_create_event_channel_flags,
			UA_MANDATORY));

DECLARE_UVERBS_NAMED_OBJECT(
	MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
	UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_event_file),
			     devx_async_event_destroy_uobj,
			     &devx_async_event_fops, "[devx_async_event]",
			     O_RDONLY),
	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC));
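
/*
 * Userspace usage sketch (not part of this driver): the event FD object
 * above is what rdma-core wraps as an event channel. A minimal consumer,
 * eliding error handling and assuming 64 bytes of event payload:
 *
 *	struct mlx5dv_devx_event_channel *ch;
 *	uint8_t buf[sizeof(struct mlx5dv_devx_async_event_hdr) + 64];
 *
 *	ch = mlx5dv_devx_create_event_channel(ctx, 0);
 *	... subscribe, then poll ch->fd ...
 *	read(ch->fd, buf, sizeof(buf));
 *	... ((struct mlx5dv_devx_async_event_hdr *)buf)->cookie identifies
 *	    the subscription ...
 */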

/*
 * DEVX is exposed only when the firmware supports user contexts, i.e.
 * log_max_uctx is non-zero.
 */
static bool devx_is_supported(struct ib_device *device)
{
	struct mlx5_ib_dev *dev = to_mdev(device);

	return MLX5_CAP_GEN(dev->mdev, log_max_uctx);
}
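
/*
 * Userspace usage sketch (not part of this driver): when the capability
 * above is zero none of the DEVX trees below are registered, and opening a
 * DEVX context fails. Through rdma-core:
 *
 *	struct mlx5dv_context_attr attr = {
 *		.flags = MLX5DV_CONTEXT_FLAGS_DEVX,
 *	};
 *
 *	ctx = mlx5dv_open_device(dev, &attr);
 *	... ctx is NULL when DEVX is unsupported ...
 */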

const struct uapi_definition mlx5_ib_devx_defs[] = {
	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
		MLX5_IB_OBJECT_DEVX,
		UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
		MLX5_IB_OBJECT_DEVX_OBJ,
		UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
		MLX5_IB_OBJECT_DEVX_UMEM,
		UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
		MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
		UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
		MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
		UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
	{},
};