GNU Linux-libre 5.10.215-gnu1
drivers/infiniband/hw/mlx5/wr.c
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
4  */
5
6 #include <linux/gfp.h>
7 #include <linux/mlx5/qp.h>
8 #include <linux/mlx5/driver.h>
9 #include "wr.h"
10
11 static const u32 mlx5_ib_opcode[] = {
12         [IB_WR_SEND]                            = MLX5_OPCODE_SEND,
13         [IB_WR_LSO]                             = MLX5_OPCODE_LSO,
14         [IB_WR_SEND_WITH_IMM]                   = MLX5_OPCODE_SEND_IMM,
15         [IB_WR_RDMA_WRITE]                      = MLX5_OPCODE_RDMA_WRITE,
16         [IB_WR_RDMA_WRITE_WITH_IMM]             = MLX5_OPCODE_RDMA_WRITE_IMM,
17         [IB_WR_RDMA_READ]                       = MLX5_OPCODE_RDMA_READ,
18         [IB_WR_ATOMIC_CMP_AND_SWP]              = MLX5_OPCODE_ATOMIC_CS,
19         [IB_WR_ATOMIC_FETCH_AND_ADD]            = MLX5_OPCODE_ATOMIC_FA,
20         [IB_WR_SEND_WITH_INV]                   = MLX5_OPCODE_SEND_INVAL,
21         [IB_WR_LOCAL_INV]                       = MLX5_OPCODE_UMR,
22         [IB_WR_REG_MR]                          = MLX5_OPCODE_UMR,
23         [IB_WR_MASKED_ATOMIC_CMP_AND_SWP]       = MLX5_OPCODE_ATOMIC_MASKED_CS,
24         [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD]     = MLX5_OPCODE_ATOMIC_MASKED_FA,
25         [MLX5_IB_WR_UMR]                        = MLX5_OPCODE_UMR,
26 };
27
28 /* handle_post_send_edge - Check if we have reached the SQ edge. If so, update
29  * to the next edge and get a new address translation for the current WQE position.
30  * @sq: SQ buffer.
31  * @seg: Current WQE position (16B aligned).
32  * @wqe_sz: Total current WQE size [16B].
33  * @cur_edge: Updated current edge.
34  */
35 static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg,
36                                          u32 wqe_sz, void **cur_edge)
37 {
38         u32 idx;
39
40         if (likely(*seg != *cur_edge))
41                 return;
42
43         idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1);
44         *cur_edge = get_sq_edge(sq, idx);
45
46         *seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx);
47 }
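/*
 * Illustrative note (editor's sketch, not upstream documentation): the SQ is
 * built on a fragmented buffer, so a WQE under construction may run into the
 * end of the current fragment ("edge"). Segment builders therefore advance
 * the position and re-check the edge after every segment, e.g.:
 *
 *	set_datagram_seg(*seg, wr);
 *	*seg += sizeof(struct mlx5_wqe_datagram_seg);
 *	*size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
 *	handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
 */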
48
49 /* memcpy_send_wqe - Copy data from @src to the WQE and update the relevant
50  * WQ's pointers. At the end, @seg is 16B aligned regardless of the copied size.
51  * @sq: SQ buffer.
52  * @cur_edge: Updated current edge.
53  * @seg: Current WQE position (16B aligned).
54  * @wqe_sz: Total current WQE size [16B].
55  * @src: Pointer to copy from.
56  * @n: Number of bytes to copy.
57  */
58 static inline void memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge,
59                                    void **seg, u32 *wqe_sz, const void *src,
60                                    size_t n)
61 {
62         while (likely(n)) {
63                 size_t leftlen = *cur_edge - *seg;
64                 size_t copysz = min_t(size_t, leftlen, n);
65                 size_t stride;
66
67                 memcpy(*seg, src, copysz);
68
69                 n -= copysz;
70                 src += copysz;
71                 stride = !n ? ALIGN(copysz, 16) : copysz;
72                 *seg += stride;
73                 *wqe_sz += stride >> 4;
74                 handle_post_send_edge(sq, seg, *wqe_sz, cur_edge);
75         }
76 }
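/*
 * Worked example (illustrative): with 64 bytes left before the edge, copying
 * n = 20 bytes completes in a single pass; the stride is then
 * ALIGN(20, 16) = 32, so *seg advances by 32 bytes and *wqe_sz grows by
 * 32 >> 4 = 2 (16-byte units), keeping the WQE position 16B aligned.
 */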
77
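/*
 * mlx5_wq_overflow - Check whether posting @nreq more requests would overflow
 * @wq. The fast path compares head/tail without locking; only on apparent
 * overflow is the CQ lock taken to re-read a consistent value. Returns
 * nonzero if the work queue cannot absorb @nreq additional requests.
 */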
78 static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq,
79                             struct ib_cq *ib_cq)
80 {
81         struct mlx5_ib_cq *cq;
82         unsigned int cur;
83
84         cur = wq->head - wq->tail;
85         if (likely(cur + nreq < wq->max_post))
86                 return 0;
87
88         cq = to_mcq(ib_cq);
89         spin_lock(&cq->lock);
90         cur = wq->head - wq->tail;
91         spin_unlock(&cq->lock);
92
93         return cur + nreq >= wq->max_post;
94 }
95
96 static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
97                                           u64 remote_addr, u32 rkey)
98 {
99         rseg->raddr    = cpu_to_be64(remote_addr);
100         rseg->rkey     = cpu_to_be32(rkey);
101         rseg->reserved = 0;
102 }
103
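/*
 * set_eth_seg - Build the Ethernet segment: checksum offload flags and, for
 * IB_WR_LSO, the MSS and the inline packet headers, which are copied across
 * SQ fragment edges when they do not fit in the current fragment.
 */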
104 static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
105                         void **seg, int *size, void **cur_edge)
106 {
107         struct mlx5_wqe_eth_seg *eseg = *seg;
108
109         memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg));
110
111         if (wr->send_flags & IB_SEND_IP_CSUM)
112                 eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM |
113                                  MLX5_ETH_WQE_L4_CSUM;
114
115         if (wr->opcode == IB_WR_LSO) {
116                 struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr);
117                 size_t left, copysz;
118                 void *pdata = ud_wr->header;
119                 size_t stride;
120
121                 left = ud_wr->hlen;
122                 eseg->mss = cpu_to_be16(ud_wr->mss);
123                 eseg->inline_hdr.sz = cpu_to_be16(left);
124
125                 /* memcpy_send_wqe() expects a 16B-aligned address. Hence,
126                  * first copy up to the current edge and then, if needed,
127                  * continue with memcpy_send_wqe().
128                  */
129                 copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start,
130                                left);
131                 memcpy(eseg->inline_hdr.data, pdata, copysz);
132                 stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) -
133                                sizeof(eseg->inline_hdr.start) + copysz, 16);
134                 *size += stride / 16;
135                 *seg += stride;
136
137                 if (copysz < left) {
138                         handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
139                         left -= copysz;
140                         pdata += copysz;
141                         memcpy_send_wqe(&qp->sq, cur_edge, seg, size, pdata,
142                                         left);
143                 }
144
145                 return;
146         }
147
148         *seg += sizeof(struct mlx5_wqe_eth_seg);
149         *size += sizeof(struct mlx5_wqe_eth_seg) / 16;
150 }
151
152 static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
153                              const struct ib_send_wr *wr)
154 {
155         memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av));
156         dseg->av.dqp_dct =
157                 cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV);
158         dseg->av.key.qkey.qkey = cpu_to_be32(ud_wr(wr)->remote_qkey);
159 }
160
161 static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
162 {
163         dseg->byte_count = cpu_to_be32(sg->length);
164         dseg->lkey       = cpu_to_be32(sg->lkey);
165         dseg->addr       = cpu_to_be64(sg->addr);
166 }
167
168 static u64 get_xlt_octo(u64 bytes)
169 {
170         return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) /
171                MLX5_IB_UMR_OCTOWORD;
172 }
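/*
 * Illustrative example (assuming the usual definitions of
 * MLX5_IB_UMR_XLT_ALIGNMENT = 64 and MLX5_IB_UMR_OCTOWORD = 16):
 * get_xlt_octo(100) = ALIGN(100, 64) / 16 = 128 / 16 = 8, i.e. a 100-byte
 * translation table occupies 8 octowords once padded.
 */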
173
174 static __be64 frwr_mkey_mask(bool atomic)
175 {
176         u64 result;
177
178         result = MLX5_MKEY_MASK_LEN             |
179                 MLX5_MKEY_MASK_PAGE_SIZE        |
180                 MLX5_MKEY_MASK_START_ADDR       |
181                 MLX5_MKEY_MASK_EN_RINVAL        |
182                 MLX5_MKEY_MASK_KEY              |
183                 MLX5_MKEY_MASK_LR               |
184                 MLX5_MKEY_MASK_LW               |
185                 MLX5_MKEY_MASK_RR               |
186                 MLX5_MKEY_MASK_RW               |
187                 MLX5_MKEY_MASK_SMALL_FENCE      |
188                 MLX5_MKEY_MASK_FREE;
189
190         if (atomic)
191                 result |= MLX5_MKEY_MASK_A;
192
193         return cpu_to_be64(result);
194 }
195
196 static __be64 sig_mkey_mask(void)
197 {
198         u64 result;
199
200         result = MLX5_MKEY_MASK_LEN             |
201                 MLX5_MKEY_MASK_PAGE_SIZE        |
202                 MLX5_MKEY_MASK_START_ADDR       |
203                 MLX5_MKEY_MASK_EN_SIGERR        |
204                 MLX5_MKEY_MASK_EN_RINVAL        |
205                 MLX5_MKEY_MASK_KEY              |
206                 MLX5_MKEY_MASK_LR               |
207                 MLX5_MKEY_MASK_LW               |
208                 MLX5_MKEY_MASK_RR               |
209                 MLX5_MKEY_MASK_RW               |
210                 MLX5_MKEY_MASK_SMALL_FENCE      |
211                 MLX5_MKEY_MASK_FREE             |
212                 MLX5_MKEY_MASK_BSF_EN;
213
214         return cpu_to_be64(result);
215 }
216
217 static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
218                             struct mlx5_ib_mr *mr, u8 flags, bool atomic)
219 {
220         int size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size;
221
222         memset(umr, 0, sizeof(*umr));
223
224         umr->flags = flags;
225         umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
226         umr->mkey_mask = frwr_mkey_mask(atomic);
227 }
228
229 static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr)
230 {
231         memset(umr, 0, sizeof(*umr));
232         umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
233         umr->flags = MLX5_UMR_INLINE;
234 }
235
236 static __be64 get_umr_enable_mr_mask(void)
237 {
238         u64 result;
239
240         result = MLX5_MKEY_MASK_KEY |
241                  MLX5_MKEY_MASK_FREE;
242
243         return cpu_to_be64(result);
244 }
245
246 static __be64 get_umr_disable_mr_mask(void)
247 {
248         u64 result;
249
250         result = MLX5_MKEY_MASK_FREE;
251
252         return cpu_to_be64(result);
253 }
254
255 static __be64 get_umr_update_translation_mask(void)
256 {
257         u64 result;
258
259         result = MLX5_MKEY_MASK_LEN |
260                  MLX5_MKEY_MASK_PAGE_SIZE |
261                  MLX5_MKEY_MASK_START_ADDR;
262
263         return cpu_to_be64(result);
264 }
265
266 static __be64 get_umr_update_access_mask(int atomic,
267                                          int relaxed_ordering_write,
268                                          int relaxed_ordering_read)
269 {
270         u64 result;
271
272         result = MLX5_MKEY_MASK_LR |
273                  MLX5_MKEY_MASK_LW |
274                  MLX5_MKEY_MASK_RR |
275                  MLX5_MKEY_MASK_RW;
276
277         if (atomic)
278                 result |= MLX5_MKEY_MASK_A;
279
280         if (relaxed_ordering_write)
281                 result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE;
282
283         if (relaxed_ordering_read)
284                 result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ;
285
286         return cpu_to_be64(result);
287 }
288
289 static __be64 get_umr_update_pd_mask(void)
290 {
291         u64 result;
292
293         result = MLX5_MKEY_MASK_PD;
294
295         return cpu_to_be64(result);
296 }
297
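/*
 * umr_check_mkey_mask - Reject mkey modifications that this device cannot
 * perform through UMR: entity (page) size changes, atomic access toggling,
 * or relaxed ordering updates when the corresponding capabilities are not
 * present. Returns 0 if the requested mask is allowed, -EPERM otherwise.
 */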
298 static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
299 {
300         if (mask & MLX5_MKEY_MASK_PAGE_SIZE &&
301             MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
302                 return -EPERM;
303
304         if (mask & MLX5_MKEY_MASK_A &&
305             MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
306                 return -EPERM;
307
308         if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE &&
309             !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
310                 return -EPERM;
311
312         if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ &&
313             !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
314                 return -EPERM;
315
316         return 0;
317 }
318
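/*
 * set_reg_umr_segment - Build the UMR control segment for a driver-internal
 * MLX5_IB_WR_UMR work request, translating the MLX5_IB_SEND_UMR_* send flags
 * into mkey mask bits. Returns an error from umr_check_mkey_mask() if the
 * device cannot apply the requested modification.
 */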
319 static int set_reg_umr_segment(struct mlx5_ib_dev *dev,
320                                struct mlx5_wqe_umr_ctrl_seg *umr,
321                                const struct ib_send_wr *wr)
322 {
323         const struct mlx5_umr_wr *umrwr = umr_wr(wr);
324
325         memset(umr, 0, sizeof(*umr));
326
327         if (!umrwr->ignore_free_state) {
328                 if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE)
329                          /* fail if free */
330                         umr->flags = MLX5_UMR_CHECK_FREE;
331                 else
332                         /* fail if not free */
333                         umr->flags = MLX5_UMR_CHECK_NOT_FREE;
334         }
335
336         umr->xlt_octowords = cpu_to_be16(get_xlt_octo(umrwr->xlt_size));
337         if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_XLT) {
338                 u64 offset = get_xlt_octo(umrwr->offset);
339
340                 umr->xlt_offset = cpu_to_be16(offset & 0xffff);
341                 umr->xlt_offset_47_16 = cpu_to_be32(offset >> 16);
342                 umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
343         }
344         if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION)
345                 umr->mkey_mask |= get_umr_update_translation_mask();
346         if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) {
347                 umr->mkey_mask |= get_umr_update_access_mask(
348                         !!(MLX5_CAP_GEN(dev->mdev, atomic)),
349                         !!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)),
350                         !!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)));
351                 umr->mkey_mask |= get_umr_update_pd_mask();
352         }
353         if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR)
354                 umr->mkey_mask |= get_umr_enable_mr_mask();
355         if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
356                 umr->mkey_mask |= get_umr_disable_mr_mask();
357
358         if (!wr->num_sge)
359                 umr->flags |= MLX5_UMR_INLINE;
360
361         return umr_check_mkey_mask(dev, be64_to_cpu(umr->mkey_mask));
362 }
363
364 static u8 get_umr_flags(int acc)
365 {
366         return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC       : 0) |
367                (acc & IB_ACCESS_REMOTE_WRITE  ? MLX5_PERM_REMOTE_WRITE : 0) |
368                (acc & IB_ACCESS_REMOTE_READ   ? MLX5_PERM_REMOTE_READ  : 0) |
369                (acc & IB_ACCESS_LOCAL_WRITE   ? MLX5_PERM_LOCAL_WRITE  : 0) |
370                 MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN;
371 }
372
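/*
 * set_reg_mkey_seg - Fill the mkey context for a fast-register (IB_WR_REG_MR)
 * work request: access permissions, IOVA, length and the translation table
 * size in octowords (KLM descriptors take twice the space of MTTs).
 */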
373 static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg,
374                              struct mlx5_ib_mr *mr,
375                              u32 key, int access)
376 {
377         int ndescs = ALIGN(mr->ndescs + mr->meta_ndescs, 8) >> 1;
378
379         memset(seg, 0, sizeof(*seg));
380
381         if (mr->access_mode == MLX5_MKC_ACCESS_MODE_MTT)
382                 seg->log2_page_size = ilog2(mr->ibmr.page_size);
383         else if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
384                 /* KLMs take twice the size of MTTs */
385                 ndescs *= 2;
386
387         seg->flags = get_umr_flags(access) | mr->access_mode;
388         seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00);
389         seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
390         seg->start_addr = cpu_to_be64(mr->ibmr.iova);
391         seg->len = cpu_to_be64(mr->ibmr.length);
392         seg->xlt_oct_size = cpu_to_be32(ndescs);
393 }
394
395 static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
396 {
397         memset(seg, 0, sizeof(*seg));
398         seg->status = MLX5_MKEY_STATUS_FREE;
399 }
400
401 static void set_reg_mkey_segment(struct mlx5_ib_dev *dev,
402                                  struct mlx5_mkey_seg *seg,
403                                  const struct ib_send_wr *wr)
404 {
405         const struct mlx5_umr_wr *umrwr = umr_wr(wr);
406
407         memset(seg, 0, sizeof(*seg));
408         if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
409                 MLX5_SET(mkc, seg, free, 1);
410
411         MLX5_SET(mkc, seg, a,
412                  !!(umrwr->access_flags & IB_ACCESS_REMOTE_ATOMIC));
413         MLX5_SET(mkc, seg, rw,
414                  !!(umrwr->access_flags & IB_ACCESS_REMOTE_WRITE));
415         MLX5_SET(mkc, seg, rr, !!(umrwr->access_flags & IB_ACCESS_REMOTE_READ));
416         MLX5_SET(mkc, seg, lw, !!(umrwr->access_flags & IB_ACCESS_LOCAL_WRITE));
417         MLX5_SET(mkc, seg, lr, 1);
418         if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
419                 MLX5_SET(mkc, seg, relaxed_ordering_write,
420                          !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
421         if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
422                 MLX5_SET(mkc, seg, relaxed_ordering_read,
423                          !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
424
425         if (umrwr->pd)
426                 MLX5_SET(mkc, seg, pd, to_mpd(umrwr->pd)->pdn);
427         if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION &&
428             !umrwr->length)
429                 MLX5_SET(mkc, seg, length64, 1);
430
431         MLX5_SET64(mkc, seg, start_addr, umrwr->virt_addr);
432         MLX5_SET64(mkc, seg, len, umrwr->length);
433         MLX5_SET(mkc, seg, log_page_size, umrwr->page_shift);
434         MLX5_SET(mkc, seg, qpn, 0xffffff);
435         MLX5_SET(mkc, seg, mkey_7_0, mlx5_mkey_variant(umrwr->mkey));
436 }
437
438 static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
439                              struct mlx5_ib_mr *mr,
440                              struct mlx5_ib_pd *pd)
441 {
442         int bcount = mr->desc_size * (mr->ndescs + mr->meta_ndescs);
443
444         dseg->addr = cpu_to_be64(mr->desc_map);
445         dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64));
446         dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey);
447 }
448
449 static __be32 send_ieth(const struct ib_send_wr *wr)
450 {
451         switch (wr->opcode) {
452         case IB_WR_SEND_WITH_IMM:
453         case IB_WR_RDMA_WRITE_WITH_IMM:
454                 return wr->ex.imm_data;
455
456         case IB_WR_SEND_WITH_INV:
457                 return cpu_to_be32(wr->ex.invalidate_rkey);
458
459         default:
460                 return 0;
461         }
462 }
463
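/*
 * calc_sig - XOR all @size bytes of @wqe and return the bitwise complement;
 * used as a lightweight software signature over WQE contents.
 */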
464 static u8 calc_sig(void *wqe, int size)
465 {
466         u8 *p = wqe;
467         u8 res = 0;
468         int i;
469
470         for (i = 0; i < size; i++)
471                 res ^= p[i];
472
473         return ~res;
474 }
475
476 static u8 wq_sig(void *wqe)
477 {
478         return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4);
479 }
480
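/*
 * set_data_inl_seg - Copy the WR's scatter list inline into the WQE, crossing
 * SQ fragment edges as needed. Fails with -ENOMEM if the total inline payload
 * exceeds the QP's max_inline_data. The inline segment's byte_count is
 * written only after the full length is known.
 */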
481 static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
482                             void **wqe, int *wqe_sz, void **cur_edge)
483 {
484         struct mlx5_wqe_inline_seg *seg;
485         size_t offset;
486         int inl = 0;
487         int i;
488
489         seg = *wqe;
490         *wqe += sizeof(*seg);
491         offset = sizeof(*seg);
492
493         for (i = 0; i < wr->num_sge; i++) {
494                 size_t len  = wr->sg_list[i].length;
495                 void *addr = (void *)(unsigned long)(wr->sg_list[i].addr);
496
497                 inl += len;
498
499                 if (unlikely(inl > qp->max_inline_data))
500                         return -ENOMEM;
501
502                 while (likely(len)) {
503                         size_t leftlen;
504                         size_t copysz;
505
506                         handle_post_send_edge(&qp->sq, wqe,
507                                               *wqe_sz + (offset >> 4),
508                                               cur_edge);
509
510                         leftlen = *cur_edge - *wqe;
511                         copysz = min_t(size_t, leftlen, len);
512
513                         memcpy(*wqe, addr, copysz);
514                         len -= copysz;
515                         addr += copysz;
516                         *wqe += copysz;
517                         offset += copysz;
518                 }
519         }
520
521         seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
522
523         *wqe_sz += ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
524
525         return 0;
526 }
527
528 static u16 prot_field_size(enum ib_signature_type type)
529 {
530         switch (type) {
531         case IB_SIG_TYPE_T10_DIF:
532                 return MLX5_DIF_SIZE;
533         default:
534                 return 0;
535         }
536 }
537
538 static u8 bs_selector(int block_size)
539 {
540         switch (block_size) {
541         case 512:           return 0x1;
542         case 520:           return 0x2;
543         case 4096:          return 0x3;
544         case 4160:          return 0x4;
545         case 1073741824:    return 0x5;
546         default:            return 0;
547         }
548 }
549
550 static void mlx5_fill_inl_bsf(struct ib_sig_domain *domain,
551                               struct mlx5_bsf_inl *inl)
552 {
553         /* Valid inline section and allow BSF refresh */
554         inl->vld_refresh = cpu_to_be16(MLX5_BSF_INL_VALID |
555                                        MLX5_BSF_REFRESH_DIF);
556         inl->dif_apptag = cpu_to_be16(domain->sig.dif.app_tag);
557         inl->dif_reftag = cpu_to_be32(domain->sig.dif.ref_tag);
558         /* repeating block */
559         inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK;
560         inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ?
561                         MLX5_DIF_CRC : MLX5_DIF_IPCS;
562
563         if (domain->sig.dif.ref_remap)
564                 inl->dif_inc_ref_guard_check |= MLX5_BSF_INC_REFTAG;
565
566         if (domain->sig.dif.app_escape) {
567                 if (domain->sig.dif.ref_escape)
568                         inl->dif_inc_ref_guard_check |= MLX5_BSF_APPREF_ESCAPE;
569                 else
570                         inl->dif_inc_ref_guard_check |= MLX5_BSF_APPTAG_ESCAPE;
571         }
572
573         inl->dif_app_bitmask_check =
574                 cpu_to_be16(domain->sig.dif.apptag_check_mask);
575 }
576
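/*
 * mlx5_set_bsf - Fill the signature BSF for @sig_mr from the memory and wire
 * signature domains in @sig_attrs. When both domains share the same T10-DIF
 * block structure, the copy_byte_mask bits mark which DIF fields
 * (guard/app/ref) match between the domains and can be carried over.
 */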
577 static int mlx5_set_bsf(struct ib_mr *sig_mr,
578                         struct ib_sig_attrs *sig_attrs,
579                         struct mlx5_bsf *bsf, u32 data_size)
580 {
581         struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig;
582         struct mlx5_bsf_basic *basic = &bsf->basic;
583         struct ib_sig_domain *mem = &sig_attrs->mem;
584         struct ib_sig_domain *wire = &sig_attrs->wire;
585
586         memset(bsf, 0, sizeof(*bsf));
587
588         /* Basic + Extended + Inline */
589         basic->bsf_size_sbs = 1 << 7;
590         /* Input domain check byte mask */
591         basic->check_byte_mask = sig_attrs->check_mask;
592         basic->raw_data_size = cpu_to_be32(data_size);
593
594         /* Memory domain */
595         switch (sig_attrs->mem.sig_type) {
596         case IB_SIG_TYPE_NONE:
597                 break;
598         case IB_SIG_TYPE_T10_DIF:
599                 basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval);
600                 basic->m_bfs_psv = cpu_to_be32(msig->psv_memory.psv_idx);
601                 mlx5_fill_inl_bsf(mem, &bsf->m_inl);
602                 break;
603         default:
604                 return -EINVAL;
605         }
606
607         /* Wire domain */
608         switch (sig_attrs->wire.sig_type) {
609         case IB_SIG_TYPE_NONE:
610                 break;
611         case IB_SIG_TYPE_T10_DIF:
612                 if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval &&
613                     mem->sig_type == wire->sig_type) {
614                         /* Same block structure */
615                         basic->bsf_size_sbs |= 1 << 4;
616                         if (mem->sig.dif.bg_type == wire->sig.dif.bg_type)
617                                 basic->wire.copy_byte_mask |= MLX5_CPY_GRD_MASK;
618                         if (mem->sig.dif.app_tag == wire->sig.dif.app_tag)
619                                 basic->wire.copy_byte_mask |= MLX5_CPY_APP_MASK;
620                         if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag)
621                                 basic->wire.copy_byte_mask |= MLX5_CPY_REF_MASK;
622                 } else
623                         basic->wire.bs_selector =
624                                 bs_selector(wire->sig.dif.pi_interval);
625
626                 basic->w_bfs_psv = cpu_to_be32(msig->psv_wire.psv_idx);
627                 mlx5_fill_inl_bsf(wire, &bsf->w_inl);
628                 break;
629         default:
630                 return -EINVAL;
631         }
632
633         return 0;
634 }
635
636
637 static int set_sig_data_segment(const struct ib_send_wr *send_wr,
638                                 struct ib_mr *sig_mr,
639                                 struct ib_sig_attrs *sig_attrs,
640                                 struct mlx5_ib_qp *qp, void **seg, int *size,
641                                 void **cur_edge)
642 {
643         struct mlx5_bsf *bsf;
644         u32 data_len;
645         u32 data_key;
646         u64 data_va;
647         u32 prot_len = 0;
648         u32 prot_key = 0;
649         u64 prot_va = 0;
650         bool prot = false;
651         int ret;
652         int wqe_size;
653         struct mlx5_ib_mr *mr = to_mmr(sig_mr);
654         struct mlx5_ib_mr *pi_mr = mr->pi_mr;
655
656         data_len = pi_mr->data_length;
657         data_key = pi_mr->ibmr.lkey;
658         data_va = pi_mr->data_iova;
659         if (pi_mr->meta_ndescs) {
660                 prot_len = pi_mr->meta_length;
661                 prot_key = pi_mr->ibmr.lkey;
662                 prot_va = pi_mr->pi_iova;
663                 prot = true;
664         }
665
666         if (!prot || (data_key == prot_key && data_va == prot_va &&
667                       data_len == prot_len)) {
668                 /**
669                  * The source domain doesn't contain signature information,
670                  * or data and protection are interleaved in memory,
671                  * so we need to construct:
672                  *                  ------------------
673                  *                 |     data_klm     |
674                  *                  ------------------
675                  *                 |       BSF        |
676                  *                  ------------------
677                  **/
678                 struct mlx5_klm *data_klm = *seg;
679
680                 data_klm->bcount = cpu_to_be32(data_len);
681                 data_klm->key = cpu_to_be32(data_key);
682                 data_klm->va = cpu_to_be64(data_va);
683                 wqe_size = ALIGN(sizeof(*data_klm), 64);
684         } else {
685                 /**
686                  * The source domain contains signature information,
687                  * so we need to construct a strided block format:
688                  *               ---------------------------
689                  *              |     stride_block_ctrl     |
690                  *               ---------------------------
691                  *              |          data_klm         |
692                  *               ---------------------------
693                  *              |          prot_klm         |
694                  *               ---------------------------
695                  *              |             BSF           |
696                  *               ---------------------------
697                  **/
698                 struct mlx5_stride_block_ctrl_seg *sblock_ctrl;
699                 struct mlx5_stride_block_entry *data_sentry;
700                 struct mlx5_stride_block_entry *prot_sentry;
701                 u16 block_size = sig_attrs->mem.sig.dif.pi_interval;
702                 int prot_size;
703
704                 sblock_ctrl = *seg;
705                 data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl);
706                 prot_sentry = (void *)data_sentry + sizeof(*data_sentry);
707
708                 prot_size = prot_field_size(sig_attrs->mem.sig_type);
709                 if (!prot_size) {
710                         pr_err("Bad block size given: %u\n", block_size);
711                         return -EINVAL;
712                 }
713                 sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size +
714                                                             prot_size);
715                 sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP);
716                 sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size);
717                 sblock_ctrl->num_entries = cpu_to_be16(2);
718
719                 data_sentry->bcount = cpu_to_be16(block_size);
720                 data_sentry->key = cpu_to_be32(data_key);
721                 data_sentry->va = cpu_to_be64(data_va);
722                 data_sentry->stride = cpu_to_be16(block_size);
723
724                 prot_sentry->bcount = cpu_to_be16(prot_size);
725                 prot_sentry->key = cpu_to_be32(prot_key);
726                 prot_sentry->va = cpu_to_be64(prot_va);
727                 prot_sentry->stride = cpu_to_be16(prot_size);
728
729                 wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) +
730                                  sizeof(*prot_sentry), 64);
731         }
732
733         *seg += wqe_size;
734         *size += wqe_size / 16;
735         handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
736
737         bsf = *seg;
738         ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len);
739         if (ret)
740                 return -EINVAL;
741
742         *seg += sizeof(*bsf);
743         *size += sizeof(*bsf) / 16;
744         handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
745
746         return 0;
747 }
748
749 static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
750                                  struct ib_mr *sig_mr, int access_flags,
751                                  u32 size, u32 length, u32 pdn)
752 {
753         u32 sig_key = sig_mr->rkey;
754         u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1;
755
756         memset(seg, 0, sizeof(*seg));
757
758         seg->flags = get_umr_flags(access_flags) | MLX5_MKC_ACCESS_MODE_KLMS;
759         seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
760         seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
761                                     MLX5_MKEY_BSF_EN | pdn);
762         seg->len = cpu_to_be64(length);
763         seg->xlt_oct_size = cpu_to_be32(get_xlt_octo(size));
764         seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
765 }
766
767 static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
768                                 u32 size)
769 {
770         memset(umr, 0, sizeof(*umr));
771
772         umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE;
773         umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
774         umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE);
775         umr->mkey_mask = sig_mkey_mask();
776 }
777
778 static int set_pi_umr_wr(const struct ib_send_wr *send_wr,
779                          struct mlx5_ib_qp *qp, void **seg, int *size,
780                          void **cur_edge)
781 {
782         const struct ib_reg_wr *wr = reg_wr(send_wr);
783         struct mlx5_ib_mr *sig_mr = to_mmr(wr->mr);
784         struct mlx5_ib_mr *pi_mr = sig_mr->pi_mr;
785         struct ib_sig_attrs *sig_attrs = sig_mr->ibmr.sig_attrs;
786         u32 pdn = to_mpd(qp->ibqp.pd)->pdn;
787         u32 xlt_size;
788         int region_len, ret;
789
790         if (unlikely(send_wr->num_sge != 0) ||
791             unlikely(wr->access & IB_ACCESS_REMOTE_ATOMIC) ||
792             unlikely(!sig_mr->sig) || unlikely(!qp->ibqp.integrity_en) ||
793             unlikely(!sig_mr->sig->sig_status_checked))
794                 return -EINVAL;
795
796         /* length of the protected region, data + protection */
797         region_len = pi_mr->ibmr.length;
798
799         /**
800          * KLM octoword size - if protection was provided
801          * then we use the strided block format (3 octowords),
802          * else we use a single KLM (1 octoword).
803          **/
804         if (sig_attrs->mem.sig_type != IB_SIG_TYPE_NONE)
805                 xlt_size = 0x30;
806         else
807                 xlt_size = sizeof(struct mlx5_klm);
808
809         set_sig_umr_segment(*seg, xlt_size);
810         *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
811         *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
812         handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
813
814         set_sig_mkey_segment(*seg, wr->mr, wr->access, xlt_size, region_len,
815                              pdn);
816         *seg += sizeof(struct mlx5_mkey_seg);
817         *size += sizeof(struct mlx5_mkey_seg) / 16;
818         handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
819
820         ret = set_sig_data_segment(send_wr, wr->mr, sig_attrs, qp, seg, size,
821                                    cur_edge);
822         if (ret)
823                 return ret;
824
825         sig_mr->sig->sig_status_checked = false;
826         return 0;
827 }
828
829 static int set_psv_wr(struct ib_sig_domain *domain,
830                       u32 psv_idx, void **seg, int *size)
831 {
832         struct mlx5_seg_set_psv *psv_seg = *seg;
833
834         memset(psv_seg, 0, sizeof(*psv_seg));
835         psv_seg->psv_num = cpu_to_be32(psv_idx);
836         switch (domain->sig_type) {
837         case IB_SIG_TYPE_NONE:
838                 break;
839         case IB_SIG_TYPE_T10_DIF:
840                 psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 |
841                                                      domain->sig.dif.app_tag);
842                 psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag);
843                 break;
844         default:
845                 pr_err("Bad signature type (%d) is given.\n",
846                        domain->sig_type);
847                 return -EINVAL;
848         }
849
850         *seg += sizeof(*psv_seg);
851         *size += sizeof(*psv_seg) / 16;
852
853         return 0;
854 }
855
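/*
 * set_reg_wr - Emit the UMR control segment, mkey segment and translation
 * data for a fast-register WR. Descriptor lists up to
 * MLX5_IB_SQ_UMR_INLINE_THRESHOLD bytes are copied inline into the WQE;
 * larger lists are referenced through a data segment pointing at the
 * DMA-mapped descriptor buffer.
 */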
856 static int set_reg_wr(struct mlx5_ib_qp *qp,
857                       const struct ib_reg_wr *wr,
858                       void **seg, int *size, void **cur_edge,
859                       bool check_not_free)
860 {
861         struct mlx5_ib_mr *mr = to_mmr(wr->mr);
862         struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
863         struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device);
864         int mr_list_size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size;
865         bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD;
866         bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC;
867         u8 flags = 0;
868
869         /* Matches access in mlx5_set_umr_free_mkey() */
870         if (!mlx5_ib_can_reconfig_with_umr(dev, 0, wr->access)) {
871                 mlx5_ib_warn(
872                         to_mdev(qp->ibqp.device),
873                         "Fast update for MR access flags is not possible\n");
874                 return -EINVAL;
875         }
876
877         if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) {
878                 mlx5_ib_warn(to_mdev(qp->ibqp.device),
879                              "Invalid IB_SEND_INLINE send flag\n");
880                 return -EINVAL;
881         }
882
883         if (check_not_free)
884                 flags |= MLX5_UMR_CHECK_NOT_FREE;
885         if (umr_inline)
886                 flags |= MLX5_UMR_INLINE;
887
888         set_reg_umr_seg(*seg, mr, flags, atomic);
889         *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
890         *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
891         handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
892
893         set_reg_mkey_seg(*seg, mr, wr->key, wr->access);
894         *seg += sizeof(struct mlx5_mkey_seg);
895         *size += sizeof(struct mlx5_mkey_seg) / 16;
896         handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
897
898         if (umr_inline) {
899                 memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs,
900                                 mr_list_size);
901                 *size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4);
902         } else {
903                 set_reg_data_seg(*seg, mr, pd);
904                 *seg += sizeof(struct mlx5_wqe_data_seg);
905                 *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
906         }
907         return 0;
908 }
909
910 static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size,
911                         void **cur_edge)
912 {
913         set_linv_umr_seg(*seg);
914         *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
915         *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
916         handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
917         set_linv_mkey_seg(*seg);
918         *seg += sizeof(struct mlx5_mkey_seg);
919         *size += sizeof(struct mlx5_mkey_seg) / 16;
920         handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
921 }
922
923 static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16)
924 {
925         __be32 *p = NULL;
926         int i, j;
927
928         pr_debug("dump WQE index %u:\n", idx);
929         for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) {
930                 if ((i & 0xf) == 0) {
931                         p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx);
932                         pr_debug("WQBB at %p:\n", (void *)p);
933                         j = 0;
934                         idx = (idx + 1) & (qp->sq.wqe_cnt - 1);
935                 }
936                 pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]),
937                          be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]),
938                          be32_to_cpu(p[j + 3]));
939         }
940 }
941
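/*
 * __begin_wqe - Reserve the next SQ slot and initialize its control segment.
 * Fails with -ENOMEM when the SQ is full. On success, *seg and *size point
 * just past the control segment and *cur_edge caches the current SQ fragment
 * edge for the segment builders that follow.
 */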
942 static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg,
943                        struct mlx5_wqe_ctrl_seg **ctrl,
944                        const struct ib_send_wr *wr, unsigned int *idx,
945                        int *size, void **cur_edge, int nreq,
946                        bool send_signaled, bool solicited)
947 {
948         if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
949                 return -ENOMEM;
950
951         *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
952         *seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx);
953         *ctrl = *seg;
954         *(uint32_t *)(*seg + 8) = 0;
955         (*ctrl)->imm = send_ieth(wr);
956         (*ctrl)->fm_ce_se = qp->sq_signal_bits |
957                 (send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) |
958                 (solicited ? MLX5_WQE_CTRL_SOLICITED : 0);
959
960         *seg += sizeof(**ctrl);
961         *size = sizeof(**ctrl) / 16;
962         *cur_edge = qp->sq.cur_edge;
963
964         return 0;
965 }
966
967 static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
968                      struct mlx5_wqe_ctrl_seg **ctrl,
969                      const struct ib_send_wr *wr, unsigned int *idx, int *size,
970                      void **cur_edge, int nreq)
971 {
972         return __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq,
973                            wr->send_flags & IB_SEND_SIGNALED,
974                            wr->send_flags & IB_SEND_SOLICITED);
975 }
976
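/*
 * finish_wqe - Complete the control segment (opcode, index, DS count, fence,
 * optional signature), record the bookkeeping used at completion time and
 * advance cur_post by the number of WQEBBs the WQE occupies,
 * DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB).
 */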
977 static void finish_wqe(struct mlx5_ib_qp *qp,
978                        struct mlx5_wqe_ctrl_seg *ctrl,
979                        void *seg, u8 size, void *cur_edge,
980                        unsigned int idx, u64 wr_id, int nreq, u8 fence,
981                        u32 mlx5_opcode)
982 {
983         u8 opmod = 0;
984
985         ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
986                                              mlx5_opcode | ((u32)opmod << 24));
987         ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8));
988         ctrl->fm_ce_se |= fence;
989         if (unlikely(qp->flags_en & MLX5_QP_FLAG_SIGNATURE))
990                 ctrl->signature = wq_sig(ctrl);
991
992         qp->sq.wrid[idx] = wr_id;
993         qp->sq.w_list[idx].opcode = mlx5_opcode;
994         qp->sq.wqe_head[idx] = qp->sq.head + nreq;
995         qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
996         qp->sq.w_list[idx].next = qp->sq.cur_post;
997
998         /* Save the edge, which may have been updated during the WQE
999          * construction, into the SQ's cache.
1000          */
1001         seg = PTR_ALIGN(seg, MLX5_SEND_WQE_BB);
1002         qp->sq.cur_edge = (unlikely(seg == cur_edge)) ?
1003                           get_sq_edge(&qp->sq, qp->sq.cur_post &
1004                                       (qp->sq.wqe_cnt - 1)) :
1005                           cur_edge;
1006 }
1007
1008 static void handle_rdma_op(const struct ib_send_wr *wr, void **seg, int *size)
1009 {
1010         set_raddr_seg(*seg, rdma_wr(wr)->remote_addr, rdma_wr(wr)->rkey);
1011         *seg += sizeof(struct mlx5_wqe_raddr_seg);
1012         *size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
1013 }
1014
1015 static void handle_local_inv(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
1016                              struct mlx5_wqe_ctrl_seg **ctrl, void **seg,
1017                              int *size, void **cur_edge, unsigned int idx)
1018 {
1019         qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
1020         (*ctrl)->imm = cpu_to_be32(wr->ex.invalidate_rkey);
1021         set_linv_wr(qp, seg, size, cur_edge);
1022 }
1023
1024 static int handle_reg_mr(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
1025                          struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size,
1026                          void **cur_edge, unsigned int idx)
1027 {
1028         qp->sq.wr_data[idx] = IB_WR_REG_MR;
1029         (*ctrl)->imm = cpu_to_be32(reg_wr(wr)->key);
1030         return set_reg_wr(qp, reg_wr(wr), seg, size, cur_edge, true);
1031 }
1032
1033 static int handle_psv(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1034                       const struct ib_send_wr *wr,
1035                       struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size,
1036                       void **cur_edge, unsigned int *idx, int nreq,
1037                       struct ib_sig_domain *domain, u32 psv_index,
1038                       u8 next_fence)
1039 {
1040         int err;
1041
1042         /*
1043          * SET_PSV WQEs are not signaled; they are solicited on error.
1044          */
1045         err = __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq,
1046                           false, true);
1047         if (unlikely(err)) {
1048                 mlx5_ib_warn(dev, "\n");
1049                 err = -ENOMEM;
1050                 goto out;
1051         }
1052         err = set_psv_wr(domain, psv_index, seg, size);
1053         if (unlikely(err)) {
1054                 mlx5_ib_warn(dev, "\n");
1055                 goto out;
1056         }
1057         finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq,
1058                    next_fence, MLX5_OPCODE_SET_PSV);
1059
1060 out:
1061         return err;
1062 }
1063
1064 static int handle_reg_mr_integrity(struct mlx5_ib_dev *dev,
1065                                    struct mlx5_ib_qp *qp,
1066                                    const struct ib_send_wr *wr,
1067                                    struct mlx5_wqe_ctrl_seg **ctrl, void **seg,
1068                                    int *size, void **cur_edge,
1069                                    unsigned int *idx, int nreq, u8 fence,
1070                                    u8 next_fence)
1071 {
1072         struct mlx5_ib_mr *mr;
1073         struct mlx5_ib_mr *pi_mr;
1074         struct mlx5_ib_mr pa_pi_mr;
1075         struct ib_sig_attrs *sig_attrs;
1076         struct ib_reg_wr reg_pi_wr;
1077         int err;
1078
1079         qp->sq.wr_data[*idx] = IB_WR_REG_MR_INTEGRITY;
1080
1081         mr = to_mmr(reg_wr(wr)->mr);
1082         pi_mr = mr->pi_mr;
1083
1084         if (pi_mr) {
1085                 memset(&reg_pi_wr, 0,
1086                        sizeof(struct ib_reg_wr));
1087
1088                 reg_pi_wr.mr = &pi_mr->ibmr;
1089                 reg_pi_wr.access = reg_wr(wr)->access;
1090                 reg_pi_wr.key = pi_mr->ibmr.rkey;
1091
1092                 (*ctrl)->imm = cpu_to_be32(reg_pi_wr.key);
1093                 /* UMR for data + prot registration */
1094                 err = set_reg_wr(qp, &reg_pi_wr, seg, size, cur_edge, false);
1095                 if (unlikely(err))
1096                         goto out;
1097
1098                 finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id,
1099                            nreq, fence, MLX5_OPCODE_UMR);
1100
1101                 err = begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq);
1102                 if (unlikely(err)) {
1103                         mlx5_ib_warn(dev, "\n");
1104                         err = -ENOMEM;
1105                         goto out;
1106                 }
1107         } else {
1108                 memset(&pa_pi_mr, 0, sizeof(struct mlx5_ib_mr));
1109                 /* No UMR, use local_dma_lkey */
1110                 pa_pi_mr.ibmr.lkey = mr->ibmr.pd->local_dma_lkey;
1111                 pa_pi_mr.ndescs = mr->ndescs;
1112                 pa_pi_mr.data_length = mr->data_length;
1113                 pa_pi_mr.data_iova = mr->data_iova;
1114                 if (mr->meta_ndescs) {
1115                         pa_pi_mr.meta_ndescs = mr->meta_ndescs;
1116                         pa_pi_mr.meta_length = mr->meta_length;
1117                         pa_pi_mr.pi_iova = mr->pi_iova;
1118                 }
1119
1120                 pa_pi_mr.ibmr.length = mr->ibmr.length;
1121                 mr->pi_mr = &pa_pi_mr;
1122         }
1123         (*ctrl)->imm = cpu_to_be32(mr->ibmr.rkey);
1124         /* UMR for sig MR */
1125         err = set_pi_umr_wr(wr, qp, seg, size, cur_edge);
1126         if (unlikely(err)) {
1127                 mlx5_ib_warn(dev, "\n");
1128                 goto out;
1129         }
1130         finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq,
1131                    fence, MLX5_OPCODE_UMR);
1132
1133         sig_attrs = mr->ibmr.sig_attrs;
1134         err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq,
1135                          &sig_attrs->mem, mr->sig->psv_memory.psv_idx,
1136                          next_fence);
1137         if (unlikely(err))
1138                 goto out;
1139
1140         err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq,
1141                          &sig_attrs->wire, mr->sig->psv_wire.psv_idx,
1142                          next_fence);
1143         if (unlikely(err))
1144                 goto out;
1145
1146         qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
1147
1148 out:
1149         return err;
1150 }
1151
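/*
 * handle_qpt_rc - Emit the opcode-specific segments for a WR posted on an RC
 * (or XRC initiator) QP. Registration and invalidation opcodes consume the
 * SGE list themselves, so *num_sge is cleared for them; atomic opcodes are
 * rejected with -EOPNOTSUPP on this path.
 */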
1152 static int handle_qpt_rc(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1153                          const struct ib_send_wr *wr,
1154                          struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size,
1155                          void **cur_edge, unsigned int *idx, int nreq, u8 fence,
1156                          u8 next_fence, int *num_sge)
1157 {
1158         int err = 0;
1159
1160         switch (wr->opcode) {
1161         case IB_WR_RDMA_READ:
1162         case IB_WR_RDMA_WRITE:
1163         case IB_WR_RDMA_WRITE_WITH_IMM:
1164                 handle_rdma_op(wr, seg, size);
1165                 break;
1166
1167         case IB_WR_ATOMIC_CMP_AND_SWP:
1168         case IB_WR_ATOMIC_FETCH_AND_ADD:
1169         case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
1170                 mlx5_ib_warn(dev, "Atomic operations are not supported yet\n");
1171                 err = -EOPNOTSUPP;
1172                 goto out;
1173
1174         case IB_WR_LOCAL_INV:
1175                 handle_local_inv(qp, wr, ctrl, seg, size, cur_edge, *idx);
1176                 *num_sge = 0;
1177                 break;
1178
1179         case IB_WR_REG_MR:
1180                 err = handle_reg_mr(qp, wr, ctrl, seg, size, cur_edge, *idx);
1181                 if (unlikely(err))
1182                         goto out;
1183                 *num_sge = 0;
1184                 break;
1185
1186         case IB_WR_REG_MR_INTEGRITY:
1187                 err = handle_reg_mr_integrity(dev, qp, wr, ctrl, seg, size,
1188                                               cur_edge, idx, nreq, fence,
1189                                               next_fence);
1190                 if (unlikely(err))
1191                         goto out;
1192                 *num_sge = 0;
1193                 break;
1194
1195         default:
1196                 break;
1197         }
1198
1199 out:
1200         return err;
1201 }
1202
1203 static void handle_qpt_uc(const struct ib_send_wr *wr, void **seg, int *size)
1204 {
1205         switch (wr->opcode) {
1206         case IB_WR_RDMA_WRITE:
1207         case IB_WR_RDMA_WRITE_WITH_IMM:
1208                 handle_rdma_op(wr, seg, size);
1209                 break;
1210         default:
1211                 break;
1212         }
1213 }
1214
1215 static void handle_qpt_hw_gsi(struct mlx5_ib_qp *qp,
1216                               const struct ib_send_wr *wr, void **seg,
1217                               int *size, void **cur_edge)
1218 {
1219         set_datagram_seg(*seg, wr);
1220         *seg += sizeof(struct mlx5_wqe_datagram_seg);
1221         *size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
1222         handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
1223 }
1224
1225 static void handle_qpt_ud(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
1226                           void **seg, int *size, void **cur_edge)
1227 {
1228         set_datagram_seg(*seg, wr);
1229         *seg += sizeof(struct mlx5_wqe_datagram_seg);
1230         *size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
1231         handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
1232
1233         /* Handle a QP that supports UD offload (IPoIB UD LSO) */
1234         if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) {
1235                 struct mlx5_wqe_eth_pad *pad;
1236
1237                 pad = *seg;
1238                 memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad));
1239                 *seg += sizeof(struct mlx5_wqe_eth_pad);
1240                 *size += sizeof(struct mlx5_wqe_eth_pad) / 16;
1241                 set_eth_seg(wr, qp, seg, size, cur_edge);
1242                 handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
1243         }
1244 }
1245
1246 static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1247                               const struct ib_send_wr *wr,
1248                               struct mlx5_wqe_ctrl_seg **ctrl, void **seg,
1249                               int *size, void **cur_edge, unsigned int idx)
1250 {
1251         int err = 0;
1252
1253         if (unlikely(wr->opcode != MLX5_IB_WR_UMR)) {
1254                 err = -EINVAL;
1255                 mlx5_ib_warn(dev, "bad opcode %d\n", wr->opcode);
1256                 goto out;
1257         }
1258
1259         qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
1260         (*ctrl)->imm = cpu_to_be32(umr_wr(wr)->mkey);
1261         err = set_reg_umr_segment(dev, *seg, wr);
1262         if (unlikely(err))
1263                 goto out;
1264         *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
1265         *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
1266         handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
1267         set_reg_mkey_segment(dev, *seg, wr);
1268         *seg += sizeof(struct mlx5_mkey_seg);
1269         *size += sizeof(struct mlx5_mkey_seg) / 16;
1270         handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
1271 out:
1272         return err;
1273 }
1274
1275 int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
1276                       const struct ib_send_wr **bad_wr, bool drain)
1277 {
1278         struct mlx5_wqe_ctrl_seg *ctrl = NULL;  /* compiler warning */
1279         struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1280         struct mlx5_core_dev *mdev = dev->mdev;
1281         struct mlx5_ib_qp *qp;
1282         struct mlx5_wqe_xrc_seg *xrc;
1283         struct mlx5_bf *bf;
1284         void *cur_edge;
1285         int size;
1286         unsigned long flags;
1287         unsigned int idx;
1288         int err = 0;
1289         int num_sge;
1290         void *seg;
1291         int nreq;
1292         int i;
1293         u8 next_fence = 0;
1294         u8 fence;
1295
1296         if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
1297                      !drain)) {
1298                 *bad_wr = wr;
1299                 return -EIO;
1300         }
1301
1302         if (unlikely(ibqp->qp_type == IB_QPT_GSI))
1303                 return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr);
1304
1305         qp = to_mqp(ibqp);
1306         bf = &qp->bf;
1307
1308         spin_lock_irqsave(&qp->sq.lock, flags);
1309
1310         for (nreq = 0; wr; nreq++, wr = wr->next) {
1311                 if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
1312                         mlx5_ib_warn(dev, "\n");
1313                         err = -EINVAL;
1314                         *bad_wr = wr;
1315                         goto out;
1316                 }
1317
1318                 num_sge = wr->num_sge;
1319                 if (unlikely(num_sge > qp->sq.max_gs)) {
1320                         mlx5_ib_warn(dev, "\n");
1321                         err = -EINVAL;
1322                         *bad_wr = wr;
1323                         goto out;
1324                 }
1325
1326                 err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, &cur_edge,
1327                                 nreq);
1328                 if (err) {
1329                         mlx5_ib_warn(dev, "\n");
1330                         err = -ENOMEM;
1331                         *bad_wr = wr;
1332                         goto out;
1333                 }
1334
1335                 if (wr->opcode == IB_WR_REG_MR ||
1336                     wr->opcode == IB_WR_REG_MR_INTEGRITY) {
1337                         fence = dev->umr_fence;
1338                         next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
1339                 } else  {
1340                         if (wr->send_flags & IB_SEND_FENCE) {
1341                                 if (qp->next_fence)
1342                                         fence = MLX5_FENCE_MODE_SMALL_AND_FENCE;
1343                                 else
1344                                         fence = MLX5_FENCE_MODE_FENCE;
1345                         } else {
1346                                 fence = qp->next_fence;
1347                         }
1348                 }
1349
1350                 switch (ibqp->qp_type) {
1351                 case IB_QPT_XRC_INI:
1352                         xrc = seg;
1353                         seg += sizeof(*xrc);
1354                         size += sizeof(*xrc) / 16;
1355                         fallthrough;
1356                 case IB_QPT_RC:
1357                         err = handle_qpt_rc(dev, qp, wr, &ctrl, &seg, &size,
1358                                             &cur_edge, &idx, nreq, fence,
1359                                             next_fence, &num_sge);
1360                         if (unlikely(err)) {
1361                                 *bad_wr = wr;
1362                                 goto out;
1363                         } else if (wr->opcode == IB_WR_REG_MR_INTEGRITY) {
1364                                 goto skip_psv;
1365                         }
1366                         break;
1367
1368                 case IB_QPT_UC:
1369                         handle_qpt_uc(wr, &seg, &size);
1370                         break;
1371                 case IB_QPT_SMI:
1372                         if (unlikely(!mdev->port_caps[qp->port - 1].has_smi)) {
1373                                 mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n");
1374                                 err = -EPERM;
1375                                 *bad_wr = wr;
1376                                 goto out;
1377                         }
1378                         fallthrough;
1379                 case MLX5_IB_QPT_HW_GSI:
1380                         handle_qpt_hw_gsi(qp, wr, &seg, &size, &cur_edge);
1381                         break;
1382                 case IB_QPT_UD:
1383                         handle_qpt_ud(qp, wr, &seg, &size, &cur_edge);
1384                         break;
1385                 case MLX5_IB_QPT_REG_UMR:
1386                         err = handle_qpt_reg_umr(dev, qp, wr, &ctrl, &seg,
1387                                                        &size, &cur_edge, idx);
1388                         if (unlikely(err))
1389                                 goto out;
1390                         break;
1391
1392                 default:
1393                         break;
1394                 }
1395
1396                 if (wr->send_flags & IB_SEND_INLINE && num_sge) {
1397                         err = set_data_inl_seg(qp, wr, &seg, &size, &cur_edge);
1398                         if (unlikely(err)) {
1399                                 mlx5_ib_warn(dev, "\n");
1400                                 *bad_wr = wr;
1401                                 goto out;
1402                         }
1403                 } else {
1404                         for (i = 0; i < num_sge; i++) {
1405                                 handle_post_send_edge(&qp->sq, &seg, size,
1406                                                       &cur_edge);
1407                                 if (unlikely(!wr->sg_list[i].length))
1408                                         continue;
1409
1410                                 set_data_ptr_seg(
1411                                         (struct mlx5_wqe_data_seg *)seg,
1412                                         wr->sg_list + i);
1413                                 size += sizeof(struct mlx5_wqe_data_seg) / 16;
1414                                 seg += sizeof(struct mlx5_wqe_data_seg);
1415                         }
1416                 }
1417
1418                 qp->next_fence = next_fence;
1419                 finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq,
1420                            fence, mlx5_ib_opcode[wr->opcode]);
1421 skip_psv:
1422                 if (0)
1423                         dump_wqe(qp, idx, size);
1424         }
1425
1426 out:
1427         if (likely(nreq)) {
1428                 qp->sq.head += nreq;
1429
1430                 /* Make sure that descriptors are written before
1431                  * updating doorbell record and ringing the doorbell
1432                  */
1433                 wmb();
1434
1435                 qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
1436
1437                 /* Make sure doorbell record is visible to the HCA before
1438                  * we hit doorbell.
1439                  */
1440                 wmb();
1441
1442                 mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset);
1443                 /* Make sure doorbells don't leak out of SQ spinlock
1444                  * and reach the HCA out of order.
1445                  */
1446                 bf->offset ^= bf->buf_size;
1447         }
1448
1449         spin_unlock_irqrestore(&qp->sq.lock, flags);
1450
1451         return err;
1452 }
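/*
 * Illustrative caller sketch (editor's example, not part of the driver): an
 * ULP reaches this function through the generic verbs entry point. Here
 * "qp", "dma_addr", "len" and "lkey" are assumed to have been set up by the
 * caller:
 *
 *	struct ib_sge sge = { .addr = dma_addr, .length = len, .lkey = lkey };
 *	struct ib_send_wr wr = {
 *		.wr_id = 1,
 *		.opcode = IB_WR_SEND,
 *		.sg_list = &sge,
 *		.num_sge = 1,
 *		.send_flags = IB_SEND_SIGNALED,
 *	};
 *	const struct ib_send_wr *bad_wr;
 *	int ret = ib_post_send(qp, &wr, &bad_wr);
 */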
1453
1454 static void set_sig_seg(struct mlx5_rwqe_sig *sig, int max_gs)
1455 {
1456         sig->signature = calc_sig(sig, (max_gs + 1) << 2);
1457 }
1458
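/*
 * mlx5_ib_post_recv - Post a chain of receive WRs to the RQ. Each receive
 * WQE is a plain scatter list; when fewer than max_gs SGEs are used, a
 * terminating zero-length entry with MLX5_INVALID_LKEY marks the end of the
 * list.
 */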
1459 int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
1460                       const struct ib_recv_wr **bad_wr, bool drain)
1461 {
1462         struct mlx5_ib_qp *qp = to_mqp(ibqp);
1463         struct mlx5_wqe_data_seg *scat;
1464         struct mlx5_rwqe_sig *sig;
1465         struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1466         struct mlx5_core_dev *mdev = dev->mdev;
1467         unsigned long flags;
1468         int err = 0;
1469         int nreq;
1470         int ind;
1471         int i;
1472
1473         if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
1474                      !drain)) {
1475                 *bad_wr = wr;
1476                 return -EIO;
1477         }
1478
1479         if (unlikely(ibqp->qp_type == IB_QPT_GSI))
1480                 return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr);
1481
1482         spin_lock_irqsave(&qp->rq.lock, flags);
1483
1484         ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
1485
1486         for (nreq = 0; wr; nreq++, wr = wr->next) {
1487                 if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
1488                         err = -ENOMEM;
1489                         *bad_wr = wr;
1490                         goto out;
1491                 }
1492
1493                 if (unlikely(wr->num_sge > qp->rq.max_gs)) {
1494                         err = -EINVAL;
1495                         *bad_wr = wr;
1496                         goto out;
1497                 }
1498
1499                 scat = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ind);
1500                 if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE)
1501                         scat++;
1502
1503                 for (i = 0; i < wr->num_sge; i++)
1504                         set_data_ptr_seg(scat + i, wr->sg_list + i);
1505
1506                 if (i < qp->rq.max_gs) {
1507                         scat[i].byte_count = 0;
1508                         scat[i].lkey       = cpu_to_be32(MLX5_INVALID_LKEY);
1509                         scat[i].addr       = 0;
1510                 }
1511
1512                 if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE) {
1513                         sig = (struct mlx5_rwqe_sig *)scat;
1514                         set_sig_seg(sig, qp->rq.max_gs);
1515                 }
1516
1517                 qp->rq.wrid[ind] = wr->wr_id;
1518
1519                 ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
1520         }
1521
1522 out:
1523         if (likely(nreq)) {
1524                 qp->rq.head += nreq;
1525
1526                 /* Make sure that descriptors are written before
1527                  * doorbell record.
1528                  */
1529                 wmb();
1530
1531                 *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
1532         }
1533
1534         spin_unlock_irqrestore(&qp->rq.lock, flags);
1535
1536         return err;
1537 }