drivers/infiniband/sw/rxe/rxe_req.c (GNU Linux-libre 4.14.251-gnu1)
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/skbuff.h>
#include <crypto/hash.h>

#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"

static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
		       u32 opcode);

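/* Advance the DMA state of a partially acknowledged send or RDMA write
 * WQE past its first npsn packets, so that a retry resumes at the first
 * unacknowledged PSN instead of resending data the responder already
 * has.
 */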
static inline void retry_first_write_send(struct rxe_qp *qp,
					  struct rxe_send_wqe *wqe,
					  unsigned int mask, int npsn)
{
	int i;

	for (i = 0; i < npsn; i++) {
		int to_send = (wqe->dma.resid > qp->mtu) ?
				qp->mtu : wqe->dma.resid;

		qp->req.opcode = next_opcode(qp, wqe,
					     wqe->wr.opcode);

		if (wqe->wr.send_flags & IB_SEND_INLINE) {
			wqe->dma.resid -= to_send;
			wqe->dma.sge_offset += to_send;
		} else {
			advance_dma_data(&wqe->dma, to_send);
		}
		if (mask & WR_WRITE_MASK)
			wqe->iova += qp->mtu;
	}
}

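/* Rewind the requester to the last PSN the completer has acknowledged
 * and mark every not-yet-completed WQE on the send queue for resend.
 * Only the unacknowledged tail of the first (partially acked) WQE is
 * retransmitted; all later WQEs are resent from the beginning.
 */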
static void req_retry(struct rxe_qp *qp)
{
	struct rxe_send_wqe *wqe;
	unsigned int wqe_index;
	unsigned int mask;
	int npsn;
	int first = 1;

	qp->req.wqe_index	= consumer_index(qp->sq.queue);
	qp->req.psn		= qp->comp.psn;
	qp->req.opcode		= -1;

	for (wqe_index = consumer_index(qp->sq.queue);
		wqe_index != producer_index(qp->sq.queue);
		wqe_index = next_index(qp->sq.queue, wqe_index)) {
		wqe = addr_from_index(qp->sq.queue, wqe_index);
		mask = wr_opcode_mask(wqe->wr.opcode, qp);

		if (wqe->state == wqe_state_posted)
			break;

		if (wqe->state == wqe_state_done)
			continue;

		wqe->iova = (mask & WR_ATOMIC_MASK) ?
			     wqe->wr.wr.atomic.remote_addr :
			     (mask & WR_READ_OR_WRITE_MASK) ?
			     wqe->wr.wr.rdma.remote_addr :
			     0;

		if (!first || (mask & WR_READ_MASK) == 0) {
			wqe->dma.resid = wqe->dma.length;
			wqe->dma.cur_sge = 0;
			wqe->dma.sge_offset = 0;
		}

		if (first) {
			first = 0;

			if (mask & WR_WRITE_OR_SEND_MASK) {
				npsn = (qp->comp.psn - wqe->first_psn) &
					BTH_PSN_MASK;
				retry_first_write_send(qp, wqe, mask, npsn);
			}

			if (mask & WR_READ_MASK) {
				npsn = (wqe->dma.length - wqe->dma.resid) /
					qp->mtu;
				wqe->iova += npsn * qp->mtu;
			}
		}

		wqe->state = wqe_state_posted;
	}
}

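/* Timer handler: the RNR NAK back-off period has expired, so kick the
 * requester task to retry the send queue.
 */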
void rnr_nak_timer(unsigned long data)
{
	struct rxe_qp *qp = (struct rxe_qp *)data;

	pr_debug("qp#%d rnr nak timer fired\n", qp_num(qp));
	rxe_run_task(&qp->req.task, 1);
}

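/* Return the next WQE the requester should work on, or NULL if there is
 * nothing to do: the send queue is empty, the QP is draining (reporting
 * IB_EVENT_SQ_DRAINED once the queue is fully drained), or a fenced
 * WQE is still waiting for earlier requests to complete.
 */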
static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
{
	struct rxe_send_wqe *wqe = queue_head(qp->sq.queue);
	unsigned long flags;

	if (unlikely(qp->req.state == QP_STATE_DRAIN)) {
		/* check to see if we are drained;
		 * state_lock used by requester and completer
		 */
		spin_lock_irqsave(&qp->state_lock, flags);
		do {
			if (qp->req.state != QP_STATE_DRAIN) {
				/* comp just finished */
				spin_unlock_irqrestore(&qp->state_lock,
						       flags);
				break;
			}

			if (wqe && ((qp->req.wqe_index !=
				consumer_index(qp->sq.queue)) ||
				(wqe->state != wqe_state_posted))) {
				/* comp not done yet */
				spin_unlock_irqrestore(&qp->state_lock,
						       flags);
				break;
			}

			qp->req.state = QP_STATE_DRAINED;
			spin_unlock_irqrestore(&qp->state_lock, flags);

			if (qp->ibqp.event_handler) {
				struct ib_event ev;

				ev.device = qp->ibqp.device;
				ev.element.qp = &qp->ibqp;
				ev.event = IB_EVENT_SQ_DRAINED;
				qp->ibqp.event_handler(&ev,
					qp->ibqp.qp_context);
			}
		} while (0);
	}

	if (qp->req.wqe_index == producer_index(qp->sq.queue))
		return NULL;

	wqe = addr_from_index(qp->sq.queue, qp->req.wqe_index);

	if (unlikely((qp->req.state == QP_STATE_DRAIN ||
		      qp->req.state == QP_STATE_DRAINED) &&
		     (wqe->state != wqe_state_processing)))
		return NULL;

	if (unlikely((wqe->wr.send_flags & IB_SEND_FENCE) &&
		     (qp->req.wqe_index != consumer_index(qp->sq.queue)))) {
		qp->req.wait_fence = 1;
		return NULL;
	}

	wqe->mask = wr_opcode_mask(wqe->wr.opcode, qp);
	return wqe;
}

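/* Choose the next RC wire opcode for this WQE.  "fits" is true when the
 * remaining payload fits in one MTU: a multi-packet message is chained
 * as FIRST, zero or more MIDDLE and a LAST packet, while a message that
 * fits in a single packet uses the ONLY variant.
 */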
static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits)
{
	switch (opcode) {
	case IB_WR_RDMA_WRITE:
		if (qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_MIDDLE)
			return fits ?
				IB_OPCODE_RC_RDMA_WRITE_LAST :
				IB_OPCODE_RC_RDMA_WRITE_MIDDLE;
		else
			return fits ?
				IB_OPCODE_RC_RDMA_WRITE_ONLY :
				IB_OPCODE_RC_RDMA_WRITE_FIRST;

	case IB_WR_RDMA_WRITE_WITH_IMM:
		if (qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_RDMA_WRITE_MIDDLE)
			return fits ?
				IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE :
				IB_OPCODE_RC_RDMA_WRITE_MIDDLE;
		else
			return fits ?
				IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE :
				IB_OPCODE_RC_RDMA_WRITE_FIRST;

	case IB_WR_SEND:
		if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE)
			return fits ?
				IB_OPCODE_RC_SEND_LAST :
				IB_OPCODE_RC_SEND_MIDDLE;
		else
			return fits ?
				IB_OPCODE_RC_SEND_ONLY :
				IB_OPCODE_RC_SEND_FIRST;

	case IB_WR_SEND_WITH_IMM:
		if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE)
			return fits ?
				IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE :
				IB_OPCODE_RC_SEND_MIDDLE;
		else
			return fits ?
				IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE :
				IB_OPCODE_RC_SEND_FIRST;

	case IB_WR_RDMA_READ:
		return IB_OPCODE_RC_RDMA_READ_REQUEST;

	case IB_WR_ATOMIC_CMP_AND_SWP:
		return IB_OPCODE_RC_COMPARE_SWAP;

	case IB_WR_ATOMIC_FETCH_AND_ADD:
		return IB_OPCODE_RC_FETCH_ADD;

	case IB_WR_SEND_WITH_INV:
		if (qp->req.opcode == IB_OPCODE_RC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_RC_SEND_MIDDLE)
			return fits ? IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE :
				IB_OPCODE_RC_SEND_MIDDLE;
		else
			return fits ? IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE :
				IB_OPCODE_RC_SEND_FIRST;
	case IB_WR_REG_MR:
	case IB_WR_LOCAL_INV:
		return opcode;
	}

	return -EINVAL;
}

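/* UC counterpart of next_opcode_rc(); UC supports only sends and RDMA
 * writes.
 */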
static int next_opcode_uc(struct rxe_qp *qp, u32 opcode, int fits)
{
	switch (opcode) {
	case IB_WR_RDMA_WRITE:
		if (qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_FIRST ||
		    qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_MIDDLE)
			return fits ?
				IB_OPCODE_UC_RDMA_WRITE_LAST :
				IB_OPCODE_UC_RDMA_WRITE_MIDDLE;
		else
			return fits ?
				IB_OPCODE_UC_RDMA_WRITE_ONLY :
				IB_OPCODE_UC_RDMA_WRITE_FIRST;

	case IB_WR_RDMA_WRITE_WITH_IMM:
		if (qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_FIRST ||
		    qp->req.opcode == IB_OPCODE_UC_RDMA_WRITE_MIDDLE)
			return fits ?
				IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE :
				IB_OPCODE_UC_RDMA_WRITE_MIDDLE;
		else
			return fits ?
				IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE :
				IB_OPCODE_UC_RDMA_WRITE_FIRST;

	case IB_WR_SEND:
		if (qp->req.opcode == IB_OPCODE_UC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_UC_SEND_MIDDLE)
			return fits ?
				IB_OPCODE_UC_SEND_LAST :
				IB_OPCODE_UC_SEND_MIDDLE;
		else
			return fits ?
				IB_OPCODE_UC_SEND_ONLY :
				IB_OPCODE_UC_SEND_FIRST;

	case IB_WR_SEND_WITH_IMM:
		if (qp->req.opcode == IB_OPCODE_UC_SEND_FIRST ||
		    qp->req.opcode == IB_OPCODE_UC_SEND_MIDDLE)
			return fits ?
				IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE :
				IB_OPCODE_UC_SEND_MIDDLE;
		else
			return fits ?
				IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE :
				IB_OPCODE_UC_SEND_FIRST;
	}

	return -EINVAL;
}

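/* Map the WQE opcode to the next wire opcode based on the QP type and
 * on how much payload is left in the WQE.  UD-style QPs (SMI, UD, GSI)
 * must fit each message in a single packet, so only the SEND_ONLY
 * variants are valid there.
 */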
static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
		       u32 opcode)
{
	int fits = (wqe->dma.resid <= qp->mtu);

	switch (qp_type(qp)) {
	case IB_QPT_RC:
		return next_opcode_rc(qp, opcode, fits);

	case IB_QPT_UC:
		return next_opcode_uc(qp, opcode, fits);

	case IB_QPT_SMI:
	case IB_QPT_UD:
	case IB_QPT_GSI:
		switch (opcode) {
		case IB_WR_SEND:
			return IB_OPCODE_UD_SEND_ONLY;

		case IB_WR_SEND_WITH_IMM:
			return IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
		}
		break;

	default:
		break;
	}

	return -EINVAL;
}

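/* Reserve an initiator resource for a new RDMA read or atomic request.
 * Returns -EAGAIN, leaving need_rd_atomic set, when all rd_atomic
 * slots are in use; the completer side is expected to release the slot
 * once the response arrives.
 */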
static inline int check_init_depth(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
	int depth;

	if (wqe->has_rd_atomic)
		return 0;

	qp->req.need_rd_atomic = 1;
	depth = atomic_dec_return(&qp->req.rd_atomic);

	if (depth >= 0) {
		qp->req.need_rd_atomic = 0;
		wqe->has_rd_atomic = 1;
		return 0;
	}

	atomic_inc(&qp->req.rd_atomic);
	return -EAGAIN;
}

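/* RC and UC use the path MTU negotiated for the QP; UD-style QPs use
 * the port MTU.
 */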
static inline int get_mtu(struct rxe_qp *qp)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);

	if ((qp_type(qp) == IB_QPT_RC) || (qp_type(qp) == IB_QPT_UC))
		return qp->mtu;

	return rxe->port.mtu_cap;
}

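/* Allocate an skb for one request packet and build its headers: the
 * BTH common to every packet plus whichever extension headers (RETH,
 * IMMDT, IETH, ATMETH, DETH) the opcode mask calls for.  paylen covers
 * everything from the BTH through the trailing ICRC.
 */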
static struct sk_buff *init_req_packet(struct rxe_qp *qp,
				       struct rxe_send_wqe *wqe,
				       int opcode, int payload,
				       struct rxe_pkt_info *pkt)
{
	struct rxe_dev		*rxe = to_rdev(qp->ibqp.device);
	struct rxe_port		*port = &rxe->port;
	struct sk_buff		*skb;
	struct rxe_send_wr	*ibwr = &wqe->wr;
	struct rxe_av		*av;
	int			pad = (-payload) & 0x3;
	int			paylen;
	int			solicited;
	u16			pkey;
	u32			qp_num;
	int			ack_req;

	/* length from start of bth to end of icrc */
	paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;

	/* pkt->hdr, rxe, port_num and mask are initialized in ifc
	 * layer
	 */
	pkt->opcode	= opcode;
	pkt->qp		= qp;
	pkt->psn	= qp->req.psn;
	pkt->mask	= rxe_opcode[opcode].mask;
	pkt->paylen	= paylen;
	pkt->offset	= 0;
	pkt->wqe	= wqe;

	/* init skb */
	av = rxe_get_av(pkt);
	skb = rxe_init_packet(rxe, av, paylen, pkt);
	if (unlikely(!skb))
		return NULL;

	/* init bth */
	solicited = (ibwr->send_flags & IB_SEND_SOLICITED) &&
			(pkt->mask & RXE_END_MASK) &&
			((pkt->mask & (RXE_SEND_MASK)) ||
			(pkt->mask & (RXE_WRITE_MASK | RXE_IMMDT_MASK)) ==
			(RXE_WRITE_MASK | RXE_IMMDT_MASK));

	pkey = (qp_type(qp) == IB_QPT_GSI) ?
		 port->pkey_tbl[ibwr->wr.ud.pkey_index] :
		 port->pkey_tbl[qp->attr.pkey_index];

	qp_num = (pkt->mask & RXE_DETH_MASK) ? ibwr->wr.ud.remote_qpn :
					 qp->attr.dest_qp_num;

	ack_req = ((pkt->mask & RXE_END_MASK) ||
		(qp->req.noack_pkts++ > RXE_MAX_PKT_PER_ACK));
	if (ack_req)
		qp->req.noack_pkts = 0;

	bth_init(pkt, pkt->opcode, solicited, 0, pad, pkey, qp_num,
		 ack_req, pkt->psn);

	/* init optional headers */
	if (pkt->mask & RXE_RETH_MASK) {
		reth_set_rkey(pkt, ibwr->wr.rdma.rkey);
		reth_set_va(pkt, wqe->iova);
		reth_set_len(pkt, wqe->dma.resid);
	}

	if (pkt->mask & RXE_IMMDT_MASK)
		immdt_set_imm(pkt, ibwr->ex.imm_data);

	if (pkt->mask & RXE_IETH_MASK)
		ieth_set_rkey(pkt, ibwr->ex.invalidate_rkey);

	if (pkt->mask & RXE_ATMETH_MASK) {
		atmeth_set_va(pkt, wqe->iova);
		if (opcode == IB_OPCODE_RC_COMPARE_SWAP ||
		    opcode == IB_OPCODE_RD_COMPARE_SWAP) {
			atmeth_set_swap_add(pkt, ibwr->wr.atomic.swap);
			atmeth_set_comp(pkt, ibwr->wr.atomic.compare_add);
		} else {
			atmeth_set_swap_add(pkt, ibwr->wr.atomic.compare_add);
		}
		atmeth_set_rkey(pkt, ibwr->wr.atomic.rkey);
	}

	if (pkt->mask & RXE_DETH_MASK) {
		if (qp->ibqp.qp_num == 1)
			deth_set_qkey(pkt, GSI_QKEY);
		else
			deth_set_qkey(pkt, ibwr->wr.ud.remote_qkey);
		deth_set_sqp(pkt, qp->ibqp.qp_num);
	}

	return skb;
}

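/* Copy the payload for one packet (from inline data or the SGE list),
 * zero the pad bytes, and append the ICRC, which rxe_prepare()
 * accumulates over the headers and this function extends over payload
 * and pad.
 */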
static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
		       struct rxe_pkt_info *pkt, struct sk_buff *skb,
		       int paylen)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	u32 crc = 0;
	u32 *p;
	int err;

	err = rxe_prepare(rxe, pkt, skb, &crc);
	if (err)
		return err;

	if (pkt->mask & RXE_WRITE_OR_SEND) {
		if (wqe->wr.send_flags & IB_SEND_INLINE) {
			u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset];

			crc = rxe_crc32(rxe, crc, tmp, paylen);
			memcpy(payload_addr(pkt), tmp, paylen);

			wqe->dma.resid -= paylen;
			wqe->dma.sge_offset += paylen;
		} else {
			err = copy_data(rxe, qp->pd, 0, &wqe->dma,
					payload_addr(pkt), paylen,
					from_mem_obj,
					&crc);
			if (err)
				return err;
		}
		if (bth_pad(pkt)) {
			u8 *pad = payload_addr(pkt) + paylen;

			memset(pad, 0, bth_pad(pkt));
			crc = rxe_crc32(rxe, crc, pad, bth_pad(pkt));
		}
	}
	p = payload_addr(pkt) + paylen + bth_pad(pkt);

	*p = ~crc;

	return 0;
}

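/* After the last packet of a message an RC WQE becomes "pending" until
 * the responder acknowledges it; any packet that is not the last
 * leaves the WQE in "processing".
 */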
static void update_wqe_state(struct rxe_qp *qp,
		struct rxe_send_wqe *wqe,
		struct rxe_pkt_info *pkt)
{
	if (pkt->mask & RXE_END_MASK) {
		if (qp_type(qp) == IB_QPT_RC)
			wqe->state = wqe_state_pending;
	} else {
		wqe->state = wqe_state_processing;
	}
}

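/* Assign first_psn/last_psn when a message starts and advance the
 * request PSN.  num_pkt counts the packets still to be sent including
 * the current one; fill_packet() has already subtracted this packet's
 * payload from dma.resid, so it is added back.  For example, with an
 * MTU of 1024 and 2048 bytes still unsent after this 1024-byte packet,
 * num_pkt = (2048 + 1024 + 1023) / 1024 = 3.  Reads consume one PSN
 * per expected response packet, so the PSN jumps past the whole
 * message.
 */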
static void update_wqe_psn(struct rxe_qp *qp,
			   struct rxe_send_wqe *wqe,
			   struct rxe_pkt_info *pkt,
			   int payload)
{
	/* number of packets left to send including current one */
	int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu;

	/* handle zero length packet case */
	if (num_pkt == 0)
		num_pkt = 1;

	if (pkt->mask & RXE_START_MASK) {
		wqe->first_psn = qp->req.psn;
		wqe->last_psn = (qp->req.psn + num_pkt - 1) & BTH_PSN_MASK;
	}

	if (pkt->mask & RXE_READ_MASK)
		qp->req.psn = (wqe->first_psn + num_pkt) & BTH_PSN_MASK;
	else
		qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK;
}

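/* save_state() and rollback_state() snapshot and restore the WQE state
 * and request PSN around rxe_xmit_packet(), so a failed transmit
 * leaves the requester exactly where it was.
 */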
static void save_state(struct rxe_send_wqe *wqe,
		       struct rxe_qp *qp,
		       struct rxe_send_wqe *rollback_wqe,
		       u32 *rollback_psn)
{
	rollback_wqe->state	= wqe->state;
	rollback_wqe->first_psn = wqe->first_psn;
	rollback_wqe->last_psn	= wqe->last_psn;
	*rollback_psn		= qp->req.psn;
}

static void rollback_state(struct rxe_send_wqe *wqe,
			   struct rxe_qp *qp,
			   struct rxe_send_wqe *rollback_wqe,
			   u32 rollback_psn)
{
	wqe->state     = rollback_wqe->state;
	wqe->first_psn = rollback_wqe->first_psn;
	wqe->last_psn  = rollback_wqe->last_psn;
	qp->req.psn    = rollback_psn;
}

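/* Commit the side effects of a successfully transmitted packet: record
 * the opcode for FIRST/MIDDLE/LAST chaining, advance to the next WQE
 * at the end of a message, and arm the retransmit timer if it is not
 * already running.
 */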
static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
			 struct rxe_pkt_info *pkt, int payload)
{
	qp->req.opcode = pkt->opcode;

	if (pkt->mask & RXE_END_MASK)
		qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index);

	qp->need_req_skb = 0;

	if (qp->qp_timeout_jiffies && !timer_pending(&qp->retrans_timer))
		mod_timer(&qp->retrans_timer,
			  jiffies + qp->qp_timeout_jiffies);
}

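/* Requester task: pull WQEs off the send queue one packet at a time,
 * build and transmit request packets, and update QP and WQE state.
 * Loops until the queue is blocked (PSN flow control, fence, rd_atomic
 * limit, out of skbs) or empty; the -EAGAIN return tells the task
 * layer to stop iterating.
 */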
int rxe_requester(void *arg)
{
	struct rxe_qp *qp = (struct rxe_qp *)arg;
	struct rxe_pkt_info pkt;
	struct sk_buff *skb;
	struct rxe_send_wqe *wqe;
	enum rxe_hdr_mask mask;
	int payload;
	int mtu;
	int opcode;
	int ret;
	struct rxe_send_wqe rollback_wqe;
	u32 rollback_psn;

	rxe_add_ref(qp);

next_wqe:
	if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR))
		goto exit;

	if (unlikely(qp->req.state == QP_STATE_RESET)) {
		qp->req.wqe_index = consumer_index(qp->sq.queue);
		qp->req.opcode = -1;
		qp->req.need_rd_atomic = 0;
		qp->req.wait_psn = 0;
		qp->req.need_retry = 0;
		goto exit;
	}

	if (unlikely(qp->req.need_retry)) {
		req_retry(qp);
		qp->req.need_retry = 0;
	}

	wqe = req_next_wqe(qp);
	if (unlikely(!wqe))
		goto exit;

	if (wqe->mask & WR_REG_MASK) {
		if (wqe->wr.opcode == IB_WR_LOCAL_INV) {
			struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
			struct rxe_mem *rmr;

			rmr = rxe_pool_get_index(&rxe->mr_pool,
						 wqe->wr.ex.invalidate_rkey >> 8);
			if (!rmr) {
				pr_err("No mr for key %#x\n",
				       wqe->wr.ex.invalidate_rkey);
				wqe->state = wqe_state_error;
				wqe->status = IB_WC_MW_BIND_ERR;
				goto exit;
			}
			rmr->state = RXE_MEM_STATE_FREE;
			rxe_drop_ref(rmr);
			wqe->state = wqe_state_done;
			wqe->status = IB_WC_SUCCESS;
		} else if (wqe->wr.opcode == IB_WR_REG_MR) {
			struct rxe_mem *rmr = to_rmr(wqe->wr.wr.reg.mr);

			rmr->state = RXE_MEM_STATE_VALID;
			rmr->access = wqe->wr.wr.reg.access;
			rmr->lkey = wqe->wr.wr.reg.key;
			rmr->rkey = wqe->wr.wr.reg.key;
			rmr->iova = wqe->wr.wr.reg.mr->iova;
			wqe->state = wqe_state_done;
			wqe->status = IB_WC_SUCCESS;
		} else {
			goto exit;
		}
		if ((wqe->wr.send_flags & IB_SEND_SIGNALED) ||
		    qp->sq_sig_type == IB_SIGNAL_ALL_WR)
			rxe_run_task(&qp->comp.task, 1);
		qp->req.wqe_index = next_index(qp->sq.queue,
						qp->req.wqe_index);
		goto next_wqe;
	}

	if (unlikely(qp_type(qp) == IB_QPT_RC &&
		psn_compare(qp->req.psn, (qp->comp.psn +
				RXE_MAX_UNACKED_PSNS)) > 0)) {
		qp->req.wait_psn = 1;
		goto exit;
	}

	/* Limit the number of inflight SKBs per QP */
	if (unlikely(atomic_read(&qp->skb_out) >
		     RXE_INFLIGHT_SKBS_PER_QP_HIGH)) {
		qp->need_req_skb = 1;
		goto exit;
	}

	opcode = next_opcode(qp, wqe, wqe->wr.opcode);
	if (unlikely(opcode < 0)) {
		wqe->status = IB_WC_LOC_QP_OP_ERR;
		goto exit;
	}

	mask = rxe_opcode[opcode].mask;
	if (unlikely(mask & RXE_READ_OR_ATOMIC)) {
		if (check_init_depth(qp, wqe))
			goto exit;
	}

	mtu = get_mtu(qp);
	payload = (mask & RXE_WRITE_OR_SEND) ? wqe->dma.resid : 0;
	if (payload > mtu) {
		if (qp_type(qp) == IB_QPT_UD) {
			/* C10-93.1.1: If the total sum of all the buffer lengths specified for a
			 * UD message exceeds the MTU of the port as returned by QueryHCA, the CI
			 * shall not emit any packets for this message. Further, the CI shall not
			 * generate an error due to this condition.
			 */

			/* fake a successful UD send */
			wqe->first_psn = qp->req.psn;
			wqe->last_psn = qp->req.psn;
			qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK;
			qp->req.opcode = IB_OPCODE_UD_SEND_ONLY;
			qp->req.wqe_index = next_index(qp->sq.queue,
						       qp->req.wqe_index);
			wqe->state = wqe_state_done;
			wqe->status = IB_WC_SUCCESS;
			__rxe_do_task(&qp->comp.task);
			rxe_drop_ref(qp);
			return 0;
		}
		payload = mtu;
	}

	skb = init_req_packet(qp, wqe, opcode, payload, &pkt);
	if (unlikely(!skb)) {
		pr_err("qp#%d Failed allocating skb\n", qp_num(qp));
		goto err;
	}

	if (fill_packet(qp, wqe, &pkt, skb, payload)) {
		pr_debug("qp#%d Error during fill packet\n", qp_num(qp));
		goto err;
	}

	/*
	 * To prevent a race on wqe access between requester and completer,
	 * wqe members state and psn need to be set before calling
	 * rxe_xmit_packet().
	 * Otherwise, completer might initiate an unjustified retry flow.
	 */
	save_state(wqe, qp, &rollback_wqe, &rollback_psn);
	update_wqe_state(qp, wqe, &pkt);
	update_wqe_psn(qp, wqe, &pkt, payload);
	ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb);
	if (ret) {
		qp->need_req_skb = 1;

		rollback_state(wqe, qp, &rollback_wqe, rollback_psn);

		if (ret == -EAGAIN) {
			rxe_run_task(&qp->req.task, 1);
			goto exit;
		}

		goto err;
	}

	update_state(qp, wqe, &pkt, payload);

	goto next_wqe;

err:
	kfree_skb(skb);
	wqe->status = IB_WC_LOC_PROT_ERR;
	wqe->state = wqe_state_error;
	__rxe_do_task(&qp->comp.task);

exit:
	rxe_drop_ref(qp);
	return -EAGAIN;
}