GNU Linux-libre 5.10.215-gnu1
[releases.git] / drivers / infiniband / sw / siw / siw_qp_rx.c
1 // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
2
3 /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
4 /* Copyright (c) 2008-2019, IBM Corporation */
5
6 #include <linux/errno.h>
7 #include <linux/types.h>
8 #include <linux/net.h>
9 #include <linux/scatterlist.h>
10 #include <linux/highmem.h>
11
12 #include <rdma/iw_cm.h>
13 #include <rdma/ib_verbs.h>
14
15 #include "siw.h"
16 #include "siw_verbs.h"
17 #include "siw_mem.h"
18
19 /*
20  * siw_rx_umem()
21  *
22  * Receive data of @len into target referenced by @dest_addr.
23  *
24  * @srx:        Receive Context
25  * @umem:       siw representation of target memory
26  * @dest_addr:  user virtual address
27  * @len:        number of bytes to place
28  */
29 static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem,
30                        u64 dest_addr, int len)
31 {
32         int copied = 0;
33
34         while (len) {
35                 struct page *p;
36                 int pg_off, bytes, rv;
37                 void *dest;
38
39                 p = siw_get_upage(umem, dest_addr);
40                 if (unlikely(!p)) {
41                         pr_warn("siw: %s: [QP %u]: bogus addr: %pK, %pK\n",
42                                 __func__, qp_id(rx_qp(srx)),
43                                 (void *)(uintptr_t)dest_addr,
44                                 (void *)(uintptr_t)umem->fp_addr);
45                         /* siw internal error */
46                         srx->skb_copied += copied;
47                         srx->skb_new -= copied;
48
49                         return -EFAULT;
50                 }
51                 pg_off = dest_addr & ~PAGE_MASK;
52                 bytes = min(len, (int)PAGE_SIZE - pg_off);
53
54                 siw_dbg_qp(rx_qp(srx), "page %pK, bytes=%u\n", p, bytes);
55
56                 dest = kmap_atomic(p);
57                 rv = skb_copy_bits(srx->skb, srx->skb_offset, dest + pg_off,
58                                    bytes);
59
60                 if (unlikely(rv)) {
61                         kunmap_atomic(dest);
62                         srx->skb_copied += copied;
63                         srx->skb_new -= copied;
64
65                         pr_warn("siw: [QP %u]: %s, len %d, page %p, rv %d\n",
66                                 qp_id(rx_qp(srx)), __func__, len, p, rv);
67
68                         return -EFAULT;
69                 }
70                 if (srx->mpa_crc_hd) {
71                         if (rdma_is_kernel_res(&rx_qp(srx)->base_qp.res)) {
72                                 crypto_shash_update(srx->mpa_crc_hd,
73                                         (u8 *)(dest + pg_off), bytes);
74                                 kunmap_atomic(dest);
75                         } else {
76                                 kunmap_atomic(dest);
77                                 /*
78                                  * Do CRC on original, not target buffer.
79                                  * Some user land applications may
80                                  * concurrently write the target buffer,
81                                  * which would yield a broken CRC.
82                                  * Walking the skb twice is very ineffcient.
83                                  * Folding the CRC into skb_copy_bits()
84                                  * would be much better, but is currently
85                                  * not supported.
86                                  */
87                                 siw_crc_skb(srx, bytes);
88                         }
89                 } else {
90                         kunmap_atomic(dest);
91                 }
92                 srx->skb_offset += bytes;
93                 copied += bytes;
94                 len -= bytes;
95                 dest_addr += bytes;
96                 pg_off = 0;
97         }
98         srx->skb_copied += copied;
99         srx->skb_new -= copied;
100
101         return copied;
102 }
103
104 static int siw_rx_kva(struct siw_rx_stream *srx, void *kva, int len)
105 {
106         int rv;
107
108         siw_dbg_qp(rx_qp(srx), "kva: 0x%pK, len: %u\n", kva, len);
109
110         rv = skb_copy_bits(srx->skb, srx->skb_offset, kva, len);
111         if (unlikely(rv)) {
112                 pr_warn("siw: [QP %u]: %s, len %d, kva 0x%pK, rv %d\n",
113                         qp_id(rx_qp(srx)), __func__, len, kva, rv);
114
115                 return rv;
116         }
117         if (srx->mpa_crc_hd)
118                 crypto_shash_update(srx->mpa_crc_hd, (u8 *)kva, len);
119
120         srx->skb_offset += len;
121         srx->skb_copied += len;
122         srx->skb_new -= len;
123
124         return len;
125 }
126
127 static int siw_rx_pbl(struct siw_rx_stream *srx, int *pbl_idx,
128                       struct siw_mem *mem, u64 addr, int len)
129 {
130         struct siw_pbl *pbl = mem->pbl;
131         u64 offset = addr - mem->va;
132         int copied = 0;
133
134         while (len) {
135                 int bytes;
136                 dma_addr_t buf_addr =
137                         siw_pbl_get_buffer(pbl, offset, &bytes, pbl_idx);
138                 if (!buf_addr)
139                         break;
140
141                 bytes = min(bytes, len);
142                 if (siw_rx_kva(srx, (void *)(uintptr_t)buf_addr, bytes) ==
143                     bytes) {
144                         copied += bytes;
145                         offset += bytes;
146                         len -= bytes;
147                 } else {
148                         break;
149                 }
150         }
151         return copied;
152 }
153
154 /*
155  * siw_rresp_check_ntoh()
156  *
157  * Check incoming RRESP fragment header against expected
158  * header values and update expected values for potential next
159  * fragment.
160  *
161  * NOTE: This function must be called only if a RRESP DDP segment
162  *       starts but not for fragmented consecutive pieces of an
163  *       already started DDP segment.
164  */
165 static int siw_rresp_check_ntoh(struct siw_rx_stream *srx,
166                                 struct siw_rx_fpdu *frx)
167 {
168         struct iwarp_rdma_rresp *rresp = &srx->hdr.rresp;
169         struct siw_wqe *wqe = &frx->wqe_active;
170         enum ddp_ecode ecode;
171
172         u32 sink_stag = be32_to_cpu(rresp->sink_stag);
173         u64 sink_to = be64_to_cpu(rresp->sink_to);
174
175         if (frx->first_ddp_seg) {
176                 srx->ddp_stag = wqe->sqe.sge[0].lkey;
177                 srx->ddp_to = wqe->sqe.sge[0].laddr;
178                 frx->pbl_idx = 0;
179         }
180         /* Below checks extend beyond the semantics of DDP, and
181          * into RDMAP:
182          * We check if the read response matches exactly the
183          * read request which was send to the remote peer to
184          * trigger this read response. RFC5040/5041 do not
185          * always have a proper error code for the detected
186          * error cases. We choose 'base or bounds error' for
187          * cases where the inbound STag is valid, but offset
188          * or length do not match our response receive state.
189          */
190         if (unlikely(srx->ddp_stag != sink_stag)) {
191                 pr_warn("siw: [QP %u]: rresp stag: %08x != %08x\n",
192                         qp_id(rx_qp(srx)), sink_stag, srx->ddp_stag);
193                 ecode = DDP_ECODE_T_INVALID_STAG;
194                 goto error;
195         }
196         if (unlikely(srx->ddp_to != sink_to)) {
197                 pr_warn("siw: [QP %u]: rresp off: %016llx != %016llx\n",
198                         qp_id(rx_qp(srx)), (unsigned long long)sink_to,
199                         (unsigned long long)srx->ddp_to);
200                 ecode = DDP_ECODE_T_BASE_BOUNDS;
201                 goto error;
202         }
203         if (unlikely(!frx->more_ddp_segs &&
204                      (wqe->processed + srx->fpdu_part_rem != wqe->bytes))) {
205                 pr_warn("siw: [QP %u]: rresp len: %d != %d\n",
206                         qp_id(rx_qp(srx)),
207                         wqe->processed + srx->fpdu_part_rem, wqe->bytes);
208                 ecode = DDP_ECODE_T_BASE_BOUNDS;
209                 goto error;
210         }
211         return 0;
212 error:
213         siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
214                            DDP_ETYPE_TAGGED_BUF, ecode, 0);
215         return -EINVAL;
216 }
217
218 /*
219  * siw_write_check_ntoh()
220  *
221  * Check incoming WRITE fragment header against expected
222  * header values and update expected values for potential next
223  * fragment
224  *
225  * NOTE: This function must be called only if a WRITE DDP segment
226  *       starts but not for fragmented consecutive pieces of an
227  *       already started DDP segment.
228  */
229 static int siw_write_check_ntoh(struct siw_rx_stream *srx,
230                                 struct siw_rx_fpdu *frx)
231 {
232         struct iwarp_rdma_write *write = &srx->hdr.rwrite;
233         enum ddp_ecode ecode;
234
235         u32 sink_stag = be32_to_cpu(write->sink_stag);
236         u64 sink_to = be64_to_cpu(write->sink_to);
237
238         if (frx->first_ddp_seg) {
239                 srx->ddp_stag = sink_stag;
240                 srx->ddp_to = sink_to;
241                 frx->pbl_idx = 0;
242         } else {
243                 if (unlikely(srx->ddp_stag != sink_stag)) {
244                         pr_warn("siw: [QP %u]: write stag: %08x != %08x\n",
245                                 qp_id(rx_qp(srx)), sink_stag,
246                                 srx->ddp_stag);
247                         ecode = DDP_ECODE_T_INVALID_STAG;
248                         goto error;
249                 }
250                 if (unlikely(srx->ddp_to != sink_to)) {
251                         pr_warn("siw: [QP %u]: write off: %016llx != %016llx\n",
252                                 qp_id(rx_qp(srx)),
253                                 (unsigned long long)sink_to,
254                                 (unsigned long long)srx->ddp_to);
255                         ecode = DDP_ECODE_T_BASE_BOUNDS;
256                         goto error;
257                 }
258         }
259         return 0;
260 error:
261         siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
262                            DDP_ETYPE_TAGGED_BUF, ecode, 0);
263         return -EINVAL;
264 }
265
266 /*
267  * siw_send_check_ntoh()
268  *
269  * Check incoming SEND fragment header against expected
270  * header values and update expected MSN if no next
271  * fragment expected
272  *
273  * NOTE: This function must be called only if a SEND DDP segment
274  *       starts but not for fragmented consecutive pieces of an
275  *       already started DDP segment.
276  */
277 static int siw_send_check_ntoh(struct siw_rx_stream *srx,
278                                struct siw_rx_fpdu *frx)
279 {
280         struct iwarp_send_inv *send = &srx->hdr.send_inv;
281         struct siw_wqe *wqe = &frx->wqe_active;
282         enum ddp_ecode ecode;
283
284         u32 ddp_msn = be32_to_cpu(send->ddp_msn);
285         u32 ddp_mo = be32_to_cpu(send->ddp_mo);
286         u32 ddp_qn = be32_to_cpu(send->ddp_qn);
287
288         if (unlikely(ddp_qn != RDMAP_UNTAGGED_QN_SEND)) {
289                 pr_warn("siw: [QP %u]: invalid ddp qn %d for send\n",
290                         qp_id(rx_qp(srx)), ddp_qn);
291                 ecode = DDP_ECODE_UT_INVALID_QN;
292                 goto error;
293         }
294         if (unlikely(ddp_msn != srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND])) {
295                 pr_warn("siw: [QP %u]: send msn: %u != %u\n",
296                         qp_id(rx_qp(srx)), ddp_msn,
297                         srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]);
298                 ecode = DDP_ECODE_UT_INVALID_MSN_RANGE;
299                 goto error;
300         }
301         if (unlikely(ddp_mo != wqe->processed)) {
302                 pr_warn("siw: [QP %u], send mo: %u != %u\n",
303                         qp_id(rx_qp(srx)), ddp_mo, wqe->processed);
304                 ecode = DDP_ECODE_UT_INVALID_MO;
305                 goto error;
306         }
307         if (frx->first_ddp_seg) {
308                 /* initialize user memory write position */
309                 frx->sge_idx = 0;
310                 frx->sge_off = 0;
311                 frx->pbl_idx = 0;
312
313                 /* only valid for SEND_INV and SEND_SE_INV operations */
314                 srx->inval_stag = be32_to_cpu(send->inval_stag);
315         }
316         if (unlikely(wqe->bytes < wqe->processed + srx->fpdu_part_rem)) {
317                 siw_dbg_qp(rx_qp(srx), "receive space short: %d - %d < %d\n",
318                            wqe->bytes, wqe->processed, srx->fpdu_part_rem);
319                 wqe->wc_status = SIW_WC_LOC_LEN_ERR;
320                 ecode = DDP_ECODE_UT_INVALID_MSN_NOBUF;
321                 goto error;
322         }
323         return 0;
324 error:
325         siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
326                            DDP_ETYPE_UNTAGGED_BUF, ecode, 0);
327         return -EINVAL;
328 }
329
330 static struct siw_wqe *siw_rqe_get(struct siw_qp *qp)
331 {
332         struct siw_rqe *rqe;
333         struct siw_srq *srq;
334         struct siw_wqe *wqe = NULL;
335         bool srq_event = false;
336         unsigned long flags;
337
338         srq = qp->srq;
339         if (srq) {
340                 spin_lock_irqsave(&srq->lock, flags);
341                 if (unlikely(!srq->num_rqe))
342                         goto out;
343
344                 rqe = &srq->recvq[srq->rq_get % srq->num_rqe];
345         } else {
346                 if (unlikely(!qp->recvq))
347                         goto out;
348
349                 rqe = &qp->recvq[qp->rq_get % qp->attrs.rq_size];
350         }
351         if (likely(rqe->flags == SIW_WQE_VALID)) {
352                 int num_sge = rqe->num_sge;
353
354                 if (likely(num_sge <= SIW_MAX_SGE)) {
355                         int i = 0;
356
357                         wqe = rx_wqe(&qp->rx_untagged);
358                         rx_type(wqe) = SIW_OP_RECEIVE;
359                         wqe->wr_status = SIW_WR_INPROGRESS;
360                         wqe->bytes = 0;
361                         wqe->processed = 0;
362
363                         wqe->rqe.id = rqe->id;
364                         wqe->rqe.num_sge = num_sge;
365
366                         while (i < num_sge) {
367                                 wqe->rqe.sge[i].laddr = rqe->sge[i].laddr;
368                                 wqe->rqe.sge[i].lkey = rqe->sge[i].lkey;
369                                 wqe->rqe.sge[i].length = rqe->sge[i].length;
370                                 wqe->bytes += wqe->rqe.sge[i].length;
371                                 wqe->mem[i] = NULL;
372                                 i++;
373                         }
374                         /* can be re-used by appl */
375                         smp_store_mb(rqe->flags, 0);
376                 } else {
377                         siw_dbg_qp(qp, "too many sge's: %d\n", rqe->num_sge);
378                         if (srq)
379                                 spin_unlock_irqrestore(&srq->lock, flags);
380                         return NULL;
381                 }
382                 if (!srq) {
383                         qp->rq_get++;
384                 } else {
385                         if (srq->armed) {
386                                 /* Test SRQ limit */
387                                 u32 off = (srq->rq_get + srq->limit) %
388                                           srq->num_rqe;
389                                 struct siw_rqe *rqe2 = &srq->recvq[off];
390
391                                 if (!(rqe2->flags & SIW_WQE_VALID)) {
392                                         srq->armed = false;
393                                         srq_event = true;
394                                 }
395                         }
396                         srq->rq_get++;
397                 }
398         }
399 out:
400         if (srq) {
401                 spin_unlock_irqrestore(&srq->lock, flags);
402                 if (srq_event)
403                         siw_srq_event(srq, IB_EVENT_SRQ_LIMIT_REACHED);
404         }
405         return wqe;
406 }
407
408 /*
409  * siw_proc_send:
410  *
411  * Process one incoming SEND and place data into memory referenced by
412  * receive wqe.
413  *
414  * Function supports partially received sends (suspending/resuming
415  * current receive wqe processing)
416  *
417  * return value:
418  *      0:       reached the end of a DDP segment
419  *      -EAGAIN: to be called again to finish the DDP segment
420  */
421 int siw_proc_send(struct siw_qp *qp)
422 {
423         struct siw_rx_stream *srx = &qp->rx_stream;
424         struct siw_rx_fpdu *frx = &qp->rx_untagged;
425         struct siw_wqe *wqe;
426         u32 data_bytes; /* all data bytes available */
427         u32 rcvd_bytes; /* sum of data bytes rcvd */
428         int rv = 0;
429
430         if (frx->first_ddp_seg) {
431                 wqe = siw_rqe_get(qp);
432                 if (unlikely(!wqe)) {
433                         siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
434                                            DDP_ETYPE_UNTAGGED_BUF,
435                                            DDP_ECODE_UT_INVALID_MSN_NOBUF, 0);
436                         return -ENOENT;
437                 }
438         } else {
439                 wqe = rx_wqe(frx);
440         }
441         if (srx->state == SIW_GET_DATA_START) {
442                 rv = siw_send_check_ntoh(srx, frx);
443                 if (unlikely(rv)) {
444                         siw_qp_event(qp, IB_EVENT_QP_FATAL);
445                         return rv;
446                 }
447                 if (!srx->fpdu_part_rem) /* zero length SEND */
448                         return 0;
449         }
450         data_bytes = min(srx->fpdu_part_rem, srx->skb_new);
451         rcvd_bytes = 0;
452
453         /* A zero length SEND will skip below loop */
454         while (data_bytes) {
455                 struct ib_pd *pd;
456                 struct siw_mem **mem, *mem_p;
457                 struct siw_sge *sge;
458                 u32 sge_bytes; /* data bytes avail for SGE */
459
460                 sge = &wqe->rqe.sge[frx->sge_idx];
461
462                 if (!sge->length) {
463                         /* just skip empty sge's */
464                         frx->sge_idx++;
465                         frx->sge_off = 0;
466                         frx->pbl_idx = 0;
467                         continue;
468                 }
469                 sge_bytes = min(data_bytes, sge->length - frx->sge_off);
470                 mem = &wqe->mem[frx->sge_idx];
471
472                 /*
473                  * check with QP's PD if no SRQ present, SRQ's PD otherwise
474                  */
475                 pd = qp->srq == NULL ? qp->pd : qp->srq->base_srq.pd;
476
477                 rv = siw_check_sge(pd, sge, mem, IB_ACCESS_LOCAL_WRITE,
478                                    frx->sge_off, sge_bytes);
479                 if (unlikely(rv)) {
480                         siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
481                                            DDP_ETYPE_CATASTROPHIC,
482                                            DDP_ECODE_CATASTROPHIC, 0);
483
484                         siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);
485                         break;
486                 }
487                 mem_p = *mem;
488                 if (mem_p->mem_obj == NULL)
489                         rv = siw_rx_kva(srx,
490                                 (void *)(uintptr_t)(sge->laddr + frx->sge_off),
491                                 sge_bytes);
492                 else if (!mem_p->is_pbl)
493                         rv = siw_rx_umem(srx, mem_p->umem,
494                                          sge->laddr + frx->sge_off, sge_bytes);
495                 else
496                         rv = siw_rx_pbl(srx, &frx->pbl_idx, mem_p,
497                                         sge->laddr + frx->sge_off, sge_bytes);
498
499                 if (unlikely(rv != sge_bytes)) {
500                         wqe->processed += rcvd_bytes;
501
502                         siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
503                                            DDP_ETYPE_CATASTROPHIC,
504                                            DDP_ECODE_CATASTROPHIC, 0);
505                         return -EINVAL;
506                 }
507                 frx->sge_off += rv;
508
509                 if (frx->sge_off == sge->length) {
510                         frx->sge_idx++;
511                         frx->sge_off = 0;
512                         frx->pbl_idx = 0;
513                 }
514                 data_bytes -= rv;
515                 rcvd_bytes += rv;
516
517                 srx->fpdu_part_rem -= rv;
518                 srx->fpdu_part_rcvd += rv;
519         }
520         wqe->processed += rcvd_bytes;
521
522         if (!srx->fpdu_part_rem)
523                 return 0;
524
525         return (rv < 0) ? rv : -EAGAIN;
526 }
527
528 /*
529  * siw_proc_write:
530  *
531  * Place incoming WRITE after referencing and checking target buffer
532
533  * Function supports partially received WRITEs (suspending/resuming
534  * current receive processing)
535  *
536  * return value:
537  *      0:       reached the end of a DDP segment
538  *      -EAGAIN: to be called again to finish the DDP segment
539  */
540 int siw_proc_write(struct siw_qp *qp)
541 {
542         struct siw_rx_stream *srx = &qp->rx_stream;
543         struct siw_rx_fpdu *frx = &qp->rx_tagged;
544         struct siw_mem *mem;
545         int bytes, rv;
546
547         if (srx->state == SIW_GET_DATA_START) {
548                 if (!srx->fpdu_part_rem) /* zero length WRITE */
549                         return 0;
550
551                 rv = siw_write_check_ntoh(srx, frx);
552                 if (unlikely(rv)) {
553                         siw_qp_event(qp, IB_EVENT_QP_FATAL);
554                         return rv;
555                 }
556         }
557         bytes = min(srx->fpdu_part_rem, srx->skb_new);
558
559         if (frx->first_ddp_seg) {
560                 struct siw_wqe *wqe = rx_wqe(frx);
561
562                 rx_mem(frx) = siw_mem_id2obj(qp->sdev, srx->ddp_stag >> 8);
563                 if (unlikely(!rx_mem(frx))) {
564                         siw_dbg_qp(qp,
565                                    "sink stag not found/invalid, stag 0x%08x\n",
566                                    srx->ddp_stag);
567
568                         siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
569                                            DDP_ETYPE_TAGGED_BUF,
570                                            DDP_ECODE_T_INVALID_STAG, 0);
571                         return -EINVAL;
572                 }
573                 wqe->rqe.num_sge = 1;
574                 rx_type(wqe) = SIW_OP_WRITE;
575                 wqe->wr_status = SIW_WR_INPROGRESS;
576         }
577         mem = rx_mem(frx);
578
579         /*
580          * Check if application re-registered memory with different
581          * key field of STag.
582          */
583         if (unlikely(mem->stag != srx->ddp_stag)) {
584                 siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
585                                    DDP_ETYPE_TAGGED_BUF,
586                                    DDP_ECODE_T_INVALID_STAG, 0);
587                 return -EINVAL;
588         }
589         rv = siw_check_mem(qp->pd, mem, srx->ddp_to + srx->fpdu_part_rcvd,
590                            IB_ACCESS_REMOTE_WRITE, bytes);
591         if (unlikely(rv)) {
592                 siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
593                                    DDP_ETYPE_TAGGED_BUF, siw_tagged_error(-rv),
594                                    0);
595
596                 siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);
597
598                 return -EINVAL;
599         }
600
601         if (mem->mem_obj == NULL)
602                 rv = siw_rx_kva(srx,
603                         (void *)(uintptr_t)(srx->ddp_to + srx->fpdu_part_rcvd),
604                         bytes);
605         else if (!mem->is_pbl)
606                 rv = siw_rx_umem(srx, mem->umem,
607                                  srx->ddp_to + srx->fpdu_part_rcvd, bytes);
608         else
609                 rv = siw_rx_pbl(srx, &frx->pbl_idx, mem,
610                                 srx->ddp_to + srx->fpdu_part_rcvd, bytes);
611
612         if (unlikely(rv != bytes)) {
613                 siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
614                                    DDP_ETYPE_CATASTROPHIC,
615                                    DDP_ECODE_CATASTROPHIC, 0);
616                 return -EINVAL;
617         }
618         srx->fpdu_part_rem -= rv;
619         srx->fpdu_part_rcvd += rv;
620
621         if (!srx->fpdu_part_rem) {
622                 srx->ddp_to += srx->fpdu_part_rcvd;
623                 return 0;
624         }
625         return -EAGAIN;
626 }
627
628 /*
629  * Inbound RREQ's cannot carry user data.
630  */
631 int siw_proc_rreq(struct siw_qp *qp)
632 {
633         struct siw_rx_stream *srx = &qp->rx_stream;
634
635         if (!srx->fpdu_part_rem)
636                 return 0;
637
638         pr_warn("siw: [QP %u]: rreq with mpa len %d\n", qp_id(qp),
639                 be16_to_cpu(srx->hdr.ctrl.mpa_len));
640
641         return -EPROTO;
642 }
643
644 /*
645  * siw_init_rresp:
646  *
647  * Process inbound RDMA READ REQ. Produce a pseudo READ RESPONSE WQE.
648  * Put it at the tail of the IRQ, if there is another WQE currently in
649  * transmit processing. If not, make it the current WQE to be processed
650  * and schedule transmit processing.
651  *
652  * Can be called from softirq context and from process
653  * context (RREAD socket loopback case!)
654  *
655  * return value:
656  *      0:      success,
657  *              failure code otherwise
658  */
659
660 static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx)
661 {
662         struct siw_wqe *tx_work = tx_wqe(qp);
663         struct siw_sqe *resp;
664
665         uint64_t raddr = be64_to_cpu(srx->hdr.rreq.sink_to),
666                  laddr = be64_to_cpu(srx->hdr.rreq.source_to);
667         uint32_t length = be32_to_cpu(srx->hdr.rreq.read_size),
668                  lkey = be32_to_cpu(srx->hdr.rreq.source_stag),
669                  rkey = be32_to_cpu(srx->hdr.rreq.sink_stag),
670                  msn = be32_to_cpu(srx->hdr.rreq.ddp_msn);
671
672         int run_sq = 1, rv = 0;
673         unsigned long flags;
674
675         if (unlikely(msn != srx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ])) {
676                 siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
677                                    DDP_ETYPE_UNTAGGED_BUF,
678                                    DDP_ECODE_UT_INVALID_MSN_RANGE, 0);
679                 return -EPROTO;
680         }
681         spin_lock_irqsave(&qp->sq_lock, flags);
682
683         if (unlikely(!qp->attrs.irq_size)) {
684                 run_sq = 0;
685                 goto error_irq;
686         }
687         if (tx_work->wr_status == SIW_WR_IDLE) {
688                 /*
689                  * immediately schedule READ response w/o
690                  * consuming IRQ entry: IRQ must be empty.
691                  */
692                 tx_work->processed = 0;
693                 tx_work->mem[0] = NULL;
694                 tx_work->wr_status = SIW_WR_QUEUED;
695                 resp = &tx_work->sqe;
696         } else {
697                 resp = irq_alloc_free(qp);
698                 run_sq = 0;
699         }
700         if (likely(resp)) {
701                 resp->opcode = SIW_OP_READ_RESPONSE;
702
703                 resp->sge[0].length = length;
704                 resp->sge[0].laddr = laddr;
705                 resp->sge[0].lkey = lkey;
706
707                 /* Keep aside message sequence number for potential
708                  * error reporting during Read Response generation.
709                  */
710                 resp->sge[1].length = msn;
711
712                 resp->raddr = raddr;
713                 resp->rkey = rkey;
714                 resp->num_sge = length ? 1 : 0;
715
716                 /* RRESP now valid as current TX wqe or placed into IRQ */
717                 smp_store_mb(resp->flags, SIW_WQE_VALID);
718         } else {
719 error_irq:
720                 pr_warn("siw: [QP %u]: IRQ exceeded or null, size %d\n",
721                         qp_id(qp), qp->attrs.irq_size);
722
723                 siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
724                                    RDMAP_ETYPE_REMOTE_OPERATION,
725                                    RDMAP_ECODE_CATASTROPHIC_STREAM, 0);
726                 rv = -EPROTO;
727         }
728
729         spin_unlock_irqrestore(&qp->sq_lock, flags);
730
731         if (run_sq)
732                 rv = siw_sq_start(qp);
733
734         return rv;
735 }
736
737 /*
738  * Only called at start of Read.Resonse processing.
739  * Transfer pending Read from tip of ORQ into currrent rx wqe,
740  * but keep ORQ entry valid until Read.Response processing done.
741  * No Queue locking needed.
742  */
743 static int siw_orqe_start_rx(struct siw_qp *qp)
744 {
745         struct siw_sqe *orqe;
746         struct siw_wqe *wqe = NULL;
747
748         if (unlikely(!qp->attrs.orq_size))
749                 return -EPROTO;
750
751         /* make sure ORQ indices are current */
752         smp_mb();
753
754         orqe = orq_get_current(qp);
755         if (READ_ONCE(orqe->flags) & SIW_WQE_VALID) {
756                 /* RRESP is a TAGGED RDMAP operation */
757                 wqe = rx_wqe(&qp->rx_tagged);
758                 wqe->sqe.id = orqe->id;
759                 wqe->sqe.opcode = orqe->opcode;
760                 wqe->sqe.sge[0].laddr = orqe->sge[0].laddr;
761                 wqe->sqe.sge[0].lkey = orqe->sge[0].lkey;
762                 wqe->sqe.sge[0].length = orqe->sge[0].length;
763                 wqe->sqe.flags = orqe->flags;
764                 wqe->sqe.num_sge = 1;
765                 wqe->bytes = orqe->sge[0].length;
766                 wqe->processed = 0;
767                 wqe->mem[0] = NULL;
768                 /* make sure WQE is completely written before valid */
769                 smp_wmb();
770                 wqe->wr_status = SIW_WR_INPROGRESS;
771
772                 return 0;
773         }
774         return -EPROTO;
775 }
776
777 /*
778  * siw_proc_rresp:
779  *
780  * Place incoming RRESP data into memory referenced by RREQ WQE
781  * which is at the tip of the ORQ
782  *
783  * Function supports partially received RRESP's (suspending/resuming
784  * current receive processing)
785  */
786 int siw_proc_rresp(struct siw_qp *qp)
787 {
788         struct siw_rx_stream *srx = &qp->rx_stream;
789         struct siw_rx_fpdu *frx = &qp->rx_tagged;
790         struct siw_wqe *wqe = rx_wqe(frx);
791         struct siw_mem **mem, *mem_p;
792         struct siw_sge *sge;
793         int bytes, rv;
794
795         if (frx->first_ddp_seg) {
796                 if (unlikely(wqe->wr_status != SIW_WR_IDLE)) {
797                         pr_warn("siw: [QP %u]: proc RRESP: status %d, op %d\n",
798                                 qp_id(qp), wqe->wr_status, wqe->sqe.opcode);
799                         rv = -EPROTO;
800                         goto error_term;
801                 }
802                 /*
803                  * fetch pending RREQ from orq
804                  */
805                 rv = siw_orqe_start_rx(qp);
806                 if (rv) {
807                         pr_warn("siw: [QP %u]: ORQ empty, size %d\n",
808                                 qp_id(qp), qp->attrs.orq_size);
809                         goto error_term;
810                 }
811                 rv = siw_rresp_check_ntoh(srx, frx);
812                 if (unlikely(rv)) {
813                         siw_qp_event(qp, IB_EVENT_QP_FATAL);
814                         return rv;
815                 }
816         } else {
817                 if (unlikely(wqe->wr_status != SIW_WR_INPROGRESS)) {
818                         pr_warn("siw: [QP %u]: resume RRESP: status %d\n",
819                                 qp_id(qp), wqe->wr_status);
820                         rv = -EPROTO;
821                         goto error_term;
822                 }
823         }
824         if (!srx->fpdu_part_rem) /* zero length RRESPONSE */
825                 return 0;
826
827         sge = wqe->sqe.sge; /* there is only one */
828         mem = &wqe->mem[0];
829
830         if (!(*mem)) {
831                 /*
832                  * check target memory which resolves memory on first fragment
833                  */
834                 rv = siw_check_sge(qp->pd, sge, mem, IB_ACCESS_LOCAL_WRITE, 0,
835                                    wqe->bytes);
836                 if (unlikely(rv)) {
837                         siw_dbg_qp(qp, "target mem check: %d\n", rv);
838                         wqe->wc_status = SIW_WC_LOC_PROT_ERR;
839
840                         siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
841                                            DDP_ETYPE_TAGGED_BUF,
842                                            siw_tagged_error(-rv), 0);
843
844                         siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);
845
846                         return -EINVAL;
847                 }
848         }
849         mem_p = *mem;
850
851         bytes = min(srx->fpdu_part_rem, srx->skb_new);
852
853         if (mem_p->mem_obj == NULL)
854                 rv = siw_rx_kva(srx,
855                         (void *)(uintptr_t)(sge->laddr + wqe->processed),
856                         bytes);
857         else if (!mem_p->is_pbl)
858                 rv = siw_rx_umem(srx, mem_p->umem, sge->laddr + wqe->processed,
859                                  bytes);
860         else
861                 rv = siw_rx_pbl(srx, &frx->pbl_idx, mem_p,
862                                 sge->laddr + wqe->processed, bytes);
863         if (rv != bytes) {
864                 wqe->wc_status = SIW_WC_GENERAL_ERR;
865                 rv = -EINVAL;
866                 goto error_term;
867         }
868         srx->fpdu_part_rem -= rv;
869         srx->fpdu_part_rcvd += rv;
870         wqe->processed += rv;
871
872         if (!srx->fpdu_part_rem) {
873                 srx->ddp_to += srx->fpdu_part_rcvd;
874                 return 0;
875         }
876         return -EAGAIN;
877
878 error_term:
879         siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, DDP_ETYPE_CATASTROPHIC,
880                            DDP_ECODE_CATASTROPHIC, 0);
881         return rv;
882 }
883
884 int siw_proc_terminate(struct siw_qp *qp)
885 {
886         struct siw_rx_stream *srx = &qp->rx_stream;
887         struct sk_buff *skb = srx->skb;
888         struct iwarp_terminate *term = &srx->hdr.terminate;
889         union iwarp_hdr term_info;
890         u8 *infop = (u8 *)&term_info;
891         enum rdma_opcode op;
892         u16 to_copy = sizeof(struct iwarp_ctrl);
893
894         pr_warn("siw: got TERMINATE. layer %d, type %d, code %d\n",
895                 __rdmap_term_layer(term), __rdmap_term_etype(term),
896                 __rdmap_term_ecode(term));
897
898         if (be32_to_cpu(term->ddp_qn) != RDMAP_UNTAGGED_QN_TERMINATE ||
899             be32_to_cpu(term->ddp_msn) !=
900                     qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] ||
901             be32_to_cpu(term->ddp_mo) != 0) {
902                 pr_warn("siw: rx bogus TERM [QN x%08x, MSN x%08x, MO x%08x]\n",
903                         be32_to_cpu(term->ddp_qn), be32_to_cpu(term->ddp_msn),
904                         be32_to_cpu(term->ddp_mo));
905                 return -ECONNRESET;
906         }
907         /*
908          * Receive remaining pieces of TERM if indicated
909          */
910         if (!term->flag_m)
911                 return -ECONNRESET;
912
913         /* Do not take the effort to reassemble a network fragmented
914          * TERM message
915          */
916         if (srx->skb_new < sizeof(struct iwarp_ctrl_tagged))
917                 return -ECONNRESET;
918
919         memset(infop, 0, sizeof(term_info));
920
921         skb_copy_bits(skb, srx->skb_offset, infop, to_copy);
922
923         op = __rdmap_get_opcode(&term_info.ctrl);
924         if (op >= RDMAP_TERMINATE)
925                 goto out;
926
927         infop += to_copy;
928         srx->skb_offset += to_copy;
929         srx->skb_new -= to_copy;
930         srx->skb_copied += to_copy;
931         srx->fpdu_part_rcvd += to_copy;
932         srx->fpdu_part_rem -= to_copy;
933
934         to_copy = iwarp_pktinfo[op].hdr_len - to_copy;
935
936         /* Again, no network fragmented TERM's */
937         if (to_copy + MPA_CRC_SIZE > srx->skb_new)
938                 return -ECONNRESET;
939
940         skb_copy_bits(skb, srx->skb_offset, infop, to_copy);
941
942         if (term->flag_r) {
943                 siw_dbg_qp(qp, "TERM reports RDMAP hdr type %u, len %u (%s)\n",
944                            op, be16_to_cpu(term_info.ctrl.mpa_len),
945                            term->flag_m ? "valid" : "invalid");
946         } else if (term->flag_d) {
947                 siw_dbg_qp(qp, "TERM reports DDP hdr type %u, len %u (%s)\n",
948                            op, be16_to_cpu(term_info.ctrl.mpa_len),
949                            term->flag_m ? "valid" : "invalid");
950         }
951 out:
952         srx->skb_new -= to_copy;
953         srx->skb_offset += to_copy;
954         srx->skb_copied += to_copy;
955         srx->fpdu_part_rcvd += to_copy;
956         srx->fpdu_part_rem -= to_copy;
957
958         return -ECONNRESET;
959 }
960
961 static int siw_get_trailer(struct siw_qp *qp, struct siw_rx_stream *srx)
962 {
963         struct sk_buff *skb = srx->skb;
964         int avail = min(srx->skb_new, srx->fpdu_part_rem);
965         u8 *tbuf = (u8 *)&srx->trailer.crc - srx->pad;
966         __wsum crc_in, crc_own = 0;
967
968         siw_dbg_qp(qp, "expected %d, available %d, pad %u\n",
969                    srx->fpdu_part_rem, srx->skb_new, srx->pad);
970
971         skb_copy_bits(skb, srx->skb_offset, tbuf, avail);
972
973         srx->skb_new -= avail;
974         srx->skb_offset += avail;
975         srx->skb_copied += avail;
976         srx->fpdu_part_rem -= avail;
977
978         if (srx->fpdu_part_rem)
979                 return -EAGAIN;
980
981         if (!srx->mpa_crc_hd)
982                 return 0;
983
984         if (srx->pad)
985                 crypto_shash_update(srx->mpa_crc_hd, tbuf, srx->pad);
986         /*
987          * CRC32 is computed, transmitted and received directly in NBO,
988          * so there's never a reason to convert byte order.
989          */
990         crypto_shash_final(srx->mpa_crc_hd, (u8 *)&crc_own);
991         crc_in = (__force __wsum)srx->trailer.crc;
992
993         if (unlikely(crc_in != crc_own)) {
994                 pr_warn("siw: crc error. in: %08x, own %08x, op %u\n",
995                         crc_in, crc_own, qp->rx_stream.rdmap_op);
996
997                 siw_init_terminate(qp, TERM_ERROR_LAYER_LLP,
998                                    LLP_ETYPE_MPA,
999                                    LLP_ECODE_RECEIVED_CRC, 0);
1000                 return -EINVAL;
1001         }
1002         return 0;
1003 }
1004
1005 #define MIN_DDP_HDR sizeof(struct iwarp_ctrl_tagged)
1006
1007 static int siw_get_hdr(struct siw_rx_stream *srx)
1008 {
1009         struct sk_buff *skb = srx->skb;
1010         struct siw_qp *qp = rx_qp(srx);
1011         struct iwarp_ctrl *c_hdr = &srx->hdr.ctrl;
1012         struct siw_rx_fpdu *frx;
1013         u8 opcode;
1014         int bytes;
1015
1016         if (srx->fpdu_part_rcvd < MIN_DDP_HDR) {
1017                 /*
1018                  * copy a mimimum sized (tagged) DDP frame control part
1019                  */
1020                 bytes = min_t(int, srx->skb_new,
1021                               MIN_DDP_HDR - srx->fpdu_part_rcvd);
1022
1023                 skb_copy_bits(skb, srx->skb_offset,
1024                               (char *)c_hdr + srx->fpdu_part_rcvd, bytes);
1025
1026                 srx->fpdu_part_rcvd += bytes;
1027
1028                 srx->skb_new -= bytes;
1029                 srx->skb_offset += bytes;
1030                 srx->skb_copied += bytes;
1031
1032                 if (srx->fpdu_part_rcvd < MIN_DDP_HDR)
1033                         return -EAGAIN;
1034
1035                 if (unlikely(__ddp_get_version(c_hdr) != DDP_VERSION)) {
1036                         enum ddp_etype etype;
1037                         enum ddp_ecode ecode;
1038
1039                         pr_warn("siw: received ddp version unsupported %d\n",
1040                                 __ddp_get_version(c_hdr));
1041
1042                         if (c_hdr->ddp_rdmap_ctrl & DDP_FLAG_TAGGED) {
1043                                 etype = DDP_ETYPE_TAGGED_BUF;
1044                                 ecode = DDP_ECODE_T_VERSION;
1045                         } else {
1046                                 etype = DDP_ETYPE_UNTAGGED_BUF;
1047                                 ecode = DDP_ECODE_UT_VERSION;
1048                         }
1049                         siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
1050                                            etype, ecode, 0);
1051                         return -EINVAL;
1052                 }
1053                 if (unlikely(__rdmap_get_version(c_hdr) != RDMAP_VERSION)) {
1054                         pr_warn("siw: received rdmap version unsupported %d\n",
1055                                 __rdmap_get_version(c_hdr));
1056
1057                         siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_RDMAP,
1058                                            RDMAP_ETYPE_REMOTE_OPERATION,
1059                                            RDMAP_ECODE_VERSION, 0);
1060                         return -EINVAL;
1061                 }
1062                 opcode = __rdmap_get_opcode(c_hdr);
1063
1064                 if (opcode > RDMAP_TERMINATE) {
1065                         pr_warn("siw: received unknown packet type %u\n",
1066                                 opcode);
1067
1068                         siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_RDMAP,
1069                                            RDMAP_ETYPE_REMOTE_OPERATION,
1070                                            RDMAP_ECODE_OPCODE, 0);
1071                         return -EINVAL;
1072                 }
1073                 siw_dbg_qp(rx_qp(srx), "new header, opcode %u\n", opcode);
1074         } else {
1075                 opcode = __rdmap_get_opcode(c_hdr);
1076         }
1077         set_rx_fpdu_context(qp, opcode);
1078         frx = qp->rx_fpdu;
1079
1080         /*
1081          * Figure out len of current hdr: variable length of
1082          * iwarp hdr may force us to copy hdr information in
1083          * two steps. Only tagged DDP messages are already
1084          * completely received.
1085          */
1086         if (iwarp_pktinfo[opcode].hdr_len > sizeof(struct iwarp_ctrl_tagged)) {
1087                 int hdrlen = iwarp_pktinfo[opcode].hdr_len;
1088
1089                 bytes = min_t(int, hdrlen - MIN_DDP_HDR, srx->skb_new);
1090
1091                 skb_copy_bits(skb, srx->skb_offset,
1092                               (char *)c_hdr + srx->fpdu_part_rcvd, bytes);
1093
1094                 srx->fpdu_part_rcvd += bytes;
1095
1096                 srx->skb_new -= bytes;
1097                 srx->skb_offset += bytes;
1098                 srx->skb_copied += bytes;
1099
1100                 if (srx->fpdu_part_rcvd < hdrlen)
1101                         return -EAGAIN;
1102         }
1103
1104         /*
1105          * DDP/RDMAP header receive completed. Check if the current
1106          * DDP segment starts a new RDMAP message or continues a previously
1107          * started RDMAP message.
1108          *
1109          * Alternating reception of DDP segments (or FPDUs) from incomplete
1110          * tagged and untagged RDMAP messages is supported, as long as
1111          * the current tagged or untagged message gets eventually completed
1112          * w/o intersection from another message of the same type
1113          * (tagged/untagged). E.g., a WRITE can get intersected by a SEND,
1114          * but not by a READ RESPONSE etc.
1115          */
1116         if (srx->mpa_crc_hd) {
1117                 /*
1118                  * Restart CRC computation
1119                  */
1120                 crypto_shash_init(srx->mpa_crc_hd);
1121                 crypto_shash_update(srx->mpa_crc_hd, (u8 *)c_hdr,
1122                                     srx->fpdu_part_rcvd);
1123         }
1124         if (frx->more_ddp_segs) {
1125                 frx->first_ddp_seg = 0;
1126                 if (frx->prev_rdmap_op != opcode) {
1127                         pr_warn("siw: packet intersection: %u : %u\n",
1128                                 frx->prev_rdmap_op, opcode);
1129                         /*
1130                          * The last inbound RDMA operation of same type
1131                          * (tagged or untagged) is left unfinished.
1132                          * To complete it in error, make it the current
1133                          * operation again, even with the header already
1134                          * overwritten. For error handling, only the opcode
1135                          * and current rx context are relevant.
1136                          */
1137                         set_rx_fpdu_context(qp, frx->prev_rdmap_op);
1138                         __rdmap_set_opcode(c_hdr, frx->prev_rdmap_op);
1139                         return -EPROTO;
1140                 }
1141         } else {
1142                 frx->prev_rdmap_op = opcode;
1143                 frx->first_ddp_seg = 1;
1144         }
1145         frx->more_ddp_segs = c_hdr->ddp_rdmap_ctrl & DDP_FLAG_LAST ? 0 : 1;
1146
1147         return 0;
1148 }
1149
1150 static int siw_check_tx_fence(struct siw_qp *qp)
1151 {
1152         struct siw_wqe *tx_waiting = tx_wqe(qp);
1153         struct siw_sqe *rreq;
1154         int resume_tx = 0, rv = 0;
1155         unsigned long flags;
1156
1157         spin_lock_irqsave(&qp->orq_lock, flags);
1158
1159         /* free current orq entry */
1160         rreq = orq_get_current(qp);
1161         WRITE_ONCE(rreq->flags, 0);
1162
1163         qp->orq_get++;
1164
1165         if (qp->tx_ctx.orq_fence) {
1166                 if (unlikely(tx_waiting->wr_status != SIW_WR_QUEUED)) {
1167                         pr_warn("siw: [QP %u]: fence resume: bad status %d\n",
1168                                 qp_id(qp), tx_waiting->wr_status);
1169                         rv = -EPROTO;
1170                         goto out;
1171                 }
1172                 /* resume SQ processing, if possible */
1173                 if (tx_waiting->sqe.opcode == SIW_OP_READ ||
1174                     tx_waiting->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
1175
1176                         /* SQ processing was stopped because of a full ORQ */
1177                         rreq = orq_get_free(qp);
1178                         if (unlikely(!rreq)) {
1179                                 pr_warn("siw: [QP %u]: no ORQE\n", qp_id(qp));
1180                                 rv = -EPROTO;
1181                                 goto out;
1182                         }
1183                         siw_read_to_orq(rreq, &tx_waiting->sqe);
1184
1185                         qp->orq_put++;
1186                         qp->tx_ctx.orq_fence = 0;
1187                         resume_tx = 1;
1188
1189                 } else if (siw_orq_empty(qp)) {
1190                         /*
1191                          * SQ processing was stopped by fenced work request.
1192                          * Resume since all previous Read's are now completed.
1193                          */
1194                         qp->tx_ctx.orq_fence = 0;
1195                         resume_tx = 1;
1196                 }
1197         }
1198 out:
1199         spin_unlock_irqrestore(&qp->orq_lock, flags);
1200
1201         if (resume_tx)
1202                 rv = siw_sq_start(qp);
1203
1204         return rv;
1205 }
1206
1207 /*
1208  * siw_rdmap_complete()
1209  *
1210  * Complete processing of an RDMA message after receiving all
1211  * DDP segmens or ABort processing after encountering error case.
1212  *
1213  *   o SENDs + RRESPs will need for completion,
1214  *   o RREQs need for  READ RESPONSE initialization
1215  *   o WRITEs need memory dereferencing
1216  *
1217  * TODO: Failed WRITEs need local error to be surfaced.
1218  */
1219 static int siw_rdmap_complete(struct siw_qp *qp, int error)
1220 {
1221         struct siw_rx_stream *srx = &qp->rx_stream;
1222         struct siw_wqe *wqe = rx_wqe(qp->rx_fpdu);
1223         enum siw_wc_status wc_status = wqe->wc_status;
1224         u8 opcode = __rdmap_get_opcode(&srx->hdr.ctrl);
1225         int rv = 0;
1226
1227         switch (opcode) {
1228         case RDMAP_SEND_SE:
1229         case RDMAP_SEND_SE_INVAL:
1230                 wqe->rqe.flags |= SIW_WQE_SOLICITED;
1231                 fallthrough;
1232
1233         case RDMAP_SEND:
1234         case RDMAP_SEND_INVAL:
1235                 if (wqe->wr_status == SIW_WR_IDLE)
1236                         break;
1237
1238                 srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]++;
1239
1240                 if (error != 0 && wc_status == SIW_WC_SUCCESS)
1241                         wc_status = SIW_WC_GENERAL_ERR;
1242                 /*
1243                  * Handle STag invalidation request
1244                  */
1245                 if (wc_status == SIW_WC_SUCCESS &&
1246                     (opcode == RDMAP_SEND_INVAL ||
1247                      opcode == RDMAP_SEND_SE_INVAL)) {
1248                         rv = siw_invalidate_stag(qp->pd, srx->inval_stag);
1249                         if (rv) {
1250                                 siw_init_terminate(
1251                                         qp, TERM_ERROR_LAYER_RDMAP,
1252                                         rv == -EACCES ?
1253                                                 RDMAP_ETYPE_REMOTE_PROTECTION :
1254                                                 RDMAP_ETYPE_REMOTE_OPERATION,
1255                                         RDMAP_ECODE_CANNOT_INVALIDATE, 0);
1256
1257                                 wc_status = SIW_WC_REM_INV_REQ_ERR;
1258                         }
1259                         rv = siw_rqe_complete(qp, &wqe->rqe, wqe->processed,
1260                                               rv ? 0 : srx->inval_stag,
1261                                               wc_status);
1262                 } else {
1263                         rv = siw_rqe_complete(qp, &wqe->rqe, wqe->processed,
1264                                               0, wc_status);
1265                 }
1266                 siw_wqe_put_mem(wqe, SIW_OP_RECEIVE);
1267                 break;
1268
1269         case RDMAP_RDMA_READ_RESP:
1270                 if (wqe->wr_status == SIW_WR_IDLE)
1271                         break;
1272
1273                 if (error != 0) {
1274                         if ((srx->state == SIW_GET_HDR &&
1275                              qp->rx_fpdu->first_ddp_seg) || error == -ENODATA)
1276                                 /* possible RREQ in ORQ left untouched */
1277                                 break;
1278
1279                         if (wc_status == SIW_WC_SUCCESS)
1280                                 wc_status = SIW_WC_GENERAL_ERR;
1281                 } else if (rdma_is_kernel_res(&qp->base_qp.res) &&
1282                            rx_type(wqe) == SIW_OP_READ_LOCAL_INV) {
1283                         /*
1284                          * Handle any STag invalidation request
1285                          */
1286                         rv = siw_invalidate_stag(qp->pd, wqe->sqe.sge[0].lkey);
1287                         if (rv) {
1288                                 siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
1289                                                    RDMAP_ETYPE_CATASTROPHIC,
1290                                                    RDMAP_ECODE_UNSPECIFIED, 0);
1291
1292                                 if (wc_status == SIW_WC_SUCCESS) {
1293                                         wc_status = SIW_WC_GENERAL_ERR;
1294                                         error = rv;
1295                                 }
1296                         }
1297                 }
1298                 /*
1299                  * All errors turn the wqe into signalled.
1300                  */
1301                 if ((wqe->sqe.flags & SIW_WQE_SIGNALLED) || error != 0)
1302                         rv = siw_sqe_complete(qp, &wqe->sqe, wqe->processed,
1303                                               wc_status);
1304                 siw_wqe_put_mem(wqe, SIW_OP_READ);
1305
1306                 if (!error) {
1307                         rv = siw_check_tx_fence(qp);
1308                 } else {
1309                         /* Disable current ORQ element */
1310                         if (qp->attrs.orq_size)
1311                                 WRITE_ONCE(orq_get_current(qp)->flags, 0);
1312                 }
1313                 break;
1314
1315         case RDMAP_RDMA_READ_REQ:
1316                 if (!error) {
1317                         rv = siw_init_rresp(qp, srx);
1318                         srx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ]++;
1319                 }
1320                 break;
1321
1322         case RDMAP_RDMA_WRITE:
1323                 if (wqe->wr_status == SIW_WR_IDLE)
1324                         break;
1325
1326                 /*
1327                  * Free References from memory object if
1328                  * attached to receive context (inbound WRITE).
1329                  * While a zero-length WRITE is allowed,
1330                  * no memory reference got created.
1331                  */
1332                 if (rx_mem(&qp->rx_tagged)) {
1333                         siw_mem_put(rx_mem(&qp->rx_tagged));
1334                         rx_mem(&qp->rx_tagged) = NULL;
1335                 }
1336                 break;
1337
1338         default:
1339                 break;
1340         }
1341         wqe->wr_status = SIW_WR_IDLE;
1342
1343         return rv;
1344 }
1345
1346 /*
1347  * siw_tcp_rx_data()
1348  *
1349  * Main routine to consume inbound TCP payload
1350  *
1351  * @rd_desc:    read descriptor
1352  * @skb:        socket buffer
1353  * @off:        offset in skb
1354  * @len:        skb->len - offset : payload in skb
1355  */
1356 int siw_tcp_rx_data(read_descriptor_t *rd_desc, struct sk_buff *skb,
1357                     unsigned int off, size_t len)
1358 {
1359         struct siw_qp *qp = rd_desc->arg.data;
1360         struct siw_rx_stream *srx = &qp->rx_stream;
1361         int rv;
1362
1363         srx->skb = skb;
1364         srx->skb_new = skb->len - off;
1365         srx->skb_offset = off;
1366         srx->skb_copied = 0;
1367
1368         siw_dbg_qp(qp, "new data, len %d\n", srx->skb_new);
1369
1370         while (srx->skb_new) {
1371                 int run_completion = 1;
1372
1373                 if (unlikely(srx->rx_suspend)) {
1374                         /* Do not process any more data */
1375                         srx->skb_copied += srx->skb_new;
1376                         break;
1377                 }
1378                 switch (srx->state) {
1379                 case SIW_GET_HDR:
1380                         rv = siw_get_hdr(srx);
1381                         if (!rv) {
1382                                 srx->fpdu_part_rem =
1383                                         be16_to_cpu(srx->hdr.ctrl.mpa_len) -
1384                                         srx->fpdu_part_rcvd + MPA_HDR_SIZE;
1385
1386                                 if (srx->fpdu_part_rem)
1387                                         srx->pad = -srx->fpdu_part_rem & 0x3;
1388                                 else
1389                                         srx->pad = 0;
1390
1391                                 srx->state = SIW_GET_DATA_START;
1392                                 srx->fpdu_part_rcvd = 0;
1393                         }
1394                         break;
1395
1396                 case SIW_GET_DATA_MORE:
1397                         /*
1398                          * Another data fragment of the same DDP segment.
1399                          * Setting first_ddp_seg = 0 avoids repeating
1400                          * initializations that shall occur only once per
1401                          * DDP segment.
1402                          */
1403                         qp->rx_fpdu->first_ddp_seg = 0;
1404                         fallthrough;
1405
1406                 case SIW_GET_DATA_START:
1407                         /*
1408                          * Headers will be checked by the opcode-specific
1409                          * data receive function below.
1410                          */
1411                         rv = iwarp_pktinfo[qp->rx_stream.rdmap_op].rx_data(qp);
1412                         if (!rv) {
1413                                 int mpa_len =
1414                                         be16_to_cpu(srx->hdr.ctrl.mpa_len)
1415                                         + MPA_HDR_SIZE;
1416
1417                                 srx->fpdu_part_rem = (-mpa_len & 0x3)
1418                                                       + MPA_CRC_SIZE;
1419                                 srx->fpdu_part_rcvd = 0;
1420                                 srx->state = SIW_GET_TRAILER;
1421                         } else {
1422                                 if (unlikely(rv == -ECONNRESET))
1423                                         run_completion = 0;
1424                                 else
1425                                         srx->state = SIW_GET_DATA_MORE;
1426                         }
1427                         break;
1428
1429                 case SIW_GET_TRAILER:
1430                         /*
1431                          * read CRC + any padding
1432                          */
1433                         rv = siw_get_trailer(qp, srx);
1434                         if (likely(!rv)) {
1435                                 /*
1436                                  * FPDU completed.
1437                                  * complete RDMAP message if last fragment
1438                                  */
1439                                 srx->state = SIW_GET_HDR;
1440                                 srx->fpdu_part_rcvd = 0;
1441
1442                                 if (!(srx->hdr.ctrl.ddp_rdmap_ctrl &
1443                                       DDP_FLAG_LAST))
1444                                         /* more frags */
1445                                         break;
1446
1447                                 rv = siw_rdmap_complete(qp, 0);
1448                                 run_completion = 0;
1449                         }
1450                         break;
1451
1452                 default:
1453                         pr_warn("QP[%u]: RX out of state\n", qp_id(qp));
1454                         rv = -EPROTO;
1455                         run_completion = 0;
1456                 }
1457                 if (unlikely(rv != 0 && rv != -EAGAIN)) {
1458                         if ((srx->state > SIW_GET_HDR ||
1459                              qp->rx_fpdu->more_ddp_segs) && run_completion)
1460                                 siw_rdmap_complete(qp, rv);
1461
1462                         siw_dbg_qp(qp, "rx error %d, rx state %d\n", rv,
1463                                    srx->state);
1464
1465                         siw_qp_cm_drop(qp, 1);
1466
1467                         break;
1468                 }
1469                 if (rv) {
1470                         siw_dbg_qp(qp, "fpdu fragment, state %d, missing %d\n",
1471                                    srx->state, srx->fpdu_part_rem);
1472                         break;
1473                 }
1474         }
1475         return srx->skb_copied;
1476 }