2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 #include <linux/skbuff.h>
35 #include <linux/delay.h>
36 #include <linux/sched.h>
37 #include <linux/vmalloc.h>
41 #include "rxe_queue.h"
/* Printable name for each internal QP_STATE_* value, indexed by the state. */
44 char *rxe_qp_state_name[] = {
45 [QP_STATE_RESET] = "RESET",
46 [QP_STATE_INIT] = "INIT",
47 [QP_STATE_READY] = "READY",
48 [QP_STATE_DRAIN] = "DRAIN",
49 [QP_STATE_DRAINED] = "DRAINED",
50 [QP_STATE_ERROR] = "ERROR",
/*
 * Check requested QP capabilities against the limits of the rxe device.
 *
 * max_send_wr and max_recv_wr are compared with rxe->attr.max_qp_wr,
 * max_send_sge and max_recv_sge with rxe->attr.max_sge, and
 * max_inline_data with rxe->max_inline_data.  A warning is logged for each
 * value that exceeds its limit.  Callers in this file treat a non-zero
 * return value as "capabilities rejected".
 */
53 static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap,
/* send queue: work requests and scatter/gather entries */
56 if (cap->max_send_wr > rxe->attr.max_qp_wr) {
57 pr_warn("invalid send wr = %d > %d\n",
58 cap->max_send_wr, rxe->attr.max_qp_wr);
62 if (cap->max_send_sge > rxe->attr.max_sge) {
63 pr_warn("invalid send sge = %d > %d\n",
64 cap->max_send_sge, rxe->attr.max_sge);
/* receive queue: work requests and scatter/gather entries */
69 if (cap->max_recv_wr > rxe->attr.max_qp_wr) {
70 pr_warn("invalid recv wr = %d > %d\n",
71 cap->max_recv_wr, rxe->attr.max_qp_wr);
75 if (cap->max_recv_sge > rxe->attr.max_sge) {
76 pr_warn("invalid recv sge = %d > %d\n",
77 cap->max_recv_sge, rxe->attr.max_sge);
/* inline payload size */
82 if (cap->max_inline_data > rxe->max_inline_data) {
83 pr_warn("invalid max inline data = %d > %d\n",
84 cap->max_inline_data, rxe->max_inline_data);
/*
 * Validate the attributes of a create-QP request.
 *
 * Both a receive and a send CQ must be supplied, and the requested
 * capabilities are checked by rxe_qp_chk_cap().  For SMI and GSI QPs the
 * port number is checked as well, and a warning is logged when the port
 * already records a QP of that kind in qp_smi_index / qp_gsi_index.
 */
94 int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
96 struct ib_qp_cap *cap = &init->cap;
97 struct rxe_port *port;
98 int port_num = init->port_num;
100 if (!init->recv_cq || !init->send_cq) {
101 pr_warn("missing cq\n");
/* the last argument tells the capability check whether an SRQ is attached */
105 if (rxe_qp_chk_cap(rxe, cap, !!init->srq))
/* special (management) QPs carry extra per-port restrictions */
108 if (init->qp_type == IB_QPT_SMI || init->qp_type == IB_QPT_GSI) {
110 pr_warn("invalid port = %d\n", port_num);
/* a non-zero index means this port already has an SMI QP */
116 if (init->qp_type == IB_QPT_SMI && port->qp_smi_index) {
117 pr_warn("SMI QP exists for port %d\n", port_num);
/* likewise for the GSI QP */
121 if (init->qp_type == IB_QPT_GSI && port->qp_gsi_index) {
122 pr_warn("GSI QP exists for port %d\n", port_num);
/*
 * Allocate the responder's array of n read/atomic resources.
 *
 * The head and tail indices are reset to 0 and the array is allocated
 * zero-filled with kcalloc(); the result is checked for allocation failure.
 */
133 static int alloc_rd_atomic_resources(struct rxe_qp *qp, unsigned int n)
135 qp->resp.res_head = 0;
136 qp->resp.res_tail = 0;
137 qp->resp.resources = kcalloc(n, sizeof(struct resp_res), GFP_KERNEL);
139 if (!qp->resp.resources)
/*
 * Release every responder read/atomic resource and then the array itself.
 *
 * Does nothing when no array is allocated.  The number of entries walked
 * is qp->attr.max_dest_rd_atomic, so that field must still describe the
 * array that is being freed when this function is called.
 */
145 static void free_rd_atomic_resources(struct rxe_qp *qp)
147 if (qp->resp.resources) {
150 for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) {
151 struct resp_res *res = &qp->resp.resources[i];
153 free_rd_atomic_resource(qp, res);
/* clear the pointer so a later call sees "nothing allocated" */
155 kfree(qp->resp.resources);
156 qp->resp.resources = NULL;
/*
 * Release what a single responder resource holds, depending on its type:
 * the saved skb of an atomic operation, or the reference on the memory
 * region of a read operation.
 */
160 void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res)
162 if (res->type == RXE_ATOMIC_MASK) {
164 kfree_skb(res->atomic.skb);
165 } else if (res->type == RXE_READ_MASK) {
167 rxe_drop_ref(res->read.mr);
/*
 * Release the contents of every responder read/atomic resource.
 *
 * Walks qp->attr.max_dest_rd_atomic entries when an array is allocated.
 * NOTE(review): unlike free_rd_atomic_resources() no kfree() of the array
 * is visible here, so the array presumably stays allocated for reuse --
 * confirm.
 */
172 static void cleanup_rd_atomic_resources(struct rxe_qp *qp)
175 struct resp_res *res;
177 if (qp->resp.resources) {
178 for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) {
179 res = &qp->resp.resources[i];
180 free_rd_atomic_resource(qp, res);
/*
 * Initialise the parts of a new QP that do not belong to the send or
 * receive side: signalling type, an initial path MTU, the QP number, the
 * multicast group list, the send packet queue, locks and counters.
 *
 * The QP number is taken from the pool element index.  Inside the qp_type
 * switch the port records that number in qp_smi_index / qp_gsi_index and
 * the QP stores the requested port number.
 */
185 static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
186 struct ib_qp_init_attr *init)
188 struct rxe_port *port;
191 qp->sq_sig_type = init->sq_sig_type;
/* initial MTU enum value 1 (presumably IB_MTU_256 -- confirm against enum ib_mtu) */
192 qp->attr.path_mtu = 1;
193 qp->mtu = ib_mtu_enum_to_int(qp->attr.path_mtu);
195 qpn = qp->pelem.index;
198 switch (init->qp_type) {
201 port->qp_smi_index = qpn;
202 qp->attr.port_num = init->port_num;
207 port->qp_gsi_index = qpn;
208 qp->attr.port_num = init->port_num;
212 qp->ibqp.qp_num = qpn;
216 INIT_LIST_HEAD(&qp->grp_list);
218 skb_queue_head_init(&qp->send_pkts);
220 spin_lock_init(&qp->grp_lock);
221 spin_lock_init(&qp->state_lock);
/* send sequence number and count of outstanding skbs both start at 0 */
223 atomic_set(&qp->ssn, 0);
224 atomic_set(&qp->skb_out, 0);
/*
 * Set up the requester/completer side of a new QP.
 *
 * Creates a kernel AF_INET datagram socket for the QP, sizes and allocates
 * the send queue, exposes it through do_mmap_info(), and initialises the
 * requester and completer tasks together with the RNR-NAK and retransmit
 * timers.  The queue buffer is vfree()d in what appears to be the
 * do_mmap_info() failure path.
 */
227 static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
228 struct ib_qp_init_attr *init,
229 struct ib_ucontext *context, struct ib_udata *udata)
234 err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk);
/* let code that only has the socket find its QP again */
237 qp->sk->sk->sk_user_data = qp;
239 qp->sq.max_wr = init->cap.max_send_wr;
240 qp->sq.max_sge = init->cap.max_send_sge;
241 qp->sq.max_inline = init->cap.max_inline_data;
/*
 * A send WQE must be large enough for the bigger of two layouts: the WQE
 * header plus a full SGE list, or the header plus a second payload
 * (presumably the inline data -- confirm).
 */
243 wqe_size = max_t(int, sizeof(struct rxe_send_wqe) +
244 qp->sq.max_sge * sizeof(struct ib_sge),
245 sizeof(struct rxe_send_wqe) +
248 qp->sq.queue = rxe_queue_init(rxe,
254 err = do_mmap_info(rxe, udata, true,
255 context, qp->sq.queue->buf,
256 qp->sq.queue->buf_size, &qp->sq.queue->ip);
259 vfree(qp->sq.queue->buf);
/* the requester starts at the queue's current producer position */
265 qp->req.wqe_index = producer_index(qp->sq.queue);
266 qp->req.state = QP_STATE_RESET;
268 qp->comp.opcode = -1;
270 spin_lock_init(&qp->sq.sq_lock);
271 skb_queue_head_init(&qp->req_pkts);
273 rxe_init_task(rxe, &qp->req.task, qp,
274 rxe_requester, "req");
275 rxe_init_task(rxe, &qp->comp.task, qp,
276 rxe_completer, "comp");
/* both timers get the QP as their callback argument */
278 init_timer(&qp->rnr_nak_timer);
279 qp->rnr_nak_timer.function = rnr_nak_timer;
280 qp->rnr_nak_timer.data = (unsigned long)qp;
282 init_timer(&qp->retrans_timer);
283 qp->retrans_timer.function = retransmit_timer;
284 qp->retrans_timer.data = (unsigned long)qp;
285 qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */
/*
 * Set up the responder side of a new QP.
 *
 * Records the receive queue limits, derives the receive WQE size from the
 * SGE count, allocates the receive queue and exposes it through
 * do_mmap_info(), then initialises the receive locks, the response packet
 * queue and the responder task.  The queue buffer is vfree()d in what
 * appears to be the do_mmap_info() failure path.
 */
290 static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
291 struct ib_qp_init_attr *init,
292 struct ib_ucontext *context, struct ib_udata *udata)
298 qp->rq.max_wr = init->cap.max_recv_wr;
299 qp->rq.max_sge = init->cap.max_recv_sge;
301 wqe_size = rcv_wqe_size(qp->rq.max_sge);
303 pr_debug("qp#%d max_wr = %d, max_sge = %d, wqe_size = %d\n",
304 qp_num(qp), qp->rq.max_wr, qp->rq.max_sge, wqe_size);
306 qp->rq.queue = rxe_queue_init(rxe,
312 err = do_mmap_info(rxe, udata, false, context,
314 qp->rq.queue->buf_size,
317 vfree(qp->rq.queue->buf);
/* separate locks for the producer and consumer ends of the receive queue */
324 spin_lock_init(&qp->rq.producer_lock);
325 spin_lock_init(&qp->rq.consumer_lock);
327 skb_queue_head_init(&qp->resp_pkts);
329 rxe_init_task(rxe, &qp->resp.task, qp,
330 rxe_responder, "resp");
/* the responder starts with no opcode and in the RESET state */
332 qp->resp.opcode = OPCODE_NONE;
334 qp->resp.state = QP_STATE_RESET;
/*
 * Build a QP from the create-QP attributes.
 *
 * Resolves the rxe CQ and (optional) SRQ objects from the init attributes,
 * then runs rxe_qp_init_misc(), rxe_qp_init_req() and rxe_qp_init_resp()
 * in that order and sets the verbs state to IB_QPS_RESET.  A user context
 * is only looked up when udata is supplied.  The rxe_queue_cleanup() call
 * on the send queue is presumably part of the error unwinding -- confirm.
 */
339 /* called by the create qp verb */
340 int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
341 struct ib_qp_init_attr *init, struct ib_udata *udata,
345 struct rxe_cq *rcq = to_rcq(init->recv_cq);
346 struct rxe_cq *scq = to_rcq(init->send_cq);
347 struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL;
348 struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;
361 rxe_qp_init_misc(rxe, qp, init);
363 err = rxe_qp_init_req(rxe, qp, init, context, udata);
367 err = rxe_qp_init_resp(rxe, qp, init, context, udata);
371 qp->attr.qp_state = IB_QPS_RESET;
377 rxe_queue_cleanup(qp->sq.queue);
/*
 * Fill an ib_qp_init_attr from an existing QP.
 *
 * Handler, context, CQs, SRQ and QP type are read from the embedded ib_qp;
 * the queue capabilities and signalling type are read from the rxe QP.
 */
393 /* called by the query qp verb */
394 int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init)
396 init->event_handler = qp->ibqp.event_handler;
397 init->qp_context = qp->ibqp.qp_context;
398 init->send_cq = qp->ibqp.send_cq;
399 init->recv_cq = qp->ibqp.recv_cq;
400 init->srq = qp->ibqp.srq;
/* send queue limits as recorded at creation */
402 init->cap.max_send_wr = qp->sq.max_wr;
403 init->cap.max_send_sge = qp->sq.max_sge;
404 init->cap.max_inline_data = qp->sq.max_inline;
/* receive queue limits as recorded at creation */
407 init->cap.max_recv_wr = qp->rq.max_wr;
408 init->cap.max_recv_sge = qp->rq.max_sge;
411 init->sq_sig_type = qp->sq_sig_type;
413 init->qp_type = qp->ibqp.qp_type;
/*
 * Validate a modify-QP request.
 *
 * The current state is attr->cur_qp_state when IB_QP_CUR_STATE is set in
 * the mask, otherwise the QP's recorded state; the new state is
 * attr->qp_state when IB_QP_STATE is set, otherwise it equals the current
 * state.  The transition and mask are checked with ib_modify_qp_is_ok()
 * for an Ethernet link layer.  When leaving SQD, a requester that is still
 * in QP_STATE_DRAIN is singled out unless the new state is IB_QPS_ERR.
 *
 * Individual attributes are then checked according to the mask: port and
 * alternate port must be 1, timeout and alternate timeout must not exceed
 * 31, max_rd_atomic must not exceed rxe->attr.max_qp_rd_atom, capabilities
 * go through rxe_qp_chk_cap(), address vectors through rxe_av_chk_attr(),
 * and the path MTU is compared with the port's max_mtu.
 */
419 /* called by the modify qp verb, this routine checks all the parameters before
422 int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
423 struct ib_qp_attr *attr, int mask)
425 enum ib_qp_state cur_state = (mask & IB_QP_CUR_STATE) ?
426 attr->cur_qp_state : qp->attr.qp_state;
427 enum ib_qp_state new_state = (mask & IB_QP_STATE) ?
428 attr->qp_state : cur_state;
430 if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask,
431 IB_LINK_LAYER_ETHERNET)) {
432 pr_warn("invalid mask or state for qp\n");
436 if (mask & IB_QP_STATE) {
437 if (cur_state == IB_QPS_SQD) {
438 if (qp->req.state == QP_STATE_DRAIN &&
439 new_state != IB_QPS_ERR)
444 if (mask & IB_QP_PORT) {
445 if (attr->port_num != 1) {
446 pr_warn("invalid port %d\n", attr->port_num);
451 if (mask & IB_QP_CAP && rxe_qp_chk_cap(rxe, &attr->cap, !!qp->srq))
454 if (mask & IB_QP_AV && rxe_av_chk_attr(rxe, &attr->ah_attr))
457 if (mask & IB_QP_ALT_PATH) {
458 if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr))
460 if (attr->alt_port_num != 1) {
461 pr_warn("invalid alt port %d\n", attr->alt_port_num);
464 if (attr->alt_timeout > 31) {
465 pr_warn("invalid QP alt timeout %d > 31\n",
471 if (mask & IB_QP_PATH_MTU) {
472 struct rxe_port *port = &rxe->port;
474 enum ib_mtu max_mtu = port->attr.max_mtu;
475 enum ib_mtu mtu = attr->path_mtu;
478 pr_debug("invalid mtu (%d) > (%d)\n",
479 ib_mtu_enum_to_int(mtu),
480 ib_mtu_enum_to_int(max_mtu));
485 if (mask & IB_QP_MAX_QP_RD_ATOMIC) {
486 if (attr->max_rd_atomic > rxe->attr.max_qp_rd_atom) {
487 pr_warn("invalid max_rd_atomic %d > %d\n",
489 rxe->attr.max_qp_rd_atom);
494 if (mask & IB_QP_TIMEOUT) {
495 if (attr->timeout > 31) {
496 pr_warn("invalid QP timeout %d > 31\n",
/*
 * Return a QP to the reset state.
 *
 * The responder, completer and requester tasks are disabled, both internal
 * state machines are set to QP_STATE_RESET, and each task is then run once
 * synchronously through __rxe_do_task() so it can flush its queues.  The
 * send queue is reset, the send sequence number and the requester and
 * responder bookkeeping fields are cleared, the responder's MR reference
 * and read/atomic resources are released, and the tasks are enabled again.
 * Disabling and enabling the completer task is conditional on the QP being
 * of type IB_QPT_RC.
 */
508 /* move the qp to the reset state */
509 static void rxe_qp_reset(struct rxe_qp *qp)
511 /* stop tasks from running */
512 rxe_disable_task(&qp->resp.task);
514 /* stop request/comp */
516 if (qp_type(qp) == IB_QPT_RC)
517 rxe_disable_task(&qp->comp.task);
518 rxe_disable_task(&qp->req.task);
521 /* move qp to the reset state */
522 qp->req.state = QP_STATE_RESET;
523 qp->resp.state = QP_STATE_RESET;
525 /* let state machines reset themselves drain work and packet queues
528 __rxe_do_task(&qp->resp.task);
531 __rxe_do_task(&qp->comp.task);
532 __rxe_do_task(&qp->req.task);
533 rxe_queue_reset(qp->sq.queue);
536 /* cleanup attributes */
537 atomic_set(&qp->ssn, 0);
539 qp->req.need_retry = 0;
540 qp->req.noack_pkts = 0;
542 qp->resp.opcode = -1;
543 qp->resp.drop_msg = 0;
544 qp->resp.goto_error = 0;
545 qp->resp.sent_psn_nak = 0;
548 rxe_drop_ref(qp->resp.mr);
552 cleanup_rd_atomic_resources(qp);
/* everything is back in its initial state: let the tasks run again */
555 rxe_enable_task(&qp->resp.task);
558 if (qp_type(qp) == IB_QPT_RC)
559 rxe_enable_task(&qp->comp.task);
561 rxe_enable_task(&qp->req.task);
/*
 * Start draining the send queue.
 *
 * Nothing happens if the requester is already QP_STATE_DRAINED.  Otherwise
 * the requester is moved to QP_STATE_DRAIN and the completer and requester
 * tasks are kicked.  For RC QPs the completer is scheduled with
 * rxe_run_task(); the direct __rxe_do_task() call is presumably the branch
 * taken for other QP types -- confirm.
 */
565 /* drain the send queue */
566 static void rxe_qp_drain(struct rxe_qp *qp)
569 if (qp->req.state != QP_STATE_DRAINED) {
570 qp->req.state = QP_STATE_DRAIN;
571 if (qp_type(qp) == IB_QPT_RC)
572 rxe_run_task(&qp->comp.task, 1);
574 __rxe_do_task(&qp->comp.task);
575 rxe_run_task(&qp->req.task, 1);
/*
 * Put a QP into the error state.
 *
 * Both internal state machines are set to QP_STATE_ERROR and the verbs
 * state to IB_QPS_ERR, then the responder, completer and requester tasks
 * are kicked so they drain their queues.  For RC QPs the completer is
 * scheduled with rxe_run_task(); the direct __rxe_do_task() call is
 * presumably the branch taken for other QP types -- confirm.
 */
580 /* move the qp to the error state */
581 void rxe_qp_error(struct rxe_qp *qp)
583 qp->req.state = QP_STATE_ERROR;
584 qp->resp.state = QP_STATE_ERROR;
585 qp->attr.qp_state = IB_QPS_ERR;
587 /* drain work and packet queues */
588 rxe_run_task(&qp->resp.task, 1);
590 if (qp_type(qp) == IB_QPT_RC)
591 rxe_run_task(&qp->comp.task, 1);
593 __rxe_do_task(&qp->comp.task);
594 rxe_run_task(&qp->req.task, 1);
/*
 * Apply the attributes selected by @mask from @attr to @qp.
 *
 * Each IB_QP_* bit set in @mask copies the matching field(s) of @attr into
 * qp->attr.  Where the requester, completer or responder keep their own
 * copy (rd_atomic credits, PSNs, retry counters, MTU, timeout) that copy
 * is updated too.  IB_QP_STATE also drives the internal req/resp state
 * machines.  The values are presumably validated beforehand by
 * rxe_qp_chk_attr().
 *
 * Both rd_atomic limits are rounded up to a power of two; 0 stays 0.
 */
597 /* called by the modify qp verb */
598 int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
599 struct ib_udata *udata)
602 struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
604 struct ib_gid_attr sgid_attr;
606 if (mask & IB_QP_MAX_QP_RD_ATOMIC) {
607 int max_rd_atomic = attr->max_rd_atomic ?
608 roundup_pow_of_two(attr->max_rd_atomic) : 0;
610 qp->attr.max_rd_atomic = max_rd_atomic;
611 atomic_set(&qp->req.rd_atomic, max_rd_atomic);
614 if (mask & IB_QP_MAX_DEST_RD_ATOMIC) {
615 int max_dest_rd_atomic = attr->max_dest_rd_atomic ?
616 roundup_pow_of_two(attr->max_dest_rd_atomic) : 0;
/*
 * Free the old array BEFORE publishing the new count:
 * free_rd_atomic_resources() walks qp->attr.max_dest_rd_atomic entries,
 * so the count must still describe the array being released.  Updating
 * it first would read past the old array when the limit grows and skip
 * (leak) the tail entries when it shrinks.
 */
618 free_rd_atomic_resources(qp);
620 qp->attr.max_dest_rd_atomic = max_dest_rd_atomic;
622 err = alloc_rd_atomic_resources(qp, max_dest_rd_atomic);
/*
 * IB_QP_CUR_STATE marks attr->cur_qp_state as valid (rxe_qp_chk_attr()
 * reads the same field for this bit); attr->qp_state is only meaningful
 * under IB_QP_STATE.
 */
627 if (mask & IB_QP_CUR_STATE)
628 qp->attr.cur_qp_state = attr->cur_qp_state;
630 if (mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
631 qp->attr.en_sqd_async_notify = attr->en_sqd_async_notify;
633 if (mask & IB_QP_ACCESS_FLAGS)
634 qp->attr.qp_access_flags = attr->qp_access_flags;
636 if (mask & IB_QP_PKEY_INDEX)
637 qp->attr.pkey_index = attr->pkey_index;
639 if (mask & IB_QP_PORT)
640 qp->attr.port_num = attr->port_num;
642 if (mask & IB_QP_QKEY)
643 qp->attr.qkey = attr->qkey;
/* primary path: look up the source GID on port 1 and build the address vector */
645 if (mask & IB_QP_AV) {
646 ib_get_cached_gid(&rxe->ib_dev, 1,
647 attr->ah_attr.grh.sgid_index, &sgid,
649 rxe_av_from_attr(rxe, attr->port_num, &qp->pri_av,
651 rxe_av_fill_ip_info(rxe, &qp->pri_av, &attr->ah_attr,
/* drop the reference on the net device held in the GID attribute */
654 dev_put(sgid_attr.ndev);
/* alternate path: same steps as the primary path, stored in alt_av */
657 if (mask & IB_QP_ALT_PATH) {
658 ib_get_cached_gid(&rxe->ib_dev, 1,
659 attr->alt_ah_attr.grh.sgid_index, &sgid,
662 rxe_av_from_attr(rxe, attr->alt_port_num, &qp->alt_av,
664 rxe_av_fill_ip_info(rxe, &qp->alt_av, &attr->alt_ah_attr,
667 dev_put(sgid_attr.ndev);
669 qp->attr.alt_port_num = attr->alt_port_num;
670 qp->attr.alt_pkey_index = attr->alt_pkey_index;
671 qp->attr.alt_timeout = attr->alt_timeout;
/* keep the enum in qp->attr and the byte count in qp->mtu */
674 if (mask & IB_QP_PATH_MTU) {
675 qp->attr.path_mtu = attr->path_mtu;
676 qp->mtu = ib_mtu_enum_to_int(attr->path_mtu);
679 if (mask & IB_QP_TIMEOUT) {
680 qp->attr.timeout = attr->timeout;
/* a timeout of 0 is stored as 0 jiffies */
681 if (attr->timeout == 0) {
682 qp->qp_timeout_jiffies = 0;
684 /* According to the spec, timeout = 4.096 * 2 ^ attr->timeout [us] */
685 int j = nsecs_to_jiffies(4096ULL << attr->timeout);
/* a non-zero timeout must not round down to 0 jiffies */
687 qp->qp_timeout_jiffies = j ? j : 1;
/* the completer keeps its own working copy of the retry counters */
691 if (mask & IB_QP_RETRY_CNT) {
692 qp->attr.retry_cnt = attr->retry_cnt;
693 qp->comp.retry_cnt = attr->retry_cnt;
694 pr_debug("qp#%d set retry count = %d\n", qp_num(qp),
698 if (mask & IB_QP_RNR_RETRY) {
699 qp->attr.rnr_retry = attr->rnr_retry;
700 qp->comp.rnr_retry = attr->rnr_retry;
701 pr_debug("qp#%d set rnr retry count = %d\n", qp_num(qp),
/* PSNs are masked with BTH_PSN_MASK before being stored */
705 if (mask & IB_QP_RQ_PSN) {
706 qp->attr.rq_psn = (attr->rq_psn & BTH_PSN_MASK);
707 qp->resp.psn = qp->attr.rq_psn;
708 pr_debug("qp#%d set resp psn = 0x%x\n", qp_num(qp),
712 if (mask & IB_QP_MIN_RNR_TIMER) {
713 qp->attr.min_rnr_timer = attr->min_rnr_timer;
714 pr_debug("qp#%d set min rnr timer = 0x%x\n", qp_num(qp),
715 attr->min_rnr_timer);
/* the send PSN seeds both the requester and the completer */
718 if (mask & IB_QP_SQ_PSN) {
719 qp->attr.sq_psn = (attr->sq_psn & BTH_PSN_MASK);
720 qp->req.psn = qp->attr.sq_psn;
721 qp->comp.psn = qp->attr.sq_psn;
722 pr_debug("qp#%d set req psn = 0x%x\n", qp_num(qp), qp->req.psn);
725 if (mask & IB_QP_PATH_MIG_STATE)
726 qp->attr.path_mig_state = attr->path_mig_state;
728 if (mask & IB_QP_DEST_QPN)
729 qp->attr.dest_qp_num = attr->dest_qp_num;
/* record the new verbs state, then move the internal state machines to match */
731 if (mask & IB_QP_STATE) {
732 qp->attr.qp_state = attr->qp_state;
734 switch (attr->qp_state) {
736 pr_debug("qp#%d state -> RESET\n", qp_num(qp));
741 pr_debug("qp#%d state -> INIT\n", qp_num(qp));
742 qp->req.state = QP_STATE_INIT;
743 qp->resp.state = QP_STATE_INIT;
/* RTR: only the responder becomes ready */
747 pr_debug("qp#%d state -> RTR\n", qp_num(qp));
748 qp->resp.state = QP_STATE_READY;
/* RTS: the requester becomes ready as well */
752 pr_debug("qp#%d state -> RTS\n", qp_num(qp));
753 qp->req.state = QP_STATE_READY;
757 pr_debug("qp#%d state -> SQD\n", qp_num(qp));
762 pr_warn("qp#%d state -> SQE !!?\n", qp_num(qp));
763 /* Not possible from modify_qp. */
767 pr_debug("qp#%d state -> ERR\n", qp_num(qp));
/*
 * Fill an ib_qp_attr from an existing QP.
 *
 * The PSNs are read from the live responder and requester state, the queue
 * capabilities from the limits recorded on the send and receive queues,
 * and both address vectors are converted with rxe_av_to_attr().
 * sq_draining is reported as 1 while the requester is in QP_STATE_DRAIN
 * and as 0 otherwise.
 */
776 /* called by the query qp verb */
777 int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)
779 struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
783 attr->rq_psn = qp->resp.psn;
784 attr->sq_psn = qp->req.psn;
786 attr->cap.max_send_wr = qp->sq.max_wr;
787 attr->cap.max_send_sge = qp->sq.max_sge;
788 attr->cap.max_inline_data = qp->sq.max_inline;
791 attr->cap.max_recv_wr = qp->rq.max_wr;
792 attr->cap.max_recv_sge = qp->rq.max_sge;
/* primary and alternate address vectors */
795 rxe_av_to_attr(rxe, &qp->pri_av, &attr->ah_attr);
796 rxe_av_to_attr(rxe, &qp->alt_av, &attr->alt_ah_attr);
798 if (qp->req.state == QP_STATE_DRAIN) {
799 attr->sq_draining = 1;
800 /* applications that get this state
801 * typically spin on it. yield the
806 attr->sq_draining = 0;
809 pr_debug("attr->sq_draining = %d\n", attr->sq_draining);
/*
 * Stop the active parts of a QP before it is released.
 *
 * The stored timeout is zeroed, the responder task is cleaned up, both
 * timers are deleted synchronously, and the requester and completer tasks
 * are cleaned up.  The requester and completer are then run directly via
 * __rxe_do_task() to flush whatever is still queued.
 */
814 /* called by the destroy qp verb */
815 void rxe_qp_destroy(struct rxe_qp *qp)
818 qp->qp_timeout_jiffies = 0;
819 rxe_cleanup_task(&qp->resp.task);
/* del_timer_sync() also waits for a running timer callback to finish */
821 del_timer_sync(&qp->retrans_timer);
822 del_timer_sync(&qp->rnr_nak_timer);
824 rxe_cleanup_task(&qp->req.task);
825 rxe_cleanup_task(&qp->comp.task);
827 /* flush out any receive wr's or pending requests */
828 __rxe_do_task(&qp->req.task);
830 __rxe_do_task(&qp->comp.task);
831 __rxe_do_task(&qp->req.task);
/*
 * Final release of a QP; arg is the struct rxe_qp being released.
 *
 * Leaves all multicast groups, cleans up the send and receive queues,
 * drops the references on the SRQ, both CQs, the PD and the responder's
 * MR, frees the read/atomic resources, and finally shuts down and releases
 * the QP's socket.
 */
835 /* called when the last reference to the qp is dropped */
836 void rxe_qp_cleanup(void *arg)
838 struct rxe_qp *qp = arg;
840 rxe_drop_all_mcast_groups(qp);
843 rxe_queue_cleanup(qp->sq.queue);
846 rxe_drop_ref(qp->srq);
849 rxe_queue_cleanup(qp->rq.queue);
852 rxe_drop_ref(qp->scq);
854 rxe_drop_ref(qp->rcq);
856 rxe_drop_ref(qp->pd);
859 rxe_drop_ref(qp->resp.mr);
863 free_rd_atomic_resources(qp);
/* shut the socket down in both directions before releasing it */
865 kernel_sock_shutdown(qp->sk, SHUT_RDWR);
866 sock_release(qp->sk);