2 * Copyright (c) 2016 Chelsio Communications, Inc.
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
/*
 * Driver name and version strings. pr_fmt() is defined so that the format
 * string of each pr_info()/pr_err()/pr_debug() message in this file starts
 * with DRV_NAME ": ". DRV_VERSION is passed to MODULE_VERSION() at the end
 * of the file.
 */
9 #define DRV_NAME "cxgbit"
10 #define DRV_VERSION "1.0.0-ko"
11 #define pr_fmt(fmt) DRV_NAME ": " fmt
15 #ifdef CONFIG_CHELSIO_T4_DCB
16 #include <net/dcbevent.h>
17 #include "cxgb4_dcb.h"
/* List of cxgbit devices; cxgbit_uld_add() appends to it. */
20 LIST_HEAD(cdev_list_head);
/* Held around the additions to and removals from cdev_list_head in this file. */
22 DEFINE_MUTEX(cdev_list_lock);
/*
 * Takes the kref embedded in a cxgbit_device, recovers the device with
 * container_of() and calls cxgbi_ppm_release() on cdev2ppm(cdev).
 * Presumably the kref release callback for the device - confirm against
 * cxgbit_put_cdev().
 * NOTE(review): this listing omits some source lines, so any further
 * teardown performed here is not visible.
 */
24 void _cxgbit_free_cdev(struct kref *kref)
26 struct cxgbit_device *cdev;
28 cdev = container_of(kref, struct cxgbit_device, kref);
30 cxgbi_ppm_release(cdev2ppm(cdev));
/*
 * Computes the value named mdsl for @cdev from cdev->lldi.
 * The result is the smallest of:
 *   - lldi->iscsi_iolen - ISCSI_PDU_NONPAYLOAD_LEN
 *   - ULP2_MAX_PKT_LEN - ISCSI_PDU_NONPAYLOAD_LEN
 *   - 8192
 *   - (MAX_SKB_FRAGS - 1) * PAGE_SIZE
 * NOTE(review): the declaration of mdsl and the place the result is stored
 * are on lines omitted from this listing.
 * NOTE(review): the comparisons are done as u32; if iscsi_iolen could ever
 * be below ISCSI_PDU_NONPAYLOAD_LEN the subtraction would wrap - confirm
 * the valid range of iscsi_iolen.
 */
34 static void cxgbit_set_mdsl(struct cxgbit_device *cdev)
36 struct cxgb4_lld_info *lldi = &cdev->lldi;
39 #define ULP2_MAX_PKT_LEN 16224
40 #define ISCSI_PDU_NONPAYLOAD_LEN 312
41 mdsl = min_t(u32, lldi->iscsi_iolen - ISCSI_PDU_NONPAYLOAD_LEN,
42 ULP2_MAX_PKT_LEN - ISCSI_PDU_NONPAYLOAD_LEN);
43 mdsl = min_t(u32, mdsl, 8192);
44 mdsl = min_t(u32, mdsl, (MAX_SKB_FRAGS - 1) * PAGE_SIZE);
/*
 * "add" callback installed in cxgbit_uld_info: creates a cxgbit_device for
 * the adapter described by @lldi and appends it to cdev_list_head.
 *
 * Visible error returns: ERR_PTR(-ENODEV) for T4 adapters, ERR_PTR(-ENOMEM)
 * after the kzalloc() and ERR_PTR(-EINVAL) when cxgbit_ddp_init() returns a
 * negative value.
 * NOTE(review): this listing omits some source lines (the NULL check on
 * cdev, the cleanup on the -EINVAL path and the success return are not
 * visible).
 */
49 static void *cxgbit_uld_add(const struct cxgb4_lld_info *lldi)
51 struct cxgbit_device *cdev;
/* T4 adapters are rejected. */
53 if (is_t4(lldi->adapter_type))
54 return ERR_PTR(-ENODEV);
56 cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
58 return ERR_PTR(-ENOMEM);
60 kref_init(&cdev->kref);
61 spin_lock_init(&cdev->np_lock);
65 cxgbit_set_mdsl(cdev);
67 if (cxgbit_ddp_init(cdev) < 0) {
69 return ERR_PTR(-EINVAL);
/* A device without CDEV_DDP_ENABLE set is still added; it is only logged. */
72 if (!test_bit(CDEV_DDP_ENABLE, &cdev->flags))
73 pr_info("cdev %s ddp init failed\n",
74 pci_name(lldi->pdev));
/* CDEV_ISO_ENABLE is set only from this firmware version onwards. */
76 if (lldi->fw_vers >= 0x10d2b00)
77 set_bit(CDEV_ISO_ENABLE, &cdev->flags);
79 spin_lock_init(&cdev->cskq.lock);
80 INIT_LIST_HEAD(&cdev->cskq.list);
82 mutex_lock(&cdev_list_lock);
83 list_add_tail(&cdev->list, &cdev_list_head);
84 mutex_unlock(&cdev_list_lock);
86 pr_info("cdev %s added for iSCSI target transport\n",
87 pci_name(lldi->pdev));
/*
 * Walks every cxgbit_sock on cdev->cskq.list with cskq.lock held and, for
 * each one, appends a freshly allocated zero-length skb to csk->rxq, then
 * wakes csk->waitq.
 * wakeup_thread is set when the skb just queued is the only one on rxq.
 * NOTE(review): this listing omits some source lines; the allocation
 * failure check and the condition guarding wake_up() are not visible
 * (presumably wake_up() runs only when wakeup_thread is set - confirm).
 */
92 static void cxgbit_close_conn(struct cxgbit_device *cdev)
94 struct cxgbit_sock *csk;
96 bool wakeup_thread = false;
98 spin_lock_bh(&cdev->cskq.lock);
99 list_for_each_entry(csk, &cdev->cskq.list, list) {
/* GFP_ATOMIC: the allocation happens while cskq.lock is held. */
100 skb = alloc_skb(0, GFP_ATOMIC);
104 spin_lock_bh(&csk->rxq.lock);
105 __skb_queue_tail(&csk->rxq, skb);
106 if (skb_queue_len(&csk->rxq) == 1)
107 wakeup_thread = true;
108 spin_unlock_bh(&csk->rxq.lock);
111 wake_up(&csk->waitq);
/* Reset the flag for the next socket in the list. */
112 wakeup_thread = false;
115 spin_unlock_bh(&cdev->cskq.lock);
/*
 * Detach handling for @cdev. Under cskq.lock it tests whether the device
 * still has sockets on cskq.list. The visible actions are: remove the
 * device from cdev_list_head under cdev_list_lock and drop a reference
 * with cxgbit_put_cdev(), and call cxgbit_close_conn().
 * NOTE(review): this listing omits some source lines, so how free_cdev
 * selects between the remove-and-put path and cxgbit_close_conn() is not
 * visible - presumably the former runs when the socket list is empty.
 */
118 static void cxgbit_detach_cdev(struct cxgbit_device *cdev)
120 bool free_cdev = false;
122 spin_lock_bh(&cdev->cskq.lock);
123 if (list_empty(&cdev->cskq.list))
125 spin_unlock_bh(&cdev->cskq.lock);
128 mutex_lock(&cdev_list_lock);
129 list_del(&cdev->list);
130 mutex_unlock(&cdev_list_lock);
132 cxgbit_put_cdev(cdev);
134 cxgbit_close_conn(cdev);
/*
 * "state_change" callback installed in cxgbit_uld_info.
 * @handle: the cxgbit_device (used directly as cdev).
 * @state:  new adapter state.
 *
 * Visible handling:
 *   - an "UP" branch sets CDEV_STATE_UP (its case label is not visible);
 *   - CXGB4_STATE_START_RECOVERY clears CDEV_STATE_UP and calls
 *     cxgbit_close_conn();
 *   - CXGB4_STATE_DOWN only logs;
 *   - CXGB4_STATE_DETACH clears CDEV_STATE_UP and calls
 *     cxgbit_detach_cdev();
 *   - a final branch logs an unknown state.
 * Every branch logs the PCI name of the device.
 * NOTE(review): this listing omits some source lines; the switch statement,
 * break statements and the return value are not visible.
 */
138 static int cxgbit_uld_state_change(void *handle, enum cxgb4_state state)
140 struct cxgbit_device *cdev = handle;
144 set_bit(CDEV_STATE_UP, &cdev->flags);
145 pr_info("cdev %s state UP.\n", pci_name(cdev->lldi.pdev));
147 case CXGB4_STATE_START_RECOVERY:
148 clear_bit(CDEV_STATE_UP, &cdev->flags);
149 cxgbit_close_conn(cdev);
150 pr_info("cdev %s state RECOVERY.\n", pci_name(cdev->lldi.pdev));
152 case CXGB4_STATE_DOWN:
153 pr_info("cdev %s state DOWN.\n", pci_name(cdev->lldi.pdev));
155 case CXGB4_STATE_DETACH:
156 clear_bit(CDEV_STATE_UP, &cdev->flags);
157 pr_info("cdev %s state DETACH.\n", pci_name(cdev->lldi.pdev));
158 cxgbit_detach_cdev(cdev);
161 pr_info("cdev %s unknown state %d.\n",
162 pci_name(cdev->lldi.pdev), state);
/*
 * Translates the status carried in @cpl into @pdu_cb.
 * @tid:    connection id, used only in the log messages.
 * @cpl:    CPL message; ddpvld, ulp_crc and len are converted from
 *          big-endian before use.
 * @pdu_cb: per-PDU control block that receives the result.
 *
 * Always sets PDUCBF_RX_STATUS and records ddigest and pdulen. Header and
 * data CRC status bits additionally set PDUCBF_RX_HCRC_ERR and
 * PDUCBF_RX_DCRC_ERR; a pad status bit is only logged. PDUCBF_RX_DATA_DDPD
 * is set when the DDP status bit is on and PDUCBF_RX_DATA is not already
 * flagged.
 * NOTE(review): the visible caller passes a struct cpl_rx_iscsi_ddp
 * pointer; presumably that name is an alias of struct cpl_rx_data_ddp -
 * confirm in the headers.
 * NOTE(review): this listing omits some source lines (return type and
 * braces).
 */
169 cxgbit_proc_ddp_status(unsigned int tid, struct cpl_rx_data_ddp *cpl,
170 struct cxgbit_lro_pdu_cb *pdu_cb)
172 unsigned int status = ntohl(cpl->ddpvld);
174 pdu_cb->flags |= PDUCBF_RX_STATUS;
175 pdu_cb->ddigest = ntohl(cpl->ulp_crc);
176 pdu_cb->pdulen = ntohs(cpl->len);
178 if (status & (1 << CPL_RX_ISCSI_DDP_STATUS_HCRC_SHIFT)) {
179 pr_info("tid 0x%x, status 0x%x, hcrc bad.\n", tid, status);
180 pdu_cb->flags |= PDUCBF_RX_HCRC_ERR;
183 if (status & (1 << CPL_RX_ISCSI_DDP_STATUS_DCRC_SHIFT)) {
184 pr_info("tid 0x%x, status 0x%x, dcrc bad.\n", tid, status);
185 pdu_cb->flags |= PDUCBF_RX_DCRC_ERR;
/* A pad error is reported but sets no flag on the visible lines. */
188 if (status & (1 << CPL_RX_ISCSI_DDP_STATUS_PAD_SHIFT))
189 pr_info("tid 0x%x, status 0x%x, pad bad.\n", tid, status);
191 if ((status & (1 << CPL_RX_ISCSI_DDP_STATUS_DDP_SHIFT)) &&
192 (!(pdu_cb->flags & PDUCBF_RX_DATA))) {
193 pdu_cb->flags |= PDUCBF_RX_DATA_DDPD;
/*
 * Adds a status response to the LRO skb.
 * The CPL message is read from just past the first 64-bit word of @rsp and
 * handed to cxgbit_proc_ddp_status() together with the current PDU control
 * block. The PDU is marked complete when its header has already been seen
 * (PDUCBF_RX_HDR); the LRO control block is marked complete and its
 * pdu_totallen grows by the PDU length.
 * NOTE(review): this listing omits some source lines (return type, the
 * second argument of cxgbit_skb_lro_pdu_cb() and anything following the
 * last visible statement).
 */
198 cxgbit_lro_add_packet_rsp(struct sk_buff *skb, u8 op, const __be64 *rsp)
200 struct cxgbit_lro_cb *lro_cb = cxgbit_skb_lro_cb(skb);
201 struct cxgbit_lro_pdu_cb *pdu_cb = cxgbit_skb_lro_pdu_cb(skb,
203 struct cpl_rx_iscsi_ddp *cpl = (struct cpl_rx_iscsi_ddp *)(rsp + 1);
205 cxgbit_proc_ddp_status(lro_cb->csk->tid, cpl, pdu_cb);
207 if (pdu_cb->flags & PDUCBF_RX_HDR)
208 pdu_cb->complete = true;
210 lro_cb->complete = true;
211 lro_cb->pdu_totallen += pdu_cb->pdulen;
/*
 * Appends the page fragments of gather list @gl to @skb, starting at the
 * skb's current nr_frags. The first fragment is entered with its offset
 * advanced and its size reduced by `offset`; the remaining fragments are
 * added by the loop. nr_frags is then increased by gl->nfrags.
 * NOTE(review): this listing omits some source lines (return type, the
 * third parameter that supplies `offset`, the declaration of i and the
 * remaining arguments of the call inside the loop).
 */
216 cxgbit_copy_frags(struct sk_buff *skb, const struct pkt_gl *gl,
219 u8 skb_frag_idx = skb_shinfo(skb)->nr_frags;
222 /* usually there's just one frag */
223 __skb_fill_page_desc(skb, skb_frag_idx, gl->frags[0].page,
224 gl->frags[0].offset + offset,
225 gl->frags[0].size - offset);
226 for (i = 1; i < gl->nfrags; i++)
227 __skb_fill_page_desc(skb, skb_frag_idx + i,
232 skb_shinfo(skb)->nr_frags += gl->nfrags;
234 /* get a reference to the last page, we don't own it */
235 get_page(gl->frags[gl->nfrags - 1].page);
/*
 * Adds the gather list @gl of a CPL_ISCSI_HDR message, or of the other
 * message type handled here (parsed as struct cpl_iscsi_data), to the LRO
 * skb.
 *
 * Header case: flags PDUCBF_RX_HDR and records the sequence number, the
 * header pointer (just past the CPL header in gl->va) and the index of the
 * fragment that holds it; lro_cb->complete is cleared.
 * Data case: flags PDUCBF_RX_DATA and records the data offset, fragment
 * count and first fragment index.
 * Both cases then copy the fragments with cxgbit_copy_frags() and add the
 * CPL length to lro_cb->offset, skb->data_len and skb->truesize.
 * NOTE(review): this listing omits some source lines (return type,
 * declarations of offset and len, the else keyword, and possibly further
 * accounting statements).
 */
239 cxgbit_lro_add_packet_gl(struct sk_buff *skb, u8 op, const struct pkt_gl *gl)
241 struct cxgbit_lro_cb *lro_cb = cxgbit_skb_lro_cb(skb);
242 struct cxgbit_lro_pdu_cb *pdu_cb = cxgbit_skb_lro_pdu_cb(skb,
246 if (op == CPL_ISCSI_HDR) {
247 struct cpl_iscsi_hdr *cpl = (struct cpl_iscsi_hdr *)gl->va;
249 offset = sizeof(struct cpl_iscsi_hdr);
250 pdu_cb->flags |= PDUCBF_RX_HDR;
251 pdu_cb->seq = ntohl(cpl->seq);
252 len = ntohs(cpl->len);
253 pdu_cb->hdr = gl->va + offset;
255 pdu_cb->hfrag_idx = skb_shinfo(skb)->nr_frags;
/* A header that spans more than one fragment clears the skb's cb flags. */
257 if (unlikely(gl->nfrags > 1))
258 cxgbit_skcb_flags(skb) = 0;
260 lro_cb->complete = false;
262 struct cpl_iscsi_data *cpl = (struct cpl_iscsi_data *)gl->va;
264 offset = sizeof(struct cpl_iscsi_data);
265 pdu_cb->flags |= PDUCBF_RX_DATA;
266 len = ntohs(cpl->len);
268 pdu_cb->doffset = lro_cb->offset;
269 pdu_cb->nr_dfrags = gl->nfrags;
270 pdu_cb->dfrag_idx = skb_shinfo(skb)->nr_frags;
/* offset skips the CPL header at the start of the first fragment. */
273 cxgbit_copy_frags(skb, gl, offset);
275 pdu_cb->frags += gl->nfrags;
276 lro_cb->offset += len;
278 skb->data_len += len;
279 skb->truesize += len;
/*
 * Allocates a new LRO skb for @csk with napi_alloc_skb(), zeroes the first
 * LRO_SKB_MAX_HEADROOM bytes at skb->data and marks the skb with
 * SKCBF_RX_LRO before fetching its LRO control block.
 * NOTE(review): this listing omits some source lines; the allocation
 * failure check, the initialisation of the control block and the return
 * statement are not visible.
 */
282 static struct sk_buff *
283 cxgbit_lro_init_skb(struct cxgbit_sock *csk, u8 op, const struct pkt_gl *gl,
284 const __be64 *rsp, struct napi_struct *napi)
287 struct cxgbit_lro_cb *lro_cb;
289 skb = napi_alloc_skb(napi, LRO_SKB_MAX_HEADROOM);
294 memset(skb->data, 0, LRO_SKB_MAX_HEADROOM);
296 cxgbit_skcb_flags(skb) |= SKCBF_RX_LRO;
298 lro_cb = cxgbit_skb_lro_cb(skb);
/*
 * Appends @skb to csk->rxq under rxq.lock and wakes csk->waitq.
 * wakeup_thread is set when the skb just queued is the only one on rxq.
 * NOTE(review): this listing omits some source lines; the condition
 * guarding wake_up() is not visible (presumably wakeup_thread - confirm).
 */
307 static void cxgbit_queue_lro_skb(struct cxgbit_sock *csk, struct sk_buff *skb)
309 bool wakeup_thread = false;
311 spin_lock(&csk->rxq.lock);
312 __skb_queue_tail(&csk->rxq, skb);
313 if (skb_queue_len(&csk->rxq) == 1)
314 wakeup_thread = true;
315 spin_unlock(&csk->rxq.lock);
318 wake_up(&csk->waitq);
/*
 * Ends the LRO session that owns @skb: unlinks the skb from lro_mgr->lroq,
 * hands it to the owning socket through cxgbit_queue_lro_skb() and
 * decrements lro_mgr->lro_session_cnt.
 * NOTE(review): this listing omits some source lines, so conditions around
 * the unlink/queue step and any other bookkeeping are not visible.
 */
321 static void cxgbit_lro_flush(struct t4_lro_mgr *lro_mgr, struct sk_buff *skb)
323 struct cxgbit_lro_cb *lro_cb = cxgbit_skb_lro_cb(skb);
324 struct cxgbit_sock *csk = lro_cb->csk;
328 __skb_unlink(skb, &lro_mgr->lroq);
329 cxgbit_queue_lro_skb(csk, skb);
334 lro_mgr->lro_session_cnt--;
/*
 * "lro_flush" callback installed in cxgbit_uld_info: flushes every skb on
 * lro_mgr->lroq by repeatedly peeking at the head of the queue and passing
 * it to cxgbit_lro_flush().
 */
337 static void cxgbit_uld_lro_flush(struct t4_lro_mgr *lro_mgr)
341 while ((skb = skb_peek(&lro_mgr->lroq)))
342 cxgbit_lro_flush(lro_mgr, skb);
/*
 * Merges one CPL message for @csk into an LRO skb.
 *
 * Visible steps:
 *   - log an error when csk is NULL;
 *   - flush all sessions once lro_mgr->lro_session_cnt has reached
 *     MAX_LRO_SESSIONS;
 *   - create an LRO skb with cxgbit_lro_init_skb(), queue it on
 *     lro_mgr->lroq and count the new session;
 *   - flush the skb when adding @gl would exceed MAX_SKB_FRAGS, when
 *     pdu_totallen has reached LRO_FLUSH_LEN_MAX, or when pdu_idx has
 *     reached MAX_SKB_FRAGS;
 *   - add the gather list or the response to the skb and count the merge.
 * NOTE(review): this listing omits some source lines (return type, labels,
 * branch conditions, gotos and returns), so how these steps are joined is
 * not visible. The visible caller tests the result with `!`, so presumably
 * 0 means the message was consumed - confirm.
 */
346 cxgbit_lro_receive(struct cxgbit_sock *csk, u8 op, const __be64 *rsp,
347 const struct pkt_gl *gl, struct t4_lro_mgr *lro_mgr,
348 struct napi_struct *napi)
351 struct cxgbit_lro_cb *lro_cb;
354 pr_err("%s: csk NULL, op 0x%x.\n", __func__, op);
362 if (lro_mgr->lro_session_cnt >= MAX_LRO_SESSIONS) {
363 cxgbit_uld_lro_flush(lro_mgr);
367 skb = cxgbit_lro_init_skb(csk, op, gl, rsp, napi);
373 __skb_queue_tail(&lro_mgr->lroq, skb);
374 lro_mgr->lro_session_cnt++;
378 lro_cb = cxgbit_skb_lro_cb(skb);
380 if ((gl && (((skb_shinfo(skb)->nr_frags + gl->nfrags) >
381 MAX_SKB_FRAGS) || (lro_cb->pdu_totallen >= LRO_FLUSH_LEN_MAX))) ||
382 (lro_cb->pdu_idx >= MAX_SKB_FRAGS)) {
383 cxgbit_lro_flush(lro_mgr, skb);
388 cxgbit_lro_add_packet_gl(skb, op, gl);
390 cxgbit_lro_add_packet_rsp(skb, op, rsp);
392 lro_mgr->lro_merged++;
/*
 * "lro_rx_handler" callback installed in cxgbit_uld_info.
 * @hndl:    the cxgbit_device (used directly as cdev).
 * @rsp:     response descriptor; its first byte is the CPL opcode.
 * @gl:      gather list with the message payload; the code also handles
 *           the case where it is absent.
 * @lro_mgr: LRO manager holding the in-progress LRO skbs.
 * @napi:    NAPI context used for skb allocation.
 *
 * Messages are either merged through cxgbit_lro_receive() (CPL_RX_ISCSI_DDP,
 * CPL_ISCSI_HDR, CPL_ISCSI_DATA) or turned into an skb and dispatched to
 * cxgbit_cplhandlers[op].
 * NOTE(review): this listing omits some source lines (return type, switch
 * statement, branch conditions, labels and returns), so the exact control
 * flow and the return values are not visible.
 */
401 cxgbit_uld_lro_rx_handler(void *hndl, const __be64 *rsp,
402 const struct pkt_gl *gl, struct t4_lro_mgr *lro_mgr,
403 struct napi_struct *napi)
405 struct cxgbit_device *cdev = hndl;
406 struct cxgb4_lld_info *lldi = &cdev->lldi;
407 struct cpl_tx_data *rpl = NULL;
408 struct cxgbit_sock *csk = NULL;
409 unsigned int tid = 0;
411 unsigned int op = *(u8 *)rsp;
412 bool lro_flush = true;
417 case CPL_RX_ISCSI_DDP:
420 case CPL_ABORT_RPL_RSS:
421 case CPL_PASS_ESTABLISH:
423 case CPL_CLOSE_CON_RPL:
424 case CPL_ABORT_REQ_RSS:
425 case CPL_SET_TCB_RPL:
/*
 * The CPL message is read from the gather list when there is one, otherwise
 * from just past the first 64-bit word of rsp.
 */
427 rpl = gl ? (struct cpl_tx_data *)gl->va :
428 (struct cpl_tx_data *)(rsp + 1);
430 csk = lookup_tid(lldi->tids, tid);
/* Flush this connection's pending LRO skb while lro_flush is still set. */
436 if (csk && csk->lro_skb && lro_flush)
437 cxgbit_lro_flush(lro_mgr, csk->lro_skb);
442 if (op == CPL_RX_ISCSI_DDP) {
443 if (!cxgbit_lro_receive(csk, op, rsp, NULL, lro_mgr,
/* Copy len bytes starting at rsp[1] into a newly allocated skb. */
448 len = 64 - sizeof(struct rsp_ctrl) - 8;
449 skb = napi_alloc_skb(napi, len);
453 skb_copy_to_linear_data(skb, &rsp[1], len);
/* The opcode in rsp disagrees with the first byte of the gather-list data. */
455 if (unlikely(op != *(u8 *)gl->va)) {
456 pr_info("? FL 0x%p,RSS%#llx,FL %#llx,len %u.\n",
457 gl->va, be64_to_cpu(*rsp),
458 be64_to_cpu(*(u64 *)gl->va),
463 if (op == CPL_ISCSI_HDR || op == CPL_ISCSI_DATA) {
464 if (!cxgbit_lro_receive(csk, op, rsp, gl, lro_mgr,
469 #define RX_PULL_LEN 128
470 skb = cxgb4_pktgl_to_skb(gl, RX_PULL_LEN, RX_PULL_LEN);
475 rpl = (struct cpl_tx_data *)skb->data;
477 cxgbit_skcb_rx_opcode(skb) = op;
479 pr_debug("cdev %p, opcode 0x%x(0x%x,0x%x), skb %p.\n",
480 cdev, op, rpl->ot.opcode_tid,
481 ntohl(rpl->ot.opcode_tid), skb);
/* Dispatch only when the opcode is in range and has a registered handler. */
483 if (op < NUM_CPL_CMDS && cxgbit_cplhandlers[op]) {
484 cxgbit_cplhandlers[op](cdev, skb);
486 pr_err("No handler for opcode 0x%x.\n", op);
491 pr_err("%s OOM bailing out.\n", __func__);
495 #ifdef CONFIG_CHELSIO_T4_DCB
/*
 * Deferred DCB event: a copy of the dcb_app_type delivered to the notifier
 * plus the work item that processes it in cxgbit_dcb_workfn().
 */
496 struct cxgbit_dcb_work {
497 struct dcb_app_type dcb_app;
498 struct work_struct work;
/*
 * Notifies sockets of @cdev that are affected by a DCB priority change.
 * @port_id:      port to compare with csk->port_id.
 * @dcb_priority: new priority, compared with csk->dcb_priority.
 * @port_num:     port number compared with the socket's local port.
 *
 * Walks cdev->cskq.list with cskq.lock held. Each socket is tested against
 * @port_id, against @port_num (local port taken from the IPv6 or IPv4 form
 * of csk->com.local_addr) and against @dcb_priority. A zero-length skb is
 * then queued on csk->rxq and csk->waitq is woken.
 * NOTE(review): this listing omits some source lines (return type, the
 * statements executed when a test fails, the allocation failure check and
 * the condition guarding wake_up()); presumably a socket is skipped unless
 * port and local port match and the priority differs - confirm.
 */
502 cxgbit_update_dcb_priority(struct cxgbit_device *cdev, u8 port_id,
503 u8 dcb_priority, u16 port_num)
505 struct cxgbit_sock *csk;
508 bool wakeup_thread = false;
510 spin_lock_bh(&cdev->cskq.lock);
511 list_for_each_entry(csk, &cdev->cskq.list, list) {
512 if (csk->port_id != port_id)
515 if (csk->com.local_addr.ss_family == AF_INET6) {
516 struct sockaddr_in6 *sock_in6;
518 sock_in6 = (struct sockaddr_in6 *)&csk->com.local_addr;
519 local_port = ntohs(sock_in6->sin6_port);
521 struct sockaddr_in *sock_in;
523 sock_in = (struct sockaddr_in *)&csk->com.local_addr;
524 local_port = ntohs(sock_in->sin_port);
527 if (local_port != port_num)
530 if (csk->dcb_priority == dcb_priority)
/* GFP_ATOMIC: the allocation happens while cskq.lock is held. */
533 skb = alloc_skb(0, GFP_ATOMIC);
537 spin_lock(&csk->rxq.lock);
538 __skb_queue_tail(&csk->rxq, skb);
539 if (skb_queue_len(&csk->rxq) == 1)
540 wakeup_thread = true;
541 spin_unlock(&csk->rxq.lock);
544 wake_up(&csk->waitq);
/* Reset the flag for the next socket in the list. */
545 wakeup_thread = false;
548 spin_unlock_bh(&cdev->cskq.lock);
/*
 * Work function for a cxgbit_dcb_work item (set up by INIT_WORK() in
 * cxgbit_dcbevent_notify()).
 *
 * Derives a priority from the saved DCB application entry:
 *   - IEEE DCBX: the selector is checked against IEEE_8021QAZ_APP_SEL_ANY
 *     and app.priority is used as is;
 *   - CEE DCBX: the selector is checked against DCB_APP_IDTYPE_PORTNUM,
 *     app.priority is checked for zero, and the priority becomes
 *     ffs(app.priority) - 1.
 * It then looks up the net device by ifindex in init_net, finds the
 * matching cxgbit device and port under cdev_list_lock and calls
 * cxgbit_update_dcb_priority() with app.protocol as the port number.
 * NOTE(review): this listing omits some source lines (the statements run
 * when a check fails, the handling of a failed lookup, and cleanup).
 * dev_get_by_index() takes a reference on the device; confirm that the
 * omitted lines release it and free dcb_work.
 */
551 static void cxgbit_dcb_workfn(struct work_struct *work)
553 struct cxgbit_dcb_work *dcb_work;
554 struct net_device *ndev;
555 struct cxgbit_device *cdev = NULL;
556 struct dcb_app_type *iscsi_app;
557 u8 priority, port_id = 0xff;
559 dcb_work = container_of(work, struct cxgbit_dcb_work, work);
560 iscsi_app = &dcb_work->dcb_app;
562 if (iscsi_app->dcbx & DCB_CAP_DCBX_VER_IEEE) {
563 if (iscsi_app->app.selector != IEEE_8021QAZ_APP_SEL_ANY)
566 priority = iscsi_app->app.priority;
568 } else if (iscsi_app->dcbx & DCB_CAP_DCBX_VER_CEE) {
569 if (iscsi_app->app.selector != DCB_APP_IDTYPE_PORTNUM)
572 if (!iscsi_app->app.priority)
575 priority = ffs(iscsi_app->app.priority) - 1;
580 pr_debug("priority for ifid %d is %u\n",
581 iscsi_app->ifindex, priority);
583 ndev = dev_get_by_index(&init_net, iscsi_app->ifindex);
588 mutex_lock(&cdev_list_lock);
589 cdev = cxgbit_find_device(ndev, &port_id);
594 mutex_unlock(&cdev_list_lock);
/* The priority update runs with cdev_list_lock still held. */
598 cxgbit_update_dcb_priority(cdev, port_id, priority,
599 iscsi_app->app.protocol);
600 mutex_unlock(&cdev_list_lock);
/*
 * notifier_call of cxgbit_dcbevent_nb: copies the dcb_app_type passed as
 * notifier data into a newly allocated cxgbit_dcb_work and schedules
 * cxgbit_dcb_workfn() to process it.
 * The work item is allocated with GFP_ATOMIC.
 * NOTE(review): this listing omits some source lines (return type, the
 * `data` parameter line, the allocation failure check and the return
 * values).
 */
606 cxgbit_dcbevent_notify(struct notifier_block *nb, unsigned long action,
609 struct cxgbit_dcb_work *dcb_work;
610 struct dcb_app_type *dcb_app = data;
612 dcb_work = kzalloc(sizeof(*dcb_work), GFP_ATOMIC);
616 dcb_work->dcb_app = *dcb_app;
617 INIT_WORK(&dcb_work->work, cxgbit_dcb_workfn);
618 schedule_work(&dcb_work->work);
/*
 * iscsit_get_sup_prot_ops callback of cxgbit_transport: always reports
 * TARGET_PROT_NORMAL, regardless of @conn.
 */
623 static enum target_prot_op cxgbit_get_sup_prot_ops(struct iscsi_conn *conn)
625 return TARGET_PROT_NORMAL;
/*
 * iSCSI target transport descriptor registered by cxgbit_init().
 * Callbacks named cxgbit_* are supplied by this driver; the ones named
 * iscsit_* come from outside this file.
 * NOTE(review): this listing omits some source lines, so further
 * initialisers may exist.
 */
628 static struct iscsit_transport cxgbit_transport = {
630 .transport_type = ISCSI_CXGBIT,
631 .rdma_shutdown = false,
/* Per-command private area is sized for one struct cxgbit_cmd. */
632 .priv_size = sizeof(struct cxgbit_cmd),
633 .owner = THIS_MODULE,
634 .iscsit_setup_np = cxgbit_setup_np,
635 .iscsit_accept_np = cxgbit_accept_np,
636 .iscsit_free_np = cxgbit_free_np,
637 .iscsit_free_conn = cxgbit_free_conn,
638 .iscsit_get_login_rx = cxgbit_get_login_rx,
639 .iscsit_put_login_tx = cxgbit_put_login_tx,
640 .iscsit_immediate_queue = iscsit_immediate_queue,
641 .iscsit_response_queue = iscsit_response_queue,
642 .iscsit_get_dataout = iscsit_build_r2ts_for_cmd,
/* Data-in and status share the same queueing callback. */
643 .iscsit_queue_data_in = iscsit_queue_rsp,
644 .iscsit_queue_status = iscsit_queue_rsp,
645 .iscsit_xmit_pdu = cxgbit_xmit_pdu,
646 .iscsit_get_r2t_ttt = cxgbit_get_r2t_ttt,
647 .iscsit_get_rx_pdu = cxgbit_get_rx_pdu,
648 .iscsit_validate_params = cxgbit_validate_params,
649 .iscsit_release_cmd = cxgbit_release_cmd,
650 .iscsit_aborted_task = iscsit_aborted_task,
651 .iscsit_get_sup_prot_ops = cxgbit_get_sup_prot_ops,
/*
 * Upper-layer-driver descriptor passed to cxgb4_register_uld() in
 * cxgbit_init(); it wires the add, state-change and LRO callbacks defined
 * in this file.
 * NOTE(review): this listing omits some source lines, so further
 * initialisers may exist.
 */
654 static struct cxgb4_uld_info cxgbit_uld_info = {
656 .nrxq = MAX_ULD_QSETS,
659 .add = cxgbit_uld_add,
660 .state_change = cxgbit_uld_state_change,
661 .lro_rx_handler = cxgbit_uld_lro_rx_handler,
662 .lro_flush = cxgbit_uld_lro_flush,
665 #ifdef CONFIG_CHELSIO_T4_DCB
/*
 * DCB event notifier block; registered in cxgbit_init() and unregistered in
 * cxgbit_exit().
 */
666 static struct notifier_block cxgbit_dcbevent_nb = {
667 .notifier_call = cxgbit_dcbevent_notify,
/*
 * Module init: registers the ULD with cxgb4 and the iSCSI target transport,
 * and, when CONFIG_CHELSIO_T4_DCB is set, the DCB event notifier.
 * The BUILD_BUG_ON() fails the build if union cxgbit_skb_cb does not fit
 * in sk_buff.cb.
 * NOTE(review): the results of cxgb4_register_uld() and
 * iscsit_register_transport() are not used on the visible lines; confirm
 * whether a registration failure should abort module load.
 * NOTE(review): this listing omits some source lines (the return statement
 * is not visible).
 */
671 static int __init cxgbit_init(void)
673 cxgb4_register_uld(CXGB4_ULD_ISCSIT, &cxgbit_uld_info);
674 iscsit_register_transport(&cxgbit_transport);
676 #ifdef CONFIG_CHELSIO_T4_DCB
677 pr_info("%s dcb enabled.\n", DRV_NAME);
678 register_dcbevent_notifier(&cxgbit_dcbevent_nb);
680 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, cb) <
681 sizeof(union cxgbit_skb_cb));
/*
 * Module exit: unregisters the DCB event notifier (when
 * CONFIG_CHELSIO_T4_DCB is set), removes every device from cdev_list_head
 * and drops a reference on each, then unregisters the iSCSI target
 * transport and the ULD.
 */
685 static void __exit cxgbit_exit(void)
687 struct cxgbit_device *cdev, *tmp;
689 #ifdef CONFIG_CHELSIO_T4_DCB
690 unregister_dcbevent_notifier(&cxgbit_dcbevent_nb);
692 mutex_lock(&cdev_list_lock);
/* The _safe iterator is needed because entries are deleted while walking. */
693 list_for_each_entry_safe(cdev, tmp, &cdev_list_head, list) {
694 list_del(&cdev->list);
695 cxgbit_put_cdev(cdev);
697 mutex_unlock(&cdev_list_lock);
698 iscsit_unregister_transport(&cxgbit_transport);
699 cxgb4_unregister_uld(CXGB4_ULD_ISCSIT);
/* Module entry and exit points, followed by the module metadata. */
702 module_init(cxgbit_init);
703 module_exit(cxgbit_exit);
705 MODULE_DESCRIPTION("Chelsio iSCSI target offload driver");
706 MODULE_AUTHOR("Chelsio Communications");
707 MODULE_VERSION(DRV_VERSION);
708 MODULE_LICENSE("GPL");