/* Copyright (c) 2012 - 2015 UNISYS CORPORATION
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for more
 * details.
 */

/* This driver lives in a spar partition, and registers to ethernet io
 * channels from the visorbus driver. It creates netdev devices and
 * forwards transmit to the IO channel and accepts rcvs from the IO
 * Partition via the IO channel.
 */

21 #include <linux/debugfs.h>
22 #include <linux/etherdevice.h>
23 #include <linux/netdevice.h>
24 #include <linux/kthread.h>
25 #include <linux/skbuff.h>
26 #include <linux/rtnetlink.h>
29 #include "iochannel.h"
/* Timeout value meaning "wait for the IO partition's response forever". */
#define VISORNIC_INFINITE_RSP_WAIT 0

/* MAX_BUF = 64 lines x 32 MAXVNIC x 80 characters
 *         = 163840 bytes
 */
#define MAX_BUF 163840
#define NAPI_WEIGHT 64

/* GUIDS for director channel type supported by this driver. */
/* {8cd5994d-c58e-11da-95a9-00e08161165f} */
#define VISOR_VNIC_CHANNEL_GUID \
	GUID_INIT(0x8cd5994d, 0xc58e, 0x11da, \
		  0x95, 0xa9, 0x0, 0xe0, 0x81, 0x61, 0x16, 0x5f)
#define VISOR_VNIC_CHANNEL_GUID_STR \
	"8cd5994d-c58e-11da-95a9-00e08161165f"

47 static struct visor_channeltype_descriptor visornic_channel_types[] = {
48 /* Note that the only channel type we expect to be reported by the
49 * bus driver is the VISOR_VNIC channel.
51 { VISOR_VNIC_CHANNEL_GUID, "ultravnic" },
54 MODULE_DEVICE_TABLE(visorbus, visornic_channel_types);
55 /* FIXME XXX: This next line of code must be fixed and removed before
56 * acceptance into the 'normal' part of the kernel. It is only here as a place
57 * holder to get module autoloading functionality working for visorbus. Code
58 * must be added to scripts/mode/file2alias.c, etc., to get this working
61 MODULE_ALIAS("visorbus:" VISOR_VNIC_CHANNEL_GUID_STR);
/* struct chanstat - per-device counters of messages exchanged with the
 * IO partition over the channel.
 */
struct chanstat {
	unsigned long got_rcv;
	unsigned long got_enbdisack;
	unsigned long got_xmit_done;
	unsigned long xmit_fail;
	unsigned long sent_enbdis;
	unsigned long sent_promisc;
	unsigned long sent_post;
	unsigned long sent_post_failed;
	unsigned long sent_xmit;
	unsigned long reject_count;
	unsigned long extra_rcvbufs_sent;
};

77 /* struct visornic_devdata
78 * @enabled: 0 disabled 1 enabled to receive.
79 * @enab_dis_acked: NET_RCV_ENABLE/DISABLE acked by IOPART.
86 * @incarnation_id: incarnation_id lets IOPART know about
88 * @old_flags: flags as they were prior to
90 * @usage: count of users.
91 * @num_rcv_bufs: number of rcv buffers the vnic will post.
92 * @num_rcv_bufs_could_not_alloc:
93 * @num_rcvbuf_in_iovm:
94 * @alloc_failed_in_if_needed_cnt:
95 * @alloc_failed_in_repost_rtn_cnt:
96 * @max_outstanding_net_xmits: absolute max number of outstanding xmits
97 * - should never hit this.
98 * @upper_threshold_net_xmits: high water mark for calling
100 * @lower_threshold_net_xmits: high water mark for calling
101 * netif_wake_queue().
102 * @struct xmitbufhead: xmitbufhead - head of the xmit buffer list
103 * sent to the IOPART end.
104 * @server_down_complete_func:
105 * @struct timeout_reset:
106 * @struct *cmdrsp_rcv: cmdrsp_rcv is used for posting/unposting rcv
108 * @struct *xmit_cmdrsp: xmit_cmdrsp - issues NET_XMIT - only one
109 * active xmit at a time.
110 * @server_down: IOPART is down.
111 * @server_change_state: Processing SERVER_CHANGESTATE msg.
112 * @going_away: device is being torn down.
113 * @struct *eth_debugfs_dir:
116 * @interrupts_disabled:
118 * @priv_lock: spinlock to access devdata structures.
119 * @flow_control_upper_hits:
120 * @flow_control_lower_hits:
121 * @n_rcv0: # rcvs of 0 buffers.
122 * @n_rcv1: # rcvs of 1 buffers.
123 * @n_rcv2: # rcvs of 2 buffers.
124 * @n_rcvx: # rcvs of >2 buffers.
125 * @found_repost_rcvbuf_cnt: # repost_rcvbuf_cnt.
126 * @repost_found_skb_cnt: # of found the skb.
127 * @n_repost_deficit: # of lost rcv buffers.
128 * @bad_rcv_buf: # of unknown rcv skb not freed.
129 * @n_rcv_packets_not_accepted: # bogs rcv packets.
130 * @queuefullmsg_logged:
132 * @struct irq_poll_timer:
136 struct visornic_devdata {
137 unsigned short enabled;
138 unsigned short enab_dis_acked;
140 struct visor_device *dev;
141 struct net_device *netdev;
142 struct net_device_stats net_stats;
143 atomic_t interrupt_rcvd;
144 wait_queue_head_t rsp_queue;
145 struct sk_buff **rcvbuf;
147 unsigned short old_flags;
151 int num_rcv_bufs_could_not_alloc;
152 atomic_t num_rcvbuf_in_iovm;
153 unsigned long alloc_failed_in_if_needed_cnt;
154 unsigned long alloc_failed_in_repost_rtn_cnt;
156 unsigned long max_outstanding_net_xmits;
157 unsigned long upper_threshold_net_xmits;
158 unsigned long lower_threshold_net_xmits;
159 struct sk_buff_head xmitbufhead;
161 visorbus_state_complete_func server_down_complete_func;
162 struct work_struct timeout_reset;
163 struct uiscmdrsp *cmdrsp_rcv;
164 struct uiscmdrsp *xmit_cmdrsp;
166 bool server_change_state;
168 struct dentry *eth_debugfs_dir;
170 u64 interrupts_notme;
171 u64 interrupts_disabled;
173 /* spinlock to access devdata structures. */
174 spinlock_t priv_lock;
176 /* flow control counter */
177 u64 flow_control_upper_hits;
178 u64 flow_control_lower_hits;
181 unsigned long n_rcv0;
182 unsigned long n_rcv1;
183 unsigned long n_rcv2;
184 unsigned long n_rcvx;
185 unsigned long found_repost_rcvbuf_cnt;
186 unsigned long repost_found_skb_cnt;
187 unsigned long n_repost_deficit;
188 unsigned long bad_rcv_buf;
189 unsigned long n_rcv_packets_not_accepted;
191 int queuefullmsg_logged;
192 struct chanstat chstat;
193 struct timer_list irq_poll_timer;
194 struct napi_struct napi;
195 struct uiscmdrsp cmdrsp[SIZEOF_CMDRSP];
198 /* Returns next non-zero index on success or 0 on failure (i.e. out of room). */
199 static u16 add_physinfo_entries(u64 inp_pfn, u16 inp_off, u16 inp_len,
200 u16 index, u16 max_pi_arr_entries,
201 struct phys_info pi_arr[])
203 u16 i, len, firstlen;
205 firstlen = PI_PAGE_SIZE - inp_off;
206 if (inp_len <= firstlen) {
207 /* The input entry spans only one page - add as is. */
208 if (index >= max_pi_arr_entries)
210 pi_arr[index].pi_pfn = inp_pfn;
211 pi_arr[index].pi_off = (u16)inp_off;
212 pi_arr[index].pi_len = (u16)inp_len;
216 /* This entry spans multiple pages. */
217 for (len = inp_len, i = 0; len;
218 len -= pi_arr[index + i].pi_len, i++) {
219 if (index + i >= max_pi_arr_entries)
221 pi_arr[index + i].pi_pfn = inp_pfn + i;
223 pi_arr[index].pi_off = inp_off;
224 pi_arr[index].pi_len = firstlen;
226 pi_arr[index + i].pi_off = 0;
227 pi_arr[index + i].pi_len = min_t(u16, len,
234 /* visor_copy_fragsinfo_from_skb - copy fragment list in the SKB to a phys_info
235 * array that the IOPART understands
236 * @skb: Skbuff that we are pulling the frags from.
237 * @firstfraglen: Length of first fragment in skb.
238 * @frags_max: Max len of frags array.
239 * @frags: Frags array filled in on output.
241 * Return: Positive integer indicating number of entries filled in frags on
242 * success, negative integer on error.
244 static int visor_copy_fragsinfo_from_skb(struct sk_buff *skb,
245 unsigned int firstfraglen,
246 unsigned int frags_max,
247 struct phys_info frags[])
249 unsigned int count = 0, frag, size, offset = 0, numfrags;
250 unsigned int total_count;
252 numfrags = skb_shinfo(skb)->nr_frags;
254 /* Compute the number of fragments this skb has, and if its more than
255 * frag array can hold, linearize the skb
257 total_count = numfrags + (firstfraglen / PI_PAGE_SIZE);
258 if (firstfraglen % PI_PAGE_SIZE)
261 if (total_count > frags_max) {
262 if (skb_linearize(skb))
264 numfrags = skb_shinfo(skb)->nr_frags;
268 while (firstfraglen) {
269 if (count == frags_max)
272 frags[count].pi_pfn =
273 page_to_pfn(virt_to_page(skb->data + offset));
274 frags[count].pi_off =
275 (unsigned long)(skb->data + offset) & PI_PAGE_MASK;
276 size = min_t(unsigned int, firstfraglen,
277 PI_PAGE_SIZE - frags[count].pi_off);
279 /* can take smallest of firstfraglen (what's left) OR
280 * bytes left in the page
282 frags[count].pi_len = size;
283 firstfraglen -= size;
288 if ((count + numfrags) > frags_max)
291 for (frag = 0; frag < numfrags; frag++) {
292 count = add_physinfo_entries(page_to_pfn(
293 skb_frag_page(&skb_shinfo(skb)->frags[frag])),
294 skb_shinfo(skb)->frags[frag].page_offset,
295 skb_shinfo(skb)->frags[frag].size, count,
297 /* add_physinfo_entries only returns
298 * zero if the frags array is out of room
299 * That should never happen because we
300 * fail above, if count+numfrags > frags_max.
306 if (skb_shinfo(skb)->frag_list) {
307 struct sk_buff *skbinlist;
310 for (skbinlist = skb_shinfo(skb)->frag_list; skbinlist;
311 skbinlist = skbinlist->next) {
312 c = visor_copy_fragsinfo_from_skb(skbinlist,
325 static ssize_t enable_ints_write(struct file *file,
326 const char __user *buffer,
327 size_t count, loff_t *ppos)
329 /* Don't want to break ABI here by having a debugfs
330 * file that no longer exists or is writable, so
331 * lets just make this a vestigual function
336 static const struct file_operations debugfs_enable_ints_fops = {
337 .write = enable_ints_write,
340 /* visornic_serverdown_complete - pause device following IOPART going down
341 * @devdata: Device managed by IOPART.
343 * The IO partition has gone down, and we need to do some cleanup for when it
344 * comes back. Treat the IO partition as the link being down.
346 static void visornic_serverdown_complete(struct visornic_devdata *devdata)
348 struct net_device *netdev = devdata->netdev;
350 /* Stop polling for interrupts */
351 del_timer_sync(&devdata->irq_poll_timer);
357 atomic_set(&devdata->num_rcvbuf_in_iovm, 0);
358 devdata->chstat.sent_xmit = 0;
359 devdata->chstat.got_xmit_done = 0;
361 if (devdata->server_down_complete_func)
362 (*devdata->server_down_complete_func)(devdata->dev, 0);
364 devdata->server_down = true;
365 devdata->server_change_state = false;
366 devdata->server_down_complete_func = NULL;
369 /* visornic_serverdown - Command has notified us that IOPART is down
370 * @devdata: Device managed by IOPART.
371 * @complete_func: Function to call when finished.
373 * Schedule the work needed to handle the server down request. Make sure we
374 * haven't already handled the server change state event.
376 * Return: 0 if we scheduled the work, negative integer on error.
378 static int visornic_serverdown(struct visornic_devdata *devdata,
379 visorbus_state_complete_func complete_func)
384 spin_lock_irqsave(&devdata->priv_lock, flags);
385 if (devdata->server_change_state) {
386 dev_dbg(&devdata->dev->device, "%s changing state\n",
391 if (devdata->server_down) {
392 dev_dbg(&devdata->dev->device, "%s already down\n",
397 if (devdata->going_away) {
398 dev_dbg(&devdata->dev->device,
399 "%s aborting because device removal pending\n",
404 devdata->server_change_state = true;
405 devdata->server_down_complete_func = complete_func;
406 spin_unlock_irqrestore(&devdata->priv_lock, flags);
408 visornic_serverdown_complete(devdata);
412 spin_unlock_irqrestore(&devdata->priv_lock, flags);
416 /* alloc_rcv_buf - alloc rcv buffer to be given to the IO Partition
417 * @netdev: Network adapter the rcv bufs are attached too.
419 * Create an sk_buff (rcv_buf) that will be passed to the IO Partition
420 * so that it can write rcv data into our memory space.
422 * Return: Pointer to sk_buff.
424 static struct sk_buff *alloc_rcv_buf(struct net_device *netdev)
428 /* NOTE: the first fragment in each rcv buffer is pointed to by
429 * rcvskb->data. For now all rcv buffers will be RCVPOST_BUF_SIZE
430 * in length, so the first frag is large enough to hold 1514.
432 skb = alloc_skb(RCVPOST_BUF_SIZE, GFP_ATOMIC);
436 /* current value of mtu doesn't come into play here; large
437 * packets will just end up using multiple rcv buffers all of
440 skb->len = RCVPOST_BUF_SIZE;
441 /* alloc_skb already zeroes it out for clarification. */
446 /* post_skb - post a skb to the IO Partition
447 * @cmdrsp: Cmdrsp packet to be send to the IO Partition.
448 * @devdata: visornic_devdata to post the skb to.
449 * @skb: Skb to give to the IO partition.
451 * Return: 0 on success, negative integer on error.
453 static int post_skb(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
458 cmdrsp->net.buf = skb;
459 cmdrsp->net.rcvpost.frag.pi_pfn = page_to_pfn(virt_to_page(skb->data));
460 cmdrsp->net.rcvpost.frag.pi_off =
461 (unsigned long)skb->data & PI_PAGE_MASK;
462 cmdrsp->net.rcvpost.frag.pi_len = skb->len;
463 cmdrsp->net.rcvpost.unique_num = devdata->incarnation_id;
465 if ((cmdrsp->net.rcvpost.frag.pi_off + skb->len) > PI_PAGE_SIZE)
468 cmdrsp->net.type = NET_RCV_POST;
469 cmdrsp->cmdtype = CMD_NET_TYPE;
470 err = visorchannel_signalinsert(devdata->dev->visorchannel,
474 devdata->chstat.sent_post_failed++;
478 atomic_inc(&devdata->num_rcvbuf_in_iovm);
479 devdata->chstat.sent_post++;
483 /* send_enbdis - Send NET_RCV_ENBDIS to IO Partition
484 * @netdev: Netdevice we are enabling/disabling, used as context return value.
485 * @state: Enable = 1/disable = 0.
486 * @devdata: Visornic device we are enabling/disabling.
488 * Send the enable/disable message to the IO Partition.
490 * Return: 0 on success, negative integer on error.
492 static int send_enbdis(struct net_device *netdev, int state,
493 struct visornic_devdata *devdata)
497 devdata->cmdrsp_rcv->net.enbdis.enable = state;
498 devdata->cmdrsp_rcv->net.enbdis.context = netdev;
499 devdata->cmdrsp_rcv->net.type = NET_RCV_ENBDIS;
500 devdata->cmdrsp_rcv->cmdtype = CMD_NET_TYPE;
501 err = visorchannel_signalinsert(devdata->dev->visorchannel,
503 devdata->cmdrsp_rcv);
506 devdata->chstat.sent_enbdis++;
510 /* visornic_disable_with_timeout - disable network adapter
511 * @netdev: netdevice to disable.
512 * @timeout: Timeout to wait for disable.
514 * Disable the network adapter and inform the IO Partition that we are disabled.
515 * Reclaim memory from rcv bufs.
517 * Return: 0 on success, negative integer on failure of IO Partition responding.
519 static int visornic_disable_with_timeout(struct net_device *netdev,
522 struct visornic_devdata *devdata = netdev_priv(netdev);
528 /* send a msg telling the other end we are stopping incoming pkts */
529 spin_lock_irqsave(&devdata->priv_lock, flags);
530 devdata->enabled = 0;
531 /* must wait for ack */
532 devdata->enab_dis_acked = 0;
533 spin_unlock_irqrestore(&devdata->priv_lock, flags);
535 /* send disable and wait for ack -- don't hold lock when sending
536 * disable because if the queue is full, insert might sleep.
537 * If an error occurs, don't wait for the timeout.
539 err = send_enbdis(netdev, 0, devdata);
543 /* wait for ack to arrive before we try to free rcv buffers
544 * NOTE: the other end automatically unposts the rcv buffers when
545 * when it gets a disable.
547 spin_lock_irqsave(&devdata->priv_lock, flags);
548 while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
550 if (devdata->enab_dis_acked)
552 if (devdata->server_down || devdata->server_change_state) {
553 dev_dbg(&netdev->dev, "%s server went away\n",
557 set_current_state(TASK_INTERRUPTIBLE);
558 spin_unlock_irqrestore(&devdata->priv_lock, flags);
559 wait += schedule_timeout(msecs_to_jiffies(10));
560 spin_lock_irqsave(&devdata->priv_lock, flags);
563 /* Wait for usage to go to 1 (no other users) before freeing
566 if (atomic_read(&devdata->usage) > 1) {
568 set_current_state(TASK_INTERRUPTIBLE);
569 spin_unlock_irqrestore(&devdata->priv_lock, flags);
570 schedule_timeout(msecs_to_jiffies(10));
571 spin_lock_irqsave(&devdata->priv_lock, flags);
572 if (atomic_read(&devdata->usage))
576 /* we've set enabled to 0, so we can give up the lock. */
577 spin_unlock_irqrestore(&devdata->priv_lock, flags);
579 /* stop the transmit queue so nothing more can be transmitted */
580 netif_stop_queue(netdev);
582 napi_disable(&devdata->napi);
584 skb_queue_purge(&devdata->xmitbufhead);
586 /* Free rcv buffers - other end has automatically unposed them on
589 for (i = 0; i < devdata->num_rcv_bufs; i++) {
590 if (devdata->rcvbuf[i]) {
591 kfree_skb(devdata->rcvbuf[i]);
592 devdata->rcvbuf[i] = NULL;
599 /* init_rcv_bufs - initialize receive buffs and send them to the IO Partition
600 * @netdev: struct netdevice.
601 * @devdata: visornic_devdata.
603 * Allocate rcv buffers and post them to the IO Partition.
605 * Return: 0 on success, negative integer on failure.
607 static int init_rcv_bufs(struct net_device *netdev,
608 struct visornic_devdata *devdata)
610 int i, j, count, err;
612 /* allocate fixed number of receive buffers to post to uisnic
613 * post receive buffers after we've allocated a required amount
615 for (i = 0; i < devdata->num_rcv_bufs; i++) {
616 devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
617 /* if we failed to allocate one let us stop */
618 if (!devdata->rcvbuf[i])
621 /* couldn't even allocate one -- bail out */
626 /* Ensure we can alloc 2/3rd of the requested number of buffers.
627 * 2/3 is an arbitrary choice; used also in ndis init.c
629 if (count < ((2 * devdata->num_rcv_bufs) / 3)) {
630 /* free receive buffers we did alloc and then bail out */
631 for (i = 0; i < count; i++) {
632 kfree_skb(devdata->rcvbuf[i]);
633 devdata->rcvbuf[i] = NULL;
638 /* post receive buffers to receive incoming input - without holding
639 * lock - we've not enabled nor started the queue so there shouldn't
640 * be any rcv or xmit activity
642 for (i = 0; i < count; i++) {
643 err = post_skb(devdata->cmdrsp_rcv, devdata,
649 * If we posted at least one skb, we should return success,
650 * but need to free the resources that we have not successfully
653 for (j = i; j < count; j++) {
654 kfree_skb(devdata->rcvbuf[j]);
655 devdata->rcvbuf[j] = NULL;
665 /* visornic_enable_with_timeout - send enable to IO Partition
666 * @netdev: struct net_device.
667 * @timeout: Time to wait for the ACK from the enable.
669 * Sends enable to IOVM and inits, and posts receive buffers to IOVM. Timeout is
670 * defined in msecs (timeout of 0 specifies infinite wait).
672 * Return: 0 on success, negative integer on failure.
674 static int visornic_enable_with_timeout(struct net_device *netdev,
678 struct visornic_devdata *devdata = netdev_priv(netdev);
682 napi_enable(&devdata->napi);
684 /* NOTE: the other end automatically unposts the rcv buffers when it
687 err = init_rcv_bufs(netdev, devdata);
689 dev_err(&netdev->dev,
690 "%s failed to init rcv bufs\n", __func__);
694 spin_lock_irqsave(&devdata->priv_lock, flags);
695 devdata->enabled = 1;
696 devdata->enab_dis_acked = 0;
698 /* now we're ready, let's send an ENB to uisnic but until we get
699 * an ACK back from uisnic, we'll drop the packets
701 devdata->n_rcv_packets_not_accepted = 0;
702 spin_unlock_irqrestore(&devdata->priv_lock, flags);
704 /* send enable and wait for ack -- don't hold lock when sending enable
705 * because if the queue is full, insert might sleep. If an error
708 err = send_enbdis(netdev, 1, devdata);
712 spin_lock_irqsave(&devdata->priv_lock, flags);
713 while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
715 if (devdata->enab_dis_acked)
717 if (devdata->server_down || devdata->server_change_state) {
718 dev_dbg(&netdev->dev, "%s server went away\n",
722 set_current_state(TASK_INTERRUPTIBLE);
723 spin_unlock_irqrestore(&devdata->priv_lock, flags);
724 wait += schedule_timeout(msecs_to_jiffies(10));
725 spin_lock_irqsave(&devdata->priv_lock, flags);
728 spin_unlock_irqrestore(&devdata->priv_lock, flags);
730 if (!devdata->enab_dis_acked) {
731 dev_err(&netdev->dev, "%s missing ACK\n", __func__);
735 netif_start_queue(netdev);
739 /* visornic_timeout_reset - handle xmit timeout resets
740 * @work: Work item that scheduled the work.
742 * Transmit timeouts are typically handled by resetting the device for our
743 * virtual NIC; we will send a disable and enable to the IOVM. If it doesn't
744 * respond, we will trigger a serverdown.
746 static void visornic_timeout_reset(struct work_struct *work)
748 struct visornic_devdata *devdata;
749 struct net_device *netdev;
752 devdata = container_of(work, struct visornic_devdata, timeout_reset);
753 netdev = devdata->netdev;
756 if (!netif_running(netdev)) {
761 response = visornic_disable_with_timeout(netdev,
762 VISORNIC_INFINITE_RSP_WAIT);
764 goto call_serverdown;
766 response = visornic_enable_with_timeout(netdev,
767 VISORNIC_INFINITE_RSP_WAIT);
769 goto call_serverdown;
776 visornic_serverdown(devdata, NULL);
780 /* visornic_open - enable the visornic device and mark the queue started
781 * @netdev: netdevice to start.
783 * Enable the device and start the transmit queue.
785 * Return: 0 on success.
787 static int visornic_open(struct net_device *netdev)
789 visornic_enable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
793 /* visornic_close - disables the visornic device and stops the queues
794 * @netdev: netdevice to stop.
796 * Disable the device and stop the transmit queue.
798 * Return 0 on success.
800 static int visornic_close(struct net_device *netdev)
802 visornic_disable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
806 /* devdata_xmits_outstanding - compute outstanding xmits
807 * @devdata: visornic_devdata for device
809 * Return: Long integer representing the number of outstanding xmits.
811 static unsigned long devdata_xmits_outstanding(struct visornic_devdata *devdata)
813 if (devdata->chstat.sent_xmit >= devdata->chstat.got_xmit_done)
814 return devdata->chstat.sent_xmit -
815 devdata->chstat.got_xmit_done;
816 return (ULONG_MAX - devdata->chstat.got_xmit_done
817 + devdata->chstat.sent_xmit + 1);
820 /* vnic_hit_high_watermark
821 * @devdata: Indicates visornic device we are checking.
822 * @high_watermark: Max num of unacked xmits we will tolerate before we will
825 * Return: True iff the number of unacked xmits sent to the IO Partition is >=
826 * high_watermark. False otherwise.
828 static bool vnic_hit_high_watermark(struct visornic_devdata *devdata,
829 ulong high_watermark)
831 return (devdata_xmits_outstanding(devdata) >= high_watermark);
834 /* vnic_hit_low_watermark
835 * @devdata: Indicates visornic device we are checking.
836 * @low_watermark: We will wait until the num of unacked xmits drops to this
837 * value or lower before we start transmitting again.
839 * Return: True iff the number of unacked xmits sent to the IO Partition is <=
842 static bool vnic_hit_low_watermark(struct visornic_devdata *devdata,
845 return (devdata_xmits_outstanding(devdata) <= low_watermark);
848 /* visornic_xmit - send a packet to the IO Partition
849 * @skb: Packet to be sent.
850 * @netdev: Net device the packet is being sent from.
852 * Convert the skb to a cmdrsp so the IO Partition can understand it, and send
853 * the XMIT command to the IO Partition for processing. This function is
854 * protected from concurrent calls by a spinlock xmit_lock in the net_device
855 * struct. As soon as the function returns, it can be called again.
857 * Return: NETDEV_TX_OK.
859 static int visornic_xmit(struct sk_buff *skb, struct net_device *netdev)
861 struct visornic_devdata *devdata;
862 int len, firstfraglen, padlen;
863 struct uiscmdrsp *cmdrsp = NULL;
867 devdata = netdev_priv(netdev);
868 spin_lock_irqsave(&devdata->priv_lock, flags);
870 if (netif_queue_stopped(netdev) || devdata->server_down ||
871 devdata->server_change_state) {
872 spin_unlock_irqrestore(&devdata->priv_lock, flags);
874 dev_dbg(&netdev->dev,
875 "%s busy - queue stopped\n", __func__);
880 /* sk_buff struct is used to host network data throughout all the
881 * linux network subsystems
885 /* skb->len is the FULL length of data (including fragmentary portion)
886 * skb->data_len is the length of the fragment portion in frags
887 * skb->len - skb->data_len is size of the 1st fragment in skb->data
888 * calculate the length of the first fragment that skb->data is
891 firstfraglen = skb->len - skb->data_len;
892 if (firstfraglen < ETH_HLEN) {
893 spin_unlock_irqrestore(&devdata->priv_lock, flags);
895 dev_err(&netdev->dev,
896 "%s busy - first frag too small (%d)\n",
897 __func__, firstfraglen);
902 if ((len < ETH_MIN_PACKET_SIZE) &&
903 ((skb_end_pointer(skb) - skb->data) >= ETH_MIN_PACKET_SIZE)) {
904 /* pad the packet out to minimum size */
905 padlen = ETH_MIN_PACKET_SIZE - len;
906 memset(&skb->data[len], 0, padlen);
910 firstfraglen += padlen;
913 cmdrsp = devdata->xmit_cmdrsp;
915 memset(cmdrsp, 0, SIZEOF_CMDRSP);
916 cmdrsp->net.type = NET_XMIT;
917 cmdrsp->cmdtype = CMD_NET_TYPE;
919 /* save the pointer to skb -- we'll need it for completion */
920 cmdrsp->net.buf = skb;
922 if (vnic_hit_high_watermark(devdata,
923 devdata->max_outstanding_net_xmits)) {
924 /* extra NET_XMITs queued over to IOVM - need to wait */
925 devdata->chstat.reject_count++;
926 if (!devdata->queuefullmsg_logged &&
927 ((devdata->chstat.reject_count & 0x3ff) == 1))
928 devdata->queuefullmsg_logged = 1;
929 netif_stop_queue(netdev);
930 spin_unlock_irqrestore(&devdata->priv_lock, flags);
932 dev_dbg(&netdev->dev,
933 "%s busy - waiting for iovm to catch up\n",
938 if (devdata->queuefullmsg_logged)
939 devdata->queuefullmsg_logged = 0;
941 if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
942 cmdrsp->net.xmt.lincsum.valid = 1;
943 cmdrsp->net.xmt.lincsum.protocol = skb->protocol;
944 if (skb_transport_header(skb) > skb->data) {
945 cmdrsp->net.xmt.lincsum.hrawoff =
946 skb_transport_header(skb) - skb->data;
947 cmdrsp->net.xmt.lincsum.hrawoff = 1;
949 if (skb_network_header(skb) > skb->data) {
950 cmdrsp->net.xmt.lincsum.nhrawoff =
951 skb_network_header(skb) - skb->data;
952 cmdrsp->net.xmt.lincsum.nhrawoffv = 1;
954 cmdrsp->net.xmt.lincsum.csum = skb->csum;
956 cmdrsp->net.xmt.lincsum.valid = 0;
959 /* save off the length of the entire data packet */
960 cmdrsp->net.xmt.len = len;
962 /* copy ethernet header from first frag into ocmdrsp
963 * - everything else will be pass in frags & DMA'ed
965 memcpy(cmdrsp->net.xmt.ethhdr, skb->data, ETH_HLEN);
967 /* copy frags info - from skb->data we need to only provide access
970 cmdrsp->net.xmt.num_frags =
971 visor_copy_fragsinfo_from_skb(skb, firstfraglen,
973 cmdrsp->net.xmt.frags);
974 if (cmdrsp->net.xmt.num_frags < 0) {
975 spin_unlock_irqrestore(&devdata->priv_lock, flags);
977 dev_err(&netdev->dev,
978 "%s busy - copy frags failed\n", __func__);
983 err = visorchannel_signalinsert(devdata->dev->visorchannel,
984 IOCHAN_TO_IOPART, cmdrsp);
986 netif_stop_queue(netdev);
987 spin_unlock_irqrestore(&devdata->priv_lock, flags);
989 dev_dbg(&netdev->dev,
990 "%s busy - signalinsert failed\n", __func__);
995 /* Track the skbs that have been sent to the IOVM for XMIT */
996 skb_queue_head(&devdata->xmitbufhead, skb);
998 /* update xmt stats */
999 devdata->net_stats.tx_packets++;
1000 devdata->net_stats.tx_bytes += skb->len;
1001 devdata->chstat.sent_xmit++;
1003 /* check if we have hit the high watermark for netif_stop_queue() */
1004 if (vnic_hit_high_watermark(devdata,
1005 devdata->upper_threshold_net_xmits)) {
1006 /* extra NET_XMITs queued over to IOVM - need to wait */
1007 /* stop queue - call netif_wake_queue() after lower threshold */
1008 netif_stop_queue(netdev);
1009 dev_dbg(&netdev->dev,
1010 "%s busy - invoking iovm flow control\n",
1012 devdata->flow_control_upper_hits++;
1014 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1016 /* skb will be freed when we get back NET_XMIT_DONE */
1017 return NETDEV_TX_OK;
1020 /* visornic_get_stats - returns net_stats of the visornic device
1021 * @netdev: netdevice.
1023 * Return: Pointer to the net_device_stats struct for the device.
1025 static struct net_device_stats *visornic_get_stats(struct net_device *netdev)
1027 struct visornic_devdata *devdata = netdev_priv(netdev);
1029 return &devdata->net_stats;
1032 /* visornic_change_mtu - changes mtu of device
1033 * @netdev: netdevice.
1034 * @new_mtu: Value of new mtu.
1036 * The device's MTU cannot be changed by system; it must be changed via a
1037 * CONTROLVM message. All vnics and pnics in a switch have to have the same MTU
1038 * for everything to work. Currently not supported.
1042 static int visornic_change_mtu(struct net_device *netdev, int new_mtu)
1047 /* visornic_set_multi - set visornic device flags
1048 * @netdev: netdevice.
1050 * The only flag we currently support is IFF_PROMISC.
1052 static void visornic_set_multi(struct net_device *netdev)
1054 struct uiscmdrsp *cmdrsp;
1055 struct visornic_devdata *devdata = netdev_priv(netdev);
1058 if (devdata->old_flags == netdev->flags)
1061 if ((netdev->flags & IFF_PROMISC) ==
1062 (devdata->old_flags & IFF_PROMISC))
1063 goto out_save_flags;
1065 cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1068 cmdrsp->cmdtype = CMD_NET_TYPE;
1069 cmdrsp->net.type = NET_RCV_PROMISC;
1070 cmdrsp->net.enbdis.context = netdev;
1071 cmdrsp->net.enbdis.enable =
1072 netdev->flags & IFF_PROMISC;
1073 err = visorchannel_signalinsert(devdata->dev->visorchannel,
1081 devdata->old_flags = netdev->flags;
1084 /* visornic_xmit_timeout - request to timeout the xmit
1085 * @netdev: netdevice.
1087 * Queue the work and return. Make sure we have not already been informed that
1088 * the IO Partition is gone; if so, we will have already timed-out the xmits.
1090 static void visornic_xmit_timeout(struct net_device *netdev)
1092 struct visornic_devdata *devdata = netdev_priv(netdev);
1093 unsigned long flags;
1095 spin_lock_irqsave(&devdata->priv_lock, flags);
1096 if (devdata->going_away) {
1097 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1098 dev_dbg(&devdata->dev->device,
1099 "%s aborting because device removal pending\n",
1104 /* Ensure that a ServerDown message hasn't been received */
1105 if (!devdata->enabled ||
1106 (devdata->server_down && !devdata->server_change_state)) {
1107 dev_dbg(&netdev->dev, "%s no processing\n",
1109 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1112 schedule_work(&devdata->timeout_reset);
1113 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1116 /* repost_return - repost rcv bufs that have come back
1117 * @cmdrsp: IO channel command struct to post.
1118 * @devdata: Visornic devdata for the device.
1119 * @skb: Socket buffer.
1120 * @netdev: netdevice.
1122 * Repost rcv buffers that have been returned to us when we are finished
1125 * Return: 0 for success, negative integer on error.
1127 static int repost_return(struct uiscmdrsp *cmdrsp,
1128 struct visornic_devdata *devdata,
1129 struct sk_buff *skb, struct net_device *netdev)
1131 struct net_pkt_rcv copy;
1132 int i = 0, cc, numreposted;
1136 copy = cmdrsp->net.rcv;
1137 switch (copy.numrcvbufs) {
1151 for (cc = 0, numreposted = 0; cc < copy.numrcvbufs; cc++) {
1152 for (i = 0; i < devdata->num_rcv_bufs; i++) {
1153 if (devdata->rcvbuf[i] != copy.rcvbuf[cc])
1156 if ((skb) && devdata->rcvbuf[i] == skb) {
1157 devdata->found_repost_rcvbuf_cnt++;
1159 devdata->repost_found_skb_cnt++;
1161 devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1162 if (!devdata->rcvbuf[i]) {
1163 devdata->num_rcv_bufs_could_not_alloc++;
1164 devdata->alloc_failed_in_repost_rtn_cnt++;
1168 status = post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1170 kfree_skb(devdata->rcvbuf[i]);
1171 devdata->rcvbuf[i] = NULL;
1178 if (numreposted != copy.numrcvbufs) {
1179 devdata->n_repost_deficit++;
1187 devdata->bad_rcv_buf++;
1193 /* visornic_rx - handle receive packets coming back from IO Partition
1194 * @cmdrsp: Receive packet returned from IO Partition.
1196 * Got a receive packet back from the IO Partition; handle it and send it up
1199 * Return: 1 iff an skb was received, otherwise 0.
1201 static int visornic_rx(struct uiscmdrsp *cmdrsp)
1203 struct visornic_devdata *devdata;
1204 struct sk_buff *skb, *prev, *curr;
1205 struct net_device *netdev;
1206 int cc, currsize, off;
1208 unsigned long flags;
1210 /* post new rcv buf to the other end using the cmdrsp we have at hand
1211 * post it without holding lock - but we'll use the signal lock to
1212 * synchronize the queue insert the cmdrsp that contains the net.rcv
1213 * is the one we are using to repost, so copy the info we need from it.
1215 skb = cmdrsp->net.buf;
1218 devdata = netdev_priv(netdev);
1220 spin_lock_irqsave(&devdata->priv_lock, flags);
1221 atomic_dec(&devdata->num_rcvbuf_in_iovm);
1223 /* set length to how much was ACTUALLY received -
1224 * NOTE: rcv_done_len includes actual length of data rcvd
1227 skb->len = cmdrsp->net.rcv.rcv_done_len;
1229 /* update rcv stats - call it with priv_lock held */
1230 devdata->net_stats.rx_packets++;
1231 devdata->net_stats.rx_bytes += skb->len;
1233 /* test enabled while holding lock */
1234 if (!(devdata->enabled && devdata->enab_dis_acked)) {
1235 /* don't process it unless we're in enable mode and until
1236 * we've gotten an ACK saying the other end got our RCV enable
1238 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1239 repost_return(cmdrsp, devdata, skb, netdev);
1243 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1245 /* when skb was allocated, skb->dev, skb->data, skb->len and
1246 * skb->data_len were setup. AND, data has already put into the
1247 * skb (both first frag and in frags pages)
1248 * NOTE: firstfragslen is the amount of data in skb->data and that
1249 * which is not in nr_frags or frag_list. This is now simply
1250 * RCVPOST_BUF_SIZE. bump tail to show how much data is in
1251 * firstfrag & set data_len to show rest see if we have to chain
1254 /* do PRECAUTIONARY check */
1255 if (skb->len > RCVPOST_BUF_SIZE) {
1256 if (cmdrsp->net.rcv.numrcvbufs < 2) {
1257 if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1258 dev_err(&devdata->netdev->dev,
1259 "repost_return failed");
1262 /* length rcvd is greater than firstfrag in this skb rcv buf */
1263 /* amount in skb->data */
1264 skb->tail += RCVPOST_BUF_SIZE;
1265 /* amount that will be in frag_list */
1266 skb->data_len = skb->len - RCVPOST_BUF_SIZE;
1268 /* data fits in this skb - no chaining - do
1269 * PRECAUTIONARY check
1272 if (cmdrsp->net.rcv.numrcvbufs != 1) {
1273 if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1274 dev_err(&devdata->netdev->dev,
1275 "repost_return failed");
1278 skb->tail += skb->len;
1279 /* nothing rcvd in frag_list */
1282 off = skb_tail_pointer(skb) - skb->data;
1284 /* amount we bumped tail by in the head skb
1285 * it is used to calculate the size of each chained skb below
1286 * it is also used to index into bufline to continue the copy
1287 * (for chansocktwopc)
1288 * if necessary chain the rcv skbs together.
1289 * NOTE: index 0 has the same as cmdrsp->net.rcv.skb; we need to
1290 * chain the rest to that one.
1291 * - do PRECAUTIONARY check
1293 if (cmdrsp->net.rcv.rcvbuf[0] != skb) {
1294 if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1295 dev_err(&devdata->netdev->dev, "repost_return failed");
1299 if (cmdrsp->net.rcv.numrcvbufs > 1) {
1300 /* chain the various rcv buffers into the skb's frag_list. */
1301 /* Note: off was initialized above */
1302 for (cc = 1, prev = NULL;
1303 cc < cmdrsp->net.rcv.numrcvbufs; cc++) {
1304 curr = (struct sk_buff *)cmdrsp->net.rcv.rcvbuf[cc];
1306 /* start of list- set head */
1308 skb_shinfo(skb)->frag_list = curr;
1313 /* should we set skb->len and skb->data_len for each
1314 * buffer being chained??? can't hurt!
1316 currsize = min(skb->len - off,
1317 (unsigned int)RCVPOST_BUF_SIZE);
1318 curr->len = currsize;
1319 curr->tail += currsize;
1323 /* assert skb->len == off */
1324 if (skb->len != off) {
1325 netdev_err(devdata->netdev,
1326 "something wrong; skb->len:%d != off:%d\n",
1331 /* set up packet's protocol type using ethernet header - this
1332 * sets up skb->pkt_type & it also PULLS out the eth header
1334 skb->protocol = eth_type_trans(skb, netdev);
1337 skb->ip_summed = CHECKSUM_NONE;
1340 /* accept all packets */
1341 if (netdev->flags & IFF_PROMISC)
1343 if (skb->pkt_type == PACKET_BROADCAST) {
1344 /* accept all broadcast packets */
1345 if (netdev->flags & IFF_BROADCAST)
1347 } else if (skb->pkt_type == PACKET_MULTICAST) {
1348 if ((netdev->flags & IFF_MULTICAST) &&
1349 (netdev_mc_count(netdev))) {
1350 struct netdev_hw_addr *ha;
1353 /* only accept multicast packets that we can
1354 * find in our multicast address list
1356 netdev_for_each_mc_addr(ha, netdev) {
1357 if (ether_addr_equal(eth->h_dest,
1363 /* accept pkt, dest matches a multicast addr */
1367 /* accept packet, h_dest must match vnic mac address */
1368 } else if (skb->pkt_type == PACKET_HOST) {
1370 } else if (skb->pkt_type == PACKET_OTHERHOST) {
1371 /* something is not right */
1372 dev_err(&devdata->netdev->dev,
1373 "**** FAILED to deliver rcv packet to OS; name:%s Dest:%pM VNIC:%pM\n",
1374 netdev->name, eth->h_dest, netdev->dev_addr);
1376 /* drop packet - don't forward it up to OS */
1377 devdata->n_rcv_packets_not_accepted++;
1378 repost_return(cmdrsp, devdata, skb, netdev);
1382 netif_receive_skb(skb);
1383 /* netif_rx returns various values, but "in practice most drivers
1384 * ignore the return value
1388 /* whether the packet got dropped or handled, the skb is freed by
1389 * kernel code, so we shouldn't free it. but we should repost a
1392 repost_return(cmdrsp, devdata, skb, netdev);
1396 /* devdata_initialize - initialize devdata structure
1397 * @devdata: visornic_devdata structure to initialize.
1398 * @dev: visorbus_device it belongs to.
1400 * Setup initial values for the visornic, based on channel and default values.
1402 * Return: A pointer to the devdata structure.
1404 static struct visornic_devdata *devdata_initialize(
1405 struct visornic_devdata *devdata,
1406 struct visor_device *dev)
1409 devdata->incarnation_id = get_jiffies_64();
1413 /* devdata_release - free up references in devdata
1414 * @devdata: Struct to clean up.
1416 static void devdata_release(struct visornic_devdata *devdata)
1418 kfree(devdata->rcvbuf);
1419 kfree(devdata->cmdrsp_rcv);
1420 kfree(devdata->xmit_cmdrsp);
1423 static const struct net_device_ops visornic_dev_ops = {
1424 .ndo_open = visornic_open,
1425 .ndo_stop = visornic_close,
1426 .ndo_start_xmit = visornic_xmit,
1427 .ndo_get_stats = visornic_get_stats,
1428 .ndo_change_mtu = visornic_change_mtu,
1429 .ndo_tx_timeout = visornic_xmit_timeout,
1430 .ndo_set_rx_mode = visornic_set_multi,
1434 static ssize_t info_debugfs_read(struct file *file, char __user *buf,
1435 size_t len, loff_t *offset)
1437 ssize_t bytes_read = 0;
1439 struct visornic_devdata *devdata;
1440 struct net_device *dev;
1445 vbuf = kzalloc(len, GFP_KERNEL);
1449 /* for each vnic channel dump out channel specific data */
1451 for_each_netdev_rcu(current->nsproxy->net_ns, dev) {
1452 /* Only consider netdevs that are visornic, and are open */
1453 if ((dev->netdev_ops != &visornic_dev_ops) ||
1454 (!netif_queue_stopped(dev)))
1457 devdata = netdev_priv(dev);
1458 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1459 "netdev = %s (0x%p), MAC Addr %pM\n",
1463 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1464 "VisorNic Dev Info = 0x%p\n", devdata);
1465 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1466 " num_rcv_bufs = %d\n",
1467 devdata->num_rcv_bufs);
1468 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1469 " max_outstanding_next_xmits = %lu\n",
1470 devdata->max_outstanding_net_xmits);
1471 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1472 " upper_threshold_net_xmits = %lu\n",
1473 devdata->upper_threshold_net_xmits);
1474 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1475 " lower_threshold_net_xmits = %lu\n",
1476 devdata->lower_threshold_net_xmits);
1477 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1478 " queuefullmsg_logged = %d\n",
1479 devdata->queuefullmsg_logged);
1480 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1481 " chstat.got_rcv = %lu\n",
1482 devdata->chstat.got_rcv);
1483 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1484 " chstat.got_enbdisack = %lu\n",
1485 devdata->chstat.got_enbdisack);
1486 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1487 " chstat.got_xmit_done = %lu\n",
1488 devdata->chstat.got_xmit_done);
1489 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1490 " chstat.xmit_fail = %lu\n",
1491 devdata->chstat.xmit_fail);
1492 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1493 " chstat.sent_enbdis = %lu\n",
1494 devdata->chstat.sent_enbdis);
1495 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1496 " chstat.sent_promisc = %lu\n",
1497 devdata->chstat.sent_promisc);
1498 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1499 " chstat.sent_post = %lu\n",
1500 devdata->chstat.sent_post);
1501 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1502 " chstat.sent_post_failed = %lu\n",
1503 devdata->chstat.sent_post_failed);
1504 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1505 " chstat.sent_xmit = %lu\n",
1506 devdata->chstat.sent_xmit);
1507 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1508 " chstat.reject_count = %lu\n",
1509 devdata->chstat.reject_count);
1510 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1511 " chstat.extra_rcvbufs_sent = %lu\n",
1512 devdata->chstat.extra_rcvbufs_sent);
1513 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1514 " n_rcv0 = %lu\n", devdata->n_rcv0);
1515 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1516 " n_rcv1 = %lu\n", devdata->n_rcv1);
1517 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1518 " n_rcv2 = %lu\n", devdata->n_rcv2);
1519 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1520 " n_rcvx = %lu\n", devdata->n_rcvx);
1521 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1522 " num_rcvbuf_in_iovm = %d\n",
1523 atomic_read(&devdata->num_rcvbuf_in_iovm));
1524 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1525 " alloc_failed_in_if_needed_cnt = %lu\n",
1526 devdata->alloc_failed_in_if_needed_cnt);
1527 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1528 " alloc_failed_in_repost_rtn_cnt = %lu\n",
1529 devdata->alloc_failed_in_repost_rtn_cnt);
1530 /* str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1531 * " inner_loop_limit_reached_cnt = %lu\n",
1532 * devdata->inner_loop_limit_reached_cnt);
1534 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1535 " found_repost_rcvbuf_cnt = %lu\n",
1536 devdata->found_repost_rcvbuf_cnt);
1537 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1538 " repost_found_skb_cnt = %lu\n",
1539 devdata->repost_found_skb_cnt);
1540 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1541 " n_repost_deficit = %lu\n",
1542 devdata->n_repost_deficit);
1543 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1544 " bad_rcv_buf = %lu\n",
1545 devdata->bad_rcv_buf);
1546 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1547 " n_rcv_packets_not_accepted = %lu\n",
1548 devdata->n_rcv_packets_not_accepted);
1549 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1550 " interrupts_rcvd = %llu\n",
1551 devdata->interrupts_rcvd);
1552 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1553 " interrupts_notme = %llu\n",
1554 devdata->interrupts_notme);
1555 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1556 " interrupts_disabled = %llu\n",
1557 devdata->interrupts_disabled);
1558 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1559 " busy_cnt = %llu\n",
1561 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1562 " flow_control_upper_hits = %llu\n",
1563 devdata->flow_control_upper_hits);
1564 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1565 " flow_control_lower_hits = %llu\n",
1566 devdata->flow_control_lower_hits);
1567 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1568 " netif_queue = %s\n",
1569 netif_queue_stopped(devdata->netdev) ?
1570 "stopped" : "running");
1571 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1572 " xmits_outstanding = %lu\n",
1573 devdata_xmits_outstanding(devdata));
1576 bytes_read = simple_read_from_buffer(buf, len, offset, vbuf, str_pos);
1581 static struct dentry *visornic_debugfs_dir;
1582 static const struct file_operations debugfs_info_fops = {
1583 .read = info_debugfs_read,
1586 /* send_rcv_posts_if_needed - send receive buffers to the IO Partition.
1587 * @devdata: Visornic device.
1589 static void send_rcv_posts_if_needed(struct visornic_devdata *devdata)
1592 struct net_device *netdev;
1593 struct uiscmdrsp *cmdrsp = devdata->cmdrsp_rcv;
1594 int cur_num_rcv_bufs_to_alloc, rcv_bufs_allocated;
1597 /* don't do this until vnic is marked ready */
1598 if (!(devdata->enabled && devdata->enab_dis_acked))
1601 netdev = devdata->netdev;
1602 rcv_bufs_allocated = 0;
1603 /* this code is trying to prevent getting stuck here forever,
1604 * but still retry it if you cant allocate them all this time.
1606 cur_num_rcv_bufs_to_alloc = devdata->num_rcv_bufs_could_not_alloc;
1607 while (cur_num_rcv_bufs_to_alloc > 0) {
1608 cur_num_rcv_bufs_to_alloc--;
1609 for (i = 0; i < devdata->num_rcv_bufs; i++) {
1610 if (devdata->rcvbuf[i])
1612 devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1613 if (!devdata->rcvbuf[i]) {
1614 devdata->alloc_failed_in_if_needed_cnt++;
1617 rcv_bufs_allocated++;
1618 err = post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1620 kfree_skb(devdata->rcvbuf[i]);
1621 devdata->rcvbuf[i] = NULL;
1624 devdata->chstat.extra_rcvbufs_sent++;
1627 devdata->num_rcv_bufs_could_not_alloc -= rcv_bufs_allocated;
1630 /* drain_resp_queue - drains and ignores all messages from the resp queue
1631 * @cmdrsp: IO channel command response message.
1632 * @devdata: Visornic device to drain.
1634 static void drain_resp_queue(struct uiscmdrsp *cmdrsp,
1635 struct visornic_devdata *devdata)
1637 while (!visorchannel_signalremove(devdata->dev->visorchannel,
1643 /* service_resp_queue - drain the response queue
1644 * @cmdrsp: IO channel command response message.
1645 * @devdata: Visornic device to drain.
1649 * Drain the response queue of any responses from the IO Partition. Process the
1650 * responses as we get them.
1652 static void service_resp_queue(struct uiscmdrsp *cmdrsp,
1653 struct visornic_devdata *devdata,
1654 int *rx_work_done, int budget)
1656 unsigned long flags;
1657 struct net_device *netdev;
1659 while (*rx_work_done < budget) {
1660 /* TODO: CLIENT ACQUIRE -- Don't really need this at the
1664 if (visorchannel_signalremove(devdata->dev->visorchannel,
1669 switch (cmdrsp->net.type) {
1671 devdata->chstat.got_rcv++;
1672 /* process incoming packet */
1673 *rx_work_done += visornic_rx(cmdrsp);
1676 spin_lock_irqsave(&devdata->priv_lock, flags);
1677 devdata->chstat.got_xmit_done++;
1678 if (cmdrsp->net.xmtdone.xmt_done_result)
1679 devdata->chstat.xmit_fail++;
1680 /* only call queue wake if we stopped it */
1681 netdev = ((struct sk_buff *)cmdrsp->net.buf)->dev;
1682 /* ASSERT netdev == vnicinfo->netdev; */
1683 if ((netdev == devdata->netdev) &&
1684 netif_queue_stopped(netdev)) {
1685 /* check if we have crossed the lower watermark
1686 * for netif_wake_queue()
1688 if (vnic_hit_low_watermark
1690 devdata->lower_threshold_net_xmits)) {
1691 /* enough NET_XMITs completed
1692 * so can restart netif queue
1694 netif_wake_queue(netdev);
1695 devdata->flow_control_lower_hits++;
1698 skb_unlink(cmdrsp->net.buf, &devdata->xmitbufhead);
1699 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1700 kfree_skb(cmdrsp->net.buf);
1702 case NET_RCV_ENBDIS_ACK:
1703 devdata->chstat.got_enbdisack++;
1704 netdev = (struct net_device *)
1705 cmdrsp->net.enbdis.context;
1706 spin_lock_irqsave(&devdata->priv_lock, flags);
1707 devdata->enab_dis_acked = 1;
1708 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1710 if (devdata->server_down &&
1711 devdata->server_change_state) {
1712 /* Inform Linux that the link is up */
1713 devdata->server_down = false;
1714 devdata->server_change_state = false;
1715 netif_wake_queue(netdev);
1716 netif_carrier_on(netdev);
1719 case NET_CONNECT_STATUS:
1720 netdev = devdata->netdev;
1721 if (cmdrsp->net.enbdis.enable == 1) {
1722 spin_lock_irqsave(&devdata->priv_lock, flags);
1723 devdata->enabled = cmdrsp->net.enbdis.enable;
1724 spin_unlock_irqrestore(&devdata->priv_lock,
1726 netif_wake_queue(netdev);
1727 netif_carrier_on(netdev);
1729 netif_stop_queue(netdev);
1730 netif_carrier_off(netdev);
1731 spin_lock_irqsave(&devdata->priv_lock, flags);
1732 devdata->enabled = cmdrsp->net.enbdis.enable;
1733 spin_unlock_irqrestore(&devdata->priv_lock,
1740 /* cmdrsp is now available for reuse */
1744 static int visornic_poll(struct napi_struct *napi, int budget)
1746 struct visornic_devdata *devdata = container_of(napi,
1747 struct visornic_devdata,
1751 send_rcv_posts_if_needed(devdata);
1752 service_resp_queue(devdata->cmdrsp, devdata, &rx_count, budget);
1754 /* If there aren't any more packets to receive stop the poll */
1755 if (rx_count < budget)
1756 napi_complete_done(napi, rx_count);
1761 /* poll_for_irq - checks the status of the response queue
1762 * @v: Void pointer to the visronic devdata struct.
1764 * Main function of the vnic_incoming thread. Periodically check the response
1765 * queue and drain it if needed.
1767 static void poll_for_irq(unsigned long v)
1769 struct visornic_devdata *devdata = (struct visornic_devdata *)v;
1771 if (!visorchannel_signalempty(
1772 devdata->dev->visorchannel,
1773 IOCHAN_FROM_IOPART))
1774 napi_schedule(&devdata->napi);
1776 atomic_set(&devdata->interrupt_rcvd, 0);
1778 mod_timer(&devdata->irq_poll_timer, msecs_to_jiffies(2));
1781 /* visornic_probe - probe function for visornic devices
1782 * @dev: The visor device discovered.
1784 * Called when visorbus discovers a visornic device on its bus. It creates a new
1785 * visornic ethernet adapter.
1787 * Return: 0 on success, or negative integer on error.
1789 static int visornic_probe(struct visor_device *dev)
1791 struct visornic_devdata *devdata = NULL;
1792 struct net_device *netdev = NULL;
1794 int channel_offset = 0;
1797 netdev = alloc_etherdev(sizeof(struct visornic_devdata));
1799 dev_err(&dev->device,
1800 "%s alloc_etherdev failed\n", __func__);
1804 netdev->netdev_ops = &visornic_dev_ops;
1805 netdev->watchdog_timeo = 5 * HZ;
1806 SET_NETDEV_DEV(netdev, &dev->device);
1808 /* Get MAC address from channel and read it into the device. */
1809 netdev->addr_len = ETH_ALEN;
1810 channel_offset = offsetof(struct visor_io_channel, vnic.macaddr);
1811 err = visorbus_read_channel(dev, channel_offset, netdev->dev_addr,
1814 dev_err(&dev->device,
1815 "%s failed to get mac addr from chan (%d)\n",
1817 goto cleanup_netdev;
1820 devdata = devdata_initialize(netdev_priv(netdev), dev);
1822 dev_err(&dev->device,
1823 "%s devdata_initialize failed\n", __func__);
1825 goto cleanup_netdev;
1827 /* don't trust messages laying around in the channel */
1828 drain_resp_queue(devdata->cmdrsp, devdata);
1830 devdata->netdev = netdev;
1831 dev_set_drvdata(&dev->device, devdata);
1832 init_waitqueue_head(&devdata->rsp_queue);
1833 spin_lock_init(&devdata->priv_lock);
1835 devdata->enabled = 0;
1836 atomic_set(&devdata->usage, 1);
1838 /* Setup rcv bufs */
1839 channel_offset = offsetof(struct visor_io_channel, vnic.num_rcv_bufs);
1840 err = visorbus_read_channel(dev, channel_offset,
1841 &devdata->num_rcv_bufs, 4);
1843 dev_err(&dev->device,
1844 "%s failed to get #rcv bufs from chan (%d)\n",
1846 goto cleanup_netdev;
1849 devdata->rcvbuf = kcalloc(devdata->num_rcv_bufs,
1850 sizeof(struct sk_buff *), GFP_KERNEL);
1851 if (!devdata->rcvbuf) {
1853 goto cleanup_netdev;
1856 /* set the net_xmit outstanding threshold
1857 * always leave two slots open but you should have 3 at a minimum
1858 * note that max_outstanding_net_xmits must be > 0
1860 devdata->max_outstanding_net_xmits =
1861 max_t(unsigned long, 3, ((devdata->num_rcv_bufs / 3) - 2));
1862 devdata->upper_threshold_net_xmits =
1863 max_t(unsigned long,
1864 2, (devdata->max_outstanding_net_xmits - 1));
1865 devdata->lower_threshold_net_xmits =
1866 max_t(unsigned long,
1867 1, (devdata->max_outstanding_net_xmits / 2));
1869 skb_queue_head_init(&devdata->xmitbufhead);
1871 /* create a cmdrsp we can use to post and unpost rcv buffers */
1872 devdata->cmdrsp_rcv = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1873 if (!devdata->cmdrsp_rcv) {
1875 goto cleanup_rcvbuf;
1877 devdata->xmit_cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1878 if (!devdata->xmit_cmdrsp) {
1880 goto cleanup_cmdrsp_rcv;
1882 INIT_WORK(&devdata->timeout_reset, visornic_timeout_reset);
1883 devdata->server_down = false;
1884 devdata->server_change_state = false;
1886 /*set the default mtu */
1887 channel_offset = offsetof(struct visor_io_channel, vnic.mtu);
1888 err = visorbus_read_channel(dev, channel_offset, &netdev->mtu, 4);
1890 dev_err(&dev->device,
1891 "%s failed to get mtu from chan (%d)\n",
1893 goto cleanup_xmit_cmdrsp;
1896 /* TODO: Setup Interrupt information */
1897 /* Let's start our threads to get responses */
1898 netif_napi_add(netdev, &devdata->napi, visornic_poll, NAPI_WEIGHT);
1900 setup_timer(&devdata->irq_poll_timer, poll_for_irq,
1901 (unsigned long)devdata);
1902 /* Note: This time has to start running before the while
1903 * loop below because the napi routine is responsible for
1904 * setting enab_dis_acked
1906 mod_timer(&devdata->irq_poll_timer, msecs_to_jiffies(2));
1908 channel_offset = offsetof(struct visor_io_channel,
1909 channel_header.features);
1910 err = visorbus_read_channel(dev, channel_offset, &features, 8);
1912 dev_err(&dev->device,
1913 "%s failed to get features from chan (%d)\n",
1915 goto cleanup_napi_add;
1918 features |= VISOR_CHANNEL_IS_POLLING;
1919 features |= VISOR_DRIVER_ENHANCED_RCVBUF_CHECKING;
1920 err = visorbus_write_channel(dev, channel_offset, &features, 8);
1922 dev_err(&dev->device,
1923 "%s failed to set features in chan (%d)\n",
1925 goto cleanup_napi_add;
1928 /* Note: Interrupts have to be enable before the while
1929 * loop below because the napi routine is responsible for
1930 * setting enab_dis_acked
1932 visorbus_enable_channel_interrupts(dev);
1934 err = register_netdev(netdev);
1936 dev_err(&dev->device,
1937 "%s register_netdev failed (%d)\n", __func__, err);
1938 goto cleanup_napi_add;
1941 /* create debug/sysfs directories */
1942 devdata->eth_debugfs_dir = debugfs_create_dir(netdev->name,
1943 visornic_debugfs_dir);
1944 if (!devdata->eth_debugfs_dir) {
1945 dev_err(&dev->device,
1946 "%s debugfs_create_dir %s failed\n",
1947 __func__, netdev->name);
1949 goto cleanup_register_netdev;
1952 dev_info(&dev->device, "%s success netdev=%s\n",
1953 __func__, netdev->name);
1956 cleanup_register_netdev:
1957 unregister_netdev(netdev);
1960 del_timer_sync(&devdata->irq_poll_timer);
1961 netif_napi_del(&devdata->napi);
1963 cleanup_xmit_cmdrsp:
1964 kfree(devdata->xmit_cmdrsp);
1967 kfree(devdata->cmdrsp_rcv);
1970 kfree(devdata->rcvbuf);
1973 free_netdev(netdev);
1977 /* host_side_disappeared - IO Partition is gone
1978 * @devdata: Device object.
1980 * IO partition servicing this device is gone; do cleanup.
1982 static void host_side_disappeared(struct visornic_devdata *devdata)
1984 unsigned long flags;
1986 spin_lock_irqsave(&devdata->priv_lock, flags);
1987 /* indicate device destroyed */
1988 devdata->dev = NULL;
1989 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1992 /* visornic_remove - called when visornic dev goes away
1993 * @dev: Visornic device that is being removed.
1995 * Called when DEVICE_DESTROY gets called to remove device.
1997 static void visornic_remove(struct visor_device *dev)
1999 struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
2000 struct net_device *netdev;
2001 unsigned long flags;
2004 dev_err(&dev->device, "%s no devdata\n", __func__);
2007 spin_lock_irqsave(&devdata->priv_lock, flags);
2008 if (devdata->going_away) {
2009 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2010 dev_err(&dev->device, "%s already being removed\n", __func__);
2013 devdata->going_away = true;
2014 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2015 netdev = devdata->netdev;
2017 dev_err(&dev->device, "%s not net device\n", __func__);
2021 /* going_away prevents new items being added to the workqueues */
2022 cancel_work_sync(&devdata->timeout_reset);
2024 debugfs_remove_recursive(devdata->eth_debugfs_dir);
2025 /* this will call visornic_close() */
2026 unregister_netdev(netdev);
2028 del_timer_sync(&devdata->irq_poll_timer);
2029 netif_napi_del(&devdata->napi);
2031 dev_set_drvdata(&dev->device, NULL);
2032 host_side_disappeared(devdata);
2033 devdata_release(devdata);
2034 free_netdev(netdev);
2037 /* visornic_pause - called when IO Part disappears
2038 * @dev: Visornic device that is being serviced.
2039 * @complete_func: Call when finished.
2041 * Called when the IO Partition has gone down. Need to free up resources and
2042 * wait for IO partition to come back. Mark link as down and don't attempt any
2043 * DMA. When we have freed memory, call the complete_func so that Command knows
2044 * we are done. If we don't call complete_func, the IO Partition will never
2047 * Return: 0 on success.
2049 static int visornic_pause(struct visor_device *dev,
2050 visorbus_state_complete_func complete_func)
2052 struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
2054 visornic_serverdown(devdata, complete_func);
2058 /* visornic_resume - called when IO Partition has recovered
2059 * @dev: Visornic device that is being serviced.
2060 * @compelte_func: Call when finished.
2062 * Called when the IO partition has recovered. Re-establish connection to the IO
2063 * Partition and set the link up. Okay to do DMA again.
2065 * Returns 0 for success, negative integer on error.
2067 static int visornic_resume(struct visor_device *dev,
2068 visorbus_state_complete_func complete_func)
2070 struct visornic_devdata *devdata;
2071 struct net_device *netdev;
2072 unsigned long flags;
2074 devdata = dev_get_drvdata(&dev->device);
2076 dev_err(&dev->device, "%s no devdata\n", __func__);
2080 netdev = devdata->netdev;
2082 spin_lock_irqsave(&devdata->priv_lock, flags);
2083 if (devdata->server_change_state) {
2084 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2085 dev_err(&dev->device, "%s server already changing state\n",
2089 if (!devdata->server_down) {
2090 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2091 dev_err(&dev->device, "%s server not down\n", __func__);
2092 complete_func(dev, 0);
2095 devdata->server_change_state = true;
2096 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2098 /* Must transition channel to ATTACHED state BEFORE
2099 * we can start using the device again.
2100 * TODO: State transitions
2102 mod_timer(&devdata->irq_poll_timer, msecs_to_jiffies(2));
2108 complete_func(dev, 0);
2112 /* This is used to tell the visorbus driver which types of visor devices
2113 * we support, and what functions to call when a visor device that we support
2114 * is attached or removed.
2116 static struct visor_driver visornic_driver = {
2118 .owner = THIS_MODULE,
2119 .channel_types = visornic_channel_types,
2120 .probe = visornic_probe,
2121 .remove = visornic_remove,
2122 .pause = visornic_pause,
2123 .resume = visornic_resume,
2124 .channel_interrupt = NULL,
2127 /* visornic_init - init function
2129 * Init function for the visornic driver. Do initial driver setup and wait
2132 * Return: 0 on success, negative integer on error.
2134 static int visornic_init(void)
2139 visornic_debugfs_dir = debugfs_create_dir("visornic", NULL);
2140 if (!visornic_debugfs_dir)
2143 ret = debugfs_create_file("info", 0400, visornic_debugfs_dir, NULL,
2144 &debugfs_info_fops);
2146 goto cleanup_debugfs;
2147 ret = debugfs_create_file("enable_ints", 0200, visornic_debugfs_dir,
2148 NULL, &debugfs_enable_ints_fops);
2150 goto cleanup_debugfs;
2152 err = visorbus_register_visor_driver(&visornic_driver);
2154 goto cleanup_debugfs;
2159 debugfs_remove_recursive(visornic_debugfs_dir);
2163 /* visornic_cleanup - driver exit routine
2165 * Unregister driver from the bus and free up memory.
2167 static void visornic_cleanup(void)
2169 visorbus_unregister_visor_driver(&visornic_driver);
2170 debugfs_remove_recursive(visornic_debugfs_dir);
2173 module_init(visornic_init);
2174 module_exit(visornic_cleanup);
2176 MODULE_AUTHOR("Unisys");
2177 MODULE_LICENSE("GPL");
2178 MODULE_DESCRIPTION("s-Par NIC driver for virtual network devices");