1 /* Copyright (c) 2012 - 2015 UNISYS CORPORATION
2  * All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11  * NON INFRINGEMENT.  See the GNU General Public License for more
12  * details.
13  */
14
15 /* This driver lives in an s-Par partition and registers with the ethernet
16  * IO channels exposed by the visorbus driver. It creates netdev devices,
17  * forwards transmits to the IO channel, and accepts receives from the IO
18  * Partition via the IO channel.
19  */
20
21 #include <linux/debugfs.h>
22 #include <linux/etherdevice.h>
23 #include <linux/netdevice.h>
24 #include <linux/kthread.h>
25 #include <linux/skbuff.h>
26 #include <linux/rtnetlink.h>
27
28 #include "visorbus.h"
29 #include "iochannel.h"
30
31 #define VISORNIC_INFINITE_RSP_WAIT 0
32
33 /* MAX_BUF = 64 lines x 32 MAXVNIC x 80 characters
34  *         = 163840 bytes
35  */
36 #define MAX_BUF 163840
37 #define NAPI_WEIGHT 64
38
39 /* GUIDs for the vnic channel type supported by this driver. */
40 /* {8cd5994d-c58e-11da-95a9-00e08161165f} */
41 #define VISOR_VNIC_CHANNEL_GUID \
42         GUID_INIT(0x8cd5994d, 0xc58e, 0x11da, \
43                 0x95, 0xa9, 0x0, 0xe0, 0x81, 0x61, 0x16, 0x5f)
44 #define VISOR_VNIC_CHANNEL_GUID_STR \
45         "8cd5994d-c58e-11da-95a9-00e08161165f"
46
47 static struct visor_channeltype_descriptor visornic_channel_types[] = {
48         /* Note that the only channel type we expect to be reported by the
49          * bus driver is the VISOR_VNIC channel.
50          */
51         { VISOR_VNIC_CHANNEL_GUID, "ultravnic" },
52         {}
53 };
54 MODULE_DEVICE_TABLE(visorbus, visornic_channel_types);
55 /* FIXME XXX: This next line of code must be fixed and removed before
56  * acceptance into the 'normal' part of the kernel.  It is only here as a place
57  * holder to get module autoloading functionality working for visorbus.  Code
58  * must be added to scripts/mod/file2alias.c, etc., to get this working
59  * properly.
60  */
61 MODULE_ALIAS("visorbus:" VISOR_VNIC_CHANNEL_GUID_STR);
62
63 struct chanstat {
64         unsigned long got_rcv;
65         unsigned long got_enbdisack;
66         unsigned long got_xmit_done;
67         unsigned long xmit_fail;
68         unsigned long sent_enbdis;
69         unsigned long sent_promisc;
70         unsigned long sent_post;
71         unsigned long sent_post_failed;
72         unsigned long sent_xmit;
73         unsigned long reject_count;
74         unsigned long extra_rcvbufs_sent;
75 };
76
77 /* struct visornic_devdata
78  * @enabled:                        0 disabled 1 enabled to receive.
79  * @enab_dis_acked:                 NET_RCV_ENABLE/DISABLE acked by IOPART.
80  * @struct *dev:
81  * @struct *netdev:
82  * @struct net_stats:
83  * @interrupt_rcvd:
84  * @rsp_queue:
85  * @struct **rcvbuf:
86  * @incarnation_id:                 incarnation_id lets IOPART know about
87  *                                  re-birth.
88  * @old_flags:                      flags as they were prior to
89  *                                  set_multicast_list.
90  * @usage:                          count of users.
91  * @num_rcv_bufs:                   number of rcv buffers the vnic will post.
92  * @num_rcv_bufs_could_not_alloc:
93  * @num_rcvbuf_in_iovm:
94  * @alloc_failed_in_if_needed_cnt:
95  * @alloc_failed_in_repost_rtn_cnt:
96  * @max_outstanding_net_xmits:      absolute max number of outstanding xmits
97  *                                  - should never hit this.
98  * @upper_threshold_net_xmits:      high water mark for calling
99  *                                  netif_stop_queue().
100  * @lower_threshold_net_xmits:      low water mark for calling
101  *                                  netif_wake_queue().
102  * @struct xmitbufhead:             xmitbufhead - head of the xmit buffer list
103  *                                  sent to the IOPART end.
104  * @server_down_complete_func:
105  * @struct timeout_reset:
106  * @struct *cmdrsp_rcv:             cmdrsp_rcv is used for posting/unposting rcv
107  *                                  buffers.
108  * @struct *xmit_cmdrsp:            xmit_cmdrsp - issues NET_XMIT - only one
109  *                                  active xmit at a time.
110  * @server_down:                    IOPART is down.
111  * @server_change_state:            Processing SERVER_CHANGESTATE msg.
112  * @going_away:                     device is being torn down.
113  * @struct *eth_debugfs_dir:
114  * @interrupts_rcvd:
115  * @interrupts_notme:
116  * @interrupts_disabled:
117  * @busy_cnt:
118  * @priv_lock:                      spinlock to access devdata structures.
119  * @flow_control_upper_hits:
120  * @flow_control_lower_hits:
121  * @n_rcv0:                         # rcvs of 0 buffers.
122  * @n_rcv1:                         # rcvs of 1 buffers.
123  * @n_rcv2:                         # rcvs of 2 buffers.
124  * @n_rcvx:                         # rcvs of >2 buffers.
125  * @found_repost_rcvbuf_cnt:        # repost_rcvbuf_cnt.
126  * @repost_found_skb_cnt:           # of times the returned skb was found.
127  * @n_repost_deficit:               # of lost rcv buffers.
128  * @bad_rcv_buf:                    # of unknown rcv skb not freed.
129  * @n_rcv_packets_not_accepted:     # of bogus rcv packets.
130  * @queuefullmsg_logged:
131  * @struct chstat:
132  * @struct irq_poll_timer:
133  * @struct napi:
134  * @struct cmdrsp:
135  */
136 struct visornic_devdata {
137         unsigned short enabled;
138         unsigned short enab_dis_acked;
139
140         struct visor_device *dev;
141         struct net_device *netdev;
142         struct net_device_stats net_stats;
143         atomic_t interrupt_rcvd;
144         wait_queue_head_t rsp_queue;
145         struct sk_buff **rcvbuf;
146         u64 incarnation_id;
147         unsigned short old_flags;
148         atomic_t usage;
149
150         int num_rcv_bufs;
151         int num_rcv_bufs_could_not_alloc;
152         atomic_t num_rcvbuf_in_iovm;
153         unsigned long alloc_failed_in_if_needed_cnt;
154         unsigned long alloc_failed_in_repost_rtn_cnt;
155
156         unsigned long max_outstanding_net_xmits;
157         unsigned long upper_threshold_net_xmits;
158         unsigned long lower_threshold_net_xmits;
159         struct sk_buff_head xmitbufhead;
160
161         visorbus_state_complete_func server_down_complete_func;
162         struct work_struct timeout_reset;
163         struct uiscmdrsp *cmdrsp_rcv;
164         struct uiscmdrsp *xmit_cmdrsp;
165         bool server_down;
166         bool server_change_state;
167         bool going_away;
168         struct dentry *eth_debugfs_dir;
169         u64 interrupts_rcvd;
170         u64 interrupts_notme;
171         u64 interrupts_disabled;
172         u64 busy_cnt;
173         /* spinlock to access devdata structures. */
174         spinlock_t priv_lock;
175
176         /* flow control counter */
177         u64 flow_control_upper_hits;
178         u64 flow_control_lower_hits;
179
180         /* debug counters */
181         unsigned long n_rcv0;
182         unsigned long n_rcv1;
183         unsigned long n_rcv2;
184         unsigned long n_rcvx;
185         unsigned long found_repost_rcvbuf_cnt;
186         unsigned long repost_found_skb_cnt;
187         unsigned long n_repost_deficit;
188         unsigned long bad_rcv_buf;
189         unsigned long n_rcv_packets_not_accepted;
190
191         int queuefullmsg_logged;
192         struct chanstat chstat;
193         struct timer_list irq_poll_timer;
194         struct napi_struct napi;
195         struct uiscmdrsp cmdrsp[SIZEOF_CMDRSP];
196 };
197
198 /* Returns next non-zero index on success or 0 on failure (i.e. out of room). */
199 static u16 add_physinfo_entries(u64 inp_pfn, u16 inp_off, u16 inp_len,
200                                 u16 index, u16 max_pi_arr_entries,
201                                 struct phys_info pi_arr[])
202 {
203         u16 i, len, firstlen;
204
205         firstlen = PI_PAGE_SIZE - inp_off;
206         if (inp_len <= firstlen) {
207                 /* The input entry spans only one page - add as is. */
208                 if (index >= max_pi_arr_entries)
209                         return 0;
210                 pi_arr[index].pi_pfn = inp_pfn;
211                 pi_arr[index].pi_off = (u16)inp_off;
212                 pi_arr[index].pi_len = (u16)inp_len;
213                 return index + 1;
214         }
215
216         /* This entry spans multiple pages. */
217         for (len = inp_len, i = 0; len;
218                 len -= pi_arr[index + i].pi_len, i++) {
219                 if (index + i >= max_pi_arr_entries)
220                         return 0;
221                 pi_arr[index + i].pi_pfn = inp_pfn + i;
222                 if (i == 0) {
223                         pi_arr[index].pi_off = inp_off;
224                         pi_arr[index].pi_len = firstlen;
225                 } else {
226                         pi_arr[index + i].pi_off = 0;
227                         pi_arr[index + i].pi_len = min_t(u16, len,
228                                                          PI_PAGE_SIZE);
229                 }
230         }
231         return index + i;
232 }
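/* Worked example for add_physinfo_entries() (hypothetical values): with
 * PI_PAGE_SIZE = 4096, an input entry of inp_off = 3072 and inp_len = 5120
 * spans two pages, so the loop above emits two entries -
 * {inp_pfn, off 3072, len 1024} and {inp_pfn + 1, off 0, len 4096} - and
 * returns index + 2.
 */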
233
234 /* visor_copy_fragsinfo_from_skb - copy fragment list in the SKB to a phys_info
235  *                                 array that the IOPART understands
236  * @skb:          Skbuff that we are pulling the frags from.
237  * @firstfraglen: Length of first fragment in skb.
238  * @frags_max:    Max len of frags array.
239  * @frags:        Frags array filled in on output.
240  *
241  * Return: Positive integer indicating number of entries filled in frags on
242  *         success, negative integer on error.
243  */
244 static int visor_copy_fragsinfo_from_skb(struct sk_buff *skb,
245                                          unsigned int firstfraglen,
246                                          unsigned int frags_max,
247                                          struct phys_info frags[])
248 {
249         unsigned int count = 0, frag, size, offset = 0, numfrags;
250         unsigned int total_count;
251
252         numfrags = skb_shinfo(skb)->nr_frags;
253
254         /* Compute the number of fragments this skb has, and if it's more
255          * than the frag array can hold, linearize the skb.
256          */
257         total_count = numfrags + (firstfraglen / PI_PAGE_SIZE);
258         if (firstfraglen % PI_PAGE_SIZE)
259                 total_count++;
260
261         if (total_count > frags_max) {
262                 if (skb_linearize(skb))
263                         return -EINVAL;
264                 numfrags = skb_shinfo(skb)->nr_frags;
265                 firstfraglen = 0;
266         }
267
268         while (firstfraglen) {
269                 if (count == frags_max)
270                         return -EINVAL;
271
272                 frags[count].pi_pfn =
273                         page_to_pfn(virt_to_page(skb->data + offset));
274                 frags[count].pi_off =
275                         (unsigned long)(skb->data + offset) & PI_PAGE_MASK;
276                 size = min_t(unsigned int, firstfraglen,
277                              PI_PAGE_SIZE - frags[count].pi_off);
278
279                 /* can take smallest of firstfraglen (what's left) OR
280                  * bytes left in the page
281                  */
282                 frags[count].pi_len = size;
283                 firstfraglen -= size;
284                 offset += size;
285                 count++;
286         }
287         if (numfrags) {
288                 if ((count + numfrags) > frags_max)
289                         return -EINVAL;
290
291                 for (frag = 0; frag < numfrags; frag++) {
292                         count = add_physinfo_entries(page_to_pfn(
293                                   skb_frag_page(&skb_shinfo(skb)->frags[frag])),
294                                   skb_shinfo(skb)->frags[frag].page_offset,
295                                   skb_shinfo(skb)->frags[frag].size, count,
296                                   frags_max, frags);
297                         /* add_physinfo_entries only returns
298                          * zero if the frags array is out of room.
299                          * That should never happen because we
300                          * fail above if count + numfrags > frags_max.
301                          */
302                         if (!count)
303                                 return -EINVAL;
304                 }
305         }
306         if (skb_shinfo(skb)->frag_list) {
307                 struct sk_buff *skbinlist;
308                 int c;
309
310                 for (skbinlist = skb_shinfo(skb)->frag_list; skbinlist;
311                      skbinlist = skbinlist->next) {
312                         c = visor_copy_fragsinfo_from_skb(skbinlist,
313                                                           skbinlist->len -
314                                                           skbinlist->data_len,
315                                                           frags_max - count,
316                                                           &frags[count]);
317                         if (c < 0)
318                                 return c;
319                         count += c;
320                 }
321         }
322         return count;
323 }
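/* Worked example of the total_count estimate above (hypothetical numbers):
 * with PI_PAGE_SIZE = 4096, an skb with nr_frags = 2 and firstfraglen = 9000
 * gives total_count = 2 + 2 + 1 = 5 (two full pages plus a partial page of
 * linear data). If that exceeds frags_max, the skb is linearized first.
 */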
324
325 static ssize_t enable_ints_write(struct file *file,
326                                  const char __user *buffer,
327                                  size_t count, loff_t *ppos)
328 {
329         /* We don't want to break the ABI here by removing the debugfs
330          * file or making it non-writable, so let's just make this a
331          * vestigial function.
332          */
333         return count;
334 }
335
336 static const struct file_operations debugfs_enable_ints_fops = {
337         .write = enable_ints_write,
338 };
339
340 /* visornic_serverdown_complete - pause device following IOPART going down
341  * @devdata: Device managed by IOPART.
342  *
343  * The IO partition has gone down, and we need to do some cleanup for when it
344  * comes back. Treat the IO partition as the link being down.
345  */
346 static void visornic_serverdown_complete(struct visornic_devdata *devdata)
347 {
348         struct net_device *netdev = devdata->netdev;
349
350         /* Stop polling for interrupts */
351         del_timer_sync(&devdata->irq_poll_timer);
352
353         rtnl_lock();
354         dev_close(netdev);
355         rtnl_unlock();
356
357         atomic_set(&devdata->num_rcvbuf_in_iovm, 0);
358         devdata->chstat.sent_xmit = 0;
359         devdata->chstat.got_xmit_done = 0;
360
361         if (devdata->server_down_complete_func)
362                 (*devdata->server_down_complete_func)(devdata->dev, 0);
363
364         devdata->server_down = true;
365         devdata->server_change_state = false;
366         devdata->server_down_complete_func = NULL;
367 }
368
369 /* visornic_serverdown - Command has notified us that IOPART is down
370  * @devdata:       Device managed by IOPART.
371  * @complete_func: Function to call when finished.
372  *
373  * Schedule the work needed to handle the server down request. Make sure we
374  * haven't already handled the server change state event.
375  *
376  * Return: 0 if we scheduled the work, negative integer on error.
377  */
378 static int visornic_serverdown(struct visornic_devdata *devdata,
379                                visorbus_state_complete_func complete_func)
380 {
381         unsigned long flags;
382         int err;
383
384         spin_lock_irqsave(&devdata->priv_lock, flags);
385         if (devdata->server_change_state) {
386                 dev_dbg(&devdata->dev->device, "%s changing state\n",
387                         __func__);
388                 err = -EINVAL;
389                 goto err_unlock;
390         }
391         if (devdata->server_down) {
392                 dev_dbg(&devdata->dev->device, "%s already down\n",
393                         __func__);
394                 err = -EINVAL;
395                 goto err_unlock;
396         }
397         if (devdata->going_away) {
398                 dev_dbg(&devdata->dev->device,
399                         "%s aborting because device removal pending\n",
400                         __func__);
401                 err = -ENODEV;
402                 goto err_unlock;
403         }
404         devdata->server_change_state = true;
405         devdata->server_down_complete_func = complete_func;
406         spin_unlock_irqrestore(&devdata->priv_lock, flags);
407
408         visornic_serverdown_complete(devdata);
409         return 0;
410
411 err_unlock:
412         spin_unlock_irqrestore(&devdata->priv_lock, flags);
413         return err;
414 }
415
416 /* alloc_rcv_buf - alloc rcv buffer to be given to the IO Partition
417  * @netdev: Network adapter the rcv bufs are attached to.
418  *
419  * Create an sk_buff (rcv_buf) that will be passed to the IO Partition
420  * so that it can write rcv data into our memory space.
421  *
422  * Return: Pointer to sk_buff.
423  */
424 static struct sk_buff *alloc_rcv_buf(struct net_device *netdev)
425 {
426         struct sk_buff *skb;
427
428         /* NOTE: the first fragment in each rcv buffer is pointed to by
429          * rcvskb->data. For now all rcv buffers will be RCVPOST_BUF_SIZE
430          * in length, so the first frag is large enough to hold 1514.
431          */
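        /* GFP_ATOMIC because rcv buffers are also re-allocated from the
         * receive/repost path (see repost_return()), which is presumed to run
         * in a context that cannot sleep.
         */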
432         skb = alloc_skb(RCVPOST_BUF_SIZE, GFP_ATOMIC);
433         if (!skb)
434                 return NULL;
435         skb->dev = netdev;
436         /* current value of mtu doesn't come into play here; large
437          * packets will just end up using multiple rcv buffers, all of the
438          * same size.
439          */
440         skb->len = RCVPOST_BUF_SIZE;
441         /* alloc_skb already zeroes data_len; set it again only for clarity. */
442         skb->data_len = 0;
443         return skb;
444 }
445
446 /* post_skb - post a skb to the IO Partition
447  * @cmdrsp:  Cmdrsp packet to be sent to the IO Partition.
448  * @devdata: visornic_devdata to post the skb to.
449  * @skb:     Skb to give to the IO partition.
450  *
451  * Return: 0 on success, negative integer on error.
452  */
453 static int post_skb(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
454                     struct sk_buff *skb)
455 {
456         int err;
457
458         cmdrsp->net.buf = skb;
459         cmdrsp->net.rcvpost.frag.pi_pfn = page_to_pfn(virt_to_page(skb->data));
460         cmdrsp->net.rcvpost.frag.pi_off =
461                 (unsigned long)skb->data & PI_PAGE_MASK;
462         cmdrsp->net.rcvpost.frag.pi_len = skb->len;
463         cmdrsp->net.rcvpost.unique_num = devdata->incarnation_id;
464
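        /* A posted rcv buffer is described by a single phys_info frag, so
         * offset plus length must fit within one page or the post is
         * rejected below.
         */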
465         if ((cmdrsp->net.rcvpost.frag.pi_off + skb->len) > PI_PAGE_SIZE)
466                 return -EINVAL;
467
468         cmdrsp->net.type = NET_RCV_POST;
469         cmdrsp->cmdtype = CMD_NET_TYPE;
470         err = visorchannel_signalinsert(devdata->dev->visorchannel,
471                                         IOCHAN_TO_IOPART,
472                                         cmdrsp);
473         if (err) {
474                 devdata->chstat.sent_post_failed++;
475                 return err;
476         }
477
478         atomic_inc(&devdata->num_rcvbuf_in_iovm);
479         devdata->chstat.sent_post++;
480         return 0;
481 }
482
483 /* send_enbdis - Send NET_RCV_ENBDIS to IO Partition
484  * @netdev:  Netdevice we are enabling/disabling, used as context return value.
485  * @state:   Enable = 1/disable = 0.
486  * @devdata: Visornic device we are enabling/disabling.
487  *
488  * Send the enable/disable message to the IO Partition.
489  *
490  * Return: 0 on success, negative integer on error.
491  */
492 static int send_enbdis(struct net_device *netdev, int state,
493                        struct visornic_devdata *devdata)
494 {
495         int err;
496
497         devdata->cmdrsp_rcv->net.enbdis.enable = state;
498         devdata->cmdrsp_rcv->net.enbdis.context = netdev;
499         devdata->cmdrsp_rcv->net.type = NET_RCV_ENBDIS;
500         devdata->cmdrsp_rcv->cmdtype = CMD_NET_TYPE;
501         err = visorchannel_signalinsert(devdata->dev->visorchannel,
502                                         IOCHAN_TO_IOPART,
503                                         devdata->cmdrsp_rcv);
504         if (err)
505                 return err;
506         devdata->chstat.sent_enbdis++;
507         return 0;
508 }
509
510 /* visornic_disable_with_timeout - disable network adapter
511  * @netdev:  netdevice to disable.
512  * @timeout: Timeout to wait for disable.
513  *
514  * Disable the network adapter and inform the IO Partition that we are disabled.
515  * Reclaim memory from rcv bufs.
516  *
517  * Return: 0 on success, negative integer if the IO Partition fails to respond.
518  */
519 static int visornic_disable_with_timeout(struct net_device *netdev,
520                                          const int timeout)
521 {
522         struct visornic_devdata *devdata = netdev_priv(netdev);
523         int i;
524         unsigned long flags;
525         int wait = 0;
526         int err;
527
528         /* send a msg telling the other end we are stopping incoming pkts */
529         spin_lock_irqsave(&devdata->priv_lock, flags);
530         devdata->enabled = 0;
531         /* must wait for ack */
532         devdata->enab_dis_acked = 0;
533         spin_unlock_irqrestore(&devdata->priv_lock, flags);
534
535         /* send disable and wait for ack -- don't hold lock when sending
536          * disable because if the queue is full, insert might sleep.
537          * If an error occurs, don't wait for the timeout.
538          */
539         err = send_enbdis(netdev, 0, devdata);
540         if (err)
541                 return err;
542
543         /* wait for ack to arrive before we try to free rcv buffers
544          * NOTE: the other end automatically unposts the rcv buffers
545          * when it gets a disable.
546          */
547         spin_lock_irqsave(&devdata->priv_lock, flags);
548         while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
549                (wait < timeout)) {
550                 if (devdata->enab_dis_acked)
551                         break;
552                 if (devdata->server_down || devdata->server_change_state) {
553                         dev_dbg(&netdev->dev, "%s server went away\n",
554                                 __func__);
555                         break;
556                 }
557                 set_current_state(TASK_INTERRUPTIBLE);
558                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
559                 wait += schedule_timeout(msecs_to_jiffies(10));
560                 spin_lock_irqsave(&devdata->priv_lock, flags);
561         }
562
563         /* Wait for usage to go to 1 (no other users) before freeing
564          * rcv buffers
565          */
566         if (atomic_read(&devdata->usage) > 1) {
567                 while (1) {
568                         set_current_state(TASK_INTERRUPTIBLE);
569                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
570                         schedule_timeout(msecs_to_jiffies(10));
571                         spin_lock_irqsave(&devdata->priv_lock, flags);
572                         if (atomic_read(&devdata->usage))
573                                 break;
574                 }
575         }
576         /* we've set enabled to 0, so we can give up the lock. */
577         spin_unlock_irqrestore(&devdata->priv_lock, flags);
578
579         /* stop the transmit queue so nothing more can be transmitted */
580         netif_stop_queue(netdev);
581
582         napi_disable(&devdata->napi);
583
584         skb_queue_purge(&devdata->xmitbufhead);
585
586         /* Free rcv buffers - other end has automatically unposted them on
587          * disable
588          */
589         for (i = 0; i < devdata->num_rcv_bufs; i++) {
590                 if (devdata->rcvbuf[i]) {
591                         kfree_skb(devdata->rcvbuf[i]);
592                         devdata->rcvbuf[i] = NULL;
593                 }
594         }
595
596         return 0;
597 }
598
599 /* init_rcv_bufs - initialize receive buffs and send them to the IO Partition
600  * @netdev:  struct netdevice.
601  * @devdata: visornic_devdata.
602  *
603  * Allocate rcv buffers and post them to the IO Partition.
604  *
605  * Return: 0 on success, negative integer on failure.
606  */
607 static int init_rcv_bufs(struct net_device *netdev,
608                          struct visornic_devdata *devdata)
609 {
610         int i, j, count, err;
611
612         /* Allocate a fixed number of receive buffers to post to uisnic;
613          * post the receive buffers after we've allocated the required amount.
614          */
615         for (i = 0; i < devdata->num_rcv_bufs; i++) {
616                 devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
617                 /* if we failed to allocate one let us stop */
618                 if (!devdata->rcvbuf[i])
619                         break;
620         }
621         /* couldn't even allocate one -- bail out */
622         if (i == 0)
623                 return -ENOMEM;
624         count = i;
625
626         /* Ensure we can alloc 2/3rd of the requested number of buffers.
627          * 2/3 is an arbitrary choice; used also in ndis init.c
628          */
629         if (count < ((2 * devdata->num_rcv_bufs) / 3)) {
630                 /* free receive buffers we did alloc and then bail out */
631                 for (i = 0; i < count; i++) {
632                         kfree_skb(devdata->rcvbuf[i]);
633                         devdata->rcvbuf[i] = NULL;
634                 }
635                 return -ENOMEM;
636         }
637
638         /* post receive buffers to receive incoming input - without holding
639          * lock - we've not enabled nor started the queue so there shouldn't
640          * be any rcv or xmit activity
641          */
642         for (i = 0; i < count; i++) {
643                 err = post_skb(devdata->cmdrsp_rcv, devdata,
644                                devdata->rcvbuf[i]);
645                 if (!err)
646                         continue;
647
648                 /* Error handling -
649                  * If we posted at least one skb, we should return success,
650                  * but we need to free the buffers that were not successfully
651                  * posted.
652                  */
653                 for (j = i; j < count; j++) {
654                         kfree_skb(devdata->rcvbuf[j]);
655                         devdata->rcvbuf[j] = NULL;
656                 }
657                 if (i == 0)
658                         return err;
659                 break;
660         }
661
662         return 0;
663 }
664
665 /* visornic_enable_with_timeout - send enable to IO Partition
666  * @netdev:  struct net_device.
667  * @timeout: Time to wait for the ACK from the enable.
668  *
669  * Inits and posts receive buffers to the IOVM, then sends enable to the IOVM.
670  * Timeout is defined in msecs (timeout of 0 specifies infinite wait).
671  *
672  * Return: 0 on success, negative integer on failure.
673  */
674 static int visornic_enable_with_timeout(struct net_device *netdev,
675                                         const int timeout)
676 {
677         int err = 0;
678         struct visornic_devdata *devdata = netdev_priv(netdev);
679         unsigned long flags;
680         int wait = 0;
681
682         napi_enable(&devdata->napi);
683
684         /* NOTE: the other end automatically unposts the rcv buffers when it
685          * gets a disable.
686          */
687         err = init_rcv_bufs(netdev, devdata);
688         if (err < 0) {
689                 dev_err(&netdev->dev,
690                         "%s failed to init rcv bufs\n", __func__);
691                 return err;
692         }
693
694         spin_lock_irqsave(&devdata->priv_lock, flags);
695         devdata->enabled = 1;
696         devdata->enab_dis_acked = 0;
697
698         /* now we're ready, let's send an ENB to uisnic but until we get
699          * an ACK back from uisnic, we'll drop the packets
700          */
701         devdata->n_rcv_packets_not_accepted = 0;
702         spin_unlock_irqrestore(&devdata->priv_lock, flags);
703
704         /* send enable and wait for ack -- don't hold lock when sending enable
705          * because if the queue is full, insert might sleep. If an error
706          * occurs, error out.
707          */
708         err = send_enbdis(netdev, 1, devdata);
709         if (err)
710                 return err;
711
712         spin_lock_irqsave(&devdata->priv_lock, flags);
713         while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
714                (wait < timeout)) {
715                 if (devdata->enab_dis_acked)
716                         break;
717                 if (devdata->server_down || devdata->server_change_state) {
718                         dev_dbg(&netdev->dev, "%s server went away\n",
719                                 __func__);
720                         break;
721                 }
722                 set_current_state(TASK_INTERRUPTIBLE);
723                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
724                 wait += schedule_timeout(msecs_to_jiffies(10));
725                 spin_lock_irqsave(&devdata->priv_lock, flags);
726         }
727
728         spin_unlock_irqrestore(&devdata->priv_lock, flags);
729
730         if (!devdata->enab_dis_acked) {
731                 dev_err(&netdev->dev, "%s missing ACK\n", __func__);
732                 return -EIO;
733         }
734
735         netif_start_queue(netdev);
736         return 0;
737 }
738
739 /* visornic_timeout_reset - handle xmit timeout resets
740  * @work: Work item that scheduled the work.
741  *
742  * Transmit timeouts are typically handled by resetting the device for our
743  * virtual NIC; we will send a disable and enable to the IOVM. If it doesn't
744  * respond, we will trigger a serverdown.
745  */
746 static void visornic_timeout_reset(struct work_struct *work)
747 {
748         struct visornic_devdata *devdata;
749         struct net_device *netdev;
750         int response = 0;
751
752         devdata = container_of(work, struct visornic_devdata, timeout_reset);
753         netdev = devdata->netdev;
754
755         rtnl_lock();
756         if (!netif_running(netdev)) {
757                 rtnl_unlock();
758                 return;
759         }
760
761         response = visornic_disable_with_timeout(netdev,
762                                                  VISORNIC_INFINITE_RSP_WAIT);
763         if (response)
764                 goto call_serverdown;
765
766         response = visornic_enable_with_timeout(netdev,
767                                                 VISORNIC_INFINITE_RSP_WAIT);
768         if (response)
769                 goto call_serverdown;
770
771         rtnl_unlock();
772
773         return;
774
775 call_serverdown:
776         visornic_serverdown(devdata, NULL);
777         rtnl_unlock();
778 }
779
780 /* visornic_open - enable the visornic device and mark the queue started
781  * @netdev: netdevice to start.
782  *
783  * Enable the device and start the transmit queue.
784  *
785  * Return: 0 on success.
786  */
787 static int visornic_open(struct net_device *netdev)
788 {
789         visornic_enable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
790         return 0;
791 }
792
793 /* visornic_close - disables the visornic device and stops the queues
794  * @netdev: netdevice to stop.
795  *
796  * Disable the device and stop the transmit queue.
797  *
798  * Return: 0 on success.
799  */
800 static int visornic_close(struct net_device *netdev)
801 {
802         visornic_disable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
803         return 0;
804 }
805
806 /* devdata_xmits_outstanding - compute outstanding xmits
807  * @devdata: visornic_devdata for device
808  *
809  * Return: Long integer representing the number of outstanding xmits.
810  */
811 static unsigned long devdata_xmits_outstanding(struct visornic_devdata *devdata)
812 {
813         if (devdata->chstat.sent_xmit >= devdata->chstat.got_xmit_done)
814                 return devdata->chstat.sent_xmit -
815                         devdata->chstat.got_xmit_done;
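        /* sent_xmit has wrapped past ULONG_MAX while got_xmit_done has not;
         * the expression below is the difference modulo ULONG_MAX + 1.
         * Example (hypothetical counts): sent_xmit = 2 and
         * got_xmit_done = ULONG_MAX - 1 gives
         * ULONG_MAX - (ULONG_MAX - 1) + 2 + 1 = 4 outstanding xmits.
         */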
816         return (ULONG_MAX - devdata->chstat.got_xmit_done
817                 + devdata->chstat.sent_xmit + 1);
818 }
819
820 /* vnic_hit_high_watermark
821  * @devdata:        Indicates visornic device we are checking.
822  * @high_watermark: Max num of unacked xmits we will tolerate before we will
823  *                  start throttling.
824  *
825  * Return: True iff the number of unacked xmits sent to the IO Partition is >=
826  *         high_watermark. False otherwise.
827  */
828 static bool vnic_hit_high_watermark(struct visornic_devdata *devdata,
829                                     ulong high_watermark)
830 {
831         return (devdata_xmits_outstanding(devdata) >= high_watermark);
832 }
833
834 /* vnic_hit_low_watermark
835  * @devdata:       Indicates visornic device we are checking.
836  * @low_watermark: We will wait until the num of unacked xmits drops to this
837  *                 value or lower before we start transmitting again.
838  *
839  * Return: True iff the number of unacked xmits sent to the IO Partition is <=
840  *         low_watermark.
841  */
842 static bool vnic_hit_low_watermark(struct visornic_devdata *devdata,
843                                    ulong low_watermark)
844 {
845         return (devdata_xmits_outstanding(devdata) <= low_watermark);
846 }
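/* Together these helpers give the xmit path simple hysteresis: visornic_xmit()
 * stops the queue once outstanding xmits reach upper_threshold_net_xmits, and
 * the response-processing path is expected to wake it again once they drain
 * to lower_threshold_net_xmits.
 */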
847
848 /* visornic_xmit - send a packet to the IO Partition
849  * @skb:    Packet to be sent.
850  * @netdev: Net device the packet is being sent from.
851  *
852  * Convert the skb to a cmdrsp so the IO Partition can understand it, and send
853  * the XMIT command to the IO Partition for processing. This function is
854  * protected from concurrent calls by a spinlock xmit_lock in the net_device
855  * struct. As soon as the function returns, it can be called again.
856  *
857  * Return: NETDEV_TX_OK.
858  */
859 static int visornic_xmit(struct sk_buff *skb, struct net_device *netdev)
860 {
861         struct visornic_devdata *devdata;
862         int len, firstfraglen, padlen;
863         struct uiscmdrsp *cmdrsp = NULL;
864         unsigned long flags;
865         int err;
866
867         devdata = netdev_priv(netdev);
868         spin_lock_irqsave(&devdata->priv_lock, flags);
869
870         if (netif_queue_stopped(netdev) || devdata->server_down ||
871             devdata->server_change_state) {
872                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
873                 devdata->busy_cnt++;
874                 dev_dbg(&netdev->dev,
875                         "%s busy - queue stopped\n", __func__);
876                 kfree_skb(skb);
877                 return NETDEV_TX_OK;
878         }
879
880         /* sk_buff struct is used to host network data throughout all the
881          * linux network subsystems
882          */
883         len = skb->len;
884
885         /* skb->len is the FULL length of data (including fragmentary portion)
886          * skb->data_len is the length of the fragment portion in frags
887          * skb->len - skb->data_len is size of the 1st fragment in skb->data
888          * calculate the length of the first fragment that skb->data is
889          * pointing to
890          */
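        /* E.g. (hypothetical frame): a 1514-byte skb with data_len = 0 keeps
         * the whole frame in the linear area, so firstfraglen = 1514; with
         * data_len = 1000 the linear area holds only the first 514 bytes.
         */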
891         firstfraglen = skb->len - skb->data_len;
892         if (firstfraglen < ETH_HLEN) {
893                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
894                 devdata->busy_cnt++;
895                 dev_err(&netdev->dev,
896                         "%s busy - first frag too small (%d)\n",
897                         __func__, firstfraglen);
898                 kfree_skb(skb);
899                 return NETDEV_TX_OK;
900         }
901
902         if ((len < ETH_MIN_PACKET_SIZE) &&
903             ((skb_end_pointer(skb) - skb->data) >= ETH_MIN_PACKET_SIZE)) {
904                 /* pad the packet out to minimum size */
905                 padlen = ETH_MIN_PACKET_SIZE - len;
906                 memset(&skb->data[len], 0, padlen);
907                 skb->tail += padlen;
908                 skb->len += padlen;
909                 len += padlen;
910                 firstfraglen += padlen;
911         }
912
913         cmdrsp = devdata->xmit_cmdrsp;
914         /* clear cmdrsp */
915         memset(cmdrsp, 0, SIZEOF_CMDRSP);
916         cmdrsp->net.type = NET_XMIT;
917         cmdrsp->cmdtype = CMD_NET_TYPE;
918
919         /* save the pointer to skb -- we'll need it for completion */
920         cmdrsp->net.buf = skb;
921
922         if (vnic_hit_high_watermark(devdata,
923                                     devdata->max_outstanding_net_xmits)) {
924                 /* extra NET_XMITs queued over to IOVM - need to wait */
925                 devdata->chstat.reject_count++;
926                 if (!devdata->queuefullmsg_logged &&
927                     ((devdata->chstat.reject_count & 0x3ff) == 1))
928                         devdata->queuefullmsg_logged = 1;
929                 netif_stop_queue(netdev);
930                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
931                 devdata->busy_cnt++;
932                 dev_dbg(&netdev->dev,
933                         "%s busy - waiting for iovm to catch up\n",
934                         __func__);
935                 kfree_skb(skb);
936                 return NETDEV_TX_OK;
937         }
938         if (devdata->queuefullmsg_logged)
939                 devdata->queuefullmsg_logged = 0;
940
941         if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
942                 cmdrsp->net.xmt.lincsum.valid = 1;
943                 cmdrsp->net.xmt.lincsum.protocol = skb->protocol;
944                 if (skb_transport_header(skb) > skb->data) {
945                         cmdrsp->net.xmt.lincsum.hrawoff =
946                                 skb_transport_header(skb) - skb->data;
947                         cmdrsp->net.xmt.lincsum.hrawoffv = 1;
948                 }
949                 if (skb_network_header(skb) > skb->data) {
950                         cmdrsp->net.xmt.lincsum.nhrawoff =
951                                 skb_network_header(skb) - skb->data;
952                         cmdrsp->net.xmt.lincsum.nhrawoffv = 1;
953                 }
954                 cmdrsp->net.xmt.lincsum.csum = skb->csum;
955         } else {
956                 cmdrsp->net.xmt.lincsum.valid = 0;
957         }
958
959         /* save off the length of the entire data packet */
960         cmdrsp->net.xmt.len = len;
961
962         /* copy ethernet header from first frag into cmdrsp
963          * - everything else will be passed in frags & DMA'ed
964          */
965         memcpy(cmdrsp->net.xmt.ethhdr, skb->data, ETH_HLEN);
966
967         /* copy frags info - from skb->data we need to only provide access
968          * beyond eth header
969          */
970         cmdrsp->net.xmt.num_frags =
971                 visor_copy_fragsinfo_from_skb(skb, firstfraglen,
972                                               MAX_PHYS_INFO,
973                                               cmdrsp->net.xmt.frags);
974         if (cmdrsp->net.xmt.num_frags < 0) {
975                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
976                 devdata->busy_cnt++;
977                 dev_err(&netdev->dev,
978                         "%s busy - copy frags failed\n", __func__);
979                 kfree_skb(skb);
980                 return NETDEV_TX_OK;
981         }
982
983         err = visorchannel_signalinsert(devdata->dev->visorchannel,
984                                         IOCHAN_TO_IOPART, cmdrsp);
985         if (err) {
986                 netif_stop_queue(netdev);
987                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
988                 devdata->busy_cnt++;
989                 dev_dbg(&netdev->dev,
990                         "%s busy - signalinsert failed\n", __func__);
991                 kfree_skb(skb);
992                 return NETDEV_TX_OK;
993         }
994
995         /* Track the skbs that have been sent to the IOVM for XMIT */
996         skb_queue_head(&devdata->xmitbufhead, skb);
997
998         /* update xmt stats */
999         devdata->net_stats.tx_packets++;
1000         devdata->net_stats.tx_bytes += skb->len;
1001         devdata->chstat.sent_xmit++;
1002
1003         /* check if we have hit the high watermark for netif_stop_queue() */
1004         if (vnic_hit_high_watermark(devdata,
1005                                     devdata->upper_threshold_net_xmits)) {
1006                 /* extra NET_XMITs queued over to IOVM - need to wait */
1007                 /* stop queue - call netif_wake_queue() after lower threshold */
1008                 netif_stop_queue(netdev);
1009                 dev_dbg(&netdev->dev,
1010                         "%s busy - invoking iovm flow control\n",
1011                         __func__);
1012                 devdata->flow_control_upper_hits++;
1013         }
1014         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1015
1016         /* skb will be freed when we get back NET_XMIT_DONE */
1017         return NETDEV_TX_OK;
1018 }
1019
1020 /* visornic_get_stats - returns net_stats of the visornic device
1021  * @netdev: netdevice.
1022  *
1023  * Return: Pointer to the net_device_stats struct for the device.
1024  */
1025 static struct net_device_stats *visornic_get_stats(struct net_device *netdev)
1026 {
1027         struct visornic_devdata *devdata = netdev_priv(netdev);
1028
1029         return &devdata->net_stats;
1030 }
1031
1032 /* visornic_change_mtu - changes mtu of device
1033  * @netdev: netdevice.
1034  * @new_mtu: Value of new mtu.
1035  *
1036  * The device's MTU cannot be changed by the system; it must be changed via a
1037  * CONTROLVM message. All vnics and pnics in a switch have to have the same MTU
1038  * for everything to work. Currently not supported.
1039  *
1040  * Return: -EINVAL.
1041  */
1042 static int visornic_change_mtu(struct net_device *netdev, int new_mtu)
1043 {
1044         return -EINVAL;
1045 }
1046
1047 /* visornic_set_multi - set visornic device flags
1048  * @netdev: netdevice.
1049  *
1050  * The only flag we currently support is IFF_PROMISC.
1051  */
1052 static void visornic_set_multi(struct net_device *netdev)
1053 {
1054         struct uiscmdrsp *cmdrsp;
1055         struct visornic_devdata *devdata = netdev_priv(netdev);
1056         int err = 0;
1057
1058         if (devdata->old_flags == netdev->flags)
1059                 return;
1060
1061         if ((netdev->flags & IFF_PROMISC) ==
1062             (devdata->old_flags & IFF_PROMISC))
1063                 goto out_save_flags;
1064
1065         cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1066         if (!cmdrsp)
1067                 return;
1068         cmdrsp->cmdtype = CMD_NET_TYPE;
1069         cmdrsp->net.type = NET_RCV_PROMISC;
1070         cmdrsp->net.enbdis.context = netdev;
1071         cmdrsp->net.enbdis.enable =
1072                 netdev->flags & IFF_PROMISC;
1073         err = visorchannel_signalinsert(devdata->dev->visorchannel,
1074                                         IOCHAN_TO_IOPART,
1075                                         cmdrsp);
1076         kfree(cmdrsp);
1077         if (err)
1078                 return;
1079
1080 out_save_flags:
1081         devdata->old_flags = netdev->flags;
1082 }
1083
1084 /* visornic_xmit_timeout - request to timeout the xmit
1085  * @netdev: netdevice.
1086  *
1087  * Queue the work and return. Make sure we have not already been informed that
1088  * the IO Partition is gone; if so, we will have already timed-out the xmits.
1089  */
1090 static void visornic_xmit_timeout(struct net_device *netdev)
1091 {
1092         struct visornic_devdata *devdata = netdev_priv(netdev);
1093         unsigned long flags;
1094
1095         spin_lock_irqsave(&devdata->priv_lock, flags);
1096         if (devdata->going_away) {
1097                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1098                 dev_dbg(&devdata->dev->device,
1099                         "%s aborting because device removal pending\n",
1100                         __func__);
1101                 return;
1102         }
1103
1104         /* Ensure that a ServerDown message hasn't been received */
1105         if (!devdata->enabled ||
1106             (devdata->server_down && !devdata->server_change_state)) {
1107                 dev_dbg(&netdev->dev, "%s no processing\n",
1108                         __func__);
1109                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1110                 return;
1111         }
1112         schedule_work(&devdata->timeout_reset);
1113         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1114 }
1115
1116 /* repost_return - repost rcv bufs that have come back
1117  * @cmdrsp: IO channel command struct to post.
1118  * @devdata: Visornic devdata for the device.
1119  * @skb: Socket buffer.
1120  * @netdev: netdevice.
1121  *
1122  * Repost rcv buffers that have been returned to us when we are finished
1123  * with them.
1124  *
1125  * Return: 0 for success, negative integer on error.
1126  */
1127 static int repost_return(struct uiscmdrsp *cmdrsp,
1128                          struct visornic_devdata *devdata,
1129                          struct sk_buff *skb, struct net_device *netdev)
1130 {
1131         struct net_pkt_rcv copy;
1132         int i = 0, cc, numreposted;
1133         int found_skb = 0;
1134         int status = 0;
1135
1136         copy = cmdrsp->net.rcv;
1137         switch (copy.numrcvbufs) {
1138         case 0:
1139                 devdata->n_rcv0++;
1140                 break;
1141         case 1:
1142                 devdata->n_rcv1++;
1143                 break;
1144         case 2:
1145                 devdata->n_rcv2++;
1146                 break;
1147         default:
1148                 devdata->n_rcvx++;
1149                 break;
1150         }
1151         for (cc = 0, numreposted = 0; cc < copy.numrcvbufs; cc++) {
1152                 for (i = 0; i < devdata->num_rcv_bufs; i++) {
1153                         if (devdata->rcvbuf[i] != copy.rcvbuf[cc])
1154                                 continue;
1155
1156                         if ((skb) && devdata->rcvbuf[i] == skb) {
1157                                 devdata->found_repost_rcvbuf_cnt++;
1158                                 found_skb = 1;
1159                                 devdata->repost_found_skb_cnt++;
1160                         }
1161                         devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1162                         if (!devdata->rcvbuf[i]) {
1163                                 devdata->num_rcv_bufs_could_not_alloc++;
1164                                 devdata->alloc_failed_in_repost_rtn_cnt++;
1165                                 status = -ENOMEM;
1166                                 break;
1167                         }
1168                         status = post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1169                         if (status) {
1170                                 kfree_skb(devdata->rcvbuf[i]);
1171                                 devdata->rcvbuf[i] = NULL;
1172                                 break;
1173                         }
1174                         numreposted++;
1175                         break;
1176                 }
1177         }
1178         if (numreposted != copy.numrcvbufs) {
1179                 devdata->n_repost_deficit++;
1180                 status = -EINVAL;
1181         }
1182         if (skb) {
1183                 if (found_skb) {
1184                         kfree_skb(skb);
1185                 } else {
1186                         status = -EINVAL;
1187                         devdata->bad_rcv_buf++;
1188                 }
1189         }
1190         return status;
1191 }
1192
1193 /* visornic_rx - handle receive packets coming back from IO Partition
1194  * @cmdrsp: Receive packet returned from IO Partition.
1195  *
1196  * Got a receive packet back from the IO Partition; handle it and send it up
1197  * the stack.
1198  *
1199  * Return: 1 iff an skb was received, otherwise 0.
1200  */
1201 static int visornic_rx(struct uiscmdrsp *cmdrsp)
1202 {
1203         struct visornic_devdata *devdata;
1204         struct sk_buff *skb, *prev, *curr;
1205         struct net_device *netdev;
1206         int cc, currsize, off;
1207         struct ethhdr *eth;
1208         unsigned long flags;
1209
1210         /* Post a new rcv buf to the other end using the cmdrsp we have at
1211          * hand; post it without holding the lock, but use the signal lock to
1212          * synchronize the queue insert. The cmdrsp that contains the net.rcv
1213          * is the one we are using to repost, so copy the info we need from it.
1214          */
1215         skb = cmdrsp->net.buf;
1216         netdev = skb->dev;
1217
1218         devdata = netdev_priv(netdev);
1219
1220         spin_lock_irqsave(&devdata->priv_lock, flags);
1221         atomic_dec(&devdata->num_rcvbuf_in_iovm);
1222
1223         /* set length to how much was ACTUALLY received -
1224          * NOTE: rcv_done_len includes actual length of data rcvd
1225          * including ethhdr
1226          */
1227         skb->len = cmdrsp->net.rcv.rcv_done_len;
1228
1229         /* update rcv stats - call it with priv_lock held */
1230         devdata->net_stats.rx_packets++;
1231         devdata->net_stats.rx_bytes += skb->len;
1232
1233         /* test enabled while holding lock */
1234         if (!(devdata->enabled && devdata->enab_dis_acked)) {
1235                 /* don't process it unless we're in enable mode and until
1236                  * we've gotten an ACK saying the other end got our RCV enable
1237                  */
1238                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1239                 repost_return(cmdrsp, devdata, skb, netdev);
1240                 return 0;
1241         }
1242
1243         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1244
1245         /* When the skb was allocated, skb->dev, skb->data, skb->len and
1246          * skb->data_len were set up, and the data has already been put into
1247          * the skb (both the first frag and the frags pages).
1248          * NOTE: firstfragslen is the amount of data in skb->data, i.e. that
1249          * which is not in nr_frags or frag_list. This is now simply
1250          * RCVPOST_BUF_SIZE. Bump the tail to show how much data is in the
1251          * first frag, set data_len to show the rest, and see if we have to
1252          * chain frag_list.
1253          */
1254         /* do PRECAUTIONARY check */
1255         if (skb->len > RCVPOST_BUF_SIZE) {
1256                 if (cmdrsp->net.rcv.numrcvbufs < 2) {
1257                         if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1258                                 dev_err(&devdata->netdev->dev,
1259                                         "repost_return failed");
1260                         return 0;
1261                 }
1262                 /* length rcvd is greater than firstfrag in this skb rcv buf  */
1263                 /* amount in skb->data */
1264                 skb->tail += RCVPOST_BUF_SIZE;
1265                 /* amount that will be in frag_list */
1266                 skb->data_len = skb->len - RCVPOST_BUF_SIZE;
1267         } else {
1268                 /* data fits in this skb - no chaining - do
1269                  * PRECAUTIONARY check
1270                  */
1271                 /* should be 1 */
1272                 if (cmdrsp->net.rcv.numrcvbufs != 1) {
1273                         if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1274                                 dev_err(&devdata->netdev->dev,
1275                                         "repost_return failed");
1276                         return 0;
1277                 }
1278                 skb->tail += skb->len;
1279                 /* nothing rcvd in frag_list */
1280                 skb->data_len = 0;
1281         }
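        /* Example (hypothetical sizes): with RCVPOST_BUF_SIZE = 4032, a
         * 6000-byte frame arrives in two rcv buffers; the head skb keeps
         * 4032 bytes in its linear area and data_len = 1968 is carried by
         * the buffer chained below.
         */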
1282         off = skb_tail_pointer(skb) - skb->data;
1283
1284         /* off is the amount we bumped the tail by in the head skb. It is
1285          * used to calculate the size of each chained skb below; it is also
1286          * used to index into bufline to continue the copy (for
1287          * chansocktwopc).
1288          * If necessary, chain the rcv skbs together.
1289          * NOTE: index 0 holds the same skb as cmdrsp->net.rcv.skb; we need
1290          * to chain the rest to that one.
1291          * - do PRECAUTIONARY check
1292          */
1293         if (cmdrsp->net.rcv.rcvbuf[0] != skb) {
1294                 if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1295                         dev_err(&devdata->netdev->dev, "repost_return failed");
1296                 return 0;
1297         }
1298
1299         if (cmdrsp->net.rcv.numrcvbufs > 1) {
1300                 /* chain the various rcv buffers into the skb's frag_list. */
1301                 /* Note: off was initialized above  */
1302                 for (cc = 1, prev = NULL;
1303                      cc < cmdrsp->net.rcv.numrcvbufs; cc++) {
1304                         curr = (struct sk_buff *)cmdrsp->net.rcv.rcvbuf[cc];
1305                         curr->next = NULL;
1306                         /* start of list- set head */
1307                         if (!prev)
1308                                 skb_shinfo(skb)->frag_list = curr;
1309                         else
1310                                 prev->next = curr;
1311                         prev = curr;
1312
1313                         /* should we set skb->len and skb->data_len for each
1314                          * buffer being chained??? can't hurt!
1315                          */
1316                         currsize = min(skb->len - off,
1317                                        (unsigned int)RCVPOST_BUF_SIZE);
1318                         curr->len = currsize;
1319                         curr->tail += currsize;
1320                         curr->data_len = 0;
1321                         off += currsize;
1322                 }
1323                 /* assert skb->len == off */
1324                 if (skb->len != off) {
1325                         netdev_err(devdata->netdev,
1326                                    "something wrong; skb->len:%d != off:%d\n",
1327                                    skb->len, off);
1328                 }
1329         }
1330
1331         /* set up packet's protocol type using ethernet header - this
1332          * sets up skb->pkt_type & it also PULLS out the eth header
1333          */
1334         skb->protocol = eth_type_trans(skb, netdev);
1335         eth = eth_hdr(skb);
1336         skb->csum = 0;
1337         skb->ip_summed = CHECKSUM_NONE;
1338
1339         do {
1340                 /* accept all packets */
1341                 if (netdev->flags & IFF_PROMISC)
1342                         break;
1343                 if (skb->pkt_type == PACKET_BROADCAST) {
1344                         /* accept all broadcast packets */
1345                         if (netdev->flags & IFF_BROADCAST)
1346                                 break;
1347                 } else if (skb->pkt_type == PACKET_MULTICAST) {
1348                         if ((netdev->flags & IFF_MULTICAST) &&
1349                             (netdev_mc_count(netdev))) {
1350                                 struct netdev_hw_addr *ha;
1351                                 int found_mc = 0;
1352
1353                                 /* only accept multicast packets that we can
1354                                  * find in our multicast address list
1355                                  */
1356                                 netdev_for_each_mc_addr(ha, netdev) {
1357                                         if (ether_addr_equal(eth->h_dest,
1358                                                              ha->addr)) {
1359                                                 found_mc = 1;
1360                                                 break;
1361                                         }
1362                                 }
1363                                 /* accept pkt, dest matches a multicast addr */
1364                                 if (found_mc)
1365                                         break;
1366                         }
1367                 /* accept packet; h_dest matches the vnic MAC address */
1368                 } else if (skb->pkt_type == PACKET_HOST) {
1369                         break;
1370                 } else if (skb->pkt_type == PACKET_OTHERHOST) {
1371                         /* something is not right */
1372                         dev_err(&devdata->netdev->dev,
1373                                 "**** FAILED to deliver rcv packet to OS; name:%s Dest:%pM VNIC:%pM\n",
1374                                 netdev->name, eth->h_dest, netdev->dev_addr);
1375                 }
1376                 /* drop packet - don't forward it up to OS */
1377                 devdata->n_rcv_packets_not_accepted++;
1378                 repost_return(cmdrsp, devdata, skb, netdev);
1379                 return 0;
1380         } while (0);
1381
1382         netif_receive_skb(skb);
1383         /* netif_receive_skb() returns a value, but in practice most
1384          * drivers ignore it.
1385          */
1386
1387         skb = NULL;
1388         /* Whether the packet was dropped or delivered, the skb is now owned
1389          * by kernel code and must not be freed here; we only repost a new
1390          * rcv buffer.
1391          */
1392         repost_return(cmdrsp, devdata, skb, netdev);
1393         return 1;
1394 }
1395
1396 /* devdata_initialize - initialize devdata structure
1397  * @devdata: visornic_devdata structure to initialize.
1398  * @dev:     visorbus_device it belongs to.
1399  *
1400  * Set up initial values for the visornic, based on channel and default values.
1401  *
1402  * Return: A pointer to the devdata structure.
1403  */
1404 static struct visornic_devdata *devdata_initialize(
1405                                         struct visornic_devdata *devdata,
1406                                         struct visor_device *dev)
1407 {
1408         devdata->dev = dev;
1409         devdata->incarnation_id = get_jiffies_64();
1410         return devdata;
1411 }
1412
1413 /* devdata_release - free up references in devdata
1414  * @devdata: Struct to clean up.
1415  */
1416 static void devdata_release(struct visornic_devdata *devdata)
1417 {
1418         kfree(devdata->rcvbuf);
1419         kfree(devdata->cmdrsp_rcv);
1420         kfree(devdata->xmit_cmdrsp);
1421 }
1422
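/* Standard net_device callbacks for a visornic interface; the handler
 * functions are defined earlier in this file.
 */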
1423 static const struct net_device_ops visornic_dev_ops = {
1424         .ndo_open = visornic_open,
1425         .ndo_stop = visornic_close,
1426         .ndo_start_xmit = visornic_xmit,
1427         .ndo_get_stats = visornic_get_stats,
1428         .ndo_change_mtu = visornic_change_mtu,
1429         .ndo_tx_timeout = visornic_xmit_timeout,
1430         .ndo_set_rx_mode = visornic_set_multi,
1431 };
1432
1433 /* DebugFS code */
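/* info_debugfs_read - debugfs read handler for the driver's "info" file
 * @file:   File being read.
 * @buf:    Userspace buffer to copy the formatted statistics into.
 * @len:    Maximum number of bytes to copy.
 * @offset: Offset into the formatted output.
 *
 * Walks the netdevs in the current network namespace and dumps channel and
 * flow-control statistics for each visornic device into a temporary buffer,
 * then copies the requested portion of that buffer to userspace.
 *
 * Return: Number of bytes copied to @buf, or negative errno on failure.
 */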
1434 static ssize_t info_debugfs_read(struct file *file, char __user *buf,
1435                                  size_t len, loff_t *offset)
1436 {
1437         ssize_t bytes_read = 0;
1438         int str_pos = 0;
1439         struct visornic_devdata *devdata;
1440         struct net_device *dev;
1441         char *vbuf;
1442
1443         if (len > MAX_BUF)
1444                 len = MAX_BUF;
1445         vbuf = kzalloc(len, GFP_KERNEL);
1446         if (!vbuf)
1447                 return -ENOMEM;
1448
1449         /* for each vnic channel dump out channel specific data */
1450         rcu_read_lock();
1451         for_each_netdev_rcu(current->nsproxy->net_ns, dev) {
1452                 /* Only consider netdevs that are visornic, and are open */
1453                 if ((dev->netdev_ops != &visornic_dev_ops) ||
1454                     (!netif_running(dev)))
1455                         continue;
1456
1457                 devdata = netdev_priv(dev);
1458                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1459                                      "netdev = %s (0x%p), MAC Addr %pM\n",
1460                                      dev->name,
1461                                      dev,
1462                                      dev->dev_addr);
1463                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1464                                      "VisorNic Dev Info = 0x%p\n", devdata);
1465                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1466                                      " num_rcv_bufs = %d\n",
1467                                      devdata->num_rcv_bufs);
1468                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1469                                      " max_outstanding_net_xmits = %lu\n",
1470                                      devdata->max_outstanding_net_xmits);
1471                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1472                                      " upper_threshold_net_xmits = %lu\n",
1473                                      devdata->upper_threshold_net_xmits);
1474                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1475                                      " lower_threshold_net_xmits = %lu\n",
1476                                      devdata->lower_threshold_net_xmits);
1477                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1478                                      " queuefullmsg_logged = %d\n",
1479                                      devdata->queuefullmsg_logged);
1480                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1481                                      " chstat.got_rcv = %lu\n",
1482                                      devdata->chstat.got_rcv);
1483                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1484                                      " chstat.got_enbdisack = %lu\n",
1485                                      devdata->chstat.got_enbdisack);
1486                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1487                                      " chstat.got_xmit_done = %lu\n",
1488                                      devdata->chstat.got_xmit_done);
1489                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1490                                      " chstat.xmit_fail = %lu\n",
1491                                      devdata->chstat.xmit_fail);
1492                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1493                                      " chstat.sent_enbdis = %lu\n",
1494                                      devdata->chstat.sent_enbdis);
1495                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1496                                      " chstat.sent_promisc = %lu\n",
1497                                      devdata->chstat.sent_promisc);
1498                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1499                                      " chstat.sent_post = %lu\n",
1500                                      devdata->chstat.sent_post);
1501                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1502                                      " chstat.sent_post_failed = %lu\n",
1503                                      devdata->chstat.sent_post_failed);
1504                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1505                                      " chstat.sent_xmit = %lu\n",
1506                                      devdata->chstat.sent_xmit);
1507                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1508                                      " chstat.reject_count = %lu\n",
1509                                      devdata->chstat.reject_count);
1510                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1511                                      " chstat.extra_rcvbufs_sent = %lu\n",
1512                                      devdata->chstat.extra_rcvbufs_sent);
1513                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1514                                      " n_rcv0 = %lu\n", devdata->n_rcv0);
1515                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1516                                      " n_rcv1 = %lu\n", devdata->n_rcv1);
1517                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1518                                      " n_rcv2 = %lu\n", devdata->n_rcv2);
1519                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1520                                      " n_rcvx = %lu\n", devdata->n_rcvx);
1521                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1522                                      " num_rcvbuf_in_iovm = %d\n",
1523                                      atomic_read(&devdata->num_rcvbuf_in_iovm));
1524                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1525                                      " alloc_failed_in_if_needed_cnt = %lu\n",
1526                                      devdata->alloc_failed_in_if_needed_cnt);
1527                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1528                                      " alloc_failed_in_repost_rtn_cnt = %lu\n",
1529                                      devdata->alloc_failed_in_repost_rtn_cnt);
1534                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1535                                      " found_repost_rcvbuf_cnt = %lu\n",
1536                                      devdata->found_repost_rcvbuf_cnt);
1537                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1538                                      " repost_found_skb_cnt = %lu\n",
1539                                      devdata->repost_found_skb_cnt);
1540                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1541                                      " n_repost_deficit = %lu\n",
1542                                      devdata->n_repost_deficit);
1543                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1544                                      " bad_rcv_buf = %lu\n",
1545                                      devdata->bad_rcv_buf);
1546                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1547                                      " n_rcv_packets_not_accepted = %lu\n",
1548                                      devdata->n_rcv_packets_not_accepted);
1549                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1550                                      " interrupts_rcvd = %llu\n",
1551                                      devdata->interrupts_rcvd);
1552                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1553                                      " interrupts_notme = %llu\n",
1554                                      devdata->interrupts_notme);
1555                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1556                                      " interrupts_disabled = %llu\n",
1557                                      devdata->interrupts_disabled);
1558                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1559                                      " busy_cnt = %llu\n",
1560                                      devdata->busy_cnt);
1561                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1562                                      " flow_control_upper_hits = %llu\n",
1563                                      devdata->flow_control_upper_hits);
1564                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1565                                      " flow_control_lower_hits = %llu\n",
1566                                      devdata->flow_control_lower_hits);
1567                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1568                                      " netif_queue = %s\n",
1569                                      netif_queue_stopped(devdata->netdev) ?
1570                                      "stopped" : "running");
1571                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1572                                      " xmits_outstanding = %lu\n",
1573                                      devdata_xmits_outstanding(devdata));
1574         }
1575         rcu_read_unlock();
1576         bytes_read = simple_read_from_buffer(buf, len, offset, vbuf, str_pos);
1577         kfree(vbuf);
1578         return bytes_read;
1579 }
1580
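/* Top-level "visornic" debugfs directory, created in visornic_init();
 * visornic_probe() adds a per-device subdirectory beneath it.
 */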
1581 static struct dentry *visornic_debugfs_dir;
1582 static const struct file_operations debugfs_info_fops = {
1583         .read = info_debugfs_read,
1584 };
1585
1586 /* send_rcv_posts_if_needed - send receive buffers to the IO Partition.
1587  * @devdata: Visornic device.
1588  */
1589 static void send_rcv_posts_if_needed(struct visornic_devdata *devdata)
1590 {
1591         int i;
1592         struct net_device *netdev;
1593         struct uiscmdrsp *cmdrsp = devdata->cmdrsp_rcv;
1594         int cur_num_rcv_bufs_to_alloc, rcv_bufs_allocated;
1595         int err;
1596
1597         /* don't do this until vnic is marked ready */
1598         if (!(devdata->enabled && devdata->enab_dis_acked))
1599                 return;
1600
1601         netdev = devdata->netdev;
1602         rcv_bufs_allocated = 0;
1603         /* This code tries to avoid getting stuck here forever, but still
1604          * retries on a later pass if we can't allocate them all this time.
1605          */
1606         cur_num_rcv_bufs_to_alloc = devdata->num_rcv_bufs_could_not_alloc;
1607         while (cur_num_rcv_bufs_to_alloc > 0) {
1608                 cur_num_rcv_bufs_to_alloc--;
1609                 for (i = 0; i < devdata->num_rcv_bufs; i++) {
1610                         if (devdata->rcvbuf[i])
1611                                 continue;
1612                         devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1613                         if (!devdata->rcvbuf[i]) {
1614                                 devdata->alloc_failed_in_if_needed_cnt++;
1615                                 break;
1616                         }
1617                         rcv_bufs_allocated++;
1618                         err = post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1619                         if (err) {
1620                                 kfree_skb(devdata->rcvbuf[i]);
1621                                 devdata->rcvbuf[i] = NULL;
1622                                 break;
1623                         }
1624                         devdata->chstat.extra_rcvbufs_sent++;
1625                 }
1626         }
1627         devdata->num_rcv_bufs_could_not_alloc -= rcv_bufs_allocated;
1628 }
1629
1630 /* drain_resp_queue - drains and ignores all messages from the resp queue
1631  * @cmdrsp:  IO channel command response message.
1632  * @devdata: Visornic device to drain.
1633  */
1634 static void drain_resp_queue(struct uiscmdrsp *cmdrsp,
1635                              struct visornic_devdata *devdata)
1636 {
1637         while (!visorchannel_signalremove(devdata->dev->visorchannel,
1638                                           IOCHAN_FROM_IOPART,
1639                                           cmdrsp))
1640                 ;
1641 }
1642
1643 /* service_resp_queue - drain the response queue
1644  * @cmdrsp:  IO channel command response message.
1645  * @devdata: Visornic device to drain.
1646  * @rx_work_done: Incremented by the number of receive packets processed.
1647  * @budget:       NAPI budget; maximum number of receive packets to process.
1648  *
1649  * Drain the response queue of any responses from the IO Partition. Process the
1650  * responses as we get them.
1651  */
1652 static void service_resp_queue(struct uiscmdrsp *cmdrsp,
1653                                struct visornic_devdata *devdata,
1654                                int *rx_work_done, int budget)
1655 {
1656         unsigned long flags;
1657         struct net_device *netdev;
1658
1659         while (*rx_work_done < budget) {
1660                 /* TODO: CLIENT ACQUIRE -- Don't really need this at the
1661                  * moment
1662                  */
1663                 /* queue empty */
1664                 if (visorchannel_signalremove(devdata->dev->visorchannel,
1665                                               IOCHAN_FROM_IOPART,
1666                                               cmdrsp))
1667                         break;
1668
1669                 switch (cmdrsp->net.type) {
1670                 case NET_RCV:
1671                         devdata->chstat.got_rcv++;
1672                         /* process incoming packet */
1673                         *rx_work_done += visornic_rx(cmdrsp);
1674                         break;
1675                 case NET_XMIT_DONE:
1676                         spin_lock_irqsave(&devdata->priv_lock, flags);
1677                         devdata->chstat.got_xmit_done++;
1678                         if (cmdrsp->net.xmtdone.xmt_done_result)
1679                                 devdata->chstat.xmit_fail++;
1680                         /* only call queue wake if we stopped it */
1681                         netdev = ((struct sk_buff *)cmdrsp->net.buf)->dev;
1682                         /* ASSERT netdev == devdata->netdev; */
1683                         if ((netdev == devdata->netdev) &&
1684                             netif_queue_stopped(netdev)) {
1685                                 /* check if we have crossed the lower watermark
1686                                  * for netif_wake_queue()
1687                                  */
1688                                 if (vnic_hit_low_watermark
1689                                     (devdata,
1690                                      devdata->lower_threshold_net_xmits)) {
1691                                         /* enough NET_XMITs completed
1692                                          * so can restart netif queue
1693                                          */
1694                                         netif_wake_queue(netdev);
1695                                         devdata->flow_control_lower_hits++;
1696                                 }
1697                         }
1698                         skb_unlink(cmdrsp->net.buf, &devdata->xmitbufhead);
1699                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1700                         kfree_skb(cmdrsp->net.buf);
1701                         break;
1702                 case NET_RCV_ENBDIS_ACK:
1703                         devdata->chstat.got_enbdisack++;
1704                         netdev = (struct net_device *)
1705                                  cmdrsp->net.enbdis.context;
1706                         spin_lock_irqsave(&devdata->priv_lock, flags);
1707                         devdata->enab_dis_acked = 1;
1708                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1709
1710                         if (devdata->server_down &&
1711                             devdata->server_change_state) {
1712                                 /* Inform Linux that the link is up */
1713                                 devdata->server_down = false;
1714                                 devdata->server_change_state = false;
1715                                 netif_wake_queue(netdev);
1716                                 netif_carrier_on(netdev);
1717                         }
1718                         break;
1719                 case NET_CONNECT_STATUS:
1720                         netdev = devdata->netdev;
1721                         if (cmdrsp->net.enbdis.enable == 1) {
1722                                 spin_lock_irqsave(&devdata->priv_lock, flags);
1723                                 devdata->enabled = cmdrsp->net.enbdis.enable;
1724                                 spin_unlock_irqrestore(&devdata->priv_lock,
1725                                                        flags);
1726                                 netif_wake_queue(netdev);
1727                                 netif_carrier_on(netdev);
1728                         } else {
1729                                 netif_stop_queue(netdev);
1730                                 netif_carrier_off(netdev);
1731                                 spin_lock_irqsave(&devdata->priv_lock, flags);
1732                                 devdata->enabled = cmdrsp->net.enbdis.enable;
1733                                 spin_unlock_irqrestore(&devdata->priv_lock,
1734                                                        flags);
1735                         }
1736                         break;
1737                 default:
1738                         break;
1739                 }
1740                 /* cmdrsp is now available for reuse  */
1741         }
1742 }
1743
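/* visornic_poll - NAPI poll routine
 * @napi:   NAPI structure embedded in our visornic_devdata.
 * @budget: Maximum number of receive packets to process.
 *
 * Reposts receive buffers to the IO Partition if needed, then services the
 * response queue within @budget, and completes NAPI when less than the full
 * budget was consumed.
 *
 * Return: Number of receive packets processed.
 */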
1744 static int visornic_poll(struct napi_struct *napi, int budget)
1745 {
1746         struct visornic_devdata *devdata = container_of(napi,
1747                                                         struct visornic_devdata,
1748                                                         napi);
1749         int rx_count = 0;
1750
1751         send_rcv_posts_if_needed(devdata);
1752         service_resp_queue(devdata->cmdrsp, devdata, &rx_count, budget);
1753
1754         /* If there aren't any more packets to receive, stop the poll */
1755         if (rx_count < budget)
1756                 napi_complete_done(napi, rx_count);
1757
1758         return rx_count;
1759 }
1760
1761 /* poll_for_irq - checks the status of the response queue
1762  * @v: Void pointer to the visornic devdata struct.
1763  *
1764  * Timer function that periodically checks the response queue and schedules
1765  * the napi poll to drain it when it is not empty.
1766  */
1767 static void poll_for_irq(unsigned long v)
1768 {
1769         struct visornic_devdata *devdata = (struct visornic_devdata *)v;
1770
1771         if (!visorchannel_signalempty(
1772                                    devdata->dev->visorchannel,
1773                                    IOCHAN_FROM_IOPART))
1774                 napi_schedule(&devdata->napi);
1775
1776         atomic_set(&devdata->interrupt_rcvd, 0);
1777
1778         mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));
1779 }
1780
1781 /* visornic_probe - probe function for visornic devices
1782  * @dev: The visor device discovered.
1783  *
1784  * Called when visorbus discovers a visornic device on its bus. It creates a new
1785  * visornic ethernet adapter.
1786  *
1787  * Return: 0 on success, or negative integer on error.
1788  */
1789 static int visornic_probe(struct visor_device *dev)
1790 {
1791         struct visornic_devdata *devdata = NULL;
1792         struct net_device *netdev = NULL;
1793         int err;
1794         int channel_offset = 0;
1795         u64 features;
1796
1797         netdev = alloc_etherdev(sizeof(struct visornic_devdata));
1798         if (!netdev) {
1799                 dev_err(&dev->device,
1800                         "%s alloc_etherdev failed\n", __func__);
1801                 return -ENOMEM;
1802         }
1803
1804         netdev->netdev_ops = &visornic_dev_ops;
1805         netdev->watchdog_timeo = 5 * HZ;
1806         SET_NETDEV_DEV(netdev, &dev->device);
1807
1808         /* Get MAC address from channel and read it into the device. */
1809         netdev->addr_len = ETH_ALEN;
1810         channel_offset = offsetof(struct visor_io_channel, vnic.macaddr);
1811         err = visorbus_read_channel(dev, channel_offset, netdev->dev_addr,
1812                                     ETH_ALEN);
1813         if (err < 0) {
1814                 dev_err(&dev->device,
1815                         "%s failed to get mac addr from chan (%d)\n",
1816                         __func__, err);
1817                 goto cleanup_netdev;
1818         }
1819
1820         devdata = devdata_initialize(netdev_priv(netdev), dev);
1821         if (!devdata) {
1822                 dev_err(&dev->device,
1823                         "%s devdata_initialize failed\n", __func__);
1824                 err = -ENOMEM;
1825                 goto cleanup_netdev;
1826         }
1827         /* don't trust messages lying around in the channel */
1828         drain_resp_queue(devdata->cmdrsp, devdata);
1829
1830         devdata->netdev = netdev;
1831         dev_set_drvdata(&dev->device, devdata);
1832         init_waitqueue_head(&devdata->rsp_queue);
1833         spin_lock_init(&devdata->priv_lock);
1834         /* not enabled to receive yet */
1835         devdata->enabled = 0;
1836         atomic_set(&devdata->usage, 1);
1837
1838         /* Setup rcv bufs */
1839         channel_offset = offsetof(struct visor_io_channel, vnic.num_rcv_bufs);
1840         err = visorbus_read_channel(dev, channel_offset,
1841                                     &devdata->num_rcv_bufs, 4);
1842         if (err) {
1843                 dev_err(&dev->device,
1844                         "%s failed to get #rcv bufs from chan (%d)\n",
1845                         __func__, err);
1846                 goto cleanup_netdev;
1847         }
1848
1849         devdata->rcvbuf = kcalloc(devdata->num_rcv_bufs,
1850                                   sizeof(struct sk_buff *), GFP_KERNEL);
1851         if (!devdata->rcvbuf) {
1852                 err = -ENOMEM;
1853                 goto cleanup_netdev;
1854         }
1855
1856         /* Set the net_xmit outstanding threshold.
1857          * Always leave at least two slots open, but never go below three;
1858          * note that max_outstanding_net_xmits must be > 0.
1859          */
1860         devdata->max_outstanding_net_xmits =
1861                 max_t(unsigned long, 3, ((devdata->num_rcv_bufs / 3) - 2));
1862         devdata->upper_threshold_net_xmits =
1863                 max_t(unsigned long,
1864                       2, (devdata->max_outstanding_net_xmits - 1));
1865         devdata->lower_threshold_net_xmits =
1866                 max_t(unsigned long,
1867                       1, (devdata->max_outstanding_net_xmits / 2));
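        /* For example, with a (hypothetical) channel value of 64 receive
         * buffers this works out to max_outstanding_net_xmits = 19,
         * upper_threshold_net_xmits = 18 and lower_threshold_net_xmits = 9.
         */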
1868
1869         skb_queue_head_init(&devdata->xmitbufhead);
1870
1871         /* create a cmdrsp we can use to post and unpost rcv buffers */
1872         devdata->cmdrsp_rcv = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1873         if (!devdata->cmdrsp_rcv) {
1874                 err = -ENOMEM;
1875                 goto cleanup_rcvbuf;
1876         }
1877         devdata->xmit_cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1878         if (!devdata->xmit_cmdrsp) {
1879                 err = -ENOMEM;
1880                 goto cleanup_cmdrsp_rcv;
1881         }
1882         INIT_WORK(&devdata->timeout_reset, visornic_timeout_reset);
1883         devdata->server_down = false;
1884         devdata->server_change_state = false;
1885
1886         /* set the default MTU */
1887         channel_offset = offsetof(struct visor_io_channel, vnic.mtu);
1888         err = visorbus_read_channel(dev, channel_offset, &netdev->mtu, 4);
1889         if (err) {
1890                 dev_err(&dev->device,
1891                         "%s failed to get mtu from chan (%d)\n",
1892                         __func__, err);
1893                 goto cleanup_xmit_cmdrsp;
1894         }
1895
1896         /* TODO: Setup Interrupt information */
1897         /* Set up napi and the polling timer used to fetch responses */
1898         netif_napi_add(netdev, &devdata->napi, visornic_poll, NAPI_WEIGHT);
1899
1900         setup_timer(&devdata->irq_poll_timer, poll_for_irq,
1901                     (unsigned long)devdata);
1902         /* Note: This timer has to be running before the interface can be
1903          * enabled, because the napi routine is responsible for setting
1904          * enab_dis_acked.
1905          */
1906         mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));
1907
1908         channel_offset = offsetof(struct visor_io_channel,
1909                                   channel_header.features);
1910         err = visorbus_read_channel(dev, channel_offset, &features, 8);
1911         if (err) {
1912                 dev_err(&dev->device,
1913                         "%s failed to get features from chan (%d)\n",
1914                         __func__, err);
1915                 goto cleanup_napi_add;
1916         }
1917
1918         features |= VISOR_CHANNEL_IS_POLLING;
1919         features |= VISOR_DRIVER_ENHANCED_RCVBUF_CHECKING;
1920         err = visorbus_write_channel(dev, channel_offset, &features, 8);
1921         if (err) {
1922                 dev_err(&dev->device,
1923                         "%s failed to set features in chan (%d)\n",
1924                         __func__, err);
1925                 goto cleanup_napi_add;
1926         }
1927
1928         /* Note: Interrupts have to be enabled before the interface can be
1929          * brought up, because the napi routine is responsible for setting
1930          * enab_dis_acked.
1931          */
1932         visorbus_enable_channel_interrupts(dev);
1933
1934         err = register_netdev(netdev);
1935         if (err) {
1936                 dev_err(&dev->device,
1937                         "%s register_netdev failed (%d)\n", __func__, err);
1938                 goto cleanup_napi_add;
1939         }
1940
1941         /* create debug/sysfs directories */
1942         devdata->eth_debugfs_dir = debugfs_create_dir(netdev->name,
1943                                                       visornic_debugfs_dir);
1944         if (!devdata->eth_debugfs_dir) {
1945                 dev_err(&dev->device,
1946                         "%s debugfs_create_dir %s failed\n",
1947                         __func__, netdev->name);
1948                 err = -ENOMEM;
1949                 goto cleanup_register_netdev;
1950         }
1951
1952         dev_info(&dev->device, "%s success netdev=%s\n",
1953                  __func__, netdev->name);
1954         return 0;
1955
1956 cleanup_register_netdev:
1957         unregister_netdev(netdev);
1958
1959 cleanup_napi_add:
1960         del_timer_sync(&devdata->irq_poll_timer);
1961         netif_napi_del(&devdata->napi);
1962
1963 cleanup_xmit_cmdrsp:
1964         kfree(devdata->xmit_cmdrsp);
1965
1966 cleanup_cmdrsp_rcv:
1967         kfree(devdata->cmdrsp_rcv);
1968
1969 cleanup_rcvbuf:
1970         kfree(devdata->rcvbuf);
1971
1972 cleanup_netdev:
1973         free_netdev(netdev);
1974         return err;
1975 }
1976
1977 /* host_side_disappeared - IO Partition is gone
1978  * @devdata: Device object.
1979  *
1980  * IO partition servicing this device is gone; do cleanup.
1981  */
1982 static void host_side_disappeared(struct visornic_devdata *devdata)
1983 {
1984         unsigned long flags;
1985
1986         spin_lock_irqsave(&devdata->priv_lock, flags);
1987         /* indicate device destroyed */
1988         devdata->dev = NULL;
1989         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1990 }
1991
1992 /* visornic_remove - called when visornic dev goes away
1993  * @dev: Visornic device that is being removed.
1994  *
1995  * Called when DEVICE_DESTROY gets called to remove device.
1996  */
1997 static void visornic_remove(struct visor_device *dev)
1998 {
1999         struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
2000         struct net_device *netdev;
2001         unsigned long flags;
2002
2003         if (!devdata) {
2004                 dev_err(&dev->device, "%s no devdata\n", __func__);
2005                 return;
2006         }
2007         spin_lock_irqsave(&devdata->priv_lock, flags);
2008         if (devdata->going_away) {
2009                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2010                 dev_err(&dev->device, "%s already being removed\n", __func__);
2011                 return;
2012         }
2013         devdata->going_away = true;
2014         spin_unlock_irqrestore(&devdata->priv_lock, flags);
2015         netdev = devdata->netdev;
2016         if (!netdev) {
2017                 dev_err(&dev->device, "%s no net device\n", __func__);
2018                 return;
2019         }
2020
2021         /* going_away prevents new items being added to the workqueues */
2022         cancel_work_sync(&devdata->timeout_reset);
2023
2024         debugfs_remove_recursive(devdata->eth_debugfs_dir);
2025         /* this will call visornic_close() */
2026         unregister_netdev(netdev);
2027
2028         del_timer_sync(&devdata->irq_poll_timer);
2029         netif_napi_del(&devdata->napi);
2030
2031         dev_set_drvdata(&dev->device, NULL);
2032         host_side_disappeared(devdata);
2033         devdata_release(devdata);
2034         free_netdev(netdev);
2035 }
2036
2037 /* visornic_pause - called when IO Part disappears
2038  * @dev:           Visornic device that is being serviced.
2039  * @complete_func: Call when finished.
2040  *
2041  * Called when the IO Partition has gone down. Need to free up resources and
2042  * wait for IO partition to come back. Mark link as down and don't attempt any
2043  * DMA. When we have freed memory, call the complete_func so that Command knows
2044  * we are done. If we don't call complete_func, the IO Partition will never
2045  * come back.
2046  *
2047  * Return: 0 on success.
2048  */
2049 static int visornic_pause(struct visor_device *dev,
2050                           visorbus_state_complete_func complete_func)
2051 {
2052         struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
2053
2054         visornic_serverdown(devdata, complete_func);
2055         return 0;
2056 }
2057
2058 /* visornic_resume - called when IO Partition has recovered
2059  * @dev:           Visornic device that is being serviced.
2060  * @complete_func: Call when finished.
2061  *
2062  * Called when the IO partition has recovered. Re-establish connection to the IO
2063  * Partition and set the link up. Okay to do DMA again.
2064  *
2065  * Return: 0 on success, negative integer on error.
2066  */
2067 static int visornic_resume(struct visor_device *dev,
2068                            visorbus_state_complete_func complete_func)
2069 {
2070         struct visornic_devdata *devdata;
2071         struct net_device *netdev;
2072         unsigned long flags;
2073
2074         devdata = dev_get_drvdata(&dev->device);
2075         if (!devdata) {
2076                 dev_err(&dev->device, "%s no devdata\n", __func__);
2077                 return -EINVAL;
2078         }
2079
2080         netdev = devdata->netdev;
2081
2082         spin_lock_irqsave(&devdata->priv_lock, flags);
2083         if (devdata->server_change_state) {
2084                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2085                 dev_err(&dev->device, "%s server already changing state\n",
2086                         __func__);
2087                 return -EINVAL;
2088         }
2089         if (!devdata->server_down) {
2090                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2091                 dev_err(&dev->device, "%s server not down\n", __func__);
2092                 complete_func(dev, 0);
2093                 return 0;
2094         }
2095         devdata->server_change_state = true;
2096         spin_unlock_irqrestore(&devdata->priv_lock, flags);
2097
2098         /* Must transition channel to ATTACHED state BEFORE
2099          * we can start using the device again.
2100          * TODO: State transitions
2101          */
2102         mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));
2103
2104         rtnl_lock();
2105         dev_open(netdev);
2106         rtnl_unlock();
2107
2108         complete_func(dev, 0);
2109         return 0;
2110 }
2111
2112 /* This is used to tell the visorbus driver which types of visor devices
2113  * we support, and what functions to call when a visor device that we support
2114  * is attached or removed.
2115  */
2116 static struct visor_driver visornic_driver = {
2117         .name = "visornic",
2118         .owner = THIS_MODULE,
2119         .channel_types = visornic_channel_types,
2120         .probe = visornic_probe,
2121         .remove = visornic_remove,
2122         .pause = visornic_pause,
2123         .resume = visornic_resume,
2124         .channel_interrupt = NULL,
2125 };
2126
2127 /* visornic_init - init function
2128  *
2129  * Init function for the visornic driver. Do initial driver setup and wait
2130  * for devices.
2131  *
2132  * Return: 0 on success, negative integer on error.
2133  */
2134 static int visornic_init(void)
2135 {
2136         struct dentry *ret;
2137         int err = -ENOMEM;
2138
2139         visornic_debugfs_dir = debugfs_create_dir("visornic", NULL);
2140         if (!visornic_debugfs_dir)
2141                 return err;
2142
2143         ret = debugfs_create_file("info", 0400, visornic_debugfs_dir, NULL,
2144                                   &debugfs_info_fops);
2145         if (!ret)
2146                 goto cleanup_debugfs;
2147         ret = debugfs_create_file("enable_ints", 0200, visornic_debugfs_dir,
2148                                   NULL, &debugfs_enable_ints_fops);
2149         if (!ret)
2150                 goto cleanup_debugfs;
2151
2152         err = visorbus_register_visor_driver(&visornic_driver);
2153         if (err)
2154                 goto cleanup_debugfs;
2155
2156         return 0;
2157
2158 cleanup_debugfs:
2159         debugfs_remove_recursive(visornic_debugfs_dir);
2160         return err;
2161 }
2162
2163 /* visornic_cleanup - driver exit routine
2164  *
2165  * Unregister driver from the bus and free up memory.
2166  */
2167 static void visornic_cleanup(void)
2168 {
2169         visorbus_unregister_visor_driver(&visornic_driver);
2170         debugfs_remove_recursive(visornic_debugfs_dir);
2171 }
2172
2173 module_init(visornic_init);
2174 module_exit(visornic_cleanup);
2175
2176 MODULE_AUTHOR("Unisys");
2177 MODULE_LICENSE("GPL");
2178 MODULE_DESCRIPTION("s-Par NIC driver for virtual network devices");