GNU Linux-libre 4.14.290-gnu1
drivers/infiniband/hw/hfi1/mad.c
/*
 * Copyright(c) 2015-2017 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/net.h>
#include <rdma/opa_addr.h>
#define OPA_NUM_PKEY_BLOCKS_PER_SMP (OPA_SMP_DR_DATA_SIZE \
			/ (OPA_PARTITION_TABLE_BLK_SIZE * sizeof(u16)))

#include "hfi.h"
#include "mad.h"
#include "trace.h"
#include "qp.h"
#include "vnic.h"

/* the reset value from the FM is supposed to be 0xffff, handle both */
#define OPA_LINK_WIDTH_RESET_OLD 0x0fff
#define OPA_LINK_WIDTH_RESET 0xffff

struct trap_node {
        struct list_head list;
        struct opa_mad_notice_attr data;
        __be64 tid;
        int len;
        u32 retry;
        u8 in_use;
        u8 repress;
};

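/*
 * Make sure the SMP buffer is large enough for the attribute being
 * accessed: returns 0 if request_len can hold data_size bytes,
 * -EINVAL otherwise.
 */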
static int smp_length_check(u32 data_size, u32 request_len)
{
        if (unlikely(request_len < data_size))
                return -EINVAL;

        return 0;
}

static int reply(struct ib_mad_hdr *smp)
{
        /*
         * The verbs framework will handle the directed/LID route
         * packet changes.
         */
        smp->method = IB_MGMT_METHOD_GET_RESP;
        if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
                smp->status |= IB_SMP_DIRECTION;
        return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
}

static inline void clear_opa_smp_data(struct opa_smp *smp)
{
        void *data = opa_get_smp_data(smp);
        size_t size = opa_get_smp_data_size(smp);

        memset(data, 0, size);
}

void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port)
{
        struct ib_event event;

        event.event = IB_EVENT_PKEY_CHANGE;
        event.device = &dd->verbs_dev.rdi.ibdev;
        event.element.port_num = port;
        ib_dispatch_event(&event);
}

/*
 * If the port is down, clean up all pending traps.  We need to be careful
 * with the given trap, because it may be queued.
 */
static void cleanup_traps(struct hfi1_ibport *ibp, struct trap_node *trap)
{
        struct trap_node *node, *q;
        unsigned long flags;
        struct list_head trap_list;
        int i;

        for (i = 0; i < RVT_MAX_TRAP_LISTS; i++) {
                spin_lock_irqsave(&ibp->rvp.lock, flags);
                list_replace_init(&ibp->rvp.trap_lists[i].list, &trap_list);
                ibp->rvp.trap_lists[i].list_len = 0;
                spin_unlock_irqrestore(&ibp->rvp.lock, flags);

                /*
                 * Remove all items from the list, freeing all the non-given
                 * traps.
                 */
                list_for_each_entry_safe(node, q, &trap_list, list) {
                        list_del(&node->list);
                        if (node != trap)
                                kfree(node);
                }
        }

        /*
         * If this wasn't on one of the lists it would not be freed.  If it
         * was on the list, it is now safe to free.
         */
        kfree(trap);
}

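/*
 * Queue a trap for transmission, de-duplicating retries.  If the trap
 * is already queued (a retry) only its retry count is bumped; otherwise
 * it is appended, subject to the RVT_MAX_TRAP_LEN per-list limit.  If no
 * trap timer is currently pending, arm one and return the list head as
 * the trap to send now; otherwise return NULL and let the timer send it.
 */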
static struct trap_node *check_and_add_trap(struct hfi1_ibport *ibp,
                                            struct trap_node *trap)
{
        struct trap_node *node;
        struct trap_list *trap_list;
        unsigned long flags;
        unsigned long timeout;
        int found = 0;
        unsigned int queue_id;
        static int trap_count;

        queue_id = trap->data.generic_type & 0x0F;
        if (queue_id >= RVT_MAX_TRAP_LISTS) {
                trap_count++;
                pr_err_ratelimited("hfi1: Invalid trap 0x%0x dropped. Total dropped: %d\n",
                                   trap->data.generic_type, trap_count);
                kfree(trap);
                return NULL;
        }

        /*
         * Since the retry (handle timeout) does not remove a trap request
         * from the list, all we have to do is compare the node.
         */
        spin_lock_irqsave(&ibp->rvp.lock, flags);
        trap_list = &ibp->rvp.trap_lists[queue_id];

        list_for_each_entry(node, &trap_list->list, list) {
                if (node == trap) {
                        node->retry++;
                        found = 1;
                        break;
                }
        }

        /* If it is not on the list, add it, limited to RVT_MAX_TRAP_LEN. */
        if (!found) {
                if (trap_list->list_len < RVT_MAX_TRAP_LEN) {
                        trap_list->list_len++;
                        list_add_tail(&trap->list, &trap_list->list);
                } else {
                        pr_warn_ratelimited("hfi1: Maximum trap limit reached for 0x%0x traps\n",
                                            trap->data.generic_type);
                        kfree(trap);
                }
        }

        /*
         * Next check to see if there is a timer pending.  If not, set it up
         * and get the first trap from the list.
         */
        node = NULL;
        if (!timer_pending(&ibp->rvp.trap_timer)) {
                /*
                 * o14-2
                 * If the time out is set we have to wait until it expires
                 * before the trap can be sent.
                 * This should be > RVT_TRAP_TIMEOUT
                 */
                timeout = (RVT_TRAP_TIMEOUT *
                           (1UL << ibp->rvp.subnet_timeout)) / 1000;
                mod_timer(&ibp->rvp.trap_timer,
                          jiffies + usecs_to_jiffies(timeout));
                node = list_first_entry(&trap_list->list, struct trap_node,
                                        list);
                node->in_use = 1;
        }
        spin_unlock_irqrestore(&ibp->rvp.lock, flags);

        return node;
}

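/*
 * Handle a TrapRepress() from the SM: look at the head of each trap
 * list for the TID being repressed and either mark the trap repressed
 * (if a send is in progress) or remove and free it.
 */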
static void subn_handle_opa_trap_repress(struct hfi1_ibport *ibp,
                                         struct opa_smp *smp)
{
        struct trap_list *trap_list;
        struct trap_node *trap;
        unsigned long flags;
        int i;

        if (smp->attr_id != IB_SMP_ATTR_NOTICE)
                return;

        spin_lock_irqsave(&ibp->rvp.lock, flags);
        for (i = 0; i < RVT_MAX_TRAP_LISTS; i++) {
                trap_list = &ibp->rvp.trap_lists[i];
                trap = list_first_entry_or_null(&trap_list->list,
                                                struct trap_node, list);
                if (trap && trap->tid == smp->tid) {
                        if (trap->in_use) {
                                trap->repress = 1;
                        } else {
                                trap_list->list_len--;
                                list_del(&trap->list);
                                kfree(trap);
                        }
                        break;
                }
        }
        spin_unlock_irqrestore(&ibp->rvp.lock, flags);
}

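/*
 * Fill in the address handle attributes used to reach the SM.  DLIDs at
 * or above the multicast LID base are routed through a GRH whose DGID
 * carries an OPA-style extended interface ID built from the DLID.
 */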
static void hfi1_update_sm_ah_attr(struct hfi1_ibport *ibp,
                                   struct rdma_ah_attr *attr, u32 dlid)
{
        rdma_ah_set_dlid(attr, dlid);
        rdma_ah_set_port_num(attr, ppd_from_ibp(ibp)->port);
        if (dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
                struct ib_global_route *grh = rdma_ah_retrieve_grh(attr);

                rdma_ah_set_ah_flags(attr, IB_AH_GRH);
                grh->sgid_index = 0;
                grh->hop_limit = 1;
                grh->dgid.global.subnet_prefix =
                        ibp->rvp.gid_prefix;
                grh->dgid.global.interface_id = OPA_MAKE_ID(dlid);
        }
}

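/*
 * Update an existing QP0 address handle with a new DLID.  Done under
 * rcu_read_lock() so the AH is only touched while QP0 still exists.
 */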
static int hfi1_modify_qp0_ah(struct hfi1_ibport *ibp,
                              struct rvt_ah *ah, u32 dlid)
{
        struct rdma_ah_attr attr;
        struct rvt_qp *qp0;
        int ret = -EINVAL;

        memset(&attr, 0, sizeof(attr));
        attr.type = ah->ibah.type;
        hfi1_update_sm_ah_attr(ibp, &attr, dlid);
        rcu_read_lock();
        qp0 = rcu_dereference(ibp->rvp.qp[0]);
        if (qp0)
                ret = rdma_modify_ah(&ah->ibah, &attr);
        rcu_read_unlock();
        return ret;
}

static struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u32 dlid)
{
        struct rdma_ah_attr attr;
        struct ib_ah *ah = ERR_PTR(-EINVAL);
        struct rvt_qp *qp0;
        struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
        struct hfi1_devdata *dd = dd_from_ppd(ppd);
        u8 port_num = ppd->port;

        memset(&attr, 0, sizeof(attr));
        attr.type = rdma_ah_find_type(&dd->verbs_dev.rdi.ibdev, port_num);
        hfi1_update_sm_ah_attr(ibp, &attr, dlid);
        rcu_read_lock();
        qp0 = rcu_dereference(ibp->rvp.qp[0]);
        if (qp0)
                ah = rdma_create_ah(qp0->ibqp.pd, &attr);
        rcu_read_unlock();
        return ah;
}

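/*
 * Build and post a Trap() SMP for the given trap node.  Traps are
 * dropped (and the pending lists flushed) if the port has no send agent
 * or is not Active; otherwise the trap is queued via check_and_add_trap()
 * and sent to the SM through an address handle cached in ibp->rvp.sm_ah.
 */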
static void send_trap(struct hfi1_ibport *ibp, struct trap_node *trap)
{
        struct ib_mad_send_buf *send_buf;
        struct ib_mad_agent *agent;
        struct opa_smp *smp;
        unsigned long flags;
        int pkey_idx;
        u32 qpn = ppd_from_ibp(ibp)->sm_trap_qp;

        agent = ibp->rvp.send_agent;
        if (!agent) {
                cleanup_traps(ibp, trap);
                return;
        }

        /* o14-3.2.1 */
        if (driver_lstate(ppd_from_ibp(ibp)) != IB_PORT_ACTIVE) {
                cleanup_traps(ibp, trap);
                return;
        }

        /* Add the trap to the list if necessary and see if we can send it */
        trap = check_and_add_trap(ibp, trap);
        if (!trap)
                return;

        pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
        if (pkey_idx < 0) {
                pr_warn("%s: failed to find limited mgmt pkey, defaulting to 0x%x\n",
                        __func__, hfi1_get_pkey(ibp, 1));
                pkey_idx = 1;
        }

        send_buf = ib_create_send_mad(agent, qpn, pkey_idx, 0,
                                      IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
                                      GFP_ATOMIC, IB_MGMT_BASE_VERSION);
        if (IS_ERR(send_buf))
                return;

        smp = send_buf->mad;
        smp->base_version = OPA_MGMT_BASE_VERSION;
        smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
        smp->class_version = OPA_SM_CLASS_VERSION;
        smp->method = IB_MGMT_METHOD_TRAP;

        /* Only update the transaction ID for new traps (o13-5). */
        if (trap->tid == 0) {
                ibp->rvp.tid++;
                /* make sure that tid != 0 */
                if (ibp->rvp.tid == 0)
                        ibp->rvp.tid++;
                trap->tid = cpu_to_be64(ibp->rvp.tid);
        }
        smp->tid = trap->tid;

        smp->attr_id = IB_SMP_ATTR_NOTICE;
        /* o14-1: smp->mkey = 0; */

        memcpy(smp->route.lid.data, &trap->data, trap->len);

        spin_lock_irqsave(&ibp->rvp.lock, flags);
        if (!ibp->rvp.sm_ah) {
                if (ibp->rvp.sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
                        struct ib_ah *ah;

                        ah = hfi1_create_qp0_ah(ibp, ibp->rvp.sm_lid);
                        if (IS_ERR(ah)) {
                                spin_unlock_irqrestore(&ibp->rvp.lock, flags);
                                return;
                        }
                        send_buf->ah = ah;
                        ibp->rvp.sm_ah = ibah_to_rvtah(ah);
                } else {
                        spin_unlock_irqrestore(&ibp->rvp.lock, flags);
                        return;
                }
        } else {
                send_buf->ah = &ibp->rvp.sm_ah->ibah;
        }

        /*
         * If the trap was repressed while things were getting set up, don't
         * bother sending it. This could happen for a retry.
         */
        if (trap->repress) {
                list_del(&trap->list);
                spin_unlock_irqrestore(&ibp->rvp.lock, flags);
                kfree(trap);
                ib_free_send_mad(send_buf);
                return;
        }

        trap->in_use = 0;
        spin_unlock_irqrestore(&ibp->rvp.lock, flags);

        if (ib_post_send_mad(send_buf, NULL))
                ib_free_send_mad(send_buf);
}

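/*
 * Trap timer callback: scan the trap lists in priority order and
 * (re)send the first pending trap found.
 */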
void hfi1_handle_trap_timer(unsigned long data)
{
        struct hfi1_ibport *ibp = (struct hfi1_ibport *)data;
        struct trap_node *trap = NULL;
        unsigned long flags;
        int i;

        /* Find the trap with the highest priority */
        spin_lock_irqsave(&ibp->rvp.lock, flags);
        for (i = 0; !trap && i < RVT_MAX_TRAP_LISTS; i++) {
                trap = list_first_entry_or_null(&ibp->rvp.trap_lists[i].list,
                                                struct trap_node, list);
        }
        spin_unlock_irqrestore(&ibp->rvp.lock, flags);

        if (trap)
                send_trap(ibp, trap);
}

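/*
 * Allocate and minimally initialize a trap node.  GFP_ATOMIC is used
 * since traps can be generated from atomic context.
 */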
static struct trap_node *create_trap_node(u8 type, __be16 trap_num, u32 lid)
{
        struct trap_node *trap;

        trap = kzalloc(sizeof(*trap), GFP_ATOMIC);
        if (!trap)
                return NULL;

        INIT_LIST_HEAD(&trap->list);
        trap->data.generic_type = type;
        trap->data.prod_type_lsb = IB_NOTICE_PROD_CA;
        trap->data.trap_num = trap_num;
        trap->data.issuer_lid = cpu_to_be32(lid);

        return trap;
}

/*
 * Send a bad P_Key trap (ch. 14.3.8).
 */
void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl,
                   u32 qp1, u32 qp2, u32 lid1, u32 lid2)
{
        struct trap_node *trap;
        u32 lid = ppd_from_ibp(ibp)->lid;

        ibp->rvp.n_pkt_drops++;
        ibp->rvp.pkey_violations++;

        trap = create_trap_node(IB_NOTICE_TYPE_SECURITY, OPA_TRAP_BAD_P_KEY,
                                lid);
        if (!trap)
                return;

        /* Send violation trap */
        trap->data.ntc_257_258.lid1 = cpu_to_be32(lid1);
        trap->data.ntc_257_258.lid2 = cpu_to_be32(lid2);
        trap->data.ntc_257_258.key = cpu_to_be32(key);
        trap->data.ntc_257_258.sl = sl << 3;
        trap->data.ntc_257_258.qp1 = cpu_to_be32(qp1);
        trap->data.ntc_257_258.qp2 = cpu_to_be32(qp2);

        trap->len = sizeof(trap->data);
        send_trap(ibp, trap);
}

/*
 * Send a bad M_Key trap (ch. 14.3.9).
 */
static void bad_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
                     __be64 mkey, __be32 dr_slid, u8 return_path[], u8 hop_cnt)
{
        struct trap_node *trap;
        u32 lid = ppd_from_ibp(ibp)->lid;

        trap = create_trap_node(IB_NOTICE_TYPE_SECURITY, OPA_TRAP_BAD_M_KEY,
                                lid);
        if (!trap)
                return;

        /* Send violation trap */
        trap->data.ntc_256.lid = trap->data.issuer_lid;
        trap->data.ntc_256.method = mad->method;
        trap->data.ntc_256.attr_id = mad->attr_id;
        trap->data.ntc_256.attr_mod = mad->attr_mod;
        trap->data.ntc_256.mkey = mkey;
        if (mad->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
                trap->data.ntc_256.dr_slid = dr_slid;
                trap->data.ntc_256.dr_trunc_hop = IB_NOTICE_TRAP_DR_NOTICE;
                if (hop_cnt > ARRAY_SIZE(trap->data.ntc_256.dr_rtn_path)) {
                        trap->data.ntc_256.dr_trunc_hop |=
                                IB_NOTICE_TRAP_DR_TRUNC;
                        hop_cnt = ARRAY_SIZE(trap->data.ntc_256.dr_rtn_path);
                }
                trap->data.ntc_256.dr_trunc_hop |= hop_cnt;
                memcpy(trap->data.ntc_256.dr_rtn_path, return_path,
                       hop_cnt);
        }

        trap->len = sizeof(trap->data);

        send_trap(ibp, trap);
}

/*
 * Send a Port Capability Mask Changed trap (ch. 14.3.11).
 */
void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num)
{
        struct trap_node *trap;
        struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
        struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
        struct hfi1_ibport *ibp = &dd->pport[port_num - 1].ibport_data;
        u32 lid = ppd_from_ibp(ibp)->lid;

        trap = create_trap_node(IB_NOTICE_TYPE_INFO,
                                OPA_TRAP_CHANGE_CAPABILITY,
                                lid);
        if (!trap)
                return;

        trap->data.ntc_144.lid = trap->data.issuer_lid;
        trap->data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
        trap->data.ntc_144.cap_mask3 = cpu_to_be16(ibp->rvp.port_cap3_flags);

        trap->len = sizeof(trap->data);
        send_trap(ibp, trap);
}

/*
 * Send a System Image GUID Changed trap (ch. 14.3.12).
 */
void hfi1_sys_guid_chg(struct hfi1_ibport *ibp)
{
        struct trap_node *trap;
        u32 lid = ppd_from_ibp(ibp)->lid;

        trap = create_trap_node(IB_NOTICE_TYPE_INFO, OPA_TRAP_CHANGE_SYSGUID,
                                lid);
        if (!trap)
                return;

        trap->data.ntc_145.new_sys_guid = ib_hfi1_sys_image_guid;
        trap->data.ntc_145.lid = trap->data.issuer_lid;

        trap->len = sizeof(trap->data);
        send_trap(ibp, trap);
}

/*
 * Send a Node Description Changed trap (ch. 14.3.13).
 */
void hfi1_node_desc_chg(struct hfi1_ibport *ibp)
{
        struct trap_node *trap;
        u32 lid = ppd_from_ibp(ibp)->lid;

        trap = create_trap_node(IB_NOTICE_TYPE_INFO,
                                OPA_TRAP_CHANGE_CAPABILITY,
                                lid);
        if (!trap)
                return;

        trap->data.ntc_144.lid = trap->data.issuer_lid;
        trap->data.ntc_144.change_flags =
                cpu_to_be16(OPA_NOTICE_TRAP_NODE_DESC_CHG);

        trap->len = sizeof(trap->data);
        send_trap(ibp, trap);
}

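/*
 * OPA SubnGet(NodeDescription): copy the device's node description
 * into the reply.
 */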
static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am,
                                   u8 *data, struct ib_device *ibdev,
                                   u8 port, u32 *resp_len, u32 max_len)
{
        struct opa_node_description *nd;

        if (am || smp_length_check(sizeof(*nd), max_len)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }

        nd = (struct opa_node_description *)data;

        memcpy(nd->data, ibdev->node_desc, sizeof(nd->data));

        if (resp_len)
                *resp_len += sizeof(*nd);

        return reply((struct ib_mad_hdr *)smp);
}

static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data,
                                   struct ib_device *ibdev, u8 port,
                                   u32 *resp_len, u32 max_len)
{
        struct opa_node_info *ni;
        struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
        unsigned pidx = port - 1; /* IB numbers ports from 1, hw from 0 */

        ni = (struct opa_node_info *)data;

        /* GUID 0 is illegal */
        if (am || pidx >= dd->num_pports || ibdev->node_guid == 0 ||
            smp_length_check(sizeof(*ni), max_len) ||
            get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }

        ni->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX);
        ni->base_version = OPA_MGMT_BASE_VERSION;
        ni->class_version = OPA_SM_CLASS_VERSION;
        ni->node_type = 1;     /* channel adapter */
        ni->num_ports = ibdev->phys_port_cnt;
        /* This is already in network order */
        ni->system_image_guid = ib_hfi1_sys_image_guid;
        ni->node_guid = ibdev->node_guid;
        ni->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
        ni->device_id = cpu_to_be16(dd->pcidev->device);
        ni->revision = cpu_to_be32(dd->minrev);
        ni->local_port_num = port;
        ni->vendor_id[0] = dd->oui1;
        ni->vendor_id[1] = dd->oui2;
        ni->vendor_id[2] = dd->oui3;

        if (resp_len)
                *resp_len += sizeof(*ni);

        return reply((struct ib_mad_hdr *)smp);
}

static int subn_get_nodeinfo(struct ib_smp *smp, struct ib_device *ibdev,
                             u8 port)
{
        struct ib_node_info *nip = (struct ib_node_info *)&smp->data;
        struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
        unsigned pidx = port - 1; /* IB numbers ports from 1, hw from 0 */

        /* GUID 0 is illegal */
        if (smp->attr_mod || pidx >= dd->num_pports ||
            ibdev->node_guid == 0 ||
            get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }

        nip->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX);
        nip->base_version = OPA_MGMT_BASE_VERSION;
        nip->class_version = OPA_SM_CLASS_VERSION;
        nip->node_type = 1;     /* channel adapter */
        nip->num_ports = ibdev->phys_port_cnt;
        /* This is already in network order */
        nip->sys_guid = ib_hfi1_sys_image_guid;
        nip->node_guid = ibdev->node_guid;
        nip->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
        nip->device_id = cpu_to_be16(dd->pcidev->device);
        nip->revision = cpu_to_be32(dd->minrev);
        nip->local_port_num = port;
        nip->vendor_id[0] = dd->oui1;
        nip->vendor_id[1] = dd->oui2;
        nip->vendor_id[2] = dd->oui3;

        return reply((struct ib_mad_hdr *)smp);
}

static void set_link_width_enabled(struct hfi1_pportdata *ppd, u32 w)
{
        (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LWID_ENB, w);
}

static void set_link_width_downgrade_enabled(struct hfi1_pportdata *ppd, u32 w)
{
        (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LWID_DG_ENB, w);
}

static void set_link_speed_enabled(struct hfi1_pportdata *ppd, u32 s)
{
        (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_SPD_ENB, s);
}

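/*
 * Validate the M_Key of an incoming SMP: expire any lease that has
 * timed out, accept the MAD if the M_Key matches (or checking is
 * disabled), and otherwise count the violation, start the lease timer,
 * and send a Bad M_Key trap.  Returns 0 if the MAD may be processed,
 * 1 if it must be dropped.
 */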
static int check_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
                      int mad_flags, __be64 mkey, __be32 dr_slid,
                      u8 return_path[], u8 hop_cnt)
{
        int valid_mkey = 0;
        int ret = 0;

        /* Is the mkey in the process of expiring? */
        if (ibp->rvp.mkey_lease_timeout &&
            time_after_eq(jiffies, ibp->rvp.mkey_lease_timeout)) {
                /* Clear timeout and mkey protection field. */
                ibp->rvp.mkey_lease_timeout = 0;
                ibp->rvp.mkeyprot = 0;
        }

        if ((mad_flags & IB_MAD_IGNORE_MKEY) || ibp->rvp.mkey == 0 ||
            ibp->rvp.mkey == mkey)
                valid_mkey = 1;

        /* Unset lease timeout on any valid Get/Set/TrapRepress */
        if (valid_mkey && ibp->rvp.mkey_lease_timeout &&
            (mad->method == IB_MGMT_METHOD_GET ||
             mad->method == IB_MGMT_METHOD_SET ||
             mad->method == IB_MGMT_METHOD_TRAP_REPRESS))
                ibp->rvp.mkey_lease_timeout = 0;

        if (!valid_mkey) {
                switch (mad->method) {
                case IB_MGMT_METHOD_GET:
                        /* Bad mkey not a violation below level 2 */
                        if (ibp->rvp.mkeyprot < 2)
                                break;
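                        /* fall through */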
                case IB_MGMT_METHOD_SET:
                case IB_MGMT_METHOD_TRAP_REPRESS:
                        if (ibp->rvp.mkey_violations != 0xFFFF)
                                ++ibp->rvp.mkey_violations;
                        if (!ibp->rvp.mkey_lease_timeout &&
                            ibp->rvp.mkey_lease_period)
                                ibp->rvp.mkey_lease_timeout = jiffies +
                                        ibp->rvp.mkey_lease_period * HZ;
                        /* Generate a trap notice. */
                        bad_mkey(ibp, mad, mkey, dr_slid, return_path,
                                 hop_cnt);
                        ret = 1;
                }
        }

        return ret;
}

/*
 * The SMA caches reads from LCB registers in case the LCB is unavailable.
 * (The LCB is unavailable in certain link states, for example.)
 */
struct lcb_datum {
        u32 off;
        u64 val;
};

static struct lcb_datum lcb_cache[] = {
        { DC_LCB_STS_ROUND_TRIP_LTP_CNT, 0 },
};

static int write_lcb_cache(u32 off, u64 val)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
                if (lcb_cache[i].off == off) {
                        lcb_cache[i].val = val;
                        return 0;
                }
        }

        pr_warn("%s bad offset 0x%x\n", __func__, off);
        return -1;
}

static int read_lcb_cache(u32 off, u64 *val)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
                if (lcb_cache[i].off == off) {
                        *val = lcb_cache[i].val;
                        return 0;
                }
        }

        pr_warn("%s bad offset 0x%x\n", __func__, off);
        return -1;
}

void read_ltp_rtt(struct hfi1_devdata *dd)
{
        u64 reg;

        if (read_lcb_csr(dd, DC_LCB_STS_ROUND_TRIP_LTP_CNT, &reg))
                dd_dev_err(dd, "%s: unable to read LTP RTT\n", __func__);
        else
                write_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, reg);
}

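/*
 * OPA SubnGet(PortInfo): gather the port's current configuration and
 * state into an opa_port_info reply.  The attribute modifier must
 * request exactly one port.
 */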
static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
                                   struct ib_device *ibdev, u8 port,
                                   u32 *resp_len, u32 max_len)
{
        int i;
        struct hfi1_devdata *dd;
        struct hfi1_pportdata *ppd;
        struct hfi1_ibport *ibp;
        struct opa_port_info *pi = (struct opa_port_info *)data;
        u8 mtu;
        u8 credit_rate;
        u8 is_beaconing_active;
        u32 state;
        u32 num_ports = OPA_AM_NPORT(am);
        u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
        u32 buffer_units;
        u64 tmp = 0;

        if (num_ports != 1 || smp_length_check(sizeof(*pi), max_len)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }

        dd = dd_from_ibdev(ibdev);
        /* IB numbers ports from 1, hw from 0 */
        ppd = dd->pport + (port - 1);
        ibp = &ppd->ibport_data;

        if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
            ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }

        pi->lid = cpu_to_be32(ppd->lid);

        /* Only return the mkey if the protection field allows it. */
        if (!(smp->method == IB_MGMT_METHOD_GET &&
              ibp->rvp.mkey != smp->mkey &&
              ibp->rvp.mkeyprot == 1))
                pi->mkey = ibp->rvp.mkey;

        pi->subnet_prefix = ibp->rvp.gid_prefix;
        pi->sm_lid = cpu_to_be32(ibp->rvp.sm_lid);
        pi->ib_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
        pi->mkey_lease_period = cpu_to_be16(ibp->rvp.mkey_lease_period);
        pi->sm_trap_qp = cpu_to_be32(ppd->sm_trap_qp);
        pi->sa_qp = cpu_to_be32(ppd->sa_qp);

        pi->link_width.enabled = cpu_to_be16(ppd->link_width_enabled);
        pi->link_width.supported = cpu_to_be16(ppd->link_width_supported);
        pi->link_width.active = cpu_to_be16(ppd->link_width_active);

        pi->link_width_downgrade.supported =
                        cpu_to_be16(ppd->link_width_downgrade_supported);
        pi->link_width_downgrade.enabled =
                        cpu_to_be16(ppd->link_width_downgrade_enabled);
        pi->link_width_downgrade.tx_active =
                        cpu_to_be16(ppd->link_width_downgrade_tx_active);
        pi->link_width_downgrade.rx_active =
                        cpu_to_be16(ppd->link_width_downgrade_rx_active);

        pi->link_speed.supported = cpu_to_be16(ppd->link_speed_supported);
        pi->link_speed.active = cpu_to_be16(ppd->link_speed_active);
        pi->link_speed.enabled = cpu_to_be16(ppd->link_speed_enabled);

        state = driver_lstate(ppd);

        if (start_of_sm_config && (state == IB_PORT_INIT))
                ppd->is_sm_config_started = 1;

        pi->port_phys_conf = (ppd->port_type & 0xf);

        pi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
        pi->port_states.ledenable_offlinereason |=
                ppd->is_sm_config_started << 5;
        /*
         * This pairs with the memory barrier in hfi1_start_led_override to
         * ensure that we read the correct state of LED beaconing represented
         * by led_override_timer_active
         */
        smp_rmb();
        is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
        pi->port_states.ledenable_offlinereason |= is_beaconing_active << 6;
        pi->port_states.ledenable_offlinereason |=
                ppd->offline_disabled_reason;

        pi->port_states.portphysstate_portstate =
                (driver_pstate(ppd) << 4) | state;

        pi->mkeyprotect_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc;

        memset(pi->neigh_mtu.pvlx_to_mtu, 0, sizeof(pi->neigh_mtu.pvlx_to_mtu));
        for (i = 0; i < ppd->vls_supported; i++) {
                mtu = mtu_to_enum(dd->vld[i].mtu, HFI1_DEFAULT_ACTIVE_MTU);
                if ((i % 2) == 0)
                        pi->neigh_mtu.pvlx_to_mtu[i / 2] |= (mtu << 4);
                else
                        pi->neigh_mtu.pvlx_to_mtu[i / 2] |= mtu;
        }
        /* don't forget VL 15 */
        mtu = mtu_to_enum(dd->vld[15].mtu, 2048);
        pi->neigh_mtu.pvlx_to_mtu[15 / 2] |= mtu;
        pi->smsl = ibp->rvp.sm_sl & OPA_PI_MASK_SMSL;
        pi->operational_vls = hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS);
        pi->partenforce_filterraw |=
                (ppd->linkinit_reason & OPA_PI_MASK_LINKINIT_REASON);
        if (ppd->part_enforce & HFI1_PART_ENFORCE_IN)
                pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_IN;
        if (ppd->part_enforce & HFI1_PART_ENFORCE_OUT)
                pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_OUT;
        pi->mkey_violations = cpu_to_be16(ibp->rvp.mkey_violations);
        /* P_KeyViolations are counted by hardware. */
        pi->pkey_violations = cpu_to_be16(ibp->rvp.pkey_violations);
        pi->qkey_violations = cpu_to_be16(ibp->rvp.qkey_violations);

        pi->vl.cap = ppd->vls_supported;
        pi->vl.high_limit = cpu_to_be16(ibp->rvp.vl_high_limit);
        pi->vl.arb_high_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_CAP);
        pi->vl.arb_low_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_LOW_CAP);

        pi->clientrereg_subnettimeout = ibp->rvp.subnet_timeout;

        pi->port_link_mode = cpu_to_be16(OPA_PORT_LINK_MODE_OPA << 10 |
                                         OPA_PORT_LINK_MODE_OPA << 5 |
                                         OPA_PORT_LINK_MODE_OPA);

        pi->port_ltp_crc_mode = cpu_to_be16(ppd->port_ltp_crc_mode);

        pi->port_mode = cpu_to_be16(
                                ppd->is_active_optimize_enabled ?
                                        OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE : 0);

        pi->port_packet_format.supported =
                cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B |
                            OPA_PORT_PACKET_FORMAT_16B);
        pi->port_packet_format.enabled =
                cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B |
                            OPA_PORT_PACKET_FORMAT_16B);

        /* flit_control.interleave is (OPA V1, version .76):
         * bits         use
         * ----         ---
         * 2            res
         * 2            DistanceSupported
         * 2            DistanceEnabled
         * 5            MaxNextLevelTxEnabled
         * 5            MaxNestLevelRxSupported
         *
         * HFI supports only "distance mode 1" (see OPA V1, version .76,
         * section 9.6.2), so set DistanceSupported, DistanceEnabled
         * to 0x1.
         */
        pi->flit_control.interleave = cpu_to_be16(0x1400);

        pi->link_down_reason = ppd->local_link_down_reason.sma;
        pi->neigh_link_down_reason = ppd->neigh_link_down_reason.sma;
        pi->port_error_action = cpu_to_be32(ppd->port_error_action);
        pi->mtucap = mtu_to_enum(hfi1_max_mtu, IB_MTU_4096);

        /* 32.768 usec. response time (guessing) */
        pi->resptimevalue = 3;

        pi->local_port_num = port;

        /* buffer info for FM */
        pi->overall_buffer_space = cpu_to_be16(dd->link_credits);

        pi->neigh_node_guid = cpu_to_be64(ppd->neighbor_guid);
        pi->neigh_port_num = ppd->neighbor_port_number;
        pi->port_neigh_mode =
                (ppd->neighbor_type & OPA_PI_MASK_NEIGH_NODE_TYPE) |
                (ppd->mgmt_allowed ? OPA_PI_MASK_NEIGH_MGMT_ALLOWED : 0) |
                (ppd->neighbor_fm_security ?
                        OPA_PI_MASK_NEIGH_FW_AUTH_BYPASS : 0);

        /* HFIs shall always return VL15 credits to their
         * neighbor in a timely manner, without any credit return pacing.
         */
        credit_rate = 0;
        buffer_units = (dd->vau) & OPA_PI_MASK_BUF_UNIT_BUF_ALLOC;
        buffer_units |= (dd->vcu << 3) & OPA_PI_MASK_BUF_UNIT_CREDIT_ACK;
        buffer_units |= (credit_rate << 6) &
                                OPA_PI_MASK_BUF_UNIT_VL15_CREDIT_RATE;
        buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT;
        pi->buffer_units = cpu_to_be32(buffer_units);

        pi->opa_cap_mask = cpu_to_be16(ibp->rvp.port_cap3_flags);
        pi->collectivemask_multicastmask = ((OPA_COLLECTIVE_NR & 0x7)
                                            << 3 | (OPA_MCAST_NR & 0x7));

        /* HFI supports a replay buffer 128 LTPs in size */
        pi->replay_depth.buffer = 0x80;
        /* read the cached value of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
        read_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, &tmp);

        /*
         * this counter is 16 bits wide, but the replay_depth.wire
         * variable is only 8 bits
         */
        if (tmp > 0xff)
                tmp = 0xff;
        pi->replay_depth.wire = tmp;

        if (resp_len)
                *resp_len += sizeof(struct opa_port_info);

        return reply((struct ib_mad_hdr *)smp);
}

/**
 * get_pkeys - return the PKEY table
 * @dd: the hfi1_ib device
 * @port: the IB port number
 * @pkeys: the pkey table is placed here
 */
static int get_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
{
        struct hfi1_pportdata *ppd = dd->pport + port - 1;

        memcpy(pkeys, ppd->pkeys, sizeof(ppd->pkeys));

        return 0;
}

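/*
 * OPA SubnGet(P_KeyTable): the attribute modifier encodes a starting
 * block and a block count (OPA_PARTITION_TABLE_BLK_SIZE entries each).
 * Only start_block == 0 is supported here; the real table is returned
 * for block 0 and anything else is an invalid field.
 */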
static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
                                    struct ib_device *ibdev, u8 port,
                                    u32 *resp_len, u32 max_len)
{
        struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
        u32 n_blocks_req = OPA_AM_NBLK(am);
        u32 start_block = am & 0x7ff;
        __be16 *p;
        u16 *q;
        int i;
        u16 n_blocks_avail;
        unsigned npkeys = hfi1_get_npkeys(dd);
        size_t size;

        if (n_blocks_req == 0) {
                pr_warn("OPA Get PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n",
                        port, start_block, n_blocks_req);
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }

        n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;

        size = (n_blocks_req * OPA_PARTITION_TABLE_BLK_SIZE) * sizeof(u16);

        if (smp_length_check(size, max_len)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }

        if (start_block + n_blocks_req > n_blocks_avail ||
            n_blocks_req > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
                pr_warn("OPA Get PKey AM Invalid : s 0x%x; req 0x%x; avail 0x%x; blk/smp 0x%lx\n",
                        start_block, n_blocks_req, n_blocks_avail,
                        OPA_NUM_PKEY_BLOCKS_PER_SMP);
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }

        p = (__be16 *)data;
        q = (u16 *)data;
        /* get the real pkeys if we are requesting the first block */
        if (start_block == 0) {
                get_pkeys(dd, port, q);
                for (i = 0; i < npkeys; i++)
                        p[i] = cpu_to_be16(q[i]);
                if (resp_len)
                        *resp_len += size;
        } else {
                smp->status |= IB_SMP_INVALID_FIELD;
        }
        return reply((struct ib_mad_hdr *)smp);
}

enum {
        HFI_TRANSITION_DISALLOWED,
        HFI_TRANSITION_IGNORED,
        HFI_TRANSITION_ALLOWED,
        HFI_TRANSITION_UNDEFINED,
};

/*
 * Use shortened names to improve readability of
 * {logical,physical}_state_transitions
 */
enum {
        __D = HFI_TRANSITION_DISALLOWED,
        __I = HFI_TRANSITION_IGNORED,
        __A = HFI_TRANSITION_ALLOWED,
        __U = HFI_TRANSITION_UNDEFINED,
};

/*
 * IB_PORTPHYSSTATE_POLLING (2) through OPA_PORTPHYSSTATE_MAX (11) are
 * represented in physical_state_transitions.
 */
#define __N_PHYSTATES (OPA_PORTPHYSSTATE_MAX - IB_PORTPHYSSTATE_POLLING + 1)

/*
 * Within physical_state_transitions, rows represent "old" states,
 * columns "new" states, and physical_state_transitions.allowed[old][new]
 * indicates if the transition from old state to new state is legal (see
 * OPAg1v1, Table 6-4).
 */
static const struct {
        u8 allowed[__N_PHYSTATES][__N_PHYSTATES];
} physical_state_transitions = {
        {
                /* 2    3    4    5    6    7    8    9   10   11 */
        /* 2 */ { __A, __A, __D, __D, __D, __D, __D, __D, __D, __D },
        /* 3 */ { __A, __I, __D, __D, __D, __D, __D, __D, __D, __A },
        /* 4 */ { __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
        /* 5 */ { __A, __A, __D, __I, __D, __D, __D, __D, __D, __D },
        /* 6 */ { __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
        /* 7 */ { __D, __A, __D, __D, __D, __I, __D, __D, __D, __D },
        /* 8 */ { __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
        /* 9 */ { __I, __A, __D, __D, __D, __D, __D, __I, __D, __D },
        /*10 */ { __U, __U, __U, __U, __U, __U, __U, __U, __U, __U },
        /*11 */ { __D, __A, __D, __D, __D, __D, __D, __D, __D, __I },
        }
};

/*
 * IB_PORT_DOWN (1) through IB_PORT_ACTIVE_DEFER (5) are represented
 * in logical_state_transitions.
 */

#define __N_LOGICAL_STATES (IB_PORT_ACTIVE_DEFER - IB_PORT_DOWN + 1)

/*
 * Within logical_state_transitions rows represent "old" states,
 * columns "new" states, and logical_state_transitions.allowed[old][new]
 * indicates if the transition from old state to new state is legal (see
 * OPAg1v1, Table 9-12).
 */
static const struct {
        u8 allowed[__N_LOGICAL_STATES][__N_LOGICAL_STATES];
} logical_state_transitions = {
        {
                /* 1    2    3    4    5 */
        /* 1 */ { __I, __D, __D, __D, __U},
        /* 2 */ { __D, __I, __A, __D, __U},
        /* 3 */ { __D, __D, __I, __A, __U},
        /* 4 */ { __D, __D, __I, __I, __U},
        /* 5 */ { __U, __U, __U, __U, __U},
        }
};

static int logical_transition_allowed(int old, int new)
{
        if (old < IB_PORT_NOP || old > IB_PORT_ACTIVE_DEFER ||
            new < IB_PORT_NOP || new > IB_PORT_ACTIVE_DEFER) {
                pr_warn("invalid logical state(s) (old %d new %d)\n",
                        old, new);
                return HFI_TRANSITION_UNDEFINED;
        }

        if (new == IB_PORT_NOP)
                return HFI_TRANSITION_ALLOWED; /* always allowed */

        /* adjust states for indexing into logical_state_transitions */
        old -= IB_PORT_DOWN;
        new -= IB_PORT_DOWN;

        if (old < 0 || new < 0)
                return HFI_TRANSITION_UNDEFINED;
        return logical_state_transitions.allowed[old][new];
}

static int physical_transition_allowed(int old, int new)
{
        if (old < IB_PORTPHYSSTATE_NOP || old > OPA_PORTPHYSSTATE_MAX ||
            new < IB_PORTPHYSSTATE_NOP || new > OPA_PORTPHYSSTATE_MAX) {
                pr_warn("invalid physical state(s) (old %d new %d)\n",
                        old, new);
                return HFI_TRANSITION_UNDEFINED;
        }

        if (new == IB_PORTPHYSSTATE_NOP)
                return HFI_TRANSITION_ALLOWED; /* always allowed */

        /* adjust states for indexing into physical_state_transitions */
        old -= IB_PORTPHYSSTATE_POLLING;
        new -= IB_PORTPHYSSTATE_POLLING;

        if (old < 0 || new < 0)
                return HFI_TRANSITION_UNDEFINED;
        return physical_state_transitions.allowed[old][new];
}

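/*
 * Check a combined logical + physical port state change request against
 * the transition tables above.  DISALLOWED/UNDEFINED results are
 * returned immediately; the request is IGNORED only if both parts are
 * ignorable (or for the special Offline -> Polling physical request).
 */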
static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
                                          u32 logical_new, u32 physical_new)
{
        u32 physical_old = driver_pstate(ppd);
        u32 logical_old = driver_lstate(ppd);
        int ret, logical_allowed, physical_allowed;

        ret = logical_transition_allowed(logical_old, logical_new);
        logical_allowed = ret;

        if (ret == HFI_TRANSITION_DISALLOWED ||
            ret == HFI_TRANSITION_UNDEFINED) {
                pr_warn("invalid logical state transition %s -> %s\n",
                        opa_lstate_name(logical_old),
                        opa_lstate_name(logical_new));
                return ret;
        }

        ret = physical_transition_allowed(physical_old, physical_new);
        physical_allowed = ret;

        if (ret == HFI_TRANSITION_DISALLOWED ||
            ret == HFI_TRANSITION_UNDEFINED) {
                pr_warn("invalid physical state transition %s -> %s\n",
                        opa_pstate_name(physical_old),
                        opa_pstate_name(physical_new));
                return ret;
        }

        if (logical_allowed == HFI_TRANSITION_IGNORED &&
            physical_allowed == HFI_TRANSITION_IGNORED)
                return HFI_TRANSITION_IGNORED;

        /*
         * A change request of Physical Port State from
         * 'Offline' to 'Polling' should be ignored.
         */
        if ((physical_old == OPA_PORTPHYSSTATE_OFFLINE) &&
            (physical_new == IB_PORTPHYSSTATE_POLLING))
                return HFI_TRANSITION_IGNORED;

        /*
         * Either physical_allowed or logical_allowed is
         * HFI_TRANSITION_ALLOWED.
         */
        return HFI_TRANSITION_ALLOWED;
}

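/*
 * Apply a validated logical/physical port state change from
 * SubnSet(PortInfo).  Returns a MAD result only for the special case
 * where the reply would have to leave through a port just disabled;
 * otherwise returns 0 and lets the caller generate the reply.
 */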
static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
                           u32 logical_state, u32 phys_state,
                           int suppress_idle_sma)
{
        struct hfi1_devdata *dd = ppd->dd;
        u32 link_state;
        int ret;

        ret = port_states_transition_allowed(ppd, logical_state, phys_state);
        if (ret == HFI_TRANSITION_DISALLOWED ||
            ret == HFI_TRANSITION_UNDEFINED) {
                /* error message emitted above */
                smp->status |= IB_SMP_INVALID_FIELD;
                return 0;
        }

        if (ret == HFI_TRANSITION_IGNORED)
                return 0;

        if ((phys_state != IB_PORTPHYSSTATE_NOP) &&
            !(logical_state == IB_PORT_DOWN ||
              logical_state == IB_PORT_NOP)) {
                pr_warn("SubnSet(OPA_PortInfo) port state invalid: logical_state 0x%x physical_state 0x%x\n",
                        logical_state, phys_state);
                smp->status |= IB_SMP_INVALID_FIELD;
        }

        /*
         * Logical state changes are summarized in OPAv1g1 spec.,
         * Table 9-12; physical state changes are summarized in
         * OPAv1g1 spec., Table 6.4.
         */
        switch (logical_state) {
        case IB_PORT_NOP:
                if (phys_state == IB_PORTPHYSSTATE_NOP)
                        break;
                /* FALLTHROUGH */
        case IB_PORT_DOWN:
                if (phys_state == IB_PORTPHYSSTATE_NOP) {
                        link_state = HLS_DN_DOWNDEF;
                } else if (phys_state == IB_PORTPHYSSTATE_POLLING) {
                        link_state = HLS_DN_POLL;
                        set_link_down_reason(ppd, OPA_LINKDOWN_REASON_FM_BOUNCE,
                                             0, OPA_LINKDOWN_REASON_FM_BOUNCE);
                } else if (phys_state == IB_PORTPHYSSTATE_DISABLED) {
                        link_state = HLS_DN_DISABLE;
                } else {
                        pr_warn("SubnSet(OPA_PortInfo) invalid physical state 0x%x\n",
                                phys_state);
                        smp->status |= IB_SMP_INVALID_FIELD;
                        break;
                }

                if ((link_state == HLS_DN_POLL ||
                     link_state == HLS_DN_DOWNDEF)) {
                        /*
                         * Going to poll.  No matter what the current state,
                         * always move offline first, then tune and start the
                         * link.  This correctly handles a FM link bounce and
                         * a link enable.  Going offline is a no-op if already
                         * offline.
                         */
                        set_link_state(ppd, HLS_DN_OFFLINE);
                        start_link(ppd);
                } else {
                        set_link_state(ppd, link_state);
                }
                if (link_state == HLS_DN_DISABLE &&
                    (ppd->offline_disabled_reason >
                     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED) ||
                     ppd->offline_disabled_reason ==
                     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)))
                        ppd->offline_disabled_reason =
                        HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
                /*
                 * Don't send a reply if the response would be sent
                 * through the disabled port.
                 */
                if (link_state == HLS_DN_DISABLE && smp->hop_cnt)
                        return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
                break;
        case IB_PORT_ARMED:
                ret = set_link_state(ppd, HLS_UP_ARMED);
                if ((ret == 0) && (suppress_idle_sma == 0))
                        send_idle_sma(dd, SMA_IDLE_ARM);
                break;
        case IB_PORT_ACTIVE:
                if (ppd->neighbor_normal) {
                        ret = set_link_state(ppd, HLS_UP_ACTIVE);
                        if (ret == 0)
                                send_idle_sma(dd, SMA_IDLE_ACTIVE);
                } else {
                        pr_warn("SubnSet(OPA_PortInfo) Cannot move to Active with NeighborNormal 0\n");
                        smp->status |= IB_SMP_INVALID_FIELD;
                }
                break;
        default:
                pr_warn("SubnSet(OPA_PortInfo) invalid logical state 0x%x\n",
                        logical_state);
                smp->status |= IB_SMP_INVALID_FIELD;
        }

        return 0;
}

/**
 * __subn_set_opa_portinfo - set port information
 * @smp: the incoming SM packet
 * @ibdev: the infiniband device
 * @port: the port on the device
 */
static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
                                   struct ib_device *ibdev, u8 port,
                                   u32 *resp_len, u32 max_len)
{
        struct opa_port_info *pi = (struct opa_port_info *)data;
        struct ib_event event;
        struct hfi1_devdata *dd;
        struct hfi1_pportdata *ppd;
        struct hfi1_ibport *ibp;
        u8 clientrereg;
        unsigned long flags;
        u32 smlid;
        u32 lid;
        u8 ls_old, ls_new, ps_new;
        u8 vls;
        u8 msl;
        u8 crc_enabled;
        u16 lse, lwe, mtu;
        u32 num_ports = OPA_AM_NPORT(am);
        u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
        int ret, i, invalid = 0, call_set_mtu = 0;
        int call_link_downgrade_policy = 0;

        if (num_ports != 1 ||
            smp_length_check(sizeof(*pi), max_len)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }

        lid = be32_to_cpu(pi->lid);
        if (lid & 0xFF000000) {
                pr_warn("OPA_PortInfo lid out of range: %X\n", lid);
                smp->status |= IB_SMP_INVALID_FIELD;
                goto get_only;
        }

        smlid = be32_to_cpu(pi->sm_lid);
        if (smlid & 0xFF000000) {
                pr_warn("OPA_PortInfo SM lid out of range: %X\n", smlid);
                smp->status |= IB_SMP_INVALID_FIELD;
                goto get_only;
        }

        clientrereg = (pi->clientrereg_subnettimeout &
                        OPA_PI_MASK_CLIENT_REREGISTER);

        dd = dd_from_ibdev(ibdev);
        /* IB numbers ports from 1, hw from 0 */
        ppd = dd->pport + (port - 1);
        ibp = &ppd->ibport_data;
        event.device = ibdev;
        event.element.port_num = port;

        ls_old = driver_lstate(ppd);

        ibp->rvp.mkey = pi->mkey;
        if (ibp->rvp.gid_prefix != pi->subnet_prefix) {
                ibp->rvp.gid_prefix = pi->subnet_prefix;
                event.event = IB_EVENT_GID_CHANGE;
                ib_dispatch_event(&event);
        }
        ibp->rvp.mkey_lease_period = be16_to_cpu(pi->mkey_lease_period);

        /* Must be a valid unicast LID address. */
        if ((lid == 0 && ls_old > IB_PORT_INIT) ||
            (hfi1_is_16B_mcast(lid))) {
                smp->status |= IB_SMP_INVALID_FIELD;
                pr_warn("SubnSet(OPA_PortInfo) lid invalid 0x%x\n",
                        lid);
        } else if (ppd->lid != lid ||
                   ppd->lmc != (pi->mkeyprotect_lmc & OPA_PI_MASK_LMC)) {
                if (ppd->lid != lid)
                        hfi1_set_uevent_bits(ppd, _HFI1_EVENT_LID_CHANGE_BIT);
                if (ppd->lmc != (pi->mkeyprotect_lmc & OPA_PI_MASK_LMC))
                        hfi1_set_uevent_bits(ppd, _HFI1_EVENT_LMC_CHANGE_BIT);
                hfi1_set_lid(ppd, lid, pi->mkeyprotect_lmc & OPA_PI_MASK_LMC);
                event.event = IB_EVENT_LID_CHANGE;
                ib_dispatch_event(&event);

                if (HFI1_PORT_GUID_INDEX + 1 < HFI1_GUIDS_PER_PORT) {
                        /* Manufacture GID from LID to support extended
                         * addresses
                         */
                        ppd->guids[HFI1_PORT_GUID_INDEX + 1] =
                                be64_to_cpu(OPA_MAKE_ID(lid));
                        event.event = IB_EVENT_GID_CHANGE;
                        ib_dispatch_event(&event);
                }
        }

        msl = pi->smsl & OPA_PI_MASK_SMSL;
        if (pi->partenforce_filterraw & OPA_PI_MASK_LINKINIT_REASON)
                ppd->linkinit_reason =
                        (pi->partenforce_filterraw &
                         OPA_PI_MASK_LINKINIT_REASON);

        /* Must be a valid unicast LID address. */
        if ((smlid == 0 && ls_old > IB_PORT_INIT) ||
            (hfi1_is_16B_mcast(smlid))) {
                smp->status |= IB_SMP_INVALID_FIELD;
                pr_warn("SubnSet(OPA_PortInfo) smlid invalid 0x%x\n", smlid);
        } else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) {
                pr_warn("SubnSet(OPA_PortInfo) smlid 0x%x\n", smlid);
                spin_lock_irqsave(&ibp->rvp.lock, flags);
                if (ibp->rvp.sm_ah) {
                        if (smlid != ibp->rvp.sm_lid)
                                hfi1_modify_qp0_ah(ibp, ibp->rvp.sm_ah, smlid);
                        if (msl != ibp->rvp.sm_sl)
                                rdma_ah_set_sl(&ibp->rvp.sm_ah->attr, msl);
                }
                spin_unlock_irqrestore(&ibp->rvp.lock, flags);
                if (smlid != ibp->rvp.sm_lid)
                        ibp->rvp.sm_lid = smlid;
                if (msl != ibp->rvp.sm_sl)
                        ibp->rvp.sm_sl = msl;
                event.event = IB_EVENT_SM_CHANGE;
                ib_dispatch_event(&event);
        }

        if (pi->link_down_reason == 0) {
                ppd->local_link_down_reason.sma = 0;
                ppd->local_link_down_reason.latest = 0;
        }

        if (pi->neigh_link_down_reason == 0) {
                ppd->neigh_link_down_reason.sma = 0;
                ppd->neigh_link_down_reason.latest = 0;
        }

        ppd->sm_trap_qp = be32_to_cpu(pi->sm_trap_qp);
        ppd->sa_qp = be32_to_cpu(pi->sa_qp);

        ppd->port_error_action = be32_to_cpu(pi->port_error_action);
1475         lwe = be16_to_cpu(pi->link_width.enabled);
1476         if (lwe) {
1477                 if (lwe == OPA_LINK_WIDTH_RESET ||
1478                     lwe == OPA_LINK_WIDTH_RESET_OLD)
1479                         set_link_width_enabled(ppd, ppd->link_width_supported);
1480                 else if ((lwe & ~ppd->link_width_supported) == 0)
1481                         set_link_width_enabled(ppd, lwe);
1482                 else
1483                         smp->status |= IB_SMP_INVALID_FIELD;
1484         }
1485         lwe = be16_to_cpu(pi->link_width_downgrade.enabled);
1486         /* LWD.E is always applied - 0 means "disabled" */
1487         if (lwe == OPA_LINK_WIDTH_RESET ||
1488             lwe == OPA_LINK_WIDTH_RESET_OLD) {
1489                 set_link_width_downgrade_enabled(ppd,
1490                         ppd->link_width_downgrade_supported);
1493         } else if ((lwe & ~ppd->link_width_downgrade_supported) == 0) {
1494                 /* only set and apply if something changed */
1495                 if (lwe != ppd->link_width_downgrade_enabled) {
1496                         set_link_width_downgrade_enabled(ppd, lwe);
1497                         call_link_downgrade_policy = 1;
1498                 }
1499         } else {
1500                 smp->status |= IB_SMP_INVALID_FIELD;
1501         }
1502         lse = be16_to_cpu(pi->link_speed.enabled);
1503         if (lse) {
1504                 if (lse & be16_to_cpu(pi->link_speed.supported))
1505                         set_link_speed_enabled(ppd, lse);
1506                 else
1507                         smp->status |= IB_SMP_INVALID_FIELD;
1508         }
1509
1510         ibp->rvp.mkeyprot =
1511                 (pi->mkeyprotect_lmc & OPA_PI_MASK_MKEY_PROT_BIT) >> 6;
1512         ibp->rvp.vl_high_limit = be16_to_cpu(pi->vl.high_limit) & 0xFF;
1513         (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_LIMIT,
1514                                     ibp->rvp.vl_high_limit);
1515
1516         if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
1517             ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
1518                 smp->status |= IB_SMP_INVALID_FIELD;
1519                 return reply((struct ib_mad_hdr *)smp);
1520         }
1521         for (i = 0; i < ppd->vls_supported; i++) {
1522                 if ((i % 2) == 0)
1523                         mtu = enum_to_mtu((pi->neigh_mtu.pvlx_to_mtu[i / 2] >>
1524                                            4) & 0xF);
1525                 else
1526                         mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[i / 2] &
1527                                           0xF);
1528                 if (mtu == 0xffff) {
1529                         pr_warn("SubnSet(OPA_PortInfo) mtu invalid %d (0x%x)\n",
1530                                 mtu,
1531                                 (pi->neigh_mtu.pvlx_to_mtu[0] >> 4) & 0xF);
1532                         smp->status |= IB_SMP_INVALID_FIELD;
1533                         mtu = hfi1_max_mtu; /* use a valid MTU */
1534                 }
1535                 if (dd->vld[i].mtu != mtu) {
1536                         dd_dev_info(dd,
1537                                     "MTU change on vl %d from %d to %d\n",
1538                                     i, dd->vld[i].mtu, mtu);
1539                         dd->vld[i].mtu = mtu;
1540                         call_set_mtu++;
1541                 }
1542         }
1543         /* As per the OPAv1 spec, VL15 must support, and be configured
1544          * for operation with, an MTU of 2048 or larger.
1545          */
1546         mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[15 / 2] & 0xF);
1547         if (mtu < 2048 || mtu == 0xffff)
1548                 mtu = 2048;
1549         if (dd->vld[15].mtu != mtu) {
1550                 dd_dev_info(dd,
1551                             "MTU change on vl 15 from %d to %d\n",
1552                             dd->vld[15].mtu, mtu);
1553                 dd->vld[15].mtu = mtu;
1554                 call_set_mtu++;
1555         }
1556         if (call_set_mtu)
1557                 set_mtu(ppd);
1558
1559         /* Set operational VLs */
1560         vls = pi->operational_vls & OPA_PI_MASK_OPERATIONAL_VL;
1561         if (vls) {
1562                 if (vls > ppd->vls_supported) {
1563                         pr_warn("SubnSet(OPA_PortInfo) operational VLs invalid %d\n",
1564                                 pi->operational_vls);
1565                         smp->status |= IB_SMP_INVALID_FIELD;
1566                 } else {
1567                         if (hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS,
1568                                             vls) == -EINVAL)
1569                                 smp->status |= IB_SMP_INVALID_FIELD;
1570                 }
1571         }
1572
1573         if (pi->mkey_violations == 0)
1574                 ibp->rvp.mkey_violations = 0;
1575
1576         if (pi->pkey_violations == 0)
1577                 ibp->rvp.pkey_violations = 0;
1578
1579         if (pi->qkey_violations == 0)
1580                 ibp->rvp.qkey_violations = 0;
1581
1582         ibp->rvp.subnet_timeout =
1583                 pi->clientrereg_subnettimeout & OPA_PI_MASK_SUBNET_TIMEOUT;
1584
1585         crc_enabled = be16_to_cpu(pi->port_ltp_crc_mode);
1586         crc_enabled >>= 4;
1587         crc_enabled &= 0xf;
1588
1589         if (crc_enabled != 0)
1590                 ppd->port_crc_mode_enabled = port_ltp_to_cap(crc_enabled);
1591
1592         ppd->is_active_optimize_enabled =
1593                         !!(be16_to_cpu(pi->port_mode)
1594                                         & OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE);
1595
1596         ls_new = pi->port_states.portphysstate_portstate &
1597                         OPA_PI_MASK_PORT_STATE;
1598         ps_new = (pi->port_states.portphysstate_portstate &
1599                         OPA_PI_MASK_PORT_PHYSICAL_STATE) >> 4;
1600
1601         if (ls_old == IB_PORT_INIT) {
1602                 if (start_of_sm_config) {
1603                         if (ls_new == ls_old || (ls_new == IB_PORT_ARMED))
1604                                 ppd->is_sm_config_started = 1;
1605                 } else if (ls_new == IB_PORT_ARMED) {
1606                         if (ppd->is_sm_config_started == 0)
1607                                 invalid = 1;
1608                 }
1609         }
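
        /*
         * Net effect (editorial note): from INIT, a transition to ARMED
         * is only considered valid once StartOfSMConfig has been seen;
         * otherwise the transition is handed to set_port_states() below
         * with invalid set.
         */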
1610
1611         /* Handle CLIENT_REREGISTER event b/c SM asked us for it */
1612         if (clientrereg) {
1613                 event.event = IB_EVENT_CLIENT_REREGISTER;
1614                 ib_dispatch_event(&event);
1615         }
1616
1617         /*
1618          * Do the port state change now that the other link parameters
1619          * have been set.
1620          * Changing the port physical state only makes sense if the link
1621          * is down or is being set to down.
1622          */
1623
1624         ret = set_port_states(ppd, smp, ls_new, ps_new, invalid);
1625         if (ret)
1626                 return ret;
1627
1628         ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len,
1629                                       max_len);
1630
1631         /* restore re-reg bit per o14-12.2.1 */
1632         pi->clientrereg_subnettimeout |= clientrereg;
1633
1634         /*
1635          * Apply the new link downgrade policy.  This may result in a link
1636          * bounce.  Do this after everything else so things are settled.
1637          * Possible problem: if setting the port state above fails, then
1638          * the policy change is not applied.
1639          */
1640         if (call_link_downgrade_policy)
1641                 apply_link_downgrade_policy(ppd, 0);
1642
1643         return ret;
1644
1645 get_only:
1646         return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len,
1647                                        max_len);
1648 }
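
/*
 * Editorial note on the pattern above: the OPA Set handlers in this file
 * reply by calling the matching __subn_get_*() routine, so the response
 * payload always reflects the state that was actually applied; even the
 * get_only path answers a rejected Set with the current PortInfo.
 */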
1649
1650 /**
1651  * set_pkeys - set the PKEY table for ctxt 0
1652  * @dd: the hfi1 device data
1653  * @port: the IB port number
1654  * @pkeys: the PKEY table
1655  */
1656 static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
1657 {
1658         struct hfi1_pportdata *ppd;
1659         int i;
1660         int changed = 0;
1661         int update_includes_mgmt_partition = 0;
1662
1663         /*
1664          * IB ports one/two always map to contexts zero/one, which are
1665          * always kernel contexts, so no locking is needed. If we get
1666          * here with ppd set up, there is no need to check that rcd
1667          * is valid.
1668          */
1669         ppd = dd->pport + (port - 1);
1670         /*
1671          * If the update does not include the management pkey, don't do it.
1672          */
1673         for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
1674                 if (pkeys[i] == LIM_MGMT_P_KEY) {
1675                         update_includes_mgmt_partition = 1;
1676                         break;
1677                 }
1678         }
1679
1680         if (!update_includes_mgmt_partition)
1681                 return 1;
1682
1683         for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
1684                 u16 key = pkeys[i];
1685                 u16 okey = ppd->pkeys[i];
1686
1687                 if (key == okey)
1688                         continue;
1689                 /*
1690                  * The SM gives us the complete PKey table. We have
1691                  * to ensure that we put the PKeys in the matching
1692                  * slots.
1693                  */
1694                 ppd->pkeys[i] = key;
1695                 changed = 1;
1696         }
1697
1698         if (changed) {
1699                 (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
1700                 hfi1_event_pkey_change(dd, port);
1701         }
1702
1703         return 0;
1704 }
1705
1706 static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
1707                                     struct ib_device *ibdev, u8 port,
1708                                     u32 *resp_len, u32 max_len)
1709 {
1710         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1711         u32 n_blocks_sent = OPA_AM_NBLK(am);
1712         u32 start_block = am & 0x7ff;
1713         u16 *p = (u16 *)data;
1714         __be16 *q = (__be16 *)data;
1715         int i;
1716         u16 n_blocks_avail;
1717         unsigned npkeys = hfi1_get_npkeys(dd);
1718         u32 size = 0;
1719
1720         if (n_blocks_sent == 0) {
1721                 pr_warn("OPA Set PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n",
1722                         port, start_block, n_blocks_sent);
1723                 smp->status |= IB_SMP_INVALID_FIELD;
1724                 return reply((struct ib_mad_hdr *)smp);
1725         }
1726
1727         n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
1728
1729         size = sizeof(u16) * (n_blocks_sent * OPA_PARTITION_TABLE_BLK_SIZE);
1730
1731         if (smp_length_check(size, max_len)) {
1732                 smp->status |= IB_SMP_INVALID_FIELD;
1733                 return reply((struct ib_mad_hdr *)smp);
1734         }
1735
1736         if (start_block + n_blocks_sent > n_blocks_avail ||
1737             n_blocks_sent > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
1738                 pr_warn("OPA Set PKey AM Invalid : s 0x%x; req 0x%x; avail 0x%x; blk/smp 0x%lx\n",
1739                         start_block, n_blocks_sent, n_blocks_avail,
1740                         OPA_NUM_PKEY_BLOCKS_PER_SMP);
1741                 smp->status |= IB_SMP_INVALID_FIELD;
1742                 return reply((struct ib_mad_hdr *)smp);
1743         }
1744
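        /* Convert the wire-format (big-endian) PKey table to host order in place */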
1745         for (i = 0; i < n_blocks_sent * OPA_PARTITION_TABLE_BLK_SIZE; i++)
1746                 p[i] = be16_to_cpu(q[i]);
1747
1748         if (start_block == 0 && set_pkeys(dd, port, p) != 0) {
1749                 smp->status |= IB_SMP_INVALID_FIELD;
1750                 return reply((struct ib_mad_hdr *)smp);
1751         }
1752
1753         return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len,
1754                                         max_len);
1755 }
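
/*
 * Worked example (illustrative, not driver code): assuming
 * OPA_PARTITION_TABLE_BLK_SIZE is 32, one block carries 32 u16 PKeys, so
 * a Set with n_blocks_sent == 1 must supply size = 1 * 32 * sizeof(u16)
 * == 64 bytes of payload. If hfi1_get_npkeys() returned 16, then
 * n_blocks_avail = 16 / 32 + 1 == 1, and any request with
 * start_block + n_blocks_sent > 1 is rejected above.
 */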
1756
1757 #define ILLEGAL_VL 12
1758 /*
1759  * filter_sc2vlt remaps entries that map to VL15 to ILLEGAL_VL (except
1760  * for SC15, which must map to VL15). If we didn't remap things this
1761  * way, it would be possible for VL15 counters to increment when we
1762  * try to send on an SC which is mapped to an invalid VL.
1763  * When getting the table, convert ILLEGAL_VL back to VL15.
1764  */
1765 static void filter_sc2vlt(void *data, bool set)
1766 {
1767         int i;
1768         u8 *pd = data;
1769
1770         for (i = 0; i < OPA_MAX_SCS; i++) {
1771                 if (i == 15)
1772                         continue;
1773
1774                 if (set) {
1775                         if ((pd[i] & 0x1f) == 0xf)
1776                                 pd[i] = ILLEGAL_VL;
1777                 } else {
1778                         if ((pd[i] & 0x1f) == ILLEGAL_VL)
1779                                 pd[i] = 0xf;
1780                 }
1781         }
1782 }
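
/*
 * Example (illustrative): on Set, an entry of 0xf (VL15) for any SC other
 * than SC15 is stored as ILLEGAL_VL (12); on Get, a stored 12 is shown as
 * 0xf again, so the FM reads back the table it wrote, while sends on those
 * SCs land on an unused VL instead of polluting the VL15 counters.
 */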
1783
1784 static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
1785 {
1786         u64 *val = data;
1787
1788         filter_sc2vlt(data, true);
1789
1790         write_csr(dd, SEND_SC2VLT0, *val++);
1791         write_csr(dd, SEND_SC2VLT1, *val++);
1792         write_csr(dd, SEND_SC2VLT2, *val++);
1793         write_csr(dd, SEND_SC2VLT3, *val++);
1794         write_seqlock_irq(&dd->sc2vl_lock);
1795         memcpy(dd->sc2vl, data, sizeof(dd->sc2vl));
1796         write_sequnlock_irq(&dd->sc2vl_lock);
1797         return 0;
1798 }
1799
1800 static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
1801 {
1802         u64 *val = (u64 *)data;
1803
1804         *val++ = read_csr(dd, SEND_SC2VLT0);
1805         *val++ = read_csr(dd, SEND_SC2VLT1);
1806         *val++ = read_csr(dd, SEND_SC2VLT2);
1807         *val++ = read_csr(dd, SEND_SC2VLT3);
1808
1809         filter_sc2vlt((u64 *)data, false);
1810         return 0;
1811 }
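
/*
 * Reader-side sketch (illustrative; assumes dd->sc2vl is the 32-byte
 * table guarded by dd->sc2vl_lock, as the writer above implies):
 *
 *      unsigned int seq;
 *      u64 sc2vl[4];
 *
 *      do {
 *              seq = read_seqbegin(&dd->sc2vl_lock);
 *              memcpy(sc2vl, dd->sc2vl, sizeof(sc2vl));
 *      } while (read_seqretry(&dd->sc2vl_lock, seq));
 */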
1812
1813 static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
1814                                    struct ib_device *ibdev, u8 port,
1815                                    u32 *resp_len, u32 max_len)
1816 {
1817         struct hfi1_ibport *ibp = to_iport(ibdev, port);
1818         u8 *p = data;
1819         size_t size = ARRAY_SIZE(ibp->sl_to_sc); /* == 32 */
1820         unsigned i;
1821
1822         if (am || smp_length_check(size, max_len)) {
1823                 smp->status |= IB_SMP_INVALID_FIELD;
1824                 return reply((struct ib_mad_hdr *)smp);
1825         }
1826
1827         for (i = 0; i < ARRAY_SIZE(ibp->sl_to_sc); i++)
1828                 *p++ = ibp->sl_to_sc[i];
1829
1830         if (resp_len)
1831                 *resp_len += size;
1832
1833         return reply((struct ib_mad_hdr *)smp);
1834 }
1835
1836 static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
1837                                    struct ib_device *ibdev, u8 port,
1838                                    u32 *resp_len, u32 max_len)
1839 {
1840         struct hfi1_ibport *ibp = to_iport(ibdev, port);
1841         u8 *p = data;
1842         size_t size = ARRAY_SIZE(ibp->sl_to_sc);
1843         int i;
1844         u8 sc;
1845
1846         if (am || smp_length_check(size, max_len)) {
1847                 smp->status |= IB_SMP_INVALID_FIELD;
1848                 return reply((struct ib_mad_hdr *)smp);
1849         }
1850
1851         for (i = 0; i <  ARRAY_SIZE(ibp->sl_to_sc); i++) {
1852                 sc = *p++;
1853                 if (ibp->sl_to_sc[i] != sc) {
1854                         ibp->sl_to_sc[i] = sc;
1855
1856                         /* Put all stale qps into error state */
1857                         hfi1_error_port_qps(ibp, i);
1858                 }
1859         }
1860
1861         return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len,
1862                                        max_len);
1863 }
1864
1865 static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
1866                                    struct ib_device *ibdev, u8 port,
1867                                    u32 *resp_len, u32 max_len)
1868 {
1869         struct hfi1_ibport *ibp = to_iport(ibdev, port);
1870         u8 *p = data;
1871         size_t size = ARRAY_SIZE(ibp->sc_to_sl); /* == 32 */
1872         unsigned i;
1873
1874         if (am || smp_length_check(size, max_len)) {
1875                 smp->status |= IB_SMP_INVALID_FIELD;
1876                 return reply((struct ib_mad_hdr *)smp);
1877         }
1878
1879         for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
1880                 *p++ = ibp->sc_to_sl[i];
1881
1882         if (resp_len)
1883                 *resp_len += size;
1884
1885         return reply((struct ib_mad_hdr *)smp);
1886 }
1887
1888 static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
1889                                    struct ib_device *ibdev, u8 port,
1890                                    u32 *resp_len, u32 max_len)
1891 {
1892         struct hfi1_ibport *ibp = to_iport(ibdev, port);
1893         size_t size = ARRAY_SIZE(ibp->sc_to_sl);
1894         u8 *p = data;
1895         int i;
1896
1897         if (am || smp_length_check(size, max_len)) {
1898                 smp->status |= IB_SMP_INVALID_FIELD;
1899                 return reply((struct ib_mad_hdr *)smp);
1900         }
1901
1902         for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
1903                 ibp->sc_to_sl[i] = *p++;
1904
1905         return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len,
1906                                        max_len);
1907 }
1908
1909 static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
1910                                     struct ib_device *ibdev, u8 port,
1911                                     u32 *resp_len, u32 max_len)
1912 {
1913         u32 n_blocks = OPA_AM_NBLK(am);
1914         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1915         void *vp = (void *)data;
1916         size_t size = 4 * sizeof(u64);
1917
1918         if (n_blocks != 1 || smp_length_check(size, max_len)) {
1919                 smp->status |= IB_SMP_INVALID_FIELD;
1920                 return reply((struct ib_mad_hdr *)smp);
1921         }
1922
1923         get_sc2vlt_tables(dd, vp);
1924
1925         if (resp_len)
1926                 *resp_len += size;
1927
1928         return reply((struct ib_mad_hdr *)smp);
1929 }
1930
1931 static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
1932                                     struct ib_device *ibdev, u8 port,
1933                                     u32 *resp_len, u32 max_len)
1934 {
1935         u32 n_blocks = OPA_AM_NBLK(am);
1936         int async_update = OPA_AM_ASYNC(am);
1937         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1938         void *vp = (void *)data;
1939         struct hfi1_pportdata *ppd;
1940         int lstate;
1941         /*
1942          * set_sc2vlt_tables writes the information contained in *data
1943          * to four 64-bit registers SendSC2VLt[0-3]. We need to make
1944          * sure the request length (max_len) covers the total size of
1945          * the four SendSC2VLt[0-3] registers.
1946          */
1947         size_t size = 4 * sizeof(u64);
1948
1949         if (n_blocks != 1 || async_update || smp_length_check(size, max_len)) {
1950                 smp->status |= IB_SMP_INVALID_FIELD;
1951                 return reply((struct ib_mad_hdr *)smp);
1952         }
1953
1954         /* IB numbers ports from 1, hw from 0 */
1955         ppd = dd->pport + (port - 1);
1956         lstate = driver_lstate(ppd);
1957         /*
1958          * it's known that async_update is 0 by this point, but include
1959          * the explicit check for clarity
1960          */
1961         if (!async_update &&
1962             (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE)) {
1963                 smp->status |= IB_SMP_INVALID_FIELD;
1964                 return reply((struct ib_mad_hdr *)smp);
1965         }
1966
1967         set_sc2vlt_tables(dd, vp);
1968
1969         return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len,
1970                                         max_len);
1971 }
1972
1973 static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
1974                                      struct ib_device *ibdev, u8 port,
1975                                      u32 *resp_len, u32 max_len)
1976 {
1977         u32 n_blocks = OPA_AM_NPORT(am);
1978         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
1979         struct hfi1_pportdata *ppd;
1980         void *vp = (void *)data;
1981         int size = sizeof(struct sc2vlnt);
1982
1983         if (n_blocks != 1 || smp_length_check(size, max_len)) {
1984                 smp->status |= IB_SMP_INVALID_FIELD;
1985                 return reply((struct ib_mad_hdr *)smp);
1986         }
1987
1988         ppd = dd->pport + (port - 1);
1989
1990         fm_get_table(ppd, FM_TBL_SC2VLNT, vp);
1991
1992         if (resp_len)
1993                 *resp_len += size;
1994
1995         return reply((struct ib_mad_hdr *)smp);
1996 }
1997
1998 static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
1999                                      struct ib_device *ibdev, u8 port,
2000                                      u32 *resp_len, u32 max_len)
2001 {
2002         u32 n_blocks = OPA_AM_NPORT(am);
2003         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2004         struct hfi1_pportdata *ppd;
2005         void *vp = (void *)data;
2006         int lstate;
2007         int size = sizeof(struct sc2vlnt);
2008
2009         if (n_blocks != 1 || smp_length_check(size, max_len)) {
2010                 smp->status |= IB_SMP_INVALID_FIELD;
2011                 return reply((struct ib_mad_hdr *)smp);
2012         }
2013
2014         /* IB numbers ports from 1, hw from 0 */
2015         ppd = dd->pport + (port - 1);
2016         lstate = driver_lstate(ppd);
2017         if (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE) {
2018                 smp->status |= IB_SMP_INVALID_FIELD;
2019                 return reply((struct ib_mad_hdr *)smp);
2020         }
2021
2024         fm_set_table(ppd, FM_TBL_SC2VLNT, vp);
2025
2026         return __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
2027                                          resp_len, max_len);
2028 }
2029
2030 static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
2031                               struct ib_device *ibdev, u8 port,
2032                               u32 *resp_len, u32 max_len)
2033 {
2034         u32 nports = OPA_AM_NPORT(am);
2035         u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
2036         u32 lstate;
2037         struct hfi1_ibport *ibp;
2038         struct hfi1_pportdata *ppd;
2039         struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
2040
2041         if (nports != 1 || smp_length_check(sizeof(*psi), max_len)) {
2042                 smp->status |= IB_SMP_INVALID_FIELD;
2043                 return reply((struct ib_mad_hdr *)smp);
2044         }
2045
2046         ibp = to_iport(ibdev, port);
2047         ppd = ppd_from_ibp(ibp);
2048
2049         lstate = driver_lstate(ppd);
2050
2051         if (start_of_sm_config && (lstate == IB_PORT_INIT))
2052                 ppd->is_sm_config_started = 1;
2053
2054         psi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
2055         psi->port_states.ledenable_offlinereason |=
2056                 ppd->is_sm_config_started << 5;
2057         psi->port_states.ledenable_offlinereason |=
2058                 ppd->offline_disabled_reason;
2059
2060         psi->port_states.portphysstate_portstate =
2061                 (driver_pstate(ppd) << 4) | (lstate & 0xf);
2062         psi->link_width_downgrade_tx_active =
2063                 cpu_to_be16(ppd->link_width_downgrade_tx_active);
2064         psi->link_width_downgrade_rx_active =
2065                 cpu_to_be16(ppd->link_width_downgrade_rx_active);
2066         if (resp_len)
2067                 *resp_len += sizeof(struct opa_port_state_info);
2068
2069         return reply((struct ib_mad_hdr *)smp);
2070 }
2071
2072 static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
2073                               struct ib_device *ibdev, u8 port,
2074                               u32 *resp_len, u32 max_len)
2075 {
2076         u32 nports = OPA_AM_NPORT(am);
2077         u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
2078         u32 ls_old;
2079         u8 ls_new, ps_new;
2080         struct hfi1_ibport *ibp;
2081         struct hfi1_pportdata *ppd;
2082         struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
2083         int ret, invalid = 0;
2084
2085         if (nports != 1 || smp_length_check(sizeof(*psi), max_len)) {
2086                 smp->status |= IB_SMP_INVALID_FIELD;
2087                 return reply((struct ib_mad_hdr *)smp);
2088         }
2089
2090         ibp = to_iport(ibdev, port);
2091         ppd = ppd_from_ibp(ibp);
2092
2093         ls_old = driver_lstate(ppd);
2094
2095         ls_new = port_states_to_logical_state(&psi->port_states);
2096         ps_new = port_states_to_phys_state(&psi->port_states);
2097
2098         if (ls_old == IB_PORT_INIT) {
2099                 if (start_of_sm_config) {
2100                         if (ls_new == ls_old || (ls_new == IB_PORT_ARMED))
2101                                 ppd->is_sm_config_started = 1;
2102                 } else if (ls_new == IB_PORT_ARMED) {
2103                         if (ppd->is_sm_config_started == 0)
2104                                 invalid = 1;
2105                 }
2106         }
2107
2108         ret = set_port_states(ppd, smp, ls_new, ps_new, invalid);
2109         if (ret)
2110                 return ret;
2111
2112         if (invalid)
2113                 smp->status |= IB_SMP_INVALID_FIELD;
2114
2115         return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len,
2116                                   max_len);
2117 }
2118
2119 static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
2120                                      struct ib_device *ibdev, u8 port,
2121                                      u32 *resp_len, u32 max_len)
2122 {
2123         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2124         u32 addr = OPA_AM_CI_ADDR(am);
2125         u32 len = OPA_AM_CI_LEN(am) + 1;
2126         int ret;
2127
2128         if (dd->pport->port_type != PORT_TYPE_QSFP ||
2129             smp_length_check(len, max_len)) {
2130                 smp->status |= IB_SMP_INVALID_FIELD;
2131                 return reply((struct ib_mad_hdr *)smp);
2132         }
2133
2134 #define __CI_PAGE_SIZE BIT(7) /* 128 bytes */
2135 #define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1)
2136 #define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK)
2137
2138         /*
2139          * check that addr is within spec, and
2140          * addr and (addr + len - 1) are on the same "page"
2141          */
2142         if (addr >= 4096 ||
2143             (__CI_PAGE_NUM(addr) != __CI_PAGE_NUM(addr + len - 1))) {
2144                 smp->status |= IB_SMP_INVALID_FIELD;
2145                 return reply((struct ib_mad_hdr *)smp);
2146         }
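
        /*
         * Worked example (illustrative): with __CI_PAGE_SIZE == 128, a
         * query of addr = 120, len = 16 covers bytes 120..135;
         * __CI_PAGE_NUM(120) == 0 but __CI_PAGE_NUM(135) == 128, so the
         * request straddles a page and is rejected, while addr = 128,
         * len = 16 stays within one page and is accepted.
         */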
2147
2148         ret = get_cable_info(dd, port, addr, len, data);
2149
2150         if (ret == -ENODEV) {
2151                 smp->status |= IB_SMP_UNSUP_METH_ATTR;
2152                 return reply((struct ib_mad_hdr *)smp);
2153         }
2154
2155         /* The address range for the CableInfo SMA query is wider than the
2156          * memory available on the QSFP cable. We want to return a valid
2157          * response, albeit zeroed out, for address ranges beyond available
2158          * memory but still within the CableInfo query spec.
2159          */
2160         if (ret < 0 && ret != -ERANGE) {
2161                 smp->status |= IB_SMP_INVALID_FIELD;
2162                 return reply((struct ib_mad_hdr *)smp);
2163         }
2164
2165         if (resp_len)
2166                 *resp_len += len;
2167
2168         return reply((struct ib_mad_hdr *)smp);
2169 }
2170
2171 static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
2172                               struct ib_device *ibdev, u8 port, u32 *resp_len,
2173                               u32 max_len)
2174 {
2175         u32 num_ports = OPA_AM_NPORT(am);
2176         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2177         struct hfi1_pportdata *ppd;
2178         struct buffer_control *p = (struct buffer_control *)data;
2179         int size = sizeof(struct buffer_control);
2180
2181         if (num_ports != 1 || smp_length_check(size, max_len)) {
2182                 smp->status |= IB_SMP_INVALID_FIELD;
2183                 return reply((struct ib_mad_hdr *)smp);
2184         }
2185
2186         ppd = dd->pport + (port - 1);
2187         fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p);
2188         trace_bct_get(dd, p);
2189         if (resp_len)
2190                 *resp_len += size;
2191
2192         return reply((struct ib_mad_hdr *)smp);
2193 }
2194
2195 static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
2196                               struct ib_device *ibdev, u8 port, u32 *resp_len,
2197                               u32 max_len)
2198 {
2199         u32 num_ports = OPA_AM_NPORT(am);
2200         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2201         struct hfi1_pportdata *ppd;
2202         struct buffer_control *p = (struct buffer_control *)data;
2203
2204         if (num_ports != 1 || smp_length_check(sizeof(*p), max_len)) {
2205                 smp->status |= IB_SMP_INVALID_FIELD;
2206                 return reply((struct ib_mad_hdr *)smp);
2207         }
2208         ppd = dd->pport + (port - 1);
2209         trace_bct_set(dd, p);
2210         if (fm_set_table(ppd, FM_TBL_BUFFER_CONTROL, p) < 0) {
2211                 smp->status |= IB_SMP_INVALID_FIELD;
2212                 return reply((struct ib_mad_hdr *)smp);
2213         }
2214
2215         return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len,
2216                                   max_len);
2217 }
2218
2219 static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
2220                                  struct ib_device *ibdev, u8 port,
2221                                  u32 *resp_len, u32 max_len)
2222 {
2223         struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
2224         u32 num_ports = OPA_AM_NPORT(am);
2225         u8 section = (am & 0x00ff0000) >> 16;
2226         u8 *p = data;
2227         int size = 256;
2228
2229         if (num_ports != 1 || smp_length_check(size, max_len)) {
2230                 smp->status |= IB_SMP_INVALID_FIELD;
2231                 return reply((struct ib_mad_hdr *)smp);
2232         }
2233
2234         switch (section) {
2235         case OPA_VLARB_LOW_ELEMENTS:
2236                 fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p);
2237                 break;
2238         case OPA_VLARB_HIGH_ELEMENTS:
2239                 fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p);
2240                 break;
2241         case OPA_VLARB_PREEMPT_ELEMENTS:
2242                 fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p);
2243                 break;
2244         case OPA_VLARB_PREEMPT_MATRIX:
2245                 fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p);
2246                 break;
2247         default:
2248                 pr_warn("OPA SubnGet(VL Arb) AM Invalid : 0x%x\n",
2249                         be32_to_cpu(smp->attr_mod));
2250                 smp->status |= IB_SMP_INVALID_FIELD;
2251                 size = 0;
2252                 break;
2253         }
2254
2255         if (size > 0 && resp_len)
2256                 *resp_len += size;
2257
2258         return reply((struct ib_mad_hdr *)smp);
2259 }
2260
2261 static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
2262                                  struct ib_device *ibdev, u8 port,
2263                                  u32 *resp_len, u32 max_len)
2264 {
2265         struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
2266         u32 num_ports = OPA_AM_NPORT(am);
2267         u8 section = (am & 0x00ff0000) >> 16;
2268         u8 *p = data;
2269         int size = 256;
2270
2271         if (num_ports != 1 || smp_length_check(size, max_len)) {
2272                 smp->status |= IB_SMP_INVALID_FIELD;
2273                 return reply((struct ib_mad_hdr *)smp);
2274         }
2275
2276         switch (section) {
2277         case OPA_VLARB_LOW_ELEMENTS:
2278                 (void)fm_set_table(ppd, FM_TBL_VL_LOW_ARB, p);
2279                 break;
2280         case OPA_VLARB_HIGH_ELEMENTS:
2281                 (void)fm_set_table(ppd, FM_TBL_VL_HIGH_ARB, p);
2282                 break;
2283         /*
2284          * Neither OPA_VLARB_PREEMPT_ELEMENTS nor OPA_VLARB_PREEMPT_MATRIX
2285          * can be changed from its default value.
2286          */
2287         case OPA_VLARB_PREEMPT_ELEMENTS:
2288                 /* FALLTHROUGH */
2289         case OPA_VLARB_PREEMPT_MATRIX:
2290                 smp->status |= IB_SMP_UNSUP_METH_ATTR;
2291                 break;
2292         default:
2293                 pr_warn("OPA SubnSet(VL Arb) AM Invalid : 0x%x\n",
2294                         be32_to_cpu(smp->attr_mod));
2295                 smp->status |= IB_SMP_INVALID_FIELD;
2296                 break;
2297         }
2298
2299         return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len,
2300                                      max_len);
2301 }
2302
2303 struct opa_pma_mad {
2304         struct ib_mad_hdr mad_hdr;
2305         u8 data[2024];
2306 } __packed;
2307
2308 struct opa_port_status_req {
2309         __u8 port_num;
2310         __u8 reserved[3];
2311         __be32 vl_select_mask;
2312 };
2313
2314 #define VL_MASK_ALL             0x00000000000080ffUL
2315
2316 struct opa_port_status_rsp {
2317         __u8 port_num;
2318         __u8 reserved[3];
2319         __be32  vl_select_mask;
2320
2321         /* Data counters */
2322         __be64 port_xmit_data;
2323         __be64 port_rcv_data;
2324         __be64 port_xmit_pkts;
2325         __be64 port_rcv_pkts;
2326         __be64 port_multicast_xmit_pkts;
2327         __be64 port_multicast_rcv_pkts;
2328         __be64 port_xmit_wait;
2329         __be64 sw_port_congestion;
2330         __be64 port_rcv_fecn;
2331         __be64 port_rcv_becn;
2332         __be64 port_xmit_time_cong;
2333         __be64 port_xmit_wasted_bw;
2334         __be64 port_xmit_wait_data;
2335         __be64 port_rcv_bubble;
2336         __be64 port_mark_fecn;
2337         /* Error counters */
2338         __be64 port_rcv_constraint_errors;
2339         __be64 port_rcv_switch_relay_errors;
2340         __be64 port_xmit_discards;
2341         __be64 port_xmit_constraint_errors;
2342         __be64 port_rcv_remote_physical_errors;
2343         __be64 local_link_integrity_errors;
2344         __be64 port_rcv_errors;
2345         __be64 excessive_buffer_overruns;
2346         __be64 fm_config_errors;
2347         __be32 link_error_recovery;
2348         __be32 link_downed;
2349         u8 uncorrectable_errors;
2350
2351         u8 link_quality_indicator; /* 5res, 3bit */
2352         u8 res2[6];
2353         struct _vls_pctrs {
2354                 /* per-VL Data counters */
2355                 __be64 port_vl_xmit_data;
2356                 __be64 port_vl_rcv_data;
2357                 __be64 port_vl_xmit_pkts;
2358                 __be64 port_vl_rcv_pkts;
2359                 __be64 port_vl_xmit_wait;
2360                 __be64 sw_port_vl_congestion;
2361                 __be64 port_vl_rcv_fecn;
2362                 __be64 port_vl_rcv_becn;
2363                 __be64 port_xmit_time_cong;
2364                 __be64 port_vl_xmit_wasted_bw;
2365                 __be64 port_vl_xmit_wait_data;
2366                 __be64 port_vl_rcv_bubble;
2367                 __be64 port_vl_mark_fecn;
2368                 __be64 port_vl_xmit_discards;
2369         } vls[0]; /* real array size defined by # bits set in vl_select_mask */
2370 };
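
/*
 * Sizing example (illustrative): VL_MASK_ALL (0x80ff) selects VLs 0-7
 * plus VL15, so num_vls == hweight64(0x80ff) == 9 and a full response
 * takes sizeof(struct opa_port_status_rsp) + 9 * sizeof(struct
 * _vls_pctrs) bytes, which must fit within the 2024-byte data area of
 * struct opa_pma_mad; pma_get_opa_portstatus() checks exactly this.
 */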
2371
2372 enum counter_selects {
2373         CS_PORT_XMIT_DATA                       = (1 << 31),
2374         CS_PORT_RCV_DATA                        = (1 << 30),
2375         CS_PORT_XMIT_PKTS                       = (1 << 29),
2376         CS_PORT_RCV_PKTS                        = (1 << 28),
2377         CS_PORT_MCAST_XMIT_PKTS                 = (1 << 27),
2378         CS_PORT_MCAST_RCV_PKTS                  = (1 << 26),
2379         CS_PORT_XMIT_WAIT                       = (1 << 25),
2380         CS_SW_PORT_CONGESTION                   = (1 << 24),
2381         CS_PORT_RCV_FECN                        = (1 << 23),
2382         CS_PORT_RCV_BECN                        = (1 << 22),
2383         CS_PORT_XMIT_TIME_CONG                  = (1 << 21),
2384         CS_PORT_XMIT_WASTED_BW                  = (1 << 20),
2385         CS_PORT_XMIT_WAIT_DATA                  = (1 << 19),
2386         CS_PORT_RCV_BUBBLE                      = (1 << 18),
2387         CS_PORT_MARK_FECN                       = (1 << 17),
2388         CS_PORT_RCV_CONSTRAINT_ERRORS           = (1 << 16),
2389         CS_PORT_RCV_SWITCH_RELAY_ERRORS         = (1 << 15),
2390         CS_PORT_XMIT_DISCARDS                   = (1 << 14),
2391         CS_PORT_XMIT_CONSTRAINT_ERRORS          = (1 << 13),
2392         CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS      = (1 << 12),
2393         CS_LOCAL_LINK_INTEGRITY_ERRORS          = (1 << 11),
2394         CS_PORT_RCV_ERRORS                      = (1 << 10),
2395         CS_EXCESSIVE_BUFFER_OVERRUNS            = (1 << 9),
2396         CS_FM_CONFIG_ERRORS                     = (1 << 8),
2397         CS_LINK_ERROR_RECOVERY                  = (1 << 7),
2398         CS_LINK_DOWNED                          = (1 << 6),
2399         CS_UNCORRECTABLE_ERRORS                 = (1 << 5),
2400 };
2401
2402 struct opa_clear_port_status {
2403         __be64 port_select_mask[4];
2404         __be32 counter_select_mask;
2405 };
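
/*
 * Usage sketch (illustrative, not driver code; assumes bit n of the low
 * quadword port_select_mask[3] selects port n):
 *
 *      struct opa_clear_port_status req = { 0 };
 *
 *      req.port_select_mask[3] = cpu_to_be64(1ULL << 1);
 *      req.counter_select_mask =
 *              cpu_to_be32(CS_PORT_XMIT_DATA | CS_PORT_RCV_DATA);
 *
 * would request clearing only the data counters on port 1.
 */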
2406
2407 struct opa_aggregate {
2408         __be16 attr_id;
2409         __be16 err_reqlength;   /* 1 bit, 8 res, 7 bit */
2410         __be32 attr_mod;
2411         u8 data[0];
2412 };
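
/*
 * Decode sketch (illustrative; assumes, per the field comment above,
 * that the low 7 bits give the request length in 8-byte units and the
 * top bit reports an error):
 *
 *      u16 v = be16_to_cpu(agg->err_reqlength);
 *      u32 req_bytes = (v & 0x007f) * 8;
 *      int err = !!(v & 0x8000);
 */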
2413
2414 #define MSK_LLI 0x000000f0
2415 #define MSK_LLI_SFT 4
2416 #define MSK_LER 0x0000000f
2417 #define MSK_LER_SFT 0
2418 #define ADD_LLI 8
2419 #define ADD_LER 2
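
/*
 * Decode sketch (illustrative): the resolution word of a data-counters
 * request packs two 4-bit shift amounts, extracted roughly as:
 *
 *      u32 res = be32_to_cpu(req->resolution);
 *      u8 res_lli = (res & MSK_LLI) >> MSK_LLI_SFT;
 *      u8 res_ler = (res & MSK_LER) >> MSK_LER_SFT;
 *
 * get_error_counter_summary() below takes the resulting shifts, with
 * ADD_LLI/ADD_LER intended as biases for non-zero values.
 */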
2420
2421 /* Request contains first three fields, response contains those plus the rest */
2422 struct opa_port_data_counters_msg {
2423         __be64 port_select_mask[4];
2424         __be32 vl_select_mask;
2425         __be32 resolution;
2426
2427         /* Response fields follow */
2428         struct _port_dctrs {
2429                 u8 port_number;
2430                 u8 reserved2[3];
2431                 __be32 link_quality_indicator; /* 29res, 3bit */
2432
2433                 /* Data counters */
2434                 __be64 port_xmit_data;
2435                 __be64 port_rcv_data;
2436                 __be64 port_xmit_pkts;
2437                 __be64 port_rcv_pkts;
2438                 __be64 port_multicast_xmit_pkts;
2439                 __be64 port_multicast_rcv_pkts;
2440                 __be64 port_xmit_wait;
2441                 __be64 sw_port_congestion;
2442                 __be64 port_rcv_fecn;
2443                 __be64 port_rcv_becn;
2444                 __be64 port_xmit_time_cong;
2445                 __be64 port_xmit_wasted_bw;
2446                 __be64 port_xmit_wait_data;
2447                 __be64 port_rcv_bubble;
2448                 __be64 port_mark_fecn;
2449
2450                 __be64 port_error_counter_summary;
2451                 /* Sum of error counts/port */
2452
2453                 struct _vls_dctrs {
2454                         /* per-VL Data counters */
2455                         __be64 port_vl_xmit_data;
2456                         __be64 port_vl_rcv_data;
2457                         __be64 port_vl_xmit_pkts;
2458                         __be64 port_vl_rcv_pkts;
2459                         __be64 port_vl_xmit_wait;
2460                         __be64 sw_port_vl_congestion;
2461                         __be64 port_vl_rcv_fecn;
2462                         __be64 port_vl_rcv_becn;
2463                         __be64 port_xmit_time_cong;
2464                         __be64 port_vl_xmit_wasted_bw;
2465                         __be64 port_vl_xmit_wait_data;
2466                         __be64 port_vl_rcv_bubble;
2467                         __be64 port_vl_mark_fecn;
2468                 } vls[0];
2469                 /* array size defined by #bits set in vl_select_mask */
2470         } port[1]; /* array size defined by #ports in attribute modifier */
2471 };
2472
2473 struct opa_port_error_counters64_msg {
2474         /*
2475          * Request contains the first two fields, response contains the
2476          * whole structure.
2477          */
2478         __be64 port_select_mask[4];
2479         __be32 vl_select_mask;
2480
2481         /* Response-only fields follow */
2482         __be32 reserved1;
2483         struct _port_ectrs {
2484                 u8 port_number;
2485                 u8 reserved2[7];
2486                 __be64 port_rcv_constraint_errors;
2487                 __be64 port_rcv_switch_relay_errors;
2488                 __be64 port_xmit_discards;
2489                 __be64 port_xmit_constraint_errors;
2490                 __be64 port_rcv_remote_physical_errors;
2491                 __be64 local_link_integrity_errors;
2492                 __be64 port_rcv_errors;
2493                 __be64 excessive_buffer_overruns;
2494                 __be64 fm_config_errors;
2495                 __be32 link_error_recovery;
2496                 __be32 link_downed;
2497                 u8 uncorrectable_errors;
2498                 u8 reserved3[7];
2499                 struct _vls_ectrs {
2500                         __be64 port_vl_xmit_discards;
2501                 } vls[0];
2502                 /* array size defined by #bits set in vl_select_mask */
2503         } port[1]; /* array size defined by #ports in attribute modifier */
2504 };
2505
2506 struct opa_port_error_info_msg {
2507         __be64 port_select_mask[4];
2508         __be32 error_info_select_mask;
2509         __be32 reserved1;
2510         struct _port_ei {
2511                 u8 port_number;
2512                 u8 reserved2[7];
2513
2514                 /* PortRcvErrorInfo */
2515                 struct {
2516                         u8 status_and_code;
2517                         union {
2518                                 u8 raw[17];
2519                                 struct {
2520                                         /* EI1to12 format */
2521                                         u8 packet_flit1[8];
2522                                         u8 packet_flit2[8];
2523                                         u8 remaining_flit_bits12;
2524                                 } ei1to12;
2525                                 struct {
2526                                         u8 packet_bytes[8];
2527                                         u8 remaining_flit_bits;
2528                                 } ei13;
2529                         } ei;
2530                         u8 reserved3[6];
2531                 } __packed port_rcv_ei;
2532
2533                 /* ExcessiveBufferOverrunInfo */
2534                 struct {
2535                         u8 status_and_sc;
2536                         u8 reserved4[7];
2537                 } __packed excessive_buffer_overrun_ei;
2538
2539                 /* PortXmitConstraintErrorInfo */
2540                 struct {
2541                         u8 status;
2542                         u8 reserved5;
2543                         __be16 pkey;
2544                         __be32 slid;
2545                 } __packed port_xmit_constraint_ei;
2546
2547                 /* PortRcvConstraintErrorInfo */
2548                 struct {
2549                         u8 status;
2550                         u8 reserved6;
2551                         __be16 pkey;
2552                         __be32 slid;
2553                 } __packed port_rcv_constraint_ei;
2554
2555                 /* PortRcvSwitchRelayErrorInfo */
2556                 struct {
2557                         u8 status_and_code;
2558                         u8 reserved7[3];
2559                         __u32 error_info;
2560                 } __packed port_rcv_switch_relay_ei;
2561
2562                 /* UncorrectableErrorInfo */
2563                 struct {
2564                         u8 status_and_code;
2565                         u8 reserved8;
2566                 } __packed uncorrectable_ei;
2567
2568                 /* FMConfigErrorInfo */
2569                 struct {
2570                         u8 status_and_code;
2571                         u8 error_info;
2572                 } __packed fm_config_ei;
2573                 __u32 reserved9;
2574         } port[1]; /* actual array size defined by #ports in attr modifier */
2575 };
2576
2577 /* opa_port_error_info_msg error_info_select_mask bit definitions */
2578 enum error_info_selects {
2579         ES_PORT_RCV_ERROR_INFO                  = (1 << 31),
2580         ES_EXCESSIVE_BUFFER_OVERRUN_INFO        = (1 << 30),
2581         ES_PORT_XMIT_CONSTRAINT_ERROR_INFO      = (1 << 29),
2582         ES_PORT_RCV_CONSTRAINT_ERROR_INFO       = (1 << 28),
2583         ES_PORT_RCV_SWITCH_RELAY_ERROR_INFO     = (1 << 27),
2584         ES_UNCORRECTABLE_ERROR_INFO             = (1 << 26),
2585         ES_FM_CONFIG_ERROR_INFO                 = (1 << 25)
2586 };
2587
2588 static int pma_get_opa_classportinfo(struct opa_pma_mad *pmp,
2589                                      struct ib_device *ibdev, u32 *resp_len)
2590 {
2591         struct opa_class_port_info *p =
2592                 (struct opa_class_port_info *)pmp->data;
2593
2594         memset(pmp->data, 0, sizeof(pmp->data));
2595
2596         if (pmp->mad_hdr.attr_mod != 0)
2597                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2598
2599         p->base_version = OPA_MGMT_BASE_VERSION;
2600         p->class_version = OPA_SM_CLASS_VERSION;
2601         /*
2602          * Expected response time: 4.096 usec * 2^18 == 1.073741824 sec.
2603          */
2604         p->cap_mask2_resp_time = cpu_to_be32(18);
2605
2606         if (resp_len)
2607                 *resp_len += sizeof(*p);
2608
2609         return reply((struct ib_mad_hdr *)pmp);
2610 }
2611
2612 static void a0_portstatus(struct hfi1_pportdata *ppd,
2613                           struct opa_port_status_rsp *rsp)
2614 {
2615         if (!is_bx(ppd->dd)) {
2616                 unsigned long vl;
2617                 u64 sum_vl_xmit_wait = 0;
2618                 unsigned long vl_all_mask = VL_MASK_ALL;
2619
2620                 for_each_set_bit(vl, &vl_all_mask, BITS_PER_LONG) {
2621                         u64 tmp = sum_vl_xmit_wait +
2622                                   read_port_cntr(ppd, C_TX_WAIT_VL,
2623                                                  idx_from_vl(vl));
2624                         if (tmp < sum_vl_xmit_wait) {
2625                                 /* we wrapped */
2626                                 sum_vl_xmit_wait = (u64)~0;
2627                                 break;
2628                         }
2629                         sum_vl_xmit_wait = tmp;
2630                 }
2631                 if (be64_to_cpu(rsp->port_xmit_wait) > sum_vl_xmit_wait)
2632                         rsp->port_xmit_wait = cpu_to_be64(sum_vl_xmit_wait);
2633         }
2634 }
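
/*
 * The loop above guards against u64 wrap while summing; as a standalone
 * helper (illustrative sketch only) the same saturating add would read:
 *
 *      static u64 sat_add64(u64 a, u64 b)
 *      {
 *              u64 sum = a + b;
 *
 *              return sum < a ? (u64)~0 : sum;
 *      }
 */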
2635
2636 static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
2637                                   struct ib_device *ibdev,
2638                                   u8 port, u32 *resp_len)
2639 {
2640         struct opa_port_status_req *req =
2641                 (struct opa_port_status_req *)pmp->data;
2642         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2643         struct opa_port_status_rsp *rsp;
2644         unsigned long vl_select_mask = be32_to_cpu(req->vl_select_mask);
2645         unsigned long vl;
2646         size_t response_data_size;
2647         u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
2648         u8 port_num = req->port_num;
2649         u8 num_vls = hweight64(vl_select_mask);
2650         struct _vls_pctrs *vlinfo;
2651         struct hfi1_ibport *ibp = to_iport(ibdev, port);
2652         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2653         int vfi;
2654         u64 tmp, tmp2;
2655
2656         response_data_size = sizeof(struct opa_port_status_rsp) +
2657                                 num_vls * sizeof(struct _vls_pctrs);
2658         if (response_data_size > sizeof(pmp->data)) {
2659                 pmp->mad_hdr.status |= OPA_PM_STATUS_REQUEST_TOO_LARGE;
2660                 return reply((struct ib_mad_hdr *)pmp);
2661         }
2662
2663         if (nports != 1 || (port_num && port_num != port) ||
2664             num_vls > OPA_MAX_VLS || (vl_select_mask & ~VL_MASK_ALL)) {
2665                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2666                 return reply((struct ib_mad_hdr *)pmp);
2667         }
2668
2669         memset(pmp->data, 0, sizeof(pmp->data));
2670
2671         rsp = (struct opa_port_status_rsp *)pmp->data;
2672         if (port_num)
2673                 rsp->port_num = port_num;
2674         else
2675                 rsp->port_num = port;
2676
2677         rsp->port_rcv_constraint_errors =
2678                 cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2679                                            CNTR_INVALID_VL));
2680
2681         hfi1_read_link_quality(dd, &rsp->link_quality_indicator);
2682
2683         rsp->vl_select_mask = cpu_to_be32((u32)vl_select_mask);
2684         rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
2685                                           CNTR_INVALID_VL));
2686         rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
2687                                          CNTR_INVALID_VL));
2688         rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
2689                                           CNTR_INVALID_VL));
2690         rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
2691                                          CNTR_INVALID_VL));
2692         rsp->port_multicast_xmit_pkts =
2693                 cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
2694                                           CNTR_INVALID_VL));
2695         rsp->port_multicast_rcv_pkts =
2696                 cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
2697                                           CNTR_INVALID_VL));
2698         rsp->port_xmit_wait =
2699                 cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL));
2700         rsp->port_rcv_fecn =
2701                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
2702         rsp->port_rcv_becn =
2703                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
2704         rsp->port_xmit_discards =
2705                 cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
2706                                            CNTR_INVALID_VL));
2707         rsp->port_xmit_constraint_errors =
2708                 cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2709                                            CNTR_INVALID_VL));
2710         rsp->port_rcv_remote_physical_errors =
2711                 cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2712                                           CNTR_INVALID_VL));
2713         rsp->local_link_integrity_errors =
2714                 cpu_to_be64(read_dev_cntr(dd, C_DC_RX_REPLAY,
2715                                           CNTR_INVALID_VL));
2716         tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
2717         tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
2718                                    CNTR_INVALID_VL);
2719         if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
2720                 /* overflow/wrapped */
2721                 rsp->link_error_recovery = cpu_to_be32(~0);
2722         } else {
2723                 rsp->link_error_recovery = cpu_to_be32(tmp2);
2724         }
2725         rsp->port_rcv_errors =
2726                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
2727         rsp->excessive_buffer_overruns =
2728                 cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
2729         rsp->fm_config_errors =
2730                 cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2731                                           CNTR_INVALID_VL));
2732         rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
2733                                                       CNTR_INVALID_VL));
2734
2735         /* rsp->uncorrectable_errors is 8 bits wide, and it pegs at 0xff */
2736         tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2737         rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
2738
2739         vlinfo = &rsp->vls[0];
2740         vfi = 0;
2741         /* The vl_select_mask has been checked above, and we know
2742          * that it contains only entries which represent valid VLs.
2743          * So in the for_each_set_bit() loop below, we don't need
2744          * any additional checks for vl.
2745          */
2746         for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
2747                 memset(vlinfo, 0, sizeof(*vlinfo));
2748
2749                 tmp = read_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl));
2750                 rsp->vls[vfi].port_vl_rcv_data = cpu_to_be64(tmp);
2751
2752                 rsp->vls[vfi].port_vl_rcv_pkts =
2753                         cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
2754                                                   idx_from_vl(vl)));
2755
2756                 rsp->vls[vfi].port_vl_xmit_data =
2757                         cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
2758                                                    idx_from_vl(vl)));
2759
2760                 rsp->vls[vfi].port_vl_xmit_pkts =
2761                         cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
2762                                                    idx_from_vl(vl)));
2763
2764                 rsp->vls[vfi].port_vl_xmit_wait =
2765                         cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
2766                                                    idx_from_vl(vl)));
2767
2768                 rsp->vls[vfi].port_vl_rcv_fecn =
2769                         cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
2770                                                   idx_from_vl(vl)));
2771
2772                 rsp->vls[vfi].port_vl_rcv_becn =
2773                         cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
2774                                                   idx_from_vl(vl)));
2775
2776                 rsp->vls[vfi].port_vl_xmit_discards =
2777                         cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
2778                                                    idx_from_vl(vl)));
2779                 vlinfo++;
2780                 vfi++;
2781         }
2782
2783         a0_portstatus(ppd, rsp);
2784
2785         if (resp_len)
2786                 *resp_len += response_data_size;
2787
2788         return reply((struct ib_mad_hdr *)pmp);
2789 }
2790
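     /*
      * Sum the port's error counters into the single
      * PortErrorCounterSummary value reported by the data-counter query.
      * res_lli and res_ler are resolution shifts requested by the FM;
      * the local link integrity and link error recovery contributions
      * are right-shifted by them before being added.
      */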
2791 static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port,
2792                                      u8 res_lli, u8 res_ler)
2793 {
2794         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2795         struct hfi1_ibport *ibp = to_iport(ibdev, port);
2796         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2797         u64 error_counter_summary = 0, tmp;
2798
2799         error_counter_summary += read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
2800                                                 CNTR_INVALID_VL);
2801         /* port_rcv_switch_relay_errors is 0 for HFIs */
2802         error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_DSCD,
2803                                                 CNTR_INVALID_VL);
2804         error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
2805                                                 CNTR_INVALID_VL);
2806         error_counter_summary += read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
2807                                                CNTR_INVALID_VL);
2808         /* local link integrity must be right-shifted by the lli resolution */
2809         error_counter_summary += (read_dev_cntr(dd, C_DC_RX_REPLAY,
2810                                                 CNTR_INVALID_VL) >> res_lli);
2811         /* link error recovery must be right-shifted by the ler resolution */
2812         tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
2813         tmp += read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL);
2814         error_counter_summary += (tmp >> res_ler);
2815         error_counter_summary += read_dev_cntr(dd, C_DC_RCV_ERR,
2816                                                CNTR_INVALID_VL);
2817         error_counter_summary += read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
2818         error_counter_summary += read_dev_cntr(dd, C_DC_FM_CFG_ERR,
2819                                                CNTR_INVALID_VL);
2820         /* ppd->link_downed is a 32-bit value */
2821         error_counter_summary += read_port_cntr(ppd, C_SW_LINK_DOWN,
2822                                                 CNTR_INVALID_VL);
2823         tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
2824         /* this is an 8-bit quantity */
2825         error_counter_summary += tmp < 0x100 ? (tmp & 0xff) : 0xff;
2826
2827         return error_counter_summary;
2828 }
2829
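     /*
      * On non-B-step (A0) hardware, clamp the reported port_xmit_wait to
      * the saturating sum of the per-VL transmit-wait counters.  As the
      * function name suggests, this is presumably an A0 workaround.
      */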
2830 static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp)
2831 {
2832         if (!is_bx(ppd->dd)) {
2833                 unsigned long vl;
2834                 u64 sum_vl_xmit_wait = 0;
2835                 unsigned long vl_all_mask = VL_MASK_ALL;
2836
2837                 for_each_set_bit(vl, &vl_all_mask, BITS_PER_LONG) {
2838                         u64 tmp = sum_vl_xmit_wait +
2839                                   read_port_cntr(ppd, C_TX_WAIT_VL,
2840                                                  idx_from_vl(vl));
2841                         if (tmp < sum_vl_xmit_wait) {
2842                                 /* we wrapped */
2843                                 sum_vl_xmit_wait = (u64)~0;
2844                                 break;
2845                         }
2846                         sum_vl_xmit_wait = tmp;
2847                 }
2848                 if (be64_to_cpu(rsp->port_xmit_wait) > sum_vl_xmit_wait)
2849                         rsp->port_xmit_wait = cpu_to_be64(sum_vl_xmit_wait);
2850         }
2851 }
2852
2853 static void pma_get_opa_port_dctrs(struct ib_device *ibdev,
2854                                    struct _port_dctrs *rsp)
2855 {
2856         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2857
2858         rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
2859                                                 CNTR_INVALID_VL));
2860         rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
2861                                                 CNTR_INVALID_VL));
2862         rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
2863                                                 CNTR_INVALID_VL));
2864         rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
2865                                                 CNTR_INVALID_VL));
2866         rsp->port_multicast_xmit_pkts =
2867                 cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
2868                                           CNTR_INVALID_VL));
2869         rsp->port_multicast_rcv_pkts =
2870                 cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
2871                                           CNTR_INVALID_VL));
2872 }
2873
2874 static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
2875                                     struct ib_device *ibdev,
2876                                     u8 port, u32 *resp_len)
2877 {
2878         struct opa_port_data_counters_msg *req =
2879                 (struct opa_port_data_counters_msg *)pmp->data;
2880         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
2881         struct hfi1_ibport *ibp = to_iport(ibdev, port);
2882         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
2883         struct _port_dctrs *rsp;
2884         struct _vls_dctrs *vlinfo;
2885         size_t response_data_size;
2886         u32 num_ports;
2887         u8 num_pslm;
2888         u8 lq, num_vls;
2889         u8 res_lli, res_ler;
2890         u64 port_mask;
2891         u8 port_num;
2892         unsigned long vl;
2893         unsigned long vl_select_mask;
2894         int vfi;
2895
2896         num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
2897         num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
2898         num_vls = hweight32(be32_to_cpu(req->vl_select_mask));
2899         vl_select_mask = be32_to_cpu(req->vl_select_mask);
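             /*
              * A nonzero resolution field requests that the matching
              * counters be right-shifted in the error summary; the
              * ADD_LLI/ADD_LER constants bias the shift amount.  A zero
              * field means full resolution (no shift).
              */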
2900         res_lli = (u8)(be32_to_cpu(req->resolution) & MSK_LLI) >> MSK_LLI_SFT;
2901         res_lli = res_lli ? res_lli + ADD_LLI : 0;
2902         res_ler = (u8)(be32_to_cpu(req->resolution) & MSK_LER) >> MSK_LER_SFT;
2903         res_ler = res_ler ? res_ler + ADD_LER : 0;
2904
2905         if (num_ports != 1 || (vl_select_mask & ~VL_MASK_ALL)) {
2906                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2907                 return reply((struct ib_mad_hdr *)pmp);
2908         }
2909
2910         /* Sanity check */
2911         response_data_size = sizeof(struct opa_port_data_counters_msg) +
2912                                 num_vls * sizeof(struct _vls_dctrs);
2913
2914         if (response_data_size > sizeof(pmp->data)) {
2915                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2916                 return reply((struct ib_mad_hdr *)pmp);
2917         }
2918
2919         /*
2920          * The bit set in the mask needs to be consistent with the
2921          * port the request came in on.
2922          */
2923         port_mask = be64_to_cpu(req->port_select_mask[3]);
2924         port_num = find_first_bit((unsigned long *)&port_mask,
2925                                   sizeof(port_mask) * 8);
2926
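             /*
              * port_select_mask[3] is the least significant 64-bit word
              * of the 256-bit mask and bit N selects port N, so e.g. a
              * request for port 1 carries port_select_mask[3] == 0x2.
              * The first set bit must match the ingress port.
              */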
2927         if (port_num != port) {
2928                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
2929                 return reply((struct ib_mad_hdr *)pmp);
2930         }
2931
2932         rsp = &req->port[0];
2933         memset(rsp, 0, sizeof(*rsp));
2934
2935         rsp->port_number = port;
2936         /*
2937          * Note that link_quality_indicator is a 32-bit quantity in
2938          * 'datacounters' queries (as opposed to 'portinfo' queries,
2939          * where it's a byte).
2940          */
2941         hfi1_read_link_quality(dd, &lq);
2942         rsp->link_quality_indicator = cpu_to_be32((u32)lq);
2943         pma_get_opa_port_dctrs(ibdev, rsp);
2944
2945         rsp->port_xmit_wait =
2946                 cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL));
2947         rsp->port_rcv_fecn =
2948                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
2949         rsp->port_rcv_becn =
2950                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
2951         rsp->port_error_counter_summary =
2952                 cpu_to_be64(get_error_counter_summary(ibdev, port,
2953                                                       res_lli, res_ler));
2954
2955         vlinfo = &rsp->vls[0];
2956         vfi = 0;
2957         /* The vl_select_mask has been checked above, and we know
2958          * that it contains only entries which represent valid VLs.
2959          * So in the for_each_set_bit() loop below, we don't need
2960          * any additional checks for vl.
2961          */
2962         for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
2963                 memset(vlinfo, 0, sizeof(*vlinfo));
2964
2965                 rsp->vls[vfi].port_vl_xmit_data =
2966                         cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
2967                                                    idx_from_vl(vl)));
2968
2969                 rsp->vls[vfi].port_vl_rcv_data =
2970                         cpu_to_be64(read_dev_cntr(dd, C_DC_RX_FLIT_VL,
2971                                                   idx_from_vl(vl)));
2972
2973                 rsp->vls[vfi].port_vl_xmit_pkts =
2974                         cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
2975                                                    idx_from_vl(vl)));
2976
2977                 rsp->vls[vfi].port_vl_rcv_pkts =
2978                         cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
2979                                                   idx_from_vl(vl)));
2980
2981                 rsp->vls[vfi].port_vl_xmit_wait =
2982                         cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
2983                                                    idx_from_vl(vl)));
2984
2985                 rsp->vls[vfi].port_vl_rcv_fecn =
2986                         cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
2987                                                   idx_from_vl(vl)));
2988                 rsp->vls[vfi].port_vl_rcv_becn =
2989                         cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
2990                                                   idx_from_vl(vl)));
2991
2992                 /* rsp->port_vl_xmit_time_cong is 0 for HFIs */
2993                 /* rsp->port_vl_xmit_wasted_bw ??? */
2994                 /* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ???
2995                  * does this differ from rsp->vls[vfi].port_vl_xmit_wait
2996                  */
2997                 /*rsp->vls[vfi].port_vl_mark_fecn =
2998                  *      cpu_to_be64(read_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT
2999                  *              + offset));
3000                  */
3001                 vlinfo++;
3002                 vfi++;
3003         }
3004
3005         a0_datacounters(ppd, rsp);
3006
3007         if (resp_len)
3008                 *resp_len += response_data_size;
3009
3010         return reply((struct ib_mad_hdr *)pmp);
3011 }
3012
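     /*
      * Classical IBTA PMA PortCountersExt query, answered from the same
      * counters as the OPA data-counter attribute.  Unicast xmit/rcv
      * packet counts are simply reported as zero.
      */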
3013 static int pma_get_ib_portcounters_ext(struct ib_pma_mad *pmp,
3014                                        struct ib_device *ibdev, u8 port)
3015 {
3016         struct ib_pma_portcounters_ext *p = (struct ib_pma_portcounters_ext *)
3017                                                 pmp->data;
3018         struct _port_dctrs rsp;
3019
3020         if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
3021                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3022                 goto bail;
3023         }
3024
3025         memset(&rsp, 0, sizeof(rsp));
3026         pma_get_opa_port_dctrs(ibdev, &rsp);
3027
3028         p->port_xmit_data = rsp.port_xmit_data;
3029         p->port_rcv_data = rsp.port_rcv_data;
3030         p->port_xmit_packets = rsp.port_xmit_pkts;
3031         p->port_rcv_packets = rsp.port_rcv_pkts;
3032         p->port_unicast_xmit_packets = 0;
3033         p->port_unicast_rcv_packets =  0;
3034         p->port_multicast_xmit_packets = rsp.port_multicast_xmit_pkts;
3035         p->port_multicast_rcv_packets = rsp.port_multicast_rcv_pkts;
3036
3037 bail:
3038         return reply((struct ib_mad_hdr *)pmp);
3039 }
3040
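     /*
      * Fill in the per-port error counters shared by the OPA
      * error-counter query and the classical IB PortCounters queries.
      */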
3041 static void pma_get_opa_port_ectrs(struct ib_device *ibdev,
3042                                    struct _port_ectrs *rsp, u8 port)
3043 {
3044         u64 tmp, tmp2;
3045         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3046         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3047         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3048
3049         tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
3050         tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
3051                                         CNTR_INVALID_VL);
3052         if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
3053                 /* overflow/wrapped */
3054                 rsp->link_error_recovery = cpu_to_be32(~0);
3055         } else {
3056                 rsp->link_error_recovery = cpu_to_be32(tmp2);
3057         }
3058
3059         rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
3060                                                 CNTR_INVALID_VL));
3061         rsp->port_rcv_errors =
3062                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
3063         rsp->port_rcv_remote_physical_errors =
3064                 cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
3065                                           CNTR_INVALID_VL));
3066         rsp->port_rcv_switch_relay_errors = 0;
3067         rsp->port_xmit_discards =
3068                 cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
3069                                            CNTR_INVALID_VL));
3070         rsp->port_xmit_constraint_errors =
3071                 cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
3072                                            CNTR_INVALID_VL));
3073         rsp->port_rcv_constraint_errors =
3074                 cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
3075                                            CNTR_INVALID_VL));
3076         rsp->local_link_integrity_errors =
3077                 cpu_to_be64(read_dev_cntr(dd, C_DC_RX_REPLAY,
3078                                           CNTR_INVALID_VL));
3079         rsp->excessive_buffer_overruns =
3080                 cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
3081 }
3082
3083 static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
3084                                   struct ib_device *ibdev,
3085                                   u8 port, u32 *resp_len)
3086 {
3087         size_t response_data_size;
3088         struct _port_ectrs *rsp;
3089         u8 port_num;
3090         struct opa_port_error_counters64_msg *req;
3091         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3092         u32 num_ports;
3093         u8 num_pslm;
3094         u8 num_vls;
3095         struct hfi1_ibport *ibp;
3096         struct hfi1_pportdata *ppd;
3097         struct _vls_ectrs *vlinfo;
3098         unsigned long vl;
3099         u64 port_mask, tmp;
3100         unsigned long vl_select_mask;
3101         int vfi;
3102
3103         req = (struct opa_port_error_counters64_msg *)pmp->data;
3104
3105         num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
3106
3107         num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
3108         num_vls = hweight32(be32_to_cpu(req->vl_select_mask));
3109
3110         if (num_ports != 1 || num_ports != num_pslm) {
3111                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3112                 return reply((struct ib_mad_hdr *)pmp);
3113         }
3114
3115         response_data_size = sizeof(struct opa_port_error_counters64_msg) +
3116                                 num_vls * sizeof(struct _vls_ectrs);
3117
3118         if (response_data_size > sizeof(pmp->data)) {
3119                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3120                 return reply((struct ib_mad_hdr *)pmp);
3121         }
3122         /*
3123          * The bit set in the mask needs to be consistent with the
3124          * port the request came in on.
3125          */
3126         port_mask = be64_to_cpu(req->port_select_mask[3]);
3127         port_num = find_first_bit((unsigned long *)&port_mask,
3128                                   sizeof(port_mask) * 8);
3129
3130         if (port_num != port) {
3131                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3132                 return reply((struct ib_mad_hdr *)pmp);
3133         }
3134
3135         rsp = &req->port[0];
3136
3137         ibp = to_iport(ibdev, port_num);
3138         ppd = ppd_from_ibp(ibp);
3139
3140         memset(rsp, 0, sizeof(*rsp));
3141         rsp->port_number = port_num;
3142
3143         pma_get_opa_port_ectrs(ibdev, rsp, port_num);
3144
3145         rsp->port_rcv_remote_physical_errors =
3146                 cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
3147                                           CNTR_INVALID_VL));
3148         rsp->fm_config_errors =
3149                 cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
3150                                           CNTR_INVALID_VL));
3151         tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
3152
3153         rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
3154         rsp->port_rcv_errors =
3155                 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
3156         vlinfo = &rsp->vls[0];
3157         vfi = 0;
3158         vl_select_mask = be32_to_cpu(req->vl_select_mask);
3159         for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
3160                 memset(vlinfo, 0, sizeof(*vlinfo));
3161                 rsp->vls[vfi].port_vl_xmit_discards =
3162                         cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
3163                                                    idx_from_vl(vl)));
3164                 vlinfo++;
3165                 vfi++;
3166         }
3167
3168         if (resp_len)
3169                 *resp_len += response_data_size;
3170
3171         return reply((struct ib_mad_hdr *)pmp);
3172 }
3173
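     /*
      * Classical IBTA PMA PortCounters query.  Each 64-bit OPA error
      * counter is saturated down to the width of its IB field (e.g.
      * port_rcv_errors pegs at 0xFFFF), and the LocalLinkIntegrity and
      * ExcessiveBufferOverrun counts are packed into the two nibbles of
      * link_overrun_errors.
      */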
3174 static int pma_get_ib_portcounters(struct ib_pma_mad *pmp,
3175                                    struct ib_device *ibdev, u8 port)
3176 {
3177         struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
3178                 pmp->data;
3179         struct _port_ectrs rsp;
3180         u64 temp_link_overrun_errors;
3181         u64 temp_64;
3182         u32 temp_32;
3183
3184         memset(&rsp, 0, sizeof(rsp));
3185         pma_get_opa_port_ectrs(ibdev, &rsp, port);
3186
3187         if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
3188                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3189                 goto bail;
3190         }
3191
3192         p->symbol_error_counter = 0; /* N/A for OPA */
3193
3194         temp_32 = be32_to_cpu(rsp.link_error_recovery);
3195         if (temp_32 > 0xFFUL)
3196                 p->link_error_recovery_counter = 0xFF;
3197         else
3198                 p->link_error_recovery_counter = (u8)temp_32;
3199
3200         temp_32 = be32_to_cpu(rsp.link_downed);
3201         if (temp_32 > 0xFFUL)
3202                 p->link_downed_counter = 0xFF;
3203         else
3204                 p->link_downed_counter = (u8)temp_32;
3205
3206         temp_64 = be64_to_cpu(rsp.port_rcv_errors);
3207         if (temp_64 > 0xFFFFUL)
3208                 p->port_rcv_errors = cpu_to_be16(0xFFFF);
3209         else
3210                 p->port_rcv_errors = cpu_to_be16((u16)temp_64);
3211
3212         temp_64 = be64_to_cpu(rsp.port_rcv_remote_physical_errors);
3213         if (temp_64 > 0xFFFFUL)
3214                 p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF);
3215         else
3216                 p->port_rcv_remphys_errors = cpu_to_be16((u16)temp_64);
3217
3218         temp_64 = be64_to_cpu(rsp.port_rcv_switch_relay_errors);
3219         p->port_rcv_switch_relay_errors = cpu_to_be16((u16)temp_64);
3220
3221         temp_64 = be64_to_cpu(rsp.port_xmit_discards);
3222         if (temp_64 > 0xFFFFUL)
3223                 p->port_xmit_discards = cpu_to_be16(0xFFFF);
3224         else
3225                 p->port_xmit_discards = cpu_to_be16((u16)temp_64);
3226
3227         temp_64 = be64_to_cpu(rsp.port_xmit_constraint_errors);
3228         if (temp_64 > 0xFFUL)
3229                 p->port_xmit_constraint_errors = 0xFF;
3230         else
3231                 p->port_xmit_constraint_errors = (u8)temp_64;
3232
3233         temp_64 = be64_to_cpu(rsp.port_rcv_constraint_errors);
3234         if (temp_64 > 0xFFUL)
3235                 p->port_rcv_constraint_errors = 0xFF;
3236         else
3237                 p->port_rcv_constraint_errors = (u8)temp_64;
3238
3239         /* LocalLink: 7:4, BufferOverrun: 3:0 */
3240         temp_64 = be64_to_cpu(rsp.local_link_integrity_errors);
3241         if (temp_64 > 0xFUL)
3242                 temp_64 = 0xFUL;
3243
3244         temp_link_overrun_errors = temp_64 << 4;
3245
3246         temp_64 = be64_to_cpu(rsp.excessive_buffer_overruns);
3247         if (temp_64 > 0xFUL)
3248                 temp_64 = 0xFUL;
3249         temp_link_overrun_errors |= temp_64;
3250
3251         p->link_overrun_errors = (u8)temp_link_overrun_errors;
3252
3253         p->vl15_dropped = 0; /* N/A for OPA */
3254
3255 bail:
3256         return reply((struct ib_mad_hdr *)pmp);
3257 }
3258
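     /*
      * Report the per-port error-info records (PortRcvErrorInfo,
      * ExcessiveBufferOverrunInfo, constraint, uncorrectable and
      * FMConfig error info) captured in driver state and, for buffer
      * overruns, in the RCV_ERR_INFO CSR.
      */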
3259 static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
3260                                  struct ib_device *ibdev,
3261                                  u8 port, u32 *resp_len)
3262 {
3263         size_t response_data_size;
3264         struct _port_ei *rsp;
3265         struct opa_port_error_info_msg *req;
3266         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3267         u64 port_mask;
3268         u32 num_ports;
3269         u8 port_num;
3270         u8 num_pslm;
3271         u64 reg;
3272
3273         req = (struct opa_port_error_info_msg *)pmp->data;
3274         rsp = &req->port[0];
3275
3276         num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
3277         num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
3278
3279         memset(rsp, 0, sizeof(*rsp));
3280
3281         if (num_ports != 1 || num_ports != num_pslm) {
3282                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3283                 return reply((struct ib_mad_hdr *)pmp);
3284         }
3285
3286         /* Sanity check */
3287         response_data_size = sizeof(struct opa_port_error_info_msg);
3288
3289         if (response_data_size > sizeof(pmp->data)) {
3290                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3291                 return reply((struct ib_mad_hdr *)pmp);
3292         }
3293
3294         /*
3295          * The bit set in the mask needs to be consistent with the port
3296          * the request came in on.
3297          */
3298         port_mask = be64_to_cpu(req->port_select_mask[3]);
3299         port_num = find_first_bit((unsigned long *)&port_mask,
3300                                   sizeof(port_mask) * 8);
3301
3302         if (port_num != port) {
3303                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3304                 return reply((struct ib_mad_hdr *)pmp);
3305         }
3306
3307         /* PortRcvErrorInfo */
3308         rsp->port_rcv_ei.status_and_code =
3309                 dd->err_info_rcvport.status_and_code;
3310         memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit1,
3311                &dd->err_info_rcvport.packet_flit1, sizeof(u64));
3312         memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit2,
3313                &dd->err_info_rcvport.packet_flit2, sizeof(u64));
3314
3315         /* ExcessiveBufferOverrunInfo */
3316         reg = read_csr(dd, RCV_ERR_INFO);
3317         if (reg & RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK) {
3318                 /*
3319                  * if the RcvExcessBufferOverrun bit is set, save SC of
3320                  * first pkt that encountered an excess buffer overrun
3321                  */
3322                 u8 tmp = (u8)reg;
3323
3324                 tmp &= RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SC_SMASK;
3325                 tmp <<= 2;
3326                 rsp->excessive_buffer_overrun_ei.status_and_sc = tmp;
3327                 /* set the status bit */
3328                 rsp->excessive_buffer_overrun_ei.status_and_sc |= 0x80;
3329         }
3330
3331         rsp->port_xmit_constraint_ei.status =
3332                 dd->err_info_xmit_constraint.status;
3333         rsp->port_xmit_constraint_ei.pkey =
3334                 cpu_to_be16(dd->err_info_xmit_constraint.pkey);
3335         rsp->port_xmit_constraint_ei.slid =
3336                 cpu_to_be32(dd->err_info_xmit_constraint.slid);
3337
3338         rsp->port_rcv_constraint_ei.status =
3339                 dd->err_info_rcv_constraint.status;
3340         rsp->port_rcv_constraint_ei.pkey =
3341                 cpu_to_be16(dd->err_info_rcv_constraint.pkey);
3342         rsp->port_rcv_constraint_ei.slid =
3343                 cpu_to_be32(dd->err_info_rcv_constraint.slid);
3344
3345         /* UncorrectableErrorInfo */
3346         rsp->uncorrectable_ei.status_and_code = dd->err_info_uncorrectable;
3347
3348         /* FMConfigErrorInfo */
3349         rsp->fm_config_ei.status_and_code = dd->err_info_fmconfig;
3350
3351         if (resp_len)
3352                 *resp_len += response_data_size;
3353
3354         return reply((struct ib_mad_hdr *)pmp);
3355 }
3356
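     /*
      * ClearPortStatus: counter_select_mask carries one CS_* bit per
      * counter group, and every selected counter is written back to
      * zero.  Per-VL counters are cleared for all VLs (VL_MASK_ALL).
      */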
3357 static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
3358                                   struct ib_device *ibdev,
3359                                   u8 port, u32 *resp_len)
3360 {
3361         struct opa_clear_port_status *req =
3362                 (struct opa_clear_port_status *)pmp->data;
3363         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3364         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3365         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3366         u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
3367         u64 portn = be64_to_cpu(req->port_select_mask[3]);
3368         u32 counter_select = be32_to_cpu(req->counter_select_mask);
3369         unsigned long vl_select_mask = VL_MASK_ALL; /* clear all per-vl cnts */
3370         unsigned long vl;
3371
3372         if ((nports != 1) || (portn != 1 << port)) {
3373                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3374                 return reply((struct ib_mad_hdr *)pmp);
3375         }
3376         /*
3377          * only counters returned by pma_get_opa_portstatus() are
3378          * handled, so when pma_get_opa_portstatus() gets a fix,
3379          * the corresponding change should be made here as well.
3380          */
3381
3382         if (counter_select & CS_PORT_XMIT_DATA)
3383                 write_dev_cntr(dd, C_DC_XMIT_FLITS, CNTR_INVALID_VL, 0);
3384
3385         if (counter_select & CS_PORT_RCV_DATA)
3386                 write_dev_cntr(dd, C_DC_RCV_FLITS, CNTR_INVALID_VL, 0);
3387
3388         if (counter_select & CS_PORT_XMIT_PKTS)
3389                 write_dev_cntr(dd, C_DC_XMIT_PKTS, CNTR_INVALID_VL, 0);
3390
3391         if (counter_select & CS_PORT_RCV_PKTS)
3392                 write_dev_cntr(dd, C_DC_RCV_PKTS, CNTR_INVALID_VL, 0);
3393
3394         if (counter_select & CS_PORT_MCAST_XMIT_PKTS)
3395                 write_dev_cntr(dd, C_DC_MC_XMIT_PKTS, CNTR_INVALID_VL, 0);
3396
3397         if (counter_select & CS_PORT_MCAST_RCV_PKTS)
3398                 write_dev_cntr(dd, C_DC_MC_RCV_PKTS, CNTR_INVALID_VL, 0);
3399
3400         if (counter_select & CS_PORT_XMIT_WAIT)
3401                 write_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL, 0);
3402
3403         /* ignore cs_sw_portCongestion for HFIs */
3404
3405         if (counter_select & CS_PORT_RCV_FECN)
3406                 write_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL, 0);
3407
3408         if (counter_select & CS_PORT_RCV_BECN)
3409                 write_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL, 0);
3410
3411         /* ignore cs_port_xmit_time_cong for HFIs */
3412         /* ignore cs_port_xmit_wasted_bw for now */
3413         /* ignore cs_port_xmit_wait_data for now */
3414         if (counter_select & CS_PORT_RCV_BUBBLE)
3415                 write_dev_cntr(dd, C_DC_RCV_BBL, CNTR_INVALID_VL, 0);
3416
3417         /* Only applicable for switches */
3418         /* if (counter_select & CS_PORT_MARK_FECN)
3419          *      write_csr(dd, DCC_PRF_PORT_MARK_FECN_CNT, 0);
3420          */
3421
3422         if (counter_select & CS_PORT_RCV_CONSTRAINT_ERRORS)
3423                 write_port_cntr(ppd, C_SW_RCV_CSTR_ERR, CNTR_INVALID_VL, 0);
3424
3425         /* ignore cs_port_rcv_switch_relay_errors for HFIs */
3426         if (counter_select & CS_PORT_XMIT_DISCARDS)
3427                 write_port_cntr(ppd, C_SW_XMIT_DSCD, CNTR_INVALID_VL, 0);
3428
3429         if (counter_select & CS_PORT_XMIT_CONSTRAINT_ERRORS)
3430                 write_port_cntr(ppd, C_SW_XMIT_CSTR_ERR, CNTR_INVALID_VL, 0);
3431
3432         if (counter_select & CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS)
3433                 write_dev_cntr(dd, C_DC_RMT_PHY_ERR, CNTR_INVALID_VL, 0);
3434
3435         if (counter_select & CS_LOCAL_LINK_INTEGRITY_ERRORS)
3436                 write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
3437
3438         if (counter_select & CS_LINK_ERROR_RECOVERY) {
3439                 write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
3440                 write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
3441                                CNTR_INVALID_VL, 0);
3442         }
3443
3444         if (counter_select & CS_PORT_RCV_ERRORS)
3445                 write_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL, 0);
3446
3447         if (counter_select & CS_EXCESSIVE_BUFFER_OVERRUNS) {
3448                 write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
3449                 dd->rcv_ovfl_cnt = 0;
3450         }
3451
3452         if (counter_select & CS_FM_CONFIG_ERRORS)
3453                 write_dev_cntr(dd, C_DC_FM_CFG_ERR, CNTR_INVALID_VL, 0);
3454
3455         if (counter_select & CS_LINK_DOWNED)
3456                 write_port_cntr(ppd, C_SW_LINK_DOWN, CNTR_INVALID_VL, 0);
3457
3458         if (counter_select & CS_UNCORRECTABLE_ERRORS)
3459                 write_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL, 0);
3460
3461         for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
3462                 if (counter_select & CS_PORT_XMIT_DATA)
3463                         write_port_cntr(ppd, C_TX_FLIT_VL, idx_from_vl(vl), 0);
3464
3465                 if (counter_select & CS_PORT_RCV_DATA)
3466                         write_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl), 0);
3467
3468                 if (counter_select & CS_PORT_XMIT_PKTS)
3469                         write_port_cntr(ppd, C_TX_PKT_VL, idx_from_vl(vl), 0);
3470
3471                 if (counter_select & CS_PORT_RCV_PKTS)
3472                         write_dev_cntr(dd, C_DC_RX_PKT_VL, idx_from_vl(vl), 0);
3473
3474                 if (counter_select & CS_PORT_XMIT_WAIT)
3475                         write_port_cntr(ppd, C_TX_WAIT_VL, idx_from_vl(vl), 0);
3476
3477                 /* sw_port_vl_congestion is 0 for HFIs */
3478                 if (counter_select & CS_PORT_RCV_FECN)
3479                         write_dev_cntr(dd, C_DC_RCV_FCN_VL, idx_from_vl(vl), 0);
3480
3481                 if (counter_select & CS_PORT_RCV_BECN)
3482                         write_dev_cntr(dd, C_DC_RCV_BCN_VL, idx_from_vl(vl), 0);
3483
3484                 /* port_vl_xmit_time_cong is 0 for HFIs */
3485                 /* port_vl_xmit_wasted_bw ??? */
3486                 /* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ??? */
3487                 if (counter_select & CS_PORT_RCV_BUBBLE)
3488                         write_dev_cntr(dd, C_DC_RCV_BBL_VL, idx_from_vl(vl), 0);
3489
3490                 /* if (counter_select & CS_PORT_MARK_FECN)
3491                  *     write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0);
3492                  */
3493                 if (counter_select & CS_PORT_XMIT_DISCARDS)
3494                         write_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
3495                                         idx_from_vl(vl), 0);
3496         }
3497
3498         if (resp_len)
3499                 *resp_len += sizeof(*req);
3500
3501         return reply((struct ib_mad_hdr *)pmp);
3502 }
3503
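     /*
      * Clear the selected error-info status bits.  Most live in driver
      * state (the dd->err_info_* fields); the excessive-buffer-overrun
      * status lives in hardware and is cleared by writing RCV_ERR_INFO.
      */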
3504 static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
3505                                  struct ib_device *ibdev,
3506                                  u8 port, u32 *resp_len)
3507 {
3508         struct _port_ei *rsp;
3509         struct opa_port_error_info_msg *req;
3510         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3511         u64 port_mask;
3512         u32 num_ports;
3513         u8 port_num;
3514         u8 num_pslm;
3515         u32 error_info_select;
3516
3517         req = (struct opa_port_error_info_msg *)pmp->data;
3518         rsp = &req->port[0];
3519
3520         num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
3521         num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
3522
3523         memset(rsp, 0, sizeof(*rsp));
3524
3525         if (num_ports != 1 || num_ports != num_pslm) {
3526                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3527                 return reply((struct ib_mad_hdr *)pmp);
3528         }
3529
3530         /*
3531          * The bit set in the mask needs to be consistent with the port
3532          * the request came in on.
3533          */
3534         port_mask = be64_to_cpu(req->port_select_mask[3]);
3535         port_num = find_first_bit((unsigned long *)&port_mask,
3536                                   sizeof(port_mask) * 8);
3537
3538         if (port_num != port) {
3539                 pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
3540                 return reply((struct ib_mad_hdr *)pmp);
3541         }
3542
3543         error_info_select = be32_to_cpu(req->error_info_select_mask);
3544
3545         /* PortRcvErrorInfo */
3546         if (error_info_select & ES_PORT_RCV_ERROR_INFO)
3547                 /* turn off status bit */
3548                 dd->err_info_rcvport.status_and_code &= ~OPA_EI_STATUS_SMASK;
3549
3550         /* ExcessiveBufferOverrunInfo */
3551         if (error_info_select & ES_EXCESSIVE_BUFFER_OVERRUN_INFO)
3552                 /*
3553                  * status bit is essentially kept in the h/w - bit 5 of
3554                  * RCV_ERR_INFO
3555                  */
3556                 write_csr(dd, RCV_ERR_INFO,
3557                           RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
3558
3559         if (error_info_select & ES_PORT_XMIT_CONSTRAINT_ERROR_INFO)
3560                 dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK;
3561
3562         if (error_info_select & ES_PORT_RCV_CONSTRAINT_ERROR_INFO)
3563                 dd->err_info_rcv_constraint.status &= ~OPA_EI_STATUS_SMASK;
3564
3565         /* UncorrectableErrorInfo */
3566         if (error_info_select & ES_UNCORRECTABLE_ERROR_INFO)
3567                 /* turn off status bit */
3568                 dd->err_info_uncorrectable &= ~OPA_EI_STATUS_SMASK;
3569
3570         /* FMConfigErrorInfo */
3571         if (error_info_select & ES_FM_CONFIG_ERROR_INFO)
3572                 /* turn off status bit */
3573                 dd->err_info_fmconfig &= ~OPA_EI_STATUS_SMASK;
3574
3575         if (resp_len)
3576                 *resp_len += sizeof(*req);
3577
3578         return reply((struct ib_mad_hdr *)pmp);
3579 }
3580
3581 struct opa_congestion_info_attr {
3582         __be16 congestion_info;
3583         u8 control_table_cap;   /* CCT capacity, in 64-entry blocks */
3584         u8 congestion_log_length;
3585 } __packed;
3586
3587 static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data,
3588                                     struct ib_device *ibdev, u8 port,
3589                                     u32 *resp_len, u32 max_len)
3590 {
3591         struct opa_congestion_info_attr *p =
3592                 (struct opa_congestion_info_attr *)data;
3593         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3594         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3595
3596         if (smp_length_check(sizeof(*p), max_len)) {
3597                 smp->status |= IB_SMP_INVALID_FIELD;
3598                 return reply((struct ib_mad_hdr *)smp);
3599         }
3600
3601         p->congestion_info = 0;
3602         p->control_table_cap = ppd->cc_max_table_entries;
3603         p->congestion_log_length = OPA_CONG_LOG_ELEMS;
3604
3605         if (resp_len)
3606                 *resp_len += sizeof(*p);
3607
3608         return reply((struct ib_mad_hdr *)smp);
3609 }
3610
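     /*
      * Return the current congestion settings from the RCU-protected
      * cc_state; if no state has been applied yet, the reply is sent
      * without filling in the attribute.
      */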
3611 static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
3612                                        u8 *data, struct ib_device *ibdev,
3613                                        u8 port, u32 *resp_len, u32 max_len)
3614 {
3615         int i;
3616         struct opa_congestion_setting_attr *p =
3617                 (struct opa_congestion_setting_attr *)data;
3618         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3619         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3620         struct opa_congestion_setting_entry_shadow *entries;
3621         struct cc_state *cc_state;
3622
3623         if (smp_length_check(sizeof(*p), max_len)) {
3624                 smp->status |= IB_SMP_INVALID_FIELD;
3625                 return reply((struct ib_mad_hdr *)smp);
3626         }
3627
3628         rcu_read_lock();
3629
3630         cc_state = get_cc_state(ppd);
3631
3632         if (!cc_state) {
3633                 rcu_read_unlock();
3634                 return reply((struct ib_mad_hdr *)smp);
3635         }
3636
3637         entries = cc_state->cong_setting.entries;
3638         p->port_control = cpu_to_be16(cc_state->cong_setting.port_control);
3639         p->control_map = cpu_to_be32(cc_state->cong_setting.control_map);
3640         for (i = 0; i < OPA_MAX_SLS; i++) {
3641                 p->entries[i].ccti_increase = entries[i].ccti_increase;
3642                 p->entries[i].ccti_timer = cpu_to_be16(entries[i].ccti_timer);
3643                 p->entries[i].trigger_threshold =
3644                         entries[i].trigger_threshold;
3645                 p->entries[i].ccti_min = entries[i].ccti_min;
3646         }
3647
3648         rcu_read_unlock();
3649
3650         if (resp_len)
3651                 *resp_len += sizeof(*p);
3652
3653         return reply((struct ib_mad_hdr *)smp);
3654 }
3655
3656 /*
3657  * Apply congestion control information stored in the ppd to the
3658  * active structure.
3659  */
3660 static void apply_cc_state(struct hfi1_pportdata *ppd)
3661 {
3662         struct cc_state *old_cc_state, *new_cc_state;
3663
3664         new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL);
3665         if (!new_cc_state)
3666                 return;
3667
3668         /*
3669          * Hold the lock for updating *and* to prevent ppd information
3670          * from changing during the update.
3671          */
3672         spin_lock(&ppd->cc_state_lock);
3673
3674         old_cc_state = get_cc_state_protected(ppd);
3675         if (!old_cc_state) {
3676                 /* never active, or shutting down */
3677                 spin_unlock(&ppd->cc_state_lock);
3678                 kfree(new_cc_state);
3679                 return;
3680         }
3681
3682         *new_cc_state = *old_cc_state;
3683
3684         new_cc_state->cct.ccti_limit = ppd->total_cct_entry - 1;
3685         memcpy(new_cc_state->cct.entries, ppd->ccti_entries,
3686                ppd->total_cct_entry * sizeof(struct ib_cc_table_entry));
3687
3688         new_cc_state->cong_setting.port_control = IB_CC_CCS_PC_SL_BASED;
3689         new_cc_state->cong_setting.control_map = ppd->cc_sl_control_map;
3690         memcpy(new_cc_state->cong_setting.entries, ppd->congestion_entries,
3691                OPA_MAX_SLS * sizeof(struct opa_congestion_setting_entry));
3692
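             /*
              * Publish the new state.  Readers under rcu_read_lock()
              * (e.g. the congestion-setting and CC-table get handlers)
              * see either the old structure or the fully initialized
              * new one.
              */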
3693         rcu_assign_pointer(ppd->cc_state, new_cc_state);
3694
3695         spin_unlock(&ppd->cc_state_lock);
3696
3697         kfree_rcu(old_cc_state, rcu);
3698 }
3699
3700 static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
3701                                        struct ib_device *ibdev, u8 port,
3702                                        u32 *resp_len, u32 max_len)
3703 {
3704         struct opa_congestion_setting_attr *p =
3705                 (struct opa_congestion_setting_attr *)data;
3706         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3707         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3708         struct opa_congestion_setting_entry_shadow *entries;
3709         int i;
3710
3711         if (smp_length_check(sizeof(*p), max_len)) {
3712                 smp->status |= IB_SMP_INVALID_FIELD;
3713                 return reply((struct ib_mad_hdr *)smp);
3714         }
3715
3716         /*
3717          * Save details from packet into the ppd.  Hold the cc_state_lock so
3718          * our information is consistent with anyone trying to apply the state.
3719          */
3720         spin_lock(&ppd->cc_state_lock);
3721         ppd->cc_sl_control_map = be32_to_cpu(p->control_map);
3722
3723         entries = ppd->congestion_entries;
3724         for (i = 0; i < OPA_MAX_SLS; i++) {
3725                 entries[i].ccti_increase = p->entries[i].ccti_increase;
3726                 entries[i].ccti_timer = be16_to_cpu(p->entries[i].ccti_timer);
3727                 entries[i].trigger_threshold =
3728                         p->entries[i].trigger_threshold;
3729                 entries[i].ccti_min = p->entries[i].ccti_min;
3730         }
3731         spin_unlock(&ppd->cc_state_lock);
3732
3733         /* now apply the information */
3734         apply_cc_state(ppd);
3735
3736         return __subn_get_opa_cong_setting(smp, am, data, ibdev, port,
3737                                            resp_len, max_len);
3738 }
3739
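     /*
      * Return the HFI congestion log.  ppd->cc_events is a circular
      * buffer of OPA_CONG_LOG_ELEMS entries with cc_mad_idx marking the
      * next write slot, so the copy below walks the log oldest-first.
      * Events older than twice the 32-bit timestamp wrap are skipped,
      * per CA10-49 (IBTA release 1.2.1, V1).
      */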
3740 static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
3741                                         u8 *data, struct ib_device *ibdev,
3742                                         u8 port, u32 *resp_len, u32 max_len)
3743 {
3744         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3745         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3746         struct opa_hfi1_cong_log *cong_log = (struct opa_hfi1_cong_log *)data;
3747         s64 ts;
3748         int i;
3749
3750         if (am || smp_length_check(sizeof(*cong_log), max_len)) {
3751                 smp->status |= IB_SMP_INVALID_FIELD;
3752                 return reply((struct ib_mad_hdr *)smp);
3753         }
3754
3755         spin_lock_irq(&ppd->cc_log_lock);
3756
3757         cong_log->log_type = OPA_CC_LOG_TYPE_HFI;
3758         cong_log->congestion_flags = 0;
3759         cong_log->threshold_event_counter =
3760                 cpu_to_be16(ppd->threshold_event_counter);
3761         memcpy(cong_log->threshold_cong_event_map,
3762                ppd->threshold_cong_event_map,
3763                sizeof(cong_log->threshold_cong_event_map));
3764         /* keep timestamp in units of 1.024 usec */
3765         ts = ktime_to_ns(ktime_get()) / 1024;
3766         cong_log->current_time_stamp = cpu_to_be32(ts);
3767         for (i = 0; i < OPA_CONG_LOG_ELEMS; i++) {
3768                 struct opa_hfi1_cong_log_event_internal *cce =
3769                         &ppd->cc_events[ppd->cc_mad_idx++];
3770                 if (ppd->cc_mad_idx == OPA_CONG_LOG_ELEMS)
3771                         ppd->cc_mad_idx = 0;
3772                 /*
3773                  * Entries which are older than twice the time
3774                  * required to wrap the counter are supposed to
3775                  * be zeroed (CA10-49 IBTA, release 1.2.1, V1).
3776                  */
3777                 if ((u64)(ts - cce->timestamp) > (2 * UINT_MAX))
3778                         continue;
3779                 memcpy(cong_log->events[i].local_qp_cn_entry, &cce->lqpn, 3);
3780                 memcpy(cong_log->events[i].remote_qp_number_cn_entry,
3781                        &cce->rqpn, 3);
3782                 cong_log->events[i].sl_svc_type_cn_entry =
3783                         ((cce->sl & 0x1f) << 3) | (cce->svc_type & 0x7);
3784                 cong_log->events[i].remote_lid_cn_entry =
3785                         cpu_to_be32(cce->rlid);
3786                 cong_log->events[i].timestamp_cn_entry =
3787                         cpu_to_be32(cce->timestamp);
3788         }
3789
3790         /*
3791          * Reset threshold_cong_event_map and threshold_event_counter
3792          * to 0 when the log is read.
3793          */
3794         memset(ppd->threshold_cong_event_map, 0x0,
3795                sizeof(ppd->threshold_cong_event_map));
3796         ppd->threshold_event_counter = 0;
3797
3798         spin_unlock_irq(&ppd->cc_log_lock);
3799
3800         if (resp_len)
3801                 *resp_len += sizeof(struct opa_hfi1_cong_log);
3802
3803         return reply((struct ib_mad_hdr *)smp);
3804 }
3805
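     /*
      * Return n_blocks of the congestion control table, starting at
      * start_block.  The table is addressed in IB_CCT_ENTRIES-entry
      * blocks; ccti_limit is the index of the last valid entry.
      */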
3806 static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
3807                                    struct ib_device *ibdev, u8 port,
3808                                    u32 *resp_len, u32 max_len)
3809 {
3810         struct ib_cc_table_attr *cc_table_attr =
3811                 (struct ib_cc_table_attr *)data;
3812         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3813         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3814         u32 start_block = OPA_AM_START_BLK(am);
3815         u32 n_blocks = OPA_AM_NBLK(am);
3816         struct ib_cc_table_entry_shadow *entries;
3817         int i, j;
3818         u32 sentry, eentry;
3819         struct cc_state *cc_state;
3820         u32 size = sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
3821
3822         /* sanity check n_blocks, start_block */
3823         if (n_blocks == 0 || smp_length_check(size, max_len) ||
3824             start_block + n_blocks > ppd->cc_max_table_entries) {
3825                 smp->status |= IB_SMP_INVALID_FIELD;
3826                 return reply((struct ib_mad_hdr *)smp);
3827         }
3828
3829         rcu_read_lock();
3830
3831         cc_state = get_cc_state(ppd);
3832
3833         if (!cc_state) {
3834                 rcu_read_unlock();
3835                 return reply((struct ib_mad_hdr *)smp);
3836         }
3837
3838         sentry = start_block * IB_CCT_ENTRIES;
3839         eentry = sentry + (IB_CCT_ENTRIES * n_blocks);
3840
3841         cc_table_attr->ccti_limit = cpu_to_be16(cc_state->cct.ccti_limit);
3842
3843         entries = cc_state->cct.entries;
3844
3845         /* return n_blocks, though the last block may not be full */
3846         for (j = 0, i = sentry; i < eentry; j++, i++)
3847                 cc_table_attr->ccti_entries[j].entry =
3848                         cpu_to_be16(entries[i].entry);
3849
3850         rcu_read_unlock();
3851
3852         if (resp_len)
3853                 *resp_len += size;
3854
3855         return reply((struct ib_mad_hdr *)smp);
3856 }
3857
3858 static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
3859                                    struct ib_device *ibdev, u8 port,
3860                                    u32 *resp_len, u32 max_len)
3861 {
3862         struct ib_cc_table_attr *p = (struct ib_cc_table_attr *)data;
3863         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3864         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
3865         u32 start_block = OPA_AM_START_BLK(am);
3866         u32 n_blocks = OPA_AM_NBLK(am);
3867         struct ib_cc_table_entry_shadow *entries;
3868         int i, j;
3869         u32 sentry, eentry;
3870         u16 ccti_limit;
3871         u32 size = sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
3872
3873         /* sanity check n_blocks, start_block */
3874         if (n_blocks == 0 || smp_length_check(size, max_len) ||
3875             start_block + n_blocks > ppd->cc_max_table_entries) {
3876                 smp->status |= IB_SMP_INVALID_FIELD;
3877                 return reply((struct ib_mad_hdr *)smp);
3878         }
3879
3880         sentry = start_block * IB_CCT_ENTRIES;
3881         eentry = sentry + ((n_blocks - 1) * IB_CCT_ENTRIES) +
3882                  (be16_to_cpu(p->ccti_limit)) % IB_CCT_ENTRIES + 1;
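             /*
              * The last block may be partial: ccti_limit is the index of
              * the final valid entry.  E.g. with IB_CCT_ENTRIES == 64,
              * start_block == 0, n_blocks == 1 and ccti_limit == 63,
              * this yields sentry == 0 and eentry == 64 (one full block).
              */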
3883
3884         /* sanity check ccti_limit */
3885         ccti_limit = be16_to_cpu(p->ccti_limit);
3886         if (ccti_limit + 1 > eentry) {
3887                 smp->status |= IB_SMP_INVALID_FIELD;
3888                 return reply((struct ib_mad_hdr *)smp);
3889         }
3890
3891         /*
3892          * Save details from packet into the ppd.  Hold the cc_state_lock so
3893          * our information is consistent with anyone trying to apply the state.
3894          */
3895         spin_lock(&ppd->cc_state_lock);
3896         ppd->total_cct_entry = ccti_limit + 1;
3897         entries = ppd->ccti_entries;
3898         for (j = 0, i = sentry; i < eentry; j++, i++)
3899                 entries[i].entry = be16_to_cpu(p->ccti_entries[j].entry);
3900         spin_unlock(&ppd->cc_state_lock);
3901
3902         /* now apply the information */
3903         apply_cc_state(ppd);
3904
3905         return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len,
3906                                        max_len);
3907 }
3908
3909 struct opa_led_info {
3910         __be32 rsvd_led_mask;
3911         __be32 rsvd;
3912 };
3913
3914 #define OPA_LED_SHIFT   31
3915 #define OPA_LED_MASK    BIT(OPA_LED_SHIFT)
3916
3917 static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
3918                                    struct ib_device *ibdev, u8 port,
3919                                    u32 *resp_len, u32 max_len)
3920 {
3921         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3922         struct hfi1_pportdata *ppd = dd->pport;
3923         struct opa_led_info *p = (struct opa_led_info *)data;
3924         u32 nport = OPA_AM_NPORT(am);
3925         u32 is_beaconing_active;
3926
3927         if (nport != 1 || smp_length_check(sizeof(*p), max_len)) {
3928                 smp->status |= IB_SMP_INVALID_FIELD;
3929                 return reply((struct ib_mad_hdr *)smp);
3930         }
3931
3932         /*
3933          * This pairs with the memory barrier in hfi1_start_led_override to
3934          * ensure that we read the correct state of LED beaconing represented
3935          * by led_override_timer_active
3936          */
3937         smp_rmb();
3938         is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
3939         p->rsvd_led_mask = cpu_to_be32(is_beaconing_active << OPA_LED_SHIFT);
3940
3941         if (resp_len)
3942                 *resp_len += sizeof(struct opa_led_info);
3943
3944         return reply((struct ib_mad_hdr *)smp);
3945 }
3946
3947 static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
3948                                    struct ib_device *ibdev, u8 port,
3949                                    u32 *resp_len, u32 max_len)
3950 {
3951         struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
3952         struct opa_led_info *p = (struct opa_led_info *)data;
3953         u32 nport = OPA_AM_NPORT(am);
3954         int on = !!(be32_to_cpu(p->rsvd_led_mask) & OPA_LED_MASK);
3955
3956         if (nport != 1 || smp_length_check(sizeof(*p), max_len)) {
3957                 smp->status |= IB_SMP_INVALID_FIELD;
3958                 return reply((struct ib_mad_hdr *)smp);
3959         }
3960
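             /*
              * Start beaconing with the 2000/1500 on/off cadence passed
              * to hfi1_start_led_override (in milliseconds), or cancel
              * any active override.
              */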
3961         if (on)
3962                 hfi1_start_led_override(dd->pport, 2000, 1500);
3963         else
3964                 shutdown_led_override(dd->pport);
3965
3966         return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len,
3967                                        max_len);
3968 }
3969
3970 static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
3971                             u8 *data, struct ib_device *ibdev, u8 port,
3972                             u32 *resp_len, u32 max_len)
3973 {
3974         int ret;
3975         struct hfi1_ibport *ibp = to_iport(ibdev, port);
3976
3977         switch (attr_id) {
3978         case IB_SMP_ATTR_NODE_DESC:
3979                 ret = __subn_get_opa_nodedesc(smp, am, data, ibdev, port,
3980                                               resp_len, max_len);
3981                 break;
3982         case IB_SMP_ATTR_NODE_INFO:
3983                 ret = __subn_get_opa_nodeinfo(smp, am, data, ibdev, port,
3984                                               resp_len, max_len);
3985                 break;
3986         case IB_SMP_ATTR_PORT_INFO:
3987                 ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port,
3988                                               resp_len, max_len);
3989                 break;
3990         case IB_SMP_ATTR_PKEY_TABLE:
3991                 ret = __subn_get_opa_pkeytable(smp, am, data, ibdev, port,
3992                                                resp_len, max_len);
3993                 break;
3994         case OPA_ATTRIB_ID_SL_TO_SC_MAP:
3995                 ret = __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port,
3996                                               resp_len, max_len);
3997                 break;
3998         case OPA_ATTRIB_ID_SC_TO_SL_MAP:
3999                 ret = __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port,
4000                                               resp_len, max_len);
4001                 break;
4002         case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
4003                 ret = __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port,
4004                                                resp_len, max_len);
4005                 break;
4006         case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
4007                 ret = __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
4008                                                 resp_len, max_len);
4009                 break;
4010         case OPA_ATTRIB_ID_PORT_STATE_INFO:
4011                 ret = __subn_get_opa_psi(smp, am, data, ibdev, port,
4012                                          resp_len, max_len);
4013                 break;
4014         case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
4015                 ret = __subn_get_opa_bct(smp, am, data, ibdev, port,
4016                                          resp_len, max_len);
4017                 break;
4018         case OPA_ATTRIB_ID_CABLE_INFO:
4019                 ret = __subn_get_opa_cable_info(smp, am, data, ibdev, port,
4020                                                 resp_len, max_len);
4021                 break;
4022         case IB_SMP_ATTR_VL_ARB_TABLE:
4023                 ret = __subn_get_opa_vl_arb(smp, am, data, ibdev, port,
4024                                             resp_len, max_len);
4025                 break;
4026         case OPA_ATTRIB_ID_CONGESTION_INFO:
4027                 ret = __subn_get_opa_cong_info(smp, am, data, ibdev, port,
4028                                                resp_len, max_len);
4029                 break;
4030         case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
4031                 ret = __subn_get_opa_cong_setting(smp, am, data, ibdev,
4032                                                   port, resp_len, max_len);
4033                 break;
4034         case OPA_ATTRIB_ID_HFI_CONGESTION_LOG:
4035                 ret = __subn_get_opa_hfi1_cong_log(smp, am, data, ibdev,
4036                                                    port, resp_len, max_len);
4037                 break;
4038         case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
4039                 ret = __subn_get_opa_cc_table(smp, am, data, ibdev, port,
4040                                               resp_len, max_len);
4041                 break;
4042         case IB_SMP_ATTR_LED_INFO:
4043                 ret = __subn_get_opa_led_info(smp, am, data, ibdev, port,
4044                                               resp_len, max_len);
4045                 break;
4046         case IB_SMP_ATTR_SM_INFO:
4047                 if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
4048                         return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
4049                 if (ibp->rvp.port_cap_flags & IB_PORT_SM)
4050                         return IB_MAD_RESULT_SUCCESS;
4051                 /* FALLTHROUGH */
4052         default:
4053                 smp->status |= IB_SMP_UNSUP_METH_ATTR;
4054                 ret = reply((struct ib_mad_hdr *)smp);
4055                 break;
4056         }
4057         return ret;
4058 }
4059
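     /*
      * subn_set_opa_sma - dispatch an OPA subnet management Set(attr)
      *
      * The Set() counterpart of subn_get_opa_sma(); only the writable
      * attributes have cases here, and SMInfo is handled exactly as in
      * the Get() path.
      */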
4060 static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
4061                             u8 *data, struct ib_device *ibdev, u8 port,
4062                             u32 *resp_len, u32 max_len)
4063 {
4064         int ret;
4065         struct hfi1_ibport *ibp = to_iport(ibdev, port);
4066
4067         switch (attr_id) {
4068         case IB_SMP_ATTR_PORT_INFO:
4069                 ret = __subn_set_opa_portinfo(smp, am, data, ibdev, port,
4070                                               resp_len, max_len);
4071                 break;
4072         case IB_SMP_ATTR_PKEY_TABLE:
4073                 ret = __subn_set_opa_pkeytable(smp, am, data, ibdev, port,
4074                                                resp_len, max_len);
4075                 break;
4076         case OPA_ATTRIB_ID_SL_TO_SC_MAP:
4077                 ret = __subn_set_opa_sl_to_sc(smp, am, data, ibdev, port,
4078                                               resp_len, max_len);
4079                 break;
4080         case OPA_ATTRIB_ID_SC_TO_SL_MAP:
4081                 ret = __subn_set_opa_sc_to_sl(smp, am, data, ibdev, port,
4082                                               resp_len, max_len);
4083                 break;
4084         case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
4085                 ret = __subn_set_opa_sc_to_vlt(smp, am, data, ibdev, port,
4086                                                resp_len, max_len);
4087                 break;
4088         case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
4089                 ret = __subn_set_opa_sc_to_vlnt(smp, am, data, ibdev, port,
4090                                                 resp_len, max_len);
4091                 break;
4092         case OPA_ATTRIB_ID_PORT_STATE_INFO:
4093                 ret = __subn_set_opa_psi(smp, am, data, ibdev, port,
4094                                          resp_len, max_len);
4095                 break;
4096         case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
4097                 ret = __subn_set_opa_bct(smp, am, data, ibdev, port,
4098                                          resp_len, max_len);
4099                 break;
4100         case IB_SMP_ATTR_VL_ARB_TABLE:
4101                 ret = __subn_set_opa_vl_arb(smp, am, data, ibdev, port,
4102                                             resp_len, max_len);
4103                 break;
4104         case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
4105                 ret = __subn_set_opa_cong_setting(smp, am, data, ibdev,
4106                                                   port, resp_len, max_len);
4107                 break;
4108         case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
4109                 ret = __subn_set_opa_cc_table(smp, am, data, ibdev, port,
4110                                               resp_len, max_len);
4111                 break;
4112         case IB_SMP_ATTR_LED_INFO:
4113                 ret = __subn_set_opa_led_info(smp, am, data, ibdev, port,
4114                                               resp_len, max_len);
4115                 break;
4116         case IB_SMP_ATTR_SM_INFO:
4117                 if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
4118                         return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
4119                 if (ibp->rvp.port_cap_flags & IB_PORT_SM)
4120                         return IB_MAD_RESULT_SUCCESS;
4121                 /* FALLTHROUGH */
4122         default:
4123                 smp->status |= IB_SMP_UNSUP_METH_ATTR;
4124                 ret = reply((struct ib_mad_hdr *)smp);
4125                 break;
4126         }
4127         return ret;
4128 }
4129
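     /*
      * set_aggr_error - flag a failed segment of an aggregate MAD
      *
      * Bit 15 of err_reqlength is the per-segment error bit; the request
      * length in the low bits is left untouched.
      */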
4130 static inline void set_aggr_error(struct opa_aggregate *ag)
4131 {
4132         ag->err_reqlength |= cpu_to_be16(0x8000);
4133 }
4134
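     /*
      * subn_get_opa_aggregate - handle a Get(Aggregate) SMP
      *
      * An aggregate packs 1 to 117 attribute segments into the SMP data
      * area.  Each segment begins with a struct opa_aggregate header:
      *
      *   attr_id       (__be16)  the attribute to fetch
      *   err_reqlength (__be16)  bit 15: error flag (see set_aggr_error()),
      *                           bits 6..0: payload length in 8-byte units
      *   attr_mod      (__be32)  the attribute modifier
      *
      * followed by the payload itself.  Each segment is zeroed and handed
      * to subn_get_opa_sma(); the walk stops at the first segment that
      * fails or would run past the end of the SMP.
      */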
4135 static int subn_get_opa_aggregate(struct opa_smp *smp,
4136                                   struct ib_device *ibdev, u8 port,
4137                                   u32 *resp_len)
4138 {
4139         int i;
4140         u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
4141         u8 *next_smp = opa_get_smp_data(smp);
4142
4143         if (num_attr < 1 || num_attr > 117) {
4144                 smp->status |= IB_SMP_INVALID_FIELD;
4145                 return reply((struct ib_mad_hdr *)smp);
4146         }
4147
4148         for (i = 0; i < num_attr; i++) {
4149                 struct opa_aggregate *agg;
4150                 size_t agg_data_len;
4151                 size_t agg_size;
4152                 u32 am;
4153
4154                 agg = (struct opa_aggregate *)next_smp;
4155                 agg_data_len = (be16_to_cpu(agg->err_reqlength) & 0x007f) * 8;
4156                 agg_size = sizeof(*agg) + agg_data_len;
4157                 am = be32_to_cpu(agg->attr_mod);
4158
4159                 *resp_len += agg_size;
4160
4161                 if (next_smp + agg_size > ((u8 *)smp) + sizeof(*smp)) {
4162                         smp->status |= IB_SMP_INVALID_FIELD;
4163                         return reply((struct ib_mad_hdr *)smp);
4164                 }
4165
4166                 /* zero the payload for this segment */
4167                 memset(next_smp + sizeof(*agg), 0, agg_data_len);
4168
4169                 (void)subn_get_opa_sma(agg->attr_id, smp, am, agg->data,
4170                                        ibdev, port, NULL, (u32)agg_data_len);
4171
4172                 if (smp->status & IB_SMP_INVALID_FIELD)
4173                         break;
4174                 if (smp->status & ~IB_SMP_DIRECTION) {
4175                         set_aggr_error(agg);
4176                         return reply((struct ib_mad_hdr *)smp);
4177                 }
4178                 next_smp += agg_size;
4179         }
4180
4181         return reply((struct ib_mad_hdr *)smp);
4182 }
4183
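     /*
      * subn_set_opa_aggregate - handle a Set(Aggregate) SMP
      *
      * Walks the same segment layout as subn_get_opa_aggregate(), but
      * hands each segment's payload to subn_set_opa_sma() instead of
      * zeroing it first.
      */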
4184 static int subn_set_opa_aggregate(struct opa_smp *smp,
4185                                   struct ib_device *ibdev, u8 port,
4186                                   u32 *resp_len)
4187 {
4188         int i;
4189         u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
4190         u8 *next_smp = opa_get_smp_data(smp);
4191
4192         if (num_attr < 1 || num_attr > 117) {
4193                 smp->status |= IB_SMP_INVALID_FIELD;
4194                 return reply((struct ib_mad_hdr *)smp);
4195         }
4196
4197         for (i = 0; i < num_attr; i++) {
4198                 struct opa_aggregate *agg;
4199                 size_t agg_data_len;
4200                 size_t agg_size;
4201                 u32 am;
4202
4203                 agg = (struct opa_aggregate *)next_smp;
4204                 agg_data_len = (be16_to_cpu(agg->err_reqlength) & 0x007f) * 8;
4205                 agg_size = sizeof(*agg) + agg_data_len;
4206                 am = be32_to_cpu(agg->attr_mod);
4207
4208                 *resp_len += agg_size;
4209
4210                 if (next_smp + agg_size > ((u8 *)smp) + sizeof(*smp)) {
4211                         smp->status |= IB_SMP_INVALID_FIELD;
4212                         return reply((struct ib_mad_hdr *)smp);
4213                 }
4214
4215                 (void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
4216                                        ibdev, port, NULL, (u32)agg_data_len);
4217                 if (smp->status & IB_SMP_INVALID_FIELD)
4218                         break;
4219                 if (smp->status & ~IB_SMP_DIRECTION) {
4220                         set_aggr_error(agg);
4221                         return reply((struct ib_mad_hdr *)smp);
4222                 }
4223                 next_smp += agg_size;
4224         }
4225
4226         return reply((struct ib_mad_hdr *)smp);
4227 }
4228
4229 /*
4230  * OPAv1 specifies that, on the transition to link up, these counters
4231  * are cleared:
4232  *   PortRcvErrors [*]
4233  *   LinkErrorRecovery
4234  *   LocalLinkIntegrityErrors
4235  *   ExcessiveBufferOverruns [*]
4236  *
4237  * [*] Error info associated with these counters is retained, but the
4238  * error info status is reset to 0.
4239  */
4240 void clear_linkup_counters(struct hfi1_devdata *dd)
4241 {
4242         /* PortRcvErrors */
4243         write_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL, 0);
4244         dd->err_info_rcvport.status_and_code &= ~OPA_EI_STATUS_SMASK;
4245         /* LinkErrorRecovery */
4246         write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
4247         write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL, 0);
4248         /* LocalLinkIntegrityErrors */
4249         write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0);
4250         /* ExcessiveBufferOverruns */
4251         write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
4252         dd->rcv_ovfl_cnt = 0;
4253         dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK;
4254 }
4255
4256 /*
4257  * is_local_mad() returns 1 if 'mad' was sent from, and is destined to,
4258  * the local node; 0 otherwise.
4259  */
4260 static int is_local_mad(struct hfi1_ibport *ibp, const struct opa_mad *mad,
4261                         const struct ib_wc *in_wc)
4262 {
4263         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
4264         const struct opa_smp *smp = (const struct opa_smp *)mad;
4265
4266         if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
4267                 return (smp->hop_cnt == 0 &&
4268                         smp->route.dr.dr_slid == OPA_LID_PERMISSIVE &&
4269                         smp->route.dr.dr_dlid == OPA_LID_PERMISSIVE);
4270         }
4271
4272         return (in_wc->slid == ppd->lid);
4273 }
4274
4275 /*
4276  * opa_local_smp_check() should only be called on MADs for which
4277  * is_local_mad() returns true. It applies the SMP checks that are
4278  * specific to SMPs which are sent from, and destined to this node.
4279  * opa_local_smp_check() returns 0 if the SMP passes its checks, 1
4280  * otherwise.
4281  *
4282  * SMPs which arrive from other nodes are instead checked by
4283  * opa_smp_check().
4284  */
4285 static int opa_local_smp_check(struct hfi1_ibport *ibp,
4286                                const struct ib_wc *in_wc)
4287 {
4288         struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
4289         u16 pkey;
4290
4291         if (in_wc->pkey_index >= ARRAY_SIZE(ppd->pkeys))
4292                 return 1;
4293
4294         pkey = ppd->pkeys[in_wc->pkey_index];
4295         /*
4296          * We need to do the "node-local" checks specified in OPAv1,
4297          * rev 0.90, section 9.10.26, which are:
4298          *   - the pkey is either 0x7fff or 0xffff
4299          *   - Source QPN == 0 || Destination QPN == 0
4300          *   - the MAD header's management class is either
4301          *     IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE or
4302          *     IB_MGMT_CLASS_SUBN_LID_ROUTED
4303          *   - SLID != 0
4304          *
4305          * However, we know (and so don't need to check again) that,
4306          * for local SMPs, the MAD stack passes MADs with:
4307          *   - Source QPN of 0
4308          *   - MAD mgmt_class is IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
4309          *   - SLID is either: OPA_LID_PERMISSIVE (0xFFFFFFFF), or
4310          *     our own port's lid
4311          *
4312          */
4313         if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY)
4314                 return 0;
4315         /*
4316          * On OPA devices it is okay to lose the upper 16 bits of LID as this
4317          * information is obtained elsewhere. Mask off the upper 16 bits.
4318          */
4319         ingress_pkey_table_fail(ppd, pkey, ib_lid_cpu16(0xFFFF & in_wc->slid));
4320         return 1;
4321 }
4322
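     /*
      * process_subn_opa - process an OPA subnet management MAD
      *
      * Checks the class version and M_Key, then dispatches on the MAD
      * method: Get/Set go to the SMA attribute handlers (with Aggregate
      * special-cased), traps and responses are passed back to the caller
      * for normal processing, and TrapRepress is handled locally.
      */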
4323 static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
4324                             u8 port, const struct opa_mad *in_mad,
4325                             struct opa_mad *out_mad,
4326                             u32 *resp_len)
4327 {
4328         struct opa_smp *smp = (struct opa_smp *)out_mad;
4329         struct hfi1_ibport *ibp = to_iport(ibdev, port);
4330         u8 *data;
4331         u32 am, data_size;
4332         __be16 attr_id;
4333         int ret;
4334
4335         *out_mad = *in_mad;
4336         data = opa_get_smp_data(smp);
4337         data_size = (u32)opa_get_smp_data_size(smp);
4338
4339         am = be32_to_cpu(smp->attr_mod);
4340         attr_id = smp->attr_id;
4341         if (smp->class_version != OPA_SM_CLASS_VERSION) {
4342                 smp->status |= IB_SMP_UNSUP_VERSION;
4343                 ret = reply((struct ib_mad_hdr *)smp);
4344                 return ret;
4345         }
4346         ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags, smp->mkey,
4347                          smp->route.dr.dr_slid, smp->route.dr.return_path,
4348                          smp->hop_cnt);
4349         if (ret) {
4350                 u32 port_num = be32_to_cpu(smp->attr_mod);
4351
4352                 /*
4353                  * If this is a Get/Set of PortInfo aimed at another
4354                  * port, the M_Key of that port has already been checked
4355                  * when the M_Key was OK on the receiving port. This
4356                  * check is needed to increment the error counters when
4357                  * the M_Key fails to match on *both* ports.
4358                  */
4359                 if (attr_id == IB_SMP_ATTR_PORT_INFO &&
4360                     (smp->method == IB_MGMT_METHOD_GET ||
4361                      smp->method == IB_MGMT_METHOD_SET) &&
4362                     port_num && port_num <= ibdev->phys_port_cnt &&
4363                     port != port_num)
4364                         (void)check_mkey(to_iport(ibdev, port_num),
4365                                           (struct ib_mad_hdr *)smp, 0,
4366                                           smp->mkey, smp->route.dr.dr_slid,
4367                                           smp->route.dr.return_path,
4368                                           smp->hop_cnt);
4369                 ret = IB_MAD_RESULT_FAILURE;
4370                 return ret;
4371         }
4372
4373         *resp_len = opa_get_smp_header_size(smp);
4374
4375         switch (smp->method) {
4376         case IB_MGMT_METHOD_GET:
4377                 switch (attr_id) {
4378                 default:
4379                         clear_opa_smp_data(smp);
4380                         ret = subn_get_opa_sma(attr_id, smp, am, data,
4381                                                ibdev, port, resp_len,
4382                                                data_size);
4383                         break;
4384                 case OPA_ATTRIB_ID_AGGREGATE:
4385                         ret = subn_get_opa_aggregate(smp, ibdev, port,
4386                                                      resp_len);
4387                         break;
4388                 }
4389                 break;
4390         case IB_MGMT_METHOD_SET:
4391                 switch (attr_id) {
4392                 default:
4393                         ret = subn_set_opa_sma(attr_id, smp, am, data,
4394                                                ibdev, port, resp_len,
4395                                                data_size);
4396                         break;
4397                 case OPA_ATTRIB_ID_AGGREGATE:
4398                         ret = subn_set_opa_aggregate(smp, ibdev, port,
4399                                                      resp_len);
4400                         break;
4401                 }
4402                 break;
4403         case IB_MGMT_METHOD_TRAP:
4404         case IB_MGMT_METHOD_REPORT:
4405         case IB_MGMT_METHOD_REPORT_RESP:
4406         case IB_MGMT_METHOD_GET_RESP:
4407                 /*
4408                  * The ib_mad module will call us to process responses
4409                  * before checking for other consumers.
4410                  * Just tell the caller to process it normally.
4411                  */
4412                 ret = IB_MAD_RESULT_SUCCESS;
4413                 break;
4414         case IB_MGMT_METHOD_TRAP_REPRESS:
4415                 subn_handle_opa_trap_repress(ibp, smp);
4416                 /* Always successful */
4417                 ret = IB_MAD_RESULT_SUCCESS;
4418                 break;
4419         default:
4420                 smp->status |= IB_SMP_UNSUP_METHOD;
4421                 ret = reply((struct ib_mad_hdr *)smp);
4422                 break;
4423         }
4424
4425         return ret;
4426 }
4427
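     /*
      * process_subn - process a plain IB subnet management MAD
      *
      * Only Get(NodeInfo) is supported here; all other attributes and
      * methods are rejected with an error status.
      */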
4428 static int process_subn(struct ib_device *ibdev, int mad_flags,
4429                         u8 port, const struct ib_mad *in_mad,
4430                         struct ib_mad *out_mad)
4431 {
4432         struct ib_smp *smp = (struct ib_smp *)out_mad;
4433         struct hfi1_ibport *ibp = to_iport(ibdev, port);
4434         int ret;
4435
4436         *out_mad = *in_mad;
4437         if (smp->class_version != 1) {
4438                 smp->status |= IB_SMP_UNSUP_VERSION;
4439                 ret = reply((struct ib_mad_hdr *)smp);
4440                 return ret;
4441         }
4442
4443         ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags,
4444                          smp->mkey, (__force __be32)smp->dr_slid,
4445                          smp->return_path, smp->hop_cnt);
4446         if (ret) {
4447                 u32 port_num = be32_to_cpu(smp->attr_mod);
4448
4449                 /*
4450                  * If this is a Get/Set of PortInfo aimed at another
4451                  * port, the M_Key of that port has already been checked
4452                  * when the M_Key was OK on the receiving port. This
4453                  * check is needed to increment the error counters when
4454                  * the M_Key fails to match on *both* ports.
4455                  */
4456                 if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
4457                     (smp->method == IB_MGMT_METHOD_GET ||
4458                      smp->method == IB_MGMT_METHOD_SET) &&
4459                     port_num && port_num <= ibdev->phys_port_cnt &&
4460                     port != port_num)
4461                         (void)check_mkey(to_iport(ibdev, port_num),
4462                                          (struct ib_mad_hdr *)smp, 0,
4463                                          smp->mkey,
4464                                          (__force __be32)smp->dr_slid,
4465                                          smp->return_path, smp->hop_cnt);
4466                 ret = IB_MAD_RESULT_FAILURE;
4467                 return ret;
4468         }
4469
4470         switch (smp->method) {
4471         case IB_MGMT_METHOD_GET:
4472                 switch (smp->attr_id) {
4473                 case IB_SMP_ATTR_NODE_INFO:
4474                         ret = subn_get_nodeinfo(smp, ibdev, port);
4475                         break;
4476                 default:
4477                         smp->status |= IB_SMP_UNSUP_METH_ATTR;
4478                         ret = reply((struct ib_mad_hdr *)smp);
4479                         break;
4480                 }
4481                 break;
             default:
                     /* unsupported method; reply so 'ret' is never returned uninitialized */
                     smp->status |= IB_SMP_UNSUP_METHOD;
                     ret = reply((struct ib_mad_hdr *)smp);
                     break;
4482         }
4483
4484         return ret;
4485 }
4486
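     /*
      * process_perf - process an IB performance management MAD
      *
      * Get() supports PortCounters, PortCountersExt and ClassPortInfo;
      * Set() is not supported.  Traps and responses are passed back to
      * the caller for normal processing.
      */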
4487 static int process_perf(struct ib_device *ibdev, u8 port,
4488                         const struct ib_mad *in_mad,
4489                         struct ib_mad *out_mad)
4490 {
4491         struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad;
4492         struct ib_class_port_info *cpi = (struct ib_class_port_info *)
4493                                                 &pmp->data;
4494         int ret = IB_MAD_RESULT_FAILURE;
4495
4496         *out_mad = *in_mad;
4497         if (pmp->mad_hdr.class_version != 1) {
4498                 pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
4499                 ret = reply((struct ib_mad_hdr *)pmp);
4500                 return ret;
4501         }
4502
4503         switch (pmp->mad_hdr.method) {
4504         case IB_MGMT_METHOD_GET:
4505                 switch (pmp->mad_hdr.attr_id) {
4506                 case IB_PMA_PORT_COUNTERS:
4507                         ret = pma_get_ib_portcounters(pmp, ibdev, port);
4508                         break;
4509                 case IB_PMA_PORT_COUNTERS_EXT:
4510                         ret = pma_get_ib_portcounters_ext(pmp, ibdev, port);
4511                         break;
4512                 case IB_PMA_CLASS_PORT_INFO:
4513                         cpi->capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
4514                         ret = reply((struct ib_mad_hdr *)pmp);
4515                         break;
4516                 default:
4517                         pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4518                         ret = reply((struct ib_mad_hdr *)pmp);
4519                         break;
4520                 }
4521                 break;
4522
4523         case IB_MGMT_METHOD_SET:
4524                 if (pmp->mad_hdr.attr_id) {
4525                         pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4526                         ret = reply((struct ib_mad_hdr *)pmp);
4527                 }
4528                 break;
4529
4530         case IB_MGMT_METHOD_TRAP:
4531         case IB_MGMT_METHOD_GET_RESP:
4532                 /*
4533                  * The ib_mad module will call us to process responses
4534                  * before checking for other consumers.
4535                  * Just tell the caller to process it normally.
4536                  */
4537                 ret = IB_MAD_RESULT_SUCCESS;
4538                 break;
4539
4540         default:
4541                 pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
4542                 ret = reply((struct ib_mad_hdr *)pmp);
4543                 break;
4544         }
4545
4546         return ret;
4547 }
4548
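     /*
      * process_perf_opa - process an OPA performance management MAD
      *
      * Get() supports ClassPortInfo, PortStatus, DataPortCounters,
      * ErrorPortCounters and ErrorInfo; Set() supports ClearPortStatus
      * and ErrorInfo.  Traps and responses are passed back to the caller
      * for normal processing.
      */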
4549 static int process_perf_opa(struct ib_device *ibdev, u8 port,
4550                             const struct opa_mad *in_mad,
4551                             struct opa_mad *out_mad, u32 *resp_len)
4552 {
4553         struct opa_pma_mad *pmp = (struct opa_pma_mad *)out_mad;
4554         int ret;
4555
4556         *out_mad = *in_mad;
4557
4558         if (pmp->mad_hdr.class_version != OPA_SM_CLASS_VERSION) {
4559                 pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
4560                 return reply((struct ib_mad_hdr *)pmp);
4561         }
4562
4563         *resp_len = sizeof(pmp->mad_hdr);
4564
4565         switch (pmp->mad_hdr.method) {
4566         case IB_MGMT_METHOD_GET:
4567                 switch (pmp->mad_hdr.attr_id) {
4568                 case IB_PMA_CLASS_PORT_INFO:
4569                         ret = pma_get_opa_classportinfo(pmp, ibdev, resp_len);
4570                         break;
4571                 case OPA_PM_ATTRIB_ID_PORT_STATUS:
4572                         ret = pma_get_opa_portstatus(pmp, ibdev, port,
4573                                                      resp_len);
4574                         break;
4575                 case OPA_PM_ATTRIB_ID_DATA_PORT_COUNTERS:
4576                         ret = pma_get_opa_datacounters(pmp, ibdev, port,
4577                                                        resp_len);
4578                         break;
4579                 case OPA_PM_ATTRIB_ID_ERROR_PORT_COUNTERS:
4580                         ret = pma_get_opa_porterrors(pmp, ibdev, port,
4581                                                      resp_len);
4582                         break;
4583                 case OPA_PM_ATTRIB_ID_ERROR_INFO:
4584                         ret = pma_get_opa_errorinfo(pmp, ibdev, port,
4585                                                     resp_len);
4586                         break;
4587                 default:
4588                         pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4589                         ret = reply((struct ib_mad_hdr *)pmp);
4590                         break;
4591                 }
4592                 break;
4593
4594         case IB_MGMT_METHOD_SET:
4595                 switch (pmp->mad_hdr.attr_id) {
4596                 case OPA_PM_ATTRIB_ID_CLEAR_PORT_STATUS:
4597                         ret = pma_set_opa_portstatus(pmp, ibdev, port,
4598                                                      resp_len);
4599                         break;
4600                 case OPA_PM_ATTRIB_ID_ERROR_INFO:
4601                         ret = pma_set_opa_errorinfo(pmp, ibdev, port,
4602                                                     resp_len);
4603                         break;
4604                 default:
4605                         pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
4606                         ret = reply((struct ib_mad_hdr *)pmp);
4607                         break;
4608                 }
4609                 break;
4610
4611         case IB_MGMT_METHOD_TRAP:
4612         case IB_MGMT_METHOD_GET_RESP:
4613                 /*
4614                  * The ib_mad module will call us to process responses
4615                  * before checking for other consumers.
4616                  * Just tell the caller to process it normally.
4617                  */
4618                 ret = IB_MAD_RESULT_SUCCESS;
4619                 break;
4620
4621         default:
4622                 pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
4623                 ret = reply((struct ib_mad_hdr *)pmp);
4624                 break;
4625         }
4626
4627         return ret;
4628 }
4629
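     /*
      * hfi1_process_opa_mad - process an OPA-format MAD
      *
      * Looks up the limited management P_Key index for any reply, applies
      * the node-local SMP checks to locally routed subnet MADs, and then
      * dispatches by management class.  For a reply, *out_mad_size is the
      * response length rounded up to a multiple of 8 bytes.
      */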
4630 static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
4631                                 u8 port, const struct ib_wc *in_wc,
4632                                 const struct ib_grh *in_grh,
4633                                 const struct opa_mad *in_mad,
4634                                 struct opa_mad *out_mad, size_t *out_mad_size,
4635                                 u16 *out_mad_pkey_index)
4636 {
4637         int ret;
4638         int pkey_idx;
4639         u32 resp_len = 0;
4640         struct hfi1_ibport *ibp = to_iport(ibdev, port);
4641
4642         pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
4643         if (pkey_idx < 0) {
4644                 pr_warn("failed to find limited mgmt pkey, defaulting 0x%x\n",
4645                         hfi1_get_pkey(ibp, 1));
4646                 pkey_idx = 1;
4647         }
4648         *out_mad_pkey_index = (u16)pkey_idx;
4649
4650         switch (in_mad->mad_hdr.mgmt_class) {
4651         case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
4652         case IB_MGMT_CLASS_SUBN_LID_ROUTED:
4653                 if (is_local_mad(ibp, in_mad, in_wc)) {
4654                         ret = opa_local_smp_check(ibp, in_wc);
4655                         if (ret)
4656                                 return IB_MAD_RESULT_FAILURE;
4657                 }
4658                 ret = process_subn_opa(ibdev, mad_flags, port, in_mad,
4659                                        out_mad, &resp_len);
4660                 goto bail;
4661         case IB_MGMT_CLASS_PERF_MGMT:
4662                 ret = process_perf_opa(ibdev, port, in_mad, out_mad,
4663                                        &resp_len);
4664                 goto bail;
4665
4666         default:
4667                 ret = IB_MAD_RESULT_SUCCESS;
4668         }
4669
4670 bail:
4671         if (ret & IB_MAD_RESULT_REPLY)
4672                 *out_mad_size = round_up(resp_len, 8);
4673         else if (ret & IB_MAD_RESULT_SUCCESS)
4674                 *out_mad_size = in_wc->byte_len - sizeof(struct ib_grh);
4675
4676         return ret;
4677 }
4678
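     /*
      * hfi1_process_ib_mad - process an IB-format MAD
      *
      * Dispatches by management class to the IB subnet or performance
      * handlers; all other classes are passed back to the caller for
      * normal processing.
      */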
4679 static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port,
4680                                const struct ib_wc *in_wc,
4681                                const struct ib_grh *in_grh,
4682                                const struct ib_mad *in_mad,
4683                                struct ib_mad *out_mad)
4684 {
4685         int ret;
4686
4687         switch (in_mad->mad_hdr.mgmt_class) {
4688         case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
4689         case IB_MGMT_CLASS_SUBN_LID_ROUTED:
4690                 ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad);
4691                 break;
4692         case IB_MGMT_CLASS_PERF_MGMT:
4693                 ret = process_perf(ibdev, port, in_mad, out_mad);
4694                 break;
4695         default:
4696                 ret = IB_MAD_RESULT_SUCCESS;
4697                 break;
4698         }
4699
4700         return ret;
4701 }
4702
4703 /**
4704  * hfi1_process_mad - process an incoming MAD packet
4705  * @ibdev: the infiniband device this packet came in on
4706  * @mad_flags: MAD flags
4707  * @port: the port number this packet came in on
4708  * @in_wc: the work completion entry for this packet
4709  * @in_grh: the global route header for this packet
4710  * @in_mad: the incoming MAD
      * @in_mad_size: size of the incoming MAD in bytes
4711  * @out_mad: any outgoing MAD reply
      * @out_mad_size: size in bytes of any outgoing MAD reply
      * @out_mad_pkey_index: P_Key index to use for any outgoing MAD reply
4712  *
4713  * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not
4714  * interested in processing.
4715  *
4716  * Note that the verbs framework has already done the MAD sanity checks,
4717  * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
4718  * MADs.
4719  *
4720  * This is called by the ib_mad module.
4721  */
4722 int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
4723                      const struct ib_wc *in_wc, const struct ib_grh *in_grh,
4724                      const struct ib_mad_hdr *in_mad, size_t in_mad_size,
4725                      struct ib_mad_hdr *out_mad, size_t *out_mad_size,
4726                      u16 *out_mad_pkey_index)
4727 {
4728         switch (in_mad->base_version) {
4729         case OPA_MGMT_BASE_VERSION:
4730                 if (unlikely(in_mad_size != sizeof(struct opa_mad))) {
4731                         dev_err(ibdev->dev.parent, "invalid in_mad_size\n");
4732                         return IB_MAD_RESULT_FAILURE;
4733                 }
4734                 return hfi1_process_opa_mad(ibdev, mad_flags, port,
4735                                             in_wc, in_grh,
4736                                             (struct opa_mad *)in_mad,
4737                                             (struct opa_mad *)out_mad,
4738                                             out_mad_size,
4739                                             out_mad_pkey_index);
4740         case IB_MGMT_BASE_VERSION:
4741                 return hfi1_process_ib_mad(ibdev, mad_flags, port,
4742                                           in_wc, in_grh,
4743                                           (const struct ib_mad *)in_mad,
4744                                           (struct ib_mad *)out_mad);
4745         default:
4746                 break;
4747         }
4748
4749         return IB_MAD_RESULT_FAILURE;
4750 }