GNU Linux-libre 5.19-rc6-gnu
drivers/infiniband/core/sa_query.c
1 /*
2  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
4  * Copyright (c) 2006 Intel Corporation.  All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  */
34
35 #include <linux/init.h>
36 #include <linux/err.h>
37 #include <linux/random.h>
38 #include <linux/spinlock.h>
39 #include <linux/slab.h>
40 #include <linux/dma-mapping.h>
41 #include <linux/kref.h>
42 #include <linux/xarray.h>
43 #include <linux/workqueue.h>
44 #include <uapi/linux/if_ether.h>
45 #include <rdma/ib_pack.h>
46 #include <rdma/ib_cache.h>
47 #include <rdma/rdma_netlink.h>
48 #include <net/netlink.h>
49 #include <uapi/rdma/ib_user_sa.h>
50 #include <rdma/ib_marshall.h>
51 #include <rdma/ib_addr.h>
52 #include <rdma/opa_addr.h>
53 #include "sa.h"
54 #include "core_priv.h"
55
56 #define IB_SA_LOCAL_SVC_TIMEOUT_MIN             100
57 #define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT         2000
58 #define IB_SA_LOCAL_SVC_TIMEOUT_MAX             200000
59 #define IB_SA_CPI_MAX_RETRY_CNT                 3
60 #define IB_SA_CPI_RETRY_WAIT                    1000 /* msecs */
61 static int sa_local_svc_timeout_ms = IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT;
62
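/*
 * Reference-counted address handle for sending SA MADs to the subnet
 * administrator on a port.  Queries pin it with kref_get() and release it
 * through free_sm_ah(); it is presumably rebuilt by the port's update_task
 * when the SM LID/SL changes (that handler is outside this excerpt).
 */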
63 struct ib_sa_sm_ah {
64         struct ib_ah        *ah;
65         struct kref          ref;
66         u16                  pkey_index;
67         u8                   src_path_mask;
68 };
69
70 enum rdma_class_port_info_type {
71         RDMA_CLASS_PORT_INFO_IB,
72         RDMA_CLASS_PORT_INFO_OPA
73 };
74
75 struct rdma_class_port_info {
76         enum rdma_class_port_info_type type;
77         union {
78                 struct ib_class_port_info ib;
79                 struct opa_class_port_info opa;
80         };
81 };
82
83 struct ib_sa_classport_cache {
84         bool valid;
85         int retry_cnt;
86         struct rdma_class_port_info data;
87 };
88
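/*
 * Per-port SA state: the MAD agent used to send queries, the cached SM
 * address handle (protected by ah_lock), and a cached ClassPortInfo
 * (protected by classport_lock).  ib_cpi_work appears to be the delayed
 * work that retries the ClassPortInfo query, bounded by
 * IB_SA_CPI_MAX_RETRY_CNT / IB_SA_CPI_RETRY_WAIT.
 */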
89 struct ib_sa_port {
90         struct ib_mad_agent *agent;
91         struct ib_sa_sm_ah  *sm_ah;
92         struct work_struct   update_task;
93         struct ib_sa_classport_cache classport_info;
94         struct delayed_work ib_cpi_work;
95         spinlock_t                   classport_lock; /* protects class port info set */
96         spinlock_t           ah_lock;
97         u32                  port_num;
98 };
99
100 struct ib_sa_device {
101         int                     start_port, end_port;
102         struct ib_event_handler event_handler;
103         struct ib_sa_port port[];
104 };
105
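/*
 * Common base embedded in every outstanding SA query.  callback() delivers
 * the raw response MAD (or NULL on error) and release() frees the
 * type-specific wrapper; id indexes the global 'queries' xarray.  flags
 * take the IB_SA_* bits defined below, and list/seq/timeout/path_use are
 * only used while the query sits on the netlink local service request list.
 */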
106 struct ib_sa_query {
107         void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
108         void (*release)(struct ib_sa_query *);
109         struct ib_sa_client    *client;
110         struct ib_sa_port      *port;
111         struct ib_mad_send_buf *mad_buf;
112         struct ib_sa_sm_ah     *sm_ah;
113         int                     id;
114         u32                     flags;
115         struct list_head        list; /* Local svc request list */
116         u32                     seq; /* Local svc request sequence number */
117         unsigned long           timeout; /* Local svc timeout */
118         u8                      path_use; /* How will the pathrecord be used */
119 };
120
121 #define IB_SA_ENABLE_LOCAL_SERVICE      0x00000001
122 #define IB_SA_CANCEL                    0x00000002
123 #define IB_SA_QUERY_OPA                 0x00000004
124
125 struct ib_sa_path_query {
126         void (*callback)(int, struct sa_path_rec *, void *);
127         void *context;
128         struct ib_sa_query sa_query;
129         struct sa_path_rec *conv_pr;
130 };
131
132 struct ib_sa_guidinfo_query {
133         void (*callback)(int, struct ib_sa_guidinfo_rec *, void *);
134         void *context;
135         struct ib_sa_query sa_query;
136 };
137
138 struct ib_sa_classport_info_query {
139         void (*callback)(void *);
140         void *context;
141         struct ib_sa_query sa_query;
142 };
143
144 struct ib_sa_mcmember_query {
145         void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
146         void *context;
147         struct ib_sa_query sa_query;
148 };
149
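/*
 * Bookkeeping for path queries handed off to the netlink local service:
 * outstanding requests sit on ib_nl_request_list (under ib_nl_request_lock),
 * are numbered from ib_nl_sa_request_seq, and are expired by
 * ib_nl_timed_work running on ib_nl_wq.  ib_nl_policy validates the
 * attributes of incoming LS messages.
 */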
150 static LIST_HEAD(ib_nl_request_list);
151 static DEFINE_SPINLOCK(ib_nl_request_lock);
152 static atomic_t ib_nl_sa_request_seq;
153 static struct workqueue_struct *ib_nl_wq;
154 static struct delayed_work ib_nl_timed_work;
155 static const struct nla_policy ib_nl_policy[LS_NLA_TYPE_MAX] = {
156         [LS_NLA_TYPE_PATH_RECORD]       = {.type = NLA_BINARY,
157                 .len = sizeof(struct ib_path_rec_data)},
158         [LS_NLA_TYPE_TIMEOUT]           = {.type = NLA_U32},
159         [LS_NLA_TYPE_SERVICE_ID]        = {.type = NLA_U64},
160         [LS_NLA_TYPE_DGID]              = {.type = NLA_BINARY,
161                 .len = sizeof(struct rdma_nla_ls_gid)},
162         [LS_NLA_TYPE_SGID]              = {.type = NLA_BINARY,
163                 .len = sizeof(struct rdma_nla_ls_gid)},
164         [LS_NLA_TYPE_TCLASS]            = {.type = NLA_U8},
165         [LS_NLA_TYPE_PKEY]              = {.type = NLA_U16},
166         [LS_NLA_TYPE_QOS_CLASS]         = {.type = NLA_U16},
167 };
168
169
170 static int ib_sa_add_one(struct ib_device *device);
171 static void ib_sa_remove_one(struct ib_device *device, void *client_data);
172
173 static struct ib_client sa_client = {
174         .name   = "sa",
175         .add    = ib_sa_add_one,
176         .remove = ib_sa_remove_one
177 };
178
179 static DEFINE_XARRAY_FLAGS(queries, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
180
181 static DEFINE_SPINLOCK(tid_lock);
182 static u32 tid;
183
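/*
 * Layout tables consumed by ib_pack()/ib_unpack(): each entry maps a struct
 * field to its word/bit offset and width in the corresponding on-the-wire
 * SA attribute (see ib_sa_pack_path()/ib_sa_unpack_path() below).
 */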
184 #define PATH_REC_FIELD(field) \
185         .struct_offset_bytes = offsetof(struct sa_path_rec, field),     \
186         .struct_size_bytes   = sizeof_field(struct sa_path_rec, field), \
187         .field_name          = "sa_path_rec:" #field
188
189 static const struct ib_field path_rec_table[] = {
190         { PATH_REC_FIELD(service_id),
191           .offset_words = 0,
192           .offset_bits  = 0,
193           .size_bits    = 64 },
194         { PATH_REC_FIELD(dgid),
195           .offset_words = 2,
196           .offset_bits  = 0,
197           .size_bits    = 128 },
198         { PATH_REC_FIELD(sgid),
199           .offset_words = 6,
200           .offset_bits  = 0,
201           .size_bits    = 128 },
202         { PATH_REC_FIELD(ib.dlid),
203           .offset_words = 10,
204           .offset_bits  = 0,
205           .size_bits    = 16 },
206         { PATH_REC_FIELD(ib.slid),
207           .offset_words = 10,
208           .offset_bits  = 16,
209           .size_bits    = 16 },
210         { PATH_REC_FIELD(ib.raw_traffic),
211           .offset_words = 11,
212           .offset_bits  = 0,
213           .size_bits    = 1 },
214         { RESERVED,
215           .offset_words = 11,
216           .offset_bits  = 1,
217           .size_bits    = 3 },
218         { PATH_REC_FIELD(flow_label),
219           .offset_words = 11,
220           .offset_bits  = 4,
221           .size_bits    = 20 },
222         { PATH_REC_FIELD(hop_limit),
223           .offset_words = 11,
224           .offset_bits  = 24,
225           .size_bits    = 8 },
226         { PATH_REC_FIELD(traffic_class),
227           .offset_words = 12,
228           .offset_bits  = 0,
229           .size_bits    = 8 },
230         { PATH_REC_FIELD(reversible),
231           .offset_words = 12,
232           .offset_bits  = 8,
233           .size_bits    = 1 },
234         { PATH_REC_FIELD(numb_path),
235           .offset_words = 12,
236           .offset_bits  = 9,
237           .size_bits    = 7 },
238         { PATH_REC_FIELD(pkey),
239           .offset_words = 12,
240           .offset_bits  = 16,
241           .size_bits    = 16 },
242         { PATH_REC_FIELD(qos_class),
243           .offset_words = 13,
244           .offset_bits  = 0,
245           .size_bits    = 12 },
246         { PATH_REC_FIELD(sl),
247           .offset_words = 13,
248           .offset_bits  = 12,
249           .size_bits    = 4 },
250         { PATH_REC_FIELD(mtu_selector),
251           .offset_words = 13,
252           .offset_bits  = 16,
253           .size_bits    = 2 },
254         { PATH_REC_FIELD(mtu),
255           .offset_words = 13,
256           .offset_bits  = 18,
257           .size_bits    = 6 },
258         { PATH_REC_FIELD(rate_selector),
259           .offset_words = 13,
260           .offset_bits  = 24,
261           .size_bits    = 2 },
262         { PATH_REC_FIELD(rate),
263           .offset_words = 13,
264           .offset_bits  = 26,
265           .size_bits    = 6 },
266         { PATH_REC_FIELD(packet_life_time_selector),
267           .offset_words = 14,
268           .offset_bits  = 0,
269           .size_bits    = 2 },
270         { PATH_REC_FIELD(packet_life_time),
271           .offset_words = 14,
272           .offset_bits  = 2,
273           .size_bits    = 6 },
274         { PATH_REC_FIELD(preference),
275           .offset_words = 14,
276           .offset_bits  = 8,
277           .size_bits    = 8 },
278         { RESERVED,
279           .offset_words = 14,
280           .offset_bits  = 16,
281           .size_bits    = 48 },
282 };
283
284 #define OPA_PATH_REC_FIELD(field) \
285         .struct_offset_bytes = \
286                 offsetof(struct sa_path_rec, field), \
287         .struct_size_bytes   = \
288                 sizeof_field(struct sa_path_rec, field),        \
289         .field_name          = "sa_path_rec:" #field
290
291 static const struct ib_field opa_path_rec_table[] = {
292         { OPA_PATH_REC_FIELD(service_id),
293           .offset_words = 0,
294           .offset_bits  = 0,
295           .size_bits    = 64 },
296         { OPA_PATH_REC_FIELD(dgid),
297           .offset_words = 2,
298           .offset_bits  = 0,
299           .size_bits    = 128 },
300         { OPA_PATH_REC_FIELD(sgid),
301           .offset_words = 6,
302           .offset_bits  = 0,
303           .size_bits    = 128 },
304         { OPA_PATH_REC_FIELD(opa.dlid),
305           .offset_words = 10,
306           .offset_bits  = 0,
307           .size_bits    = 32 },
308         { OPA_PATH_REC_FIELD(opa.slid),
309           .offset_words = 11,
310           .offset_bits  = 0,
311           .size_bits    = 32 },
312         { OPA_PATH_REC_FIELD(opa.raw_traffic),
313           .offset_words = 12,
314           .offset_bits  = 0,
315           .size_bits    = 1 },
316         { RESERVED,
317           .offset_words = 12,
318           .offset_bits  = 1,
319           .size_bits    = 3 },
320         { OPA_PATH_REC_FIELD(flow_label),
321           .offset_words = 12,
322           .offset_bits  = 4,
323           .size_bits    = 20 },
324         { OPA_PATH_REC_FIELD(hop_limit),
325           .offset_words = 12,
326           .offset_bits  = 24,
327           .size_bits    = 8 },
328         { OPA_PATH_REC_FIELD(traffic_class),
329           .offset_words = 13,
330           .offset_bits  = 0,
331           .size_bits    = 8 },
332         { OPA_PATH_REC_FIELD(reversible),
333           .offset_words = 13,
334           .offset_bits  = 8,
335           .size_bits    = 1 },
336         { OPA_PATH_REC_FIELD(numb_path),
337           .offset_words = 13,
338           .offset_bits  = 9,
339           .size_bits    = 7 },
340         { OPA_PATH_REC_FIELD(pkey),
341           .offset_words = 13,
342           .offset_bits  = 16,
343           .size_bits    = 16 },
344         { OPA_PATH_REC_FIELD(opa.l2_8B),
345           .offset_words = 14,
346           .offset_bits  = 0,
347           .size_bits    = 1 },
348         { OPA_PATH_REC_FIELD(opa.l2_10B),
349           .offset_words = 14,
350           .offset_bits  = 1,
351           .size_bits    = 1 },
352         { OPA_PATH_REC_FIELD(opa.l2_9B),
353           .offset_words = 14,
354           .offset_bits  = 2,
355           .size_bits    = 1 },
356         { OPA_PATH_REC_FIELD(opa.l2_16B),
357           .offset_words = 14,
358           .offset_bits  = 3,
359           .size_bits    = 1 },
360         { RESERVED,
361           .offset_words = 14,
362           .offset_bits  = 4,
363           .size_bits    = 2 },
364         { OPA_PATH_REC_FIELD(opa.qos_type),
365           .offset_words = 14,
366           .offset_bits  = 6,
367           .size_bits    = 2 },
368         { OPA_PATH_REC_FIELD(opa.qos_priority),
369           .offset_words = 14,
370           .offset_bits  = 8,
371           .size_bits    = 8 },
372         { RESERVED,
373           .offset_words = 14,
374           .offset_bits  = 16,
375           .size_bits    = 3 },
376         { OPA_PATH_REC_FIELD(sl),
377           .offset_words = 14,
378           .offset_bits  = 19,
379           .size_bits    = 5 },
380         { RESERVED,
381           .offset_words = 14,
382           .offset_bits  = 24,
383           .size_bits    = 8 },
384         { OPA_PATH_REC_FIELD(mtu_selector),
385           .offset_words = 15,
386           .offset_bits  = 0,
387           .size_bits    = 2 },
388         { OPA_PATH_REC_FIELD(mtu),
389           .offset_words = 15,
390           .offset_bits  = 2,
391           .size_bits    = 6 },
392         { OPA_PATH_REC_FIELD(rate_selector),
393           .offset_words = 15,
394           .offset_bits  = 8,
395           .size_bits    = 2 },
396         { OPA_PATH_REC_FIELD(rate),
397           .offset_words = 15,
398           .offset_bits  = 10,
399           .size_bits    = 6 },
400         { OPA_PATH_REC_FIELD(packet_life_time_selector),
401           .offset_words = 15,
402           .offset_bits  = 16,
403           .size_bits    = 2 },
404         { OPA_PATH_REC_FIELD(packet_life_time),
405           .offset_words = 15,
406           .offset_bits  = 18,
407           .size_bits    = 6 },
408         { OPA_PATH_REC_FIELD(preference),
409           .offset_words = 15,
410           .offset_bits  = 24,
411           .size_bits    = 8 },
412 };
413
414 #define MCMEMBER_REC_FIELD(field) \
415         .struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field),      \
416         .struct_size_bytes   = sizeof_field(struct ib_sa_mcmember_rec, field),  \
417         .field_name          = "sa_mcmember_rec:" #field
418
419 static const struct ib_field mcmember_rec_table[] = {
420         { MCMEMBER_REC_FIELD(mgid),
421           .offset_words = 0,
422           .offset_bits  = 0,
423           .size_bits    = 128 },
424         { MCMEMBER_REC_FIELD(port_gid),
425           .offset_words = 4,
426           .offset_bits  = 0,
427           .size_bits    = 128 },
428         { MCMEMBER_REC_FIELD(qkey),
429           .offset_words = 8,
430           .offset_bits  = 0,
431           .size_bits    = 32 },
432         { MCMEMBER_REC_FIELD(mlid),
433           .offset_words = 9,
434           .offset_bits  = 0,
435           .size_bits    = 16 },
436         { MCMEMBER_REC_FIELD(mtu_selector),
437           .offset_words = 9,
438           .offset_bits  = 16,
439           .size_bits    = 2 },
440         { MCMEMBER_REC_FIELD(mtu),
441           .offset_words = 9,
442           .offset_bits  = 18,
443           .size_bits    = 6 },
444         { MCMEMBER_REC_FIELD(traffic_class),
445           .offset_words = 9,
446           .offset_bits  = 24,
447           .size_bits    = 8 },
448         { MCMEMBER_REC_FIELD(pkey),
449           .offset_words = 10,
450           .offset_bits  = 0,
451           .size_bits    = 16 },
452         { MCMEMBER_REC_FIELD(rate_selector),
453           .offset_words = 10,
454           .offset_bits  = 16,
455           .size_bits    = 2 },
456         { MCMEMBER_REC_FIELD(rate),
457           .offset_words = 10,
458           .offset_bits  = 18,
459           .size_bits    = 6 },
460         { MCMEMBER_REC_FIELD(packet_life_time_selector),
461           .offset_words = 10,
462           .offset_bits  = 24,
463           .size_bits    = 2 },
464         { MCMEMBER_REC_FIELD(packet_life_time),
465           .offset_words = 10,
466           .offset_bits  = 26,
467           .size_bits    = 6 },
468         { MCMEMBER_REC_FIELD(sl),
469           .offset_words = 11,
470           .offset_bits  = 0,
471           .size_bits    = 4 },
472         { MCMEMBER_REC_FIELD(flow_label),
473           .offset_words = 11,
474           .offset_bits  = 4,
475           .size_bits    = 20 },
476         { MCMEMBER_REC_FIELD(hop_limit),
477           .offset_words = 11,
478           .offset_bits  = 24,
479           .size_bits    = 8 },
480         { MCMEMBER_REC_FIELD(scope),
481           .offset_words = 12,
482           .offset_bits  = 0,
483           .size_bits    = 4 },
484         { MCMEMBER_REC_FIELD(join_state),
485           .offset_words = 12,
486           .offset_bits  = 4,
487           .size_bits    = 4 },
488         { MCMEMBER_REC_FIELD(proxy_join),
489           .offset_words = 12,
490           .offset_bits  = 8,
491           .size_bits    = 1 },
492         { RESERVED,
493           .offset_words = 12,
494           .offset_bits  = 9,
495           .size_bits    = 23 },
496 };
497
498 #define CLASSPORTINFO_REC_FIELD(field) \
499         .struct_offset_bytes = offsetof(struct ib_class_port_info, field),      \
500         .struct_size_bytes   = sizeof_field(struct ib_class_port_info, field),  \
501         .field_name          = "ib_class_port_info:" #field
502
503 static const struct ib_field ib_classport_info_rec_table[] = {
504         { CLASSPORTINFO_REC_FIELD(base_version),
505           .offset_words = 0,
506           .offset_bits  = 0,
507           .size_bits    = 8 },
508         { CLASSPORTINFO_REC_FIELD(class_version),
509           .offset_words = 0,
510           .offset_bits  = 8,
511           .size_bits    = 8 },
512         { CLASSPORTINFO_REC_FIELD(capability_mask),
513           .offset_words = 0,
514           .offset_bits  = 16,
515           .size_bits    = 16 },
516         { CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time),
517           .offset_words = 1,
518           .offset_bits  = 0,
519           .size_bits    = 32 },
520         { CLASSPORTINFO_REC_FIELD(redirect_gid),
521           .offset_words = 2,
522           .offset_bits  = 0,
523           .size_bits    = 128 },
524         { CLASSPORTINFO_REC_FIELD(redirect_tcslfl),
525           .offset_words = 6,
526           .offset_bits  = 0,
527           .size_bits    = 32 },
528         { CLASSPORTINFO_REC_FIELD(redirect_lid),
529           .offset_words = 7,
530           .offset_bits  = 0,
531           .size_bits    = 16 },
532         { CLASSPORTINFO_REC_FIELD(redirect_pkey),
533           .offset_words = 7,
534           .offset_bits  = 16,
535           .size_bits    = 16 },
536
537         { CLASSPORTINFO_REC_FIELD(redirect_qp),
538           .offset_words = 8,
539           .offset_bits  = 0,
540           .size_bits    = 32 },
541         { CLASSPORTINFO_REC_FIELD(redirect_qkey),
542           .offset_words = 9,
543           .offset_bits  = 0,
544           .size_bits    = 32 },
545
546         { CLASSPORTINFO_REC_FIELD(trap_gid),
547           .offset_words = 10,
548           .offset_bits  = 0,
549           .size_bits    = 128 },
550         { CLASSPORTINFO_REC_FIELD(trap_tcslfl),
551           .offset_words = 14,
552           .offset_bits  = 0,
553           .size_bits    = 32 },
554
555         { CLASSPORTINFO_REC_FIELD(trap_lid),
556           .offset_words = 15,
557           .offset_bits  = 0,
558           .size_bits    = 16 },
559         { CLASSPORTINFO_REC_FIELD(trap_pkey),
560           .offset_words = 15,
561           .offset_bits  = 16,
562           .size_bits    = 16 },
563
564         { CLASSPORTINFO_REC_FIELD(trap_hlqp),
565           .offset_words = 16,
566           .offset_bits  = 0,
567           .size_bits    = 32 },
568         { CLASSPORTINFO_REC_FIELD(trap_qkey),
569           .offset_words = 17,
570           .offset_bits  = 0,
571           .size_bits    = 32 },
572 };
573
574 #define OPA_CLASSPORTINFO_REC_FIELD(field) \
575         .struct_offset_bytes =\
576                 offsetof(struct opa_class_port_info, field),    \
577         .struct_size_bytes   = \
578                 sizeof_field(struct opa_class_port_info, field),        \
579         .field_name          = "opa_class_port_info:" #field
580
581 static const struct ib_field opa_classport_info_rec_table[] = {
582         { OPA_CLASSPORTINFO_REC_FIELD(base_version),
583           .offset_words = 0,
584           .offset_bits  = 0,
585           .size_bits    = 8 },
586         { OPA_CLASSPORTINFO_REC_FIELD(class_version),
587           .offset_words = 0,
588           .offset_bits  = 8,
589           .size_bits    = 8 },
590         { OPA_CLASSPORTINFO_REC_FIELD(cap_mask),
591           .offset_words = 0,
592           .offset_bits  = 16,
593           .size_bits    = 16 },
594         { OPA_CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time),
595           .offset_words = 1,
596           .offset_bits  = 0,
597           .size_bits    = 32 },
598         { OPA_CLASSPORTINFO_REC_FIELD(redirect_gid),
599           .offset_words = 2,
600           .offset_bits  = 0,
601           .size_bits    = 128 },
602         { OPA_CLASSPORTINFO_REC_FIELD(redirect_tc_fl),
603           .offset_words = 6,
604           .offset_bits  = 0,
605           .size_bits    = 32 },
606         { OPA_CLASSPORTINFO_REC_FIELD(redirect_lid),
607           .offset_words = 7,
608           .offset_bits  = 0,
609           .size_bits    = 32 },
610         { OPA_CLASSPORTINFO_REC_FIELD(redirect_sl_qp),
611           .offset_words = 8,
612           .offset_bits  = 0,
613           .size_bits    = 32 },
614         { OPA_CLASSPORTINFO_REC_FIELD(redirect_qkey),
615           .offset_words = 9,
616           .offset_bits  = 0,
617           .size_bits    = 32 },
618         { OPA_CLASSPORTINFO_REC_FIELD(trap_gid),
619           .offset_words = 10,
620           .offset_bits  = 0,
621           .size_bits    = 128 },
622         { OPA_CLASSPORTINFO_REC_FIELD(trap_tc_fl),
623           .offset_words = 14,
624           .offset_bits  = 0,
625           .size_bits    = 32 },
626         { OPA_CLASSPORTINFO_REC_FIELD(trap_lid),
627           .offset_words = 15,
628           .offset_bits  = 0,
629           .size_bits    = 32 },
630         { OPA_CLASSPORTINFO_REC_FIELD(trap_hl_qp),
631           .offset_words = 16,
632           .offset_bits  = 0,
633           .size_bits    = 32 },
634         { OPA_CLASSPORTINFO_REC_FIELD(trap_qkey),
635           .offset_words = 17,
636           .offset_bits  = 0,
637           .size_bits    = 32 },
638         { OPA_CLASSPORTINFO_REC_FIELD(trap_pkey),
639           .offset_words = 18,
640           .offset_bits  = 0,
641           .size_bits    = 16 },
642         { OPA_CLASSPORTINFO_REC_FIELD(redirect_pkey),
643           .offset_words = 18,
644           .offset_bits  = 16,
645           .size_bits    = 16 },
646         { OPA_CLASSPORTINFO_REC_FIELD(trap_sl_rsvd),
647           .offset_words = 19,
648           .offset_bits  = 0,
649           .size_bits    = 8 },
650         { RESERVED,
651           .offset_words = 19,
652           .offset_bits  = 8,
653           .size_bits    = 24 },
654 };
655
656 #define GUIDINFO_REC_FIELD(field) \
657         .struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field),      \
658         .struct_size_bytes   = sizeof_field(struct ib_sa_guidinfo_rec, field),  \
659         .field_name          = "sa_guidinfo_rec:" #field
660
661 static const struct ib_field guidinfo_rec_table[] = {
662         { GUIDINFO_REC_FIELD(lid),
663           .offset_words = 0,
664           .offset_bits  = 0,
665           .size_bits    = 16 },
666         { GUIDINFO_REC_FIELD(block_num),
667           .offset_words = 0,
668           .offset_bits  = 16,
669           .size_bits    = 8 },
670         { GUIDINFO_REC_FIELD(res1),
671           .offset_words = 0,
672           .offset_bits  = 24,
673           .size_bits    = 8 },
674         { GUIDINFO_REC_FIELD(res2),
675           .offset_words = 1,
676           .offset_bits  = 0,
677           .size_bits    = 32 },
678         { GUIDINFO_REC_FIELD(guid_info_list),
679           .offset_words = 2,
680           .offset_bits  = 0,
681           .size_bits    = 512 },
682 };
683
684 static inline void ib_sa_disable_local_svc(struct ib_sa_query *query)
685 {
686         query->flags &= ~IB_SA_ENABLE_LOCAL_SERVICE;
687 }
688
689 static inline int ib_sa_query_cancelled(struct ib_sa_query *query)
690 {
691         return (query->flags & IB_SA_CANCEL);
692 }
693
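/*
 * Fill the body of a netlink resolve request: the rdma_ls_resolve_header
 * (device name, port number, and path use derived from the reversible bit),
 * followed by one attribute for each relevant comp_mask bit, with the
 * service id, pkey and qos class converted to host byte order.
 */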
694 static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
695                                      struct ib_sa_query *query)
696 {
697         struct sa_path_rec *sa_rec = query->mad_buf->context[1];
698         struct ib_sa_mad *mad = query->mad_buf->mad;
699         ib_sa_comp_mask comp_mask = mad->sa_hdr.comp_mask;
700         u16 val16;
701         u64 val64;
702         struct rdma_ls_resolve_header *header;
703
704         query->mad_buf->context[1] = NULL;
705
706         /* Construct the family header first */
707         header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
708         strscpy_pad(header->device_name,
709                     dev_name(&query->port->agent->device->dev),
710                     LS_DEVICE_NAME_MAX);
711         header->port_num = query->port->port_num;
712
713         if ((comp_mask & IB_SA_PATH_REC_REVERSIBLE) &&
714             sa_rec->reversible != 0)
715                 query->path_use = LS_RESOLVE_PATH_USE_GMP;
716         else
717                 query->path_use = LS_RESOLVE_PATH_USE_UNIDIRECTIONAL;
718         header->path_use = query->path_use;
719
720         /* Now build the attributes */
721         if (comp_mask & IB_SA_PATH_REC_SERVICE_ID) {
722                 val64 = be64_to_cpu(sa_rec->service_id);
723                 nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SERVICE_ID,
724                         sizeof(val64), &val64);
725         }
726         if (comp_mask & IB_SA_PATH_REC_DGID)
727                 nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_DGID,
728                         sizeof(sa_rec->dgid), &sa_rec->dgid);
729         if (comp_mask & IB_SA_PATH_REC_SGID)
730                 nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SGID,
731                         sizeof(sa_rec->sgid), &sa_rec->sgid);
732         if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS)
733                 nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_TCLASS,
734                         sizeof(sa_rec->traffic_class), &sa_rec->traffic_class);
735
736         if (comp_mask & IB_SA_PATH_REC_PKEY) {
737                 val16 = be16_to_cpu(sa_rec->pkey);
738                 nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_PKEY,
739                         sizeof(val16), &val16);
740         }
741         if (comp_mask & IB_SA_PATH_REC_QOS_CLASS) {
742                 val16 = be16_to_cpu(sa_rec->qos_class);
743                 nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_QOS_CLASS,
744                         sizeof(val16), &val16);
745         }
746 }
747
748 static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask)
749 {
750         int len = 0;
751
752         if (comp_mask & IB_SA_PATH_REC_SERVICE_ID)
753                 len += nla_total_size(sizeof(u64));
754         if (comp_mask & IB_SA_PATH_REC_DGID)
755                 len += nla_total_size(sizeof(struct rdma_nla_ls_gid));
756         if (comp_mask & IB_SA_PATH_REC_SGID)
757                 len += nla_total_size(sizeof(struct rdma_nla_ls_gid));
758         if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS)
759                 len += nla_total_size(sizeof(u8));
760         if (comp_mask & IB_SA_PATH_REC_PKEY)
761                 len += nla_total_size(sizeof(u16));
762         if (comp_mask & IB_SA_PATH_REC_QOS_CLASS)
763                 len += nla_total_size(sizeof(u16));
764
765         /*
766          * Make sure that at least some of the required comp_mask bits are
767          * set.
768          */
769         if (WARN_ON(len == 0))
770                 return len;
771
772         /* Add the family header */
773         len += NLMSG_ALIGN(sizeof(struct rdma_ls_resolve_header));
774
775         return len;
776 }
777
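/*
 * Build a resolve request and multicast it to the RDMA_NL_GROUP_LS
 * listeners (typically a userspace resolver such as ibacm), then queue the
 * query on ib_nl_request_list with a sa_local_svc_timeout_ms deadline.
 * The timeout work is only started when this request is the first on the
 * list.
 */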
778 static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
779 {
780         struct sk_buff *skb = NULL;
781         struct nlmsghdr *nlh;
782         void *data;
783         struct ib_sa_mad *mad;
784         int len;
785         unsigned long flags;
786         unsigned long delay;
787         gfp_t gfp_flag;
788         int ret;
789
790         INIT_LIST_HEAD(&query->list);
791         query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq);
792
793         mad = query->mad_buf->mad;
794         len = ib_nl_get_path_rec_attrs_len(mad->sa_hdr.comp_mask);
795         if (len <= 0)
796                 return -EMSGSIZE;
797
798         skb = nlmsg_new(len, gfp_mask);
799         if (!skb)
800                 return -ENOMEM;
801
802         /* Put nlmsg header only for now */
803         data = ibnl_put_msg(skb, &nlh, query->seq, 0, RDMA_NL_LS,
804                             RDMA_NL_LS_OP_RESOLVE, NLM_F_REQUEST);
805         if (!data) {
806                 nlmsg_free(skb);
807                 return -EMSGSIZE;
808         }
809
810         /* Add attributes */
811         ib_nl_set_path_rec_attrs(skb, query);
812
813         /* Repair the nlmsg header length */
814         nlmsg_end(skb, nlh);
815
816         gfp_flag = ((gfp_mask & GFP_ATOMIC) == GFP_ATOMIC) ? GFP_ATOMIC :
817                 GFP_NOWAIT;
818
819         spin_lock_irqsave(&ib_nl_request_lock, flags);
820         ret = rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, gfp_flag);
821
822         if (ret)
823                 goto out;
824
825         /* Put the request on the list. */
826         delay = msecs_to_jiffies(sa_local_svc_timeout_ms);
827         query->timeout = delay + jiffies;
828         list_add_tail(&query->list, &ib_nl_request_list);
829         /* Start the timeout if this is the only request */
830         if (ib_nl_request_list.next == &query->list)
831                 queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
832
833 out:
834         spin_unlock_irqrestore(&ib_nl_request_lock, flags);
835
836         return ret;
837 }
838
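/*
 * Mark a pending netlink request cancelled and expire it immediately so
 * that the timeout worker completes it.  Returns 1 if the query was found
 * on the request list, 0 otherwise (in which case the caller cancels the
 * MAD instead).
 */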
839 static int ib_nl_cancel_request(struct ib_sa_query *query)
840 {
841         unsigned long flags;
842         struct ib_sa_query *wait_query;
843         int found = 0;
844
845         spin_lock_irqsave(&ib_nl_request_lock, flags);
846         list_for_each_entry(wait_query, &ib_nl_request_list, list) {
847                 /* Let the timeout take care of the callback */
848                 if (query == wait_query) {
849                         query->flags |= IB_SA_CANCEL;
850                         query->timeout = jiffies;
851                         list_move(&query->list, &ib_nl_request_list);
852                         found = 1;
853                         mod_delayed_work(ib_nl_wq, &ib_nl_timed_work, 1);
854                         break;
855                 }
856         }
857         spin_unlock_irqrestore(&ib_nl_request_lock, flags);
858
859         return found;
860 }
861
862 static void send_handler(struct ib_mad_agent *agent,
863                          struct ib_mad_send_wc *mad_send_wc);
864
865 static void ib_nl_process_good_resolve_rsp(struct ib_sa_query *query,
866                                            const struct nlmsghdr *nlh)
867 {
868         struct ib_mad_send_wc mad_send_wc;
869         struct ib_sa_mad *mad = NULL;
870         const struct nlattr *head, *curr;
871         struct ib_path_rec_data  *rec;
872         int len, rem;
873         u32 mask = 0;
874         int status = -EIO;
875
876         if (query->callback) {
877                 head = (const struct nlattr *) nlmsg_data(nlh);
878                 len = nlmsg_len(nlh);
879                 switch (query->path_use) {
880                 case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL:
881                         mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND;
882                         break;
883
884                 case LS_RESOLVE_PATH_USE_ALL:
885                 case LS_RESOLVE_PATH_USE_GMP:
886                 default:
887                         mask = IB_PATH_PRIMARY | IB_PATH_GMP |
888                                 IB_PATH_BIDIRECTIONAL;
889                         break;
890                 }
891                 nla_for_each_attr(curr, head, len, rem) {
892                         if (curr->nla_type == LS_NLA_TYPE_PATH_RECORD) {
893                                 rec = nla_data(curr);
894                                 /*
895                                  * Get the first one. In the future, we may
896                                  * need to get up to 6 pathrecords.
897                                  */
898                                 if ((rec->flags & mask) == mask) {
899                                         mad = query->mad_buf->mad;
900                                         mad->mad_hdr.method |=
901                                                 IB_MGMT_METHOD_RESP;
902                                         memcpy(mad->data, rec->path_rec,
903                                                sizeof(rec->path_rec));
904                                         status = 0;
905                                         break;
906                                 }
907                         }
908                 }
909                 query->callback(query, status, mad);
910         }
911
912         mad_send_wc.send_buf = query->mad_buf;
913         mad_send_wc.status = IB_WC_SUCCESS;
914         send_handler(query->mad_buf->mad_agent, &mad_send_wc);
915 }
916
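/*
 * Delayed-work handler: every request whose deadline has passed is taken
 * off the list and falls back to a direct MAD send to the SA; cancelled
 * queries (and failed sends) are completed through send_handler() with a
 * flush error.  The work re-arms itself for the next pending deadline.
 */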
917 static void ib_nl_request_timeout(struct work_struct *work)
918 {
919         unsigned long flags;
920         struct ib_sa_query *query;
921         unsigned long delay;
922         struct ib_mad_send_wc mad_send_wc;
923         int ret;
924
925         spin_lock_irqsave(&ib_nl_request_lock, flags);
926         while (!list_empty(&ib_nl_request_list)) {
927                 query = list_entry(ib_nl_request_list.next,
928                                    struct ib_sa_query, list);
929
930                 if (time_after(query->timeout, jiffies)) {
931                         delay = query->timeout - jiffies;
932                         if ((long)delay <= 0)
933                                 delay = 1;
934                         queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
935                         break;
936                 }
937
938                 list_del(&query->list);
939                 ib_sa_disable_local_svc(query);
940                 /* Hold the lock to protect against query cancellation */
941                 if (ib_sa_query_cancelled(query))
942                         ret = -1;
943                 else
944                         ret = ib_post_send_mad(query->mad_buf, NULL);
945                 if (ret) {
946                         mad_send_wc.send_buf = query->mad_buf;
947                         mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
948                         spin_unlock_irqrestore(&ib_nl_request_lock, flags);
949                         send_handler(query->port->agent, &mad_send_wc);
950                         spin_lock_irqsave(&ib_nl_request_lock, flags);
951                 }
952         }
953         spin_unlock_irqrestore(&ib_nl_request_lock, flags);
954 }
955
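/*
 * Netlink handler for the LS set-timeout request: lets the userspace
 * resolver tune sa_local_svc_timeout_ms.  The new value is clamped to
 * [IB_SA_LOCAL_SVC_TIMEOUT_MIN, IB_SA_LOCAL_SVC_TIMEOUT_MAX] and the
 * deadlines of queued requests are shifted by the difference.
 */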
956 int ib_nl_handle_set_timeout(struct sk_buff *skb,
957                              struct nlmsghdr *nlh,
958                              struct netlink_ext_ack *extack)
959 {
960         int timeout, delta, abs_delta;
961         const struct nlattr *attr;
962         unsigned long flags;
963         struct ib_sa_query *query;
964         long delay = 0;
965         struct nlattr *tb[LS_NLA_TYPE_MAX];
966         int ret;
967
968         if (!(nlh->nlmsg_flags & NLM_F_REQUEST) ||
969             !(NETLINK_CB(skb).sk))
970                 return -EPERM;
971
972         ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
973                                    nlmsg_len(nlh), ib_nl_policy, NULL);
974         attr = (const struct nlattr *)tb[LS_NLA_TYPE_TIMEOUT];
975         if (ret || !attr)
976                 goto settimeout_out;
977
978         timeout = *(int *) nla_data(attr);
979         if (timeout < IB_SA_LOCAL_SVC_TIMEOUT_MIN)
980                 timeout = IB_SA_LOCAL_SVC_TIMEOUT_MIN;
981         if (timeout > IB_SA_LOCAL_SVC_TIMEOUT_MAX)
982                 timeout = IB_SA_LOCAL_SVC_TIMEOUT_MAX;
983
984         delta = timeout - sa_local_svc_timeout_ms;
985         if (delta < 0)
986                 abs_delta = -delta;
987         else
988                 abs_delta = delta;
989
990         if (delta != 0) {
991                 spin_lock_irqsave(&ib_nl_request_lock, flags);
992                 sa_local_svc_timeout_ms = timeout;
993                 list_for_each_entry(query, &ib_nl_request_list, list) {
994                         if (delta < 0 && abs_delta > query->timeout)
995                                 query->timeout = 0;
996                         else
997                                 query->timeout += delta;
998
999                         /* Get the new delay from the first entry */
1000                         if (!delay) {
1001                                 delay = query->timeout - jiffies;
1002                                 if (delay <= 0)
1003                                         delay = 1;
1004                         }
1005                 }
1006                 if (delay)
1007                         mod_delayed_work(ib_nl_wq, &ib_nl_timed_work,
1008                                          (unsigned long)delay);
1009                 spin_unlock_irqrestore(&ib_nl_request_lock, flags);
1010         }
1011
1012 settimeout_out:
1013         return 0;
1014 }
1015
1016 static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh)
1017 {
1018         struct nlattr *tb[LS_NLA_TYPE_MAX];
1019         int ret;
1020
1021         if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR)
1022                 return 0;
1023
1024         ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
1025                                    nlmsg_len(nlh), ib_nl_policy, NULL);
1026         if (ret)
1027                 return 0;
1028
1029         return 1;
1030 }
1031
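/*
 * Netlink handler for resolve responses: match nlmsg_seq against the
 * pending request list.  A good response is turned into a synthetic MAD
 * completion by ib_nl_process_good_resolve_rsp(); otherwise local service
 * is disabled for the query and the MAD is posted to the SA as a fallback.
 * Cancelled queries are left for the timeout worker.
 */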
1032 int ib_nl_handle_resolve_resp(struct sk_buff *skb,
1033                               struct nlmsghdr *nlh,
1034                               struct netlink_ext_ack *extack)
1035 {
1036         unsigned long flags;
1037         struct ib_sa_query *query = NULL, *iter;
1038         struct ib_mad_send_buf *send_buf;
1039         struct ib_mad_send_wc mad_send_wc;
1040         int ret;
1041
1042         if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
1043             !(NETLINK_CB(skb).sk))
1044                 return -EPERM;
1045
1046         spin_lock_irqsave(&ib_nl_request_lock, flags);
1047         list_for_each_entry(iter, &ib_nl_request_list, list) {
1048                 /*
1049                  * If the query is cancelled, let the timeout routine
1050                  * take care of it.
1051                  */
1052                 if (nlh->nlmsg_seq == iter->seq) {
1053                         if (!ib_sa_query_cancelled(iter)) {
1054                                 list_del(&iter->list);
1055                                 query = iter;
1056                         }
1057                         break;
1058                 }
1059         }
1060
1061         if (!query) {
1062                 spin_unlock_irqrestore(&ib_nl_request_lock, flags);
1063                 goto resp_out;
1064         }
1065
1066         send_buf = query->mad_buf;
1067
1068         if (!ib_nl_is_good_resolve_resp(nlh)) {
1069                 /* if the result is a failure, send out the packet via IB */
1070                 ib_sa_disable_local_svc(query);
1071                 ret = ib_post_send_mad(query->mad_buf, NULL);
1072                 spin_unlock_irqrestore(&ib_nl_request_lock, flags);
1073                 if (ret) {
1074                         mad_send_wc.send_buf = send_buf;
1075                         mad_send_wc.status = IB_WC_GENERAL_ERR;
1076                         send_handler(query->port->agent, &mad_send_wc);
1077                 }
1078         } else {
1079                 spin_unlock_irqrestore(&ib_nl_request_lock, flags);
1080                 ib_nl_process_good_resolve_rsp(query, nlh);
1081         }
1082
1083 resp_out:
1084         return 0;
1085 }
1086
1087 static void free_sm_ah(struct kref *kref)
1088 {
1089         struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
1090
1091         rdma_destroy_ah(sm_ah->ah, 0);
1092         kfree(sm_ah);
1093 }
1094
1095 void ib_sa_register_client(struct ib_sa_client *client)
1096 {
1097         atomic_set(&client->users, 1);
1098         init_completion(&client->comp);
1099 }
1100 EXPORT_SYMBOL(ib_sa_register_client);
1101
1102 void ib_sa_unregister_client(struct ib_sa_client *client)
1103 {
1104         ib_sa_client_put(client);
1105         wait_for_completion(&client->comp);
1106 }
1107 EXPORT_SYMBOL(ib_sa_unregister_client);
1108
1109 /**
1110  * ib_sa_cancel_query - try to cancel an SA query
1111  * @id: ID of query to cancel
1112  * @query: query pointer to cancel
1113  *
1114  * Try to cancel an SA query.  If the id and query don't match up or
1115  * the query has already completed, nothing is done.  Otherwise the
1116  * query is canceled and will complete with a status of -EINTR.
1117  */
1118 void ib_sa_cancel_query(int id, struct ib_sa_query *query)
1119 {
1120         unsigned long flags;
1121         struct ib_mad_send_buf *mad_buf;
1122
1123         xa_lock_irqsave(&queries, flags);
1124         if (xa_load(&queries, id) != query) {
1125                 xa_unlock_irqrestore(&queries, flags);
1126                 return;
1127         }
1128         mad_buf = query->mad_buf;
1129         xa_unlock_irqrestore(&queries, flags);
1130
1131         /*
1132          * If the query is still on the netlink request list, schedule
1133          * it to be cancelled by the timeout routine. Otherwise, it has been
1134          * sent to the MAD layer and has to be cancelled from there.
1135          */
1136         if (!ib_nl_cancel_request(query))
1137                 ib_cancel_mad(mad_buf);
1138 }
1139 EXPORT_SYMBOL(ib_sa_cancel_query);
1140
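/*
 * Return the source path mask cached from the SM's address handle for this
 * port (0x7f when unknown); used to mask the source path bits when building
 * ah_attr from a path record.
 */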
1141 static u8 get_src_path_mask(struct ib_device *device, u32 port_num)
1142 {
1143         struct ib_sa_device *sa_dev;
1144         struct ib_sa_port   *port;
1145         unsigned long flags;
1146         u8 src_path_mask;
1147
1148         sa_dev = ib_get_client_data(device, &sa_client);
1149         if (!sa_dev)
1150                 return 0x7f;
1151
1152         port  = &sa_dev->port[port_num - sa_dev->start_port];
1153         spin_lock_irqsave(&port->ah_lock, flags);
1154         src_path_mask = port->sm_ah ? port->sm_ah->src_path_mask : 0x7f;
1155         spin_unlock_irqrestore(&port->ah_lock, flags);
1156
1157         return src_path_mask;
1158 }
1159
1160 static int init_ah_attr_grh_fields(struct ib_device *device, u32 port_num,
1161                                    struct sa_path_rec *rec,
1162                                    struct rdma_ah_attr *ah_attr,
1163                                    const struct ib_gid_attr *gid_attr)
1164 {
1165         enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
1166
1167         if (!gid_attr) {
1168                 gid_attr = rdma_find_gid_by_port(device, &rec->sgid, type,
1169                                                  port_num, NULL);
1170                 if (IS_ERR(gid_attr))
1171                         return PTR_ERR(gid_attr);
1172         } else
1173                 rdma_hold_gid_attr(gid_attr);
1174
1175         rdma_move_grh_sgid_attr(ah_attr, &rec->dgid,
1176                                 be32_to_cpu(rec->flow_label),
1177                                 rec->hop_limit, rec->traffic_class,
1178                                 gid_attr);
1179         return 0;
1180 }
1181
1182 /**
1183  * ib_init_ah_attr_from_path - Initialize address handle attributes based on
1184  *   an SA path record.
1185  * @device: Device associated with the ah attributes initialization.
1186  * @port_num: Port on the specified device.
1187  * @rec: path record entry to use for ah attributes initialization.
1188  * @ah_attr: address handle attributes to initialize from the path record.
1189  * @gid_attr: SGID attribute to consider during initialization.
1190  *
1191  * When ib_init_ah_attr_from_path() returns success,
1192  * (a) for the IB link layer, ah_attr optionally holds a reference to the
1193  * SGID attribute when a GRH is present;
1194  * (b) for the RoCE link layer, ah_attr holds a reference to the SGID attribute.
1195  * The user must invoke rdma_destroy_ah_attr() to release the SGID attribute
1196  * reference taken by ib_init_ah_attr_from_path().
1197  */
1198 int ib_init_ah_attr_from_path(struct ib_device *device, u32 port_num,
1199                               struct sa_path_rec *rec,
1200                               struct rdma_ah_attr *ah_attr,
1201                               const struct ib_gid_attr *gid_attr)
1202 {
1203         int ret = 0;
1204
1205         memset(ah_attr, 0, sizeof(*ah_attr));
1206         ah_attr->type = rdma_ah_find_type(device, port_num);
1207         rdma_ah_set_sl(ah_attr, rec->sl);
1208         rdma_ah_set_port_num(ah_attr, port_num);
1209         rdma_ah_set_static_rate(ah_attr, rec->rate);
1210
1211         if (sa_path_is_roce(rec)) {
1212                 ret = roce_resolve_route_from_path(rec, gid_attr);
1213                 if (ret)
1214                         return ret;
1215
1216                 memcpy(ah_attr->roce.dmac, sa_path_get_dmac(rec), ETH_ALEN);
1217         } else {
1218                 rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec)));
1219                 if (sa_path_is_opa(rec) &&
1220                     rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE))
1221                         rdma_ah_set_make_grd(ah_attr, true);
1222
1223                 rdma_ah_set_path_bits(ah_attr,
1224                                       be32_to_cpu(sa_path_get_slid(rec)) &
1225                                       get_src_path_mask(device, port_num));
1226         }
1227
1228         if (rec->hop_limit > 0 || sa_path_is_roce(rec))
1229                 ret = init_ah_attr_grh_fields(device, port_num,
1230                                               rec, ah_attr, gid_attr);
1231         return ret;
1232 }
1233 EXPORT_SYMBOL(ib_init_ah_attr_from_path);
1234
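/*
 * Take a reference on the port's current SM address handle (failing with
 * -EAGAIN if there is none or its DLID is not a valid unicast LID) and
 * allocate the MAD send buffer, using the OPA base version for OPA queries.
 */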
1235 static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
1236 {
1237         struct rdma_ah_attr ah_attr;
1238         unsigned long flags;
1239
1240         spin_lock_irqsave(&query->port->ah_lock, flags);
1241         if (!query->port->sm_ah) {
1242                 spin_unlock_irqrestore(&query->port->ah_lock, flags);
1243                 return -EAGAIN;
1244         }
1245         kref_get(&query->port->sm_ah->ref);
1246         query->sm_ah = query->port->sm_ah;
1247         spin_unlock_irqrestore(&query->port->ah_lock, flags);
1248
1249         /*
1250          * Always check that sm_ah has a valid DLID assigned
1251          * before querying for class port info.
1252          */
1253         if ((rdma_query_ah(query->sm_ah->ah, &ah_attr) < 0) ||
1254             !rdma_is_valid_unicast_lid(&ah_attr)) {
1255                 kref_put(&query->sm_ah->ref, free_sm_ah);
1256                 return -EAGAIN;
1257         }
1258         query->mad_buf = ib_create_send_mad(query->port->agent, 1,
1259                                             query->sm_ah->pkey_index,
1260                                             0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
1261                                             gfp_mask,
1262                                             ((query->flags & IB_SA_QUERY_OPA) ?
1263                                              OPA_MGMT_BASE_VERSION :
1264                                              IB_MGMT_BASE_VERSION));
1265         if (IS_ERR(query->mad_buf)) {
1266                 kref_put(&query->sm_ah->ref, free_sm_ah);
1267                 return -ENOMEM;
1268         }
1269
1270         query->mad_buf->ah = query->sm_ah->ah;
1271
1272         return 0;
1273 }
1274
1275 static void free_mad(struct ib_sa_query *query)
1276 {
1277         ib_free_send_mad(query->mad_buf);
1278         kref_put(&query->sm_ah->ref, free_sm_ah);
1279 }
1280
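/*
 * Initialize the common SA MAD header: base/class version (OPA or IB), the
 * SubnAdm management class, and a transaction ID built from the agent's
 * hi_tid and a global counter protected by tid_lock.
 */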
1281 static void init_mad(struct ib_sa_query *query, struct ib_mad_agent *agent)
1282 {
1283         struct ib_sa_mad *mad = query->mad_buf->mad;
1284         unsigned long flags;
1285
1286         memset(mad, 0, sizeof *mad);
1287
1288         if (query->flags & IB_SA_QUERY_OPA) {
1289                 mad->mad_hdr.base_version  = OPA_MGMT_BASE_VERSION;
1290                 mad->mad_hdr.class_version = OPA_SA_CLASS_VERSION;
1291         } else {
1292                 mad->mad_hdr.base_version  = IB_MGMT_BASE_VERSION;
1293                 mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
1294         }
1295         mad->mad_hdr.mgmt_class    = IB_MGMT_CLASS_SUBN_ADM;
1296         spin_lock_irqsave(&tid_lock, flags);
1297         mad->mad_hdr.tid           =
1298                 cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++);
1299         spin_unlock_irqrestore(&tid_lock, flags);
1300 }
1301
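/*
 * Allocate a query ID in the 'queries' xarray, spread timeout_ms over up to
 * 10 MAD retries, and submit the query: via the netlink local service first
 * when it is enabled (non-OPA query with a registered listener), otherwise
 * by posting the MAD directly.  Returns the ID on success or a negative
 * errno; the query must not be touched after a successful post.
 */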
1302 static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms,
1303                     gfp_t gfp_mask)
1304 {
1305         unsigned long flags;
1306         int ret, id;
1307         const int nmbr_sa_query_retries = 10;
1308
1309         xa_lock_irqsave(&queries, flags);
1310         ret = __xa_alloc(&queries, &id, query, xa_limit_32b, gfp_mask);
1311         xa_unlock_irqrestore(&queries, flags);
1312         if (ret < 0)
1313                 return ret;
1314
1315         query->mad_buf->timeout_ms  = timeout_ms / nmbr_sa_query_retries;
1316         query->mad_buf->retries = nmbr_sa_query_retries;
1317         if (!query->mad_buf->timeout_ms) {
1318                 /* Special case, very small timeout_ms */
1319                 query->mad_buf->timeout_ms = 1;
1320                 query->mad_buf->retries = timeout_ms;
1321         }
1322         query->mad_buf->context[0] = query;
1323         query->id = id;
1324
1325         if ((query->flags & IB_SA_ENABLE_LOCAL_SERVICE) &&
1326             (!(query->flags & IB_SA_QUERY_OPA))) {
1327                 if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) {
1328                         if (!ib_nl_make_request(query, gfp_mask))
1329                                 return id;
1330                 }
1331                 ib_sa_disable_local_svc(query);
1332         }
1333
1334         ret = ib_post_send_mad(query->mad_buf, NULL);
1335         if (ret) {
1336                 xa_lock_irqsave(&queries, flags);
1337                 __xa_erase(&queries, id);
1338                 xa_unlock_irqrestore(&queries, flags);
1339         }
1340
1341         /*
1342          * It's not safe to dereference query any more, because the
1343          * send may already have completed and freed the query in
1344          * another context.
1345          */
1346         return ret ? ret : id;
1347 }
1348
1349 void ib_sa_unpack_path(void *attribute, struct sa_path_rec *rec)
1350 {
1351         ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec);
1352 }
1353 EXPORT_SYMBOL(ib_sa_unpack_path);
1354
1355 void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute)
1356 {
1357         ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute);
1358 }
1359 EXPORT_SYMBOL(ib_sa_pack_path);
1360
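/*
 * Check the cached ClassPortInfo (under classport_lock) for the OPA
 * path-record support capability bit; returns false while the cache is not
 * yet valid or when the port reported an IB ClassPortInfo.
 */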
1361 static bool ib_sa_opa_pathrecord_support(struct ib_sa_client *client,
1362                                          struct ib_sa_device *sa_dev,
1363                                          u32 port_num)
1364 {
1365         struct ib_sa_port *port;
1366         unsigned long flags;
1367         bool ret = false;
1368
1369         port = &sa_dev->port[port_num - sa_dev->start_port];
1370         spin_lock_irqsave(&port->classport_lock, flags);
1371         if (!port->classport_info.valid)
1372                 goto ret;
1373
1374         if (port->classport_info.data.type == RDMA_CLASS_PORT_INFO_OPA)
1375                 ret = opa_get_cpi_capmask2(&port->classport_info.data.opa) &
1376                         OPA_CLASS_PORT_INFO_PR_SUPPORT;
1377 ret:
1378         spin_unlock_irqrestore(&port->classport_lock, flags);
1379         return ret;
1380 }
1381
1382 enum opa_pr_supported {
1383         PR_NOT_SUPPORTED,
1384         PR_OPA_SUPPORTED,
1385         PR_IB_SUPPORTED
1386 };
1387
1388 /*
1389  * opa_pr_query_possible - Check if current PR query can be an OPA query.
1390  *
1391  * Returns PR_NOT_SUPPORTED if a path record query is not
1392  * possible, PR_OPA_SUPPORTED if an OPA path record query
1393  * is possible and PR_IB_SUPPORTED if an IB path record
1394  * query is possible.
1395  */
1396 static int opa_pr_query_possible(struct ib_sa_client *client,
1397                                  struct ib_sa_device *sa_dev,
1398                                  struct ib_device *device, u32 port_num)
1399 {
1400         struct ib_port_attr port_attr;
1401
1402         if (ib_query_port(device, port_num, &port_attr))
1403                 return PR_NOT_SUPPORTED;
1404
1405         if (ib_sa_opa_pathrecord_support(client, sa_dev, port_num))
1406                 return PR_OPA_SUPPORTED;
1407
1408         if (port_attr.lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
1409                 return PR_NOT_SUPPORTED;
1410         else
1411                 return PR_IB_SUPPORTED;
1412 }
1413
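/*
 * Completion handler for path record queries: unpack the response with the
 * OPA or IB table as appropriate, convert an IB record to OPA format when
 * the caller asked for it via conv_pr, and hand the result (or NULL on
 * error) to the user's callback.
 */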
1414 static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
1415                                     int status,
1416                                     struct ib_sa_mad *mad)
1417 {
1418         struct ib_sa_path_query *query =
1419                 container_of(sa_query, struct ib_sa_path_query, sa_query);
1420
1421         if (mad) {
1422                 struct sa_path_rec rec;
1423
1424                 if (sa_query->flags & IB_SA_QUERY_OPA) {
1425                         ib_unpack(opa_path_rec_table,
1426                                   ARRAY_SIZE(opa_path_rec_table),
1427                                   mad->data, &rec);
1428                         rec.rec_type = SA_PATH_REC_TYPE_OPA;
1429                         query->callback(status, &rec, query->context);
1430                 } else {
1431                         ib_unpack(path_rec_table,
1432                                   ARRAY_SIZE(path_rec_table),
1433                                   mad->data, &rec);
1434                         rec.rec_type = SA_PATH_REC_TYPE_IB;
1435                         sa_path_set_dmac_zero(&rec);
1436
1437                         if (query->conv_pr) {
1438                                 struct sa_path_rec opa;
1439
1440                                 memset(&opa, 0, sizeof(struct sa_path_rec));
1441                                 sa_convert_path_ib_to_opa(&opa, &rec);
1442                                 query->callback(status, &opa, query->context);
1443                         } else {
1444                                 query->callback(status, &rec, query->context);
1445                         }
1446                 }
1447         } else
1448                 query->callback(status, NULL, query->context);
1449 }
1450
1451 static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
1452 {
1453         struct ib_sa_path_query *query =
1454                 container_of(sa_query, struct ib_sa_path_query, sa_query);
1455
1456         kfree(query->conv_pr);
1457         kfree(query);
1458 }
1459
1460 /**
1461  * ib_sa_path_rec_get - Start a Path get query
1462  * @client: SA client
1463  * @device: device to send query on
1464  * @port_num: port number to send query on
1465  * @rec: Path Record to send in query
1466  * @comp_mask: component mask to send in query
1467  * @timeout_ms: time to wait for response
1468  * @gfp_mask: GFP mask to use for internal allocations
1469  * @callback: function called when query completes, times out or is
1470  * canceled
1471  * @context: opaque user context passed to callback
1472  * @sa_query: query context, used to cancel query
1473  *
1474  * Send a Path Record Get query to the SA to look up a path.  The
1475  * callback function will be called when the query completes (or
1476  * fails); status is 0 for a successful response, -EINTR if the query
1477  * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
1478  * occurred sending the query.  The resp parameter of the callback is
1479  * only valid if status is 0.
1480  *
1481  * If the return value of ib_sa_path_rec_get() is negative, it is an
1482  * error code.  Otherwise it is a query ID that can be used to cancel
1483  * the query.
1484  */
1485 int ib_sa_path_rec_get(struct ib_sa_client *client,
1486                        struct ib_device *device, u32 port_num,
1487                        struct sa_path_rec *rec,
1488                        ib_sa_comp_mask comp_mask,
1489                        unsigned long timeout_ms, gfp_t gfp_mask,
1490                        void (*callback)(int status,
1491                                         struct sa_path_rec *resp,
1492                                         void *context),
1493                        void *context,
1494                        struct ib_sa_query **sa_query)
1495 {
1496         struct ib_sa_path_query *query;
1497         struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1498         struct ib_sa_port   *port;
1499         struct ib_mad_agent *agent;
1500         struct ib_sa_mad *mad;
1501         enum opa_pr_supported status;
1502         int ret;
1503
1504         if (!sa_dev)
1505                 return -ENODEV;
1506
1507         if ((rec->rec_type != SA_PATH_REC_TYPE_IB) &&
1508             (rec->rec_type != SA_PATH_REC_TYPE_OPA))
1509                 return -EINVAL;
1510
1511         port  = &sa_dev->port[port_num - sa_dev->start_port];
1512         agent = port->agent;
1513
1514         query = kzalloc(sizeof(*query), gfp_mask);
1515         if (!query)
1516                 return -ENOMEM;
1517
1518         query->sa_query.port     = port;
1519         if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
1520                 status = opa_pr_query_possible(client, sa_dev, device, port_num);
1521                 if (status == PR_NOT_SUPPORTED) {
1522                         ret = -EINVAL;
1523                         goto err1;
1524                 } else if (status == PR_OPA_SUPPORTED) {
1525                         query->sa_query.flags |= IB_SA_QUERY_OPA;
1526                 } else {
1527                         query->conv_pr =
1528                                 kmalloc(sizeof(*query->conv_pr), gfp_mask);
1529                         if (!query->conv_pr) {
1530                                 ret = -ENOMEM;
1531                                 goto err1;
1532                         }
1533                 }
1534         }
1535
1536         ret = alloc_mad(&query->sa_query, gfp_mask);
1537         if (ret)
1538                 goto err2;
1539
1540         ib_sa_client_get(client);
1541         query->sa_query.client = client;
1542         query->callback        = callback;
1543         query->context         = context;
1544
1545         mad = query->sa_query.mad_buf->mad;
1546         init_mad(&query->sa_query, agent);
1547
1548         query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
1549         query->sa_query.release  = ib_sa_path_rec_release;
1550         mad->mad_hdr.method      = IB_MGMT_METHOD_GET;
1551         mad->mad_hdr.attr_id     = cpu_to_be16(IB_SA_ATTR_PATH_REC);
1552         mad->sa_hdr.comp_mask    = comp_mask;
1553
1554         if (query->sa_query.flags & IB_SA_QUERY_OPA) {
1555                 ib_pack(opa_path_rec_table, ARRAY_SIZE(opa_path_rec_table),
1556                         rec, mad->data);
1557         } else if (query->conv_pr) {
1558                 sa_convert_path_opa_to_ib(query->conv_pr, rec);
1559                 ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
1560                         query->conv_pr, mad->data);
1561         } else {
1562                 ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
1563                         rec, mad->data);
1564         }
1565
1566         *sa_query = &query->sa_query;
1567
1568         query->sa_query.flags |= IB_SA_ENABLE_LOCAL_SERVICE;
1569         query->sa_query.mad_buf->context[1] = (query->conv_pr) ?
1570                                                 query->conv_pr : rec;
1571
1572         ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1573         if (ret < 0)
1574                 goto err3;
1575
1576         return ret;
1577
1578 err3:
1579         *sa_query = NULL;
1580         ib_sa_client_put(query->sa_query.client);
1581         free_mad(&query->sa_query);
1582 err2:
1583         kfree(query->conv_pr);
1584 err1:
1585         kfree(query);
1586         return ret;
1587 }
1588 EXPORT_SYMBOL(ib_sa_path_rec_get);
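/*
 * Usage sketch (illustrative only -- my_sa_client, my_path_done and
 * my_resolve_path are hypothetical names, not part of this file).  The
 * client would be registered once with ib_sa_register_client() before use:
 *
 *	static struct ib_sa_client my_sa_client;
 *
 *	static void my_path_done(int status, struct sa_path_rec *resp,
 *				 void *context)
 *	{
 *		if (!status)		// resp is only valid when status == 0
 *			pr_info("path resolved, dlid 0x%x\n",
 *				be32_to_cpu(sa_path_get_dlid(resp)));
 *		complete(context);
 *	}
 *
 *	static int my_resolve_path(struct ib_device *device, u32 port_num,
 *				   union ib_gid *sgid, union ib_gid *dgid)
 *	{
 *		DECLARE_COMPLETION_ONSTACK(done);
 *		struct ib_sa_query *query;
 *		struct sa_path_rec rec = {};
 *		int id;
 *
 *		rec.rec_type  = SA_PATH_REC_TYPE_IB;
 *		rec.sgid      = *sgid;
 *		rec.dgid      = *dgid;
 *		rec.numb_path = 1;
 *
 *		id = ib_sa_path_rec_get(&my_sa_client, device, port_num, &rec,
 *					IB_SA_PATH_REC_DGID |
 *					IB_SA_PATH_REC_SGID |
 *					IB_SA_PATH_REC_NUMB_PATH,
 *					1000, GFP_KERNEL, my_path_done, &done,
 *					&query);
 *		if (id < 0)		// a negative return is an error code
 *			return id;
 *
 *		// the non-negative id plus query could instead be handed to
 *		// ib_sa_cancel_query(id, query) to abort the request
 *		wait_for_completion(&done);
 *		return 0;
 *	}
 */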
1589
1590 static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
1591                                         int status,
1592                                         struct ib_sa_mad *mad)
1593 {
1594         struct ib_sa_mcmember_query *query =
1595                 container_of(sa_query, struct ib_sa_mcmember_query, sa_query);
1596
1597         if (mad) {
1598                 struct ib_sa_mcmember_rec rec;
1599
1600                 ib_unpack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
1601                           mad->data, &rec);
1602                 query->callback(status, &rec, query->context);
1603         } else
1604                 query->callback(status, NULL, query->context);
1605 }
1606
1607 static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
1608 {
1609         kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
1610 }
1611
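/*
 * ib_sa_mcmember_rec_query - post an SA MCMemberRecord query (for example a
 * SET to join or an IB_SA_METHOD_DELETE to leave a multicast group) and hand
 * the unpacked record to @callback when the response arrives, times out or
 * fails.  As with ib_sa_path_rec_get(), a negative return is an error code
 * and a non-negative return is a query ID usable with ib_sa_cancel_query().
 */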
1612 int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
1613                              struct ib_device *device, u32 port_num,
1614                              u8 method,
1615                              struct ib_sa_mcmember_rec *rec,
1616                              ib_sa_comp_mask comp_mask,
1617                              unsigned long timeout_ms, gfp_t gfp_mask,
1618                              void (*callback)(int status,
1619                                               struct ib_sa_mcmember_rec *resp,
1620                                               void *context),
1621                              void *context,
1622                              struct ib_sa_query **sa_query)
1623 {
1624         struct ib_sa_mcmember_query *query;
1625         struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1626         struct ib_sa_port   *port;
1627         struct ib_mad_agent *agent;
1628         struct ib_sa_mad *mad;
1629         int ret;
1630
1631         if (!sa_dev)
1632                 return -ENODEV;
1633
1634         port  = &sa_dev->port[port_num - sa_dev->start_port];
1635         agent = port->agent;
1636
1637         query = kzalloc(sizeof(*query), gfp_mask);
1638         if (!query)
1639                 return -ENOMEM;
1640
1641         query->sa_query.port     = port;
1642         ret = alloc_mad(&query->sa_query, gfp_mask);
1643         if (ret)
1644                 goto err1;
1645
1646         ib_sa_client_get(client);
1647         query->sa_query.client = client;
1648         query->callback        = callback;
1649         query->context         = context;
1650
1651         mad = query->sa_query.mad_buf->mad;
1652         init_mad(&query->sa_query, agent);
1653
1654         query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
1655         query->sa_query.release  = ib_sa_mcmember_rec_release;
1656         mad->mad_hdr.method      = method;
1657         mad->mad_hdr.attr_id     = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
1658         mad->sa_hdr.comp_mask    = comp_mask;
1659
1660         ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
1661                 rec, mad->data);
1662
1663         *sa_query = &query->sa_query;
1664
1665         ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1666         if (ret < 0)
1667                 goto err2;
1668
1669         return ret;
1670
1671 err2:
1672         *sa_query = NULL;
1673         ib_sa_client_put(query->sa_query.client);
1674         free_mad(&query->sa_query);
1675
1676 err1:
1677         kfree(query);
1678         return ret;
1679 }
1680
1681 /* Support GuidInfoRecord */
1682 static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query,
1683                                         int status,
1684                                         struct ib_sa_mad *mad)
1685 {
1686         struct ib_sa_guidinfo_query *query =
1687                 container_of(sa_query, struct ib_sa_guidinfo_query, sa_query);
1688
1689         if (mad) {
1690                 struct ib_sa_guidinfo_rec rec;
1691
1692                 ib_unpack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table),
1693                           mad->data, &rec);
1694                 query->callback(status, &rec, query->context);
1695         } else
1696                 query->callback(status, NULL, query->context);
1697 }
1698
1699 static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query)
1700 {
1701         kfree(container_of(sa_query, struct ib_sa_guidinfo_query, sa_query));
1702 }
1703
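/*
 * ib_sa_guid_info_rec_query - read or modify GuidInfoRecords at the SA.  Only
 * the GET, SET and DELETE methods are accepted; the unpacked record (or NULL
 * on failure) is delivered to @callback.
 */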
1704 int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
1705                               struct ib_device *device, u32 port_num,
1706                               struct ib_sa_guidinfo_rec *rec,
1707                               ib_sa_comp_mask comp_mask, u8 method,
1708                               unsigned long timeout_ms, gfp_t gfp_mask,
1709                               void (*callback)(int status,
1710                                                struct ib_sa_guidinfo_rec *resp,
1711                                                void *context),
1712                               void *context,
1713                               struct ib_sa_query **sa_query)
1714 {
1715         struct ib_sa_guidinfo_query *query;
1716         struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1717         struct ib_sa_port *port;
1718         struct ib_mad_agent *agent;
1719         struct ib_sa_mad *mad;
1720         int ret;
1721
1722         if (!sa_dev)
1723                 return -ENODEV;
1724
1725         if (method != IB_MGMT_METHOD_GET &&
1726             method != IB_MGMT_METHOD_SET &&
1727             method != IB_SA_METHOD_DELETE) {
1728                 return -EINVAL;
1729         }
1730
1731         port  = &sa_dev->port[port_num - sa_dev->start_port];
1732         agent = port->agent;
1733
1734         query = kzalloc(sizeof(*query), gfp_mask);
1735         if (!query)
1736                 return -ENOMEM;
1737
1738         query->sa_query.port = port;
1739         ret = alloc_mad(&query->sa_query, gfp_mask);
1740         if (ret)
1741                 goto err1;
1742
1743         ib_sa_client_get(client);
1744         query->sa_query.client = client;
1745         query->callback        = callback;
1746         query->context         = context;
1747
1748         mad = query->sa_query.mad_buf->mad;
1749         init_mad(&query->sa_query, agent);
1750
1751         query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL;
1752         query->sa_query.release  = ib_sa_guidinfo_rec_release;
1753
1754         mad->mad_hdr.method      = method;
1755         mad->mad_hdr.attr_id     = cpu_to_be16(IB_SA_ATTR_GUID_INFO_REC);
1756         mad->sa_hdr.comp_mask    = comp_mask;
1757
1758         ib_pack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), rec,
1759                 mad->data);
1760
1761         *sa_query = &query->sa_query;
1762
1763         ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1764         if (ret < 0)
1765                 goto err2;
1766
1767         return ret;
1768
1769 err2:
1770         *sa_query = NULL;
1771         ib_sa_client_put(query->sa_query.client);
1772         free_mad(&query->sa_query);
1773
1774 err1:
1775         kfree(query);
1776         return ret;
1777 }
1778 EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
1779
1780 struct ib_classport_info_context {
1781         struct completion       done;
1782         struct ib_sa_query      *sa_query;
1783 };
1784
1785 static void ib_classportinfo_cb(void *context)
1786 {
1787         struct ib_classport_info_context *cb_ctx = context;
1788
1789         complete(&cb_ctx->done);
1790 }
1791
1792 static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
1793                                               int status,
1794                                               struct ib_sa_mad *mad)
1795 {
1796         unsigned long flags;
1797         struct ib_sa_classport_info_query *query =
1798                 container_of(sa_query, struct ib_sa_classport_info_query, sa_query);
1799         struct ib_sa_classport_cache *info = &sa_query->port->classport_info;
1800
1801         if (mad) {
1802                 if (sa_query->flags & IB_SA_QUERY_OPA) {
1803                         struct opa_class_port_info rec;
1804
1805                         ib_unpack(opa_classport_info_rec_table,
1806                                   ARRAY_SIZE(opa_classport_info_rec_table),
1807                                   mad->data, &rec);
1808
1809                         spin_lock_irqsave(&sa_query->port->classport_lock,
1810                                           flags);
1811                         if (!status && !info->valid) {
1812                                 memcpy(&info->data.opa, &rec,
1813                                        sizeof(info->data.opa));
1814
1815                                 info->valid = true;
1816                                 info->data.type = RDMA_CLASS_PORT_INFO_OPA;
1817                         }
1818                         spin_unlock_irqrestore(&sa_query->port->classport_lock,
1819                                                flags);
1820
1821                 } else {
1822                         struct ib_class_port_info rec;
1823
1824                         ib_unpack(ib_classport_info_rec_table,
1825                                   ARRAY_SIZE(ib_classport_info_rec_table),
1826                                   mad->data, &rec);
1827
1828                         spin_lock_irqsave(&sa_query->port->classport_lock,
1829                                           flags);
1830                         if (!status && !info->valid) {
1831                                 memcpy(&info->data.ib, &rec,
1832                                        sizeof(info->data.ib));
1833
1834                                 info->valid = true;
1835                                 info->data.type = RDMA_CLASS_PORT_INFO_IB;
1836                         }
1837                         spin_unlock_irqrestore(&sa_query->port->classport_lock,
1838                                                flags);
1839                 }
1840         }
1841         query->callback(query->context);
1842 }
1843
1844 static void ib_sa_classport_info_rec_release(struct ib_sa_query *sa_query)
1845 {
1846         kfree(container_of(sa_query, struct ib_sa_classport_info_query,
1847                            sa_query));
1848 }
1849
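/*
 * Send a ClassPortInfo GET to the SA (using the OPA layout when the port
 * supports OPA address handles) so that the response can be cached in
 * port->classport_info by ib_sa_classport_info_rec_callback().
 */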
1850 static int ib_sa_classport_info_rec_query(struct ib_sa_port *port,
1851                                           unsigned long timeout_ms,
1852                                           void (*callback)(void *context),
1853                                           void *context,
1854                                           struct ib_sa_query **sa_query)
1855 {
1856         struct ib_mad_agent *agent;
1857         struct ib_sa_classport_info_query *query;
1858         struct ib_sa_mad *mad;
1859         gfp_t gfp_mask = GFP_KERNEL;
1860         int ret;
1861
1862         agent = port->agent;
1863
1864         query = kzalloc(sizeof(*query), gfp_mask);
1865         if (!query)
1866                 return -ENOMEM;
1867
1868         query->sa_query.port = port;
1869         query->sa_query.flags |= rdma_cap_opa_ah(port->agent->device,
1870                                                  port->port_num) ?
1871                                  IB_SA_QUERY_OPA : 0;
1872         ret = alloc_mad(&query->sa_query, gfp_mask);
1873         if (ret)
1874                 goto err_free;
1875
1876         query->callback = callback;
1877         query->context = context;
1878
1879         mad = query->sa_query.mad_buf->mad;
1880         init_mad(&query->sa_query, agent);
1881
1882         query->sa_query.callback = ib_sa_classport_info_rec_callback;
1883         query->sa_query.release  = ib_sa_classport_info_rec_release;
1884         mad->mad_hdr.method      = IB_MGMT_METHOD_GET;
1885         mad->mad_hdr.attr_id     = cpu_to_be16(IB_SA_ATTR_CLASS_PORTINFO);
1886         mad->sa_hdr.comp_mask    = 0;
1887         *sa_query = &query->sa_query;
1888
1889         ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1890         if (ret < 0)
1891                 goto err_free_mad;
1892
1893         return ret;
1894
1895 err_free_mad:
1896         *sa_query = NULL;
1897         free_mad(&query->sa_query);
1898
1899 err_free:
1900         kfree(query);
1901         return ret;
1902 }
1903
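/*
 * Delayed work that (re)fetches the ClassPortInfo for a port.  If the cached
 * copy is still invalid after the query completes, the work re-queues itself,
 * up to IB_SA_CPI_MAX_RETRY_CNT attempts spaced IB_SA_CPI_RETRY_WAIT msecs
 * apart.
 */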
1904 static void update_ib_cpi(struct work_struct *work)
1905 {
1906         struct ib_sa_port *port =
1907                 container_of(work, struct ib_sa_port, ib_cpi_work.work);
1908         struct ib_classport_info_context *cb_context;
1909         unsigned long flags;
1910         int ret;
1911
1912         /* If the classport info is valid, nothing
1913          * to do here.
1914          */
1915         spin_lock_irqsave(&port->classport_lock, flags);
1916         if (port->classport_info.valid) {
1917                 spin_unlock_irqrestore(&port->classport_lock, flags);
1918                 return;
1919         }
1920         spin_unlock_irqrestore(&port->classport_lock, flags);
1921
1922         cb_context = kmalloc(sizeof(*cb_context), GFP_KERNEL);
1923         if (!cb_context)
1924                 goto err_nomem;
1925
1926         init_completion(&cb_context->done);
1927
1928         ret = ib_sa_classport_info_rec_query(port, 3000,
1929                                              ib_classportinfo_cb, cb_context,
1930                                              &cb_context->sa_query);
1931         if (ret < 0)
1932                 goto free_cb_err;
1933         wait_for_completion(&cb_context->done);
1934 free_cb_err:
1935         kfree(cb_context);
1936         spin_lock_irqsave(&port->classport_lock, flags);
1937
1938         /* If the classport info is still not valid, the query should have
1939          * failed for some reason. Retry issuing the query
1940          */
1941         if (!port->classport_info.valid) {
1942                 port->classport_info.retry_cnt++;
1943                 if (port->classport_info.retry_cnt <=
1944                     IB_SA_CPI_MAX_RETRY_CNT) {
1945                         unsigned long delay =
1946                                 msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT);
1947
1948                         queue_delayed_work(ib_wq, &port->ib_cpi_work, delay);
1949                 }
1950         }
1951         spin_unlock_irqrestore(&port->classport_lock, flags);
1952
1953 err_nomem:
1954         return;
1955 }
1956
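/*
 * MAD send completion handler: reports timeouts, flushes and other send
 * errors to the query callback (a successful exchange is completed from
 * recv_handler() instead), then drops the query from the XArray and releases
 * its resources.
 */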
1957 static void send_handler(struct ib_mad_agent *agent,
1958                          struct ib_mad_send_wc *mad_send_wc)
1959 {
1960         struct ib_sa_query *query = mad_send_wc->send_buf->context[0];
1961         unsigned long flags;
1962
1963         if (query->callback)
1964                 switch (mad_send_wc->status) {
1965                 case IB_WC_SUCCESS:
1966                         /* No callback -- already got recv */
1967                         break;
1968                 case IB_WC_RESP_TIMEOUT_ERR:
1969                         query->callback(query, -ETIMEDOUT, NULL);
1970                         break;
1971                 case IB_WC_WR_FLUSH_ERR:
1972                         query->callback(query, -EINTR, NULL);
1973                         break;
1974                 default:
1975                         query->callback(query, -EIO, NULL);
1976                         break;
1977                 }
1978
1979         xa_lock_irqsave(&queries, flags);
1980         __xa_erase(&queries, query->id);
1981         xa_unlock_irqrestore(&queries, flags);
1982
1983         free_mad(query);
1984         if (query->client)
1985                 ib_sa_client_put(query->client);
1986         query->release(query);
1987 }
1988
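/*
 * MAD receive handler: delivers the SA response to the query callback,
 * mapping a non-zero MAD status to -EINVAL and a failed work completion
 * to -EIO.
 */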
1989 static void recv_handler(struct ib_mad_agent *mad_agent,
1990                          struct ib_mad_send_buf *send_buf,
1991                          struct ib_mad_recv_wc *mad_recv_wc)
1992 {
1993         struct ib_sa_query *query;
1994
1995         if (!send_buf)
1996                 return;
1997
1998         query = send_buf->context[0];
1999         if (query->callback) {
2000                 if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
2001                         query->callback(query,
2002                                         mad_recv_wc->recv_buf.mad->mad_hdr.status ?
2003                                         -EINVAL : 0,
2004                                         (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad);
2005                 else
2006                         query->callback(query, -EIO, NULL);
2007         }
2008
2009         ib_free_recv_mad(mad_recv_wc);
2010 }
2011
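/*
 * Rebuild the cached address handle used to reach the subnet manager: query
 * the port for the current SM LID/SL and the default PKey index, create a new
 * AH (with a GRH where required) and swap it in under ah_lock.
 */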
2012 static void update_sm_ah(struct work_struct *work)
2013 {
2014         struct ib_sa_port *port =
2015                 container_of(work, struct ib_sa_port, update_task);
2016         struct ib_sa_sm_ah *new_ah;
2017         struct ib_port_attr port_attr;
2018         struct rdma_ah_attr   ah_attr;
2019         bool grh_required;
2020
2021         if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
2022                 pr_warn("Couldn't query port\n");
2023                 return;
2024         }
2025
2026         new_ah = kmalloc(sizeof(*new_ah), GFP_KERNEL);
2027         if (!new_ah)
2028                 return;
2029
2030         kref_init(&new_ah->ref);
2031         new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
2032
2033         new_ah->pkey_index = 0;
2034         if (ib_find_pkey(port->agent->device, port->port_num,
2035                          IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index))
2036                 pr_err("Couldn't find index for default PKey\n");
2037
2038         memset(&ah_attr, 0, sizeof(ah_attr));
2039         ah_attr.type = rdma_ah_find_type(port->agent->device,
2040                                          port->port_num);
2041         rdma_ah_set_dlid(&ah_attr, port_attr.sm_lid);
2042         rdma_ah_set_sl(&ah_attr, port_attr.sm_sl);
2043         rdma_ah_set_port_num(&ah_attr, port->port_num);
2044
2045         grh_required = rdma_is_grh_required(port->agent->device,
2046                                             port->port_num);
2047
2048         /*
2049          * The OPA sm_lid of 0xFFFF needs special handling so that it can be
2050          * differentiated from a permissive LID of 0xFFFF.  We set the
2051          * grh_required flag here so the SA can program the DGID in the
2052          * address handle appropriately
2053          */
2054         if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA &&
2055             (grh_required ||
2056              port_attr.sm_lid == be16_to_cpu(IB_LID_PERMISSIVE)))
2057                 rdma_ah_set_make_grd(&ah_attr, true);
2058
2059         if (ah_attr.type == RDMA_AH_ATTR_TYPE_IB && grh_required) {
2060                 rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH);
2061                 rdma_ah_set_subnet_prefix(&ah_attr,
2062                                           cpu_to_be64(port_attr.subnet_prefix));
2063                 rdma_ah_set_interface_id(&ah_attr,
2064                                          cpu_to_be64(IB_SA_WELL_KNOWN_GUID));
2065         }
2066
2067         new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr,
2068                                     RDMA_CREATE_AH_SLEEPABLE);
2069         if (IS_ERR(new_ah->ah)) {
2070                 pr_warn("Couldn't create new SM AH\n");
2071                 kfree(new_ah);
2072                 return;
2073         }
2074
2075         spin_lock_irq(&port->ah_lock);
2076         if (port->sm_ah)
2077                 kref_put(&port->sm_ah->ref, free_sm_ah);
2078         port->sm_ah = new_ah;
2079         spin_unlock_irq(&port->ah_lock);
2080 }
2081
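/*
 * Device event handler: on port state, LID, PKey or SM changes the stale SM
 * address handle is dropped, the cached ClassPortInfo is invalidated where
 * appropriate and refresh work is queued.
 */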
2082 static void ib_sa_event(struct ib_event_handler *handler,
2083                         struct ib_event *event)
2084 {
2085         if (event->event == IB_EVENT_PORT_ERR    ||
2086             event->event == IB_EVENT_PORT_ACTIVE ||
2087             event->event == IB_EVENT_LID_CHANGE  ||
2088             event->event == IB_EVENT_PKEY_CHANGE ||
2089             event->event == IB_EVENT_SM_CHANGE   ||
2090             event->event == IB_EVENT_CLIENT_REREGISTER) {
2091                 unsigned long flags;
2092                 struct ib_sa_device *sa_dev =
2093                         container_of(handler, typeof(*sa_dev), event_handler);
2094                 u32 port_num = event->element.port_num - sa_dev->start_port;
2095                 struct ib_sa_port *port = &sa_dev->port[port_num];
2096
2097                 if (!rdma_cap_ib_sa(handler->device, port->port_num))
2098                         return;
2099
2100                 spin_lock_irqsave(&port->ah_lock, flags);
2101                 if (port->sm_ah)
2102                         kref_put(&port->sm_ah->ref, free_sm_ah);
2103                 port->sm_ah = NULL;
2104                 spin_unlock_irqrestore(&port->ah_lock, flags);
2105
2106                 if (event->event == IB_EVENT_SM_CHANGE ||
2107                     event->event == IB_EVENT_CLIENT_REREGISTER ||
2108                     event->event == IB_EVENT_LID_CHANGE ||
2109                     event->event == IB_EVENT_PORT_ACTIVE) {
2110                         unsigned long delay =
2111                                 msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT);
2112
2113                         spin_lock_irqsave(&port->classport_lock, flags);
2114                         port->classport_info.valid = false;
2115                         port->classport_info.retry_cnt = 0;
2116                         spin_unlock_irqrestore(&port->classport_lock, flags);
2117                         queue_delayed_work(ib_wq,
2118                                            &port->ib_cpi_work, delay);
2119                 }
2120                 queue_work(ib_wq, &sa_dev->port[port_num].update_task);
2121         }
2122 }
2123
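/*
 * Client add callback: allocate per-port state, register a GSI MAD agent on
 * every SA-capable port, then register the event handler and prime the SM
 * address handle for each of those ports.
 */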
2124 static int ib_sa_add_one(struct ib_device *device)
2125 {
2126         struct ib_sa_device *sa_dev;
2127         int s, e, i;
2128         int count = 0;
2129         int ret;
2130
2131         s = rdma_start_port(device);
2132         e = rdma_end_port(device);
2133
2134         sa_dev = kzalloc(struct_size(sa_dev, port, e - s + 1), GFP_KERNEL);
2135         if (!sa_dev)
2136                 return -ENOMEM;
2137
2138         sa_dev->start_port = s;
2139         sa_dev->end_port   = e;
2140
2141         for (i = 0; i <= e - s; ++i) {
2142                 spin_lock_init(&sa_dev->port[i].ah_lock);
2143                 if (!rdma_cap_ib_sa(device, i + 1))
2144                         continue;
2145
2146                 sa_dev->port[i].sm_ah    = NULL;
2147                 sa_dev->port[i].port_num = i + s;
2148
2149                 spin_lock_init(&sa_dev->port[i].classport_lock);
2150                 sa_dev->port[i].classport_info.valid = false;
2151
2152                 sa_dev->port[i].agent =
2153                         ib_register_mad_agent(device, i + s, IB_QPT_GSI,
2154                                               NULL, 0, send_handler,
2155                                               recv_handler, sa_dev, 0);
2156                 if (IS_ERR(sa_dev->port[i].agent)) {
2157                         ret = PTR_ERR(sa_dev->port[i].agent);
2158                         goto err;
2159                 }
2160
2161                 INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
2162                 INIT_DELAYED_WORK(&sa_dev->port[i].ib_cpi_work,
2163                                   update_ib_cpi);
2164
2165                 count++;
2166         }
2167
2168         if (!count) {
2169                 ret = -EOPNOTSUPP;
2170                 goto free;
2171         }
2172
2173         ib_set_client_data(device, &sa_client, sa_dev);
2174
2175         /*
2176          * We register our event handler after everything is set up,
2177          * and then update our cached info after the event handler is
2178          * registered to avoid any problems if a port changes state
2179          * during our initialization.
2180          */
2181
2182         INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event);
2183         ib_register_event_handler(&sa_dev->event_handler);
2184
2185         for (i = 0; i <= e - s; ++i) {
2186                 if (rdma_cap_ib_sa(device, i + 1))
2187                         update_sm_ah(&sa_dev->port[i].update_task);
2188         }
2189
2190         return 0;
2191
2192 err:
2193         while (--i >= 0) {
2194                 if (rdma_cap_ib_sa(device, i + 1))
2195                         ib_unregister_mad_agent(sa_dev->port[i].agent);
2196         }
2197 free:
2198         kfree(sa_dev);
2199         return ret;
2200 }
2201
2202 static void ib_sa_remove_one(struct ib_device *device, void *client_data)
2203 {
2204         struct ib_sa_device *sa_dev = client_data;
2205         int i;
2206
2207         ib_unregister_event_handler(&sa_dev->event_handler);
2208         flush_workqueue(ib_wq);
2209
2210         for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
2211                 if (rdma_cap_ib_sa(device, i + 1)) {
2212                         cancel_delayed_work_sync(&sa_dev->port[i].ib_cpi_work);
2213                         ib_unregister_mad_agent(sa_dev->port[i].agent);
2214                         if (sa_dev->port[i].sm_ah)
2215                                 kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
2216                 }
2217
2218         }
2219
2220         kfree(sa_dev);
2221 }
2222
2223 int ib_sa_init(void)
2224 {
2225         int ret;
2226
2227         get_random_bytes(&tid, sizeof(tid));
2228
2229         atomic_set(&ib_nl_sa_request_seq, 0);
2230
2231         ret = ib_register_client(&sa_client);
2232         if (ret) {
2233                 pr_err("Couldn't register ib_sa client\n");
2234                 goto err1;
2235         }
2236
2237         ret = mcast_init();
2238         if (ret) {
2239                 pr_err("Couldn't initialize multicast handling\n");
2240                 goto err2;
2241         }
2242
2243         ib_nl_wq = alloc_ordered_workqueue("ib_nl_sa_wq", WQ_MEM_RECLAIM);
2244         if (!ib_nl_wq) {
2245                 ret = -ENOMEM;
2246                 goto err3;
2247         }
2248
2249         INIT_DELAYED_WORK(&ib_nl_timed_work, ib_nl_request_timeout);
2250
2251         return 0;
2252
2253 err3:
2254         mcast_cleanup();
2255 err2:
2256         ib_unregister_client(&sa_client);
2257 err1:
2258         return ret;
2259 }
2260
2261 void ib_sa_cleanup(void)
2262 {
2263         cancel_delayed_work(&ib_nl_timed_work);
2264         destroy_workqueue(ib_nl_wq);
2265         mcast_cleanup();
2266         ib_unregister_client(&sa_client);
2267         WARN_ON(!xa_empty(&queries));
2268 }