[releases.git] / smc / smc_core.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
4  *
5  *  Basic Transport Functions exploiting Infiniband API
6  *
7  *  Copyright IBM Corp. 2016
8  *
9  *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
10  */
11
12 #include <linux/socket.h>
13 #include <linux/if_vlan.h>
14 #include <linux/random.h>
15 #include <linux/workqueue.h>
16 #include <linux/wait.h>
17 #include <linux/reboot.h>
18 #include <linux/mutex.h>
19 #include <linux/list.h>
20 #include <linux/smc.h>
21 #include <net/tcp.h>
22 #include <net/sock.h>
23 #include <rdma/ib_verbs.h>
24 #include <rdma/ib_cache.h>
25
26 #include "smc.h"
27 #include "smc_clc.h"
28 #include "smc_core.h"
29 #include "smc_ib.h"
30 #include "smc_wr.h"
31 #include "smc_llc.h"
32 #include "smc_cdc.h"
33 #include "smc_close.h"
34 #include "smc_ism.h"
35 #include "smc_netlink.h"
36 #include "smc_stats.h"
37 #include "smc_tracepoint.h"
38
39 #define SMC_LGR_NUM_INCR                256
40 #define SMC_LGR_FREE_DELAY_SERV         (600 * HZ)
41 #define SMC_LGR_FREE_DELAY_CLNT         (SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
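
/*
 * For scale (with HZ ticks per second): SMC_LGR_FREE_DELAY_SERV is
 * 600 seconds (10 minutes) and SMC_LGR_FREE_DELAY_CLNT is 610
 * seconds, so an idle client link group always outlives its server
 * counterpart by 10 seconds.
 */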
42
43 struct smc_lgr_list smc_lgr_list = {    /* established link groups */
44         .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
45         .list = LIST_HEAD_INIT(smc_lgr_list.list),
46         .num = 0,
47 };
48
49 static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
50 static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);
51
52 static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
53                          struct smc_buf_desc *buf_desc);
54 static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);
55
56 static void smc_link_down_work(struct work_struct *work);
57
58 /* return head of link group list and its lock for a given link group */
59 static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
60                                                   spinlock_t **lgr_lock)
61 {
62         if (lgr->is_smcd) {
63                 *lgr_lock = &lgr->smcd->lgr_lock;
64                 return &lgr->smcd->lgr_list;
65         }
66
67         *lgr_lock = &smc_lgr_list.lock;
68         return &smc_lgr_list.list;
69 }
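
/*
 * Typical call pattern (a minimal sketch, mirroring what e.g.
 * smc_lgr_cleanup_early() below does): fetch list head and lock
 * together, then hold the lock while touching lgr->list:
 *
 *	spinlock_t *lgr_lock;
 *
 *	smc_lgr_list_head(lgr, &lgr_lock);
 *	spin_lock_bh(lgr_lock);
 *	list_del_init(&lgr->list);
 *	spin_unlock_bh(lgr_lock);
 */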
70
71 static void smc_ibdev_cnt_inc(struct smc_link *lnk)
72 {
73         atomic_inc(&lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]);
74 }
75
76 static void smc_ibdev_cnt_dec(struct smc_link *lnk)
77 {
78         atomic_dec(&lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]);
79 }
80
81 static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
82 {
83         /* client link group creation always follows the server link group
84          * creation. For client use a somewhat higher removal delay time,
85          * otherwise there is a risk of out-of-sync link groups.
86          */
87         if (!lgr->freeing) {
88                 mod_delayed_work(system_wq, &lgr->free_work,
89                                  (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
90                                                 SMC_LGR_FREE_DELAY_CLNT :
91                                                 SMC_LGR_FREE_DELAY_SERV);
92         }
93 }
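
/*
 * Note: mod_delayed_work() re-arms the timer if free_work is already
 * pending, so every call above pushes the free deadline out by a full
 * SMC_LGR_FREE_DELAY_* period; a link group is only freed after a
 * quiet period with no new scheduling activity.
 */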
94
95 /* Register connection's alert token in our lookup structure.
96  * To use rbtrees we have to implement our own insert core.
97  * Requires @conns_lock
98  * @conn        connection to register
99  * The rbtree insert itself cannot fail, hence no return value.
100  */
101 static void smc_lgr_add_alert_token(struct smc_connection *conn)
102 {
103         struct rb_node **link, *parent = NULL;
104         u32 token = conn->alert_token_local;
105
106         link = &conn->lgr->conns_all.rb_node;
107         while (*link) {
108                 struct smc_connection *cur = rb_entry(*link,
109                                         struct smc_connection, alert_node);
110
111                 parent = *link;
112                 if (cur->alert_token_local > token)
113                         link = &parent->rb_left;
114                 else
115                         link = &parent->rb_right;
116         }
117         /* Put the new node there */
118         rb_link_node(&conn->alert_node, parent, link);
119         rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
120 }
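
/*
 * A minimal lookup sketch for the tree built above; the helper name
 * is illustrative only (smc_lgr_find_conn(), used below in
 * smc_lgr_register_conn(), does essentially this). Must run under
 * @conns_lock, like the insert.
 */
static inline struct smc_connection *
smc_lgr_find_conn_sketch(u32 token, struct smc_link_group *lgr)
{
	struct rb_node *node = lgr->conns_all.rb_node;

	while (node) {
		struct smc_connection *cur = rb_entry(node,
					struct smc_connection, alert_node);

		if (cur->alert_token_local > token)
			node = node->rb_left;
		else if (cur->alert_token_local < token)
			node = node->rb_right;
		else
			return cur;	/* token found */
	}
	return NULL;
}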
121
122 /* assign an SMC-R link to the connection */
123 static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
124 {
125         enum smc_link_state expected = first ? SMC_LNK_ACTIVATING :
126                                        SMC_LNK_ACTIVE;
127         int i, j;
128
129         /* do link balancing */
130         conn->lnk = NULL;       /* reset conn->lnk first */
131         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
132                 struct smc_link *lnk = &conn->lgr->lnk[i];
133
134                 if (lnk->state != expected || lnk->link_is_asym)
135                         continue;
136                 if (conn->lgr->role == SMC_CLNT) {
137                         conn->lnk = lnk; /* temporary, SMC server assigns link */
138                         break;
139                 }
140                 if (conn->lgr->conns_num % 2) {
141                         for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
142                                 struct smc_link *lnk2;
143
144                                 lnk2 = &conn->lgr->lnk[j];
145                                 if (lnk2->state == expected &&
146                                     !lnk2->link_is_asym) {
147                                         conn->lnk = lnk2;
148                                         break;
149                                 }
150                         }
151                 }
152                 if (!conn->lnk)
153                         conn->lnk = lnk;
154                 break;
155         }
156         if (!conn->lnk)
157                 return SMC_CLC_DECL_NOACTLINK;
158         atomic_inc(&conn->lnk->conn_cnt);
159         return 0;
160 }
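
/*
 * Worked example of the balancing above (illustrative, assuming two
 * symmetric usable links lnk[0] and lnk[1] on the server side):
 * connection #0 (conns_num even) stays on lnk[0], connection #1
 * (conns_num odd) skips ahead to lnk[1], and so on, alternating.
 * A client only records a provisional choice here; the server makes
 * the final link assignment.
 */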
161
162 /* Register connection in link group by assigning an alert token
163  * registered in a search tree.
164  * Requires @conns_lock
165  * Note that '0' is a reserved value and not assigned.
166  */
167 static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
168 {
169         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
170         static atomic_t nexttoken = ATOMIC_INIT(0);
171         int rc;
172
173         if (!conn->lgr->is_smcd) {
174                 rc = smcr_lgr_conn_assign_link(conn, first);
175                 if (rc) {
176                         conn->lgr = NULL;
177                         return rc;
178                 }
179         }
180         /* find a new alert_token_local value not yet used by some connection
181          * in this link group
182          */
183         sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
184         while (!conn->alert_token_local) {
185                 conn->alert_token_local = atomic_inc_return(&nexttoken);
186                 if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
187                         conn->alert_token_local = 0;
188         }
189         smc_lgr_add_alert_token(conn);
190         conn->lgr->conns_num++;
191         return 0;
192 }
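
/*
 * Note that nexttoken is a single function-local static shared by all
 * link groups, so tokens tend to be unique across groups; the loop
 * above only has to skip the reserved value 0 and, once the 32-bit
 * counter wraps, any value still in use within this particular lgr.
 */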
193
194 /* Unregister connection and reset the alert token of the given connection.
195  */
196 static void __smc_lgr_unregister_conn(struct smc_connection *conn)
197 {
198         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
199         struct smc_link_group *lgr = conn->lgr;
200
201         rb_erase(&conn->alert_node, &lgr->conns_all);
202         if (conn->lnk)
203                 atomic_dec(&conn->lnk->conn_cnt);
204         lgr->conns_num--;
205         conn->alert_token_local = 0;
206         sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
207 }
208
209 /* Unregister connection from lgr
210  */
211 static void smc_lgr_unregister_conn(struct smc_connection *conn)
212 {
213         struct smc_link_group *lgr = conn->lgr;
214
215         if (!smc_conn_lgr_valid(conn))
216                 return;
217         write_lock_bh(&lgr->conns_lock);
218         if (conn->alert_token_local) {
219                 __smc_lgr_unregister_conn(conn);
220         }
221         write_unlock_bh(&lgr->conns_lock);
222 }
223
224 int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
225 {
226         struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
227         char hostname[SMC_MAX_HOSTNAME_LEN + 1];
228         char smc_seid[SMC_MAX_EID_LEN + 1];
229         struct nlattr *attrs;
230         u8 *seid = NULL;
231         u8 *host = NULL;
232         void *nlh;
233
234         nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
235                           &smc_gen_nl_family, NLM_F_MULTI,
236                           SMC_NETLINK_GET_SYS_INFO);
237         if (!nlh)
238                 goto errmsg;
239         if (cb_ctx->pos[0])
240                 goto errout;
241         attrs = nla_nest_start(skb, SMC_GEN_SYS_INFO);
242         if (!attrs)
243                 goto errout;
244         if (nla_put_u8(skb, SMC_NLA_SYS_VER, SMC_V2))
245                 goto errattr;
246         if (nla_put_u8(skb, SMC_NLA_SYS_REL, SMC_RELEASE))
247                 goto errattr;
248         if (nla_put_u8(skb, SMC_NLA_SYS_IS_ISM_V2, smc_ism_is_v2_capable()))
249                 goto errattr;
250         if (nla_put_u8(skb, SMC_NLA_SYS_IS_SMCR_V2, true))
251                 goto errattr;
252         smc_clc_get_hostname(&host);
253         if (host) {
254                 memcpy(hostname, host, SMC_MAX_HOSTNAME_LEN);
255                 hostname[SMC_MAX_HOSTNAME_LEN] = 0;
256                 if (nla_put_string(skb, SMC_NLA_SYS_LOCAL_HOST, hostname))
257                         goto errattr;
258         }
259         if (smc_ism_is_v2_capable()) {
260                 smc_ism_get_system_eid(&seid);
261                 memcpy(smc_seid, seid, SMC_MAX_EID_LEN);
262                 smc_seid[SMC_MAX_EID_LEN] = 0;
263                 if (nla_put_string(skb, SMC_NLA_SYS_SEID, smc_seid))
264                         goto errattr;
265         }
266         nla_nest_end(skb, attrs);
267         genlmsg_end(skb, nlh);
268         cb_ctx->pos[0] = 1;
269         return skb->len;
270
271 errattr:
272         nla_nest_cancel(skb, attrs);
273 errout:
274         genlmsg_cancel(skb, nlh);
275 errmsg:
276         return skb->len;
277 }
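
/*
 * All dump helpers in this file follow the same unwind discipline;
 * a minimal sketch (the helper name is illustrative only): pair
 * genlmsg_put() with genlmsg_end()/genlmsg_cancel() and every
 * nla_nest_start() with nla_nest_end()/nla_nest_cancel(), unwinding
 * innermost-first on failure.
 */
static inline int smc_nl_fill_sketch(struct sk_buff *skb,
				     struct netlink_callback *cb)
{
	struct nlattr *attrs;
	void *nlh;

	nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &smc_gen_nl_family, NLM_F_MULTI,
			  SMC_NETLINK_GET_SYS_INFO);
	if (!nlh)
		return -EMSGSIZE;
	attrs = nla_nest_start(skb, SMC_GEN_SYS_INFO);
	if (!attrs)
		goto errout;
	if (nla_put_u8(skb, SMC_NLA_SYS_VER, SMC_V2))
		goto errattr;
	nla_nest_end(skb, attrs);
	genlmsg_end(skb, nlh);
	return 0;

errattr:
	nla_nest_cancel(skb, attrs);
errout:
	genlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}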
278
279 /* Fill SMC_NLA_LGR_D_V2_COMMON/SMC_NLA_LGR_R_V2_COMMON nested attributes */
280 static int smc_nl_fill_lgr_v2_common(struct smc_link_group *lgr,
281                                      struct sk_buff *skb,
282                                      struct netlink_callback *cb,
283                                      struct nlattr *v2_attrs)
284 {
285         char smc_host[SMC_MAX_HOSTNAME_LEN + 1];
286         char smc_eid[SMC_MAX_EID_LEN + 1];
287
288         if (nla_put_u8(skb, SMC_NLA_LGR_V2_VER, lgr->smc_version))
289                 goto errv2attr;
290         if (nla_put_u8(skb, SMC_NLA_LGR_V2_REL, lgr->peer_smc_release))
291                 goto errv2attr;
292         if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os))
293                 goto errv2attr;
294         memcpy(smc_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN);
295         smc_host[SMC_MAX_HOSTNAME_LEN] = 0;
296         if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host))
297                 goto errv2attr;
298         memcpy(smc_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN);
299         smc_eid[SMC_MAX_EID_LEN] = 0;
300         if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid))
301                 goto errv2attr;
302
303         nla_nest_end(skb, v2_attrs);
304         return 0;
305
306 errv2attr:
307         nla_nest_cancel(skb, v2_attrs);
308         return -EMSGSIZE;
309 }
310
311 static int smc_nl_fill_smcr_lgr_v2(struct smc_link_group *lgr,
312                                    struct sk_buff *skb,
313                                    struct netlink_callback *cb)
314 {
315         struct nlattr *v2_attrs;
316
317         v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2);
318         if (!v2_attrs)
319                 goto errattr;
320         if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_DIRECT, !lgr->uses_gateway))
321                 goto errv2attr;
322
323         nla_nest_end(skb, v2_attrs);
324         return 0;
325
326 errv2attr:
327         nla_nest_cancel(skb, v2_attrs);
328 errattr:
329         return -EMSGSIZE;
330 }
331
332 static int smc_nl_fill_lgr(struct smc_link_group *lgr,
333                            struct sk_buff *skb,
334                            struct netlink_callback *cb)
335 {
336         char smc_target[SMC_MAX_PNETID_LEN + 1];
337         struct nlattr *attrs, *v2_attrs;
338
339         attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCR);
340         if (!attrs)
341                 goto errout;
342
343         if (nla_put_u32(skb, SMC_NLA_LGR_R_ID, *((u32 *)&lgr->id)))
344                 goto errattr;
345         if (nla_put_u32(skb, SMC_NLA_LGR_R_CONNS_NUM, lgr->conns_num))
346                 goto errattr;
347         if (nla_put_u8(skb, SMC_NLA_LGR_R_ROLE, lgr->role))
348                 goto errattr;
349         if (nla_put_u8(skb, SMC_NLA_LGR_R_TYPE, lgr->type))
350                 goto errattr;
351         if (nla_put_u8(skb, SMC_NLA_LGR_R_BUF_TYPE, lgr->buf_type))
352                 goto errattr;
353         if (nla_put_u8(skb, SMC_NLA_LGR_R_VLAN_ID, lgr->vlan_id))
354                 goto errattr;
355         if (nla_put_u64_64bit(skb, SMC_NLA_LGR_R_NET_COOKIE,
356                               lgr->net->net_cookie, SMC_NLA_LGR_R_PAD))
357                 goto errattr;
358         memcpy(smc_target, lgr->pnet_id, SMC_MAX_PNETID_LEN);
359         smc_target[SMC_MAX_PNETID_LEN] = 0;
360         if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target))
361                 goto errattr;
362         if (lgr->smc_version > SMC_V1) {
363                 v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2_COMMON);
364                 if (!v2_attrs)
365                         goto errattr;
366                 if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs))
367                         goto errattr;
368                 if (smc_nl_fill_smcr_lgr_v2(lgr, skb, cb))
369                         goto errattr;
370         }
371
372         nla_nest_end(skb, attrs);
373         return 0;
374 errattr:
375         nla_nest_cancel(skb, attrs);
376 errout:
377         return -EMSGSIZE;
378 }
379
380 static int smc_nl_fill_lgr_link(struct smc_link_group *lgr,
381                                 struct smc_link *link,
382                                 struct sk_buff *skb,
383                                 struct netlink_callback *cb)
384 {
385         char smc_ibname[IB_DEVICE_NAME_MAX];
386         u8 smc_gid_target[41];
387         struct nlattr *attrs;
388         u32 link_uid = 0;
389         void *nlh;
390
391         nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
392                           &smc_gen_nl_family, NLM_F_MULTI,
393                           SMC_NETLINK_GET_LINK_SMCR);
394         if (!nlh)
395                 goto errmsg;
396
397         attrs = nla_nest_start(skb, SMC_GEN_LINK_SMCR);
398         if (!attrs)
399                 goto errout;
400
401         if (nla_put_u8(skb, SMC_NLA_LINK_ID, link->link_id))
402                 goto errattr;
403         if (nla_put_u32(skb, SMC_NLA_LINK_STATE, link->state))
404                 goto errattr;
405         if (nla_put_u32(skb, SMC_NLA_LINK_CONN_CNT,
406                         atomic_read(&link->conn_cnt)))
407                 goto errattr;
408         if (nla_put_u8(skb, SMC_NLA_LINK_IB_PORT, link->ibport))
409                 goto errattr;
410         if (nla_put_u32(skb, SMC_NLA_LINK_NET_DEV, link->ndev_ifidx))
411                 goto errattr;
412         snprintf(smc_ibname, sizeof(smc_ibname), "%s", link->ibname);
413         if (nla_put_string(skb, SMC_NLA_LINK_IB_DEV, smc_ibname))
414                 goto errattr;
415         memcpy(&link_uid, link->link_uid, sizeof(link_uid));
416         if (nla_put_u32(skb, SMC_NLA_LINK_UID, link_uid))
417                 goto errattr;
418         memcpy(&link_uid, link->peer_link_uid, sizeof(link_uid));
419         if (nla_put_u32(skb, SMC_NLA_LINK_PEER_UID, link_uid))
420                 goto errattr;
421         memset(smc_gid_target, 0, sizeof(smc_gid_target));
422         smc_gid_be16_convert(smc_gid_target, link->gid);
423         if (nla_put_string(skb, SMC_NLA_LINK_GID, smc_gid_target))
424                 goto errattr;
425         memset(smc_gid_target, 0, sizeof(smc_gid_target));
426         smc_gid_be16_convert(smc_gid_target, link->peer_gid);
427         if (nla_put_string(skb, SMC_NLA_LINK_PEER_GID, smc_gid_target))
428                 goto errattr;
429
430         nla_nest_end(skb, attrs);
431         genlmsg_end(skb, nlh);
432         return 0;
433 errattr:
434         nla_nest_cancel(skb, attrs);
435 errout:
436         genlmsg_cancel(skb, nlh);
437 errmsg:
438         return -EMSGSIZE;
439 }
440
441 static int smc_nl_handle_lgr(struct smc_link_group *lgr,
442                              struct sk_buff *skb,
443                              struct netlink_callback *cb,
444                              bool list_links)
445 {
446         void *nlh;
447         int i;
448
449         nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
450                           &smc_gen_nl_family, NLM_F_MULTI,
451                           SMC_NETLINK_GET_LGR_SMCR);
452         if (!nlh)
453                 goto errmsg;
454         if (smc_nl_fill_lgr(lgr, skb, cb))
455                 goto errout;
456
457         genlmsg_end(skb, nlh);
458         if (!list_links)
459                 goto out;
460         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
461                 if (!smc_link_usable(&lgr->lnk[i]))
462                         continue;
463                 if (smc_nl_fill_lgr_link(lgr, &lgr->lnk[i], skb, cb))
464                         goto errout;
465         }
466 out:
467         return 0;
468
469 errout:
470         genlmsg_cancel(skb, nlh);
471 errmsg:
472         return -EMSGSIZE;
473 }
474
475 static void smc_nl_fill_lgr_list(struct smc_lgr_list *smc_lgr,
476                                  struct sk_buff *skb,
477                                  struct netlink_callback *cb,
478                                  bool list_links)
479 {
480         struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
481         struct smc_link_group *lgr;
482         int snum = cb_ctx->pos[0];
483         int num = 0;
484
485         spin_lock_bh(&smc_lgr->lock);
486         list_for_each_entry(lgr, &smc_lgr->list, list) {
487                 if (num < snum)
488                         goto next;
489                 if (smc_nl_handle_lgr(lgr, skb, cb, list_links))
490                         goto errout;
491 next:
492                 num++;
493         }
494 errout:
495         spin_unlock_bh(&smc_lgr->lock);
496         cb_ctx->pos[0] = num;
497 }
498
499 static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
500                                 struct sk_buff *skb,
501                                 struct netlink_callback *cb)
502 {
503         char smc_pnet[SMC_MAX_PNETID_LEN + 1];
504         struct nlattr *attrs;
505         void *nlh;
506
507         nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
508                           &smc_gen_nl_family, NLM_F_MULTI,
509                           SMC_NETLINK_GET_LGR_SMCD);
510         if (!nlh)
511                 goto errmsg;
512
513         attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCD);
514         if (!attrs)
515                 goto errout;
516
517         if (nla_put_u32(skb, SMC_NLA_LGR_D_ID, *((u32 *)&lgr->id)))
518                 goto errattr;
519         if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_GID, lgr->smcd->local_gid,
520                               SMC_NLA_LGR_D_PAD))
521                 goto errattr;
522         if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_GID, lgr->peer_gid,
523                               SMC_NLA_LGR_D_PAD))
524                 goto errattr;
525         if (nla_put_u8(skb, SMC_NLA_LGR_D_VLAN_ID, lgr->vlan_id))
526                 goto errattr;
527         if (nla_put_u32(skb, SMC_NLA_LGR_D_CONNS_NUM, lgr->conns_num))
528                 goto errattr;
529         if (nla_put_u32(skb, SMC_NLA_LGR_D_CHID, smc_ism_get_chid(lgr->smcd)))
530                 goto errattr;
531         memcpy(smc_pnet, lgr->smcd->pnetid, SMC_MAX_PNETID_LEN);
532         smc_pnet[SMC_MAX_PNETID_LEN] = 0;
533         if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet))
534                 goto errattr;
535         if (lgr->smc_version > SMC_V1) {
536                 struct nlattr *v2_attrs;
537
538                 v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_D_V2_COMMON);
539                 if (!v2_attrs)
540                         goto errattr;
541                 if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs))
542                         goto errattr;
543         }
544         nla_nest_end(skb, attrs);
545         genlmsg_end(skb, nlh);
546         return 0;
547
548 errattr:
549         nla_nest_cancel(skb, attrs);
550 errout:
551         genlmsg_cancel(skb, nlh);
552 errmsg:
553         return -EMSGSIZE;
554 }
555
556 static int smc_nl_handle_smcd_lgr(struct smcd_dev *dev,
557                                   struct sk_buff *skb,
558                                   struct netlink_callback *cb)
559 {
560         struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
561         struct smc_link_group *lgr;
562         int snum = cb_ctx->pos[1];
563         int rc = 0, num = 0;
564
565         spin_lock_bh(&dev->lgr_lock);
566         list_for_each_entry(lgr, &dev->lgr_list, list) {
567                 if (!lgr->is_smcd)
568                         continue;
569                 if (num < snum)
570                         goto next;
571                 rc = smc_nl_fill_smcd_lgr(lgr, skb, cb);
572                 if (rc)
573                         goto errout;
574 next:
575                 num++;
576         }
577 errout:
578         spin_unlock_bh(&dev->lgr_lock);
579         cb_ctx->pos[1] = num;
580         return rc;
581 }
582
583 static int smc_nl_fill_smcd_dev(struct smcd_dev_list *dev_list,
584                                 struct sk_buff *skb,
585                                 struct netlink_callback *cb)
586 {
587         struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
588         struct smcd_dev *smcd_dev;
589         int snum = cb_ctx->pos[0];
590         int rc = 0, num = 0;
591
592         mutex_lock(&dev_list->mutex);
593         list_for_each_entry(smcd_dev, &dev_list->list, list) {
594                 if (list_empty(&smcd_dev->lgr_list))
595                         continue;
596                 if (num < snum)
597                         goto next;
598                 rc = smc_nl_handle_smcd_lgr(smcd_dev, skb, cb);
599                 if (rc)
600                         goto errout;
601 next:
602                 num++;
603         }
604 errout:
605         mutex_unlock(&dev_list->mutex);
606         cb_ctx->pos[0] = num;
607         return rc;
608 }
609
610 int smcr_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb)
611 {
612         bool list_links = false;
613
614         smc_nl_fill_lgr_list(&smc_lgr_list, skb, cb, list_links);
615         return skb->len;
616 }
617
618 int smcr_nl_get_link(struct sk_buff *skb, struct netlink_callback *cb)
619 {
620         bool list_links = true;
621
622         smc_nl_fill_lgr_list(&smc_lgr_list, skb, cb, list_links);
623         return skb->len;
624 }
625
626 int smcd_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb)
627 {
628         smc_nl_fill_smcd_dev(&smcd_dev_list, skb, cb);
629         return skb->len;
630 }
631
632 void smc_lgr_cleanup_early(struct smc_link_group *lgr)
633 {
634         spinlock_t *lgr_lock;
635
636         if (!lgr)
637                 return;
638
639         smc_lgr_list_head(lgr, &lgr_lock);
640         spin_lock_bh(lgr_lock);
641         /* do not use this link group for new connections */
642         if (!list_empty(&lgr->list))
643                 list_del_init(&lgr->list);
644         spin_unlock_bh(lgr_lock);
645         __smc_lgr_terminate(lgr, true);
646 }
647
648 static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
649 {
650         int i;
651
652         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
653                 struct smc_link *lnk = &lgr->lnk[i];
654
655                 if (smc_link_sendable(lnk))
656                         lnk->state = SMC_LNK_INACTIVE;
657         }
658         wake_up_all(&lgr->llc_msg_waiter);
659         wake_up_all(&lgr->llc_flow_waiter);
660 }
661
662 static void smc_lgr_free(struct smc_link_group *lgr);
663
664 static void smc_lgr_free_work(struct work_struct *work)
665 {
666         struct smc_link_group *lgr = container_of(to_delayed_work(work),
667                                                   struct smc_link_group,
668                                                   free_work);
669         spinlock_t *lgr_lock;
670         bool conns;
671
672         smc_lgr_list_head(lgr, &lgr_lock);
673         spin_lock_bh(lgr_lock);
674         if (lgr->freeing) {
675                 spin_unlock_bh(lgr_lock);
676                 return;
677         }
678         read_lock_bh(&lgr->conns_lock);
679         conns = RB_EMPTY_ROOT(&lgr->conns_all);
680         read_unlock_bh(&lgr->conns_lock);
681         if (!conns) { /* number of lgr connections is no longer zero */
682                 spin_unlock_bh(lgr_lock);
683                 return;
684         }
685         list_del_init(&lgr->list); /* remove from smc_lgr_list */
686         lgr->freeing = 1; /* this instance does the freeing, no new schedule */
687         spin_unlock_bh(lgr_lock);
688         cancel_delayed_work(&lgr->free_work);
689
690         if (!lgr->is_smcd && !lgr->terminating)
691                 smc_llc_send_link_delete_all(lgr, true,
692                                              SMC_LLC_DEL_PROG_INIT_TERM);
693         if (lgr->is_smcd && !lgr->terminating)
694                 smc_ism_signal_shutdown(lgr);
695         if (!lgr->is_smcd)
696                 smcr_lgr_link_deactivate_all(lgr);
697         smc_lgr_free(lgr);
698 }
699
700 static void smc_lgr_terminate_work(struct work_struct *work)
701 {
702         struct smc_link_group *lgr = container_of(work, struct smc_link_group,
703                                                   terminate_work);
704
705         __smc_lgr_terminate(lgr, true);
706 }
707
708 /* return next unique link id for the lgr */
709 static u8 smcr_next_link_id(struct smc_link_group *lgr)
710 {
711         u8 link_id;
712         int i;
713
714         while (1) {
715 again:
716                 link_id = ++lgr->next_link_id;
717                 if (!link_id)   /* skip zero as link_id */
718                         link_id = ++lgr->next_link_id;
719                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
720                         if (smc_link_usable(&lgr->lnk[i]) &&
721                             lgr->lnk[i].link_id == link_id)
722                                 goto again;
723                 }
724                 break;
725         }
726         return link_id;
727 }
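
/*
 * link_id is a u8, so only 255 non-zero values exist; with far fewer
 * than 255 usable links per group a free id always exists, so the
 * rescan through the "again" label terminates.
 */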
728
729 static void smcr_copy_dev_info_to_link(struct smc_link *link)
730 {
731         struct smc_ib_device *smcibdev = link->smcibdev;
732
733         snprintf(link->ibname, sizeof(link->ibname), "%s",
734                  smcibdev->ibdev->name);
735         link->ndev_ifidx = smcibdev->ndev_ifidx[link->ibport - 1];
736 }
737
738 int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
739                    u8 link_idx, struct smc_init_info *ini)
740 {
741         struct smc_ib_device *smcibdev;
742         u8 rndvec[3];
743         int rc;
744
745         if (lgr->smc_version == SMC_V2) {
746                 lnk->smcibdev = ini->smcrv2.ib_dev_v2;
747                 lnk->ibport = ini->smcrv2.ib_port_v2;
748         } else {
749                 lnk->smcibdev = ini->ib_dev;
750                 lnk->ibport = ini->ib_port;
751         }
752         get_device(&lnk->smcibdev->ibdev->dev);
753         atomic_inc(&lnk->smcibdev->lnk_cnt);
754         refcount_set(&lnk->refcnt, 1); /* link refcnt is set to 1 */
755         lnk->clearing = 0;
756         lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu;
757         lnk->link_id = smcr_next_link_id(lgr);
758         lnk->lgr = lgr;
759         smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */
760         lnk->link_idx = link_idx;
761         lnk->wr_rx_id_compl = 0;
762         smc_ibdev_cnt_inc(lnk);
763         smcr_copy_dev_info_to_link(lnk);
764         atomic_set(&lnk->conn_cnt, 0);
765         smc_llc_link_set_uid(lnk);
766         INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
767         if (!lnk->smcibdev->initialized) {
768                 rc = (int)smc_ib_setup_per_ibdev(lnk->smcibdev);
769                 if (rc)
770                         goto out;
771         }
772         get_random_bytes(rndvec, sizeof(rndvec));
773         lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
774                 (rndvec[2] << 16);
775         rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
776                                   ini->vlan_id, lnk->gid, &lnk->sgid_index,
777                                   lgr->smc_version == SMC_V2 ?
778                                                   &ini->smcrv2 : NULL);
779         if (rc)
780                 goto out;
781         rc = smc_llc_link_init(lnk);
782         if (rc)
783                 goto out;
784         rc = smc_wr_alloc_link_mem(lnk);
785         if (rc)
786                 goto clear_llc_lnk;
787         rc = smc_ib_create_protection_domain(lnk);
788         if (rc)
789                 goto free_link_mem;
790         rc = smc_ib_create_queue_pair(lnk);
791         if (rc)
792                 goto dealloc_pd;
793         rc = smc_wr_create_link(lnk);
794         if (rc)
795                 goto destroy_qp;
796         lnk->state = SMC_LNK_ACTIVATING;
797         return 0;
798
799 destroy_qp:
800         smc_ib_destroy_queue_pair(lnk);
801 dealloc_pd:
802         smc_ib_dealloc_protection_domain(lnk);
803 free_link_mem:
804         smc_wr_free_link_mem(lnk);
805 clear_llc_lnk:
806         smc_llc_link_clear(lnk, false);
807 out:
808         smc_ibdev_cnt_dec(lnk);
809         put_device(&lnk->smcibdev->ibdev->dev);
810         smcibdev = lnk->smcibdev;
811         memset(lnk, 0, sizeof(struct smc_link));
812         lnk->state = SMC_LNK_UNUSED;
813         if (!atomic_dec_return(&smcibdev->lnk_cnt))
814                 wake_up(&smcibdev->lnks_deleted);
815         smc_lgr_put(lgr); /* lgr_hold above */
816         return rc;
817 }
818
819 /* create a new SMC link group */
820 static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
821 {
822         struct smc_link_group *lgr;
823         struct list_head *lgr_list;
824         struct smc_link *lnk;
825         spinlock_t *lgr_lock;
826         u8 link_idx;
827         int rc = 0;
828         int i;
829
830         if (ini->is_smcd && ini->vlan_id) {
831                 if (smc_ism_get_vlan(ini->ism_dev[ini->ism_selected],
832                                      ini->vlan_id)) {
833                         rc = SMC_CLC_DECL_ISMVLANERR;
834                         goto out;
835                 }
836         }
837
838         lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
839         if (!lgr) {
840                 rc = SMC_CLC_DECL_MEM;
841                 goto ism_put_vlan;
842         }
843         lgr->tx_wq = alloc_workqueue("smc_tx_wq-%*phN", 0, 0,
844                                      SMC_LGR_ID_SIZE, &lgr->id);
845         if (!lgr->tx_wq) {
846                 rc = -ENOMEM;
847                 goto free_lgr;
848         }
849         lgr->is_smcd = ini->is_smcd;
850         lgr->sync_err = 0;
851         lgr->terminating = 0;
852         lgr->freeing = 0;
853         lgr->vlan_id = ini->vlan_id;
854         refcount_set(&lgr->refcnt, 1); /* set lgr refcnt to 1 */
855         init_rwsem(&lgr->sndbufs_lock);
856         init_rwsem(&lgr->rmbs_lock);
857         rwlock_init(&lgr->conns_lock);
858         for (i = 0; i < SMC_RMBE_SIZES; i++) {
859                 INIT_LIST_HEAD(&lgr->sndbufs[i]);
860                 INIT_LIST_HEAD(&lgr->rmbs[i]);
861         }
862         lgr->next_link_id = 0;
863         smc_lgr_list.num += SMC_LGR_NUM_INCR;
864         memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
865         INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
866         INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
867         lgr->conns_all = RB_ROOT;
868         if (ini->is_smcd) {
869                 /* SMC-D specific settings */
870                 get_device(&ini->ism_dev[ini->ism_selected]->dev);
871                 lgr->peer_gid = ini->ism_peer_gid[ini->ism_selected];
872                 lgr->smcd = ini->ism_dev[ini->ism_selected];
873                 lgr_list = &ini->ism_dev[ini->ism_selected]->lgr_list;
874                 lgr_lock = &lgr->smcd->lgr_lock;
875                 lgr->smc_version = ini->smcd_version;
876                 lgr->peer_shutdown = 0;
877                 atomic_inc(&ini->ism_dev[ini->ism_selected]->lgr_cnt);
878         } else {
879                 /* SMC-R specific settings */
880                 struct smc_ib_device *ibdev;
881                 int ibport;
882
883                 lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
884                 lgr->smc_version = ini->smcr_version;
885                 memcpy(lgr->peer_systemid, ini->peer_systemid,
886                        SMC_SYSTEMID_LEN);
887                 if (lgr->smc_version == SMC_V2) {
888                         ibdev = ini->smcrv2.ib_dev_v2;
889                         ibport = ini->smcrv2.ib_port_v2;
890                         lgr->saddr = ini->smcrv2.saddr;
891                         lgr->uses_gateway = ini->smcrv2.uses_gateway;
892                         memcpy(lgr->nexthop_mac, ini->smcrv2.nexthop_mac,
893                                ETH_ALEN);
894                 } else {
895                         ibdev = ini->ib_dev;
896                         ibport = ini->ib_port;
897                 }
898                 memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1],
899                        SMC_MAX_PNETID_LEN);
900                 rc = smc_wr_alloc_lgr_mem(lgr);
901                 if (rc)
902                         goto free_wq;
903                 smc_llc_lgr_init(lgr, smc);
904
905                 link_idx = SMC_SINGLE_LINK;
906                 lnk = &lgr->lnk[link_idx];
907                 rc = smcr_link_init(lgr, lnk, link_idx, ini);
908                 if (rc) {
909                         smc_wr_free_lgr_mem(lgr);
910                         goto free_wq;
911                 }
912                 lgr->net = smc_ib_net(lnk->smcibdev);
913                 lgr_list = &smc_lgr_list.list;
914                 lgr_lock = &smc_lgr_list.lock;
915                 lgr->buf_type = lgr->net->smc.sysctl_smcr_buf_type;
916                 atomic_inc(&lgr_cnt);
917         }
918         smc->conn.lgr = lgr;
919         spin_lock_bh(lgr_lock);
920         list_add_tail(&lgr->list, lgr_list);
921         spin_unlock_bh(lgr_lock);
922         return 0;
923
924 free_wq:
925         destroy_workqueue(lgr->tx_wq);
926 free_lgr:
927         kfree(lgr);
928 ism_put_vlan:
929         if (ini->is_smcd && ini->vlan_id)
930                 smc_ism_put_vlan(ini->ism_dev[ini->ism_selected], ini->vlan_id);
931 out:
932         if (rc < 0) {
933                 if (rc == -ENOMEM)
934                         rc = SMC_CLC_DECL_MEM;
935                 else
936                         rc = SMC_CLC_DECL_INTERR;
937         }
938         return rc;
939 }
940
941 static int smc_write_space(struct smc_connection *conn)
942 {
943         int buffer_len = conn->peer_rmbe_size;
944         union smc_host_cursor prod;
945         union smc_host_cursor cons;
946         int space;
947
948         smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
949         smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
950         /* determine rx_buf space */
951         space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod);
952         return space;
953 }
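
/*
 * Worked example with illustrative numbers: for a 65536-byte peer
 * RMB with cons at 1000 and prod at 5000 in the same wrap, 4000
 * bytes are still in flight and smc_write_space() returns 61536.
 * Once the producer wraps a lap ahead, smc_curs_diff() counts
 * (buffer_len - cons) + prod instead, keeping the result within the
 * buffer length.
 */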
954
955 static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend,
956                              struct smc_wr_buf *wr_buf)
957 {
958         struct smc_connection *conn = &smc->conn;
959         union smc_host_cursor cons, fin;
960         int rc = 0;
961         int diff;
962
963         smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn);
964         smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn);
965         /* set prod cursor to old state, enforce tx_rdma_writes() */
966         smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn);
967         smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
968
969         if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) {
970                 /* cons cursor advanced more than fin, and prod was set to
971                  * fin above, so now prod is smaller than cons. Fix that.
972                  */
973                 diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons);
974                 smc_curs_add(conn->sndbuf_desc->len,
975                              &conn->tx_curs_sent, diff);
976                 smc_curs_add(conn->sndbuf_desc->len,
977                              &conn->tx_curs_fin, diff);
978
979                 smp_mb__before_atomic();
980                 atomic_add(diff, &conn->sndbuf_space);
981                 smp_mb__after_atomic();
982
983                 smc_curs_add(conn->peer_rmbe_size,
984                              &conn->local_tx_ctrl.prod, diff);
985                 smc_curs_add(conn->peer_rmbe_size,
986                              &conn->local_tx_ctrl_fin, diff);
987         }
988         /* recalculate, value is used by tx_rdma_writes() */
989         atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn));
990
991         if (smc->sk.sk_state != SMC_INIT &&
992             smc->sk.sk_state != SMC_CLOSED) {
993                 rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf);
994                 if (!rc) {
995                         queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work, 0);
996                         smc->sk.sk_data_ready(&smc->sk);
997                 }
998         } else {
999                 smc_wr_tx_put_slot(conn->lnk,
1000                                    (struct smc_wr_tx_pend_priv *)pend);
1001         }
1002         return rc;
1003 }
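
/*
 * Illustrative fixup: assume peer_rmbe_size is 65536, fin = 1000 and
 * cons = 3000 at switch time. The peer has then consumed diff = 2000
 * bytes beyond the finished-send cursor, so tx_curs_sent/tx_curs_fin,
 * sndbuf_space and the prod/fin cursors above all advance by 2000
 * before the validation CDC message goes out on the new link.
 */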
1004
1005 void smc_switch_link_and_count(struct smc_connection *conn,
1006                                struct smc_link *to_lnk)
1007 {
1008         atomic_dec(&conn->lnk->conn_cnt);
1009         /* link_hold in smc_conn_create() */
1010         smcr_link_put(conn->lnk);
1011         conn->lnk = to_lnk;
1012         atomic_inc(&conn->lnk->conn_cnt);
1013         /* link_put in smc_conn_free() */
1014         smcr_link_hold(conn->lnk);
1015 }
1016
1017 struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
1018                                   struct smc_link *from_lnk, bool is_dev_err)
1019 {
1020         struct smc_link *to_lnk = NULL;
1021         struct smc_cdc_tx_pend *pend;
1022         struct smc_connection *conn;
1023         struct smc_wr_buf *wr_buf;
1024         struct smc_sock *smc;
1025         struct rb_node *node;
1026         int i, rc = 0;
1027
1028         /* link is inactive, wake up tx waiters */
1029         smc_wr_wakeup_tx_wait(from_lnk);
1030
1031         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1032                 if (!smc_link_active(&lgr->lnk[i]) || i == from_lnk->link_idx)
1033                         continue;
1034                 if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev &&
1035                     from_lnk->ibport == lgr->lnk[i].ibport) {
1036                         continue;
1037                 }
1038                 to_lnk = &lgr->lnk[i];
1039                 break;
1040         }
1041         if (!to_lnk || !smc_wr_tx_link_hold(to_lnk)) {
1042                 smc_lgr_terminate_sched(lgr);
1043                 return NULL;
1044         }
1045 again:
1046         read_lock_bh(&lgr->conns_lock);
1047         for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) {
1048                 conn = rb_entry(node, struct smc_connection, alert_node);
1049                 if (conn->lnk != from_lnk)
1050                         continue;
1051                 smc = container_of(conn, struct smc_sock, conn);
1052                 /* conn->lnk not yet set in SMC_INIT state */
1053                 if (smc->sk.sk_state == SMC_INIT)
1054                         continue;
1055                 if (smc->sk.sk_state == SMC_CLOSED ||
1056                     smc->sk.sk_state == SMC_PEERCLOSEWAIT1 ||
1057                     smc->sk.sk_state == SMC_PEERCLOSEWAIT2 ||
1058                     smc->sk.sk_state == SMC_APPFINCLOSEWAIT ||
1059                     smc->sk.sk_state == SMC_APPCLOSEWAIT1 ||
1060                     smc->sk.sk_state == SMC_APPCLOSEWAIT2 ||
1061                     smc->sk.sk_state == SMC_PEERFINCLOSEWAIT ||
1062                     smc->sk.sk_state == SMC_PEERABORTWAIT ||
1063                     smc->sk.sk_state == SMC_PROCESSABORT) {
1064                         spin_lock_bh(&conn->send_lock);
1065                         smc_switch_link_and_count(conn, to_lnk);
1066                         spin_unlock_bh(&conn->send_lock);
1067                         continue;
1068                 }
1069                 sock_hold(&smc->sk);
1070                 read_unlock_bh(&lgr->conns_lock);
1071                 /* pre-fetch buffer outside of send_lock, might sleep */
1072                 rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend);
1073                 if (rc)
1074                         goto err_out;
1075                 /* avoid race with smcr_tx_sndbuf_nonempty() */
1076                 spin_lock_bh(&conn->send_lock);
1077                 smc_switch_link_and_count(conn, to_lnk);
1078                 rc = smc_switch_cursor(smc, pend, wr_buf);
1079                 spin_unlock_bh(&conn->send_lock);
1080                 sock_put(&smc->sk);
1081                 if (rc)
1082                         goto err_out;
1083                 goto again;
1084         }
1085         read_unlock_bh(&lgr->conns_lock);
1086         smc_wr_tx_link_put(to_lnk);
1087         return to_lnk;
1088
1089 err_out:
1090         smcr_link_down_cond_sched(to_lnk);
1091         smc_wr_tx_link_put(to_lnk);
1092         return NULL;
1093 }
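
/*
 * Note on the "goto again" above: conns_lock is dropped around the
 * possibly-sleeping smc_cdc_get_free_slot() call, so the rbtree may
 * change underneath and the walk restarts from rb_first(). Progress
 * is still guaranteed, because a switched connection no longer
 * matches conn->lnk == from_lnk and is skipped on the next pass.
 */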
1094
1095 static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
1096                            struct smc_link_group *lgr)
1097 {
1098         struct rw_semaphore *lock;      /* lock buffer list */
1099         int rc;
1100
1101         if (is_rmb && buf_desc->is_conf_rkey && !list_empty(&lgr->list)) {
1102                 /* unregister rmb with peer */
1103                 rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
1104                 if (!rc) {
1105                         /* protect against smc_llc_cli_rkey_exchange() */
1106                         mutex_lock(&lgr->llc_conf_mutex);
1107                         smc_llc_do_delete_rkey(lgr, buf_desc);
1108                         buf_desc->is_conf_rkey = false;
1109                         mutex_unlock(&lgr->llc_conf_mutex);
1110                         smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
1111                 }
1112         }
1113
1114         if (buf_desc->is_reg_err) {
1115                 /* buf registration failed, reuse not possible */
1116                 lock = is_rmb ? &lgr->rmbs_lock :
1117                                 &lgr->sndbufs_lock;
1118                 down_write(lock);
1119                 list_del(&buf_desc->list);
1120                 up_write(lock);
1121
1122                 smc_buf_free(lgr, is_rmb, buf_desc);
1123         } else {
1124                 /* memzero_explicit keeps the compiler from eliding the clear */
1125                 memzero_explicit(buf_desc->cpu_addr, buf_desc->len);
1126                 WRITE_ONCE(buf_desc->used, 0);
1127         }
1128 }
1129
1130 static void smc_buf_unuse(struct smc_connection *conn,
1131                           struct smc_link_group *lgr)
1132 {
1133         if (conn->sndbuf_desc) {
1134                 if (!lgr->is_smcd && conn->sndbuf_desc->is_vm) {
1135                         smcr_buf_unuse(conn->sndbuf_desc, false, lgr);
1136                 } else {
1137                         memzero_explicit(conn->sndbuf_desc->cpu_addr, conn->sndbuf_desc->len);
1138                         WRITE_ONCE(conn->sndbuf_desc->used, 0);
1139                 }
1140         }
1141         if (conn->rmb_desc) {
1142                 if (!lgr->is_smcd) {
1143                         smcr_buf_unuse(conn->rmb_desc, true, lgr);
1144                 } else {
1145                         memzero_explicit(conn->rmb_desc->cpu_addr,
1146                                          conn->rmb_desc->len + sizeof(struct smcd_cdc_msg));
1147                         WRITE_ONCE(conn->rmb_desc->used, 0);
1148                 }
1149         }
1150 }
1151
1152 /* remove a finished connection from its link group */
1153 void smc_conn_free(struct smc_connection *conn)
1154 {
1155         struct smc_link_group *lgr = conn->lgr;
1156
1157         if (!lgr || conn->freed)
1158                 /* Connection has never been registered in a
1159                  * link group, or has already been freed.
1160                  */
1161                 return;
1162
1163         conn->freed = 1;
1164         if (!smc_conn_lgr_valid(conn))
1165                 /* Connection has already unregistered from
1166                  * link group.
1167                  */
1168                 goto lgr_put;
1169
1170         if (lgr->is_smcd) {
1171                 if (!list_empty(&lgr->list))
1172                         smc_ism_unset_conn(conn);
1173                 tasklet_kill(&conn->rx_tsklet);
1174         } else {
1175                 smc_cdc_wait_pend_tx_wr(conn);
1176                 if (current_work() != &conn->abort_work)
1177                         cancel_work_sync(&conn->abort_work);
1178         }
1179         if (!list_empty(&lgr->list)) {
1180                 smc_buf_unuse(conn, lgr); /* allow buffer reuse */
1181                 smc_lgr_unregister_conn(conn);
1182         }
1183
1184         if (!lgr->conns_num)
1185                 smc_lgr_schedule_free_work(lgr);
1186 lgr_put:
1187         if (!lgr->is_smcd)
1188                 smcr_link_put(conn->lnk); /* link_hold in smc_conn_create() */
1189         smc_lgr_put(lgr); /* lgr_hold in smc_conn_create() */
1190 }
1191
1192 /* unregister a link from a buf_desc */
1193 static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
1194                                 struct smc_link *lnk)
1195 {
1196         if (is_rmb || buf_desc->is_vm)
1197                 buf_desc->is_reg_mr[lnk->link_idx] = false;
1198         if (!buf_desc->is_map_ib[lnk->link_idx])
1199                 return;
1200
1201         if ((is_rmb || buf_desc->is_vm) &&
1202             buf_desc->mr[lnk->link_idx]) {
1203                 smc_ib_put_memory_region(buf_desc->mr[lnk->link_idx]);
1204                 buf_desc->mr[lnk->link_idx] = NULL;
1205         }
1206         if (is_rmb)
1207                 smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
1208         else
1209                 smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
1210
1211         sg_free_table(&buf_desc->sgt[lnk->link_idx]);
1212         buf_desc->is_map_ib[lnk->link_idx] = false;
1213 }
1214
1215 /* unmap all buffers of lgr for a deleted link */
1216 static void smcr_buf_unmap_lgr(struct smc_link *lnk)
1217 {
1218         struct smc_link_group *lgr = lnk->lgr;
1219         struct smc_buf_desc *buf_desc, *bf;
1220         int i;
1221
1222         for (i = 0; i < SMC_RMBE_SIZES; i++) {
1223                 down_write(&lgr->rmbs_lock);
1224                 list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
1225                         smcr_buf_unmap_link(buf_desc, true, lnk);
1226                 up_write(&lgr->rmbs_lock);
1227
1228                 down_write(&lgr->sndbufs_lock);
1229                 list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
1230                                          list)
1231                         smcr_buf_unmap_link(buf_desc, false, lnk);
1232                 up_write(&lgr->sndbufs_lock);
1233         }
1234 }
1235
1236 static void smcr_rtoken_clear_link(struct smc_link *lnk)
1237 {
1238         struct smc_link_group *lgr = lnk->lgr;
1239         int i;
1240
1241         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
1242                 lgr->rtokens[i][lnk->link_idx].rkey = 0;
1243                 lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
1244         }
1245 }
1246
1247 static void __smcr_link_clear(struct smc_link *lnk)
1248 {
1249         struct smc_link_group *lgr = lnk->lgr;
1250         struct smc_ib_device *smcibdev;
1251
1252         smc_wr_free_link_mem(lnk);
1253         smc_ibdev_cnt_dec(lnk);
1254         put_device(&lnk->smcibdev->ibdev->dev);
1255         smcibdev = lnk->smcibdev;
1256         memset(lnk, 0, sizeof(struct smc_link));
1257         lnk->state = SMC_LNK_UNUSED;
1258         if (!atomic_dec_return(&smcibdev->lnk_cnt))
1259                 wake_up(&smcibdev->lnks_deleted);
1260         smc_lgr_put(lgr); /* lgr_hold in smcr_link_init() */
1261 }
1262
1263 /* must be called under lgr->llc_conf_mutex lock */
1264 void smcr_link_clear(struct smc_link *lnk, bool log)
1265 {
1266         if (!lnk->lgr || lnk->clearing ||
1267             lnk->state == SMC_LNK_UNUSED)
1268                 return;
1269         lnk->clearing = 1;
1270         lnk->peer_qpn = 0;
1271         smc_llc_link_clear(lnk, log);
1272         smcr_buf_unmap_lgr(lnk);
1273         smcr_rtoken_clear_link(lnk);
1274         smc_ib_modify_qp_error(lnk);
1275         smc_wr_free_link(lnk);
1276         smc_ib_destroy_queue_pair(lnk);
1277         smc_ib_dealloc_protection_domain(lnk);
1278         smcr_link_put(lnk); /* theoretically last link_put */
1279 }
1280
1281 void smcr_link_hold(struct smc_link *lnk)
1282 {
1283         refcount_inc(&lnk->refcnt);
1284 }
1285
1286 void smcr_link_put(struct smc_link *lnk)
1287 {
1288         if (refcount_dec_and_test(&lnk->refcnt))
1289                 __smcr_link_clear(lnk);
1290 }
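
/*
 * Link reference pairing, as annotated throughout this file:
 * smcr_link_init() starts the refcount at 1, dropped by the
 * "theoretically last" smcr_link_put() in smcr_link_clear();
 * smc_conn_create() holds and smc_conn_free() puts one reference per
 * connection; smc_switch_link_and_count() moves such a reference from
 * the old link to the new one.
 */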
1291
1292 static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
1293                           struct smc_buf_desc *buf_desc)
1294 {
1295         int i;
1296
1297         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
1298                 smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);
1299
1300         if (!buf_desc->is_vm && buf_desc->pages)
1301                 __free_pages(buf_desc->pages, buf_desc->order);
1302         else if (buf_desc->is_vm && buf_desc->cpu_addr)
1303                 vfree(buf_desc->cpu_addr);
1304         kfree(buf_desc);
1305 }
1306
1307 static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
1308                           struct smc_buf_desc *buf_desc)
1309 {
1310         if (is_dmb) {
1311                 /* restore original buf len */
1312                 buf_desc->len += sizeof(struct smcd_cdc_msg);
1313                 smc_ism_unregister_dmb(lgr->smcd, buf_desc);
1314         } else {
1315                 kfree(buf_desc->cpu_addr);
1316         }
1317         kfree(buf_desc);
1318 }
1319
1320 static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
1321                          struct smc_buf_desc *buf_desc)
1322 {
1323         if (lgr->is_smcd)
1324                 smcd_buf_free(lgr, is_rmb, buf_desc);
1325         else
1326                 smcr_buf_free(lgr, is_rmb, buf_desc);
1327 }
1328
1329 static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
1330 {
1331         struct smc_buf_desc *buf_desc, *bf_desc;
1332         struct list_head *buf_list;
1333         int i;
1334
1335         for (i = 0; i < SMC_RMBE_SIZES; i++) {
1336                 if (is_rmb)
1337                         buf_list = &lgr->rmbs[i];
1338                 else
1339                         buf_list = &lgr->sndbufs[i];
1340                 list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
1341                                          list) {
1342                         list_del(&buf_desc->list);
1343                         smc_buf_free(lgr, is_rmb, buf_desc);
1344                 }
1345         }
1346 }
1347
1348 static void smc_lgr_free_bufs(struct smc_link_group *lgr)
1349 {
1350         /* free send buffers */
1351         __smc_lgr_free_bufs(lgr, false);
1352         /* free rmbs */
1353         __smc_lgr_free_bufs(lgr, true);
1354 }
1355
1356 /* won't be freed until no one accesses the lgr anymore */
1357 static void __smc_lgr_free(struct smc_link_group *lgr)
1358 {
1359         smc_lgr_free_bufs(lgr);
1360         if (lgr->is_smcd) {
1361                 if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
1362                         wake_up(&lgr->smcd->lgrs_deleted);
1363         } else {
1364                 smc_wr_free_lgr_mem(lgr);
1365                 if (!atomic_dec_return(&lgr_cnt))
1366                         wake_up(&lgrs_deleted);
1367         }
1368         kfree(lgr);
1369 }
1370
1371 /* remove a link group */
1372 static void smc_lgr_free(struct smc_link_group *lgr)
1373 {
1374         int i;
1375
1376         if (!lgr->is_smcd) {
1377                 mutex_lock(&lgr->llc_conf_mutex);
1378                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1379                         if (lgr->lnk[i].state != SMC_LNK_UNUSED)
1380                                 smcr_link_clear(&lgr->lnk[i], false);
1381                 }
1382                 mutex_unlock(&lgr->llc_conf_mutex);
1383                 smc_llc_lgr_clear(lgr);
1384         }
1385
1386         destroy_workqueue(lgr->tx_wq);
1387         if (lgr->is_smcd) {
1388                 smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
1389                 put_device(&lgr->smcd->dev);
1390         }
1391         smc_lgr_put(lgr); /* theoretically last lgr_put */
1392 }
1393
1394 void smc_lgr_hold(struct smc_link_group *lgr)
1395 {
1396         refcount_inc(&lgr->refcnt);
1397 }
1398
1399 void smc_lgr_put(struct smc_link_group *lgr)
1400 {
1401         if (refcount_dec_and_test(&lgr->refcnt))
1402                 __smc_lgr_free(lgr);
1403 }
1404
static void smc_sk_wake_ups(struct smc_sock *smc)
{
        smc->sk.sk_write_space(&smc->sk);
        smc->sk.sk_data_ready(&smc->sk);
        smc->sk.sk_state_change(&smc->sk);
}

/* kill a connection */
static void smc_conn_kill(struct smc_connection *conn, bool soft)
{
        struct smc_sock *smc = container_of(conn, struct smc_sock, conn);

        if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
                conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
        else
                smc_close_abort(conn);
        conn->killed = 1;
        smc->sk.sk_err = ECONNABORTED;
        smc_sk_wake_ups(smc);
        if (conn->lgr->is_smcd) {
                smc_ism_unset_conn(conn);
                if (soft)
                        tasklet_kill(&conn->rx_tsklet);
                else
                        tasklet_unlock_wait(&conn->rx_tsklet);
        } else {
                smc_cdc_wait_pend_tx_wr(conn);
        }
        smc_lgr_unregister_conn(conn);
        smc_close_active_abort(smc);
}

static void smc_lgr_cleanup(struct smc_link_group *lgr)
{
        if (lgr->is_smcd) {
                smc_ism_signal_shutdown(lgr);
        } else {
                u32 rsn = lgr->llc_termination_rsn;

                if (!rsn)
                        rsn = SMC_LLC_DEL_PROG_INIT_TERM;
                smc_llc_send_link_delete_all(lgr, false, rsn);
                smcr_lgr_link_deactivate_all(lgr);
        }
}

/* terminate link group
 * @soft: true if link group shutdown can take its time
 *        false if immediate link group shutdown is required
 */
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
{
        struct smc_connection *conn;
        struct smc_sock *smc;
        struct rb_node *node;

        if (lgr->terminating)
                return; /* lgr already terminating */
        /* cancel free_work sync, will terminate when lgr->freeing is set */
        cancel_delayed_work(&lgr->free_work);
        lgr->terminating = 1;

        /* kill remaining link group connections */
        read_lock_bh(&lgr->conns_lock);
        node = rb_first(&lgr->conns_all);
        while (node) {
                read_unlock_bh(&lgr->conns_lock);
                conn = rb_entry(node, struct smc_connection, alert_node);
                smc = container_of(conn, struct smc_sock, conn);
                sock_hold(&smc->sk); /* sock_put below */
                lock_sock(&smc->sk);
                smc_conn_kill(conn, soft);
                release_sock(&smc->sk);
                sock_put(&smc->sk); /* sock_hold above */
                read_lock_bh(&lgr->conns_lock);
                node = rb_first(&lgr->conns_all);
        }
        read_unlock_bh(&lgr->conns_lock);
        smc_lgr_cleanup(lgr);
        smc_lgr_free(lgr);
}

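/* Illustrative sketch (editor's note): the kill loop above cannot hold
 * conns_lock across smc_conn_kill(), which may sleep, so it restarts from
 * rb_first() after every lock drop. The generic shape of this
 * "drop lock, work, re-acquire, restart" pattern is:
 *
 *      read_lock_bh(&lock);
 *      node = rb_first(&tree);
 *      while (node) {
 *              read_unlock_bh(&lock);  // entry pinned via sock_hold()
 *              ...sleepable work that unlinks the entry from the tree...
 *              read_lock_bh(&lock);
 *              node = rb_first(&tree); // restart; old node may be gone
 *      }
 *      read_unlock_bh(&lock);
 *
 * Forward progress relies on each iteration unlinking the entry it handled
 * (here via smc_lgr_unregister_conn() inside smc_conn_kill()).
 */
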
/* unlink link group and schedule termination */
void smc_lgr_terminate_sched(struct smc_link_group *lgr)
{
        spinlock_t *lgr_lock;

        smc_lgr_list_head(lgr, &lgr_lock);
        spin_lock_bh(lgr_lock);
        if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) {
                spin_unlock_bh(lgr_lock);
                return; /* lgr already terminating */
        }
        list_del_init(&lgr->list);
        lgr->freeing = 1;
        spin_unlock_bh(lgr_lock);
        schedule_work(&lgr->terminate_work);
}

/* Called when peer lgr shutdown (normal or abnormal) is received */
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
{
        struct smc_link_group *lgr, *l;
        LIST_HEAD(lgr_free_list);

        /* run common cleanup function and build free list */
        spin_lock_bh(&dev->lgr_lock);
        list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
                if ((!peer_gid || lgr->peer_gid == peer_gid) &&
                    (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
                        if (peer_gid) /* peer triggered termination */
                                lgr->peer_shutdown = 1;
                        list_move(&lgr->list, &lgr_free_list);
                        lgr->freeing = 1;
                }
        }
        spin_unlock_bh(&dev->lgr_lock);

        /* cancel the regular free workers and actually free lgrs */
        list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
                list_del_init(&lgr->list);
                schedule_work(&lgr->terminate_work);
        }
}

/* Called when an SMCD device is removed or the smc module is unloaded */
void smc_smcd_terminate_all(struct smcd_dev *smcd)
{
        struct smc_link_group *lgr, *lg;
        LIST_HEAD(lgr_free_list);

        spin_lock_bh(&smcd->lgr_lock);
        list_splice_init(&smcd->lgr_list, &lgr_free_list);
        list_for_each_entry(lgr, &lgr_free_list, list)
                lgr->freeing = 1;
        spin_unlock_bh(&smcd->lgr_lock);

        list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
                list_del_init(&lgr->list);
                __smc_lgr_terminate(lgr, false);
        }

        if (atomic_read(&smcd->lgr_cnt))
                wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
}

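/* Illustrative sketch (editor's note): the final wait above pairs with the
 * counter handling in __smc_lgr_free(), forming a simple drain-and-wait
 * barrier built from an atomic counter and a wait queue:
 *
 *      // freeing side, per object:
 *      if (!atomic_dec_return(&cnt))
 *              wake_up(&deleted_wq);
 *
 *      // teardown side, after queueing all terminations:
 *      if (atomic_read(&cnt))
 *              wait_event(deleted_wq, !atomic_read(&cnt));
 *
 * The atomic_read() before wait_event() is only a fast path; wait_event()
 * itself re-checks the condition, so no wakeup can be lost.
 */
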
/* Called when an SMCR device is removed or the smc module is unloaded.
 * If smcibdev is given, all SMCR link groups using this device are terminated.
 * If smcibdev is NULL, all SMCR link groups are terminated.
 */
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
{
        struct smc_link_group *lgr, *lg;
        LIST_HEAD(lgr_free_list);
        int i;

        spin_lock_bh(&smc_lgr_list.lock);
        if (!smcibdev) {
                list_splice_init(&smc_lgr_list.list, &lgr_free_list);
                list_for_each_entry(lgr, &lgr_free_list, list)
                        lgr->freeing = 1;
        } else {
                list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
                        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                                if (lgr->lnk[i].smcibdev == smcibdev)
                                        smcr_link_down_cond_sched(&lgr->lnk[i]);
                        }
                }
        }
        spin_unlock_bh(&smc_lgr_list.lock);

        list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
                list_del_init(&lgr->list);
                smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM);
                __smc_lgr_terminate(lgr, false);
        }

        if (smcibdev) {
                if (atomic_read(&smcibdev->lnk_cnt))
                        wait_event(smcibdev->lnks_deleted,
                                   !atomic_read(&smcibdev->lnk_cnt));
        } else {
                if (atomic_read(&lgr_cnt))
                        wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
        }
}

/* set new lgr type and clear all asymmetric link tagging */
void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
{
        char *lgr_type = "";
        int i;

        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
                if (smc_link_usable(&lgr->lnk[i]))
                        lgr->lnk[i].link_is_asym = false;
        if (lgr->type == new_type)
                return;
        lgr->type = new_type;

        switch (lgr->type) {
        case SMC_LGR_NONE:
                lgr_type = "NONE";
                break;
        case SMC_LGR_SINGLE:
                lgr_type = "SINGLE";
                break;
        case SMC_LGR_SYMMETRIC:
                lgr_type = "SYMMETRIC";
                break;
        case SMC_LGR_ASYMMETRIC_PEER:
                lgr_type = "ASYMMETRIC_PEER";
                break;
        case SMC_LGR_ASYMMETRIC_LOCAL:
                lgr_type = "ASYMMETRIC_LOCAL";
                break;
        }
        pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu state changed: "
                            "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id,
                            lgr->net->net_cookie, lgr_type, lgr->pnet_id);
}

/* set new lgr type and tag a link as asymmetric */
void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
                            enum smc_lgr_type new_type, int asym_lnk_idx)
{
        smcr_lgr_set_type(lgr, new_type);
        lgr->lnk[asym_lnk_idx].link_is_asym = true;
}

/* abort connection, abort_work scheduled from tasklet context */
static void smc_conn_abort_work(struct work_struct *work)
{
        struct smc_connection *conn = container_of(work,
                                                   struct smc_connection,
                                                   abort_work);
        struct smc_sock *smc = container_of(conn, struct smc_sock, conn);

        lock_sock(&smc->sk);
        smc_conn_kill(conn, true);
        release_sock(&smc->sk);
        sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
}

void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
{
        struct smc_link_group *lgr, *n;

        spin_lock_bh(&smc_lgr_list.lock);
        list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
                struct smc_link *link;

                if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
                            SMC_MAX_PNETID_LEN) ||
                    lgr->type == SMC_LGR_SYMMETRIC ||
                    lgr->type == SMC_LGR_ASYMMETRIC_PEER ||
                    !rdma_dev_access_netns(smcibdev->ibdev, lgr->net))
                        continue;

                /* trigger local add link processing */
                link = smc_llc_usable_link(lgr);
                if (link)
                        smc_llc_add_link_local(link);
        }
        spin_unlock_bh(&smc_lgr_list.lock);
}

/* link is down - switch connections to alternate link,
 * must be called under lgr->llc_conf_mutex lock
 */
static void smcr_link_down(struct smc_link *lnk)
{
        struct smc_link_group *lgr = lnk->lgr;
        struct smc_link *to_lnk;
        int del_link_id;

        if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
                return;

        to_lnk = smc_switch_conns(lgr, lnk, true);
        if (!to_lnk) { /* no backup link available */
                smcr_link_clear(lnk, true);
                return;
        }
        smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
        del_link_id = lnk->link_id;

        if (lgr->role == SMC_SERV) {
                /* trigger local delete link processing */
                smc_llc_srv_delete_link_local(to_lnk, del_link_id);
        } else {
                if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
                        /* another llc task is ongoing */
                        mutex_unlock(&lgr->llc_conf_mutex);
                        wait_event_timeout(lgr->llc_flow_waiter,
                                (list_empty(&lgr->list) ||
                                 lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
                                SMC_LLC_WAIT_TIME);
                        mutex_lock(&lgr->llc_conf_mutex);
                }
                if (!list_empty(&lgr->list)) {
                        smc_llc_send_delete_link(to_lnk, del_link_id,
                                                 SMC_LLC_REQ, true,
                                                 SMC_LLC_DEL_LOST_PATH);
                        smcr_link_clear(lnk, true);
                }
                wake_up(&lgr->llc_flow_waiter); /* wake up next waiter */
        }
}

/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_down_cond(struct smc_link *lnk)
{
        if (smc_link_downing(&lnk->state)) {
                trace_smcr_link_down(lnk, __builtin_return_address(0));
                smcr_link_down(lnk);
        }
}

/* will get the lgr->llc_conf_mutex lock */
void smcr_link_down_cond_sched(struct smc_link *lnk)
{
        if (smc_link_downing(&lnk->state)) {
                trace_smcr_link_down(lnk, __builtin_return_address(0));
                schedule_work(&lnk->link_down_wrk);
        }
}

void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
{
        struct smc_link_group *lgr, *n;
        int i;

        list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
                if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
                            SMC_MAX_PNETID_LEN))
                        continue; /* lgr is not affected */
                if (list_empty(&lgr->list))
                        continue;
                for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                        struct smc_link *lnk = &lgr->lnk[i];

                        if (smc_link_usable(lnk) &&
                            lnk->smcibdev == smcibdev && lnk->ibport == ibport)
                                smcr_link_down_cond_sched(lnk);
                }
        }
}

static void smc_link_down_work(struct work_struct *work)
{
        struct smc_link *link = container_of(work, struct smc_link,
                                             link_down_wrk);
        struct smc_link_group *lgr = link->lgr;

        if (list_empty(&lgr->list))
                return;
        wake_up_all(&lgr->llc_msg_waiter);
        mutex_lock(&lgr->llc_conf_mutex);
        smcr_link_down(link);
        mutex_unlock(&lgr->llc_conf_mutex);
}

static int smc_vlan_by_tcpsk_walk(struct net_device *lower_dev,
                                  struct netdev_nested_priv *priv)
{
        unsigned short *vlan_id = (unsigned short *)priv->data;

        if (is_vlan_dev(lower_dev)) {
                *vlan_id = vlan_dev_vlan_id(lower_dev);
                return 1;
        }

        return 0;
}

/* Determine vlan of internal TCP socket. */
int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
{
        struct dst_entry *dst = sk_dst_get(clcsock->sk);
        struct netdev_nested_priv priv;
        struct net_device *ndev;
        int rc = 0;

        ini->vlan_id = 0;
        if (!dst) {
                rc = -ENOTCONN;
                goto out;
        }
        if (!dst->dev) {
                rc = -ENODEV;
                goto out_rel;
        }

        ndev = dst->dev;
        if (is_vlan_dev(ndev)) {
                ini->vlan_id = vlan_dev_vlan_id(ndev);
                goto out_rel;
        }

        priv.data = (void *)&ini->vlan_id;
        rtnl_lock();
        netdev_walk_all_lower_dev(ndev, smc_vlan_by_tcpsk_walk, &priv);
        rtnl_unlock();

out_rel:
        dst_release(dst);
out:
        return rc;
}

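/* Illustrative sketch (editor's note): smc_vlan_by_tcpsk() covers two cases.
 * If the route's device is itself a VLAN device, the id is read directly;
 * otherwise the lower-device hierarchy (e.g. a bond stacked on a VLAN) is
 * walked until the callback finds a VLAN device and stops the walk by
 * returning 1. A hypothetical caller would use it like this:
 *
 *      struct smc_init_info ini = { 0 };
 *
 *      if (!smc_vlan_by_tcpsk(smc->clcsock, &ini))
 *              pr_debug("clcsock uses vlan id %u\n", ini.vlan_id);
 *
 * ini.vlan_id stays 0 when the socket's egress path has no VLAN layer.
 */
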
static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version,
                           u8 peer_systemid[],
                           u8 peer_gid[],
                           u8 peer_mac_v1[],
                           enum smc_lgr_role role, u32 clcqpn,
                           struct net *net)
{
        struct smc_link *lnk;
        int i;

        if (memcmp(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN) ||
            lgr->role != role)
                return false;

        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                lnk = &lgr->lnk[i];

                if (!smc_link_active(lnk))
                        continue;
                /* use verbs API to check netns, instead of lgr->net */
                if (!rdma_dev_access_netns(lnk->smcibdev->ibdev, net))
                        return false;
                if ((lgr->role == SMC_SERV || lnk->peer_qpn == clcqpn) &&
                    !memcmp(lnk->peer_gid, peer_gid, SMC_GID_SIZE) &&
                    (smcr_version == SMC_V2 ||
                     !memcmp(lnk->peer_mac, peer_mac_v1, ETH_ALEN)))
                        return true;
        }
        return false;
}

static bool smcd_lgr_match(struct smc_link_group *lgr,
                           struct smcd_dev *smcismdev, u64 peer_gid)
{
        return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
}

/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
{
        struct smc_connection *conn = &smc->conn;
        struct net *net = sock_net(&smc->sk);
        struct list_head *lgr_list;
        struct smc_link_group *lgr;
        enum smc_lgr_role role;
        spinlock_t *lgr_lock;
        int rc = 0;

        lgr_list = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_list :
                                  &smc_lgr_list.list;
        lgr_lock = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_lock :
                                  &smc_lgr_list.lock;
        ini->first_contact_local = 1;
        role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
        if (role == SMC_CLNT && ini->first_contact_peer)
                /* create new link group as well */
                goto create;

        /* determine if an existing link group can be reused */
        spin_lock_bh(lgr_lock);
        list_for_each_entry(lgr, lgr_list, list) {
                write_lock_bh(&lgr->conns_lock);
                if ((ini->is_smcd ?
                     smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected],
                                    ini->ism_peer_gid[ini->ism_selected]) :
                     smcr_lgr_match(lgr, ini->smcr_version,
                                    ini->peer_systemid,
                                    ini->peer_gid, ini->peer_mac, role,
                                    ini->ib_clcqpn, net)) &&
                    !lgr->sync_err &&
                    (ini->smcd_version == SMC_V2 ||
                     lgr->vlan_id == ini->vlan_id) &&
                    (role == SMC_CLNT || ini->is_smcd ||
                    (lgr->conns_num < SMC_RMBS_PER_LGR_MAX &&
                      !bitmap_full(lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)))) {
                        /* link group found */
                        ini->first_contact_local = 0;
                        conn->lgr = lgr;
                        rc = smc_lgr_register_conn(conn, false);
                        write_unlock_bh(&lgr->conns_lock);
                        if (!rc && delayed_work_pending(&lgr->free_work))
                                cancel_delayed_work(&lgr->free_work);
                        break;
                }
                write_unlock_bh(&lgr->conns_lock);
        }
        spin_unlock_bh(lgr_lock);
        if (rc)
                return rc;

        if (role == SMC_CLNT && !ini->first_contact_peer &&
            ini->first_contact_local) {
                /* Server reuses a link group, but Client wants to start
                 * a new one; send out_of_sync decline, reason
                 * synchronization error
                 */
                return SMC_CLC_DECL_SYNCERR;
        }

create:
        if (ini->first_contact_local) {
                rc = smc_lgr_create(smc, ini);
                if (rc)
                        goto out;
                lgr = conn->lgr;
                write_lock_bh(&lgr->conns_lock);
                rc = smc_lgr_register_conn(conn, true);
                write_unlock_bh(&lgr->conns_lock);
                if (rc) {
                        smc_lgr_cleanup_early(lgr);
                        goto out;
                }
        }
        smc_lgr_hold(conn->lgr); /* lgr_put in smc_conn_free() */
        if (!conn->lgr->is_smcd)
                smcr_link_hold(conn->lnk); /* link_put in smc_conn_free() */
        conn->freed = 0;
        conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
        conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
        conn->urg_state = SMC_URG_READ;
        init_waitqueue_head(&conn->cdc_pend_tx_wq);
        INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
        if (ini->is_smcd) {
                conn->rx_off = sizeof(struct smcd_cdc_msg);
                smcd_cdc_rx_init(conn); /* init tasklet for this conn */
        } else {
                conn->rx_off = 0;
        }
#ifndef KERNEL_HAS_ATOMIC64
        spin_lock_init(&conn->acurs_lock);
#endif

out:
        return rc;
}

#define SMCD_DMBE_SIZES         6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
#define SMCR_RMBE_SIZES         5 /* 0 -> 16KB, 1 -> 32KB, .. 5 -> 512KB */

/* convert the RMB size into the compressed notation (minimum 16K, see
 * SMCD/R_DMBE_SIZES).
 * In contrast to plain ilog2, this rounds towards the next power of 2,
 * so the socket application gets at least its desired sndbuf / rcvbuf size.
 */
static u8 smc_compress_bufsize(int size, bool is_smcd, bool is_rmb)
{
        const unsigned int max_scat = SG_MAX_SINGLE_ALLOC * PAGE_SIZE;
        u8 compressed;

        if (size <= SMC_BUF_MIN_SIZE)
                return 0;

        size = (size - 1) >> 14;  /* convert to 16K multiple */
        compressed = min_t(u8, ilog2(size) + 1,
                           is_smcd ? SMCD_DMBE_SIZES : SMCR_RMBE_SIZES);

        if (!is_smcd && is_rmb)
                /* RMBs are backed by & limited to max size of scatterlists */
                compressed = min_t(u8, compressed, ilog2(max_scat >> 14));

        return compressed;
}

/* convert the RMB size from compressed notation into integer */
int smc_uncompress_bufsize(u8 compressed)
{
        u32 size;

        size = 0x00000001 << (((int)compressed) + 14);
        return (int)size;
}

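/* Worked example (editor's note): the compressed notation encodes a buffer
 * size as a 16KB power-of-two exponent, rounding up so the application never
 * gets less than it asked for:
 *
 *      smc_compress_bufsize(16384, ...)  -> 0  (<= SMC_BUF_MIN_SIZE)
 *      smc_compress_bufsize(65536, ...)  -> 2  ((65536 - 1) >> 14 == 3,
 *                                               ilog2(3) + 1 == 2)
 *      smc_compress_bufsize(65537, ...)  -> 3  (rounds up to 128KB)
 *
 *      smc_uncompress_bufsize(2) == 1 << (2 + 14) == 65536
 *
 * The cap at SMCD_DMBE_SIZES / SMCR_RMBE_SIZES limits the result to 1MB
 * (SMC-D) or 512KB (SMC-R), and RMBs are additionally clamped to the
 * maximum scatterlist size.
 */
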
/* try to reuse a sndbuf or rmb description slot for a certain
 * buffer size; if not available, return NULL
 */
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
                                             struct rw_semaphore *lock,
                                             struct list_head *buf_list)
{
        struct smc_buf_desc *buf_slot;

        down_read(lock);
        list_for_each_entry(buf_slot, buf_list, list) {
                if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
                        up_read(lock);
                        return buf_slot;
                }
        }
        up_read(lock);
        return NULL;
}

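/* Illustrative sketch (editor's note): cmpxchg() makes the slot claim atomic
 * under a mere read lock; two racing connections can scan the same list, but
 * only one of them observes the 0 -> 1 transition for a given slot:
 *
 *      if (cmpxchg(&slot->used, 0, 1) == 0)
 *              // this caller won the slot; all others now see used == 1
 *
 * A slot is released in the buffer-unuse path by resetting used to 0, after
 * which it becomes claimable again.
 */
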
/* one of the conditions for announcing a receiver's current window size is
 * that it "results in a minimum increase in the window size of 10% of the
 * receive buffer space" [RFC7609]
 */
static inline int smc_rmb_wnd_update_limit(int rmbe_size)
{
        return max_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}

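/* Worked example (editor's note): for a 64KB RMB the update limit is
 * max(65536 / 10, SOCK_MIN_SNDBUF / 2) == 6553 bytes, so a window update is
 * announced only once at least ~10% of the receive buffer was consumed.
 * For very small RMBs the SOCK_MIN_SNDBUF / 2 floor (a little over 2KB on
 * typical configurations) dominates instead.
 */
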
/* map a buf to a link */
static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
                             struct smc_link *lnk)
{
        int rc, i, nents, offset, buf_size, size, access_flags;
        struct scatterlist *sg;
        void *buf;

        if (buf_desc->is_map_ib[lnk->link_idx])
                return 0;

        if (buf_desc->is_vm) {
                buf = buf_desc->cpu_addr;
                buf_size = buf_desc->len;
                offset = offset_in_page(buf_desc->cpu_addr);
                nents = PAGE_ALIGN(buf_size + offset) / PAGE_SIZE;
        } else {
                nents = 1;
        }

        rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], nents, GFP_KERNEL);
        if (rc)
                return rc;

        if (buf_desc->is_vm) {
                /* virtually contiguous buffer */
                for_each_sg(buf_desc->sgt[lnk->link_idx].sgl, sg, nents, i) {
                        size = min_t(int, PAGE_SIZE - offset, buf_size);
                        sg_set_page(sg, vmalloc_to_page(buf), size, offset);
                        buf += size / sizeof(*buf);
                        buf_size -= size;
                        offset = 0;
                }
        } else {
                /* physically contiguous buffer */
                sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
                           buf_desc->cpu_addr, buf_desc->len);
        }

        /* map sg table to DMA address */
        rc = smc_ib_buf_map_sg(lnk, buf_desc,
                               is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
        /* SMC protocol depends on mapping to one DMA address only */
        if (rc != nents) {
                rc = -EAGAIN;
                goto free_table;
        }

        buf_desc->is_dma_need_sync |=
                smc_ib_is_sg_need_sync(lnk, buf_desc) << lnk->link_idx;

        if (is_rmb || buf_desc->is_vm) {
                /* create a new memory region for the RMB or vzalloced sndbuf */
                access_flags = is_rmb ?
                               IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
                               IB_ACCESS_LOCAL_WRITE;

                rc = smc_ib_get_memory_region(lnk->roce_pd, access_flags,
                                              buf_desc, lnk->link_idx);
                if (rc)
                        goto buf_unmap;
                smc_ib_sync_sg_for_device(lnk, buf_desc,
                                          is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
        }
        buf_desc->is_map_ib[lnk->link_idx] = true;
        return 0;

buf_unmap:
        smc_ib_buf_unmap_sg(lnk, buf_desc,
                            is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
free_table:
        sg_free_table(&buf_desc->sgt[lnk->link_idx]);
        return rc;
}

/* register a new buf on IB device, rmb or vzalloced sndbuf
 * must be called under lgr->llc_conf_mutex lock
 */
int smcr_link_reg_buf(struct smc_link *link, struct smc_buf_desc *buf_desc)
{
        if (list_empty(&link->lgr->list))
                return -ENOLINK;
        if (!buf_desc->is_reg_mr[link->link_idx]) {
                /* register memory region for new buf */
                if (buf_desc->is_vm)
                        buf_desc->mr[link->link_idx]->iova =
                                (uintptr_t)buf_desc->cpu_addr;
                if (smc_wr_reg_send(link, buf_desc->mr[link->link_idx])) {
                        buf_desc->is_reg_err = true;
                        return -EFAULT;
                }
                buf_desc->is_reg_mr[link->link_idx] = true;
        }
        return 0;
}

static int _smcr_buf_map_lgr(struct smc_link *lnk, struct rw_semaphore *lock,
                             struct list_head *lst, bool is_rmb)
{
        struct smc_buf_desc *buf_desc, *bf;
        int rc = 0;

        down_write(lock);
        list_for_each_entry_safe(buf_desc, bf, lst, list) {
                if (!buf_desc->used)
                        continue;
                rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
                if (rc)
                        goto out;
        }
out:
        up_write(lock);
        return rc;
}

/* map all used buffers of lgr for a new link */
int smcr_buf_map_lgr(struct smc_link *lnk)
{
        struct smc_link_group *lgr = lnk->lgr;
        int i, rc = 0;

        for (i = 0; i < SMC_RMBE_SIZES; i++) {
                rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
                                       &lgr->rmbs[i], true);
                if (rc)
                        return rc;
                rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
                                       &lgr->sndbufs[i], false);
                if (rc)
                        return rc;
        }
        return 0;
}

/* register all used buffers of lgr for a new link,
 * must be called under lgr->llc_conf_mutex lock
 */
int smcr_buf_reg_lgr(struct smc_link *lnk)
{
        struct smc_link_group *lgr = lnk->lgr;
        struct smc_buf_desc *buf_desc, *bf;
        int i, rc = 0;

        /* reg all RMBs for a new link */
        down_write(&lgr->rmbs_lock);
        for (i = 0; i < SMC_RMBE_SIZES; i++) {
                list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
                        if (!buf_desc->used)
                                continue;
                        rc = smcr_link_reg_buf(lnk, buf_desc);
                        if (rc) {
                                up_write(&lgr->rmbs_lock);
                                return rc;
                        }
                }
        }
        up_write(&lgr->rmbs_lock);

        if (lgr->buf_type == SMCR_PHYS_CONT_BUFS)
                return rc;

        /* reg all vzalloced sndbufs for a new link */
        down_write(&lgr->sndbufs_lock);
        for (i = 0; i < SMC_RMBE_SIZES; i++) {
                list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i], list) {
                        if (!buf_desc->used || !buf_desc->is_vm)
                                continue;
                        rc = smcr_link_reg_buf(lnk, buf_desc);
                        if (rc) {
                                up_write(&lgr->sndbufs_lock);
                                return rc;
                        }
                }
        }
        up_write(&lgr->sndbufs_lock);
        return rc;
}

static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
                                                bool is_rmb, int bufsize)
{
        struct smc_buf_desc *buf_desc;

        /* try to alloc a new buffer */
        buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
        if (!buf_desc)
                return ERR_PTR(-ENOMEM);

        switch (lgr->buf_type) {
        case SMCR_PHYS_CONT_BUFS:
        case SMCR_MIXED_BUFS:
                buf_desc->order = get_order(bufsize);
                buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
                                              __GFP_NOMEMALLOC | __GFP_COMP |
                                              __GFP_NORETRY | __GFP_ZERO,
                                              buf_desc->order);
                if (buf_desc->pages) {
                        buf_desc->cpu_addr =
                                (void *)page_address(buf_desc->pages);
                        buf_desc->len = bufsize;
                        buf_desc->is_vm = false;
                        break;
                }
                if (lgr->buf_type == SMCR_PHYS_CONT_BUFS)
                        goto out;
                fallthrough;    // try virtually contiguous buf
        case SMCR_VIRT_CONT_BUFS:
                buf_desc->order = get_order(bufsize);
                buf_desc->cpu_addr = vzalloc(PAGE_SIZE << buf_desc->order);
                if (!buf_desc->cpu_addr)
                        goto out;
                buf_desc->pages = NULL;
                buf_desc->len = bufsize;
                buf_desc->is_vm = true;
                break;
        }
        return buf_desc;

out:
        kfree(buf_desc);
        return ERR_PTR(-EAGAIN);
}

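/* Illustrative sketch (editor's note): the allocation strategy above is a
 * fallback chain keyed off lgr->buf_type:
 *
 *      SMCR_PHYS_CONT_BUFS:    alloc_pages() only; fail with -EAGAIN
 *      SMCR_MIXED_BUFS:        alloc_pages(), then fall through to vzalloc()
 *      SMCR_VIRT_CONT_BUFS:    vzalloc() only
 *
 * __GFP_NORETRY and __GFP_NOWARN keep the physically contiguous attempt
 * cheap and quiet, so falling back to a virtually contiguous buffer (or to
 * the next smaller size in __smc_buf_create()) is the expected reaction to
 * memory pressure rather than an error.
 */
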
/* map buf_desc on all usable links,
 * unused buffers stay mapped as long as the link is up
 */
static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
                                     struct smc_buf_desc *buf_desc, bool is_rmb)
{
        int i, rc = 0, cnt = 0;

        /* protect against parallel link reconfiguration */
        mutex_lock(&lgr->llc_conf_mutex);
        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                struct smc_link *lnk = &lgr->lnk[i];

                if (!smc_link_usable(lnk))
                        continue;
                if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
                        rc = -ENOMEM;
                        goto out;
                }
                cnt++;
        }
out:
        mutex_unlock(&lgr->llc_conf_mutex);
        if (!rc && !cnt)
                rc = -EINVAL;
        return rc;
}

static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
                                                bool is_dmb, int bufsize)
{
        struct smc_buf_desc *buf_desc;
        int rc;

        /* try to alloc a new DMB */
        buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
        if (!buf_desc)
                return ERR_PTR(-ENOMEM);
        if (is_dmb) {
                rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
                if (rc) {
                        kfree(buf_desc);
                        if (rc == -ENOMEM)
                                return ERR_PTR(-EAGAIN);
                        if (rc == -ENOSPC)
                                return ERR_PTR(-ENOSPC);
                        return ERR_PTR(-EIO);
                }
                buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
                /* CDC header stored in buf. So, pretend it was smaller */
                buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
        } else {
                buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
                                             __GFP_NOWARN | __GFP_NORETRY |
                                             __GFP_NOMEMALLOC);
                if (!buf_desc->cpu_addr) {
                        kfree(buf_desc);
                        return ERR_PTR(-EAGAIN);
                }
                buf_desc->len = bufsize;
        }
        return buf_desc;
}

static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
{
        struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
        struct smc_connection *conn = &smc->conn;
        struct smc_link_group *lgr = conn->lgr;
        struct list_head *buf_list;
        int bufsize, bufsize_comp;
        struct rw_semaphore *lock;      /* lock buffer list */
        bool is_dgraded = false;

        if (is_rmb)
                /* use socket recv buffer size (w/o overhead) as start value */
                bufsize = smc->sk.sk_rcvbuf / 2;
        else
                /* use socket send buffer size (w/o overhead) as start value */
                bufsize = smc->sk.sk_sndbuf / 2;

        for (bufsize_comp = smc_compress_bufsize(bufsize, is_smcd, is_rmb);
             bufsize_comp >= 0; bufsize_comp--) {
                if (is_rmb) {
                        lock = &lgr->rmbs_lock;
                        buf_list = &lgr->rmbs[bufsize_comp];
                } else {
                        lock = &lgr->sndbufs_lock;
                        buf_list = &lgr->sndbufs[bufsize_comp];
                }
                bufsize = smc_uncompress_bufsize(bufsize_comp);

                /* check for reusable slot in the link group */
                buf_desc = smc_buf_get_slot(bufsize_comp, lock, buf_list);
                if (buf_desc) {
                        buf_desc->is_dma_need_sync = 0;
                        SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
                        SMC_STAT_BUF_REUSE(smc, is_smcd, is_rmb);
                        break; /* found reusable slot */
                }

                if (is_smcd)
                        buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
                else
                        buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);

                if (PTR_ERR(buf_desc) == -ENOMEM)
                        break;
                if (IS_ERR(buf_desc)) {
                        if (!is_dgraded) {
                                is_dgraded = true;
                                SMC_STAT_RMB_DOWNGRADED(smc, is_smcd, is_rmb);
                        }
                        continue;
                }

                SMC_STAT_RMB_ALLOC(smc, is_smcd, is_rmb);
                SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
                buf_desc->used = 1;
                down_write(lock);
                list_add(&buf_desc->list, buf_list);
                up_write(lock);
                break; /* found */
        }

        if (IS_ERR(buf_desc))
                return PTR_ERR(buf_desc);

        if (!is_smcd) {
                if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
                        smcr_buf_unuse(buf_desc, is_rmb, lgr);
                        return -ENOMEM;
                }
        }

        if (is_rmb) {
                conn->rmb_desc = buf_desc;
                conn->rmbe_size_comp = bufsize_comp;
                smc->sk.sk_rcvbuf = bufsize * 2;
                atomic_set(&conn->bytes_to_rcv, 0);
                conn->rmbe_update_limit =
                        smc_rmb_wnd_update_limit(buf_desc->len);
                if (is_smcd)
                        smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
        } else {
                conn->sndbuf_desc = buf_desc;
                smc->sk.sk_sndbuf = bufsize * 2;
                atomic_set(&conn->sndbuf_space, bufsize);
        }
        return 0;
}

void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
        if (!conn->sndbuf_desc->is_dma_need_sync)
                return;
        if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd ||
            !smc_link_active(conn->lnk))
                return;
        smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
        int i;

        if (!conn->rmb_desc->is_dma_need_sync)
                return;
        if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd)
                return;
        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                if (!smc_link_active(&conn->lgr->lnk[i]))
                        continue;
                smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
                                       DMA_FROM_DEVICE);
        }
}

/* create the send and receive buffer for an SMC socket;
 * receive buffers are called RMBs.
 * (Even though the SMC protocol allows more than one RMB-element per RMB,
 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
 * extra RMB for every connection in a link group.)
 */
int smc_buf_create(struct smc_sock *smc, bool is_smcd)
{
        int rc;

        /* create send buffer */
        rc = __smc_buf_create(smc, is_smcd, false);
        if (rc)
                return rc;
        /* create rmb */
        rc = __smc_buf_create(smc, is_smcd, true);
        if (rc) {
                down_write(&smc->conn.lgr->sndbufs_lock);
                list_del(&smc->conn.sndbuf_desc->list);
                up_write(&smc->conn.lgr->sndbufs_lock);
                smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
                smc->conn.sndbuf_desc = NULL;
        }
        return rc;
}

static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
        int i;

        for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
                if (!test_and_set_bit(i, lgr->rtokens_used_mask))
                        return i;
        }
        return -ENOSPC;
}

static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
                                   u32 rkey)
{
        int i;

        for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
                if (test_bit(i, lgr->rtokens_used_mask) &&
                    lgr->rtokens[i][lnk_idx].rkey == rkey)
                        return i;
        }
        return -ENOENT;
}

/* set rtoken for a new link to an existing rmb */
void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
                    __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
{
        int rtok_idx;

        rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
        if (rtok_idx == -ENOENT)
                return;
        lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
        lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
}

/* set rtoken for a new link whose link_id is given */
void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
                     __be64 nw_vaddr, __be32 nw_rkey)
{
        u64 dma_addr = be64_to_cpu(nw_vaddr);
        u32 rkey = ntohl(nw_rkey);
        bool found = false;
        int link_idx;

        for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
                if (lgr->lnk[link_idx].link_id == link_id) {
                        found = true;
                        break;
                }
        }
        if (!found)
                return;
        lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
        lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
}

/* add a new rtoken from peer */
int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
{
        struct smc_link_group *lgr = smc_get_lgr(lnk);
        u64 dma_addr = be64_to_cpu(nw_vaddr);
        u32 rkey = ntohl(nw_rkey);
        int i;

        for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
                if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
                    lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
                    test_bit(i, lgr->rtokens_used_mask)) {
                        /* already in list */
                        return i;
                }
        }
        i = smc_rmb_reserve_rtoken_idx(lgr);
        if (i < 0)
                return i;
        lgr->rtokens[i][lnk->link_idx].rkey = rkey;
        lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
        return i;
}

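/* Illustrative sketch (editor's note): the rtokens array is indexed
 * [rtoken_idx][link_idx], so one remote RMB keeps a consistent rtoken index
 * across all links of the group while each link stores its own rkey and
 * DMA address. A hypothetical peer-supplied token would be recorded and
 * later dropped like this:
 *
 *      int idx = smc_rtoken_add(lnk, clc->r0.rmb_dma_addr,
 *                               clc->r0.rmb_rkey);     // returns rtoken_idx
 *      ...
 *      smc_rtoken_delete(lnk, clc->r0.rmb_rkey);       // clears all links
 *
 * smc_rtoken_add() is idempotent for an already-known (rkey, dma_addr) pair
 * and returns -ENOSPC once all SMC_RMBS_PER_LGR_MAX slots are taken.
 */
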
/* delete an rtoken from all links */
int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
{
        struct smc_link_group *lgr = smc_get_lgr(lnk);
        u32 rkey = ntohl(nw_rkey);
        int i, j;

        for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
                if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
                    test_bit(i, lgr->rtokens_used_mask)) {
                        for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
                                lgr->rtokens[i][j].rkey = 0;
                                lgr->rtokens[i][j].dma_addr = 0;
                        }
                        clear_bit(i, lgr->rtokens_used_mask);
                        return 0;
                }
        }
        return -ENOENT;
}

/* save rkey and dma_addr received from peer during clc handshake */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
                            struct smc_link *lnk,
                            struct smc_clc_msg_accept_confirm *clc)
{
        conn->rtoken_idx = smc_rtoken_add(lnk, clc->r0.rmb_dma_addr,
                                          clc->r0.rmb_rkey);
        if (conn->rtoken_idx < 0)
                return conn->rtoken_idx;
        return 0;
}

static void smc_core_going_away(void)
{
        struct smc_ib_device *smcibdev;
        struct smcd_dev *smcd;

        mutex_lock(&smc_ib_devices.mutex);
        list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
                int i;

                for (i = 0; i < SMC_MAX_PORTS; i++)
                        set_bit(i, smcibdev->ports_going_away);
        }
        mutex_unlock(&smc_ib_devices.mutex);

        mutex_lock(&smcd_dev_list.mutex);
        list_for_each_entry(smcd, &smcd_dev_list.list, list) {
                smcd->going_away = 1;
        }
        mutex_unlock(&smcd_dev_list.mutex);
}

/* Clean up all SMC link groups */
static void smc_lgrs_shutdown(void)
{
        struct smcd_dev *smcd;

        smc_core_going_away();

        smc_smcr_terminate_all(NULL);

        mutex_lock(&smcd_dev_list.mutex);
        list_for_each_entry(smcd, &smcd_dev_list.list, list)
                smc_smcd_terminate_all(smcd);
        mutex_unlock(&smcd_dev_list.mutex);
}

static int smc_core_reboot_event(struct notifier_block *this,
                                 unsigned long event, void *ptr)
{
        smc_lgrs_shutdown();
        smc_ib_unregister_client();
        return 0;
}

static struct notifier_block smc_reboot_notifier = {
        .notifier_call = smc_core_reboot_event,
};

int __init smc_core_init(void)
{
        return register_reboot_notifier(&smc_reboot_notifier);
}

/* Called (from smc_exit) when module is removed */
void smc_core_exit(void)
{
        unregister_reboot_notifier(&smc_reboot_notifier);
        smc_lgrs_shutdown();
}