// SPDX-License-Identifier: GPL-2.0
/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  Socket Closing - normal and abnormal
 *
 *  Copyright IBM Corp. 2016
 *
 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/workqueue.h>
#include <linux/sched/signal.h>

#include <net/sock.h>
#include <net/tcp.h>

#include "smc.h"
#include "smc_tx.h"
#include "smc_cdc.h"
#include "smc_close.h"

/* release the clcsock that is assigned to the smc_sock */
void smc_clcsock_release(struct smc_sock *smc)
{
        struct socket *tcp;

        if (smc->listen_smc && current_work() != &smc->smc_listen_work)
                cancel_work_sync(&smc->smc_listen_work);
        mutex_lock(&smc->clcsock_release_lock);
        if (smc->clcsock) {
                tcp = smc->clcsock;
                smc->clcsock = NULL;
                sock_release(tcp);
        }
        mutex_unlock(&smc->clcsock_release_lock);
}

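/* drain the queue of child sockets that were never accept()ed on a closing
 * listen socket, and close each of them
 */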
static void smc_close_cleanup_listen(struct sock *parent)
{
        struct sock *sk;

        /* Close non-accepted connections */
        while ((sk = smc_accept_dequeue(parent, NULL)))
                smc_close_non_accepted(sk);
}

/* wait for pending sends in the sndbuf to be transmitted */
static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
{
        DEFINE_WAIT_FUNC(wait, woken_wake_function);
        struct sock *sk = &smc->sk;

        if (!timeout)
                return;

        if (!smc_tx_prepared_sends(&smc->conn))
                return;

        /* Send out corked data remaining in sndbuf */
        smc_tx_pending(&smc->conn);

        smc->wait_close_tx_prepared = 1;
        add_wait_queue(sk_sleep(sk), &wait);
        while (!signal_pending(current) && timeout) {
                int rc;

                rc = sk_wait_event(sk, &timeout,
                                   !smc_tx_prepared_sends(&smc->conn) ||
                                   READ_ONCE(sk->sk_err) == ECONNABORTED ||
                                   READ_ONCE(sk->sk_err) == ECONNRESET ||
                                   smc->conn.killed,
                                   &wait);
                if (rc)
                        break;
        }
        remove_wait_queue(sk_sleep(sk), &wait);
        smc->wait_close_tx_prepared = 0;
}

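/* wake up a closer blocked in smc_close_stream_wait() once all prepared
 * sends have left the sndbuf
 */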
void smc_close_wake_tx_prepared(struct smc_sock *smc)
{
        if (smc->wait_close_tx_prepared)
                /* wake up socket closing */
                smc->sk.sk_state_change(&smc->sk);
}

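/* announce shutdown-write to the peer: set peer_done_writing and send a
 * CDC message
 */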
static int smc_close_wr(struct smc_connection *conn)
{
        conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;

        return smc_cdc_get_slot_and_msg_send(conn);
}

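/* send the final closing CDC message: announce an abort if unread data is
 * still queued in the local rcvbuf, otherwise a regular close
 */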
static int smc_close_final(struct smc_connection *conn)
{
        if (atomic_read(&conn->bytes_to_rcv))
                conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
        else
                conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;
        if (conn->killed)
                return -EPIPE;

        return smc_cdc_get_slot_and_msg_send(conn);
}

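/* announce abnormal connection termination (peer_conn_abort) to the peer */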
int smc_close_abort(struct smc_connection *conn)
{
        conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;

        return smc_cdc_get_slot_and_msg_send(conn);
}

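/* cancel any pending close_work and tx_work; the socket lock is dropped
 * meanwhile so that an already running worker that takes the lock can finish
 */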
static void smc_close_cancel_work(struct smc_sock *smc)
{
        struct sock *sk = &smc->sk;

        release_sock(sk);
        if (cancel_work_sync(&smc->conn.close_work))
                sock_put(sk);
        cancel_delayed_work_sync(&smc->conn.tx_work);
        lock_sock(sk);
}

/* terminate smc socket abnormally - active abort
 * link group is terminated, i.e. RDMA communication no longer possible
 */
void smc_close_active_abort(struct smc_sock *smc)
{
        struct sock *sk = &smc->sk;
        bool release_clcsock = false;

        if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) {
                sk->sk_err = ECONNABORTED;
                if (smc->clcsock && smc->clcsock->sk)
                        tcp_abort(smc->clcsock->sk, ECONNABORTED);
        }
        switch (sk->sk_state) {
        case SMC_ACTIVE:
        case SMC_APPCLOSEWAIT1:
        case SMC_APPCLOSEWAIT2:
                sk->sk_state = SMC_PEERABORTWAIT;
                smc_close_cancel_work(smc);
                if (sk->sk_state != SMC_PEERABORTWAIT)
                        break;
                sk->sk_state = SMC_CLOSED;
                sock_put(sk); /* (postponed) passive closing */
                break;
        case SMC_PEERCLOSEWAIT1:
        case SMC_PEERCLOSEWAIT2:
        case SMC_PEERFINCLOSEWAIT:
                sk->sk_state = SMC_PEERABORTWAIT;
                smc_close_cancel_work(smc);
                if (sk->sk_state != SMC_PEERABORTWAIT)
                        break;
                sk->sk_state = SMC_CLOSED;
                smc_conn_free(&smc->conn);
                release_clcsock = true;
                sock_put(sk); /* passive closing */
                break;
        case SMC_PROCESSABORT:
        case SMC_APPFINCLOSEWAIT:
                sk->sk_state = SMC_PEERABORTWAIT;
                smc_close_cancel_work(smc);
                if (sk->sk_state != SMC_PEERABORTWAIT)
                        break;
                sk->sk_state = SMC_CLOSED;
                smc_conn_free(&smc->conn);
                release_clcsock = true;
                break;
        case SMC_INIT:
        case SMC_PEERABORTWAIT:
        case SMC_CLOSED:
                break;
        }

        smc_sock_set_flag(sk, SOCK_DEAD);
        sk->sk_state_change(sk);

        if (release_clcsock) {
                release_sock(sk);
                smc_clcsock_release(smc);
                lock_sock(sk);
        }
}

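/* true if a closing CDC message (regular close or abort) was already sent */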
static inline bool smc_close_sent_any_close(struct smc_connection *conn)
{
        return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort ||
               conn->local_tx_ctrl.conn_state_flags.peer_conn_closed;
}

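/* Active close of the socket, e.g. as triggered by __smc_release(); drives
 * the close state machine from the current state onwards. Entered with the
 * socket lock held; the lock is dropped temporarily around work cancellation.
 */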
int smc_close_active(struct smc_sock *smc)
{
        struct smc_cdc_conn_state_flags *txflags =
                &smc->conn.local_tx_ctrl.conn_state_flags;
        struct smc_connection *conn = &smc->conn;
        struct sock *sk = &smc->sk;
        int old_state;
        long timeout;
        int rc = 0;
        int rc1 = 0;

        timeout = current->flags & PF_EXITING ?
                  0 : sock_flag(sk, SOCK_LINGER) ?
                      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

        old_state = sk->sk_state;
again:
        switch (sk->sk_state) {
        case SMC_INIT:
                sk->sk_state = SMC_CLOSED;
                break;
        case SMC_LISTEN:
                sk->sk_state = SMC_CLOSED;
                sk->sk_state_change(sk); /* wake up accept */
                if (smc->clcsock && smc->clcsock->sk) {
                        write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
                        smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready,
                                               &smc->clcsk_data_ready);
                        smc->clcsock->sk->sk_user_data = NULL;
                        write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
                        rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
                }
                smc_close_cleanup_listen(sk);
                release_sock(sk);
                flush_work(&smc->tcp_listen_work);
                lock_sock(sk);
                break;
        case SMC_ACTIVE:
                smc_close_stream_wait(smc, timeout);
                release_sock(sk);
                cancel_delayed_work_sync(&conn->tx_work);
                lock_sock(sk);
                if (sk->sk_state == SMC_ACTIVE) {
                        /* send close request */
                        rc = smc_close_final(conn);
                        sk->sk_state = SMC_PEERCLOSEWAIT1;

                        /* actively shut down the clcsock before the peer
                         * closes it, to prevent the peer from entering the
                         * TIME_WAIT state.
                         */
                        if (smc->clcsock && smc->clcsock->sk) {
                                rc1 = kernel_sock_shutdown(smc->clcsock,
                                                           SHUT_RDWR);
                                rc = rc ? rc : rc1;
                        }
                } else {
                        /* peer event has changed the state */
                        goto again;
                }
                break;
        case SMC_APPFINCLOSEWAIT:
                /* socket already shut down wr or both (active close) */
                if (txflags->peer_done_writing &&
                    !smc_close_sent_any_close(conn)) {
                        /* only shutdown wr done so far; send close request */
                        rc = smc_close_final(conn);
                }
                sk->sk_state = SMC_CLOSED;
                break;
        case SMC_APPCLOSEWAIT1:
        case SMC_APPCLOSEWAIT2:
                if (!smc_cdc_rxed_any_close(conn))
                        smc_close_stream_wait(smc, timeout);
                release_sock(sk);
                cancel_delayed_work_sync(&conn->tx_work);
                lock_sock(sk);
                if (sk->sk_state != SMC_APPCLOSEWAIT1 &&
                    sk->sk_state != SMC_APPCLOSEWAIT2)
                        goto again;
                /* confirm close from peer */
                rc = smc_close_final(conn);
                if (smc_cdc_rxed_any_close(conn)) {
                        /* peer has closed the socket already */
                        sk->sk_state = SMC_CLOSED;
                        sock_put(sk); /* postponed passive closing */
                } else {
                        /* peer has just issued a shutdown write */
                        sk->sk_state = SMC_PEERFINCLOSEWAIT;
                }
                break;
        case SMC_PEERCLOSEWAIT1:
        case SMC_PEERCLOSEWAIT2:
                if (txflags->peer_done_writing &&
                    !smc_close_sent_any_close(conn)) {
                        /* only shutdown wr done so far; send close request */
                        rc = smc_close_final(conn);
                }
                /* peer sending PeerConnectionClosed will cause transition */
                break;
        case SMC_PEERFINCLOSEWAIT:
                /* peer sending PeerConnectionClosed will cause transition */
                break;
        case SMC_PROCESSABORT:
                rc = smc_close_abort(conn);
                sk->sk_state = SMC_CLOSED;
                break;
        case SMC_PEERABORTWAIT:
                sk->sk_state = SMC_CLOSED;
                break;
        case SMC_CLOSED:
                /* nothing to do, add tracing in future patch */
                break;
        }

        if (old_state != sk->sk_state)
                sk->sk_state_change(sk);
        return rc;
}

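/* the peer signalled peer_conn_abort: move the socket into the matching
 * abort state
 */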
static void smc_close_passive_abort_received(struct smc_sock *smc)
{
        struct smc_cdc_conn_state_flags *txflags =
                &smc->conn.local_tx_ctrl.conn_state_flags;
        struct sock *sk = &smc->sk;

        switch (sk->sk_state) {
        case SMC_INIT:
        case SMC_ACTIVE:
        case SMC_APPCLOSEWAIT1:
                sk->sk_state = SMC_PROCESSABORT;
                sock_put(sk); /* passive closing */
                break;
        case SMC_APPFINCLOSEWAIT:
                sk->sk_state = SMC_PROCESSABORT;
                break;
        case SMC_PEERCLOSEWAIT1:
        case SMC_PEERCLOSEWAIT2:
                if (txflags->peer_done_writing &&
                    !smc_close_sent_any_close(&smc->conn))
                        /* just shutdown, but not yet closed locally */
                        sk->sk_state = SMC_PROCESSABORT;
                else
                        sk->sk_state = SMC_CLOSED;
                sock_put(sk); /* passive closing */
                break;
        case SMC_APPCLOSEWAIT2:
        case SMC_PEERFINCLOSEWAIT:
                sk->sk_state = SMC_CLOSED;
                sock_put(sk); /* passive closing */
                break;
        case SMC_PEERABORTWAIT:
                sk->sk_state = SMC_CLOSED;
                break;
        case SMC_PROCESSABORT:
                /* nothing to do, add tracing in future patch */
                break;
        }
}

/* Either some kind of closing has been received from the peer
 * (peer_conn_closed, peer_conn_abort, or peer_done_writing),
 * or the link group of the connection terminates abnormally.
 */
static void smc_close_passive_work(struct work_struct *work)
{
        struct smc_connection *conn = container_of(work,
                                                   struct smc_connection,
                                                   close_work);
        struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
        struct smc_cdc_conn_state_flags *rxflags;
        bool release_clcsock = false;
        struct sock *sk = &smc->sk;
        int old_state;

        lock_sock(sk);
        old_state = sk->sk_state;

        rxflags = &conn->local_rx_ctrl.conn_state_flags;
        if (rxflags->peer_conn_abort) {
                /* peer has not received all data */
                smc_close_passive_abort_received(smc);
                release_sock(sk);
                cancel_delayed_work_sync(&conn->tx_work);
                lock_sock(sk);
                goto wakeup;
        }

        switch (sk->sk_state) {
        case SMC_INIT:
                sk->sk_state = SMC_APPCLOSEWAIT1;
                break;
        case SMC_ACTIVE:
                sk->sk_state = SMC_APPCLOSEWAIT1;
                /* postpone sock_put() for passive closing to cover
                 * received SEND_SHUTDOWN as well
                 */
                break;
        case SMC_PEERCLOSEWAIT1:
                if (rxflags->peer_done_writing)
                        sk->sk_state = SMC_PEERCLOSEWAIT2;
                fallthrough;
                /* to check for closing */
        case SMC_PEERCLOSEWAIT2:
                if (!smc_cdc_rxed_any_close(conn))
                        break;
                if (sock_flag(sk, SOCK_DEAD) &&
                    smc_close_sent_any_close(conn)) {
                        /* smc_release has already been called locally */
                        sk->sk_state = SMC_CLOSED;
                } else {
                        /* just shutdown, but not yet closed locally */
                        sk->sk_state = SMC_APPFINCLOSEWAIT;
                }
                sock_put(sk); /* passive closing */
                break;
        case SMC_PEERFINCLOSEWAIT:
                if (smc_cdc_rxed_any_close(conn)) {
                        sk->sk_state = SMC_CLOSED;
                        sock_put(sk); /* passive closing */
                }
                break;
        case SMC_APPCLOSEWAIT1:
        case SMC_APPCLOSEWAIT2:
                /* postpone sock_put() for passive closing to cover
                 * received SEND_SHUTDOWN as well
                 */
                break;
        case SMC_APPFINCLOSEWAIT:
        case SMC_PEERABORTWAIT:
        case SMC_PROCESSABORT:
        case SMC_CLOSED:
                /* nothing to do, add tracing in future patch */
                break;
        }

wakeup:
        sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
        sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */

        if (old_state != sk->sk_state) {
                sk->sk_state_change(sk);
                if ((sk->sk_state == SMC_CLOSED) &&
                    (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
                        smc_conn_free(conn);
                        if (smc->clcsock)
                                release_clcsock = true;
                }
        }
        release_sock(sk);
        if (release_clcsock)
                smc_clcsock_release(smc);
        sock_put(sk); /* sock_hold done by schedulers of close_work */
}

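/* shutdown-write handling for an SMC socket, e.g. for shutdown(SHUT_WR):
 * flush pending sends, then announce shutdown-write to the peer
 */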
int smc_close_shutdown_write(struct smc_sock *smc)
{
        struct smc_connection *conn = &smc->conn;
        struct sock *sk = &smc->sk;
        int old_state;
        long timeout;
        int rc = 0;

        timeout = current->flags & PF_EXITING ?
                  0 : sock_flag(sk, SOCK_LINGER) ?
                      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

        old_state = sk->sk_state;
again:
        switch (sk->sk_state) {
        case SMC_ACTIVE:
                smc_close_stream_wait(smc, timeout);
                release_sock(sk);
                cancel_delayed_work_sync(&conn->tx_work);
                lock_sock(sk);
                if (sk->sk_state != SMC_ACTIVE)
                        goto again;
                /* send close wr request */
                rc = smc_close_wr(conn);
                sk->sk_state = SMC_PEERCLOSEWAIT1;
                break;
        case SMC_APPCLOSEWAIT1:
                /* passive close */
                if (!smc_cdc_rxed_any_close(conn))
                        smc_close_stream_wait(smc, timeout);
                release_sock(sk);
                cancel_delayed_work_sync(&conn->tx_work);
                lock_sock(sk);
                if (sk->sk_state != SMC_APPCLOSEWAIT1)
                        goto again;
                /* confirm close from peer */
                rc = smc_close_wr(conn);
                sk->sk_state = SMC_APPCLOSEWAIT2;
                break;
        case SMC_APPCLOSEWAIT2:
        case SMC_PEERFINCLOSEWAIT:
        case SMC_PEERCLOSEWAIT1:
        case SMC_PEERCLOSEWAIT2:
        case SMC_APPFINCLOSEWAIT:
        case SMC_PROCESSABORT:
        case SMC_PEERABORTWAIT:
                /* nothing to do, add tracing in future patch */
                break;
        }

        if (old_state != sk->sk_state)
                sk->sk_state_change(sk);
        return rc;
}

/* Initialize close properties on connection establishment. */
void smc_close_init(struct smc_sock *smc)
{
        INIT_WORK(&smc->conn.close_work, smc_close_passive_work);
}