GNU Linux-libre 5.4.257-gnu1
[releases.git] / fs / afs / server.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* AFS server record management
3  *
4  * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
5  * Written by David Howells (dhowells@redhat.com)
6  */
7
8 #include <linux/sched.h>
9 #include <linux/slab.h>
10 #include "afs_fs.h"
11 #include "internal.h"
12 #include "protocol_yfs.h"
13
14 static unsigned afs_server_gc_delay = 10;       /* Server record timeout in seconds */
15 static unsigned afs_server_update_delay = 30;   /* Time till VLDB recheck in secs */
16 static atomic_t afs_server_debug_id;
17
18 static void afs_inc_servers_outstanding(struct afs_net *net)
19 {
20         atomic_inc(&net->servers_outstanding);
21 }
22
23 static void afs_dec_servers_outstanding(struct afs_net *net)
24 {
25         if (atomic_dec_and_test(&net->servers_outstanding))
26                 wake_up_var(&net->servers_outstanding);
27 }
28
29 /*
30  * Find a server by one of its addresses.
31  */
32 struct afs_server *afs_find_server(struct afs_net *net,
33                                    const struct sockaddr_rxrpc *srx)
34 {
35         const struct afs_addr_list *alist;
36         struct afs_server *server = NULL;
37         unsigned int i;
38         int seq = 0, diff;
39
40         rcu_read_lock();
41
42         do {
43                 if (server)
44                         afs_put_server(net, server, afs_server_trace_put_find_rsq);
45                 server = NULL;
46                 read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
47
48                 if (srx->transport.family == AF_INET6) {
49                         const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
50                         hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
51                                 alist = rcu_dereference(server->addresses);
52                                 for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
53                                         b = &alist->addrs[i].transport.sin6;
54                                         diff = ((u16 __force)a->sin6_port -
55                                                 (u16 __force)b->sin6_port);
56                                         if (diff == 0)
57                                                 diff = memcmp(&a->sin6_addr,
58                                                               &b->sin6_addr,
59                                                               sizeof(struct in6_addr));
60                                         if (diff == 0)
61                                                 goto found;
62                                 }
63                         }
64                 } else {
65                         const struct sockaddr_in *a = &srx->transport.sin, *b;
66                         hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
67                                 alist = rcu_dereference(server->addresses);
68                                 for (i = 0; i < alist->nr_ipv4; i++) {
69                                         b = &alist->addrs[i].transport.sin;
70                                         diff = ((u16 __force)a->sin_port -
71                                                 (u16 __force)b->sin_port);
72                                         if (diff == 0)
73                                                 diff = ((u32 __force)a->sin_addr.s_addr -
74                                                         (u32 __force)b->sin_addr.s_addr);
75                                         if (diff == 0)
76                                                 goto found;
77                                 }
78                         }
79                 }
80
81                 server = NULL;
82         found:
83                 if (server && !atomic_inc_not_zero(&server->usage))
84                         server = NULL;
85
86         } while (need_seqretry(&net->fs_addr_lock, seq));
87
88         done_seqretry(&net->fs_addr_lock, seq);
89
90         rcu_read_unlock();
91         return server;
92 }
93
94 /*
95  * Look up a server by its UUID
96  */
97 struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
98 {
99         struct afs_server *server = NULL;
100         struct rb_node *p;
101         int diff, seq = 0;
102
103         _enter("%pU", uuid);
104
105         do {
106                 /* Unfortunately, rbtree walking doesn't give reliable results
107                  * under just the RCU read lock, so we have to check for
108                  * changes.
109                  */
110                 if (server)
111                         afs_put_server(net, server, afs_server_trace_put_uuid_rsq);
112                 server = NULL;
113
114                 read_seqbegin_or_lock(&net->fs_lock, &seq);
115
116                 p = net->fs_servers.rb_node;
117                 while (p) {
118                         server = rb_entry(p, struct afs_server, uuid_rb);
119
120                         diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
121                         if (diff < 0) {
122                                 p = p->rb_left;
123                         } else if (diff > 0) {
124                                 p = p->rb_right;
125                         } else {
126                                 afs_get_server(server, afs_server_trace_get_by_uuid);
127                                 break;
128                         }
129
130                         server = NULL;
131                 }
132         } while (need_seqretry(&net->fs_lock, seq));
133
134         done_seqretry(&net->fs_lock, seq);
135
136         _leave(" = %p", server);
137         return server;
138 }
139
140 /*
141  * Install a server record in the namespace tree
142  */
143 static struct afs_server *afs_install_server(struct afs_net *net,
144                                              struct afs_server *candidate)
145 {
146         const struct afs_addr_list *alist;
147         struct afs_server *server;
148         struct rb_node **pp, *p;
149         int ret = -EEXIST, diff;
150
151         _enter("%p", candidate);
152
153         write_seqlock(&net->fs_lock);
154
155         /* Firstly install the server in the UUID lookup tree */
156         pp = &net->fs_servers.rb_node;
157         p = NULL;
158         while (*pp) {
159                 p = *pp;
160                 _debug("- consider %p", p);
161                 server = rb_entry(p, struct afs_server, uuid_rb);
162                 diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
163                 if (diff < 0)
164                         pp = &(*pp)->rb_left;
165                 else if (diff > 0)
166                         pp = &(*pp)->rb_right;
167                 else
168                         goto exists;
169         }
170
171         server = candidate;
172         rb_link_node(&server->uuid_rb, p, pp);
173         rb_insert_color(&server->uuid_rb, &net->fs_servers);
174         hlist_add_head_rcu(&server->proc_link, &net->fs_proc);
175
176         write_seqlock(&net->fs_addr_lock);
177         alist = rcu_dereference_protected(server->addresses,
178                                           lockdep_is_held(&net->fs_addr_lock.lock));
179
180         /* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
181          * it in the IPv4 and/or IPv6 reverse-map lists.
182          *
183          * TODO: For speed we want to use something other than a flat list
184          * here; even sorting the list in terms of lowest address would help a
185          * bit, but anything we might want to do gets messy and memory
186          * intensive.
187          */
188         if (alist->nr_ipv4 > 0)
189                 hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
190         if (alist->nr_addrs > alist->nr_ipv4)
191                 hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);
192
193         write_sequnlock(&net->fs_addr_lock);
194         ret = 0;
195
196 exists:
197         afs_get_server(server, afs_server_trace_get_install);
198         write_sequnlock(&net->fs_lock);
199         return server;
200 }
201
202 /*
203  * allocate a new server record
204  */
205 static struct afs_server *afs_alloc_server(struct afs_net *net,
206                                            const uuid_t *uuid,
207                                            struct afs_addr_list *alist)
208 {
209         struct afs_server *server;
210
211         _enter("");
212
213         server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
214         if (!server)
215                 goto enomem;
216
217         atomic_set(&server->usage, 1);
218         server->debug_id = atomic_inc_return(&afs_server_debug_id);
219         RCU_INIT_POINTER(server->addresses, alist);
220         server->addr_version = alist->version;
221         server->uuid = *uuid;
222         server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
223         rwlock_init(&server->fs_lock);
224         INIT_HLIST_HEAD(&server->cb_volumes);
225         rwlock_init(&server->cb_break_lock);
226         init_waitqueue_head(&server->probe_wq);
227         spin_lock_init(&server->probe_lock);
228
229         afs_inc_servers_outstanding(net);
230         trace_afs_server(server, 1, afs_server_trace_alloc);
231         _leave(" = %p", server);
232         return server;
233
234 enomem:
235         _leave(" = NULL [nomem]");
236         return NULL;
237 }
238
239 /*
240  * Look up an address record for a server
241  */
242 static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
243                                                  struct key *key, const uuid_t *uuid)
244 {
245         struct afs_vl_cursor vc;
246         struct afs_addr_list *alist = NULL;
247         int ret;
248
249         ret = -ERESTARTSYS;
250         if (afs_begin_vlserver_operation(&vc, cell, key)) {
251                 while (afs_select_vlserver(&vc)) {
252                         if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags))
253                                 alist = afs_yfsvl_get_endpoints(&vc, uuid);
254                         else
255                                 alist = afs_vl_get_addrs_u(&vc, uuid);
256                 }
257
258                 ret = afs_end_vlserver_operation(&vc);
259         }
260
261         return ret < 0 ? ERR_PTR(ret) : alist;
262 }
263
264 /*
265  * Get or create a fileserver record.
266  */
267 struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
268                                      const uuid_t *uuid)
269 {
270         struct afs_addr_list *alist;
271         struct afs_server *server, *candidate;
272
273         _enter("%p,%pU", cell->net, uuid);
274
275         server = afs_find_server_by_uuid(cell->net, uuid);
276         if (server)
277                 return server;
278
279         alist = afs_vl_lookup_addrs(cell, key, uuid);
280         if (IS_ERR(alist))
281                 return ERR_CAST(alist);
282
283         candidate = afs_alloc_server(cell->net, uuid, alist);
284         if (!candidate) {
285                 afs_put_addrlist(alist);
286                 return ERR_PTR(-ENOMEM);
287         }
288
289         server = afs_install_server(cell->net, candidate);
290         if (server != candidate) {
291                 afs_put_addrlist(alist);
292                 kfree(candidate);
293         }
294
295         _leave(" = %p{%d}", server, atomic_read(&server->usage));
296         return server;
297 }
298
299 /*
300  * Set the server timer to fire after a given delay, assuming it's not already
301  * set for an earlier time.
302  */
303 static void afs_set_server_timer(struct afs_net *net, time64_t delay)
304 {
305         if (net->live) {
306                 afs_inc_servers_outstanding(net);
307                 if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
308                         afs_dec_servers_outstanding(net);
309         }
310 }
311
312 /*
313  * Server management timer.  We have an increment on fs_outstanding that we
314  * need to pass along to the work item.
315  */
316 void afs_servers_timer(struct timer_list *timer)
317 {
318         struct afs_net *net = container_of(timer, struct afs_net, fs_timer);
319
320         _enter("");
321         if (!queue_work(afs_wq, &net->fs_manager))
322                 afs_dec_servers_outstanding(net);
323 }
324
325 /*
326  * Get a reference on a server object.
327  */
328 struct afs_server *afs_get_server(struct afs_server *server,
329                                   enum afs_server_trace reason)
330 {
331         unsigned int u = atomic_inc_return(&server->usage);
332
333         trace_afs_server(server, u, reason);
334         return server;
335 }
336
337 /*
338  * Release a reference on a server record.
339  */
340 void afs_put_server(struct afs_net *net, struct afs_server *server,
341                     enum afs_server_trace reason)
342 {
343         unsigned int usage;
344
345         if (!server)
346                 return;
347
348         server->put_time = ktime_get_real_seconds();
349
350         usage = atomic_dec_return(&server->usage);
351
352         trace_afs_server(server, usage, reason);
353
354         if (likely(usage > 0))
355                 return;
356
357         afs_set_server_timer(net, afs_server_gc_delay);
358 }
359
360 static void afs_server_rcu(struct rcu_head *rcu)
361 {
362         struct afs_server *server = container_of(rcu, struct afs_server, rcu);
363
364         trace_afs_server(server, atomic_read(&server->usage),
365                          afs_server_trace_free);
366         afs_put_addrlist(rcu_access_pointer(server->addresses));
367         kfree(server);
368 }
369
370 /*
371  * destroy a dead server
372  */
373 static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
374 {
375         struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
376         struct afs_addr_cursor ac = {
377                 .alist  = alist,
378                 .index  = alist->preferred,
379                 .error  = 0,
380         };
381
382         trace_afs_server(server, atomic_read(&server->usage),
383                          afs_server_trace_give_up_cb);
384
385         if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
386                 afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
387
388         wait_var_event(&server->probe_outstanding,
389                        atomic_read(&server->probe_outstanding) == 0);
390
391         trace_afs_server(server, atomic_read(&server->usage),
392                          afs_server_trace_destroy);
393         call_rcu(&server->rcu, afs_server_rcu);
394         afs_dec_servers_outstanding(net);
395 }
396
397 /*
398  * Garbage collect any expired servers.
399  */
400 static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
401 {
402         struct afs_server *server;
403         bool deleted;
404         int usage;
405
406         while ((server = gc_list)) {
407                 gc_list = server->gc_next;
408
409                 write_seqlock(&net->fs_lock);
410                 usage = 1;
411                 deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
412                 trace_afs_server(server, usage, afs_server_trace_gc);
413                 if (deleted) {
414                         rb_erase(&server->uuid_rb, &net->fs_servers);
415                         hlist_del_rcu(&server->proc_link);
416                 }
417                 write_sequnlock(&net->fs_lock);
418
419                 if (deleted) {
420                         write_seqlock(&net->fs_addr_lock);
421                         if (!hlist_unhashed(&server->addr4_link))
422                                 hlist_del_rcu(&server->addr4_link);
423                         if (!hlist_unhashed(&server->addr6_link))
424                                 hlist_del_rcu(&server->addr6_link);
425                         write_sequnlock(&net->fs_addr_lock);
426                         afs_destroy_server(net, server);
427                 }
428         }
429 }
430
431 /*
432  * Manage the records of servers known to be within a network namespace.  This
433  * includes garbage collecting unused servers.
434  *
435  * Note also that we were given an increment on net->servers_outstanding by
436  * whoever queued us that we need to deal with before returning.
437  */
438 void afs_manage_servers(struct work_struct *work)
439 {
440         struct afs_net *net = container_of(work, struct afs_net, fs_manager);
441         struct afs_server *gc_list = NULL;
442         struct rb_node *cursor;
443         time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
444         bool purging = !net->live;
445
446         _enter("");
447
448         /* Trawl the server list looking for servers that have expired from
449          * lack of use.
450          */
451         read_seqlock_excl(&net->fs_lock);
452
453         for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
454                 struct afs_server *server =
455                         rb_entry(cursor, struct afs_server, uuid_rb);
456                 int usage = atomic_read(&server->usage);
457
458                 _debug("manage %pU %u", &server->uuid, usage);
459
460                 ASSERTCMP(usage, >=, 1);
461                 ASSERTIFCMP(purging, usage, ==, 1);
462
463                 if (usage == 1) {
464                         time64_t expire_at = server->put_time;
465
466                         if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
467                             !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
468                                 expire_at += afs_server_gc_delay;
469                         if (purging || expire_at <= now) {
470                                 server->gc_next = gc_list;
471                                 gc_list = server;
472                         } else if (expire_at < next_manage) {
473                                 next_manage = expire_at;
474                         }
475                 }
476         }
477
478         read_sequnlock_excl(&net->fs_lock);
479
480         /* Update the timer on the way out.  We have to pass an increment on
481          * servers_outstanding in the namespace that we are in to the timer or
482          * the work scheduler.
483          */
484         if (!purging && next_manage < TIME64_MAX) {
485                 now = ktime_get_real_seconds();
486
487                 if (next_manage - now <= 0) {
488                         if (queue_work(afs_wq, &net->fs_manager))
489                                 afs_inc_servers_outstanding(net);
490                 } else {
491                         afs_set_server_timer(net, next_manage - now);
492                 }
493         }
494
495         afs_gc_servers(net, gc_list);
496
497         afs_dec_servers_outstanding(net);
498         _leave(" [%d]", atomic_read(&net->servers_outstanding));
499 }
500
501 static void afs_queue_server_manager(struct afs_net *net)
502 {
503         afs_inc_servers_outstanding(net);
504         if (!queue_work(afs_wq, &net->fs_manager))
505                 afs_dec_servers_outstanding(net);
506 }
507
508 /*
509  * Purge list of servers.
510  */
511 void afs_purge_servers(struct afs_net *net)
512 {
513         _enter("");
514
515         if (del_timer_sync(&net->fs_timer))
516                 atomic_dec(&net->servers_outstanding);
517
518         afs_queue_server_manager(net);
519
520         _debug("wait");
521         wait_var_event(&net->servers_outstanding,
522                        !atomic_read(&net->servers_outstanding));
523         _leave("");
524 }
525
526 /*
527  * Get an update for a server's address list.
528  */
529 static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
530 {
531         struct afs_addr_list *alist, *discard;
532
533         _enter("");
534
535         trace_afs_server(server, atomic_read(&server->usage), afs_server_trace_update);
536
537         alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
538                                     &server->uuid);
539         if (IS_ERR(alist)) {
540                 if ((PTR_ERR(alist) == -ERESTARTSYS ||
541                      PTR_ERR(alist) == -EINTR) &&
542                     !(fc->flags & AFS_FS_CURSOR_INTR) &&
543                     server->addresses) {
544                         _leave(" = t [intr]");
545                         return true;
546                 }
547                 fc->error = PTR_ERR(alist);
548                 _leave(" = f [%d]", fc->error);
549                 return false;
550         }
551
552         discard = alist;
553         if (server->addr_version != alist->version) {
554                 write_lock(&server->fs_lock);
555                 discard = rcu_dereference_protected(server->addresses,
556                                                     lockdep_is_held(&server->fs_lock));
557                 rcu_assign_pointer(server->addresses, alist);
558                 server->addr_version = alist->version;
559                 write_unlock(&server->fs_lock);
560         }
561
562         server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
563         afs_put_addrlist(discard);
564         _leave(" = t");
565         return true;
566 }
567
568 /*
569  * See if a server's address list needs updating.
570  */
571 bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
572 {
573         time64_t now = ktime_get_real_seconds();
574         long diff;
575         bool success;
576         int ret, retries = 0;
577
578         _enter("");
579
580         ASSERT(server);
581
582 retry:
583         diff = READ_ONCE(server->update_at) - now;
584         if (diff > 0) {
585                 _leave(" = t [not now %ld]", diff);
586                 return true;
587         }
588
589         if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
590                 success = afs_update_server_record(fc, server);
591                 clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
592                 wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
593                 _leave(" = %d", success);
594                 return success;
595         }
596
597         ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
598                           (fc->flags & AFS_FS_CURSOR_INTR) ?
599                           TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
600         if (ret == -ERESTARTSYS) {
601                 fc->error = ret;
602                 _leave(" = f [intr]");
603                 return false;
604         }
605
606         retries++;
607         if (retries == 4) {
608                 _leave(" = f [stale]");
609                 ret = -ESTALE;
610                 return false;
611         }
612         goto retry;
613 }