4 * Copyright (c) 2016, Ericsson AB
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the names of the copyright holders nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
19 * Alternatively, this software may be distributed under the terms of the
20 * GNU General Public License ("GPL") version 2 as published by the Free
21 * Software Foundation.
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
27 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
36 #include <net/genetlink.h>
/* Largest number of members one domain record can describe; sizes members[] and caps dom_size() */
42 #define MAX_MON_DOMAIN 64
/* Base period of the monitor timer; it is fed to msecs_to_jiffies(), hence milliseconds */
43 #define MON_TIMEOUT 120000
/* A peer whose down_cnt reaches this value makes tipc_mon_get_state() set state->reset */
44 #define MAX_PEER_DOWN_EVENTS 4
46 /* struct tipc_mon_domain: domain record to be transferred between peers
47 * @len: actual size of domain record
48 * @gen: current generation of sender's domain
49 * @ack_gen: most recent generation of self's domain acked by peer
50 * @member_cnt: number of domain member nodes described in this record
51 * @up_map: bit map indicating which of the members the sender considers up
52 * @members: identity of the domain members
/* Record layout shared by the received (arrv_dom), stored (peer->domain) and cached (mon->cache) copies */
54 struct tipc_mon_domain {
/* Member node addresses; loops in this file use the leading member_cnt entries */
60 u32 members[MAX_MON_DOMAIN];
63 /* struct tipc_peer: state of a peer node and its domain
64 * @addr: tipc node identity of peer
65 * @head_map: shows which other nodes currently consider peer 'up'
66 * @domain: most recent domain record from peer
67 * @hash: position in hashed lookup list
68 * @list: position in linked list, in circular ascending order by 'addr'
69 * @applied: number of reported domain members applied on this monitor list
70 * @is_up: peer is up as seen from this node
71 * @is_head: peer is assigned domain head as seen from this node
72 * @is_local: peer is in local domain and should be continuously monitored
73 * @down_cnt: number of other peers which have reported this one lost
/* Domain record held for this peer; mon_apply_domain() checks it for NULL before use */
77 struct tipc_mon_domain *domain;
/* Link in the hash bucket that get_peer() scans */
78 struct hlist_node hash;
/* Link in the circular peer list walked by peer_prev()/peer_nxt() */
79 struct list_head list;
/* Hash buckets of known peers, indexed by tipc_hashfn(addr) */
88 struct hlist_head peers[NODE_HTABLE_SIZE];
/* Peer entry for the local node; its addr is set from tipc_own_addr() */
90 struct tipc_peer *self;
/* Outgoing copy of the local domain record, kept in network byte order */
92 struct tipc_mon_domain cache;
/* Timer whose handler is mon_timeout() */
96 struct timer_list timer;
/* Timer period in jiffies, computed once in tipc_mon_create() */
97 unsigned long timer_intv;
/* tipc_monitor(): fetch the monitor instance registered for a bearer
 * @net: network namespace whose tipc_net holds the monitors[] table
 * @bearer_id: index into monitors[]; it is not range-checked here
 * The returned pointer may be NULL: tipc_mon_delete() clears the slot.
 */
100 static struct tipc_monitor *tipc_monitor(struct net *net, int bearer_id)
102 return tipc_net(net)->monitors[bearer_id];
/* Size in bytes of a struct tipc_mon_domain holding MAX_MON_DOMAIN members; not static, so other files can reference it */
105 const int tipc_max_domain_size = sizeof(struct tipc_mon_domain);
107 /* dom_rec_len(): actual length of domain record for transport
/* @dom: used only to locate members[] inside the record; its contents are not read
 * @mcnt: number of member entries to account for
 * Returns the byte offset of members[] plus mcnt entries of sizeof(u32) each.
 */
109 static int dom_rec_len(struct tipc_mon_domain *dom, u16 mcnt)
/* Subtracting two void pointers relies on the GNU C extension that gives void a size of 1 */
111 return ((void *)&dom->members - (void *)dom) + (mcnt * sizeof(u32));
114 /* dom_size() : calculate size of own domain based on number of peers
/* @peers: peer count of the monitor (callers pass mon->peer_cnt)
 * Returns a value that never exceeds MAX_MON_DOMAIN.
 */
116 static int dom_size(int peers)
/* i is presumably advanced until i * i >= peers, i.e. roughly sqrt(peers) - TODO confirm */
120 while ((i * i) < peers)
/* Clamp so the result always fits the members[] array */
122 return i < MAX_MON_DOMAIN ? i : MAX_MON_DOMAIN;
/* map_set(): write a one-bit value at position i of a 64-bit up map
 * @up_map: bitmap modified in place
 * @i: bit position; must lie in 0..63 for the shifts to be defined
 * @v: new bit value; anything above 1 would also set higher bits, so pass 0 or 1
 */
125 static void map_set(u64 *up_map, int i, unsigned int v)
/* Clear bit i first so that v == 0 leaves it cleared */
127 *up_map &= ~(1ULL << i);
/* Then OR the new value into position i */
128 *up_map |= ((u64)v << i);
/* map_get(): read bit i of a 64-bit up map
 * @up_map: bitmap to inspect
 * @i: bit position; must lie in 0..63 for the shifts to be defined
 * Returns 1 if the bit is set, 0 otherwise.
 */
131 static int map_get(u64 up_map, int i)
/* Isolate bit i, then shift it down to bit 0 */
133 return (up_map & (1ULL << i)) >> i;
/* peer_prev(): entry linked just before peer in the circular peer list */
136 static struct tipc_peer *peer_prev(struct tipc_peer *peer)
/* peer's own node acts as the list head, so the "last" entry is its predecessor */
138 return list_last_entry(&peer->list, struct tipc_peer, list);
/* peer_nxt(): entry linked just after peer in the circular peer list */
141 static struct tipc_peer *peer_nxt(struct tipc_peer *peer)
/* peer's own node acts as the list head, so the "first" entry is its successor */
143 return list_first_entry(&peer->list, struct tipc_peer, list);
/* peer_head(): walk backwards from peer to the nearest entry flagged is_head
 * The walk starts with peer itself, so a peer that is a head stops it at once.
 * NOTE(review): the loop ends only if some entry in the circular list has
 * is_head set; tipc_mon_create() sets it on self.
 */
146 static struct tipc_peer *peer_head(struct tipc_peer *peer)
148 while (!peer->is_head)
149 peer = peer_prev(peer);
/* get_peer(): look a peer up by node address in the monitor's hash table
 * @mon: monitor whose peers[] buckets are searched
 * @addr: node address compared against each entry's addr
 */
153 static struct tipc_peer *get_peer(struct tipc_monitor *mon, u32 addr)
155 struct tipc_peer *peer;
/* Bucket index derived from the address */
156 unsigned int thash = tipc_hashfn(addr);
/* Only the bucket that addr hashes to is scanned */
158 hlist_for_each_entry(peer, &mon->peers[thash], hash) {
159 if (peer->addr == addr)
/* get_self(): peer entry of the local node for a bearer - presumably mon->self; TODO confirm
 * Callers use the result as the anchor for walking the peer list.
 */
165 static struct tipc_peer *get_self(struct net *net, int bearer_id)
167 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
/* tipc_mon_is_active(): tell whether domain monitoring is in effect
 * @net: namespace supplying the configured threshold
 * @mon: monitor whose peer count is tested
 * Returns true only when peer_cnt is strictly greater than mon_threshold.
 */
172 static inline bool tipc_mon_is_active(struct net *net, struct tipc_monitor *mon)
174 struct tipc_net *tn = tipc_net(net);
176 return mon->peer_cnt > tn->mon_threshold;
179 /* mon_identify_lost_members() : - identify and mark potentially lost members
/* @peer: peer whose domain record or up state has just changed
 * @dom_bef: that peer's domain record as it was before the change
 * applied_bef (used as the loop bound below) is the peer's applied count
 * from before the change; both callers pass the saved peer->applied.
 */
181 static void mon_identify_lost_members(struct tipc_peer *peer,
182 struct tipc_mon_domain *dom_bef,
/* The walk starts at peer; every iteration first steps to the next list entry */
185 struct tipc_peer *member = peer;
/* Record and applied count as they are now, after the change */
186 struct tipc_mon_domain *dom_aft = peer->domain;
187 int applied_aft = peer->applied;
/* Visit each member that was applied before the change */
190 for (i = 0; i < applied_bef; i++) {
191 member = peer_nxt(member);
193 /* Do nothing if self or peer already see member as down */
194 if (!member->is_up || !map_get(dom_bef->up_map, i))
197 /* Loss of local node must be detected by active probing */
198 if (member->is_local)
201 /* Start probing if member was removed from applied domain */
/* NOTE(review): applied counts are used as exclusive bounds elsewhere
 * (i < applied), so i == applied_aft passes this test and reaches the up_map
 * check below - confirm that '<' rather than '<=' is intended
 */
202 if (!applied_aft || (applied_aft < i)) {
/* A non-zero down_cnt makes tipc_mon_get_state() report probing for this member */
203 member->down_cnt = 1;
207 /* Member loss is confirmed if it is still in applied domain */
208 if (!map_get(dom_aft->up_map, i))
213 /* mon_apply_domain() : match a peer's domain record against monitor list
/* @mon: monitor owning the peer list
 * @peer: peer whose stored domain record is compared with the list entries
 *        that follow it
 */
215 static void mon_apply_domain(struct tipc_monitor *mon,
216 struct tipc_peer *peer)
218 struct tipc_mon_domain *dom = peer->domain;
219 struct tipc_peer *member;
/* Guard: a peer without a domain record, or one that is not up, is tested for here */
223 if (!dom || !peer->is_up)
226 /* Scan across domain members and match against monitor list */
/* First candidate is the list entry right after peer */
228 member = peer_nxt(peer);
229 for (i = 0; i < dom->member_cnt; i++) {
230 addr = dom->members[i];
/* Record entry i must name the list entry currently reached; a mismatch presumably ends the scan - TODO confirm */
231 if (addr != member->addr)
234 member = peer_nxt(member);
238 /* mon_update_local_domain() : update after peer addition/removal/up/down
/* @mon: monitor whose own domain record (self->domain) and outgoing copy
 *       (mon->cache) are rebuilt from the current peer list
 */
240 static void mon_update_local_domain(struct tipc_monitor *mon)
242 struct tipc_peer *self = mon->self;
/* cache is the network-byte-order copy; dom is the host-order record */
243 struct tipc_mon_domain *cache = &mon->cache;
244 struct tipc_mon_domain *dom = self->domain;
/* The list walk below starts at self */
245 struct tipc_peer *peer = self;
/* Saved so that a change of the up map can be detected afterwards */
246 u64 prev_up_map = dom->up_map;
250 /* Update local domain size based on current size of cluster */
/* One less than dom_size(), presumably because self is not one of its own members - TODO confirm */
251 member_cnt = dom_size(mon->peer_cnt) - 1;
252 self->applied = member_cnt;
254 /* Update native and cached outgoing local domain records */
255 dom->len = dom_rec_len(dom, member_cnt);
/* diff accumulates every kind of change: member count, member identity, up map */
256 diff = dom->member_cnt != member_cnt;
257 dom->member_cnt = member_cnt;
/* Take the member_cnt list entries that follow self, in list order */
258 for (i = 0; i < member_cnt; i++) {
259 peer = peer_nxt(peer);
260 diff |= dom->members[i] != peer->addr;
261 dom->members[i] = peer->addr;
262 map_set(&dom->up_map, i, peer->is_up);
263 cache->members[i] = htonl(peer->addr);
265 diff |= dom->up_map != prev_up_map;
/* Stamp the record with a new generation number */
268 dom->gen = ++mon->dom_gen;
/* Header fields of the cached copy are converted to network byte order */
269 cache->len = htons(dom->len);
270 cache->gen = htons(dom->gen);
271 cache->member_cnt = htons(member_cnt);
272 cache->up_map = cpu_to_be64(dom->up_map);
/* Run the matching step for self's own record as well */
273 mon_apply_domain(mon, self);
276 /* mon_update_neighbors() : update preceding neighbors of added/removed peer
/* @mon: monitor owning the peer list
 * @peer: starting point; its record is re-applied first, then those of the
 *        entries before it
 */
278 static void mon_update_neighbors(struct tipc_monitor *mon,
279 struct tipc_peer *peer)
/* Number of entries to revisit equals the current domain size */
283 dz = dom_size(mon->peer_cnt);
284 for (i = 0; i < dz; i++) {
285 mon_apply_domain(mon, peer);
/* Step backwards through the circular list */
286 peer = peer_prev(peer);
290 /* mon_assign_roles() : reassign peer roles after a network change
291 * The monitor list is consistent at this stage; i.e., each peer is monitoring
292 * a set of domain members as matched between domain record and the monitor list
/* @mon: monitor owning the peer list
 * @head: domain head from which the reassignment starts
 */
294 static void mon_assign_roles(struct tipc_monitor *mon, struct tipc_peer *head)
/* Reassignment begins with the entry right after head */
296 struct tipc_peer *peer = peer_nxt(head);
297 struct tipc_peer *self = mon->self;
/* Walk forward and stop once the walk arrives at self */
300 for (; peer != self; peer = peer_nxt(peer)) {
/* Every visited entry starts out as non-local */
301 peer->is_local = false;
303 /* Update domain member */
/* Entries counted within head's applied range are members, not heads */
304 if (i++ < head->applied) {
305 peer->is_head = false;
307 peer->is_local = true;
310 /* Assign next domain head */
316 head->is_head = true;
/* tipc_mon_remove_peer(): drop a peer from a bearer's monitor
 * @net: network namespace
 * @addr: node address of the peer to remove
 * @bearer_id: bearer whose monitor is updated
 */
322 void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id)
324 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
325 struct tipc_peer *self;
326 struct tipc_peer *peer, *prev, *head;
331 self = get_self(net, bearer_id);
/* All list and hash changes below happen under the write lock */
332 write_lock_bh(&mon->lock);
333 peer = get_peer(mon, addr);
/* Remember the predecessor before unlinking; it anchors the updates below */
336 prev = peer_prev(peer);
/* Unlink from both the circular list and the hash bucket */
337 list_del(&peer->list);
338 hlist_del(&peer->hash);
342 head = peer_head(prev);
/* Rebuild the local record, then re-apply records around the gap */
344 mon_update_local_domain(mon);
345 mon_update_neighbors(mon, prev);
347 /* Revert to full-mesh monitoring if we reach threshold */
348 if (!tipc_mon_is_active(net, mon)) {
/* Visits every peer other than self */
349 list_for_each_entry(peer, &self->list, list) {
355 mon_assign_roles(mon, head);
357 write_unlock_bh(&mon->lock);
/* tipc_mon_add_peer(): allocate a peer entry and sort it into the monitor
 * @mon: monitor receiving the new entry
 * @addr: node address of the new peer
 * @peer: output pointer supplied by the caller
 * The caller, tipc_mon_peer_up(), tests the result with '!' to detect failure.
 */
360 static bool tipc_mon_add_peer(struct tipc_monitor *mon, u32 addr,
361 struct tipc_peer **peer)
363 struct tipc_peer *self = mon->self;
364 struct tipc_peer *cur, *prev, *p;
/* GFP_ATOMIC: the caller holds mon->lock through write_lock_bh() */
366 p = kzalloc(sizeof(*p), GFP_ATOMIC);
372 /* Add new peer to lookup list */
373 INIT_LIST_HEAD(&p->list);
374 hlist_add_head(&p->hash, &mon->peers[tipc_hashfn(addr)]);
376 /* Sort new peer into iterator list, in ascending circular order */
378 list_for_each_entry(cur, &self->list, list) {
/* Normal case: addr lies strictly between two neighbouring entries */
379 if ((addr > prev->addr) && (addr < cur->addr))
/* Wrap-around case: prev holds the highest address and cur the lowest,
 * and addr lies outside the range between them
 */
381 if (((addr < cur->addr) || (addr > prev->addr)) &&
382 (prev->addr > cur->addr))
/* list_add_tail() on cur's node links p immediately before cur */
386 list_add_tail(&p->list, &cur->list);
/* Re-apply the records of the new entry and of those before it */
388 mon_update_neighbors(mon, p);
/* tipc_mon_peer_up(): update the monitor when a peer comes up
 * @net: network namespace
 * @addr: node address of the peer
 * @bearer_id: bearer whose monitor is updated
 */
392 void tipc_mon_peer_up(struct net *net, u32 addr, int bearer_id)
394 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
395 struct tipc_peer *self = get_self(net, bearer_id);
396 struct tipc_peer *peer, *head;
398 write_lock_bh(&mon->lock);
399 peer = get_peer(mon, addr);
/* An unknown peer is created on the fly; a failed creation is tested for here */
400 if (!peer && !tipc_mon_add_peer(mon, addr, &peer))
403 head = peer_head(peer);
/* Rebuild the local record, then reassign roles starting from that head */
405 mon_update_local_domain(mon);
406 mon_assign_roles(mon, head);
408 write_unlock_bh(&mon->lock);
/* tipc_mon_peer_down(): update the monitor when a peer goes down
 * @net: network namespace
 * @addr: node address of the peer
 * @bearer_id: bearer whose monitor is updated
 */
411 void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id)
413 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
414 struct tipc_peer *self;
415 struct tipc_peer *peer, *head;
416 struct tipc_mon_domain *dom;
422 self = get_self(net, bearer_id);
423 write_lock_bh(&mon->lock);
424 peer = get_peer(mon, addr);
/* Warning that names the address and bearer as an unknown link */
426 pr_warn("Mon: unknown link %x/%u DOWN\n", addr, bearer_id);
/* Saved applied count is handed to mon_identify_lost_members() below */
429 applied = peer->applied;
434 mon_identify_lost_members(peer, dom, applied);
/* The peer loses both of its roles */
437 peer->is_head = false;
438 peer->is_local = false;
/* Rebuild the local record and reassign roles from the nearest remaining head */
440 head = peer_head(peer);
442 mon_update_local_domain(mon);
443 mon_assign_roles(mon, head);
445 write_unlock_bh(&mon->lock);
448 /* tipc_mon_rcv - process monitor domain event message
/* @net: network namespace
 * @data: received domain record; its fields are in network byte order
 * @dlen: number of bytes available at @data
 * @addr: node address of the sending peer
 * @state: monitor state of the link (generations, probing and synched flags);
 *         it is updated here
 * @bearer_id: bearer whose monitor is used
 */
450 void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
451 struct tipc_mon_state *state, int bearer_id)
453 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
454 struct tipc_mon_domain *arrv_dom = data;
/* Stack copy of the peer's record as it was before this message */
455 struct tipc_mon_domain dom_bef;
456 struct tipc_mon_domain *dom;
457 struct tipc_peer *peer;
/* NOTE(review): the header fields below are read in initializers, before
 * dlen is checked further down - confirm that callers always supply at least
 * a complete record header
 */
458 u16 new_member_cnt = ntohs(arrv_dom->member_cnt);
/* Length a record with the advertised member count must have */
459 int new_dlen = dom_rec_len(arrv_dom, new_member_cnt);
460 u16 new_gen = ntohs(arrv_dom->gen);
461 u16 acked_gen = ntohs(arrv_dom->ack_gen);
/* Remember whether a probe was pending; the flag itself is cleared just below */
462 bool probing = state->probing;
465 state->probing = false;
467 /* Sanity check received domain record */
468 if (new_member_cnt > MAX_MON_DOMAIN)
/* Must hold at least a header with zero members */
470 if (dlen < dom_rec_len(arrv_dom, 0))
/* Must be exactly as long as the advertised member count implies */
472 if (dlen != dom_rec_len(arrv_dom, new_member_cnt))
/* NOTE(review): new_dlen equals dom_rec_len(arrv_dom, new_member_cnt), so the
 * dlen half of this test repeats the previous one; only the comparison with
 * the record's own len field adds a check
 */
474 if ((dlen < new_dlen) || ntohs(arrv_dom->len) != new_dlen)
477 /* Synch generation numbers with peer if link just came up */
478 if (!state->synched) {
/* One behind new_gen, presumably so that this first record counts as new below */
479 state->peer_gen = new_gen - 1;
480 state->acked_gen = acked_gen;
481 state->synched = true;
/* Only move the acked generation forward */
484 if (more(acked_gen, state->acked_gen))
485 state->acked_gen = acked_gen;
487 /* Drop duplicate unless we are waiting for a probe response */
488 if (!more(new_gen, state->peer_gen) && !probing)
491 write_lock_bh(&mon->lock);
492 peer = get_peer(mon, addr);
493 if (!peer || !peer->is_up)
496 /* Peer is confirmed, stop any ongoing probing */
499 /* Task is done for duplicate record */
500 if (!more(new_gen, state->peer_gen))
503 state->peer_gen = new_gen;
505 /* Cache current domain record for later use */
/* Zero members is the default when there is nothing to copy */
506 dom_bef.member_cnt = 0;
/* Copies exactly the number of bytes the stored record says it occupies */
509 memcpy(&dom_bef, dom, dom->len);
511 /* Transform and store received domain record */
/* A new buffer is needed when none exists or the existing one is too short */
512 if (!dom || (dom->len < new_dlen)) {
/* GFP_ATOMIC: mon->lock is held through write_lock_bh() */
514 dom = kmalloc(new_dlen, GFP_ATOMIC);
/* Stored copy is kept in host byte order */
521 dom->member_cnt = new_member_cnt;
522 dom->up_map = be64_to_cpu(arrv_dom->up_map);
523 for (i = 0; i < new_member_cnt; i++)
524 dom->members[i] = ntohl(arrv_dom->members[i]);
526 /* Update peers affected by this domain record */
/* applied is sampled before mon_apply_domain() runs; the saved value is passed on as the "before" count */
527 applied_bef = peer->applied;
528 mon_apply_domain(mon, peer);
529 mon_identify_lost_members(peer, &dom_bef, applied_bef);
530 mon_assign_roles(mon, peer_head(peer));
532 write_unlock_bh(&mon->lock);
/* tipc_mon_prep(): fill in the domain record to send to a peer
 * @net: network namespace
 * @data: buffer receiving the record, written in network byte order
 * @dlen: pointer for reporting the record length back to the caller
 * @state: monitor state of the link; supplies acked_gen and peer_gen
 * @bearer_id: bearer whose monitor is used
 */
535 void tipc_mon_prep(struct net *net, void *data, int *dlen,
536 struct tipc_mon_state *state, int bearer_id)
538 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
539 struct tipc_mon_domain *dom = data;
/* Snapshot of the local generation, taken without holding mon->lock */
540 u16 gen = mon->dom_gen;
543 /* Send invalid record if not active */
544 if (!tipc_mon_is_active(net, mon)) {
549 /* Send only a dummy record with ack if peer has acked our last sent */
550 if (likely(state->acked_gen == gen)) {
/* Header-only length: zero members */
551 len = dom_rec_len(dom, 0);
553 dom->len = htons(len);
554 dom->gen = htons(gen);
555 dom->ack_gen = htons(state->peer_gen);
559 /* Send the full record */
/* The cached record is copied under the read lock */
560 read_lock_bh(&mon->lock);
/* cache.len is stored in network order, so convert it before using it as a size */
561 len = ntohs(mon->cache.len);
563 memcpy(data, &mon->cache, len);
564 read_unlock_bh(&mon->lock);
/* ack_gen is written after the copy, replacing whatever the cache held there */
565 dom->ack_gen = htons(state->peer_gen);
/* tipc_mon_get_state(): refresh a link's monitor state from the peer entry
 * @net: network namespace
 * @addr: node address of the peer
 * @state: state to update (probing, reset, monitoring, list_gen)
 */
568 void tipc_mon_get_state(struct net *net, u32 addr,
569 struct tipc_mon_state *state,
572 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
573 struct tipc_peer *peer;
/* Inactive monitor: no probing is requested and monitoring is switched on */
575 if (!tipc_mon_is_active(net, mon)) {
576 state->probing = false;
577 state->monitoring = true;
581 /* Use cached state if table has not changed */
582 if (!state->probing &&
583 (state->list_gen == mon->list_gen) &&
584 (state->acked_gen == mon->dom_gen))
587 read_lock_bh(&mon->lock);
588 peer = get_peer(mon, addr);
/* Probe while the peer has not acked our latest generation ... */
590 state->probing = state->acked_gen != mon->dom_gen;
/* ... or while the peer has a non-zero down count */
591 state->probing |= peer->down_cnt;
/* reset is only ever set here, never cleared */
592 state->reset |= peer->down_cnt >= MAX_PEER_DOWN_EVENTS;
/* Monitor peers that are local domain members or domain heads */
593 state->monitoring = peer->is_local;
594 state->monitoring |= peer->is_head;
/* Record the list generation so the shortcut above can apply next time */
595 state->list_gen = mon->list_gen;
597 read_unlock_bh(&mon->lock);
/* mon_timeout(): timer handler that resizes the local domain if needed
 * @t: the timer embedded in struct tipc_monitor
 */
600 static void mon_timeout(struct timer_list *t)
/* Recover the enclosing monitor from its timer member */
602 struct tipc_monitor *mon = from_timer(mon, t, timer);
603 struct tipc_peer *self;
/* NOTE(review): peer_cnt is read here before mon->lock is taken - confirm that a stale value is acceptable */
604 int best_member_cnt = dom_size(mon->peer_cnt) - 1;
606 write_lock_bh(&mon->lock);
/* Act only when the applied size differs from the size computed above */
608 if (self && (best_member_cnt != self->applied)) {
609 mon_update_local_domain(mon);
610 mon_assign_roles(mon, self);
612 write_unlock_bh(&mon->lock);
/* Re-arm for the next period */
613 mod_timer(&mon->timer, jiffies + mon->timer_intv);
/* tipc_mon_create(): set up the monitor of a bearer
 * @net: network namespace
 * @bearer_id: slot in tn->monitors[] to populate
 */
616 int tipc_mon_create(struct net *net, int bearer_id)
618 struct tipc_net *tn = tipc_net(net);
619 struct tipc_monitor *mon;
620 struct tipc_peer *self;
621 struct tipc_mon_domain *dom;
/* A monitor already registered for this bearer is detected here */
623 if (tn->monitors[bearer_id])
/* All three objects are zero-initialised */
626 mon = kzalloc(sizeof(*mon), GFP_ATOMIC);
627 self = kzalloc(sizeof(*self), GFP_ATOMIC);
628 dom = kzalloc(sizeof(*dom), GFP_ATOMIC);
/* One combined check covers all three allocations */
629 if (!mon || !self || !dom) {
635 tn->monitors[bearer_id] = mon;
636 rwlock_init(&mon->lock);
641 self->addr = tipc_own_addr(net);
/* self starts as a domain head, so peer_head() always has an entry to stop at */
643 self->is_head = true;
/* self is initially the only entry of the circular list */
644 INIT_LIST_HEAD(&self->list);
645 timer_setup(&mon->timer, mon_timeout, 0);
/* Period is MON_TIMEOUT plus up to 0xffff ms taken from tn->random */
646 mon->timer_intv = msecs_to_jiffies(MON_TIMEOUT + (tn->random & 0xffff));
647 mod_timer(&mon->timer, jiffies + mon->timer_intv);
/* tipc_mon_delete(): tear down the monitor of a bearer
 * @net: network namespace
 * @bearer_id: slot in tn->monitors[] to clear
 */
651 void tipc_mon_delete(struct net *net, int bearer_id)
653 struct tipc_net *tn = tipc_net(net);
654 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
655 struct tipc_peer *self;
656 struct tipc_peer *peer, *tmp;
661 self = get_self(net, bearer_id);
662 write_lock_bh(&mon->lock);
/* Clear the slot so later tipc_monitor() lookups get NULL */
663 tn->monitors[bearer_id] = NULL;
/* _safe iteration: entries are unlinked while the list is being walked */
664 list_for_each_entry_safe(peer, tmp, &self->list, list) {
665 list_del(&peer->list);
666 hlist_del(&peer->hash);
671 write_unlock_bh(&mon->lock);
/* Stopped after unlocking; mon_timeout() takes the same lock */
672 del_timer_sync(&mon->timer);
/* tipc_mon_reinit_self(): refresh the local node address in every monitor
 * @net: network namespace whose monitors are updated
 */
678 void tipc_mon_reinit_self(struct net *net)
680 struct tipc_monitor *mon;
/* Visit every possible bearer slot */
683 for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
684 mon = tipc_monitor(net, bearer_id);
/* The address is replaced under the write lock */
687 write_lock_bh(&mon->lock);
688 mon->self->addr = tipc_own_addr(net);
689 write_unlock_bh(&mon->lock);
/* tipc_nl_monitor_set_threshold(): configure the activation threshold
 * @net: network namespace
 * @cluster_size: new threshold, compared against peer_cnt by tipc_mon_is_active()
 */
693 int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size)
695 struct tipc_net *tn = tipc_net(net);
/* Values above TIPC_CLUSTER_SIZE are caught here */
697 if (cluster_size > TIPC_CLUSTER_SIZE)
700 tn->mon_threshold = cluster_size;
/* tipc_nl_monitor_get_threshold(): read back the activation threshold
 * @net: network namespace
 * Returns the current tn->mon_threshold.
 */
705 int tipc_nl_monitor_get_threshold(struct net *net)
707 struct tipc_net *tn = tipc_net(net);
709 return tn->mon_threshold;
/* __tipc_nl_add_monitor_peer(): append one peer to a netlink dump message
 * @peer: peer entry to describe
 * @msg: netlink message context (skb, portid, seq)
 */
712 static int __tipc_nl_add_monitor_peer(struct tipc_peer *peer,
713 struct tipc_nl_msg *msg)
715 struct tipc_mon_domain *dom = peer->domain;
716 struct nlattr *attrs;
/* Message header for a TIPC_NL_MON_PEER_GET reply, flagged as multipart */
719 hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
720 NLM_F_MULTI, TIPC_NL_MON_PEER_GET);
/* All peer attributes are nested under TIPC_NLA_MON_PEER */
724 attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MON_PEER);
728 if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_ADDR, peer->addr))
730 if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_APPLIED, peer->applied))
/* Flag attributes carry no payload */
734 if (nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_UP))
737 if (nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_LOCAL))
740 if (nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_HEAD))
/* Attributes taken from the peer's domain record */
744 if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_DOMGEN, dom->gen))
746 if (nla_put_u64_64bit(msg->skb, TIPC_NLA_MON_PEER_UPMAP,
747 dom->up_map, TIPC_NLA_MON_PEER_PAD))
/* Member addresses are sent as one attribute of member_cnt u32 values */
749 if (nla_put(msg->skb, TIPC_NLA_MON_PEER_MEMBERS,
750 dom->member_cnt * sizeof(u32), &dom->members))
/* Close the nest and then the message */
754 nla_nest_end(msg->skb, attrs);
755 genlmsg_end(msg->skb, hdr);
/* Cancel calls: first the nest, then the whole message */
759 nla_nest_cancel(msg->skb, attrs);
761 genlmsg_cancel(msg->skb, hdr);
/* tipc_nl_add_monitor_peer(): dump the peers of one monitor into a netlink message
 * @net: network namespace
 * @msg: netlink message context
 * @bearer_id: bearer whose monitor is dumped
 * @prev_node: in/out value compared with and set to peer addresses so that a
 *             dump can be continued across calls
 */
766 int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg,
767 u32 bearer_id, u32 *prev_node)
769 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
770 struct tipc_peer *peer;
/* The peer list is walked under the read lock */
775 read_lock_bh(&mon->lock);
/* Tests whether this entry is the one recorded in *prev_node */
779 if (peer->addr == *prev_node)
/* A non-zero result means the peer could not be added to the message */
784 if (__tipc_nl_add_monitor_peer(peer, msg)) {
/* Remember the peer that did not fit */
785 *prev_node = peer->addr;
786 read_unlock_bh(&mon->lock);
/* The walk ends once it has wrapped around to self */
789 } while ((peer = peer_nxt(peer)) != mon->self);
790 read_unlock_bh(&mon->lock);
/* __tipc_nl_add_monitor(): append a monitor summary to a netlink message
 * @net: network namespace
 * @msg: netlink message context (skb, portid, seq)
 * bearer_id (used below) selects the monitor and is reported as TIPC_NLA_MON_REF.
 */
795 int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg *msg,
798 struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
799 char bearer_name[TIPC_MAX_BEARER_NAME];
800 struct nlattr *attrs;
/* The bearer name is resolved before any part of the message is built */
804 ret = tipc_bearer_get_name(net, bearer_name, bearer_id);
/* Message header for a TIPC_NL_MON_GET reply, flagged as multipart */
808 hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
809 NLM_F_MULTI, TIPC_NL_MON_GET);
/* All monitor attributes are nested under TIPC_NLA_MON */
813 attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_MON);
/* Monitor fields are read under the read lock */
817 read_lock_bh(&mon->lock);
818 if (nla_put_u32(msg->skb, TIPC_NLA_MON_REF, bearer_id))
/* The ACTIVE flag is emitted only while the monitor is active */
820 if (tipc_mon_is_active(net, mon))
821 if (nla_put_flag(msg->skb, TIPC_NLA_MON_ACTIVE))
823 if (nla_put_string(msg->skb, TIPC_NLA_MON_BEARER_NAME, bearer_name))
825 if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEERCNT, mon->peer_cnt))
827 if (nla_put_u32(msg->skb, TIPC_NLA_MON_LISTGEN, mon->list_gen))
/* Finish sequence: unlock, close the nest, close the message */
830 read_unlock_bh(&mon->lock);
831 nla_nest_end(msg->skb, attrs);
832 genlmsg_end(msg->skb, hdr);
/* Cancel sequence: unlock, cancel the nest, cancel the message */
837 read_unlock_bh(&mon->lock);
838 nla_nest_cancel(msg->skb, attrs);
840 genlmsg_cancel(msg->skb, hdr);