GNU Linux-libre 6.8.7-gnu
[releases.git] / drivers / misc / sgi-xp / xpc_uv.c
1 /*
2  * This file is subject to the terms and conditions of the GNU General Public
3  * License.  See the file "COPYING" in the main directory of this archive
4  * for more details.
5  *
6  * Copyright (c) 2008-2009 Silicon Graphics, Inc.  All Rights Reserved.
7  */
8
9 /*
10  * Cross Partition Communication (XPC) uv-based functions.
11  *
12  *     Architecture specific implementation of common functions.
13  *
14  */
15
16 #include <linux/kernel.h>
17 #include <linux/mm.h>
18 #include <linux/interrupt.h>
19 #include <linux/delay.h>
20 #include <linux/device.h>
21 #include <linux/cpu.h>
22 #include <linux/module.h>
23 #include <linux/err.h>
24 #include <linux/slab.h>
25 #include <linux/numa.h>
26 #include <asm/uv/uv_hub.h>
27 #include <asm/uv/bios.h>
28 #include <asm/uv/uv_irq.h>
29 #include "../sgi-gru/gru.h"
30 #include "../sgi-gru/grukservices.h"
31 #include "xpc.h"
32
33 static struct xpc_heartbeat_uv *xpc_heartbeat_uv;
34
35 #define XPC_ACTIVATE_MSG_SIZE_UV        (1 * GRU_CACHE_LINE_BYTES)
36 #define XPC_ACTIVATE_MQ_SIZE_UV         (4 * XP_MAX_NPARTITIONS_UV * \
37                                          XPC_ACTIVATE_MSG_SIZE_UV)
38 #define XPC_ACTIVATE_IRQ_NAME           "xpc_activate"
39
40 #define XPC_NOTIFY_MSG_SIZE_UV          (2 * GRU_CACHE_LINE_BYTES)
41 #define XPC_NOTIFY_MQ_SIZE_UV           (4 * XP_MAX_NPARTITIONS_UV * \
42                                          XPC_NOTIFY_MSG_SIZE_UV)
43 #define XPC_NOTIFY_IRQ_NAME             "xpc_notify"
44
45 static int xpc_mq_node = NUMA_NO_NODE;
46
47 static struct xpc_gru_mq_uv *xpc_activate_mq_uv;
48 static struct xpc_gru_mq_uv *xpc_notify_mq_uv;
49
50 static int
51 xpc_setup_partitions_uv(void)
52 {
53         short partid;
54         struct xpc_partition_uv *part_uv;
55
56         for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
57                 part_uv = &xpc_partitions[partid].sn.uv;
58
59                 mutex_init(&part_uv->cached_activate_gru_mq_desc_mutex);
60                 spin_lock_init(&part_uv->flags_lock);
61                 part_uv->remote_act_state = XPC_P_AS_INACTIVE;
62         }
63         return 0;
64 }
65
66 static void
67 xpc_teardown_partitions_uv(void)
68 {
69         short partid;
70         struct xpc_partition_uv *part_uv;
71         unsigned long irq_flags;
72
73         for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
74                 part_uv = &xpc_partitions[partid].sn.uv;
75
76                 if (part_uv->cached_activate_gru_mq_desc != NULL) {
77                         mutex_lock(&part_uv->cached_activate_gru_mq_desc_mutex);
78                         spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
79                         part_uv->flags &= ~XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV;
80                         spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
81                         kfree(part_uv->cached_activate_gru_mq_desc);
82                         part_uv->cached_activate_gru_mq_desc = NULL;
83                         mutex_unlock(&part_uv->
84                                      cached_activate_gru_mq_desc_mutex);
85                 }
86         }
87 }
88
89 static int
90 xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name)
91 {
92         int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
93
94         mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset,
95                         UV_AFFINITY_CPU);
96         if (mq->irq < 0)
97                 return mq->irq;
98
99         mq->mmr_value = uv_read_global_mmr64(mmr_pnode, mq->mmr_offset);
100
101         return 0;
102 }
103
104 static void
105 xpc_release_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq)
106 {
107         uv_teardown_irq(mq->irq);
108 }
109
110 static int
111 xpc_gru_mq_watchlist_alloc_uv(struct xpc_gru_mq_uv *mq)
112 {
113         int ret;
114
115         ret = uv_bios_mq_watchlist_alloc(uv_gpa(mq->address),
116                                          mq->order, &mq->mmr_offset);
117         if (ret < 0) {
118                 dev_err(xpc_part, "uv_bios_mq_watchlist_alloc() failed, "
119                         "ret=%d\n", ret);
120                 return ret;
121         }
122
123         mq->watchlist_num = ret;
124         return 0;
125 }
126
127 static void
128 xpc_gru_mq_watchlist_free_uv(struct xpc_gru_mq_uv *mq)
129 {
130         int ret;
131         int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
132
133         ret = uv_bios_mq_watchlist_free(mmr_pnode, mq->watchlist_num);
134         BUG_ON(ret != BIOS_STATUS_SUCCESS);
135 }
136
137 static struct xpc_gru_mq_uv *
138 xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name,
139                      irq_handler_t irq_handler)
140 {
141         enum xp_retval xp_ret;
142         int ret;
143         int nid;
144         int nasid;
145         int pg_order;
146         struct page *page;
147         struct xpc_gru_mq_uv *mq;
148         struct uv_IO_APIC_route_entry *mmr_value;
149
150         mq = kmalloc(sizeof(struct xpc_gru_mq_uv), GFP_KERNEL);
151         if (mq == NULL) {
152                 dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() "
153                         "a xpc_gru_mq_uv structure\n");
154                 ret = -ENOMEM;
155                 goto out_0;
156         }
157
158         mq->gru_mq_desc = kzalloc(sizeof(struct gru_message_queue_desc),
159                                   GFP_KERNEL);
160         if (mq->gru_mq_desc == NULL) {
161                 dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() "
162                         "a gru_message_queue_desc structure\n");
163                 ret = -ENOMEM;
164                 goto out_1;
165         }
166
167         pg_order = get_order(mq_size);
168         mq->order = pg_order + PAGE_SHIFT;
169         mq_size = 1UL << mq->order;
170
171         mq->mmr_blade = uv_cpu_to_blade_id(cpu);
172
173         nid = cpu_to_node(cpu);
174         page = __alloc_pages_node(nid,
175                                       GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
176                                       pg_order);
177         if (page == NULL) {
178                 dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d "
179                         "bytes of memory on nid=%d for GRU mq\n", mq_size, nid);
180                 ret = -ENOMEM;
181                 goto out_2;
182         }
183         mq->address = page_address(page);
184
185         /* enable generation of irq when GRU mq operation occurs to this mq */
186         ret = xpc_gru_mq_watchlist_alloc_uv(mq);
187         if (ret != 0)
188                 goto out_3;
189
190         ret = xpc_get_gru_mq_irq_uv(mq, cpu, irq_name);
191         if (ret != 0)
192                 goto out_4;
193
194         ret = request_irq(mq->irq, irq_handler, 0, irq_name, NULL);
195         if (ret != 0) {
196                 dev_err(xpc_part, "request_irq(irq=%d) returned error=%d\n",
197                         mq->irq, -ret);
198                 goto out_5;
199         }
200
201         nasid = UV_PNODE_TO_NASID(uv_cpu_to_pnode(cpu));
202
203         mmr_value = (struct uv_IO_APIC_route_entry *)&mq->mmr_value;
204         ret = gru_create_message_queue(mq->gru_mq_desc, mq->address, mq_size,
205                                      nasid, mmr_value->vector, mmr_value->dest);
206         if (ret != 0) {
207                 dev_err(xpc_part, "gru_create_message_queue() returned "
208                         "error=%d\n", ret);
209                 ret = -EINVAL;
210                 goto out_6;
211         }
212
213         /* allow other partitions to access this GRU mq */
214         xp_ret = xp_expand_memprotect(xp_pa(mq->address), mq_size);
215         if (xp_ret != xpSuccess) {
216                 ret = -EACCES;
217                 goto out_6;
218         }
219
220         return mq;
221
222         /* something went wrong */
223 out_6:
224         free_irq(mq->irq, NULL);
225 out_5:
226         xpc_release_gru_mq_irq_uv(mq);
227 out_4:
228         xpc_gru_mq_watchlist_free_uv(mq);
229 out_3:
230         free_pages((unsigned long)mq->address, pg_order);
231 out_2:
232         kfree(mq->gru_mq_desc);
233 out_1:
234         kfree(mq);
235 out_0:
236         return ERR_PTR(ret);
237 }
238
239 static void
240 xpc_destroy_gru_mq_uv(struct xpc_gru_mq_uv *mq)
241 {
242         unsigned int mq_size;
243         int pg_order;
244         int ret;
245
246         /* disallow other partitions to access GRU mq */
247         mq_size = 1UL << mq->order;
248         ret = xp_restrict_memprotect(xp_pa(mq->address), mq_size);
249         BUG_ON(ret != xpSuccess);
250
251         /* unregister irq handler and release mq irq/vector mapping */
252         free_irq(mq->irq, NULL);
253         xpc_release_gru_mq_irq_uv(mq);
254
255         /* disable generation of irq when GRU mq op occurs to this mq */
256         xpc_gru_mq_watchlist_free_uv(mq);
257
258         pg_order = mq->order - PAGE_SHIFT;
259         free_pages((unsigned long)mq->address, pg_order);
260
261         kfree(mq);
262 }
263
264 static enum xp_retval
265 xpc_send_gru_msg(struct gru_message_queue_desc *gru_mq_desc, void *msg,
266                  size_t msg_size)
267 {
268         enum xp_retval xp_ret;
269         int ret;
270
271         while (1) {
272                 ret = gru_send_message_gpa(gru_mq_desc, msg, msg_size);
273                 if (ret == MQE_OK) {
274                         xp_ret = xpSuccess;
275                         break;
276                 }
277
278                 if (ret == MQE_QUEUE_FULL) {
279                         dev_dbg(xpc_chan, "gru_send_message_gpa() returned "
280                                 "error=MQE_QUEUE_FULL\n");
281                         /* !!! handle QLimit reached; delay & try again */
282                         /* ??? Do we add a limit to the number of retries? */
283                         (void)msleep_interruptible(10);
284                 } else if (ret == MQE_CONGESTION) {
285                         dev_dbg(xpc_chan, "gru_send_message_gpa() returned "
286                                 "error=MQE_CONGESTION\n");
287                         /* !!! handle LB Overflow; simply try again */
288                         /* ??? Do we add a limit to the number of retries? */
289                 } else {
290                         /* !!! Currently this is MQE_UNEXPECTED_CB_ERR */
291                         dev_err(xpc_chan, "gru_send_message_gpa() returned "
292                                 "error=%d\n", ret);
293                         xp_ret = xpGruSendMqError;
294                         break;
295                 }
296         }
297         return xp_ret;
298 }
299
300 static void
301 xpc_process_activate_IRQ_rcvd_uv(void)
302 {
303         unsigned long irq_flags;
304         short partid;
305         struct xpc_partition *part;
306         u8 act_state_req;
307
308         DBUG_ON(xpc_activate_IRQ_rcvd == 0);
309
310         spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
311         for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
312                 part = &xpc_partitions[partid];
313
314                 if (part->sn.uv.act_state_req == 0)
315                         continue;
316
317                 xpc_activate_IRQ_rcvd--;
318                 BUG_ON(xpc_activate_IRQ_rcvd < 0);
319
320                 act_state_req = part->sn.uv.act_state_req;
321                 part->sn.uv.act_state_req = 0;
322                 spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
323
324                 if (act_state_req == XPC_P_ASR_ACTIVATE_UV) {
325                         if (part->act_state == XPC_P_AS_INACTIVE)
326                                 xpc_activate_partition(part);
327                         else if (part->act_state == XPC_P_AS_DEACTIVATING)
328                                 XPC_DEACTIVATE_PARTITION(part, xpReactivating);
329
330                 } else if (act_state_req == XPC_P_ASR_REACTIVATE_UV) {
331                         if (part->act_state == XPC_P_AS_INACTIVE)
332                                 xpc_activate_partition(part);
333                         else
334                                 XPC_DEACTIVATE_PARTITION(part, xpReactivating);
335
336                 } else if (act_state_req == XPC_P_ASR_DEACTIVATE_UV) {
337                         XPC_DEACTIVATE_PARTITION(part, part->sn.uv.reason);
338
339                 } else {
340                         BUG();
341                 }
342
343                 spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
344                 if (xpc_activate_IRQ_rcvd == 0)
345                         break;
346         }
347         spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
348
349 }
350
/*
 * Act on one message taken off the activate message queue.
 *
 * @part:              partition entry the message's partid selected
 * @msg_hdr:           header of the received message; the full message is
 *                     recovered from it with container_of()
 * @part_setup:        the caller passes the result of xpc_part_ref(); the
 *                     channel control (CHCTL) messages are ignored when it
 *                     is zero
 * @wakeup_hb_checker: incremented every time an activation state request is
 *                     posted for @part; the caller wakes up
 *                     xpc_activate_IRQ_wq when it ends up non-zero
 *
 * Every message first refreshes remote_act_state from the header. Message
 * types that leave the switch with "break" additionally go through the
 * rp_ts_jiffies comparison at the bottom; the deactivate request and the
 * unknown-type case return directly and skip it.
 */
static void
xpc_handle_activate_mq_msg_uv(struct xpc_partition *part,
			      struct xpc_activate_mq_msghdr_uv *msg_hdr,
			      int part_setup,
			      int *wakeup_hb_checker)
{
	unsigned long irq_flags;
	struct xpc_partition_uv *part_uv = &part->sn.uv;
	struct xpc_openclose_args *args;

	part_uv->remote_act_state = msg_hdr->act_state;

	/*
	 * NOTE(review): in the CHCTL cases below msg->ch_number comes straight
	 * from the received message and is used as an array index without a
	 * range check in this function - confirm it is validated elsewhere.
	 */
	switch (msg_hdr->type) {
	case XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV:
		/* syncing of remote_act_state was just done above */
		break;

	case XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: {
		struct xpc_activate_mq_msg_activate_req_uv *msg;

		/*
		 * ??? Do we deal here with ts_jiffies being different
		 * ??? if act_state != XPC_P_AS_INACTIVE instead of
		 * ??? below?
		 */
		msg = container_of(msg_hdr, struct
				   xpc_activate_mq_msg_activate_req_uv, hdr);

		/*
		 * Post an activate request and record what the message says
		 * about the sender, all under xpc_activate_IRQ_rcvd_lock. The
		 * pending counter is only bumped if no request was pending.
		 */
		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
		if (part_uv->act_state_req == 0)
			xpc_activate_IRQ_rcvd++;
		part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV;
		part->remote_rp_pa = msg->rp_gpa; /* !!! _pa is _gpa */
		part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies;
		part_uv->heartbeat_gpa = msg->heartbeat_gpa;

		/*
		 * A different activate mq descriptor address makes the cached
		 * copy of the descriptor stale, so mark it as not cached.
		 */
		if (msg->activate_gru_mq_desc_gpa !=
		    part_uv->activate_gru_mq_desc_gpa) {
			spin_lock(&part_uv->flags_lock);
			part_uv->flags &= ~XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV;
			spin_unlock(&part_uv->flags_lock);
			part_uv->activate_gru_mq_desc_gpa =
			    msg->activate_gru_mq_desc_gpa;
		}
		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);

		(*wakeup_hb_checker)++;
		break;
	}
	case XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV: {
		struct xpc_activate_mq_msg_deactivate_req_uv *msg;

		msg = container_of(msg_hdr, struct
				   xpc_activate_mq_msg_deactivate_req_uv, hdr);

		/* post a deactivate request carrying the sender's reason */
		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
		if (part_uv->act_state_req == 0)
			xpc_activate_IRQ_rcvd++;
		part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
		part_uv->reason = msg->reason;
		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);

		(*wakeup_hb_checker)++;
		/* returning here skips the rp_ts_jiffies check below */
		return;
	}
	case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV: {
		struct xpc_activate_mq_msg_chctl_closerequest_uv *msg;

		if (!part_setup)
			break;

		msg = container_of(msg_hdr, struct
				   xpc_activate_mq_msg_chctl_closerequest_uv,
				   hdr);
		/* stash the arguments, then flag the channel and wake its mgr */
		args = &part->remote_openclose_args[msg->ch_number];
		args->reason = msg->reason;

		spin_lock_irqsave(&part->chctl_lock, irq_flags);
		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREQUEST;
		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);

		xpc_wakeup_channel_mgr(part);
		break;
	}
	case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV: {
		struct xpc_activate_mq_msg_chctl_closereply_uv *msg;

		if (!part_setup)
			break;

		msg = container_of(msg_hdr, struct
				   xpc_activate_mq_msg_chctl_closereply_uv,
				   hdr);

		spin_lock_irqsave(&part->chctl_lock, irq_flags);
		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREPLY;
		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);

		xpc_wakeup_channel_mgr(part);
		break;
	}
	case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV: {
		struct xpc_activate_mq_msg_chctl_openrequest_uv *msg;

		if (!part_setup)
			break;

		msg = container_of(msg_hdr, struct
				   xpc_activate_mq_msg_chctl_openrequest_uv,
				   hdr);
		args = &part->remote_openclose_args[msg->ch_number];
		args->entry_size = msg->entry_size;
		args->local_nentries = msg->local_nentries;

		spin_lock_irqsave(&part->chctl_lock, irq_flags);
		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREQUEST;
		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);

		xpc_wakeup_channel_mgr(part);
		break;
	}
	case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV: {
		struct xpc_activate_mq_msg_chctl_openreply_uv *msg;

		if (!part_setup)
			break;

		msg = container_of(msg_hdr, struct
				   xpc_activate_mq_msg_chctl_openreply_uv, hdr);
		args = &part->remote_openclose_args[msg->ch_number];
		args->remote_nentries = msg->remote_nentries;
		args->local_nentries = msg->local_nentries;
		args->local_msgqueue_pa = msg->notify_gru_mq_desc_gpa;

		spin_lock_irqsave(&part->chctl_lock, irq_flags);
		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREPLY;
		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);

		xpc_wakeup_channel_mgr(part);
		break;
	}
	case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV: {
		struct xpc_activate_mq_msg_chctl_opencomplete_uv *msg;

		if (!part_setup)
			break;

		msg = container_of(msg_hdr, struct
				xpc_activate_mq_msg_chctl_opencomplete_uv, hdr);
		spin_lock_irqsave(&part->chctl_lock, irq_flags);
		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENCOMPLETE;
		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);

		xpc_wakeup_channel_mgr(part);
	}
		/* a handled open-complete also marks the partition engaged */
		fallthrough;
	case XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV:
		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
		part_uv->flags |= XPC_P_ENGAGED_UV;
		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
		break;

	case XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV:
		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
		part_uv->flags &= ~XPC_P_ENGAGED_UV;
		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
		break;

	default:
		dev_err(xpc_part, "received unknown activate_mq msg type=%d "
			"from partition=%d\n", msg_hdr->type, XPC_PARTID(part));

		/* get hb checker to deactivate from the remote partition */
		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
		if (part_uv->act_state_req == 0)
			xpc_activate_IRQ_rcvd++;
		part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
		part_uv->reason = xpBadMsgType;
		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);

		(*wakeup_hb_checker)++;
		/* returning here skips the rp_ts_jiffies check below */
		return;
	}

	/*
	 * The timestamp in the message differs from the one recorded for this
	 * partition (and one has been recorded): post a reactivate request.
	 */
	if (msg_hdr->rp_ts_jiffies != part->remote_rp_ts_jiffies &&
	    part->remote_rp_ts_jiffies != 0) {
		/*
		 * ??? Does what we do here need to be sensitive to
		 * ??? act_state or remote_act_state?
		 */
		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
		if (part_uv->act_state_req == 0)
			xpc_activate_IRQ_rcvd++;
		part_uv->act_state_req = XPC_P_ASR_REACTIVATE_UV;
		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);

		(*wakeup_hb_checker)++;
	}
}
550
551 static irqreturn_t
552 xpc_handle_activate_IRQ_uv(int irq, void *dev_id)
553 {
554         struct xpc_activate_mq_msghdr_uv *msg_hdr;
555         short partid;
556         struct xpc_partition *part;
557         int wakeup_hb_checker = 0;
558         int part_referenced;
559
560         while (1) {
561                 msg_hdr = gru_get_next_message(xpc_activate_mq_uv->gru_mq_desc);
562                 if (msg_hdr == NULL)
563                         break;
564
565                 partid = msg_hdr->partid;
566                 if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
567                         dev_err(xpc_part, "xpc_handle_activate_IRQ_uv() "
568                                 "received invalid partid=0x%x in message\n",
569                                 partid);
570                 } else {
571                         part = &xpc_partitions[partid];
572
573                         part_referenced = xpc_part_ref(part);
574                         xpc_handle_activate_mq_msg_uv(part, msg_hdr,
575                                                       part_referenced,
576                                                       &wakeup_hb_checker);
577                         if (part_referenced)
578                                 xpc_part_deref(part);
579                 }
580
581                 gru_free_message(xpc_activate_mq_uv->gru_mq_desc, msg_hdr);
582         }
583
584         if (wakeup_hb_checker)
585                 wake_up_interruptible(&xpc_activate_IRQ_wq);
586
587         return IRQ_HANDLED;
588 }
589
590 static enum xp_retval
591 xpc_cache_remote_gru_mq_desc_uv(struct gru_message_queue_desc *gru_mq_desc,
592                                 unsigned long gru_mq_desc_gpa)
593 {
594         enum xp_retval ret;
595
596         ret = xp_remote_memcpy(uv_gpa(gru_mq_desc), gru_mq_desc_gpa,
597                                sizeof(struct gru_message_queue_desc));
598         if (ret == xpSuccess)
599                 gru_mq_desc->mq = NULL;
600
601         return ret;
602 }
603
604 static enum xp_retval
605 xpc_send_activate_IRQ_uv(struct xpc_partition *part, void *msg, size_t msg_size,
606                          int msg_type)
607 {
608         struct xpc_activate_mq_msghdr_uv *msg_hdr = msg;
609         struct xpc_partition_uv *part_uv = &part->sn.uv;
610         struct gru_message_queue_desc *gru_mq_desc;
611         unsigned long irq_flags;
612         enum xp_retval ret;
613
614         DBUG_ON(msg_size > XPC_ACTIVATE_MSG_SIZE_UV);
615
616         msg_hdr->type = msg_type;
617         msg_hdr->partid = xp_partition_id;
618         msg_hdr->act_state = part->act_state;
619         msg_hdr->rp_ts_jiffies = xpc_rsvd_page->ts_jiffies;
620
621         mutex_lock(&part_uv->cached_activate_gru_mq_desc_mutex);
622 again:
623         if (!(part_uv->flags & XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV)) {
624                 gru_mq_desc = part_uv->cached_activate_gru_mq_desc;
625                 if (gru_mq_desc == NULL) {
626                         gru_mq_desc = kmalloc(sizeof(struct
627                                               gru_message_queue_desc),
628                                               GFP_ATOMIC);
629                         if (gru_mq_desc == NULL) {
630                                 ret = xpNoMemory;
631                                 goto done;
632                         }
633                         part_uv->cached_activate_gru_mq_desc = gru_mq_desc;
634                 }
635
636                 ret = xpc_cache_remote_gru_mq_desc_uv(gru_mq_desc,
637                                                       part_uv->
638                                                       activate_gru_mq_desc_gpa);
639                 if (ret != xpSuccess)
640                         goto done;
641
642                 spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
643                 part_uv->flags |= XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV;
644                 spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
645         }
646
647         /* ??? Is holding a spin_lock (ch->lock) during this call a bad idea? */
648         ret = xpc_send_gru_msg(part_uv->cached_activate_gru_mq_desc, msg,
649                                msg_size);
650         if (ret != xpSuccess) {
651                 smp_rmb();      /* ensure a fresh copy of part_uv->flags */
652                 if (!(part_uv->flags & XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV))
653                         goto again;
654         }
655 done:
656         mutex_unlock(&part_uv->cached_activate_gru_mq_desc_mutex);
657         return ret;
658 }
659
660 static void
661 xpc_send_activate_IRQ_part_uv(struct xpc_partition *part, void *msg,
662                               size_t msg_size, int msg_type)
663 {
664         enum xp_retval ret;
665
666         ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type);
667         if (unlikely(ret != xpSuccess))
668                 XPC_DEACTIVATE_PARTITION(part, ret);
669 }
670
671 static void
672 xpc_send_activate_IRQ_ch_uv(struct xpc_channel *ch, unsigned long *irq_flags,
673                          void *msg, size_t msg_size, int msg_type)
674 {
675         struct xpc_partition *part = &xpc_partitions[ch->partid];
676         enum xp_retval ret;
677
678         ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type);
679         if (unlikely(ret != xpSuccess)) {
680                 if (irq_flags != NULL)
681                         spin_unlock_irqrestore(&ch->lock, *irq_flags);
682
683                 XPC_DEACTIVATE_PARTITION(part, ret);
684
685                 if (irq_flags != NULL)
686                         spin_lock_irqsave(&ch->lock, *irq_flags);
687         }
688 }
689
690 static void
691 xpc_send_local_activate_IRQ_uv(struct xpc_partition *part, int act_state_req)
692 {
693         unsigned long irq_flags;
694         struct xpc_partition_uv *part_uv = &part->sn.uv;
695
696         /*
697          * !!! Make our side think that the remote partition sent an activate
698          * !!! mq message our way by doing what the activate IRQ handler would
699          * !!! do had one really been sent.
700          */
701
702         spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
703         if (part_uv->act_state_req == 0)
704                 xpc_activate_IRQ_rcvd++;
705         part_uv->act_state_req = act_state_req;
706         spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
707
708         wake_up_interruptible(&xpc_activate_IRQ_wq);
709 }
710
711 static enum xp_retval
712 xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa,
713                                   size_t *len)
714 {
715         s64 status;
716         enum xp_retval ret;
717
718         status = uv_bios_reserved_page_pa((u64)buf, cookie, (u64 *)rp_pa,
719                                           (u64 *)len);
720         if (status == BIOS_STATUS_SUCCESS)
721                 ret = xpSuccess;
722         else if (status == BIOS_STATUS_MORE_PASSES)
723                 ret = xpNeedMoreInfo;
724         else
725                 ret = xpBiosError;
726
727         return ret;
728 }
729
730 static int
731 xpc_setup_rsvd_page_uv(struct xpc_rsvd_page *rp)
732 {
733         xpc_heartbeat_uv =
734             &xpc_partitions[sn_partition_id].sn.uv.cached_heartbeat;
735         rp->sn.uv.heartbeat_gpa = uv_gpa(xpc_heartbeat_uv);
736         rp->sn.uv.activate_gru_mq_desc_gpa =
737             uv_gpa(xpc_activate_mq_uv->gru_mq_desc);
738         return 0;
739 }
740
/* Allow heartbeats from partition @partid: no action is taken on uv. */
static void xpc_allow_hb_uv(short partid)
{
	/* intentionally empty */
}
745
/* Disallow heartbeats from partition @partid: no action is taken on uv. */
static void xpc_disallow_hb_uv(short partid)
{
	/* intentionally empty */
}
750
/* Disallow heartbeats from all partitions: no action is taken on uv. */
static void xpc_disallow_all_hbs_uv(void)
{
	/* intentionally empty */
}
755
756 static void
757 xpc_increment_heartbeat_uv(void)
758 {
759         xpc_heartbeat_uv->value++;
760 }
761
762 static void
763 xpc_offline_heartbeat_uv(void)
764 {
765         xpc_increment_heartbeat_uv();
766         xpc_heartbeat_uv->offline = 1;
767 }
768
769 static void
770 xpc_online_heartbeat_uv(void)
771 {
772         xpc_increment_heartbeat_uv();
773         xpc_heartbeat_uv->offline = 0;
774 }
775
776 static void
777 xpc_heartbeat_init_uv(void)
778 {
779         xpc_heartbeat_uv->value = 1;
780         xpc_heartbeat_uv->offline = 0;
781 }
782
/* Stop heartbeating: the local heartbeat is taken offline. */
static void xpc_heartbeat_exit_uv(void)
{
	xpc_offline_heartbeat_uv();
}
788
789 static enum xp_retval
790 xpc_get_remote_heartbeat_uv(struct xpc_partition *part)
791 {
792         struct xpc_partition_uv *part_uv = &part->sn.uv;
793         enum xp_retval ret;
794
795         ret = xp_remote_memcpy(uv_gpa(&part_uv->cached_heartbeat),
796                                part_uv->heartbeat_gpa,
797                                sizeof(struct xpc_heartbeat_uv));
798         if (ret != xpSuccess)
799                 return ret;
800
801         if (part_uv->cached_heartbeat.value == part->last_heartbeat &&
802             !part_uv->cached_heartbeat.offline) {
803
804                 ret = xpNoHeartbeat;
805         } else {
806                 part->last_heartbeat = part_uv->cached_heartbeat.value;
807         }
808         return ret;
809 }
810
811 static void
812 xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp,
813                                     unsigned long remote_rp_gpa, int nasid)
814 {
815         short partid = remote_rp->SAL_partid;
816         struct xpc_partition *part = &xpc_partitions[partid];
817         struct xpc_activate_mq_msg_activate_req_uv msg;
818
819         part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */
820         part->remote_rp_ts_jiffies = remote_rp->ts_jiffies;
821         part->sn.uv.heartbeat_gpa = remote_rp->sn.uv.heartbeat_gpa;
822         part->sn.uv.activate_gru_mq_desc_gpa =
823             remote_rp->sn.uv.activate_gru_mq_desc_gpa;
824
825         /*
826          * ??? Is it a good idea to make this conditional on what is
827          * ??? potentially stale state information?
828          */
829         if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) {
830                 msg.rp_gpa = uv_gpa(xpc_rsvd_page);
831                 msg.heartbeat_gpa = xpc_rsvd_page->sn.uv.heartbeat_gpa;
832                 msg.activate_gru_mq_desc_gpa =
833                     xpc_rsvd_page->sn.uv.activate_gru_mq_desc_gpa;
834                 xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
835                                            XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV);
836         }
837
838         if (part->act_state == XPC_P_AS_INACTIVE)
839                 xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV);
840 }
841
842 static void
843 xpc_request_partition_reactivation_uv(struct xpc_partition *part)
844 {
845         xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV);
846 }
847
848 static void
849 xpc_request_partition_deactivation_uv(struct xpc_partition *part)
850 {
851         struct xpc_activate_mq_msg_deactivate_req_uv msg;
852
853         /*
854          * ??? Is it a good idea to make this conditional on what is
855          * ??? potentially stale state information?
856          */
857         if (part->sn.uv.remote_act_state != XPC_P_AS_DEACTIVATING &&
858             part->sn.uv.remote_act_state != XPC_P_AS_INACTIVE) {
859
860                 msg.reason = part->reason;
861                 xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
862                                          XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV);
863         }
864 }
865
/*
 * Intentionally empty: this implementation has nothing to undo when a
 * deactivation request is cancelled.
 */
static void
xpc_cancel_partition_deactivation_request_uv(struct xpc_partition *part)
{
}
872
873 static void
874 xpc_init_fifo_uv(struct xpc_fifo_head_uv *head)
875 {
876         head->first = NULL;
877         head->last = NULL;
878         spin_lock_init(&head->lock);
879         head->n_entries = 0;
880 }
881
882 static void *
883 xpc_get_fifo_entry_uv(struct xpc_fifo_head_uv *head)
884 {
885         unsigned long irq_flags;
886         struct xpc_fifo_entry_uv *first;
887
888         spin_lock_irqsave(&head->lock, irq_flags);
889         first = head->first;
890         if (head->first != NULL) {
891                 head->first = first->next;
892                 if (head->first == NULL)
893                         head->last = NULL;
894
895                 head->n_entries--;
896                 BUG_ON(head->n_entries < 0);
897
898                 first->next = NULL;
899         }
900         spin_unlock_irqrestore(&head->lock, irq_flags);
901         return first;
902 }
903
904 static void
905 xpc_put_fifo_entry_uv(struct xpc_fifo_head_uv *head,
906                       struct xpc_fifo_entry_uv *last)
907 {
908         unsigned long irq_flags;
909
910         last->next = NULL;
911         spin_lock_irqsave(&head->lock, irq_flags);
912         if (head->last != NULL)
913                 head->last->next = last;
914         else
915                 head->first = last;
916         head->last = last;
917         head->n_entries++;
918         spin_unlock_irqrestore(&head->lock, irq_flags);
919 }
920
921 static int
922 xpc_n_of_fifo_entries_uv(struct xpc_fifo_head_uv *head)
923 {
924         return head->n_entries;
925 }
926
927 /*
928  * Setup the channel structures that are uv specific.
929  */
930 static enum xp_retval
931 xpc_setup_ch_structures_uv(struct xpc_partition *part)
932 {
933         struct xpc_channel_uv *ch_uv;
934         int ch_number;
935
936         for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
937                 ch_uv = &part->channels[ch_number].sn.uv;
938
939                 xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
940                 xpc_init_fifo_uv(&ch_uv->recv_msg_list);
941         }
942
943         return xpSuccess;
944 }
945
/*
 * Teardown the channel structures that are uv specific. Intentionally
 * empty: nothing set up by xpc_setup_ch_structures_uv() needs releasing.
 */
static void
xpc_teardown_ch_structures_uv(struct xpc_partition *part)
{
}
955
956 static enum xp_retval
957 xpc_make_first_contact_uv(struct xpc_partition *part)
958 {
959         struct xpc_activate_mq_msg_uv msg;
960
961         /*
962          * We send a sync msg to get the remote partition's remote_act_state
963          * updated to our current act_state which at this point should
964          * be XPC_P_AS_ACTIVATING.
965          */
966         xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
967                                       XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV);
968
969         while (!((part->sn.uv.remote_act_state == XPC_P_AS_ACTIVATING) ||
970                  (part->sn.uv.remote_act_state == XPC_P_AS_ACTIVE))) {
971
972                 dev_dbg(xpc_part, "waiting to make first contact with "
973                         "partition %d\n", XPC_PARTID(part));
974
975                 /* wait a 1/4 of a second or so */
976                 (void)msleep_interruptible(250);
977
978                 if (part->act_state == XPC_P_AS_DEACTIVATING)
979                         return part->reason;
980         }
981
982         return xpSuccess;
983 }
984
985 static u64
986 xpc_get_chctl_all_flags_uv(struct xpc_partition *part)
987 {
988         unsigned long irq_flags;
989         union xpc_channel_ctl_flags chctl;
990
991         spin_lock_irqsave(&part->chctl_lock, irq_flags);
992         chctl = part->chctl;
993         if (chctl.all_flags != 0)
994                 part->chctl.all_flags = 0;
995
996         spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
997         return chctl.all_flags;
998 }
999
1000 static enum xp_retval
1001 xpc_allocate_send_msg_slot_uv(struct xpc_channel *ch)
1002 {
1003         struct xpc_channel_uv *ch_uv = &ch->sn.uv;
1004         struct xpc_send_msg_slot_uv *msg_slot;
1005         unsigned long irq_flags;
1006         int nentries;
1007         int entry;
1008         size_t nbytes;
1009
1010         for (nentries = ch->local_nentries; nentries > 0; nentries--) {
1011                 nbytes = nentries * sizeof(struct xpc_send_msg_slot_uv);
1012                 ch_uv->send_msg_slots = kzalloc(nbytes, GFP_KERNEL);
1013                 if (ch_uv->send_msg_slots == NULL)
1014                         continue;
1015
1016                 for (entry = 0; entry < nentries; entry++) {
1017                         msg_slot = &ch_uv->send_msg_slots[entry];
1018
1019                         msg_slot->msg_slot_number = entry;
1020                         xpc_put_fifo_entry_uv(&ch_uv->msg_slot_free_list,
1021                                               &msg_slot->next);
1022                 }
1023
1024                 spin_lock_irqsave(&ch->lock, irq_flags);
1025                 if (nentries < ch->local_nentries)
1026                         ch->local_nentries = nentries;
1027                 spin_unlock_irqrestore(&ch->lock, irq_flags);
1028                 return xpSuccess;
1029         }
1030
1031         return xpNoMemory;
1032 }
1033
1034 static enum xp_retval
1035 xpc_allocate_recv_msg_slot_uv(struct xpc_channel *ch)
1036 {
1037         struct xpc_channel_uv *ch_uv = &ch->sn.uv;
1038         struct xpc_notify_mq_msg_uv *msg_slot;
1039         unsigned long irq_flags;
1040         int nentries;
1041         int entry;
1042         size_t nbytes;
1043
1044         for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
1045                 nbytes = nentries * ch->entry_size;
1046                 ch_uv->recv_msg_slots = kzalloc(nbytes, GFP_KERNEL);
1047                 if (ch_uv->recv_msg_slots == NULL)
1048                         continue;
1049
1050                 for (entry = 0; entry < nentries; entry++) {
1051                         msg_slot = ch_uv->recv_msg_slots +
1052                             entry * ch->entry_size;
1053
1054                         msg_slot->hdr.msg_slot_number = entry;
1055                 }
1056
1057                 spin_lock_irqsave(&ch->lock, irq_flags);
1058                 if (nentries < ch->remote_nentries)
1059                         ch->remote_nentries = nentries;
1060                 spin_unlock_irqrestore(&ch->lock, irq_flags);
1061                 return xpSuccess;
1062         }
1063
1064         return xpNoMemory;
1065 }
1066
1067 /*
1068  * Allocate msg_slots associated with the channel.
1069  */
1070 static enum xp_retval
1071 xpc_setup_msg_structures_uv(struct xpc_channel *ch)
1072 {
1073         static enum xp_retval ret;
1074         struct xpc_channel_uv *ch_uv = &ch->sn.uv;
1075
1076         DBUG_ON(ch->flags & XPC_C_SETUP);
1077
1078         ch_uv->cached_notify_gru_mq_desc = kmalloc(sizeof(struct
1079                                                    gru_message_queue_desc),
1080                                                    GFP_KERNEL);
1081         if (ch_uv->cached_notify_gru_mq_desc == NULL)
1082                 return xpNoMemory;
1083
1084         ret = xpc_allocate_send_msg_slot_uv(ch);
1085         if (ret == xpSuccess) {
1086
1087                 ret = xpc_allocate_recv_msg_slot_uv(ch);
1088                 if (ret != xpSuccess) {
1089                         kfree(ch_uv->send_msg_slots);
1090                         xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
1091                 }
1092         }
1093         return ret;
1094 }
1095
1096 /*
1097  * Free up msg_slots and clear other stuff that were setup for the specified
1098  * channel.
1099  */
1100 static void
1101 xpc_teardown_msg_structures_uv(struct xpc_channel *ch)
1102 {
1103         struct xpc_channel_uv *ch_uv = &ch->sn.uv;
1104
1105         lockdep_assert_held(&ch->lock);
1106
1107         kfree(ch_uv->cached_notify_gru_mq_desc);
1108         ch_uv->cached_notify_gru_mq_desc = NULL;
1109
1110         if (ch->flags & XPC_C_SETUP) {
1111                 xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
1112                 kfree(ch_uv->send_msg_slots);
1113                 xpc_init_fifo_uv(&ch_uv->recv_msg_list);
1114                 kfree(ch_uv->recv_msg_slots);
1115         }
1116 }
1117
1118 static void
1119 xpc_send_chctl_closerequest_uv(struct xpc_channel *ch, unsigned long *irq_flags)
1120 {
1121         struct xpc_activate_mq_msg_chctl_closerequest_uv msg;
1122
1123         msg.ch_number = ch->number;
1124         msg.reason = ch->reason;
1125         xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
1126                                     XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV);
1127 }
1128
1129 static void
1130 xpc_send_chctl_closereply_uv(struct xpc_channel *ch, unsigned long *irq_flags)
1131 {
1132         struct xpc_activate_mq_msg_chctl_closereply_uv msg;
1133
1134         msg.ch_number = ch->number;
1135         xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
1136                                     XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV);
1137 }
1138
1139 static void
1140 xpc_send_chctl_openrequest_uv(struct xpc_channel *ch, unsigned long *irq_flags)
1141 {
1142         struct xpc_activate_mq_msg_chctl_openrequest_uv msg;
1143
1144         msg.ch_number = ch->number;
1145         msg.entry_size = ch->entry_size;
1146         msg.local_nentries = ch->local_nentries;
1147         xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
1148                                     XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV);
1149 }
1150
1151 static void
1152 xpc_send_chctl_openreply_uv(struct xpc_channel *ch, unsigned long *irq_flags)
1153 {
1154         struct xpc_activate_mq_msg_chctl_openreply_uv msg;
1155
1156         msg.ch_number = ch->number;
1157         msg.local_nentries = ch->local_nentries;
1158         msg.remote_nentries = ch->remote_nentries;
1159         msg.notify_gru_mq_desc_gpa = uv_gpa(xpc_notify_mq_uv->gru_mq_desc);
1160         xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
1161                                     XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV);
1162 }
1163
1164 static void
1165 xpc_send_chctl_opencomplete_uv(struct xpc_channel *ch, unsigned long *irq_flags)
1166 {
1167         struct xpc_activate_mq_msg_chctl_opencomplete_uv msg;
1168
1169         msg.ch_number = ch->number;
1170         xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
1171                                     XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV);
1172 }
1173
1174 static void
1175 xpc_send_chctl_local_msgrequest_uv(struct xpc_partition *part, int ch_number)
1176 {
1177         unsigned long irq_flags;
1178
1179         spin_lock_irqsave(&part->chctl_lock, irq_flags);
1180         part->chctl.flags[ch_number] |= XPC_CHCTL_MSGREQUEST;
1181         spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
1182
1183         xpc_wakeup_channel_mgr(part);
1184 }
1185
1186 static enum xp_retval
1187 xpc_save_remote_msgqueue_pa_uv(struct xpc_channel *ch,
1188                                unsigned long gru_mq_desc_gpa)
1189 {
1190         struct xpc_channel_uv *ch_uv = &ch->sn.uv;
1191
1192         DBUG_ON(ch_uv->cached_notify_gru_mq_desc == NULL);
1193         return xpc_cache_remote_gru_mq_desc_uv(ch_uv->cached_notify_gru_mq_desc,
1194                                                gru_mq_desc_gpa);
1195 }
1196
1197 static void
1198 xpc_indicate_partition_engaged_uv(struct xpc_partition *part)
1199 {
1200         struct xpc_activate_mq_msg_uv msg;
1201
1202         xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
1203                                       XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV);
1204 }
1205
1206 static void
1207 xpc_indicate_partition_disengaged_uv(struct xpc_partition *part)
1208 {
1209         struct xpc_activate_mq_msg_uv msg;
1210
1211         xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
1212                                       XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV);
1213 }
1214
1215 static void
1216 xpc_assume_partition_disengaged_uv(short partid)
1217 {
1218         struct xpc_partition_uv *part_uv = &xpc_partitions[partid].sn.uv;
1219         unsigned long irq_flags;
1220
1221         spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
1222         part_uv->flags &= ~XPC_P_ENGAGED_UV;
1223         spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
1224 }
1225
1226 static int
1227 xpc_partition_engaged_uv(short partid)
1228 {
1229         return (xpc_partitions[partid].sn.uv.flags & XPC_P_ENGAGED_UV) != 0;
1230 }
1231
1232 static int
1233 xpc_any_partition_engaged_uv(void)
1234 {
1235         struct xpc_partition_uv *part_uv;
1236         short partid;
1237
1238         for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
1239                 part_uv = &xpc_partitions[partid].sn.uv;
1240                 if ((part_uv->flags & XPC_P_ENGAGED_UV) != 0)
1241                         return 1;
1242         }
1243         return 0;
1244 }
1245
1246 static enum xp_retval
1247 xpc_allocate_msg_slot_uv(struct xpc_channel *ch, u32 flags,
1248                          struct xpc_send_msg_slot_uv **address_of_msg_slot)
1249 {
1250         enum xp_retval ret;
1251         struct xpc_send_msg_slot_uv *msg_slot;
1252         struct xpc_fifo_entry_uv *entry;
1253
1254         while (1) {
1255                 entry = xpc_get_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list);
1256                 if (entry != NULL)
1257                         break;
1258
1259                 if (flags & XPC_NOWAIT)
1260                         return xpNoWait;
1261
1262                 ret = xpc_allocate_msg_wait(ch);
1263                 if (ret != xpInterrupted && ret != xpTimeout)
1264                         return ret;
1265         }
1266
1267         msg_slot = container_of(entry, struct xpc_send_msg_slot_uv, next);
1268         *address_of_msg_slot = msg_slot;
1269         return xpSuccess;
1270 }
1271
1272 static void
1273 xpc_free_msg_slot_uv(struct xpc_channel *ch,
1274                      struct xpc_send_msg_slot_uv *msg_slot)
1275 {
1276         xpc_put_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list, &msg_slot->next);
1277
1278         /* wakeup anyone waiting for a free msg slot */
1279         if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
1280                 wake_up(&ch->msg_allocate_wq);
1281 }
1282
1283 static void
1284 xpc_notify_sender_uv(struct xpc_channel *ch,
1285                      struct xpc_send_msg_slot_uv *msg_slot,
1286                      enum xp_retval reason)
1287 {
1288         xpc_notify_func func = msg_slot->func;
1289
1290         if (func != NULL && cmpxchg(&msg_slot->func, func, NULL) == func) {
1291
1292                 atomic_dec(&ch->n_to_notify);
1293
1294                 dev_dbg(xpc_chan, "msg_slot->func() called, msg_slot=0x%p "
1295                         "msg_slot_number=%d partid=%d channel=%d\n", msg_slot,
1296                         msg_slot->msg_slot_number, ch->partid, ch->number);
1297
1298                 func(reason, ch->partid, ch->number, msg_slot->key);
1299
1300                 dev_dbg(xpc_chan, "msg_slot->func() returned, msg_slot=0x%p "
1301                         "msg_slot_number=%d partid=%d channel=%d\n", msg_slot,
1302                         msg_slot->msg_slot_number, ch->partid, ch->number);
1303         }
1304 }
1305
1306 static void
1307 xpc_handle_notify_mq_ack_uv(struct xpc_channel *ch,
1308                             struct xpc_notify_mq_msg_uv *msg)
1309 {
1310         struct xpc_send_msg_slot_uv *msg_slot;
1311         int entry = msg->hdr.msg_slot_number % ch->local_nentries;
1312
1313         msg_slot = &ch->sn.uv.send_msg_slots[entry];
1314
1315         BUG_ON(msg_slot->msg_slot_number != msg->hdr.msg_slot_number);
1316         msg_slot->msg_slot_number += ch->local_nentries;
1317
1318         if (msg_slot->func != NULL)
1319                 xpc_notify_sender_uv(ch, msg_slot, xpMsgDelivered);
1320
1321         xpc_free_msg_slot_uv(ch, msg_slot);
1322 }
1323
1324 static void
1325 xpc_handle_notify_mq_msg_uv(struct xpc_partition *part,
1326                             struct xpc_notify_mq_msg_uv *msg)
1327 {
1328         struct xpc_partition_uv *part_uv = &part->sn.uv;
1329         struct xpc_channel *ch;
1330         struct xpc_channel_uv *ch_uv;
1331         struct xpc_notify_mq_msg_uv *msg_slot;
1332         unsigned long irq_flags;
1333         int ch_number = msg->hdr.ch_number;
1334
1335         if (unlikely(ch_number >= part->nchannels)) {
1336                 dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received invalid "
1337                         "channel number=0x%x in message from partid=%d\n",
1338                         ch_number, XPC_PARTID(part));
1339
1340                 /* get hb checker to deactivate from the remote partition */
1341                 spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
1342                 if (part_uv->act_state_req == 0)
1343                         xpc_activate_IRQ_rcvd++;
1344                 part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
1345                 part_uv->reason = xpBadChannelNumber;
1346                 spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
1347
1348                 wake_up_interruptible(&xpc_activate_IRQ_wq);
1349                 return;
1350         }
1351
1352         ch = &part->channels[ch_number];
1353         xpc_msgqueue_ref(ch);
1354
1355         if (!(ch->flags & XPC_C_CONNECTED)) {
1356                 xpc_msgqueue_deref(ch);
1357                 return;
1358         }
1359
1360         /* see if we're really dealing with an ACK for a previously sent msg */
1361         if (msg->hdr.size == 0) {
1362                 xpc_handle_notify_mq_ack_uv(ch, msg);
1363                 xpc_msgqueue_deref(ch);
1364                 return;
1365         }
1366
1367         /* we're dealing with a normal message sent via the notify_mq */
1368         ch_uv = &ch->sn.uv;
1369
1370         msg_slot = ch_uv->recv_msg_slots +
1371             (msg->hdr.msg_slot_number % ch->remote_nentries) * ch->entry_size;
1372
1373         BUG_ON(msg_slot->hdr.size != 0);
1374
1375         memcpy(msg_slot, msg, msg->hdr.size);
1376
1377         xpc_put_fifo_entry_uv(&ch_uv->recv_msg_list, &msg_slot->hdr.u.next);
1378
1379         if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) {
1380                 /*
1381                  * If there is an existing idle kthread get it to deliver
1382                  * the payload, otherwise we'll have to get the channel mgr
1383                  * for this partition to create a kthread to do the delivery.
1384                  */
1385                 if (atomic_read(&ch->kthreads_idle) > 0)
1386                         wake_up_nr(&ch->idle_wq, 1);
1387                 else
1388                         xpc_send_chctl_local_msgrequest_uv(part, ch->number);
1389         }
1390         xpc_msgqueue_deref(ch);
1391 }
1392
1393 static irqreturn_t
1394 xpc_handle_notify_IRQ_uv(int irq, void *dev_id)
1395 {
1396         struct xpc_notify_mq_msg_uv *msg;
1397         short partid;
1398         struct xpc_partition *part;
1399
1400         while ((msg = gru_get_next_message(xpc_notify_mq_uv->gru_mq_desc)) !=
1401                NULL) {
1402
1403                 partid = msg->hdr.partid;
1404                 if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
1405                         dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received "
1406                                 "invalid partid=0x%x in message\n", partid);
1407                 } else {
1408                         part = &xpc_partitions[partid];
1409
1410                         if (xpc_part_ref(part)) {
1411                                 xpc_handle_notify_mq_msg_uv(part, msg);
1412                                 xpc_part_deref(part);
1413                         }
1414                 }
1415
1416                 gru_free_message(xpc_notify_mq_uv->gru_mq_desc, msg);
1417         }
1418
1419         return IRQ_HANDLED;
1420 }
1421
1422 static int
1423 xpc_n_of_deliverable_payloads_uv(struct xpc_channel *ch)
1424 {
1425         return xpc_n_of_fifo_entries_uv(&ch->sn.uv.recv_msg_list);
1426 }
1427
1428 static void
1429 xpc_process_msg_chctl_flags_uv(struct xpc_partition *part, int ch_number)
1430 {
1431         struct xpc_channel *ch = &part->channels[ch_number];
1432         int ndeliverable_payloads;
1433
1434         xpc_msgqueue_ref(ch);
1435
1436         ndeliverable_payloads = xpc_n_of_deliverable_payloads_uv(ch);
1437
1438         if (ndeliverable_payloads > 0 &&
1439             (ch->flags & XPC_C_CONNECTED) &&
1440             (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)) {
1441
1442                 xpc_activate_kthreads(ch, ndeliverable_payloads);
1443         }
1444
1445         xpc_msgqueue_deref(ch);
1446 }
1447
1448 static enum xp_retval
1449 xpc_send_payload_uv(struct xpc_channel *ch, u32 flags, void *payload,
1450                     u16 payload_size, u8 notify_type, xpc_notify_func func,
1451                     void *key)
1452 {
1453         enum xp_retval ret = xpSuccess;
1454         struct xpc_send_msg_slot_uv *msg_slot = NULL;
1455         struct xpc_notify_mq_msg_uv *msg;
1456         u8 msg_buffer[XPC_NOTIFY_MSG_SIZE_UV];
1457         size_t msg_size;
1458
1459         DBUG_ON(notify_type != XPC_N_CALL);
1460
1461         msg_size = sizeof(struct xpc_notify_mq_msghdr_uv) + payload_size;
1462         if (msg_size > ch->entry_size)
1463                 return xpPayloadTooBig;
1464
1465         xpc_msgqueue_ref(ch);
1466
1467         if (ch->flags & XPC_C_DISCONNECTING) {
1468                 ret = ch->reason;
1469                 goto out_1;
1470         }
1471         if (!(ch->flags & XPC_C_CONNECTED)) {
1472                 ret = xpNotConnected;
1473                 goto out_1;
1474         }
1475
1476         ret = xpc_allocate_msg_slot_uv(ch, flags, &msg_slot);
1477         if (ret != xpSuccess)
1478                 goto out_1;
1479
1480         if (func != NULL) {
1481                 atomic_inc(&ch->n_to_notify);
1482
1483                 msg_slot->key = key;
1484                 smp_wmb(); /* a non-NULL func must hit memory after the key */
1485                 msg_slot->func = func;
1486
1487                 if (ch->flags & XPC_C_DISCONNECTING) {
1488                         ret = ch->reason;
1489                         goto out_2;
1490                 }
1491         }
1492
1493         msg = (struct xpc_notify_mq_msg_uv *)&msg_buffer;
1494         msg->hdr.partid = xp_partition_id;
1495         msg->hdr.ch_number = ch->number;
1496         msg->hdr.size = msg_size;
1497         msg->hdr.msg_slot_number = msg_slot->msg_slot_number;
1498         memcpy(&msg->payload, payload, payload_size);
1499
1500         ret = xpc_send_gru_msg(ch->sn.uv.cached_notify_gru_mq_desc, msg,
1501                                msg_size);
1502         if (ret == xpSuccess)
1503                 goto out_1;
1504
1505         XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret);
1506 out_2:
1507         if (func != NULL) {
1508                 /*
1509                  * Try to NULL the msg_slot's func field. If we fail, then
1510                  * xpc_notify_senders_of_disconnect_uv() beat us to it, in which
1511                  * case we need to pretend we succeeded to send the message
1512                  * since the user will get a callout for the disconnect error
1513                  * by xpc_notify_senders_of_disconnect_uv(), and to also get an
1514                  * error returned here will confuse them. Additionally, since
1515                  * in this case the channel is being disconnected we don't need
1516                  * to put the msg_slot back on the free list.
1517                  */
1518                 if (cmpxchg(&msg_slot->func, func, NULL) != func) {
1519                         ret = xpSuccess;
1520                         goto out_1;
1521                 }
1522
1523                 msg_slot->key = NULL;
1524                 atomic_dec(&ch->n_to_notify);
1525         }
1526         xpc_free_msg_slot_uv(ch, msg_slot);
1527 out_1:
1528         xpc_msgqueue_deref(ch);
1529         return ret;
1530 }
1531
1532 /*
1533  * Tell the callers of xpc_send_notify() that the status of their payloads
1534  * is unknown because the channel is now disconnecting.
1535  *
1536  * We don't worry about putting these msg_slots on the free list since the
1537  * msg_slots themselves are about to be kfree'd.
1538  */
1539 static void
1540 xpc_notify_senders_of_disconnect_uv(struct xpc_channel *ch)
1541 {
1542         struct xpc_send_msg_slot_uv *msg_slot;
1543         int entry;
1544
1545         DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING));
1546
1547         for (entry = 0; entry < ch->local_nentries; entry++) {
1548
1549                 if (atomic_read(&ch->n_to_notify) == 0)
1550                         break;
1551
1552                 msg_slot = &ch->sn.uv.send_msg_slots[entry];
1553                 if (msg_slot->func != NULL)
1554                         xpc_notify_sender_uv(ch, msg_slot, ch->reason);
1555         }
1556 }
1557
1558 /*
1559  * Get the next deliverable message's payload.
1560  */
1561 static void *
1562 xpc_get_deliverable_payload_uv(struct xpc_channel *ch)
1563 {
1564         struct xpc_fifo_entry_uv *entry;
1565         struct xpc_notify_mq_msg_uv *msg;
1566         void *payload = NULL;
1567
1568         if (!(ch->flags & XPC_C_DISCONNECTING)) {
1569                 entry = xpc_get_fifo_entry_uv(&ch->sn.uv.recv_msg_list);
1570                 if (entry != NULL) {
1571                         msg = container_of(entry, struct xpc_notify_mq_msg_uv,
1572                                            hdr.u.next);
1573                         payload = &msg->payload;
1574                 }
1575         }
1576         return payload;
1577 }
1578
1579 static void
1580 xpc_received_payload_uv(struct xpc_channel *ch, void *payload)
1581 {
1582         struct xpc_notify_mq_msg_uv *msg;
1583         enum xp_retval ret;
1584
1585         msg = container_of(payload, struct xpc_notify_mq_msg_uv, payload);
1586
1587         /* return an ACK to the sender of this message */
1588
1589         msg->hdr.partid = xp_partition_id;
1590         msg->hdr.size = 0;      /* size of zero indicates this is an ACK */
1591
1592         ret = xpc_send_gru_msg(ch->sn.uv.cached_notify_gru_mq_desc, msg,
1593                                sizeof(struct xpc_notify_mq_msghdr_uv));
1594         if (ret != xpSuccess)
1595                 XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret);
1596 }
1597
1598 static const struct xpc_arch_operations xpc_arch_ops_uv = {
1599         .setup_partitions = xpc_setup_partitions_uv,
1600         .teardown_partitions = xpc_teardown_partitions_uv,
1601         .process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv,
1602         .get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv,
1603         .setup_rsvd_page = xpc_setup_rsvd_page_uv,
1604
1605         .allow_hb = xpc_allow_hb_uv,
1606         .disallow_hb = xpc_disallow_hb_uv,
1607         .disallow_all_hbs = xpc_disallow_all_hbs_uv,
1608         .increment_heartbeat = xpc_increment_heartbeat_uv,
1609         .offline_heartbeat = xpc_offline_heartbeat_uv,
1610         .online_heartbeat = xpc_online_heartbeat_uv,
1611         .heartbeat_init = xpc_heartbeat_init_uv,
1612         .heartbeat_exit = xpc_heartbeat_exit_uv,
1613         .get_remote_heartbeat = xpc_get_remote_heartbeat_uv,
1614
1615         .request_partition_activation =
1616                 xpc_request_partition_activation_uv,
1617         .request_partition_reactivation =
1618                 xpc_request_partition_reactivation_uv,
1619         .request_partition_deactivation =
1620                 xpc_request_partition_deactivation_uv,
1621         .cancel_partition_deactivation_request =
1622                 xpc_cancel_partition_deactivation_request_uv,
1623
1624         .setup_ch_structures = xpc_setup_ch_structures_uv,
1625         .teardown_ch_structures = xpc_teardown_ch_structures_uv,
1626
1627         .make_first_contact = xpc_make_first_contact_uv,
1628
1629         .get_chctl_all_flags = xpc_get_chctl_all_flags_uv,
1630         .send_chctl_closerequest = xpc_send_chctl_closerequest_uv,
1631         .send_chctl_closereply = xpc_send_chctl_closereply_uv,
1632         .send_chctl_openrequest = xpc_send_chctl_openrequest_uv,
1633         .send_chctl_openreply = xpc_send_chctl_openreply_uv,
1634         .send_chctl_opencomplete = xpc_send_chctl_opencomplete_uv,
1635         .process_msg_chctl_flags = xpc_process_msg_chctl_flags_uv,
1636
1637         .save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_uv,
1638
1639         .setup_msg_structures = xpc_setup_msg_structures_uv,
1640         .teardown_msg_structures = xpc_teardown_msg_structures_uv,
1641
1642         .indicate_partition_engaged = xpc_indicate_partition_engaged_uv,
1643         .indicate_partition_disengaged = xpc_indicate_partition_disengaged_uv,
1644         .assume_partition_disengaged = xpc_assume_partition_disengaged_uv,
1645         .partition_engaged = xpc_partition_engaged_uv,
1646         .any_partition_engaged = xpc_any_partition_engaged_uv,
1647
1648         .n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_uv,
1649         .send_payload = xpc_send_payload_uv,
1650         .get_deliverable_payload = xpc_get_deliverable_payload_uv,
1651         .received_payload = xpc_received_payload_uv,
1652         .notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv,
1653 };
1654
1655 static int
1656 xpc_init_mq_node(int nid)
1657 {
1658         int cpu;
1659
1660         cpus_read_lock();
1661
1662         for_each_cpu(cpu, cpumask_of_node(nid)) {
1663                 xpc_activate_mq_uv =
1664                         xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, nid,
1665                                              XPC_ACTIVATE_IRQ_NAME,
1666                                              xpc_handle_activate_IRQ_uv);
1667                 if (!IS_ERR(xpc_activate_mq_uv))
1668                         break;
1669         }
1670         if (IS_ERR(xpc_activate_mq_uv)) {
1671                 cpus_read_unlock();
1672                 return PTR_ERR(xpc_activate_mq_uv);
1673         }
1674
1675         for_each_cpu(cpu, cpumask_of_node(nid)) {
1676                 xpc_notify_mq_uv =
1677                         xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, nid,
1678                                              XPC_NOTIFY_IRQ_NAME,
1679                                              xpc_handle_notify_IRQ_uv);
1680                 if (!IS_ERR(xpc_notify_mq_uv))
1681                         break;
1682         }
1683         if (IS_ERR(xpc_notify_mq_uv)) {
1684                 xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
1685                 cpus_read_unlock();
1686                 return PTR_ERR(xpc_notify_mq_uv);
1687         }
1688
1689         cpus_read_unlock();
1690         return 0;
1691 }
1692
1693 int
1694 xpc_init_uv(void)
1695 {
1696         int nid;
1697         int ret = 0;
1698
1699         xpc_arch_ops = xpc_arch_ops_uv;
1700
1701         if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) {
1702                 dev_err(xpc_part, "xpc_notify_mq_msghdr_uv is larger than %d\n",
1703                         XPC_MSG_HDR_MAX_SIZE);
1704                 return -E2BIG;
1705         }
1706
1707         if (xpc_mq_node < 0)
1708                 for_each_online_node(nid) {
1709                         ret = xpc_init_mq_node(nid);
1710
1711                         if (!ret)
1712                                 break;
1713                 }
1714         else
1715                 ret = xpc_init_mq_node(xpc_mq_node);
1716
1717         if (ret < 0)
1718                 dev_err(xpc_part, "xpc_init_mq_node() returned error=%d\n",
1719                         -ret);
1720
1721         return ret;
1722 }
1723
1724 void
1725 xpc_exit_uv(void)
1726 {
1727         xpc_destroy_gru_mq_uv(xpc_notify_mq_uv);
1728         xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
1729 }
1730
1731 module_param(xpc_mq_node, int, 0);
1732 MODULE_PARM_DESC(xpc_mq_node, "Node number on which to allocate message queues.");