GNU Linux-libre 4.19.264-gnu1
[releases.git] / arch / powerpc / platforms / powernv / memtrace.c
1 /*
2  * Copyright (C) IBM Corporation, 2014, 2017
3  * Anton Blanchard, Rashmica Gupta.
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  */
10
11 #define pr_fmt(fmt) "memtrace: " fmt
12
13 #include <linux/bitops.h>
14 #include <linux/string.h>
15 #include <linux/memblock.h>
16 #include <linux/init.h>
17 #include <linux/moduleparam.h>
18 #include <linux/fs.h>
19 #include <linux/debugfs.h>
20 #include <linux/slab.h>
21 #include <linux/memory.h>
22 #include <linux/memory_hotplug.h>
23 #include <asm/machdep.h>
24 #include <asm/debugfs.h>
25
26 /* This enables us to keep track of the memory removed from each node. */
27 struct memtrace_entry {
28         void *mem;
29         u64 start;
30         u64 size;
31         u32 nid;
32         struct dentry *dir;
33         char name[16];
34 };
35
36 static DEFINE_MUTEX(memtrace_mutex);
37 static u64 memtrace_size;
38
39 static struct memtrace_entry *memtrace_array;
40 static unsigned int memtrace_array_nr;
41
42
43 static ssize_t memtrace_read(struct file *filp, char __user *ubuf,
44                              size_t count, loff_t *ppos)
45 {
46         struct memtrace_entry *ent = filp->private_data;
47
48         return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size);
49 }
50
51 static const struct file_operations memtrace_fops = {
52         .llseek = default_llseek,
53         .read   = memtrace_read,
54         .open   = simple_open,
55 };
56
57 static int check_memblock_online(struct memory_block *mem, void *arg)
58 {
59         if (mem->state != MEM_ONLINE)
60                 return -1;
61
62         return 0;
63 }
64
65 static int change_memblock_state(struct memory_block *mem, void *arg)
66 {
67         unsigned long state = (unsigned long)arg;
68
69         mem->state = state;
70
71         return 0;
72 }
73
74 static void memtrace_clear_range(unsigned long start_pfn,
75                                  unsigned long nr_pages)
76 {
77         unsigned long pfn;
78
79         /*
80          * As pages are offline, we cannot trust the memmap anymore. As HIGHMEM
81          * does not apply, avoid passing around "struct page" and use
82          * clear_page() instead directly.
83          */
84         for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
85                 if (IS_ALIGNED(pfn, PAGES_PER_SECTION))
86                         cond_resched();
87                 clear_page(__va(PFN_PHYS(pfn)));
88         }
89 }
90
91 /* called with device_hotplug_lock held */
92 static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
93 {
94         u64 end_pfn = start_pfn + nr_pages - 1;
95
96         if (walk_memory_range(start_pfn, end_pfn, NULL,
97             check_memblock_online))
98                 return false;
99
100         walk_memory_range(start_pfn, end_pfn, (void *)MEM_GOING_OFFLINE,
101                           change_memblock_state);
102
103         if (offline_pages(start_pfn, nr_pages)) {
104                 walk_memory_range(start_pfn, end_pfn, (void *)MEM_ONLINE,
105                                   change_memblock_state);
106                 return false;
107         }
108
109         walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE,
110                           change_memblock_state);
111
112
113         return true;
114 }
115
116 static u64 memtrace_alloc_node(u32 nid, u64 size)
117 {
118         u64 start_pfn, end_pfn, nr_pages, pfn;
119         u64 base_pfn;
120         u64 bytes = memory_block_size_bytes();
121
122         if (!node_spanned_pages(nid))
123                 return 0;
124
125         start_pfn = node_start_pfn(nid);
126         end_pfn = node_end_pfn(nid);
127         nr_pages = size >> PAGE_SHIFT;
128
129         /* Trace memory needs to be aligned to the size */
130         end_pfn = round_down(end_pfn - nr_pages, nr_pages);
131
132         lock_device_hotplug();
133         for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) {
134                 if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) {
135                         /*
136                          * Clear the range while we still have a linear
137                          * mapping.
138                          */
139                         memtrace_clear_range(base_pfn, nr_pages);
140                         /*
141                          * Remove memory in memory block size chunks so that
142                          * iomem resources are always split to the same size and
143                          * we never try to remove memory that spans two iomem
144                          * resources.
145                          */
146                         end_pfn = base_pfn + nr_pages;
147                         for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) {
148                                 __remove_memory(nid, pfn << PAGE_SHIFT, bytes);
149                         }
150                         unlock_device_hotplug();
151                         return base_pfn << PAGE_SHIFT;
152                 }
153         }
154         unlock_device_hotplug();
155
156         return 0;
157 }
158
159 static int memtrace_init_regions_runtime(u64 size)
160 {
161         u32 nid;
162         u64 m;
163
164         memtrace_array = kcalloc(num_online_nodes(),
165                                 sizeof(struct memtrace_entry), GFP_KERNEL);
166         if (!memtrace_array) {
167                 pr_err("Failed to allocate memtrace_array\n");
168                 return -EINVAL;
169         }
170
171         for_each_online_node(nid) {
172                 m = memtrace_alloc_node(nid, size);
173
174                 /*
175                  * A node might not have any local memory, so warn but
176                  * continue on.
177                  */
178                 if (!m) {
179                         pr_err("Failed to allocate trace memory on node %d\n", nid);
180                         continue;
181                 }
182
183                 pr_info("Allocated trace memory on node %d at 0x%016llx\n", nid, m);
184
185                 memtrace_array[memtrace_array_nr].start = m;
186                 memtrace_array[memtrace_array_nr].size = size;
187                 memtrace_array[memtrace_array_nr].nid = nid;
188                 memtrace_array_nr++;
189         }
190
191         return 0;
192 }
193
/* The "memtrace" debugfs directory; parent of the per-node directories. */
static struct dentry *memtrace_debugfs_dir;
195
196 static int memtrace_init_debugfs(void)
197 {
198         int ret = 0;
199         int i;
200
201         for (i = 0; i < memtrace_array_nr; i++) {
202                 struct dentry *dir;
203                 struct memtrace_entry *ent = &memtrace_array[i];
204
205                 ent->mem = ioremap(ent->start, ent->size);
206                 /* Warn but continue on */
207                 if (!ent->mem) {
208                         pr_err("Failed to map trace memory at 0x%llx\n",
209                                  ent->start);
210                         ret = -1;
211                         continue;
212                 }
213
214                 snprintf(ent->name, 16, "%08x", ent->nid);
215                 dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir);
216                 if (!dir) {
217                         pr_err("Failed to create debugfs directory for node %d\n",
218                                 ent->nid);
219                         return -1;
220                 }
221
222                 ent->dir = dir;
223                 debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops);
224                 debugfs_create_x64("start", 0400, dir, &ent->start);
225                 debugfs_create_x64("size", 0400, dir, &ent->size);
226         }
227
228         return ret;
229 }
230
231 static int online_mem_block(struct memory_block *mem, void *arg)
232 {
233         return device_online(&mem->dev);
234 }
235
236 /*
237  * Iterate through the chunks of memory we have removed from the kernel
238  * and attempt to add them back to the kernel.
239  */
240 static int memtrace_online(void)
241 {
242         int i, ret = 0;
243         struct memtrace_entry *ent;
244
245         for (i = memtrace_array_nr - 1; i >= 0; i--) {
246                 ent = &memtrace_array[i];
247
248                 /* We have onlined this chunk previously */
249                 if (ent->nid == -1)
250                         continue;
251
252                 /* Remove from io mappings */
253                 if (ent->mem) {
254                         iounmap(ent->mem);
255                         ent->mem = 0;
256                 }
257
258                 if (add_memory(ent->nid, ent->start, ent->size)) {
259                         pr_err("Failed to add trace memory to node %d\n",
260                                 ent->nid);
261                         ret += 1;
262                         continue;
263                 }
264
265                 /*
266                  * If kernel isn't compiled with the auto online option
267                  * we need to online the memory ourselves.
268                  */
269                 if (!memhp_auto_online) {
270                         lock_device_hotplug();
271                         walk_memory_range(PFN_DOWN(ent->start),
272                                           PFN_UP(ent->start + ent->size - 1),
273                                           NULL, online_mem_block);
274                         unlock_device_hotplug();
275                 }
276
277                 /*
278                  * Memory was added successfully so clean up references to it
279                  * so on reentry we can tell that this chunk was added.
280                  */
281                 debugfs_remove_recursive(ent->dir);
282                 pr_info("Added trace memory back to node %d\n", ent->nid);
283                 ent->size = ent->start = ent->nid = -1;
284         }
285         if (ret)
286                 return ret;
287
288         /* If all chunks of memory were added successfully, reset globals */
289         kfree(memtrace_array);
290         memtrace_array = NULL;
291         memtrace_size = 0;
292         memtrace_array_nr = 0;
293         return 0;
294 }
295
296 static int memtrace_enable_set(void *data, u64 val)
297 {
298         int rc = -EAGAIN;
299         u64 bytes;
300
301         /*
302          * Don't attempt to do anything if size isn't aligned to a memory
303          * block or equal to zero.
304          */
305         bytes = memory_block_size_bytes();
306         if (val & (bytes - 1)) {
307                 pr_err("Value must be aligned with 0x%llx\n", bytes);
308                 return -EINVAL;
309         }
310
311         mutex_lock(&memtrace_mutex);
312
313         /* Re-add/online previously removed/offlined memory */
314         if (memtrace_size) {
315                 if (memtrace_online())
316                         goto out_unlock;
317         }
318
319         if (!val) {
320                 rc = 0;
321                 goto out_unlock;
322         }
323
324         /* Offline and remove memory */
325         if (memtrace_init_regions_runtime(val))
326                 goto out_unlock;
327
328         if (memtrace_init_debugfs())
329                 goto out_unlock;
330
331         memtrace_size = val;
332         rc = 0;
333 out_unlock:
334         mutex_unlock(&memtrace_mutex);
335         return rc;
336 }
337
338 static int memtrace_enable_get(void *data, u64 *val)
339 {
340         *val = memtrace_size;
341         return 0;
342 }
343
344 DEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get,
345                                         memtrace_enable_set, "0x%016llx\n");
346
347 static int memtrace_init(void)
348 {
349         memtrace_debugfs_dir = debugfs_create_dir("memtrace",
350                                                   powerpc_debugfs_root);
351         if (!memtrace_debugfs_dir)
352                 return -1;
353
354         debugfs_create_file("enable", 0600, memtrace_debugfs_dir,
355                             NULL, &memtrace_init_fops);
356
357         return 0;
358 }
359 machine_device_initcall(powernv, memtrace_init);