GNU Linux-libre 4.14.303-gnu1
arch/powerpc/platforms/powernv/memtrace.c
/*
 * Copyright (C) IBM Corporation, 2014, 2017
 * Anton Blanchard, Rashmica Gupta.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#define pr_fmt(fmt) "memtrace: " fmt

#include <linux/bitops.h>
#include <linux/string.h>
#include <linux/memblock.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <asm/machdep.h>
#include <asm/debugfs.h>

/* This enables us to keep track of the memory removed from each node. */
struct memtrace_entry {
        void *mem;
        u64 start;
        u64 size;
        u32 nid;
        struct dentry *dir;
        char name[16];
};

static u64 memtrace_size;

static struct memtrace_entry *memtrace_array;
static unsigned int memtrace_array_nr;

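/*
 * debugfs read handler: copy the requested chunk of the (ioremap()ed)
 * trace buffer out to userspace.
 */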
static ssize_t memtrace_read(struct file *filp, char __user *ubuf,
                             size_t count, loff_t *ppos)
{
        struct memtrace_entry *ent = filp->private_data;

        return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size);
}

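/* Check that [start, start + size) lies within the node's trace region. */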
static bool valid_memtrace_range(struct memtrace_entry *dev,
                                 unsigned long start, unsigned long size)
{
        if ((start >= dev->start) &&
            ((start + size) <= (dev->start + dev->size)))
                return true;

        return false;
}

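/* Map the trace buffer into userspace as a non-cached mapping. */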
static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma)
{
        unsigned long size = vma->vm_end - vma->vm_start;
        struct memtrace_entry *dev = filp->private_data;

        if (!valid_memtrace_range(dev, vma->vm_pgoff << PAGE_SHIFT, size))
                return -EINVAL;

        vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

        if (remap_pfn_range(vma, vma->vm_start,
                            vma->vm_pgoff + (dev->start >> PAGE_SHIFT),
                            size, vma->vm_page_prot))
                return -EAGAIN;

        return 0;
}

static const struct file_operations memtrace_fops = {
        .llseek = default_llseek,
        .read   = memtrace_read,
        .mmap   = memtrace_mmap,
        .open   = simple_open,
};

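/* walk_memory_range() callback: fail the walk if any block isn't online. */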
static int check_memblock_online(struct memory_block *mem, void *arg)
{
        if (mem->state != MEM_ONLINE)
                return -1;

        return 0;
}

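/* walk_memory_range() callback: set each memory block to the given state. */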
static int change_memblock_state(struct memory_block *mem, void *arg)
{
        unsigned long state = (unsigned long)arg;

        mem->state = state;

        return 0;
}

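/* Zero the trace range page by page via the still-present linear mapping. */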
static void memtrace_clear_range(unsigned long start_pfn,
                                 unsigned long nr_pages)
{
        unsigned long pfn;

        /*
         * As the pages are offline, we cannot trust the memmap anymore. As
         * HIGHMEM does not apply, avoid passing around "struct page" and
         * directly use clear_page() instead.
         */
        for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
                if (IS_ALIGNED(pfn, PAGES_PER_SECTION))
                        cond_resched();
                clear_page(__va(PFN_PHYS(pfn)));
        }
}

/*
 * Take all memory blocks spanning the range offline, restoring their
 * recorded state if offlining fails. Called with device_hotplug_lock held.
 */
static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
{
        u64 end_pfn = start_pfn + nr_pages - 1;

        if (walk_memory_range(start_pfn, end_pfn, NULL,
            check_memblock_online))
                return false;

        walk_memory_range(start_pfn, end_pfn, (void *)MEM_GOING_OFFLINE,
                          change_memblock_state);

        if (offline_pages(start_pfn, nr_pages)) {
                walk_memory_range(start_pfn, end_pfn, (void *)MEM_ONLINE,
                                  change_memblock_state);
                return false;
        }

        walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE,
                          change_memblock_state);

        return true;
}

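/*
 * Find a size-aligned, fully online range at the top of node nid, offline
 * and remove it from the kernel, and return its physical base address
 * (0 on failure).
 */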
static u64 memtrace_alloc_node(u32 nid, u64 size)
{
        u64 start_pfn, end_pfn, nr_pages, pfn;
        u64 base_pfn;
        u64 bytes = memory_block_size_bytes();

        if (!NODE_DATA(nid) || !node_spanned_pages(nid))
                return 0;

        start_pfn = node_start_pfn(nid);
        end_pfn = node_end_pfn(nid);
        nr_pages = size >> PAGE_SHIFT;

        /* Trace memory needs to be aligned to the size */
        end_pfn = round_down(end_pfn - nr_pages, nr_pages);

        lock_device_hotplug();
        for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) {
                if (memtrace_offline_pages(nid, base_pfn, nr_pages)) {
                        /*
                         * Clear the range while we still have a linear
                         * mapping.
                         */
                        memtrace_clear_range(base_pfn, nr_pages);
                        /*
                         * Remove memory in memory block size chunks so that
                         * iomem resources are always split to the same size and
                         * we never try to remove memory that spans two iomem
                         * resources.
                         */
                        end_pfn = base_pfn + nr_pages;
                        for (pfn = base_pfn; pfn < end_pfn;
                             pfn += bytes >> PAGE_SHIFT) {
                                remove_memory(nid, pfn << PAGE_SHIFT, bytes);
                        }
                        unlock_device_hotplug();
                        return base_pfn << PAGE_SHIFT;
                }
        }
        unlock_device_hotplug();

        return 0;
}

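/*
 * Carve size bytes of trace memory out of every online node, recording
 * each successful allocation in memtrace_array.
 */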
static int memtrace_init_regions_runtime(u64 size)
{
        u32 nid;
        u64 m;

        memtrace_array = kcalloc(num_online_nodes(),
                                sizeof(struct memtrace_entry), GFP_KERNEL);
        if (!memtrace_array) {
                pr_err("Failed to allocate memtrace_array\n");
                return -ENOMEM;
        }

        for_each_online_node(nid) {
                m = memtrace_alloc_node(nid, size);

                /*
                 * A node might not have any local memory, so warn but
                 * continue on.
                 */
                if (!m) {
                        pr_err("Failed to allocate trace memory on node %d\n", nid);
                        continue;
                }

                pr_info("Allocated trace memory on node %d at 0x%016llx\n", nid, m);

                memtrace_array[memtrace_array_nr].start = m;
                memtrace_array[memtrace_array_nr].size = size;
                memtrace_array[memtrace_array_nr].nid = nid;
                memtrace_array_nr++;
        }

        return 0;
}

static struct dentry *memtrace_debugfs_dir;

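/*
 * Map each allocated region and expose it under debugfs as a per-node
 * directory containing "trace", "start" and "size".
 */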
static int memtrace_init_debugfs(void)
{
        int ret = 0;
        int i;

        for (i = 0; i < memtrace_array_nr; i++) {
                struct dentry *dir;
                struct memtrace_entry *ent = &memtrace_array[i];

                ent->mem = ioremap(ent->start, ent->size);
                /* Warn but continue on */
                if (!ent->mem) {
                        pr_err("Failed to map trace memory at 0x%llx\n",
                               ent->start);
                        ret = -1;
                        continue;
                }

                snprintf(ent->name, 16, "%08x", ent->nid);
                dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir);
                if (!dir)
                        return -1;

                ent->dir = dir;
                debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops);
                debugfs_create_x64("start", 0400, dir, &ent->start);
                debugfs_create_x64("size", 0400, dir, &ent->size);
        }

        return ret;
}

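/*
 * debugfs "enable" write handler: a one-shot allocation of val bytes of
 * trace memory per online node; val must be memory-block aligned.
 */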
static int memtrace_enable_set(void *data, u64 val)
{
        if (memtrace_size)
                return -EINVAL;

        if (!val)
                return -EINVAL;

        /* Make sure size is aligned to a memory block */
        if (val & (memory_block_size_bytes() - 1))
                return -EINVAL;

        if (memtrace_init_regions_runtime(val))
                return -EINVAL;

        if (memtrace_init_debugfs())
                return -EINVAL;

        memtrace_size = val;

        return 0;
}

static int memtrace_enable_get(void *data, u64 *val)
{
        *val = memtrace_size;
        return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get,
                        memtrace_enable_set, "0x%016llx\n");

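/* Create the top-level "memtrace" debugfs directory and its "enable" file. */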
static int memtrace_init(void)
{
        memtrace_debugfs_dir = debugfs_create_dir("memtrace",
                                                  powerpc_debugfs_root);
        if (!memtrace_debugfs_dir)
                return -1;

        debugfs_create_file("enable", 0600, memtrace_debugfs_dir,
                            NULL, &memtrace_init_fops);

        return 0;
}
machine_device_initcall(powernv, memtrace_init);