/* GNU Linux-libre 4.19.245-gnu1 - drivers/gpu/drm/amd/amdkfd/kfd_chardev.c */
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/device.h>
#include <linux/export.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/compat.h>
#include <uapi/linux/kfd_ioctl.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <asm/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"

static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
static int kfd_mmap(struct file *, struct vm_area_struct *);

static const char kfd_dev_name[] = "kfd";

static const struct file_operations kfd_fops = {
        .owner = THIS_MODULE,
        .unlocked_ioctl = kfd_ioctl,
        .compat_ioctl = kfd_ioctl,
        .open = kfd_open,
        .mmap = kfd_mmap,
};

static int kfd_char_dev_major = -1;
static struct class *kfd_class;
struct device *kfd_device;

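/*
 * Descriptive note (added): kfd_chardev_init() registers the "kfd"
 * character device with a dynamically allocated major number, creates
 * the "kfd" class and a single device node at minor 0, unwinding each
 * step on failure. kfd_chardev_exit() tears the same three steps down
 * in reverse order.
 */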
int kfd_chardev_init(void)
{
        int err = 0;

        kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
        err = kfd_char_dev_major;
        if (err < 0)
                goto err_register_chrdev;

        kfd_class = class_create(THIS_MODULE, kfd_dev_name);
        err = PTR_ERR(kfd_class);
        if (IS_ERR(kfd_class))
                goto err_class_create;

        kfd_device = device_create(kfd_class, NULL,
                                        MKDEV(kfd_char_dev_major, 0),
                                        NULL, kfd_dev_name);
        err = PTR_ERR(kfd_device);
        if (IS_ERR(kfd_device))
                goto err_device_create;

        return 0;

err_device_create:
        class_destroy(kfd_class);
err_class_create:
        unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
err_register_chrdev:
        return err;
}

void kfd_chardev_exit(void)
{
        device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
        class_destroy(kfd_class);
        unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
}

struct device *kfd_chardev(void)
{
        return kfd_device;
}

static int kfd_open(struct inode *inode, struct file *filep)
{
        struct kfd_process *process;
        bool is_32bit_user_mode;

        if (iminor(inode) != 0)
                return -ENODEV;

        is_32bit_user_mode = in_compat_syscall();

        if (is_32bit_user_mode) {
                dev_warn(kfd_device,
                        "Process %d (32-bit) failed to open /dev/kfd\n"
                        "32-bit processes are not supported by amdkfd\n",
                        current->pid);
                return -EPERM;
        }

        process = kfd_create_process(filep);
        if (IS_ERR(process))
                return PTR_ERR(process);

        if (kfd_is_locked())
                return -EAGAIN;

        dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
                process->pasid, process->is_32bit_user_mode);

        return 0;
}

static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
                                        void *data)
{
        struct kfd_ioctl_get_version_args *args = data;

        args->major_version = KFD_IOCTL_MAJOR_VERSION;
        args->minor_version = KFD_IOCTL_MINOR_VERSION;

        return 0;
}
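
/*
 * Illustrative user-space sketch (added, not part of the kernel file):
 * one way a process might exercise AMDKFD_IOC_GET_VERSION from the
 * other side of this interface. The ioctl number and the argument
 * struct come from <linux/kfd_ioctl.h>; error handling is deliberately
 * minimal.
 *
 *   #include <fcntl.h>
 *   #include <stdio.h>
 *   #include <sys/ioctl.h>
 *   #include <linux/kfd_ioctl.h>
 *
 *   int main(void)
 *   {
 *           struct kfd_ioctl_get_version_args args = {0};
 *           int fd = open("/dev/kfd", O_RDWR);
 *
 *           if (fd < 0 || ioctl(fd, AMDKFD_IOC_GET_VERSION, &args))
 *                   return 1;
 *           printf("KFD ioctl interface %u.%u\n",
 *                  args.major_version, args.minor_version);
 *           return 0;
 *   }
 */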

static int set_queue_properties_from_user(struct queue_properties *q_properties,
                                struct kfd_ioctl_create_queue_args *args)
{
        if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
                pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
                return -EINVAL;
        }

        if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
                pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
                return -EINVAL;
        }

        if ((args->ring_base_address) &&
                (!access_ok(VERIFY_WRITE,
                        (const void __user *) args->ring_base_address,
                        sizeof(uint64_t)))) {
                pr_err("Can't access ring base address\n");
                return -EFAULT;
        }

        if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
                pr_err("Ring size must be a power of 2 or 0\n");
                return -EINVAL;
        }

        if (!access_ok(VERIFY_WRITE,
                        (const void __user *) args->read_pointer_address,
                        sizeof(uint32_t))) {
                pr_err("Can't access read pointer\n");
                return -EFAULT;
        }

        if (!access_ok(VERIFY_WRITE,
                        (const void __user *) args->write_pointer_address,
                        sizeof(uint32_t))) {
                pr_err("Can't access write pointer\n");
                return -EFAULT;
        }

        if (args->eop_buffer_address &&
                !access_ok(VERIFY_WRITE,
                        (const void __user *) args->eop_buffer_address,
                        sizeof(uint32_t))) {
                pr_debug("Can't access eop buffer\n");
                return -EFAULT;
        }

        if (args->ctx_save_restore_address &&
                !access_ok(VERIFY_WRITE,
                        (const void __user *) args->ctx_save_restore_address,
                        sizeof(uint32_t))) {
                pr_debug("Can't access ctx save restore buffer\n");
                return -EFAULT;
        }

        q_properties->is_interop = false;
        q_properties->queue_percent = args->queue_percentage;
        q_properties->priority = args->queue_priority;
        q_properties->queue_address = args->ring_base_address;
        q_properties->queue_size = args->ring_size;
        q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
        q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
        q_properties->eop_ring_buffer_address = args->eop_buffer_address;
        q_properties->eop_ring_buffer_size = args->eop_buffer_size;
        q_properties->ctx_save_restore_area_address =
                        args->ctx_save_restore_address;
        q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
        q_properties->ctl_stack_size = args->ctl_stack_size;
        if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
                args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
                q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
        else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
                q_properties->type = KFD_QUEUE_TYPE_SDMA;
        else
                return -ENOTSUPP;

        if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
                q_properties->format = KFD_QUEUE_FORMAT_AQL;
        else
                q_properties->format = KFD_QUEUE_FORMAT_PM4;

        pr_debug("Queue Percentage: %d, %d\n",
                        q_properties->queue_percent, args->queue_percentage);

        pr_debug("Queue Priority: %d, %d\n",
                        q_properties->priority, args->queue_priority);

        pr_debug("Queue Address: 0x%llX, 0x%llX\n",
                        q_properties->queue_address, args->ring_base_address);

        pr_debug("Queue Size: 0x%llX, %u\n",
                        q_properties->queue_size, args->ring_size);

        pr_debug("Queue r/w Pointers: %px, %px\n",
                        q_properties->read_ptr,
                        q_properties->write_ptr);

        pr_debug("Queue Format: %d\n", q_properties->format);

        pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);

        pr_debug("Queue CTX save area: 0x%llX\n",
                        q_properties->ctx_save_restore_area_address);

        return 0;
}

static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
                                        void *data)
{
        struct kfd_ioctl_create_queue_args *args = data;
        struct kfd_dev *dev;
        int err = 0;
        unsigned int queue_id;
        struct kfd_process_device *pdd;
        struct queue_properties q_properties;

        memset(&q_properties, 0, sizeof(struct queue_properties));

        pr_debug("Creating queue ioctl\n");

        err = set_queue_properties_from_user(&q_properties, args);
        if (err)
                return err;

        pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
        dev = kfd_device_by_id(args->gpu_id);
        if (!dev) {
                pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
                return -EINVAL;
        }

        mutex_lock(&p->mutex);

        pdd = kfd_bind_process_to_device(dev, p);
        if (IS_ERR(pdd)) {
                err = -ESRCH;
                goto err_bind_process;
        }

        pr_debug("Creating queue for PASID %d on gpu 0x%x\n",
                        p->pasid,
                        dev->id);

        err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id);
        if (err != 0)
                goto err_create_queue;

        args->queue_id = queue_id;

        /* Return gpu_id as doorbell offset for mmap usage */
        args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
        args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
        args->doorbell_offset <<= PAGE_SHIFT;
        if (KFD_IS_SOC15(dev->device_info->asic_family))
                /* On SOC15 ASICs, doorbell allocation must be
                 * per-device, and independent from the per-process
                 * queue_id. Return the doorbell offset within the
                 * doorbell aperture to user mode.
                 */
                args->doorbell_offset |= q_properties.doorbell_off;

        mutex_unlock(&p->mutex);

        pr_debug("Queue id %d was created successfully\n", args->queue_id);

        pr_debug("Ring buffer address == 0x%016llX\n",
                        args->ring_base_address);

        pr_debug("Read ptr address    == 0x%016llX\n",
                        args->read_pointer_address);

        pr_debug("Write ptr address   == 0x%016llX\n",
                        args->write_pointer_address);

        return 0;

err_create_queue:
err_bind_process:
        mutex_unlock(&p->mutex);
        return err;
}
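
/*
 * Illustrative user-space sketch (added): mapping the doorbell page for
 * a freshly created queue. The doorbell_offset returned by
 * AMDKFD_IOC_CREATE_QUEUE already encodes the mmap type and GPU ID, so
 * user space passes it to mmap() on /dev/kfd unchanged. The
 * doorbell_page_size parameter is an assumption supplied by the caller
 * (typically one system page), not something taken from this file.
 *
 *   #include <sys/mman.h>
 *
 *   void *map_doorbells(int kfd_fd, struct kfd_ioctl_create_queue_args *a,
 *                       size_t doorbell_page_size)
 *   {
 *           return mmap(NULL, doorbell_page_size, PROT_READ | PROT_WRITE,
 *                       MAP_SHARED, kfd_fd, a->doorbell_offset);
 *   }
 */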

static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
                                        void *data)
{
        int retval;
        struct kfd_ioctl_destroy_queue_args *args = data;

        pr_debug("Destroying queue id %d for pasid %d\n",
                                args->queue_id,
                                p->pasid);

        mutex_lock(&p->mutex);

        retval = pqm_destroy_queue(&p->pqm, args->queue_id);

        mutex_unlock(&p->mutex);
        return retval;
}

static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
                                        void *data)
{
        int retval;
        struct kfd_ioctl_update_queue_args *args = data;
        struct queue_properties properties;

        if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
                pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
                return -EINVAL;
        }

        if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
                pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
                return -EINVAL;
        }

        if ((args->ring_base_address) &&
                (!access_ok(VERIFY_WRITE,
                        (const void __user *) args->ring_base_address,
                        sizeof(uint64_t)))) {
                pr_err("Can't access ring base address\n");
                return -EFAULT;
        }

        if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
                pr_err("Ring size must be a power of 2 or 0\n");
                return -EINVAL;
        }

        properties.queue_address = args->ring_base_address;
        properties.queue_size = args->ring_size;
        properties.queue_percent = args->queue_percentage;
        properties.priority = args->queue_priority;

        pr_debug("Updating queue id %d for pasid %d\n",
                        args->queue_id, p->pasid);

        mutex_lock(&p->mutex);

        retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);

        mutex_unlock(&p->mutex);

        return retval;
}

static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
                                        void *data)
{
        int retval;
        const int max_num_cus = 1024;
        struct kfd_ioctl_set_cu_mask_args *args = data;
        struct queue_properties properties;
        uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
        size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);

        if ((args->num_cu_mask % 32) != 0) {
                pr_debug("num_cu_mask 0x%x must be a multiple of 32\n",
                                args->num_cu_mask);
                return -EINVAL;
        }

        properties.cu_mask_count = args->num_cu_mask;
        if (properties.cu_mask_count == 0) {
                pr_debug("CU mask cannot be 0\n");
                return -EINVAL;
        }

        /* To prevent an unreasonably large CU mask size, set an arbitrary
         * limit of max_num_cus bits.  We can then just drop any CU mask bits
         * past max_num_cus bits and just use the first max_num_cus bits.
         */
        if (properties.cu_mask_count > max_num_cus) {
                pr_debug("CU mask cannot be greater than 1024 bits\n");
                properties.cu_mask_count = max_num_cus;
                cu_mask_size = sizeof(uint32_t) * (max_num_cus / 32);
        }

        properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL);
        if (!properties.cu_mask)
                return -ENOMEM;

        retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size);
        if (retval) {
                pr_debug("Could not copy CU mask from userspace\n");
                kfree(properties.cu_mask);
                return -EFAULT;
        }

        mutex_lock(&p->mutex);

        retval = pqm_set_cu_mask(&p->pqm, args->queue_id, &properties);

        mutex_unlock(&p->mutex);

        if (retval)
                kfree(properties.cu_mask);

        return retval;
}

static int kfd_ioctl_set_memory_policy(struct file *filep,
                                        struct kfd_process *p, void *data)
{
        struct kfd_ioctl_set_memory_policy_args *args = data;
        struct kfd_dev *dev;
        int err = 0;
        struct kfd_process_device *pdd;
        enum cache_policy default_policy, alternate_policy;

        if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
            && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
                return -EINVAL;
        }

        if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
            && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
                return -EINVAL;
        }

        dev = kfd_device_by_id(args->gpu_id);
        if (!dev)
                return -EINVAL;

        mutex_lock(&p->mutex);

        pdd = kfd_bind_process_to_device(dev, p);
        if (IS_ERR(pdd)) {
                err = -ESRCH;
                goto out;
        }

        default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
                         ? cache_policy_coherent : cache_policy_noncoherent;

        alternate_policy =
                (args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
                   ? cache_policy_coherent : cache_policy_noncoherent;

        if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
                                &pdd->qpd,
                                default_policy,
                                alternate_policy,
                                (void __user *)args->alternate_aperture_base,
                                args->alternate_aperture_size))
                err = -EINVAL;

out:
        mutex_unlock(&p->mutex);

        return err;
}

static int kfd_ioctl_set_trap_handler(struct file *filep,
                                        struct kfd_process *p, void *data)
{
        struct kfd_ioctl_set_trap_handler_args *args = data;
        struct kfd_dev *dev;
        int err = 0;
        struct kfd_process_device *pdd;

        dev = kfd_device_by_id(args->gpu_id);
        if (!dev)
                return -EINVAL;

        mutex_lock(&p->mutex);

        pdd = kfd_bind_process_to_device(dev, p);
        if (IS_ERR(pdd)) {
                err = -ESRCH;
                goto out;
        }

        if (dev->dqm->ops.set_trap_handler(dev->dqm,
                                        &pdd->qpd,
                                        args->tba_addr,
                                        args->tma_addr))
                err = -EINVAL;

out:
        mutex_unlock(&p->mutex);

        return err;
}

static int kfd_ioctl_dbg_register(struct file *filep,
                                struct kfd_process *p, void *data)
{
        struct kfd_ioctl_dbg_register_args *args = data;
        struct kfd_dev *dev;
        struct kfd_dbgmgr *dbgmgr_ptr;
        struct kfd_process_device *pdd;
        bool create_ok;
        long status = 0;

        dev = kfd_device_by_id(args->gpu_id);
        if (!dev)
                return -EINVAL;

        if (dev->device_info->asic_family == CHIP_CARRIZO) {
                pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
                return -EINVAL;
        }

        mutex_lock(&p->mutex);
        mutex_lock(kfd_get_dbgmgr_mutex());

        /*
         * Make sure that we have a pdd, in case this is the first queue
         * created for this process.
         */
        pdd = kfd_bind_process_to_device(dev, p);
        if (IS_ERR(pdd)) {
                status = PTR_ERR(pdd);
                goto out;
        }

        if (!dev->dbgmgr) {
                /* In case of a legal call, we have no dbgmgr yet */
                create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
                if (create_ok) {
                        status = kfd_dbgmgr_register(dbgmgr_ptr, p);
                        if (status != 0)
                                kfd_dbgmgr_destroy(dbgmgr_ptr);
                        else
                                dev->dbgmgr = dbgmgr_ptr;
                }
        } else {
                pr_debug("debugger already registered\n");
                status = -EINVAL;
        }

out:
        mutex_unlock(kfd_get_dbgmgr_mutex());
        mutex_unlock(&p->mutex);

        return status;
}

static int kfd_ioctl_dbg_unregister(struct file *filep,
                                struct kfd_process *p, void *data)
{
        struct kfd_ioctl_dbg_unregister_args *args = data;
        struct kfd_dev *dev;
        long status;

        dev = kfd_device_by_id(args->gpu_id);
        if (!dev || !dev->dbgmgr)
                return -EINVAL;

        if (dev->device_info->asic_family == CHIP_CARRIZO) {
                pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
                return -EINVAL;
        }

        mutex_lock(kfd_get_dbgmgr_mutex());

        status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
        if (!status) {
                kfd_dbgmgr_destroy(dev->dbgmgr);
                dev->dbgmgr = NULL;
        }

        mutex_unlock(kfd_get_dbgmgr_mutex());

        return status;
}

/*
 * Parse and generate a variable-size data structure for address watch.
 * The total buffer size and the number of watch points are limited in
 * order to prevent kernel abuse (this has no bearing on the much smaller
 * HW limitation, which is enforced by the dbgdev module).
 * Note also that the watch addresses themselves are not copied from
 * user, since they are written into the HW as user-mode values.
 */
static int kfd_ioctl_dbg_address_watch(struct file *filep,
                                        struct kfd_process *p, void *data)
{
        struct kfd_ioctl_dbg_address_watch_args *args = data;
        struct kfd_dev *dev;
        struct dbg_address_watch_info aw_info;
        unsigned char *args_buff;
        long status;
        void __user *cmd_from_user;
        uint64_t watch_mask_value = 0;
        unsigned int args_idx = 0;

        memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));

        dev = kfd_device_by_id(args->gpu_id);
        if (!dev)
                return -EINVAL;

        if (dev->device_info->asic_family == CHIP_CARRIZO) {
                pr_debug("kfd_ioctl_dbg_address_watch not supported on CZ\n");
                return -EINVAL;
        }

        cmd_from_user = (void __user *) args->content_ptr;

        /* Validate arguments */

        if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
                (args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
                (cmd_from_user == NULL))
                return -EINVAL;

        /* this is the actual buffer to work with */
        args_buff = memdup_user(cmd_from_user,
                                args->buf_size_in_bytes - sizeof(*args));
        if (IS_ERR(args_buff))
                return PTR_ERR(args_buff);

        aw_info.process = p;

        aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
        args_idx += sizeof(aw_info.num_watch_points);

        aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
        args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;

        /*
         * set watch address base pointer to point on the array base
         * within args_buff
         */
        aw_info.watch_address = (uint64_t *) &args_buff[args_idx];

        /* skip over the addresses buffer */
        args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;

        if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
                status = -EINVAL;
                goto out;
        }

        watch_mask_value = (uint64_t) args_buff[args_idx];

        if (watch_mask_value > 0) {
                /*
                 * There is an array of masks.
                 * set watch mask base pointer to point on the array base
                 * within args_buff
                 */
                aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];

                /* skip over the masks buffer */
                args_idx += sizeof(aw_info.watch_mask) *
                                aw_info.num_watch_points;
        } else {
                /* just the NULL mask, set to NULL and skip over it */
                aw_info.watch_mask = NULL;
                args_idx += sizeof(aw_info.watch_mask);
        }

        if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
                status = -EINVAL;
                goto out;
        }

        /* Currently HSA Event is not supported for DBG */
        aw_info.watch_event = NULL;

        mutex_lock(kfd_get_dbgmgr_mutex());

        status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);

        mutex_unlock(kfd_get_dbgmgr_mutex());

out:
        kfree(args_buff);

        return status;
}

/* Parse and generate fixed size data structure for wave control */
static int kfd_ioctl_dbg_wave_control(struct file *filep,
                                        struct kfd_process *p, void *data)
{
        struct kfd_ioctl_dbg_wave_control_args *args = data;
        struct kfd_dev *dev;
        struct dbg_wave_control_info wac_info;
        unsigned char *args_buff;
        uint32_t computed_buff_size;
        long status;
        void __user *cmd_from_user;
        unsigned int args_idx = 0;

        memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));

        /* we use compact form, independent of the packing attribute value */
        computed_buff_size = sizeof(*args) +
                                sizeof(wac_info.mode) +
                                sizeof(wac_info.operand) +
                                sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
                                sizeof(wac_info.dbgWave_msg.MemoryVA) +
                                sizeof(wac_info.trapId);

        dev = kfd_device_by_id(args->gpu_id);
        if (!dev)
                return -EINVAL;

        if (dev->device_info->asic_family == CHIP_CARRIZO) {
                pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
                return -EINVAL;
        }

        /* input size must match the computed "compact" size */
        if (args->buf_size_in_bytes != computed_buff_size) {
                pr_debug("size mismatch, actual : computed %u : %u\n",
                                args->buf_size_in_bytes, computed_buff_size);
                return -EINVAL;
        }

        cmd_from_user = (void __user *) args->content_ptr;

        if (cmd_from_user == NULL)
                return -EINVAL;

        /* copy the entire buffer from user */

        args_buff = memdup_user(cmd_from_user,
                                args->buf_size_in_bytes - sizeof(*args));
        if (IS_ERR(args_buff))
                return PTR_ERR(args_buff);

        /* move ptr to the start of the payload area */
        wac_info.process = p;

        wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
        args_idx += sizeof(wac_info.operand);

        wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
        args_idx += sizeof(wac_info.mode);

        wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
        args_idx += sizeof(wac_info.trapId);

        wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
                                        *((uint32_t *)(&args_buff[args_idx]));
        wac_info.dbgWave_msg.MemoryVA = NULL;

        mutex_lock(kfd_get_dbgmgr_mutex());

        pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
                        wac_info.process, wac_info.operand,
                        wac_info.mode, wac_info.trapId,
                        wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);

        status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);

        pr_debug("Returned status of dbg manager is %ld\n", status);

        mutex_unlock(kfd_get_dbgmgr_mutex());

        kfree(args_buff);

        return status;
}

static int kfd_ioctl_get_clock_counters(struct file *filep,
                                struct kfd_process *p, void *data)
{
        struct kfd_ioctl_get_clock_counters_args *args = data;
        struct kfd_dev *dev;

        dev = kfd_device_by_id(args->gpu_id);
        if (dev)
                /* Reading GPU clock counter from KGD */
                args->gpu_clock_counter =
                        dev->kfd2kgd->get_gpu_clock_counter(dev->kgd);
        else
                /* Node without GPU resource */
                args->gpu_clock_counter = 0;

        /* No access to rdtsc. Using raw monotonic time */
        args->cpu_clock_counter = ktime_get_raw_ns();
        args->system_clock_counter = ktime_get_boot_ns();

        /* Since the counters are in nanoseconds, report a 1 GHz frequency */
        args->system_clock_freq = 1000000000;

        return 0;
}

static int kfd_ioctl_get_process_apertures(struct file *filp,
                                struct kfd_process *p, void *data)
{
        struct kfd_ioctl_get_process_apertures_args *args = data;
        struct kfd_process_device_apertures *pAperture;
        struct kfd_process_device *pdd;

        dev_dbg(kfd_device, "get apertures for PASID %d\n", p->pasid);

        args->num_of_nodes = 0;

        mutex_lock(&p->mutex);

        /* if the process-device list isn't empty */
        if (kfd_has_process_device_data(p)) {
                /* Run over all pdd of the process */
                pdd = kfd_get_first_process_device_data(p);
                do {
                        pAperture =
                                &args->process_apertures[args->num_of_nodes];
                        pAperture->gpu_id = pdd->dev->id;
                        pAperture->lds_base = pdd->lds_base;
                        pAperture->lds_limit = pdd->lds_limit;
                        pAperture->gpuvm_base = pdd->gpuvm_base;
                        pAperture->gpuvm_limit = pdd->gpuvm_limit;
                        pAperture->scratch_base = pdd->scratch_base;
                        pAperture->scratch_limit = pdd->scratch_limit;

                        dev_dbg(kfd_device,
                                "node id %u\n", args->num_of_nodes);
                        dev_dbg(kfd_device,
                                "gpu id %u\n", pdd->dev->id);
                        dev_dbg(kfd_device,
                                "lds_base %llX\n", pdd->lds_base);
                        dev_dbg(kfd_device,
                                "lds_limit %llX\n", pdd->lds_limit);
                        dev_dbg(kfd_device,
                                "gpuvm_base %llX\n", pdd->gpuvm_base);
                        dev_dbg(kfd_device,
                                "gpuvm_limit %llX\n", pdd->gpuvm_limit);
                        dev_dbg(kfd_device,
                                "scratch_base %llX\n", pdd->scratch_base);
                        dev_dbg(kfd_device,
                                "scratch_limit %llX\n", pdd->scratch_limit);

                        args->num_of_nodes++;

                        pdd = kfd_get_next_process_device_data(p, pdd);
                } while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
        }

        mutex_unlock(&p->mutex);

        return 0;
}

static int kfd_ioctl_get_process_apertures_new(struct file *filp,
                                struct kfd_process *p, void *data)
{
        struct kfd_ioctl_get_process_apertures_new_args *args = data;
        struct kfd_process_device_apertures *pa;
        struct kfd_process_device *pdd;
        uint32_t nodes = 0;
        int ret;

        dev_dbg(kfd_device, "get apertures for PASID %d\n", p->pasid);

        if (args->num_of_nodes == 0) {
                /* Return number of nodes, so that user space can allocate
                 * sufficient memory
                 */
                mutex_lock(&p->mutex);

                if (!kfd_has_process_device_data(p))
                        goto out_unlock;

                /* Run over all pdd of the process */
                pdd = kfd_get_first_process_device_data(p);
                do {
                        args->num_of_nodes++;
                        pdd = kfd_get_next_process_device_data(p, pdd);
                } while (pdd);

                goto out_unlock;
        }

        /* Fill in process-aperture information for all available
         * nodes, but not more than args->num_of_nodes as that is
         * the amount of memory allocated by user
         */
        pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
                                args->num_of_nodes), GFP_KERNEL);
        if (!pa)
                return -ENOMEM;

        mutex_lock(&p->mutex);

        if (!kfd_has_process_device_data(p)) {
                args->num_of_nodes = 0;
                kfree(pa);
                goto out_unlock;
        }

        /* Run over all pdd of the process */
        pdd = kfd_get_first_process_device_data(p);
        do {
                pa[nodes].gpu_id = pdd->dev->id;
                pa[nodes].lds_base = pdd->lds_base;
                pa[nodes].lds_limit = pdd->lds_limit;
                pa[nodes].gpuvm_base = pdd->gpuvm_base;
                pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
                pa[nodes].scratch_base = pdd->scratch_base;
                pa[nodes].scratch_limit = pdd->scratch_limit;

                dev_dbg(kfd_device,
                        "gpu id %u\n", pdd->dev->id);
                dev_dbg(kfd_device,
                        "lds_base %llX\n", pdd->lds_base);
                dev_dbg(kfd_device,
                        "lds_limit %llX\n", pdd->lds_limit);
                dev_dbg(kfd_device,
                        "gpuvm_base %llX\n", pdd->gpuvm_base);
                dev_dbg(kfd_device,
                        "gpuvm_limit %llX\n", pdd->gpuvm_limit);
                dev_dbg(kfd_device,
                        "scratch_base %llX\n", pdd->scratch_base);
                dev_dbg(kfd_device,
                        "scratch_limit %llX\n", pdd->scratch_limit);
                nodes++;

                pdd = kfd_get_next_process_device_data(p, pdd);
        } while (pdd && (nodes < args->num_of_nodes));
        mutex_unlock(&p->mutex);

        args->num_of_nodes = nodes;
        ret = copy_to_user(
                        (void __user *)args->kfd_process_device_apertures_ptr,
                        pa,
                        (nodes * sizeof(struct kfd_process_device_apertures)));
        kfree(pa);
        return ret ? -EFAULT : 0;

out_unlock:
        mutex_unlock(&p->mutex);
        return 0;
}

static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
                                        void *data)
{
        struct kfd_ioctl_create_event_args *args = data;
        int err;

        /* For dGPUs the event page is allocated in user mode. The
         * handle is passed to KFD with the first call to this IOCTL
         * through the event_page_offset field.
         */
        if (args->event_page_offset) {
                struct kfd_dev *kfd;
                struct kfd_process_device *pdd;
                void *mem, *kern_addr;
                uint64_t size;

                if (p->signal_page) {
                        pr_err("Event page is already set\n");
                        return -EINVAL;
                }

                kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
                if (!kfd) {
                        pr_err("Getting device by id failed in %s\n", __func__);
                        return -EINVAL;
                }

                mutex_lock(&p->mutex);
                pdd = kfd_bind_process_to_device(kfd, p);
                if (IS_ERR(pdd)) {
                        err = PTR_ERR(pdd);
                        goto out_unlock;
                }

                mem = kfd_process_device_translate_handle(pdd,
                                GET_IDR_HANDLE(args->event_page_offset));
                if (!mem) {
                        pr_err("Can't find BO, offset is 0x%llx\n",
                               args->event_page_offset);
                        err = -EINVAL;
                        goto out_unlock;
                }
                mutex_unlock(&p->mutex);

                err = kfd->kfd2kgd->map_gtt_bo_to_kernel(kfd->kgd,
                                                mem, &kern_addr, &size);
                if (err) {
                        pr_err("Failed to map event page to kernel\n");
                        return err;
                }

                err = kfd_event_page_set(p, kern_addr, size);
                if (err) {
                        pr_err("Failed to set event page\n");
                        return err;
                }
        }

        err = kfd_event_create(filp, p, args->event_type,
                                args->auto_reset != 0, args->node_id,
                                &args->event_id, &args->event_trigger_data,
                                &args->event_page_offset,
                                &args->event_slot_index);

        return err;

out_unlock:
        mutex_unlock(&p->mutex);
        return err;
}

static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
                                        void *data)
{
        struct kfd_ioctl_destroy_event_args *args = data;

        return kfd_event_destroy(p, args->event_id);
}

static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
                                void *data)
{
        struct kfd_ioctl_set_event_args *args = data;

        return kfd_set_event(p, args->event_id);
}

static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
                                void *data)
{
        struct kfd_ioctl_reset_event_args *args = data;

        return kfd_reset_event(p, args->event_id);
}

static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
                                void *data)
{
        struct kfd_ioctl_wait_events_args *args = data;
        int err;

        err = kfd_wait_on_events(p, args->num_events,
                        (void __user *)args->events_ptr,
                        (args->wait_for_all != 0),
                        args->timeout, &args->wait_result);

        return err;
}
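
/*
 * Illustrative user-space sketch (added): the typical event round trip
 * through the four ioctls above. Struct and ioctl names come from
 * <linux/kfd_ioctl.h>; kfd_fd is assumed to be an open /dev/kfd file
 * descriptor and the 5000 ms timeout is an arbitrary example value.
 *
 *   struct kfd_ioctl_create_event_args ev = {
 *           .event_type = KFD_IOC_EVENT_SIGNAL,
 *   };
 *   struct kfd_event_data data;
 *   struct kfd_ioctl_wait_events_args wait = {0};
 *   struct kfd_ioctl_destroy_event_args del;
 *
 *   ioctl(kfd_fd, AMDKFD_IOC_CREATE_EVENT, &ev);
 *   data.event_id = ev.event_id;
 *   wait.events_ptr = (uintptr_t)&data;
 *   wait.num_events = 1;
 *   wait.wait_for_all = 1;
 *   wait.timeout = 5000;
 *   ioctl(kfd_fd, AMDKFD_IOC_WAIT_EVENTS, &wait);
 *   del.event_id = ev.event_id;
 *   ioctl(kfd_fd, AMDKFD_IOC_DESTROY_EVENT, &del);
 */
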
static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
                                        struct kfd_process *p, void *data)
{
        struct kfd_ioctl_set_scratch_backing_va_args *args = data;
        struct kfd_process_device *pdd;
        struct kfd_dev *dev;
        long err;

        dev = kfd_device_by_id(args->gpu_id);
        if (!dev)
                return -EINVAL;

        mutex_lock(&p->mutex);

        pdd = kfd_bind_process_to_device(dev, p);
        if (IS_ERR(pdd)) {
                err = PTR_ERR(pdd);
                goto bind_process_to_device_fail;
        }

        pdd->qpd.sh_hidden_private_base = args->va_addr;

        mutex_unlock(&p->mutex);

        if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
            pdd->qpd.vmid != 0)
                dev->kfd2kgd->set_scratch_backing_va(
                        dev->kgd, args->va_addr, pdd->qpd.vmid);

        return 0;

bind_process_to_device_fail:
        mutex_unlock(&p->mutex);
        return err;
}

static int kfd_ioctl_get_tile_config(struct file *filep,
                struct kfd_process *p, void *data)
{
        struct kfd_ioctl_get_tile_config_args *args = data;
        struct kfd_dev *dev;
        struct tile_config config;
        int err = 0;

        dev = kfd_device_by_id(args->gpu_id);
        if (!dev)
                return -EINVAL;

        dev->kfd2kgd->get_tile_config(dev->kgd, &config);

        args->gb_addr_config = config.gb_addr_config;
        args->num_banks = config.num_banks;
        args->num_ranks = config.num_ranks;

        if (args->num_tile_configs > config.num_tile_configs)
                args->num_tile_configs = config.num_tile_configs;
        err = copy_to_user((void __user *)args->tile_config_ptr,
                        config.tile_config_ptr,
                        args->num_tile_configs * sizeof(uint32_t));
        if (err) {
                args->num_tile_configs = 0;
                return -EFAULT;
        }

        if (args->num_macro_tile_configs > config.num_macro_tile_configs)
                args->num_macro_tile_configs =
                                config.num_macro_tile_configs;
        err = copy_to_user((void __user *)args->macro_tile_config_ptr,
                        config.macro_tile_config_ptr,
                        args->num_macro_tile_configs * sizeof(uint32_t));
        if (err) {
                args->num_macro_tile_configs = 0;
                return -EFAULT;
        }

        return 0;
}

static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
                                void *data)
{
        struct kfd_ioctl_acquire_vm_args *args = data;
        struct kfd_process_device *pdd;
        struct kfd_dev *dev;
        struct file *drm_file;
        int ret;

        dev = kfd_device_by_id(args->gpu_id);
        if (!dev)
                return -EINVAL;

        drm_file = fget(args->drm_fd);
        if (!drm_file)
                return -EINVAL;

        mutex_lock(&p->mutex);

        pdd = kfd_get_process_device_data(dev, p);
        if (!pdd) {
                ret = -EINVAL;
                goto err_unlock;
        }

        if (pdd->drm_file) {
                ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
                goto err_unlock;
        }

        ret = kfd_process_device_init_vm(pdd, drm_file);
        if (ret)
                goto err_unlock;
        /* On success, the PDD keeps the drm_file reference */
        mutex_unlock(&p->mutex);

        return 0;

err_unlock:
        mutex_unlock(&p->mutex);
        fput(drm_file);
        return ret;
}

static bool kfd_dev_is_large_bar(struct kfd_dev *dev)
{
        struct kfd_local_mem_info mem_info;

        if (debug_largebar) {
                pr_debug("Simulate large-bar allocation on non large-bar machine\n");
                return true;
        }

        if (dev->device_info->needs_iommu_device)
                return false;

        dev->kfd2kgd->get_local_mem_info(dev->kgd, &mem_info);
        if (mem_info.local_mem_size_private == 0 &&
                        mem_info.local_mem_size_public > 0)
                return true;
        return false;
}

static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
                                        struct kfd_process *p, void *data)
{
        struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
        struct kfd_process_device *pdd;
        void *mem;
        struct kfd_dev *dev;
        int idr_handle;
        long err;
        uint64_t offset = args->mmap_offset;
        uint32_t flags = args->flags;

        if (args->size == 0)
                return -EINVAL;

        dev = kfd_device_by_id(args->gpu_id);
        if (!dev)
                return -EINVAL;

        if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
                (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
                !kfd_dev_is_large_bar(dev)) {
                pr_err("Allocating host-visible VRAM is not allowed on small-BAR systems\n");
                return -EINVAL;
        }

        mutex_lock(&p->mutex);

        pdd = kfd_bind_process_to_device(dev, p);
        if (IS_ERR(pdd)) {
                err = PTR_ERR(pdd);
                goto err_unlock;
        }

        err = dev->kfd2kgd->alloc_memory_of_gpu(
                dev->kgd, args->va_addr, args->size,
                pdd->vm, (struct kgd_mem **) &mem, &offset,
                flags);

        if (err)
                goto err_unlock;

        idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
        if (idr_handle < 0) {
                err = -EFAULT;
                goto err_free;
        }

        mutex_unlock(&p->mutex);

        args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
        args->mmap_offset = offset;

        return 0;

err_free:
        dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
err_unlock:
        mutex_unlock(&p->mutex);
        return err;
}

static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
                                        struct kfd_process *p, void *data)
{
        struct kfd_ioctl_free_memory_of_gpu_args *args = data;
        struct kfd_process_device *pdd;
        void *mem;
        struct kfd_dev *dev;
        int ret;

        dev = kfd_device_by_id(GET_GPU_ID(args->handle));
        if (!dev)
                return -EINVAL;

        mutex_lock(&p->mutex);

        pdd = kfd_get_process_device_data(dev, p);
        if (!pdd) {
                pr_err("Process device data doesn't exist\n");
                ret = -EINVAL;
                goto err_unlock;
        }

        mem = kfd_process_device_translate_handle(
                pdd, GET_IDR_HANDLE(args->handle));
        if (!mem) {
                ret = -EINVAL;
                goto err_unlock;
        }

        ret = dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);

        /* If freeing the buffer failed, leave the handle in place for
         * clean-up during process tear-down.
         */
        if (!ret)
                kfd_process_device_remove_obj_handle(
                        pdd, GET_IDR_HANDLE(args->handle));

err_unlock:
        mutex_unlock(&p->mutex);
        return ret;
}

static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
                                        struct kfd_process *p, void *data)
{
        struct kfd_ioctl_map_memory_to_gpu_args *args = data;
        struct kfd_process_device *pdd, *peer_pdd;
        void *mem;
        struct kfd_dev *dev, *peer;
        long err = 0;
        int i;
        uint32_t *devices_arr = NULL;

        dev = kfd_device_by_id(GET_GPU_ID(args->handle));
        if (!dev)
                return -EINVAL;

        if (!args->n_devices) {
                pr_debug("Device IDs array empty\n");
                return -EINVAL;
        }
        if (args->n_success > args->n_devices) {
                pr_debug("n_success exceeds n_devices\n");
                return -EINVAL;
        }

        devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
                                    GFP_KERNEL);
        if (!devices_arr)
                return -ENOMEM;

        err = copy_from_user(devices_arr,
                             (void __user *)args->device_ids_array_ptr,
                             args->n_devices * sizeof(*devices_arr));
        if (err != 0) {
                err = -EFAULT;
                goto copy_from_user_failed;
        }

        mutex_lock(&p->mutex);

        pdd = kfd_bind_process_to_device(dev, p);
        if (IS_ERR(pdd)) {
                err = PTR_ERR(pdd);
                goto bind_process_to_device_failed;
        }

        mem = kfd_process_device_translate_handle(pdd,
                                                GET_IDR_HANDLE(args->handle));
        if (!mem) {
                err = -ENOMEM;
                goto get_mem_obj_from_handle_failed;
        }

        for (i = args->n_success; i < args->n_devices; i++) {
                peer = kfd_device_by_id(devices_arr[i]);
                if (!peer) {
                        pr_debug("Getting device by id failed for 0x%x\n",
                                 devices_arr[i]);
                        err = -EINVAL;
                        goto get_mem_obj_from_handle_failed;
                }

                peer_pdd = kfd_bind_process_to_device(peer, p);
                if (IS_ERR(peer_pdd)) {
                        err = PTR_ERR(peer_pdd);
                        goto get_mem_obj_from_handle_failed;
                }
                err = peer->kfd2kgd->map_memory_to_gpu(
                        peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
                if (err) {
                        pr_err("Failed to map to gpu %d/%d\n",
                               i, args->n_devices);
                        goto map_memory_to_gpu_failed;
                }
                args->n_success = i + 1;
        }

        mutex_unlock(&p->mutex);

        err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
        if (err) {
                pr_debug("Sync memory failed, wait interrupted by user signal\n");
                goto sync_memory_failed;
        }

        /* Flush TLBs after waiting for the page table updates to complete */
        for (i = 0; i < args->n_devices; i++) {
                peer = kfd_device_by_id(devices_arr[i]);
                if (WARN_ON_ONCE(!peer))
                        continue;
                peer_pdd = kfd_get_process_device_data(peer, p);
                if (WARN_ON_ONCE(!peer_pdd))
                        continue;
                kfd_flush_tlb(peer_pdd);
        }

        kfree(devices_arr);

        return err;

bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
map_memory_to_gpu_failed:
        mutex_unlock(&p->mutex);
copy_from_user_failed:
sync_memory_failed:
        kfree(devices_arr);

        return err;
}
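
/*
 * Illustrative user-space sketch (added): the alloc-then-map flow these
 * ioctls implement. The handle returned by AMDKFD_IOC_ALLOC_MEMORY_OF_GPU
 * encodes the GPU ID and an IDR handle, and is passed verbatim to the
 * map call. Here va, size and gpu_id are placeholders supplied by the
 * caller, and a real allocation would usually set additional access
 * flags alongside KFD_IOC_ALLOC_MEM_FLAGS_VRAM.
 *
 *   struct kfd_ioctl_alloc_memory_of_gpu_args alloc = {
 *           .va_addr = va, .size = size, .gpu_id = gpu_id,
 *           .flags = KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
 *   };
 *   ioctl(kfd_fd, AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, &alloc);
 *
 *   struct kfd_ioctl_map_memory_to_gpu_args map = {
 *           .handle = alloc.handle,
 *           .device_ids_array_ptr = (uintptr_t)&gpu_id,
 *           .n_devices = 1,
 *   };
 *   ioctl(kfd_fd, AMDKFD_IOC_MAP_MEMORY_TO_GPU, &map);
 */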

static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
                                        struct kfd_process *p, void *data)
{
        struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
        struct kfd_process_device *pdd, *peer_pdd;
        void *mem;
        struct kfd_dev *dev, *peer;
        long err = 0;
        uint32_t *devices_arr = NULL, i;

        dev = kfd_device_by_id(GET_GPU_ID(args->handle));
        if (!dev)
                return -EINVAL;

        if (!args->n_devices) {
                pr_debug("Device IDs array empty\n");
                return -EINVAL;
        }
        if (args->n_success > args->n_devices) {
                pr_debug("n_success exceeds n_devices\n");
                return -EINVAL;
        }

        devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
                                    GFP_KERNEL);
        if (!devices_arr)
                return -ENOMEM;

        err = copy_from_user(devices_arr,
                             (void __user *)args->device_ids_array_ptr,
                             args->n_devices * sizeof(*devices_arr));
        if (err != 0) {
                err = -EFAULT;
                goto copy_from_user_failed;
        }

        mutex_lock(&p->mutex);

        pdd = kfd_get_process_device_data(dev, p);
        if (!pdd) {
                err = -EINVAL;
                goto bind_process_to_device_failed;
        }

        mem = kfd_process_device_translate_handle(pdd,
                                                GET_IDR_HANDLE(args->handle));
        if (!mem) {
                err = -ENOMEM;
                goto get_mem_obj_from_handle_failed;
        }

        for (i = args->n_success; i < args->n_devices; i++) {
                peer = kfd_device_by_id(devices_arr[i]);
                if (!peer) {
                        err = -EINVAL;
                        goto get_mem_obj_from_handle_failed;
                }

                peer_pdd = kfd_get_process_device_data(peer, p);
                if (!peer_pdd) {
                        err = -ENODEV;
                        goto get_mem_obj_from_handle_failed;
                }
1510                 err = dev->kfd2kgd->unmap_memory_to_gpu(
1511                         peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
1512                 if (err) {
1513                         pr_err("Failed to unmap from gpu %d/%d\n",
1514                                i, args->n_devices);
1515                         goto unmap_memory_from_gpu_failed;
1516                 }
1517                 args->n_success = i+1;
1518         }
1519         kfree(devices_arr);
1520
1521         mutex_unlock(&p->mutex);
1522
1523         return 0;
1524
1525 bind_process_to_device_failed:
1526 get_mem_obj_from_handle_failed:
1527 unmap_memory_from_gpu_failed:
1528         mutex_unlock(&p->mutex);
1529 copy_from_user_failed:
1530         kfree(devices_arr);
1531         return err;
1532 }
1533
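/*
 * One table entry per ioctl, indexed by the ioctl number (_IOC_NR) so
 * that kfd_ioctl() can dispatch by array lookup.
 */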
#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
			    .cmd_drv = 0, .name = #ioctl}

/** Ioctl table */
static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
			kfd_ioctl_get_version, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
			kfd_ioctl_create_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
			kfd_ioctl_destroy_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
			kfd_ioctl_set_memory_policy, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
			kfd_ioctl_get_clock_counters, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
			kfd_ioctl_get_process_apertures, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
			kfd_ioctl_update_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
			kfd_ioctl_create_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
			kfd_ioctl_destroy_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
			kfd_ioctl_set_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
			kfd_ioctl_reset_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
			kfd_ioctl_wait_events, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
			kfd_ioctl_dbg_register, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
			kfd_ioctl_dbg_unregister, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
			kfd_ioctl_dbg_address_watch, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
			kfd_ioctl_dbg_wave_control, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
			kfd_ioctl_set_scratch_backing_va, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
			kfd_ioctl_get_tile_config, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
			kfd_ioctl_set_trap_handler, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
			kfd_ioctl_get_process_apertures_new, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
			kfd_ioctl_acquire_vm, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
			kfd_ioctl_alloc_memory_of_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
			kfd_ioctl_free_memory_of_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
			kfd_ioctl_map_memory_to_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
			kfd_ioctl_unmap_memory_from_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
			kfd_ioctl_set_cu_mask, 0),
};

#define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)

static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
	struct kfd_process *process;
	amdkfd_ioctl_t *func;
	const struct amdkfd_ioctl_desc *ioctl = NULL;
	unsigned int nr = _IOC_NR(cmd);
	char stack_kdata[128];
	char *kdata = NULL;
	unsigned int usize, asize;
	int retcode = -EINVAL;

	if (nr >= AMDKFD_CORE_IOCTL_COUNT)
		goto err_i1;

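	/*
	 * Reconcile the argument size userspace encoded in cmd (usize)
	 * with the size our table expects (asize): the handler always
	 * sees a buffer of at least the kernel's struct size.
	 */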
	if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
		u32 amdkfd_size;

		ioctl = &amdkfd_ioctls[nr];

		amdkfd_size = _IOC_SIZE(ioctl->cmd);
		usize = asize = _IOC_SIZE(cmd);
		if (amdkfd_size > asize)
			asize = amdkfd_size;

		cmd = ioctl->cmd;
	} else
		goto err_i1;

	dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg);

	process = kfd_get_process(current);
	if (IS_ERR(process)) {
		dev_dbg(kfd_device, "no process\n");
		goto err_i1;
	}

	/* Do not trust userspace, use our own definition */
	func = ioctl->func;

	if (unlikely(!func)) {
		dev_dbg(kfd_device, "no function\n");
		retcode = -EINVAL;
		goto err_i1;
	}

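	/*
	 * Arguments are copied through a kernel buffer: a small
	 * on-stack one when they fit, a kmalloc'ed one otherwise.
	 */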
	if (cmd & (IOC_IN | IOC_OUT)) {
		if (asize <= sizeof(stack_kdata)) {
			kdata = stack_kdata;
		} else {
			kdata = kmalloc(asize, GFP_KERNEL);
			if (!kdata) {
				retcode = -ENOMEM;
				goto err_i1;
			}
		}
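		/* Zero any tail the userspace struct didn't cover */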
		if (asize > usize)
			memset(kdata + usize, 0, asize - usize);
	}

	if (cmd & IOC_IN) {
		if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
			retcode = -EFAULT;
			goto err_i1;
		}
	} else if (cmd & IOC_OUT) {
		memset(kdata, 0, usize);
	}

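	/* Dispatch to the handler looked up in the ioctl table */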
	retcode = func(filep, process, kdata);

	if (cmd & IOC_OUT)
		if (copy_to_user((void __user *)arg, kdata, usize) != 0)
			retcode = -EFAULT;

err_i1:
	if (!ioctl)
		dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
			  task_pid_nr(current), cmd, nr);

	if (kdata != stack_kdata)
		kfree(kdata);

	if (retcode)
		dev_dbg(kfd_device, "ret = %d\n", retcode);

	return retcode;
}

static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct kfd_process *process;
	struct kfd_dev *dev = NULL;
	unsigned long vm_pgoff;
	unsigned int gpu_id;

	process = kfd_get_process(current);
	if (IS_ERR(process))
		return PTR_ERR(process);

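	/*
	 * The mmap offset encodes the mapping type and, for per-device
	 * mappings, the GPU ID; decode both before dispatching.
	 */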
	vm_pgoff = vma->vm_pgoff;
	vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
	gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
	if (gpu_id)
		dev = kfd_device_by_id(gpu_id);

	switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
	case KFD_MMAP_TYPE_DOORBELL:
		if (!dev)
			return -ENODEV;
		return kfd_doorbell_mmap(dev, process, vma);

	case KFD_MMAP_TYPE_EVENTS:
		return kfd_event_mmap(process, vma);

	case KFD_MMAP_TYPE_RESERVED_MEM:
		if (!dev)
			return -ENODEV;
		return kfd_reserved_mem_mmap(dev, process, vma);
	}

	return -EFAULT;
}