1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2020 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/genalloc.h>
21 #include <linux/io-64-nonatomic-lo-hi.h>
22 #include <linux/iommu.h>
23 #include <linux/seq_file.h>
24
25 /*
26  * Gaudi security scheme:
27  *
28  * 1. Host is protected by:
29  *        - Range registers
30  *        - MMU
31  *
32  * 2. DDR is protected by:
33  *        - Range registers (protect the first 512MB)
34  *
35  * 3. Configuration is protected by:
36  *        - Range registers
37  *        - Protection bits
38  *
39  * MMU is always enabled.
40  *
41  * QMAN DMA channels 0,1,5 (PCI DMA):
42  *     - DMA is not secured.
43  *     - PQ and CQ are secured.
44  *     - CP is secured: The driver needs to parse the CB, but WREG should be
45  *                      allowed because of TDMA (tensor DMA). Hence, WREG is
46  *                      never secured.
47  *
48  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
49  * channel 0 to be secured, execute the DMA and change it back to not secured.
50  * Currently, the driver doesn't use the DMA while there are compute jobs
51  * running.
52  *
53  * The current use cases for the driver to use the DMA are:
54  *     - Clear SRAM on context switch (happens on context switch when device is
55  *       idle)
56  *     - MMU page tables area clear (happens on init)
57  *
58  * QMAN DMA 2-4,6,7, TPC, MME, NIC:
59  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
60  * CQ, CP and the engine are not secured
61  *
62  */
63
64 #define GAUDI_BOOT_FIT_FILE     "/*(DEBLOBBED)*/"
65 #define GAUDI_LINUX_FW_FILE     "/*(DEBLOBBED)*/"
66 #define GAUDI_TPC_FW_FILE       "/*(DEBLOBBED)*/"
67
68 #define GAUDI_DMA_POOL_BLK_SIZE         0x100 /* 256 bytes */
69
70 #define GAUDI_RESET_TIMEOUT_MSEC        1000            /* 1000ms */
71 #define GAUDI_RESET_WAIT_MSEC           1               /* 1ms */
72 #define GAUDI_CPU_RESET_WAIT_MSEC       200             /* 200ms */
73 #define GAUDI_TEST_QUEUE_WAIT_USEC      100000          /* 100ms */
74
75 #define GAUDI_PLDM_RESET_WAIT_MSEC      1000            /* 1s */
76 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC  20000           /* 20s */
77 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000         /* 1s */
78 #define GAUDI_PLDM_MMU_TIMEOUT_USEC     (MMU_CONFIG_TIMEOUT_USEC * 100)
79 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC   (HL_DEVICE_TIMEOUT_USEC * 30)
80 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
81 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000         /* 1s */
82 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC   4000000         /* 4s */
83
84 #define GAUDI_QMAN0_FENCE_VAL           0x72E91AB9
85
86 #define GAUDI_MAX_STRING_LEN            20
87
88 #define GAUDI_CB_POOL_CB_CNT            512
89 #define GAUDI_CB_POOL_CB_SIZE           0x20000 /* 128KB */
90
91 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT   3
92
93 #define GAUDI_NUM_OF_TPC_INTR_CAUSE     20
94
95 #define GAUDI_NUM_OF_QM_ERR_CAUSE       16
96
97 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE   3
98
99 #define GAUDI_ARB_WDT_TIMEOUT           0x1000000
100
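/*
 * Engines included in the clock-gating debugfs mask: MME 0 and MME 2 (the
 * two master MME cores) plus all eight TPCs.
 */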
101 #define GAUDI_CLK_GATE_DEBUGFS_MASK     (\
102                 BIT(GAUDI_ENGINE_ID_MME_0) |\
103                 BIT(GAUDI_ENGINE_ID_MME_2) |\
104                 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
105
106 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
107                 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
108                 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
109                 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
110                 "gaudi cpu eq"
111 };
112
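/*
 * Mapping of the driver's logical DMA channels to physical DMA engines.
 * Engines 0, 1 and 5 are the host-facing PCI DMA channels described in the
 * comment at the top of this file; the remaining five serve HBM traffic.
 */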
113 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
114         [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
115         [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
116         [GAUDI_PCI_DMA_3] = GAUDI_ENGINE_ID_DMA_5,
117         [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
118         [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
119         [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
120         [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_6,
121         [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_7
122 };
123
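/*
 * The 12 completion queues are backed by the four streams of each of the
 * three external (PCI) DMA QMANs: DMA 0, 1 and 5.
 */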
124 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
125         [0] = GAUDI_QUEUE_ID_DMA_0_0,
126         [1] = GAUDI_QUEUE_ID_DMA_0_1,
127         [2] = GAUDI_QUEUE_ID_DMA_0_2,
128         [3] = GAUDI_QUEUE_ID_DMA_0_3,
129         [4] = GAUDI_QUEUE_ID_DMA_1_0,
130         [5] = GAUDI_QUEUE_ID_DMA_1_1,
131         [6] = GAUDI_QUEUE_ID_DMA_1_2,
132         [7] = GAUDI_QUEUE_ID_DMA_1_3,
133         [8] = GAUDI_QUEUE_ID_DMA_5_0,
134         [9] = GAUDI_QUEUE_ID_DMA_5_1,
135         [10] = GAUDI_QUEUE_ID_DMA_5_2,
136         [11] = GAUDI_QUEUE_ID_DMA_5_3
137 };
138
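/* Per-opcode packet sizes, used when walking and validating command buffers */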
139 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
140         [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
141         [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
142         [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
143         [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
144         [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
145         [PACKET_REPEAT]         = sizeof(struct packet_repeat),
146         [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
147         [PACKET_FENCE]          = sizeof(struct packet_fence),
148         [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
149         [PACKET_NOP]            = sizeof(struct packet_nop),
150         [PACKET_STOP]           = sizeof(struct packet_stop),
151         [PACKET_ARB_POINT]      = sizeof(struct packet_arb_point),
152         [PACKET_WAIT]           = sizeof(struct packet_wait),
153         [PACKET_LOAD_AND_EXE]   = sizeof(struct packet_load_and_exe)
154 };
155
156 static inline bool validate_packet_id(enum packet_id id)
157 {
158         switch (id) {
159         case PACKET_WREG_32:
160         case PACKET_WREG_BULK:
161         case PACKET_MSG_LONG:
162         case PACKET_MSG_SHORT:
163         case PACKET_CP_DMA:
164         case PACKET_REPEAT:
165         case PACKET_MSG_PROT:
166         case PACKET_FENCE:
167         case PACKET_LIN_DMA:
168         case PACKET_NOP:
169         case PACKET_STOP:
170         case PACKET_ARB_POINT:
171         case PACKET_WAIT:
172         case PACKET_LOAD_AND_EXE:
173                 return true;
174         default:
175                 return false;
176         }
177 }
178
179 static const char * const
180 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
181         "tpc_address_exceed_slm",
182         "tpc_div_by_0",
183         "tpc_spu_mac_overflow",
184         "tpc_spu_addsub_overflow",
185         "tpc_spu_abs_overflow",
186         "tpc_spu_fp_dst_nan_inf",
187         "tpc_spu_fp_dst_denorm",
188         "tpc_vpu_mac_overflow",
189         "tpc_vpu_addsub_overflow",
190         "tpc_vpu_abs_overflow",
191         "tpc_vpu_fp_dst_nan_inf",
192         "tpc_vpu_fp_dst_denorm",
193         "tpc_assertions",
194         "tpc_illegal_instruction",
195         "tpc_pc_wrap_around",
196         "tpc_qm_sw_err",
197         "tpc_hbw_rresp_err",
198         "tpc_hbw_bresp_err",
199         "tpc_lbw_rresp_err",
200         "tpc_lbw_bresp_err"
201 };
202
203 static const char * const
204 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
205         "PQ AXI HBW error",
206         "CQ AXI HBW error",
207         "CP AXI HBW error",
208         "CP error due to undefined OPCODE",
209         "CP encountered STOP OPCODE",
210         "CP AXI LBW error",
211         "CP WRREG32 or WRBULK returned error",
212         "N/A",
213         "FENCE 0 inc over max value and clipped",
214         "FENCE 1 inc over max value and clipped",
215         "FENCE 2 inc over max value and clipped",
216         "FENCE 3 inc over max value and clipped",
217         "FENCE 0 dec under min value and clipped",
218         "FENCE 1 dec under min value and clipped",
219         "FENCE 2 dec under min value and clipped",
220         "FENCE 3 dec under min value and clipped"
221 };
222
223 static const char * const
224 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
225         "Choice push while full error",
226         "Choice Q watchdog error",
227         "MSG AXI LBW returned with error"
228 };
229
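/*
 * Queue type per hardware queue ID: EXT queues are host-resident queues the
 * driver submits to on behalf of user space (the PCI DMA QMANs), INT queues
 * are managed by the on-device engine QMANs, CPU is the driver<->device-CPU
 * queue, and NA marks the NIC queues, which this driver version does not
 * expose.
 */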
230 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
231         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
232         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
233         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
234         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
235         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
236         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
237         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
238         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
239         QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
240         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
241         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
242         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
243         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
244         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
245         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
246         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
247         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
248         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
249         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
250         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
251         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
252         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_0 */
253         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_1 */
254         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_2 */
255         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_3 */
256         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
257         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
258         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
259         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
260         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
261         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
262         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
263         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
264         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
265         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
266         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
267         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
268         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
269         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
270         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
271         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
272         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
273         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
274         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
275         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
276         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
277         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
278         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
279         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
280         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
281         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
282         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
283         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
284         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
285         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
286         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
287         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
288         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
289         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
290         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
291         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
292         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
293         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
294         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
295         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
296         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
297         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
298         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
299         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
300         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
301         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
302         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
303         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
304         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_0 */
305         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_1 */
306         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_2 */
307         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_3 */
308         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_0 */
309         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_1 */
310         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_2 */
311         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_3 */
312         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_0 */
313         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_1 */
314         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_2 */
315         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_3 */
316         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_0 */
317         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_1 */
318         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_2 */
319         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_3 */
320         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_0 */
321         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_1 */
322         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_2 */
323         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_3 */
324         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_0 */
325         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_1 */
326         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_2 */
327         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_3 */
328         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_0 */
329         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_1 */
330         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_2 */
331         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_3 */
332         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_0 */
333         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_1 */
334         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_2 */
335         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_3 */
336         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_0 */
337         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_1 */
338         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_2 */
339         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_3 */
340         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_0 */
341         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_1 */
342         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_2 */
343         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_3 */
344 };
345
346 struct ecc_info_extract_params {
347         u64 block_address;
348         u32 num_memories;
349         bool derr;
350         bool disable_clock_gating;
351 };
352
353 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
354                                                                 u64 phys_addr);
355 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
356                                         struct hl_cs_job *job);
357 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
358                                         u32 size, u64 val);
359 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
360                                 u32 tpc_id);
361 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
362 static int gaudi_cpucp_info_get(struct hl_device *hdev);
363 static void gaudi_disable_clock_gating(struct hl_device *hdev);
364 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
365
366 static int gaudi_get_fixed_properties(struct hl_device *hdev)
367 {
368         struct asic_fixed_properties *prop = &hdev->asic_prop;
369         u32 num_sync_stream_queues = 0;
370         int i;
371
372         prop->max_queues = GAUDI_QUEUE_ID_SIZE;
373         prop->hw_queues_props = kcalloc(prop->max_queues,
374                         sizeof(struct hw_queue_properties),
375                         GFP_KERNEL);
376
377         if (!prop->hw_queues_props)
378                 return -ENOMEM;
379
380         for (i = 0 ; i < prop->max_queues ; i++) {
381                 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
382                         prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
383                         prop->hw_queues_props[i].driver_only = 0;
384                         prop->hw_queues_props[i].requires_kernel_cb = 1;
385                         prop->hw_queues_props[i].supports_sync_stream = 1;
386                         num_sync_stream_queues++;
387                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
388                         prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
389                         prop->hw_queues_props[i].driver_only = 1;
390                         prop->hw_queues_props[i].requires_kernel_cb = 0;
391                         prop->hw_queues_props[i].supports_sync_stream = 0;
392                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
393                         prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
394                         prop->hw_queues_props[i].driver_only = 0;
395                         prop->hw_queues_props[i].requires_kernel_cb = 0;
396                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_NA) {
397                         prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
398                         prop->hw_queues_props[i].driver_only = 0;
399                         prop->hw_queues_props[i].requires_kernel_cb = 0;
400                         prop->hw_queues_props[i].supports_sync_stream = 0;
401                 }
402         }
403
404         prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
405         prop->sync_stream_first_sob = 0;
406         prop->sync_stream_first_mon = 0;
407         prop->dram_base_address = DRAM_PHYS_BASE;
408         prop->dram_size = GAUDI_HBM_SIZE_32GB;
409         prop->dram_end_address = prop->dram_base_address +
410                                         prop->dram_size;
411         prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
412
413         prop->sram_base_address = SRAM_BASE_ADDR;
414         prop->sram_size = SRAM_SIZE;
415         prop->sram_end_address = prop->sram_base_address +
416                                         prop->sram_size;
417         prop->sram_user_base_address = prop->sram_base_address +
418                                         SRAM_USER_BASE_OFFSET;
419
420         prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
421         if (hdev->pldm)
422                 prop->mmu_pgt_size = 0x800000; /* 8MB */
423         else
424                 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
425         prop->mmu_pte_size = HL_PTE_SIZE;
426         prop->mmu_hop_table_size = HOP_TABLE_SIZE;
427         prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
428         prop->dram_page_size = PAGE_SIZE_2MB;
429
430         prop->pmmu.hop0_shift = HOP0_SHIFT;
431         prop->pmmu.hop1_shift = HOP1_SHIFT;
432         prop->pmmu.hop2_shift = HOP2_SHIFT;
433         prop->pmmu.hop3_shift = HOP3_SHIFT;
434         prop->pmmu.hop4_shift = HOP4_SHIFT;
435         prop->pmmu.hop0_mask = HOP0_MASK;
436         prop->pmmu.hop1_mask = HOP1_MASK;
437         prop->pmmu.hop2_mask = HOP2_MASK;
438         prop->pmmu.hop3_mask = HOP3_MASK;
439         prop->pmmu.hop4_mask = HOP4_MASK;
440         prop->pmmu.start_addr = VA_HOST_SPACE_START;
441         prop->pmmu.end_addr =
442                         (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
443         prop->pmmu.page_size = PAGE_SIZE_4KB;
444         prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
445
446         /* PMMU and HPMMU are the same except for the page size */
447         memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
448         prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
449
450         /* shifts and masks are the same in PMMU and DMMU */
451         memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
452         prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
453         prop->dmmu.end_addr = VA_HOST_SPACE_END;
454         prop->dmmu.page_size = PAGE_SIZE_2MB;
455
456         prop->cfg_size = CFG_SIZE;
457         prop->max_asid = MAX_ASID;
458         prop->num_of_events = GAUDI_EVENT_SIZE;
459         prop->tpc_enabled_mask = TPC_ENABLED_MASK;
460
461         prop->max_power_default = MAX_POWER_DEFAULT_PCI;
462
463         prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
464         prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
465
466         prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
467         prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
468
469         strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
470                                         CARD_NAME_MAX_LEN);
471
472         prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
473
474         prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
475                         num_sync_stream_queues * HL_RSVD_SOBS;
476         prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
477                         num_sync_stream_queues * HL_RSVD_MONS;
478
479         return 0;
480 }
481
482 static int gaudi_pci_bars_map(struct hl_device *hdev)
483 {
484         static const char * const name[] = {"SRAM", "CFG", "HBM"};
485         bool is_wc[3] = {false, false, true};
486         int rc;
487
488         rc = hl_pci_bars_map(hdev, name, is_wc);
489         if (rc)
490                 return rc;
491
492         hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
493                         (CFG_BASE - SPI_FLASH_BASE_ADDR);
494
495         return 0;
496 }
497
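/*
 * Re-program inbound iATU region 2 so the HBM BAR window exposes device
 * memory starting at 'addr'. Returns the previous base address so callers
 * can restore it, or U64_MAX on failure.
 */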
498 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
499 {
500         struct gaudi_device *gaudi = hdev->asic_specific;
501         struct hl_inbound_pci_region pci_region;
502         u64 old_addr = addr;
503         int rc;
504
505         if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
506                 return old_addr;
507
508         /* Inbound Region 2 - Bar 4 - Point to HBM */
509         pci_region.mode = PCI_BAR_MATCH_MODE;
510         pci_region.bar = HBM_BAR_ID;
511         pci_region.addr = addr;
512         rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
513         if (rc)
514                 return U64_MAX;
515
516         if (gaudi) {
517                 old_addr = gaudi->hbm_bar_cur_addr;
518                 gaudi->hbm_bar_cur_addr = addr;
519         }
520
521         return old_addr;
522 }
523
524 static int gaudi_init_iatu(struct hl_device *hdev)
525 {
526         struct hl_inbound_pci_region inbound_region;
527         struct hl_outbound_pci_region outbound_region;
528         int rc;
529
530         /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
531         inbound_region.mode = PCI_BAR_MATCH_MODE;
532         inbound_region.bar = SRAM_BAR_ID;
533         inbound_region.addr = SRAM_BASE_ADDR;
534         rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
535         if (rc)
536                 goto done;
537
538         /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
539         inbound_region.mode = PCI_BAR_MATCH_MODE;
540         inbound_region.bar = CFG_BAR_ID;
541         inbound_region.addr = SPI_FLASH_BASE_ADDR;
542         rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
543         if (rc)
544                 goto done;
545
546         /* Inbound Region 2 - Bar 4 - Point to HBM */
547         inbound_region.mode = PCI_BAR_MATCH_MODE;
548         inbound_region.bar = HBM_BAR_ID;
549         inbound_region.addr = DRAM_PHYS_BASE;
550         rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
551         if (rc)
552                 goto done;
553
554         hdev->asic_funcs->set_dma_mask_from_fw(hdev);
555
556         /* Outbound Region 0 - Point to Host */
557         outbound_region.addr = HOST_PHYS_BASE;
558         outbound_region.size = HOST_PHYS_SIZE;
559         rc = hl_pci_set_outbound_region(hdev, &outbound_region);
560
561 done:
562         return rc;
563 }
564
565 static int gaudi_early_init(struct hl_device *hdev)
566 {
567         struct asic_fixed_properties *prop = &hdev->asic_prop;
568         struct pci_dev *pdev = hdev->pdev;
569         int rc;
570
571         rc = gaudi_get_fixed_properties(hdev);
572         if (rc) {
573                 dev_err(hdev->dev, "Failed to get fixed properties\n");
574                 return rc;
575         }
576
577         /* Check BAR sizes */
578         if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
579                 dev_err(hdev->dev,
580                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
581                         SRAM_BAR_ID,
582                         (unsigned long long) pci_resource_len(pdev,
583                                                         SRAM_BAR_ID),
584                         SRAM_BAR_SIZE);
585                 rc = -ENODEV;
586                 goto free_queue_props;
587         }
588
589         if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
590                 dev_err(hdev->dev,
591                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
592                         CFG_BAR_ID,
593                         (unsigned long long) pci_resource_len(pdev,
594                                                                 CFG_BAR_ID),
595                         CFG_BAR_SIZE);
596                 rc = -ENODEV;
597                 goto free_queue_props;
598         }
599
600         prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
601
602         rc = hl_pci_init(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
603                         mmCPU_BOOT_ERR0, GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
604         if (rc)
605                 goto free_queue_props;
606
607         /* GAUDI Firmware does not yet support security */
608         prop->fw_security_disabled = true;
609         dev_info(hdev->dev, "firmware-level security is disabled\n");
610
611         return 0;
612
613 free_queue_props:
614         kfree(hdev->asic_prop.hw_queues_props);
615         return rc;
616 }
617
618 static int gaudi_early_fini(struct hl_device *hdev)
619 {
620         kfree(hdev->asic_prop.hw_queues_props);
621         hl_pci_fini(hdev);
622
623         return 0;
624 }
625
626 /**
627  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
628  *
629  * @hdev: pointer to hl_device structure
630  *
631  */
632 static void gaudi_fetch_psoc_frequency(struct hl_device *hdev)
633 {
634         struct asic_fixed_properties *prop = &hdev->asic_prop;
635         u32 trace_freq = 0;
636         u32 pll_clk = 0;
637         u32 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
638         u32 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
639         u32 nr = RREG32(mmPSOC_CPU_PLL_NR);
640         u32 nf = RREG32(mmPSOC_CPU_PLL_NF);
641         u32 od = RREG32(mmPSOC_CPU_PLL_OD);
642
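        /*
         * The timestamp/trace clock is either the PLL reference clock or the
         * PLL output, optionally post-divided. The PLL output follows the
         * usual divider relation:
         *   pll_clk = PLL_REF_CLK * (NF + 1) / ((NR + 1) * (OD + 1))
         */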
643         if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
644                 if (div_sel == DIV_SEL_REF_CLK)
645                         trace_freq = PLL_REF_CLK;
646                 else
647                         trace_freq = PLL_REF_CLK / (div_fctr + 1);
648         } else if (div_sel == DIV_SEL_PLL_CLK ||
649                                         div_sel == DIV_SEL_DIVIDED_PLL) {
650                 pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
651                 if (div_sel == DIV_SEL_PLL_CLK)
652                         trace_freq = pll_clk;
653                 else
654                         trace_freq = pll_clk / (div_fctr + 1);
655         } else {
656                 dev_warn(hdev->dev,
657                         "Received invalid div select value: %d", div_sel);
658         }
659
660         prop->psoc_timestamp_frequency = trace_freq;
661         prop->psoc_pci_pll_nr = nr;
662         prop->psoc_pci_pll_nf = nf;
663         prop->psoc_pci_pll_od = od;
664         prop->psoc_pci_pll_div_factor = div_fctr;
665 }
666
667 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
668                 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
669 {
670         struct asic_fixed_properties *prop = &hdev->asic_prop;
671         struct packet_lin_dma *init_tpc_mem_pkt;
672         struct hl_cs_job *job;
673         struct hl_cb *cb;
674         u64 dst_addr;
675         u32 cb_size, ctl;
676         u8 tpc_id;
677         int rc;
678
679         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
680         if (!cb)
681                 return -EFAULT;
682
683         init_tpc_mem_pkt = cb->kernel_address;
684         cb_size = sizeof(*init_tpc_mem_pkt);
685         memset(init_tpc_mem_pkt, 0, cb_size);
686
687         init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
688
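        /*
         * Build a single LIN_DMA packet that copies the TPC kernel image from
         * host memory to the SRAM user area; the RB/MB bits request barriers
         * so the copy is ordered with respect to the packets that follow.
         */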
689         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
690         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
691         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
692         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
693
694         init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
695
696         init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
697         dst_addr = (prop->sram_user_base_address &
698                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
699                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
700         init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
701
702         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
703         if (!job) {
704                 dev_err(hdev->dev, "Failed to allocate a new job\n");
705                 rc = -ENOMEM;
706                 goto release_cb;
707         }
708
709         job->id = 0;
710         job->user_cb = cb;
711         job->user_cb->cs_cnt++;
712         job->user_cb_size = cb_size;
713         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
714         job->patched_cb = job->user_cb;
715         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
716
717         hl_debugfs_add_job(hdev, job);
718
719         rc = gaudi_send_job_on_qman0(hdev, job);
720
721         if (rc)
722                 goto free_job;
723
724         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
725                 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
726                 if (rc)
727                         break;
728         }
729
730 free_job:
731         hl_userptr_delete_list(hdev, &job->userptr_list);
732         hl_debugfs_remove_job(hdev, job);
733         kfree(job);
734         cb->cs_cnt--;
735
736 release_cb:
737         hl_cb_put(cb);
738         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
739
740         return rc;
741 }
742
743 /*
744  * gaudi_init_tpc_mem() - Initialize TPC memories.
745  * @hdev: Pointer to hl_device structure.
746  *
747  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
748  *
749  * Return: 0 for success, negative value for error.
750  */
751 static int gaudi_init_tpc_mem(struct hl_device *hdev)
752 {
753         const struct firmware *fw;
754         size_t fw_size;
755         void *cpu_addr;
756         dma_addr_t dma_handle;
757         int rc, count = 5;
758
759 again:
760         rc = reject_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
761         if (rc == -EINTR && count-- > 0) {
762                 msleep(50);
763                 goto again;
764         }
765
766         if (rc) {
767                 dev_err(hdev->dev, "Failed to load firmware file %s\n",
768                                 GAUDI_TPC_FW_FILE);
769                 goto out;
770         }
771
772         fw_size = fw->size;
773         cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
774                         &dma_handle, GFP_KERNEL | __GFP_ZERO);
775         if (!cpu_addr) {
776                 dev_err(hdev->dev,
777                         "Failed to allocate %zu of dma memory for TPC kernel\n",
778                         fw_size);
779                 rc = -ENOMEM;
780                 goto out;
781         }
782
783         memcpy(cpu_addr, fw->data, fw_size);
784
785         rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
786
787         hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
788                         dma_handle);
789
790 out:
791         release_firmware(fw);
792         return rc;
793 }
794
795 static int gaudi_late_init(struct hl_device *hdev)
796 {
797         struct gaudi_device *gaudi = hdev->asic_specific;
798         int rc;
799
800         rc = gaudi->cpucp_info_get(hdev);
801         if (rc) {
802                 dev_err(hdev->dev, "Failed to get cpucp info\n");
803                 return rc;
804         }
805
806         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
807         if (rc) {
808                 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
809                 return rc;
810         }
811
812         WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
813
814         gaudi_fetch_psoc_frequency(hdev);
815
816         rc = gaudi_mmu_clear_pgt_range(hdev);
817         if (rc) {
818                 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
819                 goto disable_pci_access;
820         }
821
822         rc = gaudi_init_tpc_mem(hdev);
823         if (rc) {
824                 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
825                 goto disable_pci_access;
826         }
827
828         return 0;
829
830 disable_pci_access:
831         hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
832
833         return rc;
834 }
835
836 static void gaudi_late_fini(struct hl_device *hdev)
837 {
838         const struct hwmon_channel_info **channel_info_arr;
839         int i = 0;
840
841         if (!hdev->hl_chip_info->info)
842                 return;
843
844         channel_info_arr = hdev->hl_chip_info->info;
845
846         while (channel_info_arr[i]) {
847                 kfree(channel_info_arr[i]->config);
848                 kfree(channel_info_arr[i]);
849                 i++;
850         }
851
852         kfree(channel_info_arr);
853
854         hdev->hl_chip_info->info = NULL;
855 }
856
857 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
858 {
859         dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
860         void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
861         int i, j, rc = 0;
862
863         /*
864          * The device CPU works with 40-bit addresses, while bit 39 must be set
865          * to '1' when accessing the host.
866          * Bits 49:39 of the full host address are saved for a later
867          * configuration of the HW to perform extension to 50 bits.
868          * Because there is a single HW register that holds the extension bits,
869          * these bits must be identical across the entire allocated range.
870          */
871
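        /*
         * In other words, the allocation must not cross a 2^39 (512 GB)
         * boundary in DMA address space, so retry the coherent allocation a
         * few times until such a range is returned.
         */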
872         for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
873                 virt_addr_arr[i] =
874                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
875                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
876                                                 &dma_addr_arr[i],
877                                                 GFP_KERNEL | __GFP_ZERO);
878                 if (!virt_addr_arr[i]) {
879                         rc = -ENOMEM;
880                         goto free_dma_mem_arr;
881                 }
882
883                 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
884                 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
885                                 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
886                         break;
887         }
888
889         if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
890                 dev_err(hdev->dev,
891                         "MSB of CPU accessible DMA memory is not identical across the allocated range\n");
892                 rc = -EFAULT;
893                 goto free_dma_mem_arr;
894         }
895
896         hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
897         hdev->cpu_accessible_dma_address = dma_addr_arr[i];
898         hdev->cpu_pci_msb_addr =
899                 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
900
901         GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
902
903 free_dma_mem_arr:
904         for (j = 0 ; j < i ; j++)
905                 hdev->asic_funcs->asic_dma_free_coherent(hdev,
906                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
907                                                 virt_addr_arr[j],
908                                                 dma_addr_arr[j]);
909
910         return rc;
911 }
912
913 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
914 {
915         struct gaudi_device *gaudi = hdev->asic_specific;
916         struct gaudi_internal_qman_info *q;
917         u32 i;
918
919         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
920                 q = &gaudi->internal_qmans[i];
921                 if (!q->pq_kernel_addr)
922                         continue;
923                 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
924                                                         q->pq_kernel_addr,
925                                                         q->pq_dma_addr);
926         }
927 }
928
929 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
930 {
931         struct gaudi_device *gaudi = hdev->asic_specific;
932         struct gaudi_internal_qman_info *q;
933         int rc, i;
934
935         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
936                 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
937                         continue;
938
939                 q = &gaudi->internal_qmans[i];
940
941                 switch (i) {
942                 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_4_3:
943                 case GAUDI_QUEUE_ID_DMA_6_0 ... GAUDI_QUEUE_ID_DMA_7_3:
944                         q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
945                         break;
946                 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
947                         q->pq_size = MME_QMAN_SIZE_IN_BYTES;
948                         break;
949                 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
950                         q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
951                         break;
952                 default:
953                         dev_err(hdev->dev, "Bad internal queue index %d", i);
954                         rc = -EINVAL;
955                         goto free_internal_qmans_pq_mem;
956                 }
957
958                 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
959                                                 hdev, q->pq_size,
960                                                 &q->pq_dma_addr,
961                                                 GFP_KERNEL | __GFP_ZERO);
962                 if (!q->pq_kernel_addr) {
963                         rc = -ENOMEM;
964                         goto free_internal_qmans_pq_mem;
965                 }
966         }
967
968         return 0;
969
970 free_internal_qmans_pq_mem:
971         gaudi_free_internal_qmans_pq_mem(hdev);
972         return rc;
973 }
974
975 static int gaudi_sw_init(struct hl_device *hdev)
976 {
977         struct gaudi_device *gaudi;
978         u32 i, event_id = 0;
979         int rc;
980
981         /* Allocate device structure */
982         gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
983         if (!gaudi)
984                 return -ENOMEM;
985
986         for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
987                 if (gaudi_irq_map_table[i].valid) {
988                         if (event_id == GAUDI_EVENT_SIZE) {
989                                 dev_err(hdev->dev,
990                                         "Event array exceeds the limit of %u events\n",
991                                         GAUDI_EVENT_SIZE);
992                                 rc = -EINVAL;
993                                 goto free_gaudi_device;
994                         }
995
996                         gaudi->events[event_id++] =
997                                         gaudi_irq_map_table[i].fc_id;
998                 }
999         }
1000
1001         gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1002
1003         gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1004
1005         hdev->asic_specific = gaudi;
1006
1007         /* Create DMA pool for small allocations */
1008         hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1009                         &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1010         if (!hdev->dma_pool) {
1011                 dev_err(hdev->dev, "failed to create DMA pool\n");
1012                 rc = -ENOMEM;
1013                 goto free_gaudi_device;
1014         }
1015
1016         rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1017         if (rc)
1018                 goto free_dma_pool;
1019
1020         hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1021         if (!hdev->cpu_accessible_dma_pool) {
1022                 dev_err(hdev->dev,
1023                         "Failed to create CPU accessible DMA pool\n");
1024                 rc = -ENOMEM;
1025                 goto free_cpu_dma_mem;
1026         }
1027
1028         rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1029                                 (uintptr_t) hdev->cpu_accessible_dma_mem,
1030                                 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1031         if (rc) {
1032                 dev_err(hdev->dev,
1033                         "Failed to add memory to CPU accessible DMA pool\n");
1034                 rc = -EFAULT;
1035                 goto free_cpu_accessible_dma_pool;
1036         }
1037
1038         rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1039         if (rc)
1040                 goto free_cpu_accessible_dma_pool;
1041
1042         spin_lock_init(&gaudi->hw_queues_lock);
1043         mutex_init(&gaudi->clk_gate_mutex);
1044
1045         hdev->supports_sync_stream = true;
1046         hdev->supports_coresight = true;
1047
1048         return 0;
1049
1050 free_cpu_accessible_dma_pool:
1051         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1052 free_cpu_dma_mem:
1053         GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1054                                 hdev->cpu_pci_msb_addr);
1055         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1056                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1057                         hdev->cpu_accessible_dma_mem,
1058                         hdev->cpu_accessible_dma_address);
1059 free_dma_pool:
1060         dma_pool_destroy(hdev->dma_pool);
1061 free_gaudi_device:
1062         kfree(gaudi);
1063         return rc;
1064 }
1065
1066 static int gaudi_sw_fini(struct hl_device *hdev)
1067 {
1068         struct gaudi_device *gaudi = hdev->asic_specific;
1069
1070         gaudi_free_internal_qmans_pq_mem(hdev);
1071
1072         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1073
1074         GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1075                                         hdev->cpu_pci_msb_addr);
1076         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1077                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1078                         hdev->cpu_accessible_dma_mem,
1079                         hdev->cpu_accessible_dma_address);
1080
1081         dma_pool_destroy(hdev->dma_pool);
1082
1083         mutex_destroy(&gaudi->clk_gate_mutex);
1084
1085         kfree(gaudi);
1086
1087         return 0;
1088 }
1089
1090 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1091 {
1092         struct hl_device *hdev = arg;
1093         int i;
1094
1095         if (hdev->disabled)
1096                 return IRQ_HANDLED;
1097
1098         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1099                 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1100
1101         hl_irq_handler_eq(irq, &hdev->event_queue);
1102
1103         return IRQ_HANDLED;
1104 }
1105
1106 /*
1107  * For backward compatibility, new MSI interrupts should be set after the
1108  * existing CPU and NIC interrupts.
1109  */
1110 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1111                                 bool cpu_eq)
1112 {
1113         int msi_vec;
1114
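        /*
         * Completion queues use the first MSI vectors and the CPU event queue
         * uses vector GAUDI_EVENT_QUEUE_MSI_IDX; any index above that is
         * shifted past the (reserved) NIC vectors, per the comment above.
         */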
1115         if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1116                 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1117                                 GAUDI_EVENT_QUEUE_MSI_IDX);
1118
1119         msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1120                         (nr + NIC_NUMBER_OF_ENGINES + 1);
1121
1122         return pci_irq_vector(hdev->pdev, msi_vec);
1123 }
1124
1125 static int gaudi_enable_msi_single(struct hl_device *hdev)
1126 {
1127         int rc, irq;
1128
1129         dev_info(hdev->dev, "Working in single MSI IRQ mode\n");
1130
1131         irq = gaudi_pci_irq_vector(hdev, 0, false);
1132         rc = request_irq(irq, gaudi_irq_handler_single, 0,
1133                         "gaudi single msi", hdev);
1134         if (rc)
1135                 dev_err(hdev->dev,
1136                         "Failed to request single MSI IRQ\n");
1137
1138         return rc;
1139 }
1140
1141 static int gaudi_enable_msi_multi(struct hl_device *hdev)
1142 {
1143         int cq_cnt = hdev->asic_prop.completion_queues_count;
1144         int rc, i, irq_cnt_init, irq;
1145
1146         for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1147                 irq = gaudi_pci_irq_vector(hdev, i, false);
1148                 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1149                                 &hdev->completion_queue[i]);
1150                 if (rc) {
1151                         dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1152                         goto free_irqs;
1153                 }
1154         }
1155
1156         irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1157         rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1158                                 &hdev->event_queue);
1159         if (rc) {
1160                 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1161                 goto free_irqs;
1162         }
1163
1164         return 0;
1165
1166 free_irqs:
1167         for (i = 0 ; i < irq_cnt_init ; i++)
1168                 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1169                                 &hdev->completion_queue[i]);
1170         return rc;
1171 }
1172
1173 static int gaudi_enable_msi(struct hl_device *hdev)
1174 {
1175         struct gaudi_device *gaudi = hdev->asic_specific;
1176         int rc;
1177
1178         if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1179                 return 0;
1180
1181         rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
1182                                         PCI_IRQ_MSI);
1183         if (rc < 0) {
1184                 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1185                 return rc;
1186         }
1187
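        /*
         * pci_alloc_irq_vectors() returns the number of vectors it actually
         * allocated; if we received fewer than NUMBER_OF_INTERRUPTS, fall back
         * to a single, shared MSI handler.
         */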
1188         if (rc < NUMBER_OF_INTERRUPTS) {
1189                 gaudi->multi_msi_mode = false;
1190                 rc = gaudi_enable_msi_single(hdev);
1191         } else {
1192                 gaudi->multi_msi_mode = true;
1193                 rc = gaudi_enable_msi_multi(hdev);
1194         }
1195
1196         if (rc)
1197                 goto free_pci_irq_vectors;
1198
1199         gaudi->hw_cap_initialized |= HW_CAP_MSI;
1200
1201         return 0;
1202
1203 free_pci_irq_vectors:
1204         pci_free_irq_vectors(hdev->pdev);
1205         return rc;
1206 }
1207
1208 static void gaudi_sync_irqs(struct hl_device *hdev)
1209 {
1210         struct gaudi_device *gaudi = hdev->asic_specific;
1211         int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1212
1213         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1214                 return;
1215
1216         /* Wait for all pending IRQs to be finished */
1217         if (gaudi->multi_msi_mode) {
1218                 for (i = 0 ; i < cq_cnt ; i++)
1219                         synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1220
1221                 synchronize_irq(gaudi_pci_irq_vector(hdev,
1222                                                 GAUDI_EVENT_QUEUE_MSI_IDX,
1223                                                 true));
1224         } else {
1225                 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1226         }
1227 }
1228
1229 static void gaudi_disable_msi(struct hl_device *hdev)
1230 {
1231         struct gaudi_device *gaudi = hdev->asic_specific;
1232         int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1233
1234         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1235                 return;
1236
1237         gaudi_sync_irqs(hdev);
1238
1239         if (gaudi->multi_msi_mode) {
1240                 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1241                                                 true);
1242                 free_irq(irq, &hdev->event_queue);
1243
1244                 for (i = 0 ; i < cq_cnt ; i++) {
1245                         irq = gaudi_pci_irq_vector(hdev, i, false);
1246                         free_irq(irq, &hdev->completion_queue[i]);
1247                 }
1248         } else {
1249                 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1250         }
1251
1252         pci_free_irq_vectors(hdev->pdev);
1253
1254         gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1255 }
1256
1257 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1258 {
1259         struct gaudi_device *gaudi = hdev->asic_specific;
1260
1261         if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1262                 return;
1263
1264         if (!hdev->sram_scrambler_enable)
1265                 return;
1266
1267         WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1268                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1269         WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1270                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1271         WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1272                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1273         WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1274                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1275         WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1276                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1277         WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1278                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1279         WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1280                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1281         WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1282                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1283
1284         WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1285                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1286         WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1287                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1288         WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1289                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1290         WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1291                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1292         WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1293                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1294         WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1295                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1296         WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1297                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1298         WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1299                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1300
1301         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1302                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1303         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1304                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1305         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1306                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1307         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1308                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1309         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1310                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1311         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1312                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1313         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
1314                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1315         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
1316                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1317
1318         gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
1319 }
1320
1321 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
1322 {
1323         struct gaudi_device *gaudi = hdev->asic_specific;
1324
1325         if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
1326                 return;
1327
1328         if (!hdev->dram_scrambler_enable)
1329                 return;
1330
1331         WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
1332                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1333         WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
1334                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1335         WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
1336                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1337         WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
1338                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1339         WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
1340                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1341         WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
1342                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1343         WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
1344                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1345         WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
1346                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1347
1348         WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
1349                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1350         WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
1351                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1352         WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
1353                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1354         WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
1355                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1356         WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
1357                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1358         WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
1359                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1360         WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
1361                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1362         WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
1363                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1364
1365         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
1366                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1367         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
1368                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1369         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
1370                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1371         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
1372                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1373         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
1374                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1375         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
1376                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1377         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
1378                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1379         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
1380                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1381
1382         gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
1383 }
1384
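/*
 * gaudi_init_e2e() - program the end-to-end (E2E) credit sizes of the SIF/NIF
 * routers and the DMA_IF down channels, then enable E2E credits everywhere.
 * The per-router HBM/PCI read and write sizes below are fixed tuning values;
 * the HBM sizes appear to be programmed in units of 8 (hence the ">> 3"),
 * though that granularity is inferred from the shift alone.
 */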
1385 static void gaudi_init_e2e(struct hl_device *hdev)
1386 {
1387         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
1388         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
1389         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
1390         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
1391
1392         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1393         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1394         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1395         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1396
1397         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1398         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1399         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1400         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1401
1402         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1403         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1404         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1405         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1406
1407         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1408         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1409         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1410         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1411
1412         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1413         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1414         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1415         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1416
1417         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1418         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1419         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1420         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1421
1422         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
1423         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
1424         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
1425         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
1426
1427         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
1428         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
1429         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
1430         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
1431
1432         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1433         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1434         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1435         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1436
1437         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1438         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1439         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1440         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1441
1442         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1443         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1444         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1445         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1446
1447         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1448         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1449         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1450         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1451
1452         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1453         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1454         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1455         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1456
1457         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1458         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1459         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1460         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1461
1462         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
1463         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
1464         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
1465         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
1466
1467         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1468         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1469         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1470         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1471
1472         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1473         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1474         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1475         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1476
1477         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1478         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1479         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1480         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1481
1482         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1483         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1484         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1485         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1486
1487         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1488         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1489         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1490         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1491
1492         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1493         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1494         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1495         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1496
1497         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1498         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1499         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1500         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1501
1502         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1503         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1504         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1505         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1506
1507         if (!hdev->dram_scrambler_enable) {
1508                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1509                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1510                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1511                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1512
1513                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1514                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1515                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1516                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1517
1518                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1519                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1520                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1521                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1522
1523                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1524                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1525                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1526                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1527
1528                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1529                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1530                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1531                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1532
1533                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1534                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1535                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1536                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1537
1538                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1539                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1540                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1541                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1542
1543                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1544                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1545                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1546                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1547
1548                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1549                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1550                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1551                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1552
1553                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1554                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1555                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1556                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1557
1558                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1559                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1560                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1561                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1562
1563                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1564                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1565                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1566                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1567
1568                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1569                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1570                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1571                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1572
1573                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1574                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1575                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1576                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1577
1578                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1579                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1580                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1581                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1582
1583                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1584                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1585                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1586                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1587
1588                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1589                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1590                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1591                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1592
1593                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1594                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1595                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1596                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1597
1598                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1599                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1600                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1601                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1602
1603                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1604                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1605                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1606                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1607
1608                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1609                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1610                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1611                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1612
1613                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1614                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1615                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1616                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1617
1618                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1619                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1620                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1621                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1622
1623                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1624                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1625                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1626                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1627         }
1628
1629         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
1630                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1631         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
1632                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1633
1634         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
1635                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1636         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
1637                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1638
1639         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
1640                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1641         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
1642                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1643
1644         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
1645                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1646         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
1647                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1648
1649         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
1650                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1651         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
1652                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1653
1654         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
1655                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1656         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
1657                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1658
1659         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
1660                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1661         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
1662                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1663
1664         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
1665                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1666         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
1667                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1668
1669         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
1670                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1671         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
1672                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1673
1674         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
1675                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1676         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
1677                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1678
1679         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
1680                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1681         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
1682                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1683
1684         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
1685                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1686         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
1687                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1688
1689         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
1690                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1691         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
1692                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1693
1694         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
1695                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1696         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
1697                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1698
1699         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
1700                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1701         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
1702                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1703
1704         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
1705                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1706         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
1707                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1708
1709         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
1710                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1711         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
1712                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1713
1714         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
1715                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1716         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
1717                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1718
1719         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
1720                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1721         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
1722                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1723
1724         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
1725                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1726         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
1727                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1728
1729         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
1730                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1731         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
1732                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1733
1734         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
1735                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1736         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
1737                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1738
1739         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
1740                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1741         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
1742                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1743
1744         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
1745                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1746         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
1747                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1748 }
1749
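/*
 * gaudi_init_hbm_cred() - set the HBM read/write credit counters of the four
 * DMA_IF units and enable credit-based read/write arbitration on both HBM
 * channels. The repeated-nibble credit values are fixed tuning numbers.
 */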
1750 static void gaudi_init_hbm_cred(struct hl_device *hdev)
1751 {
1752         uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
1753
1754         hbm0_wr = 0x33333333;
1755         hbm0_rd = 0x77777777;
1756         hbm1_wr = 0x55555555;
1757         hbm1_rd = 0xDDDDDDDD;
1758
1759         WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
1760         WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
1761         WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
1762         WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
1763
1764         WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
1765         WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
1766         WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
1767         WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
1768
1769         WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
1770         WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
1771         WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
1772         WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
1773
1774         WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
1775         WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
1776         WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
1777         WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
1778
1779         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
1780                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1781                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1782         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
1783                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1784                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1785         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
1786                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1787                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1788         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
1789                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1790                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1791
1792         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
1793                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1794                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1795         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
1796                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1797                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1798         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
1799                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1800                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1801         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
1802                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1803                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1804 }
1805
1806 static void gaudi_init_golden_registers(struct hl_device *hdev)
1807 {
1808         u32 tpc_offset;
1809         int tpc_id, i;
1810
1811         gaudi_init_e2e(hdev);
1812
1813         gaudi_init_hbm_cred(hdev);
1814
1815         hdev->asic_funcs->disable_clock_gating(hdev);
1816
1817         for (tpc_id = 0, tpc_offset = 0;
1818                                 tpc_id < TPC_NUMBER_OF_ENGINES;
1819                                 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
1820                 /* Mask all arithmetic interrupts from TPC */
1821                 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
1822                 /* Set 16 cache lines */
1823                 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
1824                                 ICACHE_FETCH_LINE_NUM, 2);
1825         }
1826
1827         /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
1828         for (i = 0 ; i < 128 ; i += 8)
1829                 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
1830
1831         WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1832         WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1833         WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1834         WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1835 }
1836
1837 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
1838                                         int qman_id, dma_addr_t qman_pq_addr)
1839 {
1840         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
1841         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
1842         u32 q_off, dma_qm_offset;
1843         u32 dma_qm_err_cfg;
1844
1845         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1846
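        /*
         * Cache the sync manager monitor payload (mtr) and sync object (so)
         * register addresses of the east-north (en) and west-south (ws) sync
         * managers; they are programmed below as the CP MSG_BASE addresses of
         * this stream.
         */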
1847         mtr_base_en_lo = lower_32_bits(CFG_BASE +
1848                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1849         mtr_base_en_hi = upper_32_bits(CFG_BASE +
1850                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1851         so_base_en_lo = lower_32_bits(CFG_BASE +
1852                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1853         so_base_en_hi = upper_32_bits(CFG_BASE +
1854                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1855         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
1856                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1857         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
1858                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1859         so_base_ws_lo = lower_32_bits(CFG_BASE +
1860                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1861         so_base_ws_hi = upper_32_bits(CFG_BASE +
1862                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1863
1864         q_off = dma_qm_offset + qman_id * 4;
1865
1866         WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
1867         WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
1868
1869         WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
1870         WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
1871         WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
1872
1873         WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
1874         WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
1875                                                         QMAN_LDMA_SRC_OFFSET);
1876         WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
1877                                                         QMAN_LDMA_DST_OFFSET);
1878
1879         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
1880         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
1881         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
1882         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
1883         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
1884         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
1885         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
1886         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
1887
1888         WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
1889
1890         /* The following configuration is needed only once per QMAN */
1891         if (qman_id == 0) {
1892                 /* Configure RAZWI IRQ */
1893                 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
1894                 if (hdev->stop_on_err) {
1895                         dma_qm_err_cfg |=
1896                                 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
1897                 }
1898
1899                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
1900                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
1901                         lower_32_bits(CFG_BASE +
1902                                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1903                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
1904                         upper_32_bits(CFG_BASE +
1905                                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1906                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
1907                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
1908                                                                         dma_id);
1909
1910                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
1911                                 QM_ARB_ERR_MSG_EN_MASK);
1912
1913                 /* Increase ARB WDT to support streams architecture */
1914                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
1915                                 GAUDI_ARB_WDT_TIMEOUT);
1916
1917                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
1918                                 QMAN_EXTERNAL_MAKE_TRUSTED);
1919
1920                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
1921         }
1922 }
1923
1924 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
1925 {
1926         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
1927         u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
1928
1929         /* Set to the maximum possible value according to the physical size */
1930         WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
1931         WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
1932
1933         /* WA for H/W bug H3-2116 */
1934         WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
1935
1936         /* The STOP_ON bit means the operation is not completed in case of RAZWI */
1937         if (hdev->stop_on_err)
1938                 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
1939
1940         WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
1941         WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
1942                 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1943         WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
1944                 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1945         WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
1946                 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
1947         WREG32(mmDMA0_CORE_PROT + dma_offset,
1948                         1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
1949         /* If the channel is secured, it should be in MMU bypass mode */
1950         WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
1951                         1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
1952         WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
1953 }
1954
1955 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
1956                                 u32 enable_mask)
1957 {
1958         u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1959
1960         WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
1961 }
1962
1963 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
1964 {
1965         struct gaudi_device *gaudi = hdev->asic_specific;
1966         struct hl_hw_queue *q;
1967         int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
1968
1969         if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
1970                 return;
1971
1972         for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
1973                 dma_id = gaudi_dma_assignment[i];
1974                 /*
1975                  * For queues after the CPU Q we need to add 1 to get the
1976                  * correct queue. In addition, the CPU EQ and NIC IRQs need
1977                  * to be added in order to get the correct MSI register.
1978                  */
1979                 if (dma_id > 1) {
1980                         cpu_skip = 1;
1981                         nic_skip = NIC_NUMBER_OF_ENGINES;
1982                 } else {
1983                         cpu_skip = 0;
1984                         nic_skip = 0;
1985                 }
1986
1987                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
1988                         q_idx = 4 * dma_id + j + cpu_skip;
1989                         q = &hdev->kernel_queues[q_idx];
1990                         q->cq_id = cq_id++;
1991                         q->msi_vec = nic_skip + cpu_skip + msi_vec++;
1992                         gaudi_init_pci_dma_qman(hdev, dma_id, j,
1993                                                 q->bus_address);
1994                 }
1995
1996                 gaudi_init_dma_core(hdev, dma_id);
1997
1998                 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
1999         }
2000
2001         gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2002 }
2003
2004 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2005                                         int qman_id, u64 qman_base_addr)
2006 {
2007         u32 mtr_base_lo, mtr_base_hi;
2008         u32 so_base_lo, so_base_hi;
2009         u32 q_off, dma_qm_offset;
2010         u32 dma_qm_err_cfg;
2011
2012         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2013
2014         mtr_base_lo = lower_32_bits(CFG_BASE +
2015                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2016         mtr_base_hi = upper_32_bits(CFG_BASE +
2017                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2018         so_base_lo = lower_32_bits(CFG_BASE +
2019                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2020         so_base_hi = upper_32_bits(CFG_BASE +
2021                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2022
2023         q_off = dma_qm_offset + qman_id * 4;
2024
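        /*
         * Streams 0-3 are the upper CPs: each gets a PQ (base, size, PI/CI)
         * and the CPDMA LDMA offsets. qman_id 4 is the lower CP, which has no
         * PQ and instead carries the per-QMAN RAZWI, arbitration and
         * protection configuration.
         */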
2025         if (qman_id < 4) {
2026                 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2027                                         lower_32_bits(qman_base_addr));
2028                 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2029                                         upper_32_bits(qman_base_addr));
2030
2031                 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2032                 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2033                 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2034
2035                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2036                                                         QMAN_CPDMA_SIZE_OFFSET);
2037                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2038                                                         QMAN_CPDMA_SRC_OFFSET);
2039                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2040                                                         QMAN_CPDMA_DST_OFFSET);
2041         } else {
2042                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2043                                                         QMAN_LDMA_SIZE_OFFSET);
2044                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2045                                                         QMAN_LDMA_SRC_OFFSET);
2046                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2047                                                         QMAN_LDMA_DST_OFFSET);
2048
2049                 /* Configure RAZWI IRQ */
2050                 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2051                 if (hdev->stop_on_err) {
2052                         dma_qm_err_cfg |=
2053                                 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2054                 }
2055                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2056
2057                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2058                         lower_32_bits(CFG_BASE +
2059                                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2060                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2061                         upper_32_bits(CFG_BASE +
2062                                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2063                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2064                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2065                                                                         dma_id);
2066
2067                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2068                                 QM_ARB_ERR_MSG_EN_MASK);
2069
2070                 /* Increase ARB WDT to support streams architecture */
2071                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2072                                 GAUDI_ARB_WDT_TIMEOUT);
2073
2074                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2075                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2076                                 QMAN_INTERNAL_MAKE_TRUSTED);
2077         }
2078
2079         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2080         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2081         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2082         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2083 }
2084
2085 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2086 {
2087         struct gaudi_device *gaudi = hdev->asic_specific;
2088         struct gaudi_internal_qman_info *q;
2089         u64 qman_base_addr;
2090         int i, j, dma_id, internal_q_index;
2091
2092         if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2093                 return;
2094
2095         for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2096                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2097
2098                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2099                          /*
2100                           * Add the CPU queue in order to get the correct queue
2101                           * number as all internal queues are placed after it
2102                           */
2103                         internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2104
2105                         q = &gaudi->internal_qmans[internal_q_index];
2106                         qman_base_addr = (u64) q->pq_dma_addr;
2107                         gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2108                                                 qman_base_addr);
2109                 }
2110
2111                 /* Initializing lower CP for HBM DMA QMAN */
2112                 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2113
2114                 gaudi_init_dma_core(hdev, dma_id);
2115
2116                 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2117         }
2118
2119         gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2120 }
2121
2122 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2123                                         int qman_id, u64 qman_base_addr)
2124 {
2125         u32 mtr_base_lo, mtr_base_hi;
2126         u32 so_base_lo, so_base_hi;
2127         u32 q_off, mme_id;
2128         u32 mme_qm_err_cfg;
2129
2130         mtr_base_lo = lower_32_bits(CFG_BASE +
2131                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2132         mtr_base_hi = upper_32_bits(CFG_BASE +
2133                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2134         so_base_lo = lower_32_bits(CFG_BASE +
2135                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2136         so_base_hi = upper_32_bits(CFG_BASE +
2137                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2138
2139         q_off = mme_offset + qman_id * 4;
2140
2141         if (qman_id < 4) {
2142                 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2143                                         lower_32_bits(qman_base_addr));
2144                 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2145                                         upper_32_bits(qman_base_addr));
2146
2147                 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2148                 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2149                 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2150
2151                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2152                                                         QMAN_CPDMA_SIZE_OFFSET);
2153                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2154                                                         QMAN_CPDMA_SRC_OFFSET);
2155                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2156                                                         QMAN_CPDMA_DST_OFFSET);
2157         } else {
2158                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2159                                                         QMAN_LDMA_SIZE_OFFSET);
2160                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2161                                                         QMAN_LDMA_SRC_OFFSET);
2162                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2163                                                         QMAN_LDMA_DST_OFFSET);
2164
2165                 /* Configure RAZWI IRQ */
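                /*
                 * mme_offset is either 0 (MME0) or twice the per-MME QM
                 * register stride (MME2), so, assuming evenly spaced MME QM
                 * blocks, this yields 0 or 1 - the index of the master MME
                 * being configured.
                 */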
2166                 mme_id = mme_offset /
2167                                 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2168
2169                 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2170                 if (hdev->stop_on_err) {
2171                         mme_qm_err_cfg |=
2172                                 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2173                 }
2174                 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2175                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2176                         lower_32_bits(CFG_BASE +
2177                                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2178                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2179                         upper_32_bits(CFG_BASE +
2180                                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2181                 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2182                         gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2183                                                                         mme_id);
2184
2185                 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2186                                 QM_ARB_ERR_MSG_EN_MASK);
2187
2188                 /* Increase ARB WDT to support streams architecture */
2189                 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2190                                 GAUDI_ARB_WDT_TIMEOUT);
2191
2192                 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2193                 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2194                                 QMAN_INTERNAL_MAKE_TRUSTED);
2195         }
2196
2197         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2198         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2199         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2200         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2201 }
2202
2203 static void gaudi_init_mme_qmans(struct hl_device *hdev)
2204 {
2205         struct gaudi_device *gaudi = hdev->asic_specific;
2206         struct gaudi_internal_qman_info *q;
2207         u64 qman_base_addr;
2208         u32 mme_offset;
2209         int i, internal_q_index;
2210
2211         if (gaudi->hw_cap_initialized & HW_CAP_MME)
2212                 return;
2213
2214         /*
2215          * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2216          * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2217          */
2218
2219         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2220
2221         for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2222                 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2223                 q = &gaudi->internal_qmans[internal_q_index];
2224                 qman_base_addr = (u64) q->pq_dma_addr;
2225                 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2226                                         qman_base_addr);
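                /*
                 * After the first four streams of the N_W MME (mmMME2_QM_BASE)
                 * are configured, move to offset 0 for the S_W MME
                 * (mmMME0_QM_BASE).
                 */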
2227                 if (i == 3)
2228                         mme_offset = 0;
2229         }
2230
2231         /* Initializing lower CP for MME QMANs */
2232         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2233         gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2234         gaudi_init_mme_qman(hdev, 0, 4, 0);
2235
2236         WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2237         WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2238
2239         gaudi->hw_cap_initialized |= HW_CAP_MME;
2240 }
2241
2242 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2243                                 int qman_id, u64 qman_base_addr)
2244 {
2245         u32 mtr_base_lo, mtr_base_hi;
2246         u32 so_base_lo, so_base_hi;
2247         u32 q_off, tpc_id;
2248         u32 tpc_qm_err_cfg;
2249
2250         mtr_base_lo = lower_32_bits(CFG_BASE +
2251                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2252         mtr_base_hi = upper_32_bits(CFG_BASE +
2253                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2254         so_base_lo = lower_32_bits(CFG_BASE +
2255                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2256         so_base_hi = upper_32_bits(CFG_BASE +
2257                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2258
2259         q_off = tpc_offset + qman_id * 4;
2260
2261         if (qman_id < 4) {
2262                 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
2263                                         lower_32_bits(qman_base_addr));
2264                 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
2265                                         upper_32_bits(qman_base_addr));
2266
2267                 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
2268                 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
2269                 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
2270
2271                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2272                                                         QMAN_CPDMA_SIZE_OFFSET);
2273                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2274                                                         QMAN_CPDMA_SRC_OFFSET);
2275                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2276                                                         QMAN_CPDMA_DST_OFFSET);
2277         } else {
2278                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2279                                                         QMAN_LDMA_SIZE_OFFSET);
2280                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2281                                                         QMAN_LDMA_SRC_OFFSET);
2282                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2283                                                         QMAN_LDMA_DST_OFFSET);
2284
2285                 /* Configure RAZWI IRQ */
2286                 tpc_id = tpc_offset /
2287                                 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
2288
2289                 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2290                 if (hdev->stop_on_err) {
2291                         tpc_qm_err_cfg |=
2292                                 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2293                 }
2294
2295                 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
2296                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
2297                         lower_32_bits(CFG_BASE +
2298                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2299                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
2300                         upper_32_bits(CFG_BASE +
2301                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2302                 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
2303                         gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
2304                                                                         tpc_id);
2305
2306                 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
2307                                 QM_ARB_ERR_MSG_EN_MASK);
2308
2309                 /* Increase ARB WDT to support streams architecture */
2310                 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
2311                                 GAUDI_ARB_WDT_TIMEOUT);
2312
2313                 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
2314                 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
2315                                 QMAN_INTERNAL_MAKE_TRUSTED);
2316         }
2317
2318         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2319         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2320         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2321         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2322 }
2323
2324 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
2325 {
2326         struct gaudi_device *gaudi = hdev->asic_specific;
2327         struct gaudi_internal_qman_info *q;
2328         u64 qman_base_addr;
2329         u32 so_base_hi, tpc_offset = 0;
2330         u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
2331                         mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
2332         int i, tpc_id, internal_q_index;
2333
2334         if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
2335                 return;
2336
2337         so_base_hi = upper_32_bits(CFG_BASE +
2338                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2339
2340         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2341                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
2342                         internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
2343                                                 tpc_id * QMAN_STREAMS + i;
2344                         q = &gaudi->internal_qmans[internal_q_index];
2345                         qman_base_addr = (u64) q->pq_dma_addr;
2346                         gaudi_init_tpc_qman(hdev, tpc_offset, i,
2347                                                 qman_base_addr);
2348
2349                         if (i == 3) {
2350                                 /* Initializing lower CP for TPC QMAN */
2351                                 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
2352
2353                                 /* Enable the QMAN and TPC channel */
2354                                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
2355                                                 QMAN_TPC_ENABLE);
2356                         }
2357                 }
2358
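                /*
                 * Point this TPC's sync manager base address (high part) at
                 * the E_N sync manager sync objects.
                 */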
2359                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
2360                                 so_base_hi);
2361
2362                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2363
2364                 gaudi->hw_cap_initialized |=
2365                                 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
2366         }
2367 }
2368
2369 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
2370 {
2371         struct gaudi_device *gaudi = hdev->asic_specific;
2372
2373         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2374                 return;
2375
2376         WREG32(mmDMA0_QM_GLBL_CFG0, 0);
2377         WREG32(mmDMA1_QM_GLBL_CFG0, 0);
2378         WREG32(mmDMA5_QM_GLBL_CFG0, 0);
2379 }
2380
2381 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
2382 {
2383         struct gaudi_device *gaudi = hdev->asic_specific;
2384
2385         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2386                 return;
2387
2388         WREG32(mmDMA2_QM_GLBL_CFG0, 0);
2389         WREG32(mmDMA3_QM_GLBL_CFG0, 0);
2390         WREG32(mmDMA4_QM_GLBL_CFG0, 0);
2391         WREG32(mmDMA6_QM_GLBL_CFG0, 0);
2392         WREG32(mmDMA7_QM_GLBL_CFG0, 0);
2393 }
2394
2395 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
2396 {
2397         struct gaudi_device *gaudi = hdev->asic_specific;
2398
2399         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2400                 return;
2401
2402         WREG32(mmMME2_QM_GLBL_CFG0, 0);
2403         WREG32(mmMME0_QM_GLBL_CFG0, 0);
2404 }
2405
2406 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
2407 {
2408         struct gaudi_device *gaudi = hdev->asic_specific;
2409         u32 tpc_offset = 0;
2410         int tpc_id;
2411
2412         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2413                 return;
2414
2415         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2416                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
2417                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2418         }
2419 }
2420
2421 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
2422 {
2423         struct gaudi_device *gaudi = hdev->asic_specific;
2424
2425         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2426                 return;
2427
2428         /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
2429         WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2430         WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2431         WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2432 }
2433
2434 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
2435 {
2436         struct gaudi_device *gaudi = hdev->asic_specific;
2437
2438         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2439                 return;
2440
2441         /* Stop CPs of HBM DMA QMANs */
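        /* The 0x1F mask stops the four upper CPs and the lower CP */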
2442
2443         WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2444         WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2445         WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2446         WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2447         WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2448 }
2449
2450 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
2451 {
2452         struct gaudi_device *gaudi = hdev->asic_specific;
2453
2454         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2455                 return;
2456
2457         /* Stop CPs of MME QMANs */
2458         WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2459         WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2460 }
2461
2462 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
2463 {
2464         struct gaudi_device *gaudi = hdev->asic_specific;
2465
2466         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2467                 return;
2468
2469         WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2470         WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2471         WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2472         WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2473         WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2474         WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2475         WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2476         WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2477 }
2478
2479 static void gaudi_pci_dma_stall(struct hl_device *hdev)
2480 {
2481         struct gaudi_device *gaudi = hdev->asic_specific;
2482
2483         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2484                 return;
2485
2486         WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2487         WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2488         WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2489 }
2490
2491 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
2492 {
2493         struct gaudi_device *gaudi = hdev->asic_specific;
2494
2495         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2496                 return;
2497
2498         WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2499         WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2500         WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2501         WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2502         WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2503 }
2504
2505 static void gaudi_mme_stall(struct hl_device *hdev)
2506 {
2507         struct gaudi_device *gaudi = hdev->asic_specific;
2508
2509         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2510                 return;
2511
2512         /* WA for H3-1800 bug: do ACC and SBAB writes twice */
2513         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2514         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2515         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2516         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2517         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2518         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2519         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2520         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2521         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2522         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2523         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2524         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2525         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2526         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2527         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2528         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2529 }
2530
2531 static void gaudi_tpc_stall(struct hl_device *hdev)
2532 {
2533         struct gaudi_device *gaudi = hdev->asic_specific;
2534
2535         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2536                 return;
2537
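        /* Assert the stall bit of all TPC engines */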
2538         WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2539         WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2540         WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2541         WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2542         WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2543         WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2544         WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2545         WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2546 }
2547
2548 static void gaudi_set_clock_gating(struct hl_device *hdev)
2549 {
2550         struct gaudi_device *gaudi = hdev->asic_specific;
2551         u32 qman_offset;
2552         bool enable;
2553         int i;
2554
2555         /* If we are in a debug session, don't enable clock gating as it
2556          * may interfere with debugging
2557          */
2558         if (hdev->in_debug)
2559                 return;
2560
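        /* Configure clock gating of the PCI DMA QMANs per the user's mask */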
2561         for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
2562                 enable = !!(hdev->clock_gating_mask &
2563                                 (BIT_ULL(gaudi_dma_assignment[i])));
2564
2565                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2566                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
2567                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2568                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2569                                 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
2570         }
2571
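        /* Configure clock gating of the HBM DMA QMANs per the user's mask */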
2572         for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
2573                 enable = !!(hdev->clock_gating_mask &
2574                                 (BIT_ULL(gaudi_dma_assignment[i])));
2575
2576                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2577                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
2578                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2579                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2580                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2581         }
2582
2583         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
2584         WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2585         WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2586
2587         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
2588         WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2589         WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2590
2591         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2592                 enable = !!(hdev->clock_gating_mask &
2593                                 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
2594
2595                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
2596                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2597                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
2598                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2599
2600                 qman_offset += TPC_QMAN_OFFSET;
2601         }
2602
2603         gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
2604 }
2605
2606 static void gaudi_disable_clock_gating(struct hl_device *hdev)
2607 {
2608         struct gaudi_device *gaudi = hdev->asic_specific;
2609         u32 qman_offset;
2610         int i;
2611
2612         if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE))
2613                 return;
2614
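        /* Clear the CGM configuration of all DMA, MME and TPC QMANs */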
2615         for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
2616                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
2617                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
2618
2619                 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
2620         }
2621
2622         WREG32(mmMME0_QM_CGM_CFG, 0);
2623         WREG32(mmMME0_QM_CGM_CFG1, 0);
2624         WREG32(mmMME2_QM_CGM_CFG, 0);
2625         WREG32(mmMME2_QM_CGM_CFG1, 0);
2626
2627         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2628                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
2629                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
2630
2631                 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
2632         }
2633
2634         gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
2635 }
2636
2637 static void gaudi_enable_timestamp(struct hl_device *hdev)
2638 {
2639         /* Disable the timestamp counter */
2640         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2641
2642         /* Zero the lower/upper parts of the 64-bit counter */
2643         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2644         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2645
2646         /* Enable the counter */
2647         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2648 }
2649
2650 static void gaudi_disable_timestamp(struct hl_device *hdev)
2651 {
2652         /* Disable the timestamp counter */
2653         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2654 }
2655
2656 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
2657 {
2658         u32 wait_timeout_ms;
2659
2660         dev_info(hdev->dev,
2661                 "Halting compute engines and disabling interrupts\n");
2662
2663         if (hdev->pldm)
2664                 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
2665         else
2666                 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
2667
2668
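        /* Stop the QMAN CPs first so no new work is fetched, then stall the
         * engines themselves
         */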
2669         gaudi_stop_mme_qmans(hdev);
2670         gaudi_stop_tpc_qmans(hdev);
2671         gaudi_stop_hbm_dma_qmans(hdev);
2672         gaudi_stop_pci_dma_qmans(hdev);
2673
2674         hdev->asic_funcs->disable_clock_gating(hdev);
2675
2676         msleep(wait_timeout_ms);
2677
2678         gaudi_pci_dma_stall(hdev);
2679         gaudi_hbm_dma_stall(hdev);
2680         gaudi_tpc_stall(hdev);
2681         gaudi_mme_stall(hdev);
2682
2683         msleep(wait_timeout_ms);
2684
2685         gaudi_disable_mme_qmans(hdev);
2686         gaudi_disable_tpc_qmans(hdev);
2687         gaudi_disable_hbm_dma_qmans(hdev);
2688         gaudi_disable_pci_dma_qmans(hdev);
2689
2690         gaudi_disable_timestamp(hdev);
2691
2692         gaudi_disable_msi(hdev);
2693 }
2694
2695 static int gaudi_mmu_init(struct hl_device *hdev)
2696 {
2697         struct asic_fixed_properties *prop = &hdev->asic_prop;
2698         struct gaudi_device *gaudi = hdev->asic_specific;
2699         u64 hop0_addr;
2700         int rc, i;
2701
2702         if (!hdev->mmu_enable)
2703                 return 0;
2704
2705         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
2706                 return 0;
2707
2708         hdev->dram_supports_virtual_memory = false;
2709
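        /* Program the hop0 page table address of every ASID */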
2710         for (i = 0 ; i < prop->max_asid ; i++) {
2711                 hop0_addr = prop->mmu_pgt_addr +
2712                                 (i * prop->mmu_hop_table_size);
2713
2714                 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2715                 if (rc) {
2716                         dev_err(hdev->dev,
2717                                 "failed to set hop0 addr for asid %d\n", i);
2718                         goto err;
2719                 }
2720         }
2721
2722         /* Init the MMU cache management page */
2723         WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
2724         WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2725
2726         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
2727
2728         WREG32(mmMMU_UP_MMU_ENABLE, 1);
2729         WREG32(mmMMU_UP_SPI_MASK, 0xF);
2730
2731         WREG32(mmSTLB_HOP_CONFIGURATION,
2732                         hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
2733
2734         /*
2735          * The H/W expects the first PI after init to be 1. After wraparound
2736          * we'll write 0.
2737          */
2738         gaudi->mmu_cache_inv_pi = 1;
2739
2740         gaudi->hw_cap_initialized |= HW_CAP_MMU;
2741
2742         return 0;
2743
2744 err:
2745         return rc;
2746 }
2747
2748 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
2749 {
2750         void __iomem *dst;
2751
2752         /* HBM scrambler must be initialized before pushing F/W to HBM */
2753         gaudi_init_scrambler_hbm(hdev);
2754
2755         dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
2756
2757         return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst);
2758 }
2759
2760 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
2761 {
2762         void __iomem *dst;
2763
2764         dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
2765
2766         return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst);
2767 }
2768
2769 static void gaudi_read_device_fw_version(struct hl_device *hdev,
2770                                         enum hl_fw_component fwc)
2771 {
2772         const char *name;
2773         u32 ver_off;
2774         char *dest;
2775
2776         switch (fwc) {
2777         case FW_COMP_UBOOT:
2778                 ver_off = RREG32(mmUBOOT_VER_OFFSET);
2779                 dest = hdev->asic_prop.uboot_ver;
2780                 name = "U-Boot";
2781                 break;
2782         case FW_COMP_PREBOOT:
2783                 ver_off = RREG32(mmPREBOOT_VER_OFFSET);
2784                 dest = hdev->asic_prop.preboot_ver;
2785                 name = "Preboot";
2786                 break;
2787         default:
2788                 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2789                 return;
2790         }
2791
2792         ver_off &= ~((u32)SRAM_BASE_ADDR);
2793
2794         if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2795                 memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
2796                                                         VERSION_MAX_LEN);
2797         } else {
2798                 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2799                                                                 name, ver_off);
2800                 strcpy(dest, "unavailable");
2801         }
2802 }
2803
2804 static int gaudi_init_cpu(struct hl_device *hdev)
2805 {
2806         struct gaudi_device *gaudi = hdev->asic_specific;
2807         int rc;
2808
2809         if (!hdev->cpu_enable)
2810                 return 0;
2811
2812         if (gaudi->hw_cap_initialized & HW_CAP_CPU)
2813                 return 0;
2814
2815         /*
2816          * The device CPU works with 40-bit addresses.
2817          * This register sets the extension to 50 bits.
2818          */
2819         WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
2820
2821         rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
2822                         mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
2823                         mmCPU_CMD_STATUS_TO_HOST,
2824                         mmCPU_BOOT_ERR0,
2825                         !hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
2826                         GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
2827
2828         if (rc)
2829                 return rc;
2830
2831         gaudi->hw_cap_initialized |= HW_CAP_CPU;
2832
2833         return 0;
2834 }
2835
2836 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
2837 {
2838         struct gaudi_device *gaudi = hdev->asic_specific;
2839         struct hl_eq *eq;
2840         u32 status;
2841         struct hl_hw_queue *cpu_pq =
2842                         &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
2843         int err;
2844
2845         if (!hdev->cpu_queues_enable)
2846                 return 0;
2847
2848         if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
2849                 return 0;
2850
2851         eq = &hdev->event_queue;
2852
2853         WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
2854         WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
2855
2856         WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
2857         WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
2858
2859         WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
2860                         lower_32_bits(hdev->cpu_accessible_dma_address));
2861         WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
2862                         upper_32_bits(hdev->cpu_accessible_dma_address));
2863
2864         WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
2865         WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
2866         WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
2867
2868         /* Used for EQ CI */
2869         WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
2870
2871         WREG32(mmCPU_IF_PF_PQ_PI, 0);
2872
2873         if (gaudi->multi_msi_mode)
2874                 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
2875         else
2876                 WREG32(mmCPU_IF_QUEUE_INIT,
2877                         PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
2878
2879         WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);
2880
2881         err = hl_poll_timeout(
2882                 hdev,
2883                 mmCPU_IF_QUEUE_INIT,
2884                 status,
2885                 (status == PQ_INIT_STATUS_READY_FOR_HOST),
2886                 1000,
2887                 cpu_timeout);
2888
2889         if (err) {
2890                 dev_err(hdev->dev,
2891                         "Failed to communicate with Device CPU (CPU-CP timeout)\n");
2892                 return -EIO;
2893         }
2894
2895         gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
2896         return 0;
2897 }
2898
2899 static void gaudi_pre_hw_init(struct hl_device *hdev)
2900 {
2901         /* Perform read from the device to make sure device is up */
2902         RREG32(mmHW_STATE);
2903
2904         /* Set the access through PCI bars (Linux driver only) as
2905          * secured
2906          */
2907         WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
2908                         (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
2909                         PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
2910
2911         /* Perform read to flush the waiting writes to ensure
2912          * configuration was set in the device
2913          */
2914         RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
2915
2916         /*
2917          * Let's mark in the H/W that we have reached this point. We check
2918          * this value in the reset_before_init function to understand whether
2919          * we need to reset the chip before doing H/W init. This register is
2920          * cleared by the H/W upon H/W reset
2921          */
2922         WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
2923
2924         /* Configure the reset registers. Must be done as early as possible
2925          * in case we fail during H/W initialization
2926          */
2927         WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
2928                                         (CFG_RST_H_DMA_MASK |
2929                                         CFG_RST_H_MME_MASK |
2930                                         CFG_RST_H_SM_MASK |
2931                                         CFG_RST_H_TPC_7_MASK));
2932
2933         WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
2934
2935         WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
2936                                         (CFG_RST_H_HBM_MASK |
2937                                         CFG_RST_H_TPC_7_MASK |
2938                                         CFG_RST_H_NIC_MASK |
2939                                         CFG_RST_H_SM_MASK |
2940                                         CFG_RST_H_DMA_MASK |
2941                                         CFG_RST_H_MME_MASK |
2942                                         CFG_RST_H_CPU_MASK |
2943                                         CFG_RST_H_MMU_MASK));
2944
2945         WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
2946                                         (CFG_RST_L_IF_MASK |
2947                                         CFG_RST_L_PSOC_MASK |
2948                                         CFG_RST_L_TPC_MASK));
2949 }
2950
2951 static int gaudi_hw_init(struct hl_device *hdev)
2952 {
2953         int rc;
2954
2955         dev_info(hdev->dev, "Starting initialization of H/W\n");
2956
2957         gaudi_pre_hw_init(hdev);
2958
2959         gaudi_init_pci_dma_qmans(hdev);
2960
2961         gaudi_init_hbm_dma_qmans(hdev);
2962
2963         rc = gaudi_init_cpu(hdev);
2964         if (rc) {
2965                 dev_err(hdev->dev, "failed to initialize CPU\n");
2966                 return rc;
2967         }
2968
2969         /* SRAM scrambler must be initialized after CPU is running from HBM */
2970         gaudi_init_scrambler_sram(hdev);
2971
2972         /* This is here just in case we are working without CPU */
2973         gaudi_init_scrambler_hbm(hdev);
2974
2975         gaudi_init_golden_registers(hdev);
2976
2977         rc = gaudi_mmu_init(hdev);
2978         if (rc)
2979                 return rc;
2980
2981         gaudi_init_security(hdev);
2982
2983         gaudi_init_mme_qmans(hdev);
2984
2985         gaudi_init_tpc_qmans(hdev);
2986
2987         hdev->asic_funcs->set_clock_gating(hdev);
2988
2989         gaudi_enable_timestamp(hdev);
2990
2991         /* MSI must be enabled before CPU queues are initialized */
2992         rc = gaudi_enable_msi(hdev);
2993         if (rc)
2994                 goto disable_queues;
2995
2996         /* must be called after MSI was enabled */
2997         rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
2998         if (rc) {
2999                 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3000                         rc);
3001                 goto disable_msi;
3002         }
3003
3004         /* Perform read from the device to flush all configuration */
3005         RREG32(mmHW_STATE);
3006
3007         return 0;
3008
3009 disable_msi:
3010         gaudi_disable_msi(hdev);
3011 disable_queues:
3012         gaudi_disable_mme_qmans(hdev);
3013         gaudi_disable_pci_dma_qmans(hdev);
3014
3015         return rc;
3016 }
3017
3018 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
3019 {
3020         struct gaudi_device *gaudi = hdev->asic_specific;
3021         u32 status, reset_timeout_ms, cpu_timeout_ms, boot_strap = 0;
3022
3023         if (!hard_reset) {
3024                 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
3025                 return;
3026         }
3027
3028         if (hdev->pldm) {
3029                 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
3030                 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3031         } else {
3032                 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
3033                 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
3034         }
3035
3036         /* Set the device to handle FLR by H/W, as we will put the device CPU
3037          * into halt mode
3038          */
3039         WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
3040                                         PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
3041
3042         /* The state of the CPU is unknown, so make sure it is stopped by
3043          * any means necessary
3044          */
3045         WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
3046         WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);
3047
3048         msleep(cpu_timeout_ms);
3049
3050         /* Tell ASIC not to re-initialize PCIe */
3051         WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
3052
3053         boot_strap = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
3054
3055         /* H/W bug WA:
3056          * rdata[31:0] = strap_read_val;
3057          * wdata[31:0] = rdata[30:21],1'b0,rdata[20:0]
3058          */
3059         boot_strap = (((boot_strap & 0x7FE00000) << 1) |
3060                         (boot_strap & 0x001FFFFF));
3061         WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap & ~0x2);
3062
3063         /* Restart BTL/BLR upon hard-reset */
3064         WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
3065
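        /* Issue the hard reset by asserting the SW all reset */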
3066         WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
3067                         1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
3068         dev_info(hdev->dev,
3069                 "Issued HARD reset command, going to wait %dms\n",
3070                 reset_timeout_ms);
3071
3072         /*
3073          * After hard reset, we can't poll the BTM_FSM register because the PSOC
3074          * itself is in reset. Need to wait until the reset is deasserted
3075          */
3076         msleep(reset_timeout_ms);
3077
3078         status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
3079         if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
3080                 dev_err(hdev->dev,
3081                         "Timeout while waiting for device to reset 0x%x\n",
3082                         status);
3083
3084         WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap);
3085
3086         gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
3087                                         HW_CAP_HBM | HW_CAP_PCI_DMA |
3088                                         HW_CAP_MME | HW_CAP_TPC_MASK |
3089                                         HW_CAP_HBM_DMA | HW_CAP_PLL |
3090                                         HW_CAP_MMU |
3091                                         HW_CAP_SRAM_SCRAMBLER |
3092                                         HW_CAP_HBM_SCRAMBLER |
3093                                         HW_CAP_CLK_GATE);
3094
3095         memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
3096 }
3097
3098 static int gaudi_suspend(struct hl_device *hdev)
3099 {
3100         int rc;
3101
3102         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
3103         if (rc)
3104                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
3105
3106         return rc;
3107 }
3108
3109 static int gaudi_resume(struct hl_device *hdev)
3110 {
3111         return gaudi_init_iatu(hdev);
3112 }
3113
3114 static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
3115                         void *cpu_addr, dma_addr_t dma_addr, size_t size)
3116 {
3117         int rc;
3118
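        /* Map the CB's coherent memory to user-space; the DMA API expects the
         * bus address without the device-side HOST_PHYS_BASE offset
         */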
3119         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
3120                         VM_DONTCOPY | VM_NORESERVE;
3121
3122         rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
3123                                 (dma_addr - HOST_PHYS_BASE), size);
3124         if (rc)
3125                 dev_err(hdev->dev, "dma_mmap_coherent error %d\n", rc);
3126
3127         return rc;
3128 }
3129
3130 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
3131 {
3132         struct gaudi_device *gaudi = hdev->asic_specific;
3133         u32 db_reg_offset, db_value, dma_qm_offset, q_off;
3134         int dma_id;
3135         bool invalid_queue = false;
3136
3137         switch (hw_queue_id) {
3138         case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
3139                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
3140                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3141                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3142                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3143                 break;
3144
3145         case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
3146                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
3147                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3148                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3149                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3150                 break;
3151
3152         case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
3153                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
3154                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3155                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3156                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3157                 break;
3158
3159         case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
3160                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
3161                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3162                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3163                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3164                 break;
3165
3166         case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
3167                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
3168                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3169                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3170                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3171                 break;
3172
3173         case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
3174                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_3];
3175                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3176                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3177                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3178                 break;
3179
3180         case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
3181                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
3182                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3183                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3184                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3185                 break;
3186
3187         case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
3188                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
3189                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3190                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3191                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3192                 break;
3193
3194         case GAUDI_QUEUE_ID_CPU_PQ:
3195                 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3196                         db_reg_offset = mmCPU_IF_PF_PQ_PI;
3197                 else
3198                         invalid_queue = true;
3199                 break;
3200
3201         case GAUDI_QUEUE_ID_MME_0_0:
3202                 db_reg_offset = mmMME2_QM_PQ_PI_0;
3203                 break;
3204
3205         case GAUDI_QUEUE_ID_MME_0_1:
3206                 db_reg_offset = mmMME2_QM_PQ_PI_1;
3207                 break;
3208
3209         case GAUDI_QUEUE_ID_MME_0_2:
3210                 db_reg_offset = mmMME2_QM_PQ_PI_2;
3211                 break;
3212
3213         case GAUDI_QUEUE_ID_MME_0_3:
3214                 db_reg_offset = mmMME2_QM_PQ_PI_3;
3215                 break;
3216
3217         case GAUDI_QUEUE_ID_MME_1_0:
3218                 db_reg_offset = mmMME0_QM_PQ_PI_0;
3219                 break;
3220
3221         case GAUDI_QUEUE_ID_MME_1_1:
3222                 db_reg_offset = mmMME0_QM_PQ_PI_1;
3223                 break;
3224
3225         case GAUDI_QUEUE_ID_MME_1_2:
3226                 db_reg_offset = mmMME0_QM_PQ_PI_2;
3227                 break;
3228
3229         case GAUDI_QUEUE_ID_MME_1_3:
3230                 db_reg_offset = mmMME0_QM_PQ_PI_3;
3231                 break;
3232
3233         case GAUDI_QUEUE_ID_TPC_0_0:
3234                 db_reg_offset = mmTPC0_QM_PQ_PI_0;
3235                 break;
3236
3237         case GAUDI_QUEUE_ID_TPC_0_1:
3238                 db_reg_offset = mmTPC0_QM_PQ_PI_1;
3239                 break;
3240
3241         case GAUDI_QUEUE_ID_TPC_0_2:
3242                 db_reg_offset = mmTPC0_QM_PQ_PI_2;
3243                 break;
3244
3245         case GAUDI_QUEUE_ID_TPC_0_3:
3246                 db_reg_offset = mmTPC0_QM_PQ_PI_3;
3247                 break;
3248
3249         case GAUDI_QUEUE_ID_TPC_1_0:
3250                 db_reg_offset = mmTPC1_QM_PQ_PI_0;
3251                 break;
3252
3253         case GAUDI_QUEUE_ID_TPC_1_1:
3254                 db_reg_offset = mmTPC1_QM_PQ_PI_1;
3255                 break;
3256
3257         case GAUDI_QUEUE_ID_TPC_1_2:
3258                 db_reg_offset = mmTPC1_QM_PQ_PI_2;
3259                 break;
3260
3261         case GAUDI_QUEUE_ID_TPC_1_3:
3262                 db_reg_offset = mmTPC1_QM_PQ_PI_3;
3263                 break;
3264
3265         case GAUDI_QUEUE_ID_TPC_2_0:
3266                 db_reg_offset = mmTPC2_QM_PQ_PI_0;
3267                 break;
3268
3269         case GAUDI_QUEUE_ID_TPC_2_1:
3270                 db_reg_offset = mmTPC2_QM_PQ_PI_1;
3271                 break;
3272
3273         case GAUDI_QUEUE_ID_TPC_2_2:
3274                 db_reg_offset = mmTPC2_QM_PQ_PI_2;
3275                 break;
3276
3277         case GAUDI_QUEUE_ID_TPC_2_3:
3278                 db_reg_offset = mmTPC2_QM_PQ_PI_3;
3279                 break;
3280
3281         case GAUDI_QUEUE_ID_TPC_3_0:
3282                 db_reg_offset = mmTPC3_QM_PQ_PI_0;
3283                 break;
3284
3285         case GAUDI_QUEUE_ID_TPC_3_1:
3286                 db_reg_offset = mmTPC3_QM_PQ_PI_1;
3287                 break;
3288
3289         case GAUDI_QUEUE_ID_TPC_3_2:
3290                 db_reg_offset = mmTPC3_QM_PQ_PI_2;
3291                 break;
3292
3293         case GAUDI_QUEUE_ID_TPC_3_3:
3294                 db_reg_offset = mmTPC3_QM_PQ_PI_3;
3295                 break;
3296
3297         case GAUDI_QUEUE_ID_TPC_4_0:
3298                 db_reg_offset = mmTPC4_QM_PQ_PI_0;
3299                 break;
3300
3301         case GAUDI_QUEUE_ID_TPC_4_1:
3302                 db_reg_offset = mmTPC4_QM_PQ_PI_1;
3303                 break;
3304
3305         case GAUDI_QUEUE_ID_TPC_4_2:
3306                 db_reg_offset = mmTPC4_QM_PQ_PI_2;
3307                 break;
3308
3309         case GAUDI_QUEUE_ID_TPC_4_3:
3310                 db_reg_offset = mmTPC4_QM_PQ_PI_3;
3311                 break;
3312
3313         case GAUDI_QUEUE_ID_TPC_5_0:
3314                 db_reg_offset = mmTPC5_QM_PQ_PI_0;
3315                 break;
3316
3317         case GAUDI_QUEUE_ID_TPC_5_1:
3318                 db_reg_offset = mmTPC5_QM_PQ_PI_1;
3319                 break;
3320
3321         case GAUDI_QUEUE_ID_TPC_5_2:
3322                 db_reg_offset = mmTPC5_QM_PQ_PI_2;
3323                 break;
3324
3325         case GAUDI_QUEUE_ID_TPC_5_3:
3326                 db_reg_offset = mmTPC5_QM_PQ_PI_3;
3327                 break;
3328
3329         case GAUDI_QUEUE_ID_TPC_6_0:
3330                 db_reg_offset = mmTPC6_QM_PQ_PI_0;
3331                 break;
3332
3333         case GAUDI_QUEUE_ID_TPC_6_1:
3334                 db_reg_offset = mmTPC6_QM_PQ_PI_1;
3335                 break;
3336
3337         case GAUDI_QUEUE_ID_TPC_6_2:
3338                 db_reg_offset = mmTPC6_QM_PQ_PI_2;
3339                 break;
3340
3341         case GAUDI_QUEUE_ID_TPC_6_3:
3342                 db_reg_offset = mmTPC6_QM_PQ_PI_3;
3343                 break;
3344
3345         case GAUDI_QUEUE_ID_TPC_7_0:
3346                 db_reg_offset = mmTPC7_QM_PQ_PI_0;
3347                 break;
3348
3349         case GAUDI_QUEUE_ID_TPC_7_1:
3350                 db_reg_offset = mmTPC7_QM_PQ_PI_1;
3351                 break;
3352
3353         case GAUDI_QUEUE_ID_TPC_7_2:
3354                 db_reg_offset = mmTPC7_QM_PQ_PI_2;
3355                 break;
3356
3357         case GAUDI_QUEUE_ID_TPC_7_3:
3358                 db_reg_offset = mmTPC7_QM_PQ_PI_3;
3359                 break;
3360
3361         default:
3362                 invalid_queue = true;
3363         }
3364
3365         if (invalid_queue) {
3366                 /* Should never get here */
3367                 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
3368                         hw_queue_id);
3369                 return;
3370         }
3371
3372         db_value = pi;
3373
3374         /* ring the doorbell */
3375         WREG32(db_reg_offset, db_value);
3376
3377         if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
3378                 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
3379                                 GAUDI_EVENT_PI_UPDATE);
3380 }
3381
3382 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
3383                                 struct hl_bd *bd)
3384 {
3385         __le64 *pbd = (__le64 *) bd;
3386
3387         /* The QMANs are in host memory, so a simple copy suffices */
3388         pqe[0] = pbd[0];
3389         pqe[1] = pbd[1];
3390 }
3391
3392 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
3393                                         dma_addr_t *dma_handle, gfp_t flags)
3394 {
3395         void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
3396                                                 dma_handle, flags);
3397
3398         /* Shift to the device's base physical address of host memory */
3399         if (kernel_addr)
3400                 *dma_handle += HOST_PHYS_BASE;
3401
3402         return kernel_addr;
3403 }
3404
3405 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
3406                 void *cpu_addr, dma_addr_t dma_handle)
3407 {
3408         /* Cancel the device's base physical address of host memory */
3409         dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
3410
3411         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
3412 }
3413
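/* Return the kernel address, bus address and depth (in entries) of an
 * internal queue's PQ
 */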
3414 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
3415                                 u32 queue_id, dma_addr_t *dma_handle,
3416                                 u16 *queue_len)
3417 {
3418         struct gaudi_device *gaudi = hdev->asic_specific;
3419         struct gaudi_internal_qman_info *q;
3420
3421         if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
3422                         gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
3423                 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
3424                 return NULL;
3425         }
3426
3427         q = &gaudi->internal_qmans[queue_id];
3428         *dma_handle = q->pq_dma_addr;
3429         *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
3430
3431         return q->pq_kernel_addr;
3432 }
3433
3434 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
3435                                 u16 len, u32 timeout, long *result)
3436 {
3437         struct gaudi_device *gaudi = hdev->asic_specific;
3438
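        /* Without an initialized CPU queue there is no one to answer, so
         * return success with a zero result
         */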
3439         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
3440                 if (result)
3441                         *result = 0;
3442                 return 0;
3443         }
3444
3445         if (!timeout)
3446                 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
3447
3448         return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
3449                                                 timeout, result);
3450 }
3451
3452 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
3453 {
3454         struct packet_msg_prot *fence_pkt;
3455         dma_addr_t pkt_dma_addr;
3456         u32 fence_val, tmp, timeout_usec;
3457         dma_addr_t fence_dma_addr;
3458         u32 *fence_ptr;
3459         int rc;
3460
3461         if (hdev->pldm)
3462                 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
3463         else
3464                 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
3465
3466         fence_val = GAUDI_QMAN0_FENCE_VAL;
3467
3468         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
3469                                                         &fence_dma_addr);
3470         if (!fence_ptr) {
3471                 dev_err(hdev->dev,
3472                         "Failed to allocate memory for H/W queue %d testing\n",
3473                         hw_queue_id);
3474                 return -ENOMEM;
3475         }
3476
3477         *fence_ptr = 0;
3478
3479         fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
3480                                         sizeof(struct packet_msg_prot),
3481                                         GFP_KERNEL, &pkt_dma_addr);
3482         if (!fence_pkt) {
3483                 dev_err(hdev->dev,
3484                         "Failed to allocate packet for H/W queue %d testing\n",
3485                         hw_queue_id);
3486                 rc = -ENOMEM;
3487                 goto free_fence_ptr;
3488         }
3489
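        /* Build a MSG_PROT packet that writes the fence value to the scratch
         * buffer
         */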
3490         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
3491         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
3492         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
3493
3494         fence_pkt->ctl = cpu_to_le32(tmp);
3495         fence_pkt->value = cpu_to_le32(fence_val);
3496         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
3497
3498         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
3499                                         sizeof(struct packet_msg_prot),
3500                                         pkt_dma_addr);
3501         if (rc) {
3502                 dev_err(hdev->dev,
3503                         "Failed to send fence packet to H/W queue %d\n",
3504                         hw_queue_id);
3505                 goto free_pkt;
3506         }
3507
3508         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
3509                                         1000, timeout_usec, true);
3510
3511         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
3512
3513         if (rc == -ETIMEDOUT) {
3514                 dev_err(hdev->dev,
3515                         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
3516                         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
3517                 rc = -EIO;
3518         }
3519
3520 free_pkt:
3521         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
3522                                         pkt_dma_addr);
3523 free_fence_ptr:
3524         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
3525                                         fence_dma_addr);
3526         return rc;
3527 }
3528
3529 static int gaudi_test_cpu_queue(struct hl_device *hdev)
3530 {
3531         struct gaudi_device *gaudi = hdev->asic_specific;
3532
3533         /*
3534          * Check the capability here because send_cpu_message() won't update
3535          * the result value if the CPU queue capability isn't set
3536          */
3537         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
3538                 return 0;
3539
3540         return hl_fw_test_cpu_queue(hdev);
3541 }
3542
3543 static int gaudi_test_queues(struct hl_device *hdev)
3544 {
3545         int i, rc, ret_val = 0;
3546
3547         for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
3548                 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
3549                         rc = gaudi_test_queue(hdev, i);
3550                         if (rc)
3551                                 ret_val = -EINVAL;
3552                 }
3553         }
3554
3555         rc = gaudi_test_cpu_queue(hdev);
3556         if (rc)
3557                 ret_val = -EINVAL;
3558
3559         return ret_val;
3560 }
3561
3562 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3563                 gfp_t mem_flags, dma_addr_t *dma_handle)
3564 {
3565         void *kernel_addr;
3566
3567         if (size > GAUDI_DMA_POOL_BLK_SIZE)
3568                 return NULL;
3569
3570         kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3571
3572         /* Shift to the device's base physical address of host memory */
3573         if (kernel_addr)
3574                 *dma_handle += HOST_PHYS_BASE;
3575
3576         return kernel_addr;
3577 }
3578
3579 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
3580                         dma_addr_t dma_addr)
3581 {
3582         /* Cancel the device's base physical address of host memory */
3583         dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3584
3585         dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3586 }
3587
3588 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
3589                                         size_t size, dma_addr_t *dma_handle)
3590 {
3591         return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3592 }
3593
3594 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
3595                                                 size_t size, void *vaddr)
3596 {
3597         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
3598 }
3599
3600 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
3601                         int nents, enum dma_data_direction dir)
3602 {
3603         struct scatterlist *sg;
3604         int i;
3605
3606         if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
3607                 return -ENOMEM;
3608
3609         /* Shift to the device's base physical address of host memory */
3610         for_each_sg(sgl, sg, nents, i)
3611                 sg->dma_address += HOST_PHYS_BASE;
3612
3613         return 0;
3614 }
3615
3616 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
3617                         int nents, enum dma_data_direction dir)
3618 {
3619         struct scatterlist *sg;
3620         int i;
3621
3622         /* Cancel the device's base physical address of host memory */
3623         for_each_sg(sgl, sg, nents, i)
3624                 sg->dma_address -= HOST_PHYS_BASE;
3625
3626         dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3627 }
3628
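/* Calculate how many bytes of LIN_DMA packets the patched CB needs: one
 * packet per merged chunk of contiguous SG entries
 */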
3629 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
3630                                         struct sg_table *sgt)
3631 {
3632         struct scatterlist *sg, *sg_next_iter;
3633         u32 count, dma_desc_cnt;
3634         u64 len, len_next;
3635         dma_addr_t addr, addr_next;
3636
3637         dma_desc_cnt = 0;
3638
3639         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3640
3641                 len = sg_dma_len(sg);
3642                 addr = sg_dma_address(sg);
3643
3644                 if (len == 0)
3645                         break;
3646
3647                 while ((count + 1) < sgt->nents) {
3648                         sg_next_iter = sg_next(sg);
3649                         len_next = sg_dma_len(sg_next_iter);
3650                         addr_next = sg_dma_address(sg_next_iter);
3651
3652                         if (len_next == 0)
3653                                 break;
3654
3655                         if ((addr + len == addr_next) &&
3656                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3657                                 len += len_next;
3658                                 count++;
3659                                 sg = sg_next_iter;
3660                         } else {
3661                                 break;
3662                         }
3663                 }
3664
3665                 dma_desc_cnt++;
3666         }
3667
3668         return dma_desc_cnt * sizeof(struct packet_lin_dma);
3669 }
3670
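/* Pin the host buffer of a user LIN_DMA packet (unless it is already pinned),
 * DMA-map it and account for the descriptors it adds to the patched CB
 */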
3671 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
3672                                 struct hl_cs_parser *parser,
3673                                 struct packet_lin_dma *user_dma_pkt,
3674                                 u64 addr, enum dma_data_direction dir)
3675 {
3676         struct hl_userptr *userptr;
3677         int rc;
3678
3679         if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3680                         parser->job_userptr_list, &userptr))
3681                 goto already_pinned;
3682
3683         userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
3684         if (!userptr)
3685                 return -ENOMEM;
3686
3687         rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3688                                 userptr);
3689         if (rc)
3690                 goto free_userptr;
3691
3692         list_add_tail(&userptr->job_node, parser->job_userptr_list);
3693
3694         rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3695                                         userptr->sgt->nents, dir);
3696         if (rc) {
3697                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3698                 goto unpin_memory;
3699         }
3700
3701         userptr->dma_mapped = true;
3702         userptr->dir = dir;
3703
3704 already_pinned:
3705         parser->patched_cb_size +=
3706                         gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
3707
3708         return 0;
3709
3710 unpin_memory:
3711         list_del(&userptr->job_node);
3712         hl_unpin_host_memory(hdev, userptr);
3713 free_userptr:
3714         kfree(userptr);
3715         return rc;
3716 }
3717
3718 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
3719                                 struct hl_cs_parser *parser,
3720                                 struct packet_lin_dma *user_dma_pkt,
3721                                 bool src_in_host)
3722 {
3723         enum dma_data_direction dir;
3724         bool skip_host_mem_pin = false, user_memset;
3725         u64 addr;
3726         int rc = 0;
3727
3728         user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
3729                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3730                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3731
3732         if (src_in_host) {
3733                 if (user_memset)
3734                         skip_host_mem_pin = true;
3735
3736                 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
3737                 dir = DMA_TO_DEVICE;
3738                 addr = le64_to_cpu(user_dma_pkt->src_addr);
3739         } else {
3740                 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
3741                 dir = DMA_FROM_DEVICE;
3742                 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3743                                 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3744                                 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3745         }
3746
3747         if (skip_host_mem_pin)
3748                 parser->patched_cb_size += sizeof(*user_dma_pkt);
3749         else
3750                 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3751                                                 addr, dir);
3752
3753         return rc;
3754 }
3755
3756 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3757                                 struct hl_cs_parser *parser,
3758                                 struct packet_lin_dma *user_dma_pkt)
3759 {
3760         bool src_in_host = false;
3761         u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3762                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3763                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3764
3765         dev_dbg(hdev->dev, "DMA packet details:\n");
3766         dev_dbg(hdev->dev, "source == 0x%llx\n",
3767                                 le64_to_cpu(user_dma_pkt->src_addr));
3768         dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
3769         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3770
3771         /*
3772          * Special handling for DMA with size 0. Bypass all validations
3773          * because no transactions will be done except for WR_COMP, which
3774          * is not a security issue
3775          */
3776         if (!le32_to_cpu(user_dma_pkt->tsize)) {
3777                 parser->patched_cb_size += sizeof(*user_dma_pkt);
3778                 return 0;
3779         }
3780
3781         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3782                 src_in_host = true;
3783
3784         return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
3785                                                 src_in_host);
3786 }
3787
3788 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
3789                                         struct hl_cs_parser *parser,
3790                                         struct packet_load_and_exe *user_pkt)
3791 {
3792         u32 cfg;
3793
3794         cfg = le32_to_cpu(user_pkt->cfg);
3795
3796         if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
3797                 dev_err(hdev->dev,
3798                         "User not allowed to use Load and Execute\n");
3799                 return -EPERM;
3800         }
3801
3802         parser->patched_cb_size += sizeof(struct packet_load_and_exe);
3803
3804         return 0;
3805 }
3806
3807 static int gaudi_validate_cb(struct hl_device *hdev,
3808                         struct hl_cs_parser *parser, bool is_mmu)
3809 {
3810         u32 cb_parsed_length = 0;
3811         int rc = 0;
3812
3813         parser->patched_cb_size = 0;
3814
3815         /* user_cb_size is more than 0 so the loop will always be executed */
3816         while (cb_parsed_length < parser->user_cb_size) {
3817                 enum packet_id pkt_id;
3818                 u16 pkt_size;
3819                 struct gaudi_packet *user_pkt;
3820
3821                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
3822
3823                 pkt_id = (enum packet_id) (
3824                                 (le64_to_cpu(user_pkt->header) &
3825                                 PACKET_HEADER_PACKET_ID_MASK) >>
3826                                         PACKET_HEADER_PACKET_ID_SHIFT);
3827
3828                 if (!validate_packet_id(pkt_id)) {
3829                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
3830                         rc = -EINVAL;
3831                         break;
3832                 }
3833
3834                 pkt_size = gaudi_packet_sizes[pkt_id];
3835                 cb_parsed_length += pkt_size;
3836                 if (cb_parsed_length > parser->user_cb_size) {
3837                         dev_err(hdev->dev,
3838                                 "packet 0x%x is out of CB boundary\n", pkt_id);
3839                         rc = -EINVAL;
3840                         break;
3841                 }
3842
3843                 switch (pkt_id) {
3844                 case PACKET_MSG_PROT:
3845                         dev_err(hdev->dev,
3846                                 "User not allowed to use MSG_PROT\n");
3847                         rc = -EPERM;
3848                         break;
3849
3850                 case PACKET_CP_DMA:
3851                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3852                         rc = -EPERM;
3853                         break;
3854
3855                 case PACKET_STOP:
3856                         dev_err(hdev->dev, "User not allowed to use STOP\n");
3857                         rc = -EPERM;
3858                         break;
3859
3860                 case PACKET_WREG_BULK:
3861                         dev_err(hdev->dev,
3862                                 "User not allowed to use WREG_BULK\n");
3863                         rc = -EPERM;
3864                         break;
3865
3866                 case PACKET_LOAD_AND_EXE:
3867                         rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
3868                                 (struct packet_load_and_exe *) user_pkt);
3869                         break;
3870
3871                 case PACKET_LIN_DMA:
3872                         parser->contains_dma_pkt = true;
3873                         if (is_mmu)
3874                                 parser->patched_cb_size += pkt_size;
3875                         else
3876                                 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
3877                                         (struct packet_lin_dma *) user_pkt);
3878                         break;
3879
3880                 case PACKET_WREG_32:
3881                 case PACKET_MSG_LONG:
3882                 case PACKET_MSG_SHORT:
3883                 case PACKET_REPEAT:
3884                 case PACKET_FENCE:
3885                 case PACKET_NOP:
3886                 case PACKET_ARB_POINT:
3887                         parser->patched_cb_size += pkt_size;
3888                         break;
3889
3890                 default:
3891                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3892                                 pkt_id);
3893                         rc = -EINVAL;
3894                         break;
3895                 }
3896
3897                 if (rc)
3898                         break;
3899         }
3900
3901         /*
3902          * The new CB should have space at the end for two MSG_PROT packets:
3903          * 1. A packet that will act as a completion packet
3904          * 2. A packet that will generate an MSI-X interrupt
3905          */
3906         parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3907
3908         return rc;
3909 }
3910
3911 static int gaudi_patch_dma_packet(struct hl_device *hdev,
3912                                 struct hl_cs_parser *parser,
3913                                 struct packet_lin_dma *user_dma_pkt,
3914                                 struct packet_lin_dma *new_dma_pkt,
3915                                 u32 *new_dma_pkt_size)
3916 {
3917         struct hl_userptr *userptr;
3918         struct scatterlist *sg, *sg_next_iter;
3919         u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
3920         u64 len, len_next;
3921         dma_addr_t dma_addr, dma_addr_next;
3922         u64 device_memory_addr, addr;
3923         enum dma_data_direction dir;
3924         struct sg_table *sgt;
3925         bool src_in_host = false;
3926         bool skip_host_mem_pin = false;
3927         bool user_memset;
3928
3929         ctl = le32_to_cpu(user_dma_pkt->ctl);
3930
3931         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3932                 src_in_host = true;
3933
3934         user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3935                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3936
3937         if (src_in_host) {
3938                 addr = le64_to_cpu(user_dma_pkt->src_addr);
3939                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3940                 dir = DMA_TO_DEVICE;
3941                 if (user_memset)
3942                         skip_host_mem_pin = true;
3943         } else {
3944                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3945                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3946                 dir = DMA_FROM_DEVICE;
3947         }
3948
3949         if ((!skip_host_mem_pin) &&
3950                 (!hl_userptr_is_pinned(hdev, addr,
3951                                         le32_to_cpu(user_dma_pkt->tsize),
3952                                         parser->job_userptr_list, &userptr))) {
3953                 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3954                                 addr, le32_to_cpu(user_dma_pkt->tsize));
3955                 return -EFAULT;
3956         }
3957
3958         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3959                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3960                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
3961                 return 0;
3962         }
3963
3964         user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3965
3966         sgt = userptr->sgt;
3967         dma_desc_cnt = 0;
3968
3969         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3970                 len = sg_dma_len(sg);
3971                 dma_addr = sg_dma_address(sg);
3972
3973                 if (len == 0)
3974                         break;
3975
3976                 while ((count + 1) < sgt->nents) {
3977                         sg_next_iter = sg_next(sg);
3978                         len_next = sg_dma_len(sg_next_iter);
3979                         dma_addr_next = sg_dma_address(sg_next_iter);
3980
3981                         if (len_next == 0)
3982                                 break;
3983
3984                         if ((dma_addr + len == dma_addr_next) &&
3985                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3986                                 len += len_next;
3987                                 count++;
3988                                 sg = sg_next_iter;
3989                         } else {
3990                                 break;
3991                         }
3992                 }
3993
3994                 ctl = le32_to_cpu(user_dma_pkt->ctl);
3995                 if (likely(dma_desc_cnt))
3996                         ctl &= ~GAUDI_PKT_CTL_EB_MASK;
3997                 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3998                 new_dma_pkt->ctl = cpu_to_le32(ctl);
3999                 new_dma_pkt->tsize = cpu_to_le32(len);
4000
4001                 if (dir == DMA_TO_DEVICE) {
4002                         new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
4003                         new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
4004                 } else {
4005                         new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
4006                         new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
4007                 }
4008
4009                 if (!user_memset)
4010                         device_memory_addr += len;
4011                 dma_desc_cnt++;
4012                 new_dma_pkt++;
4013         }
4014
4015         if (!dma_desc_cnt) {
4016                 dev_err(hdev->dev,
4017                         "Error of 0 SG entries when patching DMA packet\n");
4018                 return -EFAULT;
4019         }
4020
4021         /* Fix the last dma packet - wrcomp must be as user set it */
4022         new_dma_pkt--;
4023         new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
4024
4025         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
4026
4027         return 0;
4028 }
4029
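/*
 * Walk the user CB packet by packet and build the patched CB: LIN_DMA packets
 * are expanded by gaudi_patch_dma_packet(), privileged packets (MSG_PROT,
 * CP_DMA, STOP) are rejected and all other packets are copied verbatim.
 */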
4030 static int gaudi_patch_cb(struct hl_device *hdev,
4031                                 struct hl_cs_parser *parser)
4032 {
4033         u32 cb_parsed_length = 0;
4034         u32 cb_patched_cur_length = 0;
4035         int rc = 0;
4036
4037         /* user_cb_size is more than 0 so the loop will always be executed */
4038         while (cb_parsed_length < parser->user_cb_size) {
4039                 enum packet_id pkt_id;
4040                 u16 pkt_size;
4041                 u32 new_pkt_size = 0;
4042                 struct gaudi_packet *user_pkt, *kernel_pkt;
4043
4044                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
4045                 kernel_pkt = parser->patched_cb->kernel_address +
4046                                         cb_patched_cur_length;
4047
4048                 pkt_id = (enum packet_id) (
4049                                 (le64_to_cpu(user_pkt->header) &
4050                                 PACKET_HEADER_PACKET_ID_MASK) >>
4051                                         PACKET_HEADER_PACKET_ID_SHIFT);
4052
4053                 if (!validate_packet_id(pkt_id)) {
4054                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
4055                         rc = -EINVAL;
4056                         break;
4057                 }
4058
4059                 pkt_size = gaudi_packet_sizes[pkt_id];
4060                 cb_parsed_length += pkt_size;
4061                 if (cb_parsed_length > parser->user_cb_size) {
4062                         dev_err(hdev->dev,
4063                                 "packet 0x%x is out of CB boundary\n", pkt_id);
4064                         rc = -EINVAL;
4065                         break;
4066                 }
4067
4068                 switch (pkt_id) {
4069                 case PACKET_LIN_DMA:
4070                         rc = gaudi_patch_dma_packet(hdev, parser,
4071                                         (struct packet_lin_dma *) user_pkt,
4072                                         (struct packet_lin_dma *) kernel_pkt,
4073                                         &new_pkt_size);
4074                         cb_patched_cur_length += new_pkt_size;
4075                         break;
4076
4077                 case PACKET_MSG_PROT:
4078                         dev_err(hdev->dev,
4079                                 "User not allowed to use MSG_PROT\n");
4080                         rc = -EPERM;
4081                         break;
4082
4083                 case PACKET_CP_DMA:
4084                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
4085                         rc = -EPERM;
4086                         break;
4087
4088                 case PACKET_STOP:
4089                         dev_err(hdev->dev, "User not allowed to use STOP\n");
4090                         rc = -EPERM;
4091                         break;
4092
4093                 case PACKET_WREG_32:
4094                 case PACKET_WREG_BULK:
4095                 case PACKET_MSG_LONG:
4096                 case PACKET_MSG_SHORT:
4097                 case PACKET_REPEAT:
4098                 case PACKET_FENCE:
4099                 case PACKET_NOP:
4100                 case PACKET_ARB_POINT:
4101                 case PACKET_LOAD_AND_EXE:
4102                         memcpy(kernel_pkt, user_pkt, pkt_size);
4103                         cb_patched_cur_length += pkt_size;
4104                         break;
4105
4106                 default:
4107                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
4108                                 pkt_id);
4109                         rc = -EINVAL;
4110                         break;
4111                 }
4112
4113                 if (rc)
4114                         break;
4115         }
4116
4117         return rc;
4118 }
4119
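/*
 * With the MMU enabled the user CB needs no address patching, so it is copied
 * as-is into a kernel-owned CB (with room reserved for the two MSG_PROT
 * packets) and then validated in place.
 */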
4120 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
4121                 struct hl_cs_parser *parser)
4122 {
4123         u64 patched_cb_handle;
4124         u32 patched_cb_size;
4125         struct hl_cb *user_cb;
4126         int rc;
4127
4128         /*
4129          * The new CB should have space at the end for two MSG_PROT packets:
4130          * 1. A packet that will act as a completion packet
4131          * 2. A packet that will generate an MSI interrupt
4132          */
4133         parser->patched_cb_size = parser->user_cb_size +
4134                         sizeof(struct packet_msg_prot) * 2;
4135
4136         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
4137                                 parser->patched_cb_size, false, false,
4138                                 &patched_cb_handle);
4139
4140         if (rc) {
4141                 dev_err(hdev->dev,
4142                         "Failed to allocate patched CB for DMA CS %d\n",
4143                         rc);
4144                 return rc;
4145         }
4146
4147         patched_cb_handle >>= PAGE_SHIFT;
4148         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4149                                 (u32) patched_cb_handle);
4150         /* hl_cb_get should never fail here so use kernel WARN */
4151         WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4152                         (u32) patched_cb_handle);
4153         if (!parser->patched_cb) {
4154                 rc = -EFAULT;
4155                 goto out;
4156         }
4157
4158         /*
4159          * The check that parser->user_cb_size <= parser->user_cb->size was done
4160          * in validate_queue_index().
4161          */
4162         memcpy(parser->patched_cb->kernel_address,
4163                 parser->user_cb->kernel_address,
4164                 parser->user_cb_size);
4165
4166         patched_cb_size = parser->patched_cb_size;
4167
4168         /* Validate patched CB instead of user CB */
4169         user_cb = parser->user_cb;
4170         parser->user_cb = parser->patched_cb;
4171         rc = gaudi_validate_cb(hdev, parser, true);
4172         parser->user_cb = user_cb;
4173
4174         if (rc) {
4175                 hl_cb_put(parser->patched_cb);
4176                 goto out;
4177         }
4178
4179         if (patched_cb_size != parser->patched_cb_size) {
4180                 dev_err(hdev->dev, "user CB size mismatch\n");
4181                 hl_cb_put(parser->patched_cb);
4182                 rc = -EINVAL;
4183                 goto out;
4184         }
4185
4186 out:
4187         /*
4188          * Always call cb destroy here because we still have 1 reference
4189          * to it by calling cb_get earlier. After the job is completed,
4190          * cb_put will release it, but here we want to remove it from the
4191          * idr.
4192          */
4193         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4194                                         patched_cb_handle << PAGE_SHIFT);
4195
4196         return rc;
4197 }
4198
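/*
 * Without the MMU, validate the user CB first to compute the patched CB size,
 * then allocate the patched CB and rewrite the host addresses into it.
 */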
4199 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
4200                 struct hl_cs_parser *parser)
4201 {
4202         u64 patched_cb_handle;
4203         int rc;
4204
4205         rc = gaudi_validate_cb(hdev, parser, false);
4206
4207         if (rc)
4208                 goto free_userptr;
4209
4210         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
4211                                 parser->patched_cb_size, false, false,
4212                                 &patched_cb_handle);
4213         if (rc) {
4214                 dev_err(hdev->dev,
4215                         "Failed to allocate patched CB for DMA CS %d\n", rc);
4216                 goto free_userptr;
4217         }
4218
4219         patched_cb_handle >>= PAGE_SHIFT;
4220         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4221                                 (u32) patched_cb_handle);
4222         /* hl_cb_get should never fail here so use kernel WARN */
4223         WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4224                         (u32) patched_cb_handle);
4225         if (!parser->patched_cb) {
4226                 rc = -EFAULT;
4227                 goto out;
4228         }
4229
4230         rc = gaudi_patch_cb(hdev, parser);
4231
4232         if (rc)
4233                 hl_cb_put(parser->patched_cb);
4234
4235 out:
4236         /*
4237          * Always call cb destroy here because we still have 1 reference
4238          * to it by calling cb_get earlier. After the job is completed,
4239          * cb_put will release it, but here we want to remove it from the
4240          * idr.
4241          */
4242         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4243                                 patched_cb_handle << PAGE_SHIFT);
4244
4245 free_userptr:
4246         if (rc)
4247                 hl_userptr_delete_list(hdev, parser->job_userptr_list);
4248         return rc;
4249 }
4250
4251 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
4252                                         struct hl_cs_parser *parser)
4253 {
4254         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
4255
4256         /* For internal queue jobs just check if CB address is valid */
4257         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4258                                         parser->user_cb_size,
4259                                         asic_prop->sram_user_base_address,
4260                                         asic_prop->sram_end_address))
4261                 return 0;
4262
4263         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4264                                         parser->user_cb_size,
4265                                         asic_prop->dram_user_base_address,
4266                                         asic_prop->dram_end_address))
4267                 return 0;
4268
4269         /* PMMU and HPMMU addresses are equal, check only one of them */
4270         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4271                                         parser->user_cb_size,
4272                                         asic_prop->pmmu.start_addr,
4273                                         asic_prop->pmmu.end_addr))
4274                 return 0;
4275
4276         dev_err(hdev->dev,
4277                 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
4278                 parser->user_cb, parser->user_cb_size);
4279
4280         return -EFAULT;
4281 }
4282
4283 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
4284 {
4285         struct gaudi_device *gaudi = hdev->asic_specific;
4286
4287         if (parser->queue_type == QUEUE_TYPE_INT)
4288                 return gaudi_parse_cb_no_ext_queue(hdev, parser);
4289
4290         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
4291                 return gaudi_parse_cb_mmu(hdev, parser);
4292         else
4293                 return gaudi_parse_cb_no_mmu(hdev, parser);
4294 }
4295
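/*
 * Fill the two MSG_PROT packets reserved at the end of the CB: the first
 * writes the completion value to the CQ and the second triggers the MSI
 * (vector 0 unless multi-MSI mode is enabled).
 */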
4296 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
4297                                         void *kernel_address, u32 len,
4298                                         u64 cq_addr, u32 cq_val, u32 msi_vec,
4299                                         bool eb)
4300 {
4301         struct gaudi_device *gaudi = hdev->asic_specific;
4302         struct packet_msg_prot *cq_pkt;
4303         u32 tmp;
4304
4305         cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
4306
4307         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4308         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4309
4310         if (eb)
4311                 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4312
4313         cq_pkt->ctl = cpu_to_le32(tmp);
4314         cq_pkt->value = cpu_to_le32(cq_val);
4315         cq_pkt->addr = cpu_to_le64(cq_addr);
4316
4317         cq_pkt++;
4318
4319         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4320         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4321         cq_pkt->ctl = cpu_to_le32(tmp);
4322         cq_pkt->value = cpu_to_le32(1);
4323
4324         if (!gaudi->multi_msi_mode)
4325                 msi_vec = 0;
4326
4327         cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
4328 }
4329
4330 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
4331 {
4332         WREG32(mmCPU_IF_EQ_RD_OFFS, val);
4333 }
4334
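/*
 * Memset device memory by building a single LIN_DMA packet in a kernel CB and
 * executing it synchronously on the DMA0 queue via gaudi_send_job_on_qman0().
 */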
4335 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
4336                                         u32 size, u64 val)
4337 {
4338         struct packet_lin_dma *lin_dma_pkt;
4339         struct hl_cs_job *job;
4340         u32 cb_size, ctl, err_cause;
4341         struct hl_cb *cb;
4342         int rc;
4343
4344         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
4345         if (!cb)
4346                 return -EFAULT;
4347
4348         lin_dma_pkt = cb->kernel_address;
4349         memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
4350         cb_size = sizeof(*lin_dma_pkt);
4351
4352         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
4353         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
4354         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
4355         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4356         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
4357
4358         lin_dma_pkt->ctl = cpu_to_le32(ctl);
4359         lin_dma_pkt->src_addr = cpu_to_le64(val);
4360         lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
4361         lin_dma_pkt->tsize = cpu_to_le32(size);
4362
4363         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
4364         if (!job) {
4365                 dev_err(hdev->dev, "Failed to allocate a new job\n");
4366                 rc = -ENOMEM;
4367                 goto release_cb;
4368         }
4369
4370         /* Verify DMA is OK */
4371         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
4372         if (err_cause && !hdev->init_done) {
4373                 dev_dbg(hdev->dev,
4374                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
4375                         err_cause);
4376                 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
4377         }
4378
4379         job->id = 0;
4380         job->user_cb = cb;
4381         job->user_cb->cs_cnt++;
4382         job->user_cb_size = cb_size;
4383         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
4384         job->patched_cb = job->user_cb;
4385         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
4386
4387         hl_debugfs_add_job(hdev, job);
4388
4389         rc = gaudi_send_job_on_qman0(hdev, job);
4390         hl_debugfs_remove_job(hdev, job);
4391         kfree(job);
4392         cb->cs_cnt--;
4393
4394         /* Verify DMA is OK */
4395         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
4396         if (err_cause) {
4397                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
4398                 rc = -EIO;
4399                 if (!hdev->init_done) {
4400                         dev_dbg(hdev->dev,
4401                                 "Clearing DMA0 engine from errors (cause 0x%x)\n",
4402                                 err_cause);
4403                         WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
4404                 }
4405         }
4406
4407 release_cb:
4408         hl_cb_put(cb);
4409         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
4410
4411         return rc;
4412 }
4413
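/*
 * Clear the sync manager objects and monitor status registers so a new
 * context starts from a clean sync state. In the west-south block only the
 * user-available objects and monitors are cleared, leaving the
 * driver-reserved ones intact.
 */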
4414 static void gaudi_restore_sm_registers(struct hl_device *hdev)
4415 {
4416         int i;
4417
4418         for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) {
4419                 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4420                 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4421                 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4422         }
4423
4424         for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) {
4425                 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4426                 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4427                 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4428         }
4429
4430         i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4;
4431
4432         for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4)
4433                 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4434
4435         i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4;
4436
4437         for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4)
4438                 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4439 }
4440
4441 static void gaudi_restore_dma_registers(struct hl_device *hdev)
4442 {
4443         u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
4444                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
4445         int i;
4446
4447         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4448                 u64 sob_addr = CFG_BASE +
4449                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
4450                                 (i * sob_delta);
4451                 u32 dma_offset = i * DMA_CORE_OFFSET;
4452
4453                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
4454                                 lower_32_bits(sob_addr));
4455                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
4456                                 upper_32_bits(sob_addr));
4457                 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
4458
4459                 /* For DMAs 2-7, we need to restore WR_AWUSER_31_11 as it can be
4460                  * modified by the user for SRAM reduction
4461                  */
4462                 if (i > 1)
4463                         WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
4464                                                                 0x00000001);
4465         }
4466 }
4467
4468 static void gaudi_restore_qm_registers(struct hl_device *hdev)
4469 {
4470         u32 qman_offset;
4471         int i;
4472
4473         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4474                 qman_offset = i * DMA_QMAN_OFFSET;
4475                 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
4476         }
4477
4478         for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
4479                 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
4480                 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
4481         }
4482
4483         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
4484                 qman_offset = i * TPC_QMAN_OFFSET;
4485                 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
4486         }
4487 }
4488
4489 static void gaudi_restore_user_registers(struct hl_device *hdev)
4490 {
4491         gaudi_restore_sm_registers(hdev);
4492         gaudi_restore_dma_registers(hdev);
4493         gaudi_restore_qm_registers(hdev);
4494 }
4495
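/*
 * On context switch, scrub the user SRAM region, program the new ASID into
 * the engines and restore the registers a previous user may have modified.
 */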
4496 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
4497 {
4498         struct asic_fixed_properties *prop = &hdev->asic_prop;
4499         u64 addr = prop->sram_user_base_address;
4500         u32 size = hdev->pldm ? 0x10000 :
4501                         (prop->sram_size - SRAM_USER_BASE_OFFSET);
4502         u64 val = 0x7777777777777777ull;
4503         int rc;
4504
4505         rc = gaudi_memset_device_memory(hdev, addr, size, val);
4506         if (rc) {
4507                 dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
4508                 return rc;
4509         }
4510
4511         gaudi_mmu_prepare(hdev, asid);
4512
4513         gaudi_restore_user_registers(hdev);
4514
4515         return 0;
4516 }
4517
4518 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
4519 {
4520         struct asic_fixed_properties *prop = &hdev->asic_prop;
4521         struct gaudi_device *gaudi = hdev->asic_specific;
4522         u64 addr = prop->mmu_pgt_addr;
4523         u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
4524
4525         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4526                 return 0;
4527
4528         return gaudi_memset_device_memory(hdev, addr, size, 0);
4529 }
4530
4531 static void gaudi_restore_phase_topology(struct hl_device *hdev)
4532 {
4533
4534 }
4535
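/*
 * debugfs accessors: route the address to the matching aperture - CFG space
 * (rejected while the relevant clock gating is enabled), the SRAM BAR, HBM
 * (by temporarily moving the HBM BAR) or host physical memory when no IOMMU
 * is present. The 32-bit and 64-bit read/write variants below share this
 * logic.
 */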
4536 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
4537 {
4538         struct asic_fixed_properties *prop = &hdev->asic_prop;
4539         struct gaudi_device *gaudi = hdev->asic_specific;
4540         u64 hbm_bar_addr;
4541         int rc = 0;
4542
4543         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4544
4545                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4546                                 (hdev->clock_gating_mask &
4547                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4548
4549                         dev_err_ratelimited(hdev->dev,
4550                                 "Can't read register - clock gating is enabled!\n");
4551                         rc = -EFAULT;
4552                 } else {
4553                         *val = RREG32(addr - CFG_BASE);
4554                 }
4555
4556         } else if ((addr >= SRAM_BASE_ADDR) &&
4557                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4558                 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
4559                                 (addr - SRAM_BASE_ADDR));
4560         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4561                 u64 bar_base_addr = DRAM_PHYS_BASE +
4562                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4563
4564                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4565                 if (hbm_bar_addr != U64_MAX) {
4566                         *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
4567                                                 (addr - bar_base_addr));
4568
4569                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4570                                                 hbm_bar_addr);
4571                 }
4572                 if (hbm_bar_addr == U64_MAX)
4573                         rc = -EIO;
4574         } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4575                 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
4576         } else {
4577                 rc = -EFAULT;
4578         }
4579
4580         return rc;
4581 }
4582
4583 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
4584 {
4585         struct asic_fixed_properties *prop = &hdev->asic_prop;
4586         struct gaudi_device *gaudi = hdev->asic_specific;
4587         u64 hbm_bar_addr;
4588         int rc = 0;
4589
4590         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4591
4592                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4593                                 (hdev->clock_gating_mask &
4594                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4595
4596                         dev_err_ratelimited(hdev->dev,
4597                                 "Can't write register - clock gating is enabled!\n");
4598                         rc = -EFAULT;
4599                 } else {
4600                         WREG32(addr - CFG_BASE, val);
4601                 }
4602
4603         } else if ((addr >= SRAM_BASE_ADDR) &&
4604                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4605                 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
4606                                         (addr - SRAM_BASE_ADDR));
4607         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4608                 u64 bar_base_addr = DRAM_PHYS_BASE +
4609                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4610
4611                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4612                 if (hbm_bar_addr != U64_MAX) {
4613                         writel(val, hdev->pcie_bar[HBM_BAR_ID] +
4614                                                 (addr - bar_base_addr));
4615
4616                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4617                                                 hbm_bar_addr);
4618                 }
4619                 if (hbm_bar_addr == U64_MAX)
4620                         rc = -EIO;
4621         } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4622                 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4623         } else {
4624                 rc = -EFAULT;
4625         }
4626
4627         return rc;
4628 }
4629
4630 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
4631 {
4632         struct asic_fixed_properties *prop = &hdev->asic_prop;
4633         struct gaudi_device *gaudi = hdev->asic_specific;
4634         u64 hbm_bar_addr;
4635         int rc = 0;
4636
4637         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4638
4639                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4640                                 (hdev->clock_gating_mask &
4641                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4642
4643                         dev_err_ratelimited(hdev->dev,
4644                                 "Can't read register - clock gating is enabled!\n");
4645                         rc = -EFAULT;
4646                 } else {
4647                         u32 val_l = RREG32(addr - CFG_BASE);
4648                         u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
4649
4650                         *val = (((u64) val_h) << 32) | val_l;
4651                 }
4652
4653         } else if ((addr >= SRAM_BASE_ADDR) &&
4654                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4655                 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
4656                                 (addr - SRAM_BASE_ADDR));
4657         } else if (addr <=
4658                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4659                 u64 bar_base_addr = DRAM_PHYS_BASE +
4660                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4661
4662                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4663                 if (hbm_bar_addr != U64_MAX) {
4664                         *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
4665                                                 (addr - bar_base_addr));
4666
4667                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4668                                                 hbm_bar_addr);
4669                 }
4670                 if (hbm_bar_addr == U64_MAX)
4671                         rc = -EIO;
4672         } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4673                 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
4674         } else {
4675                 rc = -EFAULT;
4676         }
4677
4678         return rc;
4679 }
4680
4681 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
4682 {
4683         struct asic_fixed_properties *prop = &hdev->asic_prop;
4684         struct gaudi_device *gaudi = hdev->asic_specific;
4685         u64 hbm_bar_addr;
4686         int rc = 0;
4687
4688         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4689
4690                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4691                                 (hdev->clock_gating_mask &
4692                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4693
4694                         dev_err_ratelimited(hdev->dev,
4695                                 "Can't write register - clock gating is enabled!\n");
4696                         rc = -EFAULT;
4697                 } else {
4698                         WREG32(addr - CFG_BASE, lower_32_bits(val));
4699                         WREG32(addr + sizeof(u32) - CFG_BASE,
4700                                 upper_32_bits(val));
4701                 }
4702
4703         } else if ((addr >= SRAM_BASE_ADDR) &&
4704                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4705                 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
4706                                         (addr - SRAM_BASE_ADDR));
4707         } else if (addr <=
4708                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4709                 u64 bar_base_addr = DRAM_PHYS_BASE +
4710                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4711
4712                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4713                 if (hbm_bar_addr != U64_MAX) {
4714                         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4715                                                 (addr - bar_base_addr));
4716
4717                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4718                                                 hbm_bar_addr);
4719                 }
4720                 if (hbm_bar_addr == U64_MAX)
4721                         rc = -EIO;
4722         } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4723                 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4724         } else {
4725                 rc = -EFAULT;
4726         }
4727
4728         return rc;
4729 }
4730
4731 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
4732 {
4733         struct gaudi_device *gaudi = hdev->asic_specific;
4734
4735         if (hdev->hard_reset_pending)
4736                 return U64_MAX;
4737
4738         return readq(hdev->pcie_bar[HBM_BAR_ID] +
4739                         (addr - gaudi->hbm_bar_cur_addr));
4740 }
4741
4742 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4743 {
4744         struct gaudi_device *gaudi = hdev->asic_specific;
4745
4746         if (hdev->hard_reset_pending)
4747                 return;
4748
4749         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4750                         (addr - gaudi->hbm_bar_cur_addr));
4751 }
4752
4753 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
4754 {
4755         /* mask to zero the MMBP and ASID bits */
4756         WREG32_AND(reg, ~0x7FF);
4757         WREG32_OR(reg, asid);
4758 }
4759
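/*
 * Program the context's ASID (and clear the MMU bypass bit) in every DMA, TPC
 * and MME engine so their transactions are translated by the MMU. Clock
 * gating is disabled around the register writes.
 */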
4760 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
4761 {
4762         struct gaudi_device *gaudi = hdev->asic_specific;
4763
4764         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4765                 return;
4766
4767         if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
4768                 WARN(1, "asid %u is too big\n", asid);
4769                 return;
4770         }
4771
4772         mutex_lock(&gaudi->clk_gate_mutex);
4773
4774         hdev->asic_funcs->disable_clock_gating(hdev);
4775
4776         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4777         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4778         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4779         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4780         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4781
4782         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4783         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4784         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4785         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4786         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4787
4788         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4789         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4790         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4791         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4792         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4793
4794         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4795         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4796         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4797         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4798         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4799
4800         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4801         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4802         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4803         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4804         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4805
4806         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4807         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4808         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4809         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4810         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4811
4812         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4813         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4814         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4815         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4816         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4817
4818         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4819         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4820         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4821         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4822         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4823
4824         gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
4825         gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
4826         gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
4827         gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
4828         gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
4829         gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
4830         gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
4831         gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
4832
4833         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4834         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4835         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4836         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4837         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4838         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
4839         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
4840
4841         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4842         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4843         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4844         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4845         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4846         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
4847         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
4848
4849         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4850         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4851         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4852         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4853         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4854         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
4855         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
4856
4857         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4858         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4859         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4860         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4861         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4862         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
4863         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
4864
4865         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4866         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4867         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4868         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4869         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4870         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
4871         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
4872
4873         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4874         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4875         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4876         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4877         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4878         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
4879         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
4880
4881         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4882         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4883         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4884         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4885         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4886         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
4887         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
4888
4889         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4890         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4891         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4892         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4893         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4894         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
4895         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
4896
4897         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4898         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4899         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4900         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4901         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4902         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4903         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4904         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4905         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4906         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4907
4908         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
4909         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
4910         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
4911         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
4912         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
4913         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
4914         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
4915         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
4916         gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
4917         gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
4918         gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
4919         gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
4920
4921         hdev->asic_funcs->set_clock_gating(hdev);
4922
4923         mutex_unlock(&gaudi->clk_gate_mutex);
4924 }
4925
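/*
 * Execute a driver-generated CB on the DMA0 QMAN while the device is idle.
 * A MSG_PROT fence at the end of the CB writes a known value to a host buffer
 * which is polled for completion, and the DMA core protection bit is set for
 * the duration of the job.
 */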
4926 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
4927                 struct hl_cs_job *job)
4928 {
4929         struct packet_msg_prot *fence_pkt;
4930         u32 *fence_ptr;
4931         dma_addr_t fence_dma_addr;
4932         struct hl_cb *cb;
4933         u32 tmp, timeout, dma_offset;
4934         int rc;
4935
4936         if (hdev->pldm)
4937                 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
4938         else
4939                 timeout = HL_DEVICE_TIMEOUT_USEC;
4940
4941         if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
4942                 dev_err_ratelimited(hdev->dev,
4943                         "Can't send driver job on QMAN0 because the device is not idle\n");
4944                 return -EBUSY;
4945         }
4946
4947         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4948                                                         &fence_dma_addr);
4949         if (!fence_ptr) {
4950                 dev_err(hdev->dev,
4951                         "Failed to allocate fence memory for QMAN0\n");
4952                 return -ENOMEM;
4953         }
4954
4955         cb = job->patched_cb;
4956
4957         fence_pkt = cb->kernel_address +
4958                         job->job_cb_size - sizeof(struct packet_msg_prot);
4959
4960         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4961         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4962         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4963
4964         fence_pkt->ctl = cpu_to_le32(tmp);
4965         fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
4966         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4967
4968         dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
4969
4970         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
4971
4972         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
4973                                         job->job_cb_size, cb->bus_address);
4974         if (rc) {
4975                 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
4976                 goto free_fence_ptr;
4977         }
4978
4979         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
4980                                 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
4981                                 timeout, true);
4982
4983         hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
4984
4985         if (rc == -ETIMEDOUT) {
4986                 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
4987                 goto free_fence_ptr;
4988         }
4989
4990 free_fence_ptr:
4991         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
4992                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
4993
4994         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4995                                         fence_dma_addr);
4996         return rc;
4997 }
4998
4999 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
5000 {
5001         if (event_type >= GAUDI_EVENT_SIZE)
5002                 goto event_not_supported;
5003
5004         if (!gaudi_irq_map_table[event_type].valid)
5005                 goto event_not_supported;
5006
5007         snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
5008
5009         return;
5010
5011 event_not_supported:
5012         snprintf(desc, size, "N/A");
5013 }
5014
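/*
 * A RAZWI initiator coordinate only identifies a DMA-IF that is shared by two
 * DMA engines, so read both engines' error cause registers to figure out
 * which one (if any) actually triggered the error.
 */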
5015 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
5016                                                         u32 x_y, bool is_write)
5017 {
5018         u32 dma_id[2], dma_offset, err_cause[2], mask, i;
5019
5020         mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
5021                                 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
5022
5023         switch (x_y) {
5024         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5025         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5026                 dma_id[0] = 0;
5027                 dma_id[1] = 2;
5028                 break;
5029         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5030         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5031                 dma_id[0] = 1;
5032                 dma_id[1] = 3;
5033                 break;
5034         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5035         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5036                 dma_id[0] = 4;
5037                 dma_id[1] = 6;
5038                 break;
5039         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5040         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5041                 dma_id[0] = 5;
5042                 dma_id[1] = 7;
5043                 break;
5044         default:
5045                 goto unknown_initiator;
5046         }
5047
5048         for (i = 0 ; i < 2 ; i++) {
5049                 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
5050                 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5051         }
5052
5053         switch (x_y) {
5054         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5055         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5056                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5057                         return "DMA0";
5058                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5059                         return "DMA2";
5060                 else
5061                         return "DMA0 or DMA2";
5062         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5063         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5064                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5065                         return "DMA1";
5066                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5067                         return "DMA3";
5068                 else
5069                         return "DMA1 or DMA3";
5070         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5071         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5072                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5073                         return "DMA4";
5074                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5075                         return "DMA6";
5076                 else
5077                         return "DMA4 or DMA6";
5078         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5079         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5080                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5081                         return "DMA5";
5082                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5083                         return "DMA7";
5084                 else
5085                         return "DMA5 or DMA7";
5086         }
5087
5088 unknown_initiator:
5089         return "unknown initiator";
5090 }
5091
5092 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
5093                                                         bool is_write)
5094 {
5095         u32 val, x_y, axi_id;
5096
5097         val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
5098                                 RREG32(mmMMU_UP_RAZWI_READ_ID);
5099         x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
5100                         (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
5101         axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
5102                         RAZWI_INITIATOR_AXI_ID_SHIFT);
5103
5104         switch (x_y) {
5105         case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
5106                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5107                         return "TPC0";
5108                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5109                         return "NIC0";
5110                 break;
5111         case RAZWI_INITIATOR_ID_X_Y_TPC1:
5112                 return "TPC1";
5113         case RAZWI_INITIATOR_ID_X_Y_MME0_0:
5114         case RAZWI_INITIATOR_ID_X_Y_MME0_1:
5115                 return "MME0";
5116         case RAZWI_INITIATOR_ID_X_Y_MME1_0:
5117         case RAZWI_INITIATOR_ID_X_Y_MME1_1:
5118                 return "MME1";
5119         case RAZWI_INITIATOR_ID_X_Y_TPC2:
5120                 return "TPC2";
5121         case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
5122                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5123                         return "TPC3";
5124                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
5125                         return "PCI";
5126                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
5127                         return "CPU";
5128                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
5129                         return "PSOC";
5130                 break;
5131         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5132         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5133         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5134         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5135         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5136         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5137         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5138         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5139                 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
5140         case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
5141                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5142                         return "TPC4";
5143                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5144                         return "NIC1";
5145                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5146                         return "NIC2";
5147                 break;
5148         case RAZWI_INITIATOR_ID_X_Y_TPC5:
5149                 return "TPC5";
5150         case RAZWI_INITIATOR_ID_X_Y_MME2_0:
5151         case RAZWI_INITIATOR_ID_X_Y_MME2_1:
5152                 return "MME2";
5153         case RAZWI_INITIATOR_ID_X_Y_MME3_0:
5154         case RAZWI_INITIATOR_ID_X_Y_MME3_1:
5155                 return "MME3";
5156         case RAZWI_INITIATOR_ID_X_Y_TPC6:
5157                 return "TPC6";
5158         case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
5159                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5160                         return "TPC7";
5161                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5162                         return "NIC4";
5163                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5164                         return "NIC5";
5165                 break;
5166         default:
5167                 break;
5168         }
5169
5170         dev_err(hdev->dev,
5171                 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
5172                 val,
5173                 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
5174                 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
5175                 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
5176                         RAZWI_INITIATOR_AXI_ID_MASK);
5177
5178         return "unknown initiator";
5179 }
5180
5181 static void gaudi_print_razwi_info(struct hl_device *hdev)
5182 {
5183         if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
5184                 dev_err_ratelimited(hdev->dev,
5185                         "RAZWI event caused by illegal write of %s\n",
5186                         gaudi_get_razwi_initiator_name(hdev, true));
5187                 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
5188         }
5189
5190         if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
5191                 dev_err_ratelimited(hdev->dev,
5192                         "RAZWI event caused by illegal read of %s\n",
5193                         gaudi_get_razwi_initiator_name(hdev, false));
5194                 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
5195         }
5196 }
5197
5198 static void gaudi_print_mmu_error_info(struct hl_device *hdev)
5199 {
5200         struct gaudi_device *gaudi = hdev->asic_specific;
5201         u64 addr;
5202         u32 val;
5203
5204         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5205                 return;
5206
5207         val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
5208         if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5209                 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
5210                 addr <<= 32;
5211                 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
5212
5213                 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
5214                                         addr);
5215
5216                 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
5217         }
5218
5219         val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
5220         if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5221                 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
5222                 addr <<= 32;
5223                 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
5224
5225                 dev_err_ratelimited(hdev->dev,
5226                                 "MMU access error on va 0x%llx\n", addr);
5227
5228                 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
5229         }
5230 }
5231
5232 /*
5233  *  +-------------------+------------------------------------------------------+
5234  *  | Configuration Reg |                     Description                      |
5235  *  |      Address      |                                                      |
5236  *  +-------------------+------------------------------------------------------+
5237  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
5238  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
5239  *  |                   |0xF34 memory wrappers 63:32                           |
5240  *  |                   |0xF38 memory wrappers 95:64                           |
5241  *  |                   |0xF3C memory wrappers 127:96                          |
5242  *  +-------------------+------------------------------------------------------+
5243  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
5244  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
5245  *  |                   |0xF44 memory wrappers 63:32                           |
5246  *  |                   |0xF48 memory wrappers 95:64                           |
5247  *  |                   |0xF4C memory wrappers 127:96                          |
5248  *  +-------------------+------------------------------------------------------+
5249  */
5250 static int gaudi_extract_ecc_info(struct hl_device *hdev,
5251                 struct ecc_info_extract_params *params, u64 *ecc_address,
5252                 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
5253 {
5254         struct gaudi_device *gaudi = hdev->asic_specific;
5255         u32 i, num_mem_regs, reg, err_bit;
5256         u64 err_addr, err_word = 0;
5257         int rc = 0;
5258
5259         num_mem_regs = params->num_memories / 32 +
5260                         ((params->num_memories % 32) ? 1 : 0);
5261
5262         if (params->block_address >= CFG_BASE)
5263                 params->block_address -= CFG_BASE;
5264
5265         if (params->derr)
5266                 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
5267         else
5268                 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
5269
5270         if (params->disable_clock_gating) {
5271                 mutex_lock(&gaudi->clk_gate_mutex);
5272                 hdev->asic_funcs->disable_clock_gating(hdev);
5273         }
5274
5275         /* Set invalid wrapper index */
5276         *memory_wrapper_idx = 0xFF;
5277
5278         /* Iterate through memory wrappers, a single bit must be set */
5279         for (i = 0 ; i < num_mem_regs ; i++) {
5280                 /* The per-wrapper error bitmap registers are 4 bytes apart */
5281                 err_word = RREG32(err_addr + i * 4);
5282                 if (err_word) {
5283                         err_bit = __ffs(err_word);
5284                         *memory_wrapper_idx = err_bit + (32 * i);
5285                         break;
5286                 }
5287         }
5288
5289         if (*memory_wrapper_idx == 0xFF) {
5290                 dev_err(hdev->dev, "ECC error information cannot be found\n");
5291                 rc = -EINVAL;
5292                 goto enable_clk_gate;
5293         }
5294
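             /* Select the failing wrapper so its captured address and syndrome can be read below */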
5295         WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
5296                         *memory_wrapper_idx);
5297
5298         *ecc_address =
5299                 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
5300         *ecc_syndrom =
5301                 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
5302
5303         /* Clear error indication */
5304         reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
5305         if (params->derr)
5306                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
5307         else
5308                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
5309
5310         WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
5311
5312 enable_clk_gate:
5313         if (params->disable_clock_gating) {
5314                 hdev->asic_funcs->set_clock_gating(hdev);
5315
5316                 mutex_unlock(&gaudi->clk_gate_mutex);
5317         }
5318
5319         return rc;
5320 }
5321
5322 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
5323                                           const char *qm_name,
5324                                           u64 glbl_sts_addr,
5325                                           u64 arb_err_addr)
5326 {
5327         u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
5328         char reg_desc[32];
5329
5330         /* Iterate through all stream GLBL_STS1 registers + Lower CP */
5331         for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
5332                 glbl_sts_clr_val = 0;
5333                 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
5334
5335                 if (!glbl_sts_val)
5336                         continue;
5337
5338                 if (i == QMAN_STREAMS)
5339                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
5340                 else
5341                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
5342
5343                 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
5344                         if (glbl_sts_val & BIT(j)) {
5345                                 dev_err_ratelimited(hdev->dev,
5346                                                 "%s %s. err cause: %s\n",
5347                                                 qm_name, reg_desc,
5348                                                 gaudi_qman_error_cause[j]);
5349                                 glbl_sts_clr_val |= BIT(j);
5350                         }
5351                 }
5352
5353                 /* Write 1 to clear errors */
5354                 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
5355         }
5356
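             /* Beyond the per-stream status, also check the QMAN arbiter error cause */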
5357         arb_err_val = RREG32(arb_err_addr);
5358
5359         if (!arb_err_val)
5360                 return;
5361
5362         for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
5363                 if (arb_err_val & BIT(j)) {
5364                         dev_err_ratelimited(hdev->dev,
5365                                         "%s ARB_ERR. err cause: %s\n",
5366                                         qm_name,
5367                                         gaudi_qman_arb_error_cause[j]);
5368                 }
5369         }
5370 }
5371
5372 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
5373                 struct hl_eq_ecc_data *ecc_data)
5374 {
5375         struct ecc_info_extract_params params;
5376         u64 ecc_address = 0, ecc_syndrom = 0;
5377         u8 index, memory_wrapper_idx = 0;
5378         bool extract_info_from_fw;
5379         int rc;
5380
5381         switch (event_type) {
5382         case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
5383         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
5384                 extract_info_from_fw = true;
5385                 break;
5386         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5387                 index = event_type - GAUDI_EVENT_TPC0_SERR;
5388                 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5389                 params.num_memories = 90;
5390                 params.derr = false;
5391                 params.disable_clock_gating = true;
5392                 extract_info_from_fw = false;
5393                 break;
5394         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5395                 index = event_type - GAUDI_EVENT_TPC0_DERR;
5396                 params.block_address =
5397                         mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5398                 params.num_memories = 90;
5399                 params.derr = true;
5400                 params.disable_clock_gating = true;
5401                 extract_info_from_fw = false;
5402                 break;
5403         case GAUDI_EVENT_MME0_ACC_SERR:
5404         case GAUDI_EVENT_MME1_ACC_SERR:
5405         case GAUDI_EVENT_MME2_ACC_SERR:
5406         case GAUDI_EVENT_MME3_ACC_SERR:
5407                 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
5408                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5409                 params.num_memories = 128;
5410                 params.derr = false;
5411                 params.disable_clock_gating = true;
5412                 extract_info_from_fw = false;
5413                 break;
5414         case GAUDI_EVENT_MME0_ACC_DERR:
5415         case GAUDI_EVENT_MME1_ACC_DERR:
5416         case GAUDI_EVENT_MME2_ACC_DERR:
5417         case GAUDI_EVENT_MME3_ACC_DERR:
5418                 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
5419                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5420                 params.num_memories = 128;
5421                 params.derr = true;
5422                 params.disable_clock_gating = true;
5423                 extract_info_from_fw = false;
5424                 break;
5425         case GAUDI_EVENT_MME0_SBAB_SERR:
5426         case GAUDI_EVENT_MME1_SBAB_SERR:
5427         case GAUDI_EVENT_MME2_SBAB_SERR:
5428         case GAUDI_EVENT_MME3_SBAB_SERR:
5429                 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
5430                 params.block_address =
5431                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5432                 params.num_memories = 33;
5433                 params.derr = false;
5434                 params.disable_clock_gating = true;
5435                 extract_info_from_fw = false;
5436                 break;
5437         case GAUDI_EVENT_MME0_SBAB_DERR:
5438         case GAUDI_EVENT_MME1_SBAB_DERR:
5439         case GAUDI_EVENT_MME2_SBAB_DERR:
5440         case GAUDI_EVENT_MME3_SBAB_DERR:
5441                 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
5442                 params.block_address =
5443                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5444                 params.num_memories = 33;
5445                 params.derr = true;
5446                 params.disable_clock_gating = true;
5447                 extract_info_from_fw = false;
5448                 break;
5449         default:
5450                 return;
5451         }
5452
5453         if (extract_info_from_fw) {
5454                 ecc_address = le64_to_cpu(ecc_data->ecc_address);
5455                 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
5456                 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
5457         } else {
5458                 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
5459                                 &ecc_syndrom, &memory_wrapper_idx);
5460                 if (rc)
5461                         return;
5462         }
5463
5464         dev_err(hdev->dev,
5465                 "ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
5466                 ecc_address, ecc_syndrom, memory_wrapper_idx);
5467 }
5468
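     /*
      * Resolve which QMAN (TPC, MME or DMA) raised the event and dump its
      * per-stream and arbiter error causes.
      */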
5469 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
5470 {
5471         u64 glbl_sts_addr, arb_err_addr;
5472         u8 index;
5473         char desc[32];
5474
5475         switch (event_type) {
5476         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5477                 index = event_type - GAUDI_EVENT_TPC0_QM;
5478                 glbl_sts_addr =
5479                         mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
5480                 arb_err_addr =
5481                         mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
5482                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
5483                 break;
5484         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5485                 index = event_type - GAUDI_EVENT_MME0_QM;
5486                 glbl_sts_addr =
5487                         mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
5488                 arb_err_addr =
5489                         mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
5490                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
5491                 break;
5492         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5493                 index = event_type - GAUDI_EVENT_DMA0_QM;
5494                 glbl_sts_addr =
5495                         mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
5496                 arb_err_addr =
5497                         mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
5498                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
5499                 break;
5500         default:
5501                 return;
5502         }
5503
5504         gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
5505 }
5506
5507 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
5508                                         bool razwi)
5509 {
5510         char desc[64] = "";
5511
5512         gaudi_get_event_desc(event_type, desc, sizeof(desc));
5513         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
5514                 event_type, desc);
5515
5516         if (razwi) {
5517                 gaudi_print_razwi_info(hdev);
5518                 gaudi_print_mmu_error_info(hdev);
5519         }
5520 }
5521
5522 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
5523 {
5524         struct gaudi_device *gaudi = hdev->asic_specific;
5525
5526         /* Unmask all IRQs since some could have been received
5527          * during the soft reset
5528          */
5529         return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
5530 }
5531
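     /*
      * Read and report the interrupt and ECC status of both pseudo channels
      * ("pc") of every channel of the given HBM device, then clear the
      * latched indications. Returns 1 if any error was found, 0 otherwise.
      */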
5532 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device)
5533 {
5534         int ch, err = 0;
5535         u32 base, val, val2;
5536
5537         base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
5538         for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
5539                 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
5540                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
5541                 if (val) {
5542                         err = 1;
5543                         dev_err(hdev->dev,
5544                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5545                                 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
5546                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
5547                                 (val >> 4) & 0x1);
5548
5549                         val2 = RREG32(base + ch * 0x1000 + 0x060);
5550                         dev_err(hdev->dev,
5551                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5552                                 device, ch * 2,
5553                                 RREG32(base + ch * 0x1000 + 0x064),
5554                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5555                                 (val2 & 0xFF0000) >> 16,
5556                                 (val2 & 0xFF000000) >> 24);
5557                 }
5558
5559                 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
5560                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
5561                 if (val) {
5562                         err = 1;
5563                         dev_err(hdev->dev,
5564                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5565                                 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
5566                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
5567                                 (val >> 4) & 0x1);
5568
5569                         val2 = RREG32(base + ch * 0x1000 + 0x070);
5570                         dev_err(hdev->dev,
5571                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5572                                 device, ch * 2 + 1,
5573                                 RREG32(base + ch * 0x1000 + 0x074),
5574                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5575                                 (val2 & 0xFF0000) >> 16,
5576                                 (val2 & 0xFF000000) >> 24);
5577                 }
5578
5579                 /* Clear interrupts */
5580                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
5581                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
5582                 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
5583                 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
5584                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
5585                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
5586         }
5587
5588         val  = RREG32(base + 0x8F30);
5589         val2 = RREG32(base + 0x8F34);
5590         if (val | val2) {
5591                 err = 1;
5592                 dev_err(hdev->dev,
5593                         "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
5594                         device, val, val2);
5595         }
5596         val  = RREG32(base + 0x8F40);
5597         val2 = RREG32(base + 0x8F44);
5598         if (val | val2) {
5599                 err = 1;
5600                 dev_err(hdev->dev,
5601                         "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
5602                         device, val, val2);
5603         }
5604
5605         return err;
5606 }
5607
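     /* Map an HBM SPI event to the index of the HBM device that raised it */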
5608 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
5609 {
5610         switch (hbm_event_type) {
5611         case GAUDI_EVENT_HBM0_SPI_0:
5612         case GAUDI_EVENT_HBM0_SPI_1:
5613                 return 0;
5614         case GAUDI_EVENT_HBM1_SPI_0:
5615         case GAUDI_EVENT_HBM1_SPI_1:
5616                 return 1;
5617         case GAUDI_EVENT_HBM2_SPI_0:
5618         case GAUDI_EVENT_HBM2_SPI_1:
5619                 return 2;
5620         case GAUDI_EVENT_HBM3_SPI_0:
5621         case GAUDI_EVENT_HBM3_SPI_1:
5622                 return 3;
5623         default:
5624                 break;
5625         }
5626
5627         /* Should never happen */
5628         return 0;
5629 }
5630
5631 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
5632                                         char *interrupt_name)
5633 {
5634         struct gaudi_device *gaudi = hdev->asic_specific;
5635         u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
5636         bool soft_reset_required = false;
5637
5638         /* Accessing the TPC_INTR_CAUSE registers requires disabling clock
5639          * gating, which CPU-CP cannot do, so the driver handles these
5640          * interrupts itself.
5641          */
5642
5643         mutex_lock(&gaudi->clk_gate_mutex);
5644
5645         hdev->asic_funcs->disable_clock_gating(hdev);
5646
5647         tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
5648                                 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
5649
5650         for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
5651                 if (tpc_interrupts_cause & BIT(i)) {
5652                         dev_err_ratelimited(hdev->dev,
5653                                         "TPC%d_%s interrupt cause: %s\n",
5654                                         tpc_id, interrupt_name,
5655                                         gaudi_tpc_interrupts_cause[i]);
5656                         /* If this is QM error, we need to soft-reset */
5657                         /* If this is a QM error, we need to soft-reset */
5658                                 soft_reset_required = true;
5659                 }
5660
5661         /* Clear interrupts */
5662         WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
5663
5664         hdev->asic_funcs->set_clock_gating(hdev);
5665
5666         mutex_unlock(&gaudi->clk_gate_mutex);
5667
5668         return soft_reset_required;
5669 }
5670
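     /* DEC event IDs are allocated two per TPC, hence the shift by one */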
5671 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
5672 {
5673         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
5674 }
5675
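     /* KRN_ERR event IDs are spaced six apart per TPC, hence the divide by six */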
5676 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
5677 {
5678         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
5679 }
5680
5681 static void gaudi_print_clk_change_info(struct hl_device *hdev,
5682                                         u16 event_type)
5683 {
5684         switch (event_type) {
5685         case GAUDI_EVENT_FIX_POWER_ENV_S:
5686                 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
5687                 dev_info_ratelimited(hdev->dev,
5688                         "Clock throttling due to power consumption\n");
5689                 break;
5690
5691         case GAUDI_EVENT_FIX_POWER_ENV_E:
5692                 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
5693                 dev_info_ratelimited(hdev->dev,
5694                         "Power envelope is safe, back to optimal clock\n");
5695                 break;
5696
5697         case GAUDI_EVENT_FIX_THERMAL_ENV_S:
5698                 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
5699                 dev_info_ratelimited(hdev->dev,
5700                         "Clock throttling due to overheating\n");
5701                 break;
5702
5703         case GAUDI_EVENT_FIX_THERMAL_ENV_E:
5704                 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
5705                 dev_info_ratelimited(hdev->dev,
5706                         "Thermal envelope is safe, back to optimal clock\n");
5707                 break;
5708
5709         default:
5710                 dev_err(hdev->dev, "Received invalid clock change event %d\n",
5711                         event_type);
5712                 break;
5713         }
5714 }
5715
5716 static void gaudi_handle_eqe(struct hl_device *hdev,
5717                                 struct hl_eq_entry *eq_entry)
5718 {
5719         struct gaudi_device *gaudi = hdev->asic_specific;
5720         u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
5721         u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
5722                         >> EQ_CTL_EVENT_TYPE_SHIFT);
5723         u8 cause;
5724         bool reset_required;
5725
5726         if (event_type >= GAUDI_EVENT_SIZE) {
5727                 dev_err(hdev->dev, "Event type %u exceeds maximum of %u\n",
5728                                 event_type, GAUDI_EVENT_SIZE - 1);
5729                 return;
5730         }
5731
5732         gaudi->events_stat[event_type]++;
5733         gaudi->events_stat_aggregate[event_type]++;
5734
5735         switch (event_type) {
5736         case GAUDI_EVENT_PCIE_CORE_DERR:
5737         case GAUDI_EVENT_PCIE_IF_DERR:
5738         case GAUDI_EVENT_PCIE_PHY_DERR:
5739         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5740         case GAUDI_EVENT_MME0_ACC_DERR:
5741         case GAUDI_EVENT_MME0_SBAB_DERR:
5742         case GAUDI_EVENT_MME1_ACC_DERR:
5743         case GAUDI_EVENT_MME1_SBAB_DERR:
5744         case GAUDI_EVENT_MME2_ACC_DERR:
5745         case GAUDI_EVENT_MME2_SBAB_DERR:
5746         case GAUDI_EVENT_MME3_ACC_DERR:
5747         case GAUDI_EVENT_MME3_SBAB_DERR:
5748         case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
5749                 fallthrough;
5750         case GAUDI_EVENT_CPU_IF_ECC_DERR:
5751         case GAUDI_EVENT_PSOC_MEM_DERR:
5752         case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
5753         case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
5754         case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
5755         case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
5756         case GAUDI_EVENT_MMU_DERR:
5757                 gaudi_print_irq_info(hdev, event_type, true);
5758                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
5759                 if (hdev->hard_reset_on_fw_events)
5760                         hl_device_reset(hdev, true, false);
5761                 break;
5762
5763         case GAUDI_EVENT_GIC500:
5764         case GAUDI_EVENT_AXI_ECC:
5765         case GAUDI_EVENT_L2_RAM_ECC:
5766         case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
5767                 gaudi_print_irq_info(hdev, event_type, false);
5768                 if (hdev->hard_reset_on_fw_events)
5769                         hl_device_reset(hdev, true, false);
5770                 break;
5771
5772         case GAUDI_EVENT_HBM0_SPI_0:
5773         case GAUDI_EVENT_HBM1_SPI_0:
5774         case GAUDI_EVENT_HBM2_SPI_0:
5775         case GAUDI_EVENT_HBM3_SPI_0:
5776                 gaudi_print_irq_info(hdev, event_type, false);
5777                 gaudi_hbm_read_interrupts(hdev,
5778                                           gaudi_hbm_event_to_dev(event_type));
5779                 if (hdev->hard_reset_on_fw_events)
5780                         hl_device_reset(hdev, true, false);
5781                 break;
5782
5783         case GAUDI_EVENT_HBM0_SPI_1:
5784         case GAUDI_EVENT_HBM1_SPI_1:
5785         case GAUDI_EVENT_HBM2_SPI_1:
5786         case GAUDI_EVENT_HBM3_SPI_1:
5787                 gaudi_print_irq_info(hdev, event_type, false);
5788                 gaudi_hbm_read_interrupts(hdev,
5789                                           gaudi_hbm_event_to_dev(event_type));
5790                 break;
5791
5792         case GAUDI_EVENT_TPC0_DEC:
5793         case GAUDI_EVENT_TPC1_DEC:
5794         case GAUDI_EVENT_TPC2_DEC:
5795         case GAUDI_EVENT_TPC3_DEC:
5796         case GAUDI_EVENT_TPC4_DEC:
5797         case GAUDI_EVENT_TPC5_DEC:
5798         case GAUDI_EVENT_TPC6_DEC:
5799         case GAUDI_EVENT_TPC7_DEC:
5800                 gaudi_print_irq_info(hdev, event_type, true);
5801                 reset_required = gaudi_tpc_read_interrupts(hdev,
5802                                         tpc_dec_event_to_tpc_id(event_type),
5803                                         "AXI_SLV_DEC_Error");
5804                 if (reset_required) {
5805                         dev_err(hdev->dev, "hard reset required due to %s\n",
5806                                 gaudi_irq_map_table[event_type].name);
5807
5808                         if (hdev->hard_reset_on_fw_events)
5809                                 hl_device_reset(hdev, true, false);
5810                 } else {
5811                         hl_fw_unmask_irq(hdev, event_type);
5812                 }
5813                 break;
5814
5815         case GAUDI_EVENT_TPC0_KRN_ERR:
5816         case GAUDI_EVENT_TPC1_KRN_ERR:
5817         case GAUDI_EVENT_TPC2_KRN_ERR:
5818         case GAUDI_EVENT_TPC3_KRN_ERR:
5819         case GAUDI_EVENT_TPC4_KRN_ERR:
5820         case GAUDI_EVENT_TPC5_KRN_ERR:
5821         case GAUDI_EVENT_TPC6_KRN_ERR:
5822         case GAUDI_EVENT_TPC7_KRN_ERR:
5823                 gaudi_print_irq_info(hdev, event_type, true);
5824                 reset_required = gaudi_tpc_read_interrupts(hdev,
5825                                         tpc_krn_event_to_tpc_id(event_type),
5826                                         "KRN_ERR");
5827                 if (reset_required) {
5828                         dev_err(hdev->dev, "hard reset required due to %s\n",
5829                                 gaudi_irq_map_table[event_type].name);
5830
5831                         if (hdev->hard_reset_on_fw_events)
5832                                 hl_device_reset(hdev, true, false);
5833                 } else {
5834                         hl_fw_unmask_irq(hdev, event_type);
5835                 }
5836                 break;
5837
5838         case GAUDI_EVENT_PCIE_CORE_SERR:
5839         case GAUDI_EVENT_PCIE_IF_SERR:
5840         case GAUDI_EVENT_PCIE_PHY_SERR:
5841         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5842         case GAUDI_EVENT_MME0_ACC_SERR:
5843         case GAUDI_EVENT_MME0_SBAB_SERR:
5844         case GAUDI_EVENT_MME1_ACC_SERR:
5845         case GAUDI_EVENT_MME1_SBAB_SERR:
5846         case GAUDI_EVENT_MME2_ACC_SERR:
5847         case GAUDI_EVENT_MME2_SBAB_SERR:
5848         case GAUDI_EVENT_MME3_ACC_SERR:
5849         case GAUDI_EVENT_MME3_SBAB_SERR:
5850         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
5851         case GAUDI_EVENT_CPU_IF_ECC_SERR:
5852         case GAUDI_EVENT_PSOC_MEM_SERR:
5853         case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
5854         case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
5855         case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
5856         case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
5857                 fallthrough;
5858         case GAUDI_EVENT_MMU_SERR:
5859                 gaudi_print_irq_info(hdev, event_type, true);
5860                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
5861                 hl_fw_unmask_irq(hdev, event_type);
5862                 break;
5863
5864         case GAUDI_EVENT_PCIE_DEC:
5865         case GAUDI_EVENT_MME0_WBC_RSP:
5866         case GAUDI_EVENT_MME0_SBAB0_RSP:
5867         case GAUDI_EVENT_MME1_WBC_RSP:
5868         case GAUDI_EVENT_MME1_SBAB0_RSP:
5869         case GAUDI_EVENT_MME2_WBC_RSP:
5870         case GAUDI_EVENT_MME2_SBAB0_RSP:
5871         case GAUDI_EVENT_MME3_WBC_RSP:
5872         case GAUDI_EVENT_MME3_SBAB0_RSP:
5873         case GAUDI_EVENT_CPU_AXI_SPLITTER:
5874         case GAUDI_EVENT_PSOC_AXI_DEC:
5875         case GAUDI_EVENT_PSOC_PRSTN_FALL:
5876         case GAUDI_EVENT_MMU_PAGE_FAULT:
5877         case GAUDI_EVENT_MMU_WR_PERM:
5878         case GAUDI_EVENT_RAZWI_OR_ADC:
5879         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5880         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5881         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5882                 fallthrough;
5883         case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
5884                 gaudi_print_irq_info(hdev, event_type, true);
5885                 gaudi_handle_qman_err(hdev, event_type);
5886                 hl_fw_unmask_irq(hdev, event_type);
5887                 break;
5888
5889         case GAUDI_EVENT_RAZWI_OR_ADC_SW:
5890                 gaudi_print_irq_info(hdev, event_type, true);
5891                 if (hdev->hard_reset_on_fw_events)
5892                         hl_device_reset(hdev, true, false);
5893                 break;
5894
5895         case GAUDI_EVENT_TPC0_BMON_SPMU:
5896         case GAUDI_EVENT_TPC1_BMON_SPMU:
5897         case GAUDI_EVENT_TPC2_BMON_SPMU:
5898         case GAUDI_EVENT_TPC3_BMON_SPMU:
5899         case GAUDI_EVENT_TPC4_BMON_SPMU:
5900         case GAUDI_EVENT_TPC5_BMON_SPMU:
5901         case GAUDI_EVENT_TPC6_BMON_SPMU:
5902         case GAUDI_EVENT_TPC7_BMON_SPMU:
5903         case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
5904                 gaudi_print_irq_info(hdev, event_type, false);
5905                 hl_fw_unmask_irq(hdev, event_type);
5906                 break;
5907
5908         case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
5909                 gaudi_print_clk_change_info(hdev, event_type);
5910                 hl_fw_unmask_irq(hdev, event_type);
5911                 break;
5912
5913         case GAUDI_EVENT_PSOC_GPIO_U16_0:
5914                 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
5915                 dev_err(hdev->dev,
5916                         "Received high temp H/W interrupt %d (cause %d)\n",
5917                         event_type, cause);
5918                 break;
5919
5920         default:
5921                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
5922                                 event_type);
5923                 break;
5924         }
5925 }
5926
5927 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
5928                                         u32 *size)
5929 {
5930         struct gaudi_device *gaudi = hdev->asic_specific;
5931
5932         if (aggregate) {
5933                 *size = (u32) sizeof(gaudi->events_stat_aggregate);
5934                 return gaudi->events_stat_aggregate;
5935         }
5936
5937         *size = (u32) sizeof(gaudi->events_stat);
5938         return gaudi->events_stat;
5939 }
5940
5941 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
5942                                         u32 flags)
5943 {
5944         struct gaudi_device *gaudi = hdev->asic_specific;
5945         u32 status, timeout_usec;
5946         int rc;
5947
5948         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5949                 hdev->hard_reset_pending)
5950                 return 0;
5951
5952         if (hdev->pldm)
5953                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
5954         else
5955                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5956
5957         mutex_lock(&hdev->mmu_cache_lock);
5958
5959         /* L0 & L1 invalidation */
5960         WREG32(mmSTLB_INV_PS, 3);
5961         WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
5962         WREG32(mmSTLB_INV_PS, 2);
5963
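             /*
              * Poll until the MMU clears STLB_INV_PS, i.e. the invalidation
              * request has been consumed.
              */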
5964         rc = hl_poll_timeout(
5965                 hdev,
5966                 mmSTLB_INV_PS,
5967                 status,
5968                 !status,
5969                 1000,
5970                 timeout_usec);
5971
5972         WREG32(mmSTLB_INV_SET, 0);
5973
5974         mutex_unlock(&hdev->mmu_cache_lock);
5975
5976         if (rc) {
5977                 dev_err_ratelimited(hdev->dev,
5978                                         "MMU cache invalidation timeout\n");
5979                 hl_device_reset(hdev, true, false);
5980         }
5981
5982         return rc;
5983 }
5984
5985 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
5986                                 bool is_hard, u32 asid, u64 va, u64 size)
5987 {
5988         struct gaudi_device *gaudi = hdev->asic_specific;
5989         u32 status, timeout_usec;
5990         u32 inv_data;
5991         u32 pi;
5992         int rc;
5993
5994         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5995                 hdev->hard_reset_pending)
5996                 return 0;
5997
5998         mutex_lock(&hdev->mmu_cache_lock);
5999
6000         if (hdev->pldm)
6001                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
6002         else
6003                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
6004
6005         /*
6006          * TODO: currently invalidate entire L0 & L1 as in regular hard
6007          * invalidation. Need to apply invalidation of specific cache
6008          * lines with mask of ASID & VA & size.
6009          * Note that L1 will be flushed entirely in any case.
6010          */
6011
6012         /* L0 & L1 invalidation */
6013         inv_data = RREG32(mmSTLB_CACHE_INV);
6014         /* PI is 8 bit */
6015         pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
6016         WREG32(mmSTLB_CACHE_INV,
6017                 (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
6018
6019         rc = hl_poll_timeout(
6020                 hdev,
6021                 mmSTLB_INV_CONSUMER_INDEX,
6022                 status,
6023                 status == pi,
6024                 1000,
6025                 timeout_usec);
6026
6027         mutex_unlock(&hdev->mmu_cache_lock);
6028
6029         if (rc) {
6030                 dev_err_ratelimited(hdev->dev,
6031                                         "MMU cache invalidation timeout\n");
6032                 hl_device_reset(hdev, true, false);
6033         }
6034
6035         return rc;
6036 }
6037
6038 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
6039                                         u32 asid, u64 phys_addr)
6040 {
6041         u32 status, timeout_usec;
6042         int rc;
6043
6044         if (hdev->pldm)
6045                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
6046         else
6047                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
6048
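             /*
              * Program the hop0 page-table address of this ASID and kick the
              * MMU; HW clears bit 31 of MMU_BUSY when the update completes.
              */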
6049         WREG32(MMU_ASID, asid);
6050         WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
6051         WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
6052         WREG32(MMU_BUSY, 0x80000000);
6053
6054         rc = hl_poll_timeout(
6055                 hdev,
6056                 MMU_BUSY,
6057                 status,
6058                 !(status & 0x80000000),
6059                 1000,
6060                 timeout_usec);
6061
6062         if (rc) {
6063                 dev_err(hdev->dev,
6064                         "Timeout during MMU hop0 config of asid %d\n", asid);
6065                 return rc;
6066         }
6067
6068         return 0;
6069 }
6070
6071 static int gaudi_send_heartbeat(struct hl_device *hdev)
6072 {
6073         struct gaudi_device *gaudi = hdev->asic_specific;
6074
6075         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6076                 return 0;
6077
6078         return hl_fw_send_heartbeat(hdev);
6079 }
6080
6081 static int gaudi_cpucp_info_get(struct hl_device *hdev)
6082 {
6083         struct gaudi_device *gaudi = hdev->asic_specific;
6084         struct asic_fixed_properties *prop = &hdev->asic_prop;
6085         int rc;
6086
6087         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6088                 return 0;
6089
6090         rc = hl_fw_cpucp_info_get(hdev);
6091         if (rc)
6092                 return rc;
6093
6094         if (!strlen(prop->cpucp_info.card_name))
6095                 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
6096                                 CARD_NAME_MAX_LEN);
6097
6098         hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
6099
6100         if (hdev->card_type == cpucp_card_type_pci)
6101                 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
6102         else if (hdev->card_type == cpucp_card_type_pmc)
6103                 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
6104
6105         hdev->max_power = prop->max_power_default;
6106
6107         return 0;
6108 }
6109
6110 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
6111                                         struct seq_file *s)
6112 {
6113         struct gaudi_device *gaudi = hdev->asic_specific;
6114         const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
6115         const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
6116         u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
6117         bool is_idle = true, is_eng_idle, is_slave;
6118         u64 offset;
6119         int i, dma_id;
6120
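             /*
              * As with other status-register reads in this driver, clock
              * gating is disabled while the engines are sampled.
              */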
6121         mutex_lock(&gaudi->clk_gate_mutex);
6122
6123         hdev->asic_funcs->disable_clock_gating(hdev);
6124
6125         if (s)
6126                 seq_puts(s,
6127                         "\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
6128                         "---  -------  ------------  ----------  -------------\n");
6129
6130         for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
6131                 dma_id = gaudi_dma_assignment[i];
6132                 offset = dma_id * DMA_QMAN_OFFSET;
6133
6134                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
6135                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
6136                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
6137                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6138                                 IS_DMA_IDLE(dma_core_sts0);
6139                 is_idle &= is_eng_idle;
6140
6141                 if (mask)
6142                         *mask |= ((u64) !is_eng_idle) <<
6143                                         (GAUDI_ENGINE_ID_DMA_0 + dma_id);
6144                 if (s)
6145                         seq_printf(s, fmt, dma_id,
6146                                 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
6147                                 qm_cgm_sts, dma_core_sts0);
6148         }
6149
6150         if (s)
6151                 seq_puts(s,
6152                         "\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
6153                         "---  -------  ------------  ----------  ----------\n");
6154
6155         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6156                 offset = i * TPC_QMAN_OFFSET;
6157                 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
6158                 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
6159                 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
6160                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6161                                 IS_TPC_IDLE(tpc_cfg_sts);
6162                 is_idle &= is_eng_idle;
6163
6164                 if (mask)
6165                         *mask |= ((u64) !is_eng_idle) <<
6166                                                 (GAUDI_ENGINE_ID_TPC_0 + i);
6167                 if (s)
6168                         seq_printf(s, fmt, i,
6169                                 is_eng_idle ? "Y" : "N",
6170                                 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
6171         }
6172
6173         if (s)
6174                 seq_puts(s,
6175                         "\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
6176                         "---  -------  ------------  ----------  -----------\n");
6177
6178         for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
6179                 offset = i * MME_QMAN_OFFSET;
6180                 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
6181                 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
6182
6183                 /* MME 1 & 3 are slaves, no need to check their QMANs */
6184                 is_slave = i % 2;
6185                 if (!is_slave) {
6186                         qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
6187                         qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
6188                         is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
6189                 }
6190
6191                 is_idle &= is_eng_idle;
6192
6193                 if (mask)
6194                         *mask |= ((u64) !is_eng_idle) <<
6195                                                 (GAUDI_ENGINE_ID_MME_0 + i);
6196                 if (s) {
6197                         if (!is_slave)
6198                                 seq_printf(s, fmt, i,
6199                                         is_eng_idle ? "Y" : "N",
6200                                         qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
6201                         else
6202                                 seq_printf(s, mme_slave_fmt, i,
6203                                         is_eng_idle ? "Y" : "N", "-",
6204                                         "-", mme_arch_sts);
6205                 }
6206         }
6207
6208         if (s)
6209                 seq_puts(s, "\n");
6210
6211         hdev->asic_funcs->set_clock_gating(hdev);
6212
6213         mutex_unlock(&gaudi->clk_gate_mutex);
6214
6215         return is_idle;
6216 }
6217
6218 static void gaudi_hw_queues_lock(struct hl_device *hdev)
6219         __acquires(&gaudi->hw_queues_lock)
6220 {
6221         struct gaudi_device *gaudi = hdev->asic_specific;
6222
6223         spin_lock(&gaudi->hw_queues_lock);
6224 }
6225
6226 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
6227         __releases(&gaudi->hw_queues_lock)
6228 {
6229         struct gaudi_device *gaudi = hdev->asic_specific;
6230
6231         spin_unlock(&gaudi->hw_queues_lock);
6232 }
6233
6234 static u32 gaudi_get_pci_id(struct hl_device *hdev)
6235 {
6236         return hdev->pdev->device;
6237 }
6238
6239 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
6240                                 size_t max_size)
6241 {
6242         struct gaudi_device *gaudi = hdev->asic_specific;
6243
6244         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6245                 return 0;
6246
6247         return hl_fw_get_eeprom_data(hdev, data, max_size);
6248 }
6249
6250 /*
6251  * this function should be used only during initialization and/or after reset,
6252  * when there are no active users.
6253  */
6254 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
6255                                 u32 tpc_id)
6256 {
6257         struct gaudi_device *gaudi = hdev->asic_specific;
6258         u64 kernel_timeout;
6259         u32 status, offset;
6260         int rc;
6261
6262         offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
6263
6264         if (hdev->pldm)
6265                 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
6266         else
6267                 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
6268
6269         mutex_lock(&gaudi->clk_gate_mutex);
6270
6271         hdev->asic_funcs->disable_clock_gating(hdev);
6272
6273         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
6274                         lower_32_bits(tpc_kernel));
6275         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
6276                         upper_32_bits(tpc_kernel));
6277
6278         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
6279                         lower_32_bits(tpc_kernel));
6280         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
6281                         upper_32_bits(tpc_kernel));
6282         /* set a valid LUT pointer, content is of no significance */
6283         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
6284                         lower_32_bits(tpc_kernel));
6285         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
6286                         upper_32_bits(tpc_kernel));
6287
6288         WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
6289                         lower_32_bits(CFG_BASE +
6290                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
6291
6292         WREG32(mmTPC0_CFG_TPC_CMD + offset,
6293                         (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
6294                         1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
6295         /* wait a bit for the engine to start executing */
6296         usleep_range(1000, 1500);
6297
6298         /* wait until engine has finished executing */
6299         rc = hl_poll_timeout(
6300                 hdev,
6301                 mmTPC0_CFG_STATUS + offset,
6302                 status,
6303                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6304                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6305                 1000,
6306                 kernel_timeout);
6307
6308         if (rc) {
6309                 dev_err(hdev->dev,
6310                         "Timeout while waiting for TPC%d icache prefetch\n",
6311                         tpc_id);
6312                 hdev->asic_funcs->set_clock_gating(hdev);
6313                 mutex_unlock(&gaudi->clk_gate_mutex);
6314                 return -EIO;
6315         }
6316
6317         WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
6318                         1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
6319
6320         /* wait a bit for the engine to start executing */
6321         usleep_range(1000, 1500);
6322
6323         /* wait until engine has finished executing */
6324         rc = hl_poll_timeout(
6325                 hdev,
6326                 mmTPC0_CFG_STATUS + offset,
6327                 status,
6328                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6329                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6330                 1000,
6331                 kernel_timeout);
6332
6333         if (rc) {
6334                 dev_err(hdev->dev,
6335                         "Timeout while waiting for TPC%d vector pipe\n",
6336                         tpc_id);
6337                 hdev->asic_funcs->set_clock_gating(hdev);
6338                 mutex_unlock(&gaudi->clk_gate_mutex);
6339                 return -EIO;
6340         }
6341
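             /* The kernel has fully drained once the WQ in-flight counter reaches zero */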
6342         rc = hl_poll_timeout(
6343                 hdev,
6344                 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
6345                 status,
6346                 (status == 0),
6347                 1000,
6348                 kernel_timeout);
6349
6350         hdev->asic_funcs->set_clock_gating(hdev);
6351         mutex_unlock(&gaudi->clk_gate_mutex);
6352
6353         if (rc) {
6354                 dev_err(hdev->dev,
6355                         "Timeout while waiting for TPC%d kernel to execute\n",
6356                         tpc_id);
6357                 return -EIO;
6358         }
6359
6360         return 0;
6361 }
6362
6363 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
6364 {
6365         return RREG32(mmHW_STATE);
6366 }
6367
6368 static int gaudi_ctx_init(struct hl_ctx *ctx)
6369 {
6370         return 0;
6371 }
6372
6373 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
6374 {
6375         return gaudi_cq_assignment[cq_idx];
6376 }
6377
6378 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
6379 {
6380         return sizeof(struct packet_msg_short) +
6381                         sizeof(struct packet_msg_prot) * 2;
6382 }
6383
6384 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
6385 {
6386         return sizeof(struct packet_msg_short) * 4 +
6387                         sizeof(struct packet_fence) +
6388                         sizeof(struct packet_msg_prot) * 2;
6389 }
6390
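     /* Build a signal CB: one MSG_SHORT packet that adds 1 to the given SOB */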
6391 static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
6392 {
6393         struct hl_cb *cb = (struct hl_cb *) data;
6394         struct packet_msg_short *pkt;
6395         u32 value, ctl;
6396
6397         pkt = cb->kernel_address;
6398         memset(pkt, 0, sizeof(*pkt));
6399
6400         /* Inc by 1, Mode ADD */
6401         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
6402         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
6403
6404         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
6405         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
6406         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
6407         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6408         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 1);
6409         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6410         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
6411
6412         pkt->value = cpu_to_le32(value);
6413         pkt->ctl = cpu_to_le32(ctl);
6414 }
6415
6416 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
6417                                         u16 addr)
6418 {
6419         u32 ctl, pkt_size = sizeof(*pkt);
6420
6421         memset(pkt, 0, pkt_size);
6422
6423         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
6424         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
6425         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6426         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6427         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6428         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 0); /* last pkt MB */
6429
6430         pkt->value = cpu_to_le32(value);
6431         pkt->ctl = cpu_to_le32(ctl);
6432
6433         return pkt_size;
6434 }
6435
6436 static u32 gaudi_add_arm_monitor_pkt(struct packet_msg_short *pkt, u16 sob_id,
6437                                         u16 sob_val, u16 addr)
6438 {
6439         u32 ctl, value, pkt_size = sizeof(*pkt);
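             /*
              * A monitor arms against a group of 8 SOBs; clearing the bit of
              * the requested SOB leaves it as the only unmasked object in the
              * >= comparison.
              */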
6440         u8 mask = ~(1 << (sob_id & 0x7));
6441
6442         memset(pkt, 0, pkt_size);
6443
6444         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_id / 8);
6445         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
6446         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
6447                         0); /* GREATER OR EQUAL */
6448         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
6449
6450         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
6451         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
6452         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
6453         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6454         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6455         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6456         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
6457
6458         pkt->value = cpu_to_le32(value);
6459         pkt->ctl = cpu_to_le32(ctl);
6460
6461         return pkt_size;
6462 }
6463
6464 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
6465 {
6466         u32 ctl, cfg, pkt_size = sizeof(*pkt);
6467
6468         memset(pkt, 0, pkt_size);
6469
6470         cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
6471         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
6472         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
6473
6474         ctl = FIELD_PREP(GAUDI_PKT_FENCE_CTL_OPCODE_MASK, PACKET_FENCE);
6475         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6476         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6477         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
6478
6479         pkt->cfg = cpu_to_le32(cfg);
6480         pkt->ctl = cpu_to_le32(ctl);
6481
6482         return pkt_size;
6483 }
6484
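     /*
      * Build a wait CB: four MSG_SHORT packets program a sync manager monitor
      * to write 1 to the queue's CP FENCE2 register once the SOB reaches
      * sob_val, and a final FENCE packet stalls the CP until that write
      * arrives.
      */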
6485 static void gaudi_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
6486                         u16 sob_val, u16 mon_id, u32 q_idx)
6487 {
6488         struct hl_cb *cb = (struct hl_cb *) data;
6489         void *buf = cb->kernel_address;
6490         u64 monitor_base, fence_addr = 0;
6491         u32 size = 0;
6492         u16 msg_addr_offset;
6493
6494         switch (q_idx) {
6495         case GAUDI_QUEUE_ID_DMA_0_0:
6496                 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_0;
6497                 break;
6498         case GAUDI_QUEUE_ID_DMA_0_1:
6499                 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_1;
6500                 break;
6501         case GAUDI_QUEUE_ID_DMA_0_2:
6502                 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_2;
6503                 break;
6504         case GAUDI_QUEUE_ID_DMA_0_3:
6505                 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_3;
6506                 break;
6507         case GAUDI_QUEUE_ID_DMA_1_0:
6508                 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_0;
6509                 break;
6510         case GAUDI_QUEUE_ID_DMA_1_1:
6511                 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_1;
6512                 break;
6513         case GAUDI_QUEUE_ID_DMA_1_2:
6514                 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_2;
6515                 break;
6516         case GAUDI_QUEUE_ID_DMA_1_3:
6517                 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_3;
6518                 break;
6519         case GAUDI_QUEUE_ID_DMA_5_0:
6520                 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_0;
6521                 break;
6522         case GAUDI_QUEUE_ID_DMA_5_1:
6523                 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_1;
6524                 break;
6525         case GAUDI_QUEUE_ID_DMA_5_2:
6526                 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_2;
6527                 break;
6528         case GAUDI_QUEUE_ID_DMA_5_3:
6529                 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_3;
6530                 break;
6531         default:
6532                 /* queue index should be valid here */
6533                 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
6534                                 q_idx);
6535                 return;
6536         }
6537
6538         fence_addr += CFG_BASE;
6539
6540         /*
6541          * monitor_base should be the content of the base0 address registers,
6542          * so it will be added to the msg short offsets
6543          */
6544         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
6545
6546         /* First monitor config packet: low address of the sync */
6547         msg_addr_offset =
6548                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
6549                                 monitor_base;
6550
6551         size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
6552                                         msg_addr_offset);
6553
6554         /* Second monitor config packet: high address of the sync */
6555         msg_addr_offset =
6556                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
6557                                 monitor_base;
6558
6559         size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
6560                                         msg_addr_offset);
6561
6562         /*
6563          * Third monitor config packet: the payload, i.e. what to write when the
6564          * sync triggers
6565          */
6566         msg_addr_offset =
6567                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
6568                                 monitor_base;
6569
6570         size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
6571
6572         /* Fourth monitor config packet: bind the monitor to a sync object */
6573         msg_addr_offset =
6574                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
6575                                 monitor_base;
6576         size += gaudi_add_arm_monitor_pkt(buf + size, sob_id, sob_val,
6577                                                 msg_addr_offset);
6578
6579         /* Fence packet */
6580         size += gaudi_add_fence_pkt(buf + size);
6581 }
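
/*
 * Illustrative sketch, not part of the driver: the wait CB assembled above is
 * exactly four MSG_SHORT packets (monitor payload address low/high, payload
 * data, arm monitor) followed by one FENCE packet, so a matching size helper
 * would look like the function below. The real size is reported to the common
 * code through the .get_wait_cb_size callback; the helper name here is
 * hypothetical.
 */
static inline u32 gaudi_wait_cb_size_sketch(void)
{
	/* four monitor-setup MSG_SHORT packets + one FENCE packet */
	return 4 * sizeof(struct packet_msg_short) +
			sizeof(struct packet_fence);
}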
6582
6583 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
6584 {
6585         struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
6586
6587         dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
6588                 hw_sob->sob_id);
6589
6590         WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
6591                 0);
6592
6593         kref_init(&hw_sob->kref);
6594 }
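
/*
 * The SOB_OBJ registers are consecutive 32-bit registers in the W_S sync
 * manager, hence the "* 4" stride above; re-initializing the kref afterwards
 * allows the common code to reuse this sync object for future submissions.
 */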
6595
6596 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
6597 {
6598         if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
6599                                                         HL_POWER9_HOST_MAGIC) {
6600                 hdev->power9_64bit_dma_enable = 1;
6601                 hdev->dma_mask = 64;
6602         } else {
6603                 hdev->power9_64bit_dma_enable = 0;
6604                 hdev->dma_mask = 48;
6605         }
6606 }
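
/*
 * Illustrative sketch, not the driver's actual code: dma_mask as set above is
 * only a bit count (48 or 64). The common code still has to hand it to the
 * DMA API before it takes effect, roughly along these lines (the helper name
 * is hypothetical):
 */
static inline int gaudi_apply_dma_mask_sketch(struct hl_device *hdev)
{
	/* convert the bit count into an actual DMA address mask */
	return dma_set_mask_and_coherent(&hdev->pdev->dev,
					DMA_BIT_MASK(hdev->dma_mask));
}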
6607
6608 static u64 gaudi_get_device_time(struct hl_device *hdev)
6609 {
6610         u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
6611
6612         return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
6613 }
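
/*
 * Illustrative sketch, an assumption rather than the driver's code: the two
 * halves of the timestamp counter are sampled with separate register reads,
 * so the low word may wrap between them. A wrap-safe variant would re-read
 * the high word and retry on a mismatch (the function name is hypothetical):
 */
static inline u64 gaudi_get_device_time_stable_sketch(struct hl_device *hdev)
{
	u32 hi, lo;

	do {
		hi = RREG32(mmPSOC_TIMESTAMP_CNTCVU);
		lo = RREG32(mmPSOC_TIMESTAMP_CNTCVL);
	} while (hi != RREG32(mmPSOC_TIMESTAMP_CNTCVU));

	return ((u64) hi << 32) | lo;
}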
6614
6615 static const struct hl_asic_funcs gaudi_funcs = {
6616         .early_init = gaudi_early_init,
6617         .early_fini = gaudi_early_fini,
6618         .late_init = gaudi_late_init,
6619         .late_fini = gaudi_late_fini,
6620         .sw_init = gaudi_sw_init,
6621         .sw_fini = gaudi_sw_fini,
6622         .hw_init = gaudi_hw_init,
6623         .hw_fini = gaudi_hw_fini,
6624         .halt_engines = gaudi_halt_engines,
6625         .suspend = gaudi_suspend,
6626         .resume = gaudi_resume,
6627         .cb_mmap = gaudi_cb_mmap,
6628         .ring_doorbell = gaudi_ring_doorbell,
6629         .pqe_write = gaudi_pqe_write,
6630         .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
6631         .asic_dma_free_coherent = gaudi_dma_free_coherent,
6632         .get_int_queue_base = gaudi_get_int_queue_base,
6633         .test_queues = gaudi_test_queues,
6634         .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
6635         .asic_dma_pool_free = gaudi_dma_pool_free,
6636         .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
6637         .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
6638         .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
6639         .cs_parser = gaudi_cs_parser,
6640         .asic_dma_map_sg = gaudi_dma_map_sg,
6641         .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
6642         .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
6643         .update_eq_ci = gaudi_update_eq_ci,
6644         .context_switch = gaudi_context_switch,
6645         .restore_phase_topology = gaudi_restore_phase_topology,
6646         .debugfs_read32 = gaudi_debugfs_read32,
6647         .debugfs_write32 = gaudi_debugfs_write32,
6648         .debugfs_read64 = gaudi_debugfs_read64,
6649         .debugfs_write64 = gaudi_debugfs_write64,
6650         .add_device_attr = gaudi_add_device_attr,
6651         .handle_eqe = gaudi_handle_eqe,
6652         .set_pll_profile = gaudi_set_pll_profile,
6653         .get_events_stat = gaudi_get_events_stat,
6654         .read_pte = gaudi_read_pte,
6655         .write_pte = gaudi_write_pte,
6656         .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
6657         .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
6658         .send_heartbeat = gaudi_send_heartbeat,
6659         .set_clock_gating = gaudi_set_clock_gating,
6660         .disable_clock_gating = gaudi_disable_clock_gating,
6661         .debug_coresight = gaudi_debug_coresight,
6662         .is_device_idle = gaudi_is_device_idle,
6663         .soft_reset_late_init = gaudi_soft_reset_late_init,
6664         .hw_queues_lock = gaudi_hw_queues_lock,
6665         .hw_queues_unlock = gaudi_hw_queues_unlock,
6666         .get_pci_id = gaudi_get_pci_id,
6667         .get_eeprom_data = gaudi_get_eeprom_data,
6668         .send_cpu_message = gaudi_send_cpu_message,
6669         .get_hw_state = gaudi_get_hw_state,
6670         .pci_bars_map = gaudi_pci_bars_map,
6671         .init_iatu = gaudi_init_iatu,
6672         .rreg = hl_rreg,
6673         .wreg = hl_wreg,
6674         .halt_coresight = gaudi_halt_coresight,
6675         .ctx_init = gaudi_ctx_init,
6676         .get_clk_rate = gaudi_get_clk_rate,
6677         .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
6678         .read_device_fw_version = gaudi_read_device_fw_version,
6679         .load_firmware_to_device = gaudi_load_firmware_to_device,
6680         .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
6681         .get_signal_cb_size = gaudi_get_signal_cb_size,
6682         .get_wait_cb_size = gaudi_get_wait_cb_size,
6683         .gen_signal_cb = gaudi_gen_signal_cb,
6684         .gen_wait_cb = gaudi_gen_wait_cb,
6685         .reset_sob = gaudi_reset_sob,
6686         .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
6687         .get_device_time = gaudi_get_device_time
6688 };
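
/*
 * The table above is GAUDI's implementation of the driver's ASIC abstraction:
 * the ASIC-independent code calls these helpers only through hdev->asic_funcs,
 * e.g. (illustrative call site, not taken verbatim from the common code):
 *
 *	rc = hdev->asic_funcs->hw_init(hdev);
 */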
6689
6690 /**
6691  * gaudi_set_asic_funcs - set GAUDI function pointers
6692  *
6693  * @hdev: pointer to hl_device structure
6694  *
6695  */
6696 void gaudi_set_asic_funcs(struct hl_device *hdev)
6697 {
6698         hdev->asic_funcs = &gaudi_funcs;
6699 }
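
/*
 * Illustrative sketch, assuming the usual habanalabs probe flow: the common
 * code selects the per-ASIC function table by device type early during device
 * initialization, roughly along these lines:
 *
 *	switch (hdev->asic_type) {
 *	case ASIC_GAUDI:
 *		gaudi_set_asic_funcs(hdev);
 *		break;
 *	...
 *	}
 */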