GNU Linux-libre 6.8.9-gnu
[releases.git] / drivers/accel/habanalabs/gaudi/gaudi.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: The driver needs to parse the CB, but WREG must stay
43  *                      allowed because of TDMA (tensor DMA). Hence, WREG is
44  *                      never secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61
62 #define GAUDI_BOOT_FIT_FILE     "/*(DEBLOBBED)*/"
63 #define GAUDI_LINUX_FW_FILE     "/*(DEBLOBBED)*/"
64 #define GAUDI_TPC_FW_FILE       "/*(DEBLOBBED)*/"
65
66 /*(DEBLOBBED)*/
67
68 #define GAUDI_DMA_POOL_BLK_SIZE         0x100 /* 256 bytes */
69
70 #define GAUDI_RESET_TIMEOUT_MSEC        2000            /* 2000ms */
71 #define GAUDI_RESET_WAIT_MSEC           1               /* 1ms */
72 #define GAUDI_CPU_RESET_WAIT_MSEC       200             /* 200ms */
73 #define GAUDI_TEST_QUEUE_WAIT_USEC      100000          /* 100ms */
74
75 #define GAUDI_PLDM_RESET_WAIT_MSEC      1000            /* 1s */
76 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC  20000           /* 20s */
77 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000         /* 1s */
78 #define GAUDI_PLDM_MMU_TIMEOUT_USEC     (MMU_CONFIG_TIMEOUT_USEC * 100)
79 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC   (HL_DEVICE_TIMEOUT_USEC * 30)
80 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
81 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000         /* 4s */
82 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC   4000000         /* 4s */
83 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC  15000000        /* 15s */
84
85 #define GAUDI_QMAN0_FENCE_VAL           0x72E91AB9
86
87 #define GAUDI_MAX_STRING_LEN            20
88
89 #define GAUDI_CB_POOL_CB_CNT            512
90 #define GAUDI_CB_POOL_CB_SIZE           0x20000 /* 128KB */
91
92 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT   3
93
94 #define GAUDI_NUM_OF_TPC_INTR_CAUSE     20
95
96 #define GAUDI_NUM_OF_QM_ERR_CAUSE       16
97
98 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE   3
99
100 #define GAUDI_ARB_WDT_TIMEOUT           0xEE6b27FF /* 8 seconds */
101
102 #define HBM_SCRUBBING_TIMEOUT_US        1000000 /* 1s */
103
104 #define BIN_REG_STRING_SIZE     sizeof("0b10101010101010101010101010101010")
105
106 #define MONITOR_SOB_STRING_SIZE         256
107
108 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
109         GAUDI_QUEUE_ID_DMA_0_0,
110         GAUDI_QUEUE_ID_DMA_0_1,
111         GAUDI_QUEUE_ID_DMA_0_2,
112         GAUDI_QUEUE_ID_DMA_0_3,
113         GAUDI_QUEUE_ID_DMA_1_0,
114         GAUDI_QUEUE_ID_DMA_1_1,
115         GAUDI_QUEUE_ID_DMA_1_2,
116         GAUDI_QUEUE_ID_DMA_1_3
117 };
118
119 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
120         [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
121         [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
122         [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
123         [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
124         [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
125         [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
126         [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
127         [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
128 };
129
130 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
131         [0] = GAUDI_QUEUE_ID_DMA_0_0,
132         [1] = GAUDI_QUEUE_ID_DMA_0_1,
133         [2] = GAUDI_QUEUE_ID_DMA_0_2,
134         [3] = GAUDI_QUEUE_ID_DMA_0_3,
135         [4] = GAUDI_QUEUE_ID_DMA_1_0,
136         [5] = GAUDI_QUEUE_ID_DMA_1_1,
137         [6] = GAUDI_QUEUE_ID_DMA_1_2,
138         [7] = GAUDI_QUEUE_ID_DMA_1_3,
139 };
140
141 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
142         [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
143         [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
144         [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
145         [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
146         [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
147         [PACKET_REPEAT]         = sizeof(struct packet_repeat),
148         [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
149         [PACKET_FENCE]          = sizeof(struct packet_fence),
150         [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
151         [PACKET_NOP]            = sizeof(struct packet_nop),
152         [PACKET_STOP]           = sizeof(struct packet_stop),
153         [PACKET_ARB_POINT]      = sizeof(struct packet_arb_point),
154         [PACKET_WAIT]           = sizeof(struct packet_wait),
155         [PACKET_LOAD_AND_EXE]   = sizeof(struct packet_load_and_exe)
156 };
157
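/*
 * Returns true only for opcodes that name a known Gaudi packet type (cf.
 * gaudi_packet_sizes above). Anything else can be rejected up front, e.g. by
 * the CB parsing path mentioned in the security scheme comment at the top of
 * this file.
 */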
158 static inline bool validate_packet_id(enum packet_id id)
159 {
160         switch (id) {
161         case PACKET_WREG_32:
162         case PACKET_WREG_BULK:
163         case PACKET_MSG_LONG:
164         case PACKET_MSG_SHORT:
165         case PACKET_CP_DMA:
166         case PACKET_REPEAT:
167         case PACKET_MSG_PROT:
168         case PACKET_FENCE:
169         case PACKET_LIN_DMA:
170         case PACKET_NOP:
171         case PACKET_STOP:
172         case PACKET_ARB_POINT:
173         case PACKET_WAIT:
174         case PACKET_LOAD_AND_EXE:
175                 return true;
176         default:
177                 return false;
178         }
179 }
180
181 static const char * const
182 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
183         "tpc_address_exceed_slm",
184         "tpc_div_by_0",
185         "tpc_spu_mac_overflow",
186         "tpc_spu_addsub_overflow",
187         "tpc_spu_abs_overflow",
188         "tpc_spu_fp_dst_nan_inf",
189         "tpc_spu_fp_dst_denorm",
190         "tpc_vpu_mac_overflow",
191         "tpc_vpu_addsub_overflow",
192         "tpc_vpu_abs_overflow",
193         "tpc_vpu_fp_dst_nan_inf",
194         "tpc_vpu_fp_dst_denorm",
195         "tpc_assertions",
196         "tpc_illegal_instruction",
197         "tpc_pc_wrap_around",
198         "tpc_qm_sw_err",
199         "tpc_hbw_rresp_err",
200         "tpc_hbw_bresp_err",
201         "tpc_lbw_rresp_err",
202         "tpc_lbw_bresp_err"
203 };
204
205 static const char * const
206 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
207         "PQ AXI HBW error",
208         "CQ AXI HBW error",
209         "CP AXI HBW error",
210         "CP error due to undefined OPCODE",
211         "CP encountered STOP OPCODE",
212         "CP AXI LBW error",
213         "CP WRREG32 or WRBULK returned error",
214         "N/A",
215         "FENCE 0 inc over max value and clipped",
216         "FENCE 1 inc over max value and clipped",
217         "FENCE 2 inc over max value and clipped",
218         "FENCE 3 inc over max value and clipped",
219         "FENCE 0 dec under min value and clipped",
220         "FENCE 1 dec under min value and clipped",
221         "FENCE 2 dec under min value and clipped",
222         "FENCE 3 dec under min value and clipped"
223 };
224
225 static const char * const
226 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
227         "Choice push while full error",
228         "Choice Q watchdog error",
229         "MSG AXI LBW returned with error"
230 };
231
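/*
 * Per-queue type map. In habanalabs terms, QUEUE_TYPE_EXT queues are the
 * externally visible queues whose PQs are managed by the driver on the host,
 * QUEUE_TYPE_CPU is the single queue used for driver <-> device-CPU (CPUCP)
 * messaging, and QUEUE_TYPE_INT queues are internal queues driven directly by
 * the on-device QMANs. (Rough summary of the common meaning of these enums;
 * the authoritative definitions live in the common habanalabs headers.)
 */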
232 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
233         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
234         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
235         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
236         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
237         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
238         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
239         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
240         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
241         QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
242         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
243         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
244         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
245         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
246         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
247         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
248         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
249         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
250         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
251         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
252         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
253         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
254         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
255         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
256         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
257         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
258         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
259         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
260         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
261         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
262         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
263         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
264         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
265         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
266         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
267         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
268         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
269         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
270         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
271         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
272         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
273         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
274         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
275         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
276         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
277         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
278         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
279         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
280         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
281         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
282         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
283         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
284         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
285         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
286         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
287         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
288         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
289         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
290         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
291         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
292         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
293         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
294         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
295         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
296         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
297         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
298         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
299         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
300         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
301         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
302         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
303         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
304         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
305         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
306         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
307         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
308         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
309         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
310         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
311         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
312         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
313         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
314         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
315         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
316         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
317         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
318         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
319         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
320         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
321         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
322         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
323         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
324         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
325         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
326         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
327         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
328         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
329         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
330         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
331         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
332         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
333         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
334         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
335         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
336         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
337         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
338         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
339         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
340         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
341         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
342         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
343         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
344         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
345         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
346 };
347
348 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
349         { .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
350         { .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
351         { .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
352         { .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
353         { .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
354         { .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
355         { .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
356         { .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
357         { .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
358         { .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
359         { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
360         { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
361         { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
362         { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
363         { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
364         { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
365         { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
366         { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
367         { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
368         { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
369         { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
370         { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
371         { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
372         { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
373         { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
374         { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
375         { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
376 };
377
378 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
379         { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
380         { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
381         { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
382         { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
383         { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
384         { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
385         { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
386         { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
387         { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
388         { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
389         { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
390 };
391
392 static s64 gaudi_state_dump_specs_props[] = {
393         [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
394         [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
395         [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
396         [SP_MON_OBJ_WR_ADDR_LOW] =
397                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
398         [SP_MON_OBJ_WR_ADDR_HIGH] =
399                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
400         [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
401         [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
402         [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
403         [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
404         [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
405         [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
406         [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
407         [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
408         [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
409         [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
410         [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
411         [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
412         [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
413         [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
414         [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
415         [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
416         [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
417         [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
418         [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
419         [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
420         [SP_FENCE0_CNT_OFFSET] =
421                 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
422         [SP_FENCE0_RDATA_OFFSET] =
423                 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
424         [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
425         [SP_NUM_CORES] = 1,
426 };
427
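/*
 * Queue-id to engine-id translation. Note that GAUDI_QUEUE_ID_CPU_PQ is
 * mapped to GAUDI_ENGINE_ID_SIZE, i.e. one past the last real engine id,
 * which effectively acts as a "no engine" sentinel for the CPU queue.
 */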
428 static const int gaudi_queue_id_to_engine_id[] = {
429         [GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
430         [GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
431         [GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
432         [GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
433         [GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
434         [GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
435         [GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
436         [GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
437         [GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
438         [GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
439         [GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
440         [GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
441         [GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
442         [GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
443         [GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
444         [GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
445         [GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
446         [GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
447         [GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
448         [GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
449         [GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
450         [GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
451         [GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
452         [GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
453         [GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
454         [GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
455         [GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
456         [GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
457         [GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
458 };
459
460 /* The order here is opposite to the order of the indexing in the h/w.
461  * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
462  */
463 static const char * const gaudi_sync_manager_names[] = {
464         "SYNC_MGR_E_N",
465         "SYNC_MGR_W_N",
466         "SYNC_MGR_E_S",
467         "SYNC_MGR_W_S",
468         NULL
469 };
470
471 struct ecc_info_extract_params {
472         u64 block_address;
473         u32 num_memories;
474         bool derr;
475 };
476
477 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
478                                                                 u64 phys_addr);
479 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
480                                         struct hl_cs_job *job);
481 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
482                                         u32 size, u64 val);
483 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
484                                         u32 num_regs, u32 val);
485 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
486                                 u32 tpc_id);
487 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
488 static int gaudi_cpucp_info_get(struct hl_device *hdev);
489 static void gaudi_disable_clock_gating(struct hl_device *hdev);
490 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
491 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
492                                 u32 size, bool eb);
493 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
494                                 struct hl_gen_wait_properties *prop);
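/*
 * Collective-wait role of each queue, as encoded below: the external (PCI
 * DMA) queues act as collective masters, while the DMA5, TPC7 and NIC queues
 * act as collective slaves; every other queue takes no part in collective
 * operations.
 */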
495 static inline enum hl_collective_mode
496 get_collective_mode(struct hl_device *hdev, u32 queue_id)
497 {
498         if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
499                 return HL_COLLECTIVE_MASTER;
500
501         if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
502                         queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
503                 return HL_COLLECTIVE_SLAVE;
504
505         if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
506                         queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
507                 return HL_COLLECTIVE_SLAVE;
508
509         if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
510                         queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
511                 return HL_COLLECTIVE_SLAVE;
512
513         return HL_COLLECTIVE_NOT_SUPPORTED;
514 }
515
516 static inline void set_default_power_values(struct hl_device *hdev)
517 {
518         struct asic_fixed_properties *prop = &hdev->asic_prop;
519
520         if (hdev->card_type == cpucp_card_type_pmc) {
521                 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
522
523                 if (prop->fw_security_enabled)
524                         prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
525                 else
526                         prop->dc_power_default = DC_POWER_DEFAULT_PMC;
527         } else {
528                 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
529                 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
530         }
531 }
532
533 static int gaudi_set_fixed_properties(struct hl_device *hdev)
534 {
535         struct asic_fixed_properties *prop = &hdev->asic_prop;
536         u32 num_sync_stream_queues = 0;
537         int i;
538
539         prop->max_queues = GAUDI_QUEUE_ID_SIZE;
540         prop->hw_queues_props = kcalloc(prop->max_queues,
541                         sizeof(struct hw_queue_properties),
542                         GFP_KERNEL);
543
544         if (!prop->hw_queues_props)
545                 return -ENOMEM;
546
547         for (i = 0 ; i < prop->max_queues ; i++) {
548                 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
549                         prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
550                         prop->hw_queues_props[i].driver_only = 0;
551                         prop->hw_queues_props[i].supports_sync_stream = 1;
552                         prop->hw_queues_props[i].cb_alloc_flags =
553                                 CB_ALLOC_KERNEL;
554                         num_sync_stream_queues++;
555                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
556                         prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
557                         prop->hw_queues_props[i].driver_only = 1;
558                         prop->hw_queues_props[i].supports_sync_stream = 0;
559                         prop->hw_queues_props[i].cb_alloc_flags =
560                                 CB_ALLOC_KERNEL;
561                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
562                         prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
563                         prop->hw_queues_props[i].driver_only = 0;
564                         prop->hw_queues_props[i].supports_sync_stream = 0;
565                         prop->hw_queues_props[i].cb_alloc_flags =
566                                 CB_ALLOC_USER;
567
568                 }
569                 prop->hw_queues_props[i].collective_mode =
570                                                 get_collective_mode(hdev, i);
571         }
572
573         prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
574         prop->cfg_base_address = CFG_BASE;
575         prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
576         prop->host_base_address = HOST_PHYS_BASE;
577         prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
578         prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
579         prop->completion_mode = HL_COMPLETION_MODE_JOB;
580         prop->collective_first_sob = 0;
581         prop->collective_first_mon = 0;
582
583         /* 2 SOBs per internal queue stream are reserved for collective */
584         prop->sync_stream_first_sob =
585                         ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
586                         * QMAN_STREAMS * HL_RSVD_SOBS;
587
588         /* 1 monitor per internal queue stream is reserved for collective.
589          * 2 monitors per external queue stream are reserved for collective.
590          */
591         prop->sync_stream_first_mon =
592                         (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
593                         (NUMBER_OF_EXT_HW_QUEUES * 2);
594
595         prop->dram_base_address = DRAM_PHYS_BASE;
596         prop->dram_size = GAUDI_HBM_SIZE_32GB;
597         prop->dram_end_address = prop->dram_base_address + prop->dram_size;
598         prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
599
600         prop->sram_base_address = SRAM_BASE_ADDR;
601         prop->sram_size = SRAM_SIZE;
602         prop->sram_end_address = prop->sram_base_address + prop->sram_size;
603         prop->sram_user_base_address =
604                         prop->sram_base_address + SRAM_USER_BASE_OFFSET;
605
606         prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
607         prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;
608
609         prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
610         if (hdev->pldm)
611                 prop->mmu_pgt_size = 0x800000; /* 8MB */
612         else
613                 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
614         prop->mmu_pte_size = HL_PTE_SIZE;
615         prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
616         prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
617         prop->dram_page_size = PAGE_SIZE_2MB;
618         prop->device_mem_alloc_default_page_size = prop->dram_page_size;
619         prop->dram_supports_virtual_memory = false;
620
621         prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
622         prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
623         prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
624         prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
625         prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
626         prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
627         prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
628         prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
629         prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
630         prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
631         prop->pmmu.start_addr = VA_HOST_SPACE_START;
632         prop->pmmu.end_addr =
633                         (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
634         prop->pmmu.page_size = PAGE_SIZE_4KB;
635         prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
636         prop->pmmu.last_mask = LAST_MASK;
637         /* TODO: will be duplicated until implementing per-MMU props */
638         prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
639         prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
640
641         /* PMMU and HPMMU are the same except for the page size */
642         memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
643         prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
644
645         /* shifts and masks are the same in PMMU and DMMU */
646         memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
647         prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
648         prop->dmmu.end_addr = VA_HOST_SPACE_END;
649         prop->dmmu.page_size = PAGE_SIZE_2MB;
650
651         prop->cfg_size = CFG_SIZE;
652         prop->max_asid = MAX_ASID;
653         prop->num_of_events = GAUDI_EVENT_SIZE;
654         prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;
655         prop->tpc_enabled_mask = TPC_ENABLED_MASK;
656
657         set_default_power_values(hdev);
658
659         prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
660         prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
661
662         prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
663         prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
664
665         strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
666                                         CARD_NAME_MAX_LEN);
667
668         prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
669
670         prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
671                         prop->sync_stream_first_sob +
672                         (num_sync_stream_queues * HL_RSVD_SOBS);
673         prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
674                         prop->sync_stream_first_mon +
675                         (num_sync_stream_queues * HL_RSVD_MONS);
676
677         prop->first_available_user_interrupt = USHRT_MAX;
678         prop->tpc_interrupt_id = USHRT_MAX;
679
680         /* single msi */
681         prop->eq_interrupt_id = 0;
682
683         for (i = 0 ; i < HL_MAX_DCORES ; i++)
684                 prop->first_available_cq[i] = USHRT_MAX;
685
686         prop->fw_cpu_boot_dev_sts0_valid = false;
687         prop->fw_cpu_boot_dev_sts1_valid = false;
688         prop->hard_reset_done_by_fw = false;
689         prop->gic_interrupts_enable = true;
690
691         prop->server_type = HL_SERVER_TYPE_UNKNOWN;
692
693         prop->clk_pll_index = HL_GAUDI_MME_PLL;
694         prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
695
696         prop->use_get_power_for_reset_history = true;
697
698         prop->configurable_stop_on_err = true;
699
700         prop->set_max_power_on_device_init = true;
701
702         prop->dma_mask = 48;
703
704         prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;
705
706         return 0;
707 }
708
709 static int gaudi_pci_bars_map(struct hl_device *hdev)
710 {
711         static const char * const name[] = {"SRAM", "CFG", "HBM"};
712         bool is_wc[3] = {false, false, true};
713         int rc;
714
715         rc = hl_pci_bars_map(hdev, name, is_wc);
716         if (rc)
717                 return rc;
718
719         hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
720                         (CFG_BASE - SPI_FLASH_BASE_ADDR);
721
722         return 0;
723 }
724
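/**
 * gaudi_set_hbm_bar_base() - move the HBM BAR window.
 * @hdev: pointer to hl_device structure.
 * @addr: new device DRAM address the HBM BAR should point to.
 *
 * Re-programs inbound iATU region 2 so that BAR 4 exposes @addr, and returns
 * the previous BAR base so the caller can restore it later. Returns U64_MAX
 * if the iATU is owned by the firmware or if re-programming fails; if the BAR
 * already points at @addr, it is returned unchanged.
 */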
725 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
726 {
727         struct gaudi_device *gaudi = hdev->asic_specific;
728         struct hl_inbound_pci_region pci_region;
729         u64 old_addr = addr;
730         int rc;
731
732         if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
733                 return old_addr;
734
735         if (hdev->asic_prop.iatu_done_by_fw)
736                 return U64_MAX;
737
738         /* Inbound Region 2 - Bar 4 - Point to HBM */
739         pci_region.mode = PCI_BAR_MATCH_MODE;
740         pci_region.bar = HBM_BAR_ID;
741         pci_region.addr = addr;
742         rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
743         if (rc)
744                 return U64_MAX;
745
746         if (gaudi) {
747                 old_addr = gaudi->hbm_bar_cur_addr;
748                 gaudi->hbm_bar_cur_addr = addr;
749         }
750
751         return old_addr;
752 }
753
754 static int gaudi_init_iatu(struct hl_device *hdev)
755 {
756         struct hl_inbound_pci_region inbound_region;
757         struct hl_outbound_pci_region outbound_region;
758         int rc;
759
760         if (hdev->asic_prop.iatu_done_by_fw)
761                 return 0;
762
763         /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
764         inbound_region.mode = PCI_BAR_MATCH_MODE;
765         inbound_region.bar = SRAM_BAR_ID;
766         inbound_region.addr = SRAM_BASE_ADDR;
767         rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
768         if (rc)
769                 goto done;
770
771         /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
772         inbound_region.mode = PCI_BAR_MATCH_MODE;
773         inbound_region.bar = CFG_BAR_ID;
774         inbound_region.addr = SPI_FLASH_BASE_ADDR;
775         rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
776         if (rc)
777                 goto done;
778
779         /* Inbound Region 2 - Bar 4 - Point to HBM */
780         inbound_region.mode = PCI_BAR_MATCH_MODE;
781         inbound_region.bar = HBM_BAR_ID;
782         inbound_region.addr = DRAM_PHYS_BASE;
783         rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
784         if (rc)
785                 goto done;
786
787         /* Outbound Region 0 - Point to Host */
788         outbound_region.addr = HOST_PHYS_BASE;
789         outbound_region.size = HOST_PHYS_SIZE;
790         rc = hl_pci_set_outbound_region(hdev, &outbound_region);
791
792 done:
793         return rc;
794 }
795
796 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
797 {
798         return RREG32(mmHW_STATE);
799 }
800
801 static int gaudi_early_init(struct hl_device *hdev)
802 {
803         struct asic_fixed_properties *prop = &hdev->asic_prop;
804         struct pci_dev *pdev = hdev->pdev;
805         resource_size_t pci_bar_size;
806         u32 fw_boot_status;
807         int rc;
808
809         rc = gaudi_set_fixed_properties(hdev);
810         if (rc) {
811                 dev_err(hdev->dev, "Failed setting fixed properties\n");
812                 return rc;
813         }
814
815         /* Check BAR sizes */
816         pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);
817
818         if (pci_bar_size != SRAM_BAR_SIZE) {
819                 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
820                         SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
821                 rc = -ENODEV;
822                 goto free_queue_props;
823         }
824
825         pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);
826
827         if (pci_bar_size != CFG_BAR_SIZE) {
828                 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
829                         CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
830                 rc = -ENODEV;
831                 goto free_queue_props;
832         }
833
834         prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
835         hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
836
837         /* If FW security is enabled at this point it means no access to ELBI */
838         if (hdev->asic_prop.fw_security_enabled) {
839                 hdev->asic_prop.iatu_done_by_fw = true;
840
841                 /*
842                  * The GIC-security-bit can ONLY be set by CPUCP, so at this stage
843                  * the decision can only be taken based on PCI ID security.
844                  */
845                 hdev->asic_prop.gic_interrupts_enable = false;
846                 goto pci_init;
847         }
848
849         rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
850                                 &fw_boot_status);
851         if (rc)
852                 goto free_queue_props;
853
854         /* Check whether FW is configuring iATU */
855         if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
856                         (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
857                 hdev->asic_prop.iatu_done_by_fw = true;
858
859 pci_init:
860         rc = hl_pci_init(hdev);
861         if (rc)
862                 goto free_queue_props;
863
864         /* Before continuing with the initialization, we need to read the preboot
865          * version to determine whether we are running with security-enabled firmware.
866          */
867         rc = hl_fw_read_preboot_status(hdev);
868         if (rc) {
869                 if (hdev->reset_on_preboot_fail)
870                         /* we are already on failure flow, so don't check if hw_fini fails. */
871                         hdev->asic_funcs->hw_fini(hdev, true, false);
872                 goto pci_fini;
873         }
874
875         if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
876                 dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
877                 rc = hdev->asic_funcs->hw_fini(hdev, true, false);
878                 if (rc) {
879                         dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
880                         goto pci_fini;
881                 }
882         }
883
884         return 0;
885
886 pci_fini:
887         hl_pci_fini(hdev);
888 free_queue_props:
889         kfree(hdev->asic_prop.hw_queues_props);
890         return rc;
891 }
892
893 static int gaudi_early_fini(struct hl_device *hdev)
894 {
895         kfree(hdev->asic_prop.hw_queues_props);
896         hl_pci_fini(hdev);
897
898         return 0;
899 }
900
901 /**
902  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
903  *
904  * @hdev: pointer to hl_device structure
905  *
906  */
907 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
908 {
909         u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
910         struct asic_fixed_properties *prop = &hdev->asic_prop;
911         u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
912         int rc;
913
914         if ((hdev->fw_components & FW_TYPE_LINUX) &&
915                         (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
916                 struct gaudi_device *gaudi = hdev->asic_specific;
917
918                 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
919                         return 0;
920
921                 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
922
923                 if (rc)
924                         return rc;
925
926                 freq = pll_freq_arr[2];
927         } else {
928                 /* Backward compatibility */
929                 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
930                 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
931                 nr = RREG32(mmPSOC_CPU_PLL_NR);
932                 nf = RREG32(mmPSOC_CPU_PLL_NF);
933                 od = RREG32(mmPSOC_CPU_PLL_OD);
934
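                /*
                 * Derive the frequency from the PLL registers. With the usual
                 * PLL naming (NR = reference divider, NF = feedback divider,
                 * OD = output divider - an assumption about these register
                 * names, not spelled out in this file), the PLL output clock
                 * is PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1)),
                 * optionally divided further by (div_fctr + 1) when a divided
                 * output is selected.
                 */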
935                 if (div_sel == DIV_SEL_REF_CLK ||
936                                 div_sel == DIV_SEL_DIVIDED_REF) {
937                         if (div_sel == DIV_SEL_REF_CLK)
938                                 freq = PLL_REF_CLK;
939                         else
940                                 freq = PLL_REF_CLK / (div_fctr + 1);
941                 } else if (div_sel == DIV_SEL_PLL_CLK ||
942                         div_sel == DIV_SEL_DIVIDED_PLL) {
943                         pll_clk = PLL_REF_CLK * (nf + 1) /
944                                         ((nr + 1) * (od + 1));
945                         if (div_sel == DIV_SEL_PLL_CLK)
946                                 freq = pll_clk;
947                         else
948                                 freq = pll_clk / (div_fctr + 1);
949                 } else {
950                         dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
951                         freq = 0;
952                 }
953         }
954
955         prop->psoc_timestamp_frequency = freq;
956         prop->psoc_pci_pll_nr = nr;
957         prop->psoc_pci_pll_nf = nf;
958         prop->psoc_pci_pll_od = od;
959         prop->psoc_pci_pll_div_factor = div_fctr;
960
961         return 0;
962 }
963
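/*
 * Helper for gaudi_init_tpc_mem(): builds a single LIN_DMA packet on a kernel
 * CB that copies the TPC kernel image from host memory into the SRAM user
 * area (rounded up to 8KB alignment for the TPC instruction-cache prefetch),
 * sends it through QMAN0, and then runs the kernel on every TPC engine.
 */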
964 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
965                 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
966 {
967         struct asic_fixed_properties *prop = &hdev->asic_prop;
968         struct packet_lin_dma *init_tpc_mem_pkt;
969         struct hl_cs_job *job;
970         struct hl_cb *cb;
971         u64 dst_addr;
972         u32 cb_size, ctl;
973         u8 tpc_id;
974         int rc;
975
976         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
977         if (!cb)
978                 return -EFAULT;
979
980         init_tpc_mem_pkt = cb->kernel_address;
981         cb_size = sizeof(*init_tpc_mem_pkt);
982         memset(init_tpc_mem_pkt, 0, cb_size);
983
984         init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
985
986         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
987         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
988         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
989         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
990
991         init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
992
993         init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
994
995         /* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
996         dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
997                                 round_up(prop->sram_user_base_address, SZ_8K));
998         init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
999
1000         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
1001         if (!job) {
1002                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1003                 rc = -ENOMEM;
1004                 goto release_cb;
1005         }
1006
1007         job->id = 0;
1008         job->user_cb = cb;
1009         atomic_inc(&job->user_cb->cs_cnt);
1010         job->user_cb_size = cb_size;
1011         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
1012         job->patched_cb = job->user_cb;
1013         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
1014
1015         hl_debugfs_add_job(hdev, job);
1016
1017         rc = gaudi_send_job_on_qman0(hdev, job);
1018
1019         if (rc)
1020                 goto free_job;
1021
1022         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
1023                 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
1024                 if (rc)
1025                         break;
1026         }
1027
1028 free_job:
1029         hl_userptr_delete_list(hdev, &job->userptr_list);
1030         hl_debugfs_remove_job(hdev, job);
1031         kfree(job);
1032         atomic_dec(&cb->cs_cnt);
1033
1034 release_cb:
1035         hl_cb_put(cb);
1036         hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1037
1038         return rc;
1039 }
1040
1041 /*
1042  * gaudi_init_tpc_mem() - Initialize TPC memories.
1043  * @hdev: Pointer to hl_device structure.
1044  *
1045  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1046  *
1047  * Return: 0 for success, negative value for error.
1048  */
1049 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1050 {
1051         const struct firmware *fw;
1052         size_t fw_size;
1053         void *cpu_addr;
1054         dma_addr_t dma_handle;
1055         int rc, count = 5;
1056
1057 again:
1058         rc = reject_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1059         if (rc == -EINTR && count-- > 0) {
1060                 msleep(50);
1061                 goto again;
1062         }
1063
1064         if (rc) {
1065                 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1066                                 GAUDI_TPC_FW_FILE);
1067                 goto out;
1068         }
1069
1070         fw_size = fw->size;
1071         cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
1072         if (!cpu_addr) {
1073                 dev_err(hdev->dev,
1074                         "Failed to allocate %zu of dma memory for TPC kernel\n",
1075                         fw_size);
1076                 rc = -ENOMEM;
1077                 goto out;
1078         }
1079
1080         memcpy(cpu_addr, fw->data, fw_size);
1081
1082         rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1083
1084         hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);
1085
1086 out:
1087         release_firmware(fw);
1088         return rc;
1089 }
1090
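/*
 * Map the SOBs of the stream's current SOB group onto the collective queues:
 * each NIC engine gets its own SOB (base_sob_id + engine index), while DMA5
 * and TPC7 share the SOB that follows the NIC range, since only one of them
 * participates in the reduction at a time (see the comment inside the
 * function).
 */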
1091 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1092 {
1093         struct gaudi_device *gaudi = hdev->asic_specific;
1094         struct gaudi_collective_properties *prop = &gaudi->collective_props;
1095         struct hl_hw_queue *q;
1096         u32 i, sob_id, sob_group_id, queue_id;
1097
1098         /* Iterate through SOB groups and assign a SOB for each slave queue */
1099         sob_group_id =
1100                 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1101         sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1102
1103         queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1104         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1105                 q = &hdev->kernel_queues[queue_id + (4 * i)];
1106                 q->sync_stream_prop.collective_sob_id = sob_id + i;
1107         }
1108
1109         /* Both DMA5 and TPC7 use the same resources since only a single
1110          * engine needs to participate in the reduction process
1111          */
1112         queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1113         q = &hdev->kernel_queues[queue_id];
1114         q->sync_stream_prop.collective_sob_id =
1115                         sob_id + NIC_NUMBER_OF_ENGINES;
1116
1117         queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1118         q = &hdev->kernel_queues[queue_id];
1119         q->sync_stream_prop.collective_sob_id =
1120                         sob_id + NIC_NUMBER_OF_ENGINES;
1121 }
1122
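/*
 * kref release callback for a SOB group: clears every SOB in the group back
 * to zero in the sync manager and re-initializes the refcount so the group
 * can be reused.
 */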
1123 static void gaudi_sob_group_hw_reset(struct kref *ref)
1124 {
1125         struct gaudi_hw_sob_group *hw_sob_group =
1126                 container_of(ref, struct gaudi_hw_sob_group, kref);
1127         struct hl_device *hdev = hw_sob_group->hdev;
1128         int i;
1129
1130         for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1131                 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1132                         (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1133
1134         kref_init(&hw_sob_group->kref);
1135 }
1136
1137 static void gaudi_sob_group_reset_error(struct kref *ref)
1138 {
1139         struct gaudi_hw_sob_group *hw_sob_group =
1140                 container_of(ref, struct gaudi_hw_sob_group, kref);
1141         struct hl_device *hdev = hw_sob_group->hdev;
1142
1143         dev_crit(hdev->dev,
1144                 "SOB release shouldn't be called here, base_sob_id: %d\n",
1145                 hw_sob_group->base_sob_id);
1146 }
1147
1148 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1149 {
1150         struct gaudi_collective_properties *prop;
1151         int i;
1152
1153         prop = &gaudi->collective_props;
1154
1155         memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1156
1157         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1158                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1159                         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1160                                         BIT(i % HL_MAX_SOBS_PER_MONITOR);
1161         /* Set collective engine bit */
1162         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1163                                 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1164 }
1165
1166 static int gaudi_collective_init(struct hl_device *hdev)
1167 {
1168         u32 i, sob_id, reserved_sobs_per_group;
1169         struct gaudi_collective_properties *prop;
1170         struct gaudi_device *gaudi;
1171
1172         gaudi = hdev->asic_specific;
1173         prop = &gaudi->collective_props;
1174         sob_id = hdev->asic_prop.collective_first_sob;
1175
1176         /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1177         reserved_sobs_per_group =
1178                 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1179
1180         /* Init SOB groups */
1181         for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1182                 prop->hw_sob_group[i].hdev = hdev;
1183                 prop->hw_sob_group[i].base_sob_id = sob_id;
1184                 sob_id += reserved_sobs_per_group;
1185                 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1186         }
1187
1188         for (i = 0 ; i < QMAN_STREAMS; i++) {
1189                 prop->next_sob_group_val[i] = 1;
1190                 prop->curr_sob_group_idx[i] = 0;
1191                 gaudi_collective_map_sobs(hdev, i);
1192         }
1193
1194         gaudi_collective_mstr_sob_mask_set(gaudi);
1195
1196         return 0;
1197 }
1198
1199 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1200 {
1201         struct gaudi_device *gaudi = hdev->asic_specific;
1202         struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1203
1204         kref_put(&cprop->hw_sob_group[sob_group].kref,
1205                                         gaudi_sob_group_hw_reset);
1206 }
1207
1208 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1209                 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1210 {
1211         u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1212         struct gaudi_collective_properties *cprop;
1213         struct hl_gen_wait_properties wait_prop;
1214         struct hl_sync_stream_properties *prop;
1215         struct gaudi_device *gaudi;
1216
1217         gaudi = hdev->asic_specific;
1218         cprop = &gaudi->collective_props;
1219         queue_id = job->hw_queue_id;
1220         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1221
1222         master_sob_base =
1223                 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1224         master_monitor = prop->collective_mstr_mon_id[0];
1225
1226         cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1227
1228         dev_dbg(hdev->dev,
1229                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1230                 master_sob_base, cprop->mstr_sob_mask[0],
1231                 cprop->next_sob_group_val[stream],
1232                 master_monitor, queue_id);
1233
1234         wait_prop.data = (void *) job->patched_cb;
1235         wait_prop.sob_base = master_sob_base;
1236         wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1237         wait_prop.sob_val = cprop->next_sob_group_val[stream];
1238         wait_prop.mon_id = master_monitor;
1239         wait_prop.q_idx = queue_id;
1240         wait_prop.size = cb_size;
1241         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1242
1243         master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1244         master_monitor = prop->collective_mstr_mon_id[1];
1245
1246         dev_dbg(hdev->dev,
1247                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1248                 master_sob_base, cprop->mstr_sob_mask[1],
1249                 cprop->next_sob_group_val[stream],
1250                 master_monitor, queue_id);
1251
1252         wait_prop.sob_base = master_sob_base;
1253         wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1254         wait_prop.mon_id = master_monitor;
1255         wait_prop.size = cb_size;
1256         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1257 }
1258
1259 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1260                 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1261 {
1262         struct hl_gen_wait_properties wait_prop;
1263         struct hl_sync_stream_properties *prop;
1264         u32 queue_id, cb_size = 0;
1265
1266         queue_id = job->hw_queue_id;
1267         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1268
1269         if (job->cs->encaps_signals) {
1270                 /* use the encaps signal handle stored earlier in the flow
1271                  * and set the SOB information from the encaps
1272                  * signals handle
1273                  */
1274                 hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1275                                                 cs_cmpl);
1276
1277                 dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1278                                 job->cs->sequence,
1279                                 cs_cmpl->hw_sob->sob_id,
1280                                 cs_cmpl->sob_val);
1281         }
1282
1283         /* Add to wait CBs using slave monitor */
1284         wait_prop.data = (void *) job->user_cb;
1285         wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1286         wait_prop.sob_mask = 0x1;
1287         wait_prop.sob_val = cs_cmpl->sob_val;
1288         wait_prop.mon_id = prop->collective_slave_mon_id;
1289         wait_prop.q_idx = queue_id;
1290         wait_prop.size = cb_size;
1291
1292         dev_dbg(hdev->dev,
1293                 "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1294                 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1295                 prop->collective_slave_mon_id, queue_id);
1296
1297         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1298
1299         dev_dbg(hdev->dev,
1300                 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1301                 prop->collective_sob_id, queue_id);
1302
1303         cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1304                         prop->collective_sob_id, cb_size, false);
1305 }
1306
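/*
 * Brief sketch of the flow implemented below: take the SOB information either
 * from the encapsulated signals handle or from the signal CS completion
 * object, grab a refcount on the hw_sob (unless the signal CS has already
 * completed), initialize the master job and the slave jobs, and finally
 * advance the SOB group value for this stream and handle its wraparound.
 */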
1307 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1308 {
1309         struct hl_cs_compl *signal_cs_cmpl =
1310                 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1311         struct hl_cs_compl *cs_cmpl =
1312                 container_of(cs->fence, struct hl_cs_compl, base_fence);
1313         struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
1314         struct gaudi_collective_properties *cprop;
1315         u32 stream, queue_id, sob_group_offset;
1316         struct gaudi_device *gaudi;
1317         struct hl_device *hdev;
1318         struct hl_cs_job *job;
1319         struct hl_ctx *ctx;
1320
1321         ctx = cs->ctx;
1322         hdev = ctx->hdev;
1323         gaudi = hdev->asic_specific;
1324         cprop = &gaudi->collective_props;
1325
1326         if (cs->encaps_signals) {
1327                 cs_cmpl->hw_sob = handle->hw_sob;
1328                 /* at this checkpoint we only need the hw_sob pointer
1329                  * for the completion check before starting to go over the
1330                  * jobs of the master/slaves. The sob_value will be taken
1331                  * later on in gaudi_collective_slave_init_job, depending
1332                  * on each job's wait offset value.
1333                  */
1334                 cs_cmpl->sob_val = 0;
1335         } else {
1336                 /* copy the SOB id and value of the signal CS */
1337                 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1338                 cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1339         }
1340
1341         /* Check again if the signal cs has already completed.
1342          * If yes, don't send any wait cs, since the hw_sob
1343          * could already be in reset. If the signal has not completed,
1344          * take a refcount on the hw_sob to prevent resetting the sob
1345          * while the wait cs is not yet submitted.
1346          * Note that this check is protected by two locks,
1347          * the hw queue lock and the completion object lock,
1348          * and the same completion object lock also protects
1349          * the hw_sob reset handler function.
1350          * The hw_queue lock prevents the hw_sob refcount value, which is
1351          * changed by the signal/wait flows, from going out of sync.
1352          */
1353         spin_lock(&signal_cs_cmpl->lock);
1354
1355         if (completion_done(&cs->signal_fence->completion)) {
1356                 spin_unlock(&signal_cs_cmpl->lock);
1357                 return -EINVAL;
1358         }
1359         /* Increment kref since all slave queues are now waiting on it */
1360         kref_get(&cs_cmpl->hw_sob->kref);
1361
1362         spin_unlock(&signal_cs_cmpl->lock);
1363
1364         /* Calculate the stream from collective master queue (1st job) */
1365         job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1366         stream = job->hw_queue_id % 4;
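        /*
         * Each stream owns HL_RSVD_SOBS consecutive SOB groups;
         * curr_sob_group_idx selects the group currently in use for this
         * stream.
         */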
1367         sob_group_offset =
1368                 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1369
1370         list_for_each_entry(job, &cs->job_list, cs_node) {
1371                 queue_id = job->hw_queue_id;
1372
1373                 if (hdev->kernel_queues[queue_id].collective_mode ==
1374                                 HL_COLLECTIVE_MASTER)
1375                         gaudi_collective_master_init_job(hdev, job, stream,
1376                                                 sob_group_offset);
1377                 else
1378                         gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1379         }
1380
1381         cs_cmpl->sob_group = sob_group_offset;
1382
1383         /* Handle sob group kref and wraparound */
1384         kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1385         cprop->next_sob_group_val[stream]++;
1386
1387         if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1388                 /*
1389                  * Decrement as we reached the max value.
1390                  * The release function won't be called here as we've
1391                  * just incremented the refcount.
1392                  */
1393                 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1394                                 gaudi_sob_group_reset_error);
1395                 cprop->next_sob_group_val[stream] = 1;
1396                 /* only two SOBs are currently in use */
1397                 cprop->curr_sob_group_idx[stream] =
1398                         (cprop->curr_sob_group_idx[stream] + 1) &
1399                                                         (HL_RSVD_SOBS - 1);
1400
1401                 gaudi_collective_map_sobs(hdev, stream);
1402
1403                 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1404                                 cprop->curr_sob_group_idx[stream], stream);
1405         }
1406
1407         mb();
1408         hl_fence_put(cs->signal_fence);
1409         cs->signal_fence = NULL;
1410
1411         return 0;
1412 }
1413
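/*
 * Worked example for the helper below (illustrative only, assuming
 * DEVICE_CACHE_LINE_SIZE is 128 and sizeof(struct packet_msg_prot) is 16):
 * a 120-byte user CB rounds up to a 128-byte cache line end, and the two
 * trailing msg_prot packets (32 bytes) would cross it, so the patched CB is
 * extended by (128 - 120) + 32 = 40 bytes. If the packets fit before the
 * cache line end, only the 32 extra bytes are added.
 */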
1414 static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
1415 {
1416         u32 cacheline_end, additional_commands;
1417
1418         cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
1419         additional_commands = sizeof(struct packet_msg_prot) * 2;
1420
1421         if (user_cb_size + additional_commands > cacheline_end)
1422                 return cacheline_end - user_cb_size + additional_commands;
1423         else
1424                 return additional_commands;
1425 }
1426
1427 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1428                 struct hl_ctx *ctx, struct hl_cs *cs,
1429                 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1430                 u32 encaps_signal_offset)
1431 {
1432         struct hw_queue_properties *hw_queue_prop;
1433         struct hl_cs_counters_atomic *cntr;
1434         struct hl_cs_job *job;
1435         struct hl_cb *cb;
1436         u32 cb_size;
1437         bool patched_cb;
1438
1439         cntr = &hdev->aggregated_cs_counters;
1440
1441         if (mode == HL_COLLECTIVE_MASTER) {
1442                 /* CB size of collective master queue contains
1443                  * 4 msg short packets for monitor 1 configuration
1444                  * 1 fence packet
1445                  * 4 msg short packets for monitor 2 configuration
1446                  * 1 fence packet
1447                  * 2 msg prot packets for completion and MSI
1448                  */
1449                 cb_size = sizeof(struct packet_msg_short) * 8 +
1450                                 sizeof(struct packet_fence) * 2 +
1451                                 sizeof(struct packet_msg_prot) * 2;
1452                 patched_cb = true;
1453         } else {
1454                 /* CB size of collective slave queues contains
1455                  * 4 msg short packets for monitor configuration
1456                  * 1 fence packet
1457                  * 1 additional msg short packet for sob signal
1458                  */
1459                 cb_size = sizeof(struct packet_msg_short) * 5 +
1460                                 sizeof(struct packet_fence);
1461                 patched_cb = false;
1462         }
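        /*
         * Rough sizes, assuming the packet layouts in gaudi_packets.h
         * (msg_short and fence are 8 bytes each, msg_prot is 16 bytes):
         * master CB = 8 * 8 + 2 * 8 + 2 * 16 = 112 bytes,
         * slave CB = 5 * 8 + 8 = 48 bytes.
         */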
1463
1464         hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1465         job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1466         if (!job) {
1467                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1468                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1469                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1470                 return -ENOMEM;
1471         }
1472
1473         /* Allocate internal mapped CB for non-patched CBs */
1474         cb = hl_cb_kernel_create(hdev, cb_size, !patched_cb);
1475         if (!cb) {
1476                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1477                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1478                 kfree(job);
1479                 return -EFAULT;
1480         }
1481
1482         job->id = 0;
1483         job->cs = cs;
1484         job->user_cb = cb;
1485         atomic_inc(&job->user_cb->cs_cnt);
1486         job->user_cb_size = cb_size;
1487         job->hw_queue_id = queue_id;
1488
1489         /* Since it is guaranteed to have only one chunk in the collective wait
1490          * cs, we can use this chunk to set the encapsulated signal offset
1491          * in the jobs.
1492          */
1493         if (cs->encaps_signals)
1494                 job->encaps_sig_wait_offset = encaps_signal_offset;
1495
1496         /*
1497          * No need for parsing, the user CB is the patched CB.
1498          * We call hl_cb_destroy() for two reasons - we don't need
1499          * the CB in the CB idr anymore, and we need to decrement its
1500          * refcount as it was incremented inside hl_cb_kernel_create().
1501          */
1502         if (patched_cb)
1503                 job->patched_cb = job->user_cb;
1504         else
1505                 job->patched_cb = NULL;
1506
1507         job->job_cb_size = job->user_cb_size;
1508         hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1509
1510         /* Increment the cs refcount since for external queues we get a completion */
1511         if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1512                 cs_get(cs);
1513
1514         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1515
1516         list_add_tail(&job->cs_node, &cs->job_list);
1517
1518         hl_debugfs_add_job(hdev, job);
1519
1520         return 0;
1521 }
1522
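/*
 * Create the jobs of a collective wait CS: the first job goes to the master
 * (wait) queue and the remaining NUMBER_OF_SOBS_IN_GRP jobs go to the slave
 * queues - the NIC engines (disabled NICs are skipped) and the reduction
 * engine (DMA5 or TPC7) - all on the same stream as the wait queue.
 */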
1523 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1524                 struct hl_ctx *ctx, struct hl_cs *cs,
1525                 u32 wait_queue_id, u32 collective_engine_id,
1526                 u32 encaps_signal_offset)
1527 {
1528         struct gaudi_device *gaudi = hdev->asic_specific;
1529         struct hw_queue_properties *hw_queue_prop;
1530         u32 queue_id, collective_queue, num_jobs;
1531         u32 stream, nic_queue, nic_idx = 0;
1532         bool skip;
1533         int i, rc = 0;
1534
1535         /* Verify wait queue id is configured as master */
1536         hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1537         if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1538                 dev_err(hdev->dev,
1539                         "Queue %d is not configured as collective master\n",
1540                         wait_queue_id);
1541                 return -EINVAL;
1542         }
1543
1544         /* Verify engine id is supported */
1545         if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1546                         collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1547                 dev_err(hdev->dev,
1548                         "Collective wait does not support engine %u\n",
1549                         collective_engine_id);
1550                 return -EINVAL;
1551         }
1552
1553         stream = wait_queue_id % 4;
1554
1555         if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1556                 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1557         else
1558                 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1559
1560         num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1561         nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1562
1563         /* The first job goes to the collective master queue; it will wait for
1564          * the collective slave queues to finish execution.
1565          * The synchronization is done using two monitors:
1566          * the first monitor for NICs 0-7, the second monitor for NICs 8-9 and
1567          * the reduction engine (DMA5/TPC7).
1568          *
1569          * The rest of the jobs go to the collective slave queues, which will
1570          * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1571          */
1572         for (i = 0 ; i < num_jobs ; i++) {
1573                 if (i == 0) {
1574                         queue_id = wait_queue_id;
1575                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1576                                 HL_COLLECTIVE_MASTER, queue_id,
1577                                 wait_queue_id, encaps_signal_offset);
1578                 } else {
1579                         if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1580                                 if (gaudi->hw_cap_initialized &
1581                                         BIT(HW_CAP_NIC_SHIFT + nic_idx))
1582                                         skip = false;
1583                                 else
1584                                         skip = true;
1585
1586                                 queue_id = nic_queue;
1587                                 nic_queue += 4;
1588                                 nic_idx++;
1589
1590                                 if (skip)
1591                                         continue;
1592                         } else {
1593                                 queue_id = collective_queue;
1594                         }
1595
1596                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1597                                 HL_COLLECTIVE_SLAVE, queue_id,
1598                                 wait_queue_id, encaps_signal_offset);
1599                 }
1600
1601                 if (rc)
1602                         return rc;
1603         }
1604
1605         return rc;
1606 }
1607
1608 static int gaudi_late_init(struct hl_device *hdev)
1609 {
1610         struct gaudi_device *gaudi = hdev->asic_specific;
1611         int rc;
1612
1613         rc = gaudi->cpucp_info_get(hdev);
1614         if (rc) {
1615                 dev_err(hdev->dev, "Failed to get cpucp info\n");
1616                 return rc;
1617         }
1618
1619         if ((hdev->card_type == cpucp_card_type_pci) &&
1620                         (hdev->nic_ports_mask & 0x3)) {
1621                 dev_info(hdev->dev,
1622                         "PCI card detected, only 8 ports are enabled\n");
1623                 hdev->nic_ports_mask &= ~0x3;
1624
1625                 /* Stop and disable unused NIC QMANs */
1626                 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1627                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1628                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1629
1630                 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1631                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1632                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1633
1634                 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1635                 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1636
1637                 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1638         }
1639
1640         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
1641         if (rc) {
1642                 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1643                 return rc;
1644         }
1645
1646         /* Scrub both SRAM and DRAM */
1647         rc = hdev->asic_funcs->scrub_device_mem(hdev);
1648         if (rc)
1649                 goto disable_pci_access;
1650
1651         rc = gaudi_fetch_psoc_frequency(hdev);
1652         if (rc) {
1653                 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1654                 goto disable_pci_access;
1655         }
1656
1657         rc = gaudi_mmu_clear_pgt_range(hdev);
1658         if (rc) {
1659                 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1660                 goto disable_pci_access;
1661         }
1662
1663         rc = gaudi_init_tpc_mem(hdev);
1664         if (rc) {
1665                 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1666                 goto disable_pci_access;
1667         }
1668
1669         rc = gaudi_collective_init(hdev);
1670         if (rc) {
1671                 dev_err(hdev->dev, "Failed to init collective\n");
1672                 goto disable_pci_access;
1673         }
1674
1675         /* We only support a single ASID for the user, so for the sake of optimization, just
1676          * initialize the ASID one time during device initialization with the fixed value of 1
1677          */
1678         gaudi_mmu_prepare(hdev, 1);
1679
1680         hl_fw_set_pll_profile(hdev);
1681
1682         return 0;
1683
1684 disable_pci_access:
1685         hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
1686
1687         return rc;
1688 }
1689
1690 static void gaudi_late_fini(struct hl_device *hdev)
1691 {
1692         hl_hwmon_release_resources(hdev);
1693 }
1694
1695 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1696 {
1697         dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1698         void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1699         int i, j, rc = 0;
1700
1701         /*
1702          * The device CPU works with 40-bit addresses, while bit 39 must be set
1703          * to '1' when accessing the host.
1704          * Bits 49:39 of the full host address are saved for a later
1705          * configuration of the HW to perform the extension to 50 bits.
1706          * Because there is a single HW register that holds the extension bits,
1707          * these bits must be identical across the entire allocated range.
1708          */
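        /*
         * For example (illustrative): an allocation that happens to straddle a
         * 2^39 (512 GB) aligned boundary has different bits 49:39 at its start
         * and end, so it is kept aside and the allocation is retried, up to
         * GAUDI_ALLOC_CPU_MEM_RETRY_CNT times; the rejected buffers are freed
         * at the end of the function via the free_dma_mem_arr label.
         */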
1709
1710         for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1711                 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
1712                                                                 &dma_addr_arr[i],
1713                                                                 GFP_KERNEL | __GFP_ZERO);
1714                 if (!virt_addr_arr[i]) {
1715                         rc = -ENOMEM;
1716                         goto free_dma_mem_arr;
1717                 }
1718
1719                 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1720                 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1721                                 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1722                         break;
1723         }
1724
1725         if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1726                 dev_err(hdev->dev,
1727                         "MSBs of CPU accessible DMA memory are not identical across the entire range\n");
1728                 rc = -EFAULT;
1729                 goto free_dma_mem_arr;
1730         }
1731
1732         hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1733         hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1734         hdev->cpu_pci_msb_addr =
1735                 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1736
1737         if (!hdev->asic_prop.fw_security_enabled)
1738                 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1739
1740 free_dma_mem_arr:
1741         for (j = 0 ; j < i ; j++)
1742                 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
1743                                                 dma_addr_arr[j]);
1744
1745         return rc;
1746 }
1747
1748 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1749 {
1750         struct gaudi_device *gaudi = hdev->asic_specific;
1751         struct gaudi_internal_qman_info *q;
1752         u32 i;
1753
1754         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1755                 q = &gaudi->internal_qmans[i];
1756                 if (!q->pq_kernel_addr)
1757                         continue;
1758                 hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
1759         }
1760 }
1761
1762 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1763 {
1764         struct gaudi_device *gaudi = hdev->asic_specific;
1765         struct gaudi_internal_qman_info *q;
1766         int rc, i;
1767
1768         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1769                 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1770                         continue;
1771
1772                 q = &gaudi->internal_qmans[i];
1773
1774                 switch (i) {
1775                 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1776                         q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1777                         break;
1778                 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1779                         q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1780                         break;
1781                 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1782                         q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1783                         break;
1784                 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1785                         q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1786                         break;
1787                 default:
1788                         dev_err(hdev->dev, "Bad internal queue index %d", i);
1789                         rc = -EINVAL;
1790                         goto free_internal_qmans_pq_mem;
1791                 }
1792
1793                 q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
1794                                                                 GFP_KERNEL | __GFP_ZERO);
1795                 if (!q->pq_kernel_addr) {
1796                         rc = -ENOMEM;
1797                         goto free_internal_qmans_pq_mem;
1798                 }
1799         }
1800
1801         return 0;
1802
1803 free_internal_qmans_pq_mem:
1804         gaudi_free_internal_qmans_pq_mem(hdev);
1805         return rc;
1806 }
1807
1808 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1809 {
1810         struct asic_fixed_properties *prop = &hdev->asic_prop;
1811         struct pci_mem_region *region;
1812
1813         /* CFG */
1814         region = &hdev->pci_mem_region[PCI_REGION_CFG];
1815         region->region_base = CFG_BASE;
1816         region->region_size = CFG_SIZE;
1817         region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1818         region->bar_size = CFG_BAR_SIZE;
1819         region->bar_id = CFG_BAR_ID;
1820         region->used = 1;
1821
1822         /* SRAM */
1823         region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1824         region->region_base = SRAM_BASE_ADDR;
1825         region->region_size = SRAM_SIZE;
1826         region->offset_in_bar = 0;
1827         region->bar_size = SRAM_BAR_SIZE;
1828         region->bar_id = SRAM_BAR_ID;
1829         region->used = 1;
1830
1831         /* DRAM */
1832         region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1833         region->region_base = DRAM_PHYS_BASE;
1834         region->region_size = hdev->asic_prop.dram_size;
1835         region->offset_in_bar = 0;
1836         region->bar_size = prop->dram_pci_bar_size;
1837         region->bar_id = HBM_BAR_ID;
1838         region->used = 1;
1839
1840         /* SP SRAM */
1841         region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1842         region->region_base = PSOC_SCRATCHPAD_ADDR;
1843         region->region_size = PSOC_SCRATCHPAD_SIZE;
1844         region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1845         region->bar_size = CFG_BAR_SIZE;
1846         region->bar_id = CFG_BAR_ID;
1847         region->used = 1;
1848 }
1849
1850 static int gaudi_sw_init(struct hl_device *hdev)
1851 {
1852         struct gaudi_device *gaudi;
1853         u32 i, event_id = 0;
1854         int rc;
1855
1856         /* Allocate device structure */
1857         gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1858         if (!gaudi)
1859                 return -ENOMEM;
1860
1861         for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1862                 if (gaudi_irq_map_table[i].valid) {
1863                         if (event_id == GAUDI_EVENT_SIZE) {
1864                                 dev_err(hdev->dev,
1865                                         "Event array exceeds the limit of %u events\n",
1866                                         GAUDI_EVENT_SIZE);
1867                                 rc = -EINVAL;
1868                                 goto free_gaudi_device;
1869                         }
1870
1871                         gaudi->events[event_id++] =
1872                                         gaudi_irq_map_table[i].fc_id;
1873                 }
1874         }
1875
1876         gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1877
1878         hdev->asic_specific = gaudi;
1879
1880         /* Create DMA pool for small allocations */
1881         hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1882                         &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1883         if (!hdev->dma_pool) {
1884                 dev_err(hdev->dev, "failed to create DMA pool\n");
1885                 rc = -ENOMEM;
1886                 goto free_gaudi_device;
1887         }
1888
1889         rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1890         if (rc)
1891                 goto free_dma_pool;
1892
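        /*
         * The CPU accessible pool is a genalloc pool with a 32-byte minimum
         * allocation granularity (ilog2(32)) and no NUMA node binding (-1).
         */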
1893         hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1894         if (!hdev->cpu_accessible_dma_pool) {
1895                 dev_err(hdev->dev,
1896                         "Failed to create CPU accessible DMA pool\n");
1897                 rc = -ENOMEM;
1898                 goto free_cpu_dma_mem;
1899         }
1900
1901         rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1902                                 (uintptr_t) hdev->cpu_accessible_dma_mem,
1903                                 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1904         if (rc) {
1905                 dev_err(hdev->dev,
1906                         "Failed to add memory to CPU accessible DMA pool\n");
1907                 rc = -EFAULT;
1908                 goto free_cpu_accessible_dma_pool;
1909         }
1910
1911         rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1912         if (rc)
1913                 goto free_cpu_accessible_dma_pool;
1914
1915         spin_lock_init(&gaudi->hw_queues_lock);
1916
1917         hdev->supports_sync_stream = true;
1918         hdev->supports_coresight = true;
1919         hdev->supports_staged_submission = true;
1920         hdev->supports_wait_for_multi_cs = true;
1921
1922         hdev->asic_funcs->set_pci_memory_regions(hdev);
1923         hdev->stream_master_qid_arr =
1924                                 hdev->asic_funcs->get_stream_master_qid_arr();
1925         hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1926
1927         return 0;
1928
1929 free_cpu_accessible_dma_pool:
1930         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1931 free_cpu_dma_mem:
1932         if (!hdev->asic_prop.fw_security_enabled)
1933                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1934                                         hdev->cpu_pci_msb_addr);
1935         hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1936                                         hdev->cpu_accessible_dma_address);
1937 free_dma_pool:
1938         dma_pool_destroy(hdev->dma_pool);
1939 free_gaudi_device:
1940         kfree(gaudi);
1941         return rc;
1942 }
1943
1944 static int gaudi_sw_fini(struct hl_device *hdev)
1945 {
1946         struct gaudi_device *gaudi = hdev->asic_specific;
1947
1948         gaudi_free_internal_qmans_pq_mem(hdev);
1949
1950         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1951
1952         if (!hdev->asic_prop.fw_security_enabled)
1953                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1954                                         hdev->cpu_pci_msb_addr);
1955
1956         hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1957                                         hdev->cpu_accessible_dma_address);
1958
1959         dma_pool_destroy(hdev->dma_pool);
1960
1961         kfree(gaudi);
1962
1963         return 0;
1964 }
1965
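/*
 * In single MSI mode one interrupt vector services everything, so the handler
 * below simply polls every completion queue and the event queue on each
 * invocation.
 */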
1966 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1967 {
1968         struct hl_device *hdev = arg;
1969         int i;
1970
1971         if (hdev->disabled)
1972                 return IRQ_HANDLED;
1973
1974         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1975                 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1976
1977         hl_irq_handler_eq(irq, &hdev->event_queue);
1978
1979         return IRQ_HANDLED;
1980 }
1981
1982 /*
1983  * For backward compatibility, new MSI interrupts should be set after the
1984  * existing CPU and NIC interrupts.
1985  */
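/*
 * Illustrative mapping (assuming NIC_NUMBER_OF_ENGINES is 10): completion
 * queue interrupts below GAUDI_EVENT_QUEUE_MSI_IDX map 1:1 to MSI vectors,
 * the CPU event queue keeps its fixed vector, and any newer interrupt index
 * is pushed past the NIC vectors, i.e. msi_vec = nr + NIC_NUMBER_OF_ENGINES + 1.
 */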
1986 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1987                                 bool cpu_eq)
1988 {
1989         int msi_vec;
1990
1991         if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1992                 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1993                                 GAUDI_EVENT_QUEUE_MSI_IDX);
1994
1995         msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1996                         (nr + NIC_NUMBER_OF_ENGINES + 1);
1997
1998         return pci_irq_vector(hdev->pdev, msi_vec);
1999 }
2000
2001 static int gaudi_enable_msi_single(struct hl_device *hdev)
2002 {
2003         int rc, irq;
2004
2005         dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
2006
2007         irq = gaudi_pci_irq_vector(hdev, 0, false);
2008         rc = request_irq(irq, gaudi_irq_handler_single, 0,
2009                         "gaudi single msi", hdev);
2010         if (rc)
2011                 dev_err(hdev->dev,
2012                         "Failed to request single MSI IRQ\n");
2013
2014         return rc;
2015 }
2016
2017 static int gaudi_enable_msi(struct hl_device *hdev)
2018 {
2019         struct gaudi_device *gaudi = hdev->asic_specific;
2020         int rc;
2021
2022         if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2023                 return 0;
2024
2025         rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2026         if (rc < 0) {
2027                 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2028                 return rc;
2029         }
2030
2031         rc = gaudi_enable_msi_single(hdev);
2032         if (rc)
2033                 goto free_pci_irq_vectors;
2034
2035         gaudi->hw_cap_initialized |= HW_CAP_MSI;
2036
2037         return 0;
2038
2039 free_pci_irq_vectors:
2040         pci_free_irq_vectors(hdev->pdev);
2041         return rc;
2042 }
2043
2044 static void gaudi_sync_irqs(struct hl_device *hdev)
2045 {
2046         struct gaudi_device *gaudi = hdev->asic_specific;
2047
2048         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2049                 return;
2050
2051         /* Wait for all pending IRQs to be finished */
2052         synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2053 }
2054
2055 static void gaudi_disable_msi(struct hl_device *hdev)
2056 {
2057         struct gaudi_device *gaudi = hdev->asic_specific;
2058
2059         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2060                 return;
2061
2062         gaudi_sync_irqs(hdev);
2063         free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2064         pci_free_irq_vectors(hdev->pdev);
2065
2066         gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2067 }
2068
2069 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2070 {
2071         struct gaudi_device *gaudi = hdev->asic_specific;
2072
2073         if (hdev->asic_prop.fw_security_enabled)
2074                 return;
2075
2076         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2077                                                 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2078                 return;
2079
2080         if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2081                 return;
2082
2083         WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2084                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2085         WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2086                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2087         WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2088                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2089         WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2090                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2091         WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2092                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2093         WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2094                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2095         WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2096                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2097         WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2098                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2099
2100         WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2101                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2102         WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2103                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2104         WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2105                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2106         WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2107                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2108         WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2109                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2110         WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2111                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2112         WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2113                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2114         WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2115                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2116
2117         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2118                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2119         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2120                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2121         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2122                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2123         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2124                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2125         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2126                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2127         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2128                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2129         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2130                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2131         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2132                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2133
2134         gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2135 }
2136
2137 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2138 {
2139         struct gaudi_device *gaudi = hdev->asic_specific;
2140
2141         if (hdev->asic_prop.fw_security_enabled)
2142                 return;
2143
2144         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2145                                         CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2146                 return;
2147
2148         if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2149                 return;
2150
2151         WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2152                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2153         WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2154                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2155         WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2156                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2157         WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2158                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2159         WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2160                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2161         WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2162                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2163         WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2164                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2165         WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2166                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2167
2168         WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2169                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2170         WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2171                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2172         WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2173                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2174         WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2175                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2176         WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2177                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2178         WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2179                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2180         WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2181                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2182         WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2183                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2184
2185         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2186                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2187         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2188                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2189         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2190                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2191         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2192                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2193         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2194                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2195         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2196                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2197         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2198                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2199         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2200                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2201
2202         gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2203 }
2204
2205 static void gaudi_init_e2e(struct hl_device *hdev)
2206 {
2207         if (hdev->asic_prop.fw_security_enabled)
2208                 return;
2209
2210         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2211                                         CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2212                 return;
2213
2214         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2215         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2216         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2217         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2218
2219         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2220         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2221         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2222         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2223
2224         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2225         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2226         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2227         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2228
2229         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2230         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2231         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2232         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2233
2234         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2235         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2236         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2237         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2238
2239         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2240         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2241         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2242         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2243
2244         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2245         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2246         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2247         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2248
2249         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2250         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2251         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2252         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2253
2254         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2255         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2256         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2257         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2258
2259         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2260         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2261         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2262         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2263
2264         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2265         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2266         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2267         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2268
2269         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2270         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2271         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2272         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2273
2274         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2275         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2276         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2277         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2278
2279         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2280         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2281         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2282         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2283
2284         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2285         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2286         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2287         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2288
2289         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2290         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2291         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2292         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2293
2294         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2295         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2296         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2297         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2298
2299         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2300         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2301         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2302         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2303
2304         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2305         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2306         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2307         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2308
2309         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2310         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2311         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2312         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2313
2314         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2315         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2316         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2317         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2318
2319         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2320         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2321         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2322         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2323
2324         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2325         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2326         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2327         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2328
2329         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2330         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2331         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2332         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2333
2334         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2335                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2336         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2337                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2338
2339         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2340                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2341         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2342                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2343
2344         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2345                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2346         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2347                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2348
2349         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2350                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2351         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2352                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2353
2354         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2355                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2356         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2357                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2358
2359         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2360                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2361         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2362                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2363
2364         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2365                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2366         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2367                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2368
2369         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2370                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2371         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2372                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2373
2374         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2375                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2376         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2377                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2378
2379         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2380                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2381         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2382                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2383
2384         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2385                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2386         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2387                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2388
2389         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2390                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2391         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2392                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2393
2394         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2395                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2396         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2397                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2398
2399         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2400                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2401         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2402                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2403
2404         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2405                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2406         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2407                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2408
2409         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2410                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2411         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2412                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2413
2414         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2415                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2416         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2417                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2418
2419         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2420                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2421         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2422                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2423
2424         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2425                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2426         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2427                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2428
2429         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2430                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2431         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2432                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2433
2434         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2435                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2436         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2437                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2438
2439         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2440                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2441         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2442                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2443
2444         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2445                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2446         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2447                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2448
2449         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2450                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2451         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2452                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2453 }
2454
2455 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2456 {
2457         u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2458
2459         if (hdev->asic_prop.fw_security_enabled)
2460                 return;
2461
2462         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2463                                                 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2464                 return;
2465
2466         hbm0_wr = 0x33333333;
2467         hbm0_rd = 0x77777777;
2468         hbm1_wr = 0x55555555;
2469         hbm1_rd = 0xDDDDDDDD;
2470
2471         WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2472         WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2473         WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2474         WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2475
2476         WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2477         WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2478         WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2479         WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2480
2481         WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2482         WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2483         WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2484         WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2485
2486         WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2487         WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2488         WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2489         WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2490
2491         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2492                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2493                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2494         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2495                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2496                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2497         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2498                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2499                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2500         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2501                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2502                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2503
2504         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2505                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2506                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2507         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2508                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2509                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2510         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2511                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2512                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2513         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2514                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2515                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2516 }
2517
2518 static void gaudi_init_golden_registers(struct hl_device *hdev)
2519 {
2520         u32 tpc_offset;
2521         int tpc_id, i;
2522
2523         gaudi_init_e2e(hdev);
2524         gaudi_init_hbm_cred(hdev);
2525
2526         for (tpc_id = 0, tpc_offset = 0;
2527                                 tpc_id < TPC_NUMBER_OF_ENGINES;
2528                                 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2529                 /* Mask all arithmetic interrupts from TPC */
2530                 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2531                 /* Set 16 cache lines */
2532                 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2533                                 ICACHE_FETCH_LINE_NUM, 2);
2534         }
2535
2536         /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2537         for (i = 0 ; i < 128 ; i += 8)
2538                 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2539
2540         WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2541         WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2542         WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2543         WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2544 }
2545
2546 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2547                                         int qman_id, dma_addr_t qman_pq_addr)
2548 {
2549         struct cpu_dyn_regs *dyn_regs =
2550                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2551         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2552         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2553         u32 q_off, dma_qm_offset;
2554         u32 dma_qm_err_cfg, irq_handler_offset;
2555
2556         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2557
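        /*
         * Cache the sync manager base addresses: the '_en' values point at the
         * east-north sync manager (monitor payload address / SOB object 0) and
         * the '_ws' values at the west-south one; they are programmed below
         * into the QMAN CP MSG_BASE0-3 registers.
         */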
2558         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2559                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2560         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2561                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2562         so_base_en_lo = lower_32_bits(CFG_BASE +
2563                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2564         so_base_en_hi = upper_32_bits(CFG_BASE +
2565                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2566         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2567                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2568         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2569                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2570         so_base_ws_lo = lower_32_bits(CFG_BASE +
2571                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2572         so_base_ws_hi = upper_32_bits(CFG_BASE +
2573                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2574
2575         q_off = dma_qm_offset + qman_id * 4;
2576
2577         WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2578         WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2579
2580         WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2581         WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2582         WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2583
2584         WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2585         WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2586                                                         QMAN_LDMA_SRC_OFFSET);
2587         WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2588                                                         QMAN_LDMA_DST_OFFSET);
2589
2590         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2591         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2592         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2593         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2594         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2595         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2596         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2597         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2598
2599         WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2600
2601         /* The following configuration is needed only once per QMAN */
2602         if (qman_id == 0) {
2603                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2604                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2605                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2606
2607                 /* Configure RAZWI IRQ */
2608                 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2609                 if (hdev->stop_on_err)
2610                         dma_qm_err_cfg |=
2611                                 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2612
2613                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2614
2615                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2616                         lower_32_bits(CFG_BASE + irq_handler_offset));
2617                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2618                         upper_32_bits(CFG_BASE + irq_handler_offset));
2619
2620                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2621                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2622                                                                         dma_id);
2623
2624                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2625                                 QM_ARB_ERR_MSG_EN_MASK);
2626
2627                 /* Set timeout to maximum */
2628                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2629
2630                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2631                                 QMAN_EXTERNAL_MAKE_TRUSTED);
2632
2633                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2634         }
2635 }
2636
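/*
 * Configure the DMA core itself (as opposed to its QMAN): read limits,
 * error reporting towards the GIC/FW, MMU bypass for the secured channel,
 * and finally enable the engine.
 */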
2637 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2638 {
2639         struct cpu_dyn_regs *dyn_regs =
2640                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2641         u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2642         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2643         u32 irq_handler_offset;
2644
2645         /* Set to maximum possible according to physical size */
2646         WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2647         WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2648
2649         /* WA for H/W bug H3-2116 */
2650         WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2651
2652         /* STOP_ON bit means no completion is sent for the operation in case of RAZWI */
2653         if (hdev->stop_on_err)
2654                 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2655
2656         WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2657
2658         irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2659                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2660                         le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2661
2662         WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2663                 lower_32_bits(CFG_BASE + irq_handler_offset));
2664         WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2665                 upper_32_bits(CFG_BASE + irq_handler_offset));
2666
2667         WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2668                 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2669         WREG32(mmDMA0_CORE_PROT + dma_offset,
2670                         1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2671         /* If the channel is secured, it should be in MMU bypass mode */
2672         WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2673                         1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2674         WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2675 }
2676
2677 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2678                                 u32 enable_mask)
2679 {
2680         u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2681
2682         WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2683 }
2684
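/*
 * Initialize all PCI DMA QMANs: map each stream to a kernel queue, a
 * completion queue and an MSI vector, then bring up the DMA core and enable
 * the QMAN.
 */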
2685 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2686 {
2687         struct gaudi_device *gaudi = hdev->asic_specific;
2688         struct hl_hw_queue *q;
2689         int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2690
2691         if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2692                 return;
2693
2694         for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2695                 dma_id = gaudi_dma_assignment[i];
2696                 /*
2697                  * For queues after the CPU Q we need to add 1 to get the
2698                  * correct queue index. In addition, we need to add the CPU EQ
2699                  * and NIC IRQ vectors in order to get the correct MSI register.
2700                  */
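                /*
                 * For example, the PCI DMA engine on channel 5 gets
                 * cpu_skip = 1, so q_idx = 4 * 5 + j + 1 and its four streams
                 * land on the kernel queue slots that follow the CPU queue.
                 */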
2701                 if (dma_id > 1) {
2702                         cpu_skip = 1;
2703                         nic_skip = NIC_NUMBER_OF_ENGINES;
2704                 } else {
2705                         cpu_skip = 0;
2706                         nic_skip = 0;
2707                 }
2708
2709                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2710                         q_idx = 4 * dma_id + j + cpu_skip;
2711                         q = &hdev->kernel_queues[q_idx];
2712                         q->cq_id = cq_id++;
2713                         q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2714                         gaudi_init_pci_dma_qman(hdev, dma_id, j,
2715                                                 q->bus_address);
2716                 }
2717
2718                 gaudi_init_dma_core(hdev, dma_id);
2719
2720                 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2721         }
2722
2723         gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2724 }
2725
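/*
 * Configure a single stream of an HBM (compute) DMA QMAN. Streams 0-3 get a
 * PQ (base, size, PI/CI) and the CP-DMA offsets; qman_id 4 is the lower CP
 * and instead gets the LDMA offsets plus the RAZWI/error reporting, arbiter
 * watchdog and protection settings for the whole QMAN.
 */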
2726 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2727                                         int qman_id, u64 qman_base_addr)
2728 {
2729         struct cpu_dyn_regs *dyn_regs =
2730                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2731         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2732         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2733         u32 dma_qm_err_cfg, irq_handler_offset;
2734         u32 q_off, dma_qm_offset;
2735
2736         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2737
2738         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2739                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2740         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2741                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2742         so_base_en_lo = lower_32_bits(CFG_BASE +
2743                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2744         so_base_en_hi = upper_32_bits(CFG_BASE +
2745                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2746         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2747                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2748         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2749                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2750         so_base_ws_lo = lower_32_bits(CFG_BASE +
2751                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2752         so_base_ws_hi = upper_32_bits(CFG_BASE +
2753                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2754
2755         q_off = dma_qm_offset + qman_id * 4;
2756
2757         if (qman_id < 4) {
2758                 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2759                                         lower_32_bits(qman_base_addr));
2760                 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2761                                         upper_32_bits(qman_base_addr));
2762
2763                 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2764                 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2765                 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2766
2767                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2768                                                         QMAN_CPDMA_SIZE_OFFSET);
2769                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2770                                                         QMAN_CPDMA_SRC_OFFSET);
2771                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2772                                                         QMAN_CPDMA_DST_OFFSET);
2773         } else {
2774                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2775                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2776                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2777
2778                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2779                                                         QMAN_LDMA_SIZE_OFFSET);
2780                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2781                                                         QMAN_LDMA_SRC_OFFSET);
2782                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2783                                                         QMAN_LDMA_DST_OFFSET);
2784
2785                 /* Configure RAZWI IRQ */
2786                 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2787                 if (hdev->stop_on_err)
2788                         dma_qm_err_cfg |=
2789                                 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2790
2791                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2792
2793                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2794                         lower_32_bits(CFG_BASE + irq_handler_offset));
2795                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2796                         upper_32_bits(CFG_BASE + irq_handler_offset));
2797
2798                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2799                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2800                                                                         dma_id);
2801
2802                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2803                                 QM_ARB_ERR_MSG_EN_MASK);
2804
2805                 /* Set timeout to maximum */
2806                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2807
2808                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2809                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2810                                 QMAN_INTERNAL_MAKE_TRUSTED);
2811         }
2812
2813         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2814         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2815         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2816         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2817
2818         /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2819         if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2820                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2821                                 mtr_base_ws_lo);
2822                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2823                                 mtr_base_ws_hi);
2824                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2825                                 so_base_ws_lo);
2826                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2827                                 so_base_ws_hi);
2828         }
2829 }
2830
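/*
 * Initialize the QMANs of all HBM (compute) DMA channels: four upper-CP
 * streams per channel backed by internal queues, plus the lower CP, then
 * enable the DMA core and the QMAN itself.
 */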
2831 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2832 {
2833         struct gaudi_device *gaudi = hdev->asic_specific;
2834         struct gaudi_internal_qman_info *q;
2835         u64 qman_base_addr;
2836         int i, j, dma_id, internal_q_index;
2837
2838         if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2839                 return;
2840
2841         for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2842                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2843
2844                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2845                          /*
2846                           * Add the CPU queue in order to get the correct queue
2847                           * number, as all internal queues are placed after it
2848                           */
2849                         internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2850
2851                         q = &gaudi->internal_qmans[internal_q_index];
2852                         qman_base_addr = (u64) q->pq_dma_addr;
2853                         gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2854                                                 qman_base_addr);
2855                 }
2856
2857                 /* Initializing lower CP for HBM DMA QMAN */
2858                 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2859
2860                 gaudi_init_dma_core(hdev, dma_id);
2861
2862                 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2863         }
2864
2865         gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2866 }
2867
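/*
 * Configure a single stream of an MME QMAN, mirroring the HBM DMA QMAN
 * setup: streams 0-3 get a PQ and the CP-DMA offsets, while qman_id 4 (the
 * lower CP) gets the LDMA offsets and the per-QMAN error/arbitration
 * configuration.
 */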
2868 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2869                                         int qman_id, u64 qman_base_addr)
2870 {
2871         struct cpu_dyn_regs *dyn_regs =
2872                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2873         u32 mtr_base_lo, mtr_base_hi;
2874         u32 so_base_lo, so_base_hi;
2875         u32 irq_handler_offset;
2876         u32 q_off, mme_id;
2877         u32 mme_qm_err_cfg;
2878
2879         mtr_base_lo = lower_32_bits(CFG_BASE +
2880                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2881         mtr_base_hi = upper_32_bits(CFG_BASE +
2882                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2883         so_base_lo = lower_32_bits(CFG_BASE +
2884                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2885         so_base_hi = upper_32_bits(CFG_BASE +
2886                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2887
2888         q_off = mme_offset + qman_id * 4;
2889
2890         if (qman_id < 4) {
2891                 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2892                                         lower_32_bits(qman_base_addr));
2893                 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2894                                         upper_32_bits(qman_base_addr));
2895
2896                 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2897                 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2898                 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2899
2900                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2901                                                         QMAN_CPDMA_SIZE_OFFSET);
2902                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2903                                                         QMAN_CPDMA_SRC_OFFSET);
2904                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2905                                                         QMAN_CPDMA_DST_OFFSET);
2906         } else {
2907                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2908                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2909                                 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2910
2911                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2912                                                         QMAN_LDMA_SIZE_OFFSET);
2913                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2914                                                         QMAN_LDMA_SRC_OFFSET);
2915                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2916                                                         QMAN_LDMA_DST_OFFSET);
2917
2918                 /* Configure RAZWI IRQ */
2919                 mme_id = mme_offset /
2920                                 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2921
2922                 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2923                 if (hdev->stop_on_err)
2924                         mme_qm_err_cfg |=
2925                                 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2926
2927                 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2928
2929                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2930                         lower_32_bits(CFG_BASE + irq_handler_offset));
2931                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2932                         upper_32_bits(CFG_BASE + irq_handler_offset));
2933
2934                 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2935                         gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2936                                                                         mme_id);
2937
2938                 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2939                                 QM_ARB_ERR_MSG_EN_MASK);
2940
2941                 /* Set timeout to maximum */
2942                 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);
2943
2944                 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2945                 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2946                                 QMAN_INTERNAL_MAKE_TRUSTED);
2947         }
2948
2949         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2950         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2951         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2952         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2953 }
2954
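/*
 * Initialize the MME QMANs. Only the QMANs of MME0 and MME2 are programmed
 * and enabled here; the internal MME queues map onto their streams as
 * described in the comment below.
 */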
2955 static void gaudi_init_mme_qmans(struct hl_device *hdev)
2956 {
2957         struct gaudi_device *gaudi = hdev->asic_specific;
2958         struct gaudi_internal_qman_info *q;
2959         u64 qman_base_addr;
2960         u32 mme_offset;
2961         int i, internal_q_index;
2962
2963         if (gaudi->hw_cap_initialized & HW_CAP_MME)
2964                 return;
2965
2966         /*
2967          * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2968          * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2969          */
2970
2971         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2972
2973         for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2974                 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2975                 q = &gaudi->internal_qmans[internal_q_index];
2976                 qman_base_addr = (u64) q->pq_dma_addr;
2977                 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2978                                         qman_base_addr);
2979                 if (i == 3)
2980                         mme_offset = 0;
2981         }
2982
2983         /* Initializing lower CP for MME QMANs */
2984         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2985         gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2986         gaudi_init_mme_qman(hdev, 0, 4, 0);
2987
2988         WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2989         WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2990
2991         gaudi->hw_cap_initialized |= HW_CAP_MME;
2992 }
2993
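/*
 * Configure a single stream of a TPC QMAN; the split between streams 0-3
 * and the lower CP (qman_id 4) follows the same pattern as the DMA and MME
 * QMANs above.
 */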
2994 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2995                                 int qman_id, u64 qman_base_addr)
2996 {
2997         struct cpu_dyn_regs *dyn_regs =
2998                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2999         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3000         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3001         u32 tpc_qm_err_cfg, irq_handler_offset;
3002         u32 q_off, tpc_id;
3003
3004         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3005                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3006         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3007                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3008         so_base_en_lo = lower_32_bits(CFG_BASE +
3009                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3010         so_base_en_hi = upper_32_bits(CFG_BASE +
3011                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3012         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3013                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3014         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3015                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3016         so_base_ws_lo = lower_32_bits(CFG_BASE +
3017                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3018         so_base_ws_hi = upper_32_bits(CFG_BASE +
3019                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3020
3021         q_off = tpc_offset + qman_id * 4;
3022
3023         tpc_id = tpc_offset /
3024                         (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3025
3026         if (qman_id < 4) {
3027                 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3028                                         lower_32_bits(qman_base_addr));
3029                 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3030                                         upper_32_bits(qman_base_addr));
3031
3032                 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3033                 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3034                 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3035
3036                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3037                                                         QMAN_CPDMA_SIZE_OFFSET);
3038                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3039                                                         QMAN_CPDMA_SRC_OFFSET);
3040                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3041                                                         QMAN_CPDMA_DST_OFFSET);
3042         } else {
3043                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3044                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3045                                 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3046
3047                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3048                                                         QMAN_LDMA_SIZE_OFFSET);
3049                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3050                                                         QMAN_LDMA_SRC_OFFSET);
3051                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3052                                                         QMAN_LDMA_DST_OFFSET);
3053
3054                 /* Configure RAZWI IRQ */
3055                 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3056                 if (hdev->stop_on_err)
3057                         tpc_qm_err_cfg |=
3058                                 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3059
3060                 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3061
3062                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3063                         lower_32_bits(CFG_BASE + irq_handler_offset));
3064                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3065                         upper_32_bits(CFG_BASE + irq_handler_offset));
3066
3067                 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3068                         gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3069                                                                         tpc_id);
3070
3071                 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3072                                 QM_ARB_ERR_MSG_EN_MASK);
3073
3074                 /* Set timeout to maximum */
3075                 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);
3076
3077                 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3078                 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3079                                 QMAN_INTERNAL_MAKE_TRUSTED);
3080         }
3081
3082         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3083         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3084         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3085         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3086
3087         /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3088         if (tpc_id == 6) {
3089                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3090                                 mtr_base_ws_lo);
3091                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3092                                 mtr_base_ws_hi);
3093                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3094                                 so_base_ws_lo);
3095                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3096                                 so_base_ws_hi);
3097         }
3098 }
3099
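/*
 * Initialize the QMANs of all TPC engines: four streams plus the lower CP
 * per TPC, program the sync manager base address and mark each TPC as
 * initialized in the hw_cap mask.
 */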
3100 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3101 {
3102         struct gaudi_device *gaudi = hdev->asic_specific;
3103         struct gaudi_internal_qman_info *q;
3104         u64 qman_base_addr;
3105         u32 so_base_hi, tpc_offset = 0;
3106         u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3107                         mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3108         int i, tpc_id, internal_q_index;
3109
3110         if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3111                 return;
3112
3113         so_base_hi = upper_32_bits(CFG_BASE +
3114                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3115
3116         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3117                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3118                         internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3119                                                 tpc_id * QMAN_STREAMS + i;
3120                         q = &gaudi->internal_qmans[internal_q_index];
3121                         qman_base_addr = (u64) q->pq_dma_addr;
3122                         gaudi_init_tpc_qman(hdev, tpc_offset, i,
3123                                                 qman_base_addr);
3124
3125                         if (i == 3) {
3126                                 /* Initializing lower CP for TPC QMAN */
3127                                 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3128
3129                                 /* Enable the QMAN and TPC channel */
3130                                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3131                                                 QMAN_TPC_ENABLE);
3132                         }
3133                 }
3134
3135                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3136                                 so_base_hi);
3137
3138                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3139
3140                 gaudi->hw_cap_initialized |=
3141                                 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3142         }
3143 }
3144
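/*
 * Configure a single stream of a NIC QMAN. All streams get a PQ and the
 * sync manager message bases (2/3 are used for sync stream collective);
 * stream 0 additionally carries the per-QMAN error and arbitration setup.
 */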
3145 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3146                                 int qman_id, u64 qman_base_addr, int nic_id)
3147 {
3148         struct cpu_dyn_regs *dyn_regs =
3149                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3150         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3151         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3152         u32 nic_qm_err_cfg, irq_handler_offset;
3153         u32 q_off;
3154
3155         mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3156                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3157         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3158                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3159         so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3160                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3161         so_base_en_hi = upper_32_bits(CFG_BASE +
3162                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3163         mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3164                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3165         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3166                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3167         so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3168                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3169         so_base_ws_hi = upper_32_bits(CFG_BASE +
3170                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3171
3172         q_off = nic_offset + qman_id * 4;
3173
3174         WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3175         WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3176
3177         WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3178         WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3179         WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3180
3181         WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3182                                                         QMAN_LDMA_SIZE_OFFSET);
3183         WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3184                                                         QMAN_LDMA_SRC_OFFSET);
3185         WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3186                                                         QMAN_LDMA_DST_OFFSET);
3187
3188         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3189         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3190         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3191         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3192
3193         /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3194         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3195         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3196         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3197         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3198
3199         if (qman_id == 0) {
3200                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3201                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3202                                 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3203
3204                 /* Configure RAZWI IRQ */
3205                 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3206                 if (hdev->stop_on_err)
3207                         nic_qm_err_cfg |=
3208                                 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3209
3210                 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3211
3212                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3213                         lower_32_bits(CFG_BASE + irq_handler_offset));
3214                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3215                         upper_32_bits(CFG_BASE + irq_handler_offset));
3216
3217                 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3218                         gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3219                                                                         nic_id);
3220
3221                 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3222                                 QM_ARB_ERR_MSG_EN_MASK);
3223
3224                 /* Set timeout to maximum */
3225                 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);
3226
3227                 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3228                 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3229                                 QMAN_INTERNAL_MAKE_TRUSTED);
3230         }
3231 }
3232
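/*
 * Initialize the QMANs of all enabled NIC ports, honoring nic_ports_mask.
 * Register offsets alternate between the two QMANs inside a NIC macro and
 * then jump to the next macro, hence the offset adjustments below.
 */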
3233 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3234 {
3235         struct gaudi_device *gaudi = hdev->asic_specific;
3236         struct gaudi_internal_qman_info *q;
3237         u64 qman_base_addr;
3238         u32 nic_offset = 0;
3239         u32 nic_delta_between_qmans =
3240                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3241         u32 nic_delta_between_nics =
3242                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3243         int i, nic_id, internal_q_index;
3244
3245         if (!hdev->nic_ports_mask)
3246                 return;
3247
3248         if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3249                 return;
3250
3251         dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3252
3253         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3254                 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3255                         nic_offset += nic_delta_between_qmans;
3256                         if (nic_id & 1) {
3257                                 nic_offset -= (nic_delta_between_qmans * 2);
3258                                 nic_offset += nic_delta_between_nics;
3259                         }
3260                         continue;
3261                 }
3262
3263                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3264                         internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3265                                                 nic_id * QMAN_STREAMS + i;
3266                         q = &gaudi->internal_qmans[internal_q_index];
3267                         qman_base_addr = (u64) q->pq_dma_addr;
3268                         gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3269                                                 qman_base_addr, nic_id);
3270                 }
3271
3272                 /* Enable the QMAN */
3273                 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3274
3275                 nic_offset += nic_delta_between_qmans;
3276                 if (nic_id & 1) {
3277                         nic_offset -= (nic_delta_between_qmans * 2);
3278                         nic_offset += nic_delta_between_nics;
3279                 }
3280
3281                 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3282         }
3283 }
3284
3285 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3286 {
3287         struct gaudi_device *gaudi = hdev->asic_specific;
3288
3289         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3290                 return;
3291
3292         WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3293         WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3294         WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3295 }
3296
3297 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3298 {
3299         struct gaudi_device *gaudi = hdev->asic_specific;
3300
3301         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3302                 return;
3303
3304         WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3305         WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3306         WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3307         WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3308         WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3309 }
3310
3311 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3312 {
3313         struct gaudi_device *gaudi = hdev->asic_specific;
3314
3315         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3316                 return;
3317
3318         WREG32(mmMME2_QM_GLBL_CFG0, 0);
3319         WREG32(mmMME0_QM_GLBL_CFG0, 0);
3320 }
3321
3322 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3323 {
3324         struct gaudi_device *gaudi = hdev->asic_specific;
3325         u32 tpc_offset = 0;
3326         int tpc_id;
3327
3328         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3329                 return;
3330
3331         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3332                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3333                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3334         }
3335 }
3336
3337 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3338 {
3339         struct gaudi_device *gaudi = hdev->asic_specific;
3340         u32 nic_mask, nic_offset = 0;
3341         u32 nic_delta_between_qmans =
3342                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3343         u32 nic_delta_between_nics =
3344                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3345         int nic_id;
3346
3347         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3348                 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3349
3350                 if (gaudi->hw_cap_initialized & nic_mask)
3351                         WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3352
3353                 nic_offset += nic_delta_between_qmans;
3354                 if (nic_id & 1) {
3355                         nic_offset -= (nic_delta_between_qmans * 2);
3356                         nic_offset += nic_delta_between_nics;
3357                 }
3358         }
3359 }
3360
3361 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3362 {
3363         struct gaudi_device *gaudi = hdev->asic_specific;
3364
3365         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3366                 return;
3367
3368         /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3369         WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3370         WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3371         WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3372 }
3373
3374 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3375 {
3376         struct gaudi_device *gaudi = hdev->asic_specific;
3377
3378         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3379                 return;
3380
3381         /* Stop CPs of HBM DMA QMANs */
3382
3383         WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3384         WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3385         WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3386         WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3387         WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3388 }
3389
3390 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3391 {
3392         struct gaudi_device *gaudi = hdev->asic_specific;
3393
3394         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3395                 return;
3396
3397         /* Stop CPs of MME QMANs */
3398         WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3399         WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3400 }
3401
3402 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3403 {
3404         struct gaudi_device *gaudi = hdev->asic_specific;
3405
3406         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3407                 return;
3408
3409         WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3410         WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3411         WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3412         WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3413         WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3414         WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3415         WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3416         WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3417 }
3418
3419 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3420 {
3421         struct gaudi_device *gaudi = hdev->asic_specific;
3422
3423         /* Stop upper CPs of QMANs */
3424
3425         if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3426                 WREG32(mmNIC0_QM0_GLBL_CFG1,
3427                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3428                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3429                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3430
3431         if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3432                 WREG32(mmNIC0_QM1_GLBL_CFG1,
3433                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3434                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3435                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3436
3437         if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3438                 WREG32(mmNIC1_QM0_GLBL_CFG1,
3439                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3440                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3441                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3442
3443         if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3444                 WREG32(mmNIC1_QM1_GLBL_CFG1,
3445                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3446                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3447                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3448
3449         if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3450                 WREG32(mmNIC2_QM0_GLBL_CFG1,
3451                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3452                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3453                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3454
3455         if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3456                 WREG32(mmNIC2_QM1_GLBL_CFG1,
3457                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3458                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3459                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3460
3461         if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3462                 WREG32(mmNIC3_QM0_GLBL_CFG1,
3463                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3464                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3465                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3466
3467         if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3468                 WREG32(mmNIC3_QM1_GLBL_CFG1,
3469                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3470                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3471                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3472
3473         if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3474                 WREG32(mmNIC4_QM0_GLBL_CFG1,
3475                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3476                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3477                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3478
3479         if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3480                 WREG32(mmNIC4_QM1_GLBL_CFG1,
3481                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3482                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3483                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3484 }
3485
3486 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3487 {
3488         struct gaudi_device *gaudi = hdev->asic_specific;
3489
3490         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3491                 return;
3492
3493         WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3494         WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3495         WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3496 }
3497
3498 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3499 {
3500         struct gaudi_device *gaudi = hdev->asic_specific;
3501
3502         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3503                 return;
3504
3505         WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3506         WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3507         WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3508         WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3509         WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3510 }
3511
3512 static void gaudi_mme_stall(struct hl_device *hdev)
3513 {
3514         struct gaudi_device *gaudi = hdev->asic_specific;
3515
3516         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3517                 return;
3518
3519         /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3520         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3521         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3522         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3523         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3524         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3525         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3526         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3527         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3528         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3529         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3530         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3531         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3532         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3533         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3534         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3535         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3536 }
3537
3538 static void gaudi_tpc_stall(struct hl_device *hdev)
3539 {
3540         struct gaudi_device *gaudi = hdev->asic_specific;
3541
3542         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3543                 return;
3544
3545         WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3546         WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3547         WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3548         WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3549         WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3550         WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3551         WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3552         WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3553 }
3554
3555 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3556 {
3557         u32 qman_offset;
3558         int i;
3559
3560         if (hdev->asic_prop.fw_security_enabled)
3561                 return;
3562
3563         for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3564                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3565                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3566
3567                 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3568         }
3569
3570         WREG32(mmMME0_QM_CGM_CFG, 0);
3571         WREG32(mmMME0_QM_CGM_CFG1, 0);
3572         WREG32(mmMME2_QM_CGM_CFG, 0);
3573         WREG32(mmMME2_QM_CGM_CFG1, 0);
3574
3575         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3576                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3577                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3578
3579                 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3580         }
3581 }
3582
3583 static void gaudi_enable_timestamp(struct hl_device *hdev)
3584 {
3585         /* Disable the timestamp counter */
3586         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3587
3588         /* Zero the lower/upper parts of the 64-bit counter */
3589         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3590         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3591
3592         /* Enable the counter */
3593         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3594 }
3595
3596 static void gaudi_disable_timestamp(struct hl_device *hdev)
3597 {
3598         /* Disable the timestamp counter */
3599         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3600 }
3601
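/*
 * Engine halt sequence: first stop the QMAN CPs so no new work is fetched,
 * wait, then stall the engines themselves, wait again, and finally disable
 * the QMANs and the timestamp counter. On a firmware-driven reset the
 * engine halt is skipped and only the MSI is disabled here.
 */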
3602 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3603 {
3604         u32 wait_timeout_ms;
3605
3606         if (hdev->pldm)
3607                 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3608         else
3609                 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3610
3611         if (fw_reset)
3612                 goto skip_engines;
3613
3614         gaudi_stop_nic_qmans(hdev);
3615         gaudi_stop_mme_qmans(hdev);
3616         gaudi_stop_tpc_qmans(hdev);
3617         gaudi_stop_hbm_dma_qmans(hdev);
3618         gaudi_stop_pci_dma_qmans(hdev);
3619
3620         msleep(wait_timeout_ms);
3621
3622         gaudi_pci_dma_stall(hdev);
3623         gaudi_hbm_dma_stall(hdev);
3624         gaudi_tpc_stall(hdev);
3625         gaudi_mme_stall(hdev);
3626
3627         msleep(wait_timeout_ms);
3628
3629         gaudi_disable_nic_qmans(hdev);
3630         gaudi_disable_mme_qmans(hdev);
3631         gaudi_disable_tpc_qmans(hdev);
3632         gaudi_disable_hbm_dma_qmans(hdev);
3633         gaudi_disable_pci_dma_qmans(hdev);
3634
3635         gaudi_disable_timestamp(hdev);
3636
3637 skip_engines:
3638         gaudi_disable_msi(hdev);
3639 }
3640
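/*
 * Enable the device MMU: program the hop-0 table address for every ASID,
 * set up the cache invalidation management area, invalidate the MMU cache
 * and finally turn the MMU on.
 */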
3641 static int gaudi_mmu_init(struct hl_device *hdev)
3642 {
3643         struct asic_fixed_properties *prop = &hdev->asic_prop;
3644         struct gaudi_device *gaudi = hdev->asic_specific;
3645         u64 hop0_addr;
3646         int rc, i;
3647
3648         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3649                 return 0;
3650
3651         for (i = 0 ; i < prop->max_asid ; i++) {
3652                 hop0_addr = prop->mmu_pgt_addr +
3653                                 (i * prop->mmu_hop_table_size);
3654
3655                 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3656                 if (rc) {
3657                         dev_err(hdev->dev,
3658                                 "failed to set hop0 addr for asid %d\n", i);
3659                         return rc;
3660                 }
3661         }
3662
3663         /* Init the MMU cache management page */
3664         WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
3665         WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);
3666
3667         /* mem cache invalidation */
3668         WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3669
3670         rc = hl_mmu_invalidate_cache(hdev, true, 0);
3671         if (rc)
3672                 return rc;
3673
3674         WREG32(mmMMU_UP_MMU_ENABLE, 1);
3675         WREG32(mmMMU_UP_SPI_MASK, 0xF);
3676
3677         WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);
3678
3679         /*
3680          * The H/W expects the first PI after init to be 1. After wraparound
3681          * we'll write 0.
3682          */
3683         gaudi->mmu_cache_inv_pi = 1;
3684
3685         gaudi->hw_cap_initialized |= HW_CAP_MMU;
3686
3687         return 0;
3688 }
3689
3690 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3691 {
3692         void __iomem *dst;
3693
3694         dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3695
3696         return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3697 }
3698
3699 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3700 {
3701         void __iomem *dst;
3702
3703         dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3704
3705         return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3706 }
3707
3708 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3709 {
3710         struct dynamic_fw_load_mgr *dynamic_loader;
3711         struct cpu_dyn_regs *dyn_regs;
3712
3713         dynamic_loader = &hdev->fw_loader.dynamic_loader;
3714
3715         /*
3716          * Here we update the initial values of a few specific dynamic regs
3717          * (before the first descriptor is read from the FW, those values have
3718          * to be hard-coded). In later stages of the protocol those values will
3719          * be updated automatically by reading the FW descriptor, so the data
3720          * there will always be up-to-date.
3721          */
3722         dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3723         dyn_regs->kmd_msg_to_cpu =
3724                                 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3725         dyn_regs->cpu_cmd_status_to_host =
3726                                 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3727
3728         dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3729 }
3730
3731 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3732 {
3733         struct static_fw_load_mgr *static_loader;
3734
3735         static_loader = &hdev->fw_loader.static_loader;
3736
3737         static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3738         static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3739         static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3740         static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3741         static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3742         static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3743         static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3744         static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3745         static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3746         static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3747         static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3748         static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3749         static_loader->cpu_reset_wait_msec = hdev->pldm ?
3750                         GAUDI_PLDM_RESET_WAIT_MSEC :
3751                         GAUDI_CPU_RESET_WAIT_MSEC;
3752 }
3753
3754 static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3755 {
3756         struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3757
3758         pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3759         pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3760         pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3761         pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3762         pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3763         pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3764 }
3765
3766 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3767 {
3768         struct asic_fixed_properties *prop = &hdev->asic_prop;
3769         struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3770
3771         /* fill common fields */
3772         fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3773         fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3774         fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3775         fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3776         fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3777         fw_loader->skip_bmc = !hdev->bmc_enable;
3778         fw_loader->sram_bar_id = SRAM_BAR_ID;
3779         fw_loader->dram_bar_id = HBM_BAR_ID;
3780
3781         if (prop->dynamic_fw_load)
3782                 gaudi_init_dynamic_firmware_loader(hdev);
3783         else
3784                 gaudi_init_static_firmware_loader(hdev);
3785 }
3786
3787 static int gaudi_init_cpu(struct hl_device *hdev)
3788 {
3789         struct gaudi_device *gaudi = hdev->asic_specific;
3790         int rc;
3791
3792         if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3793                 return 0;
3794
3795         if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3796                 return 0;
3797
3798         /*
3799          * The device CPU works with 40-bit addresses.
3800          * This register sets the extension to 50 bits.
3801          */
3802         if (!hdev->asic_prop.fw_security_enabled)
3803                 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3804
3805         rc = hl_fw_init_cpu(hdev);
3806
3807         if (rc)
3808                 return rc;
3809
3810         gaudi->hw_cap_initialized |= HW_CAP_CPU;
3811
3812         return 0;
3813 }
3814
3815 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3816 {
3817         struct cpu_dyn_regs *dyn_regs =
3818                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3819         struct asic_fixed_properties *prop = &hdev->asic_prop;
3820         struct gaudi_device *gaudi = hdev->asic_specific;
3821         u32 status, irq_handler_offset;
3822         struct hl_eq *eq;
3823         struct hl_hw_queue *cpu_pq =
3824                         &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3825         int err;
3826
3827         if (!hdev->cpu_queues_enable)
3828                 return 0;
3829
3830         if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3831                 return 0;
3832
3833         eq = &hdev->event_queue;
3834
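             /* Publish the PQ, EQ and CQ (CPU-accessible memory) addresses and
              * sizes to the device CPU
              */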
3835         WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3836         WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3837
3838         WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3839         WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3840
3841         WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3842                         lower_32_bits(hdev->cpu_accessible_dma_address));
3843         WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3844                         upper_32_bits(hdev->cpu_accessible_dma_address));
3845
3846         WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3847         WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3848         WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3849
3850         /* Used for EQ CI */
3851         WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3852
3853         WREG32(mmCPU_IF_PF_PQ_PI, 0);
3854
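             /* Signal the device CPU that the queues are initialized and that a
              * single MSI is used
              */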
3855         WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3856
3857         irq_handler_offset = prop->gic_interrupts_enable ?
3858                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3859                         le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3860
3861         WREG32(irq_handler_offset,
3862                 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3863
3864         err = hl_poll_timeout(
3865                 hdev,
3866                 mmCPU_IF_QUEUE_INIT,
3867                 status,
3868                 (status == PQ_INIT_STATUS_READY_FOR_HOST),
3869                 1000,
3870                 cpu_timeout);
3871
3872         if (err) {
3873                 dev_err(hdev->dev,
3874                         "Failed to communicate with Device CPU (CPU-CP timeout)\n");
3875                 return -EIO;
3876         }
3877
3878         /* update FW application security bits */
3879         if (prop->fw_cpu_boot_dev_sts0_valid)
3880                 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3881         if (prop->fw_cpu_boot_dev_sts1_valid)
3882                 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3883
3884         gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3885         return 0;
3886 }
3887
3888 static void gaudi_pre_hw_init(struct hl_device *hdev)
3889 {
3890         /* Perform read from the device to make sure device is up */
3891         RREG32(mmHW_STATE);
3892
3893         if (!hdev->asic_prop.fw_security_enabled) {
3894                 /* Set the access through PCI bars (Linux driver only) as
3895                  * secured
3896                  */
3897                 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3898                                 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3899                                 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3900
3901                 /* Perform read to flush the waiting writes to ensure
3902                  * configuration was set in the device
3903                  */
3904                 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3905         }
3906
3907         /*
3908          * Let's mark in the H/W that we have reached this point. We check
3909          * this value in the reset_before_init function to understand whether
3910          * we need to reset the chip before doing H/W init. This register is
3911          * cleared by the H/W upon H/W reset
3912          */
3913         WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3914 }
3915
3916 static int gaudi_hw_init(struct hl_device *hdev)
3917 {
3918         struct gaudi_device *gaudi = hdev->asic_specific;
3919         int rc;
3920
3921         gaudi_pre_hw_init(hdev);
3922
3923         /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
3924          * So we set it here, and if anyone later tries to move it to a
3925          * different address, there will be an error
3926          */
3927         if (hdev->asic_prop.iatu_done_by_fw)
3928                 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
3929
3930         /*
3931          * Before pushing u-boot/Linux to the device, we need to set the HBM
3932          * bar to the base address of the DRAM
3933          */
3934         if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
3935                 dev_err(hdev->dev,
3936                         "failed to map HBM bar to DRAM base address\n");
3937                 return -EIO;
3938         }
3939
3940         rc = gaudi_init_cpu(hdev);
3941         if (rc) {
3942                 dev_err(hdev->dev, "failed to initialize CPU\n");
3943                 return rc;
3944         }
3945
3946         /* In case the clock gating was enabled in preboot we need to disable
3947          * it here before touching the MME/TPC registers.
3948          */
3949         gaudi_disable_clock_gating(hdev);
3950
3951         /* SRAM scrambler must be initialized after CPU is running from HBM */
3952         gaudi_init_scrambler_sram(hdev);
3953
3954         /* This is here just in case we are working without CPU */
3955         gaudi_init_scrambler_hbm(hdev);
3956
3957         gaudi_init_golden_registers(hdev);
3958
3959         rc = gaudi_mmu_init(hdev);
3960         if (rc)
3961                 return rc;
3962
3963         gaudi_init_security(hdev);
3964
3965         gaudi_init_pci_dma_qmans(hdev);
3966
3967         gaudi_init_hbm_dma_qmans(hdev);
3968
3969         gaudi_init_mme_qmans(hdev);
3970
3971         gaudi_init_tpc_qmans(hdev);
3972
3973         gaudi_init_nic_qmans(hdev);
3974
3975         gaudi_enable_timestamp(hdev);
3976
3977         /* MSI must be enabled before CPU queues and NIC are initialized */
3978         rc = gaudi_enable_msi(hdev);
3979         if (rc)
3980                 goto disable_queues;
3981
3982         /* must be called after MSI was enabled */
3983         rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
3984         if (rc) {
3985                 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3986                         rc);
3987                 goto disable_msi;
3988         }
3989
3990         /* Perform read from the device to flush all configuration */
3991         RREG32(mmHW_STATE);
3992
3993         return 0;
3994
3995 disable_msi:
3996         gaudi_disable_msi(hdev);
3997 disable_queues:
3998         gaudi_disable_mme_qmans(hdev);
3999         gaudi_disable_pci_dma_qmans(hdev);
4000
4001         return rc;
4002 }
4003
4004 static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4005 {
4006         struct cpu_dyn_regs *dyn_regs =
4007                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4008         u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4009         struct gaudi_device *gaudi = hdev->asic_specific;
4010         bool driver_performs_reset;
4011
4012         if (!hard_reset) {
4013                 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4014                 return 0;
4015         }
4016
4017         if (hdev->pldm) {
4018                 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4019                 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4020         } else {
4021                 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4022                 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4023         }
4024
4025         if (fw_reset) {
4026                 dev_dbg(hdev->dev,
4027                         "Firmware performs HARD reset, going to wait %dms\n",
4028                         reset_timeout_ms);
4029
4030                 goto skip_reset;
4031         }
4032
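             /* The driver performs the reset itself only when FW security is
              * disabled and the FW doesn't perform the hard reset
              */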
4033         driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4034                                         !hdev->asic_prop.hard_reset_done_by_fw);
4035
4036         /* Set the device to handle FLR by H/W, as we are going to put the
4037          * device CPU into halt mode
4038          */
4039         if (driver_performs_reset)
4040                 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4041                                         PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4042
4043         /* If Linux is loaded on the device CPU, we need to communicate with
4044          * it via the GIC. Otherwise, we need to use COMMS, or the MSG_TO_CPU
4045          * registers in case of old F/Ws
4046          */
4047         if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4048                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4049                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4050                                 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4051
4052                 WREG32(irq_handler_offset,
4053                         gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4054
4055                 /* This is a hail-mary attempt to revive the card, on the small chance that the
4056                  * f/w has experienced a watchdog event, which caused it to return to preboot.
4057                  * In that case, triggering reset through GIC won't help. We need to trigger the
4058                  * reset as if Linux wasn't loaded.
4059                  *
4060                  * We do it only if the reset cause was HB, because that would be the indication
4061                  * of such an event.
4062                  *
4063                  * In case watchdog hasn't expired but we still got HB, then this won't do any
4064                  * damage.
4065                  */
4066                 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4067                         if (hdev->asic_prop.hard_reset_done_by_fw)
4068                                 hl_fw_ask_hard_reset_without_linux(hdev);
4069                         else
4070                                 hl_fw_ask_halt_machine_without_linux(hdev);
4071                 }
4072         } else {
4073                 if (hdev->asic_prop.hard_reset_done_by_fw)
4074                         hl_fw_ask_hard_reset_without_linux(hdev);
4075                 else
4076                         hl_fw_ask_halt_machine_without_linux(hdev);
4077         }
4078
4079         if (driver_performs_reset) {
4080
4081                 /* Configure the reset registers. Must be done as early as
4082                  * possible in case we fail during H/W initialization
4083                  */
4084                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4085                                                 (CFG_RST_H_DMA_MASK |
4086                                                 CFG_RST_H_MME_MASK |
4087                                                 CFG_RST_H_SM_MASK |
4088                                                 CFG_RST_H_TPC_7_MASK));
4089
4090                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4091
4092                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4093                                                 (CFG_RST_H_HBM_MASK |
4094                                                 CFG_RST_H_TPC_7_MASK |
4095                                                 CFG_RST_H_NIC_MASK |
4096                                                 CFG_RST_H_SM_MASK |
4097                                                 CFG_RST_H_DMA_MASK |
4098                                                 CFG_RST_H_MME_MASK |
4099                                                 CFG_RST_H_CPU_MASK |
4100                                                 CFG_RST_H_MMU_MASK));
4101
4102                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4103                                                 (CFG_RST_L_IF_MASK |
4104                                                 CFG_RST_L_PSOC_MASK |
4105                                                 CFG_RST_L_TPC_MASK));
4106
4107                 msleep(cpu_timeout_ms);
4108
4109                 /* Tell ASIC not to re-initialize PCIe */
4110                 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4111
4112                 /* Restart BTL/BLR upon hard-reset */
4113                 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4114
4115                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4116                         1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4117
4118                 dev_dbg(hdev->dev,
4119                         "Issued HARD reset command, going to wait %dms\n",
4120                         reset_timeout_ms);
4121         } else {
4122                 dev_dbg(hdev->dev,
4123                         "Firmware performs HARD reset, going to wait %dms\n",
4124                         reset_timeout_ms);
4125         }
4126
4127 skip_reset:
4128         /*
4129          * After hard reset, we can't poll the BTM_FSM register because the PSOC
4130          * itself is in reset. We need to wait until the reset is deasserted
4131          */
4132         msleep(reset_timeout_ms);
4133
4134         status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4135         if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) {
4136                 dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status);
4137                 return -ETIMEDOUT;
4138         }
4139
4140         if (gaudi) {
4141                 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4142                                                 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4143                                                 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4144                                                 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4145                                                 HW_CAP_HBM_SCRAMBLER);
4146
4147                 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4148
4149                 hdev->device_cpu_is_halted = false;
4150         }
4151         return 0;
4152 }
4153
4154 static int gaudi_suspend(struct hl_device *hdev)
4155 {
4156         int rc;
4157
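             /* Ask the device CPU to stop issuing PCI transactions towards the
              * host before the host goes to suspend
              */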
4158         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
4159         if (rc)
4160                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4161
4162         return rc;
4163 }
4164
4165 static int gaudi_resume(struct hl_device *hdev)
4166 {
4167         return gaudi_init_iatu(hdev);
4168 }
4169
4170 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4171                         void *cpu_addr, dma_addr_t dma_addr, size_t size)
4172 {
4173         int rc;
4174
4175         vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4176                         VM_DONTCOPY | VM_NORESERVE);
4177
4178         rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4179                                 (dma_addr - HOST_PHYS_BASE), size);
4180         if (rc)
4181                 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4182
4183         return rc;
4184 }
4185
4186 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4187 {
4188         struct cpu_dyn_regs *dyn_regs =
4189                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4190         u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4191         struct gaudi_device *gaudi = hdev->asic_specific;
4192         bool invalid_queue = false;
4193         int dma_id;
4194
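             /* For the QMAN-based queues below, the PQ_PI_0..3 doorbell registers
              * are 4 bytes apart, so the queue index within the QMAN selects the
              * register
              */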
4195         switch (hw_queue_id) {
4196         case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4197                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4198                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4199                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4200                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4201                 break;
4202
4203         case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4204                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4205                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4206                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4207                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4208                 break;
4209
4210         case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4211                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4212                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4213                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4214                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4215                 break;
4216
4217         case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4218                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4219                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4220                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4221                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4222                 break;
4223
4224         case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4225                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4226                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4227                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4228                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4229                 break;
4230
4231         case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4232                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4233                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4234                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4235                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4236                 break;
4237
4238         case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4239                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4240                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4241                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4242                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4243                 break;
4244
4245         case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4246                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4247                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4248                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4249                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4250                 break;
4251
4252         case GAUDI_QUEUE_ID_CPU_PQ:
4253                 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4254                         db_reg_offset = mmCPU_IF_PF_PQ_PI;
4255                 else
4256                         invalid_queue = true;
4257                 break;
4258
4259         case GAUDI_QUEUE_ID_MME_0_0:
4260                 db_reg_offset = mmMME2_QM_PQ_PI_0;
4261                 break;
4262
4263         case GAUDI_QUEUE_ID_MME_0_1:
4264                 db_reg_offset = mmMME2_QM_PQ_PI_1;
4265                 break;
4266
4267         case GAUDI_QUEUE_ID_MME_0_2:
4268                 db_reg_offset = mmMME2_QM_PQ_PI_2;
4269                 break;
4270
4271         case GAUDI_QUEUE_ID_MME_0_3:
4272                 db_reg_offset = mmMME2_QM_PQ_PI_3;
4273                 break;
4274
4275         case GAUDI_QUEUE_ID_MME_1_0:
4276                 db_reg_offset = mmMME0_QM_PQ_PI_0;
4277                 break;
4278
4279         case GAUDI_QUEUE_ID_MME_1_1:
4280                 db_reg_offset = mmMME0_QM_PQ_PI_1;
4281                 break;
4282
4283         case GAUDI_QUEUE_ID_MME_1_2:
4284                 db_reg_offset = mmMME0_QM_PQ_PI_2;
4285                 break;
4286
4287         case GAUDI_QUEUE_ID_MME_1_3:
4288                 db_reg_offset = mmMME0_QM_PQ_PI_3;
4289                 break;
4290
4291         case GAUDI_QUEUE_ID_TPC_0_0:
4292                 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4293                 break;
4294
4295         case GAUDI_QUEUE_ID_TPC_0_1:
4296                 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4297                 break;
4298
4299         case GAUDI_QUEUE_ID_TPC_0_2:
4300                 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4301                 break;
4302
4303         case GAUDI_QUEUE_ID_TPC_0_3:
4304                 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4305                 break;
4306
4307         case GAUDI_QUEUE_ID_TPC_1_0:
4308                 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4309                 break;
4310
4311         case GAUDI_QUEUE_ID_TPC_1_1:
4312                 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4313                 break;
4314
4315         case GAUDI_QUEUE_ID_TPC_1_2:
4316                 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4317                 break;
4318
4319         case GAUDI_QUEUE_ID_TPC_1_3:
4320                 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4321                 break;
4322
4323         case GAUDI_QUEUE_ID_TPC_2_0:
4324                 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4325                 break;
4326
4327         case GAUDI_QUEUE_ID_TPC_2_1:
4328                 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4329                 break;
4330
4331         case GAUDI_QUEUE_ID_TPC_2_2:
4332                 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4333                 break;
4334
4335         case GAUDI_QUEUE_ID_TPC_2_3:
4336                 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4337                 break;
4338
4339         case GAUDI_QUEUE_ID_TPC_3_0:
4340                 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4341                 break;
4342
4343         case GAUDI_QUEUE_ID_TPC_3_1:
4344                 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4345                 break;
4346
4347         case GAUDI_QUEUE_ID_TPC_3_2:
4348                 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4349                 break;
4350
4351         case GAUDI_QUEUE_ID_TPC_3_3:
4352                 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4353                 break;
4354
4355         case GAUDI_QUEUE_ID_TPC_4_0:
4356                 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4357                 break;
4358
4359         case GAUDI_QUEUE_ID_TPC_4_1:
4360                 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4361                 break;
4362
4363         case GAUDI_QUEUE_ID_TPC_4_2:
4364                 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4365                 break;
4366
4367         case GAUDI_QUEUE_ID_TPC_4_3:
4368                 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4369                 break;
4370
4371         case GAUDI_QUEUE_ID_TPC_5_0:
4372                 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4373                 break;
4374
4375         case GAUDI_QUEUE_ID_TPC_5_1:
4376                 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4377                 break;
4378
4379         case GAUDI_QUEUE_ID_TPC_5_2:
4380                 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4381                 break;
4382
4383         case GAUDI_QUEUE_ID_TPC_5_3:
4384                 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4385                 break;
4386
4387         case GAUDI_QUEUE_ID_TPC_6_0:
4388                 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4389                 break;
4390
4391         case GAUDI_QUEUE_ID_TPC_6_1:
4392                 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4393                 break;
4394
4395         case GAUDI_QUEUE_ID_TPC_6_2:
4396                 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4397                 break;
4398
4399         case GAUDI_QUEUE_ID_TPC_6_3:
4400                 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4401                 break;
4402
4403         case GAUDI_QUEUE_ID_TPC_7_0:
4404                 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4405                 break;
4406
4407         case GAUDI_QUEUE_ID_TPC_7_1:
4408                 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4409                 break;
4410
4411         case GAUDI_QUEUE_ID_TPC_7_2:
4412                 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4413                 break;
4414
4415         case GAUDI_QUEUE_ID_TPC_7_3:
4416                 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4417                 break;
4418
4419         case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4420                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4421                         invalid_queue = true;
4422
4423                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4424                 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4425                 break;
4426
4427         case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4428                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4429                         invalid_queue = true;
4430
4431                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4432                 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4433                 break;
4434
4435         case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4436                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4437                         invalid_queue = true;
4438
4439                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4440                 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4441                 break;
4442
4443         case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4444                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4445                         invalid_queue = true;
4446
4447                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4448                 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4449                 break;
4450
4451         case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4452                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4453                         invalid_queue = true;
4454
4455                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4456                 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4457                 break;
4458
4459         case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4460                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4461                         invalid_queue = true;
4462
4463                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4464                 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4465                 break;
4466
4467         case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4468                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4469                         invalid_queue = true;
4470
4471                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4472                 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4473                 break;
4474
4475         case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4476                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4477                         invalid_queue = true;
4478
4479                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4480                 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4481                 break;
4482
4483         case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4484                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4485                         invalid_queue = true;
4486
4487                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4488                 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4489                 break;
4490
4491         case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4492                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4493                         invalid_queue = true;
4494
4495                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4496                 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4497                 break;
4498
4499         default:
4500                 invalid_queue = true;
4501         }
4502
4503         if (invalid_queue) {
4504                 /* Should never get here */
4505                 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4506                         hw_queue_id);
4507                 return;
4508         }
4509
4510         db_value = pi;
4511
4512         /* ring the doorbell */
4513         WREG32(db_reg_offset, db_value);
4514
4515         if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4516                 /* make sure device CPU will read latest data from host */
4517                 mb();
4518
4519                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4520                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4521                                 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4522
4523                 WREG32(irq_handler_offset,
4524                         gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4525         }
4526 }
4527
4528 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4529                                 struct hl_bd *bd)
4530 {
4531         __le64 *pbd = (__le64 *) bd;
4532
4533         /* The QMANs' PQs are in host memory, so a simple copy suffices */
4534         pqe[0] = pbd[0];
4535         pqe[1] = pbd[1];
4536 }
4537
4538 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4539                                         dma_addr_t *dma_handle, gfp_t flags)
4540 {
4541         void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4542                                                 dma_handle, flags);
4543
4544         /* Shift to the device's base physical address of host memory */
4545         if (kernel_addr)
4546                 *dma_handle += HOST_PHYS_BASE;
4547
4548         return kernel_addr;
4549 }
4550
4551 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4552                 void *cpu_addr, dma_addr_t dma_handle)
4553 {
4554         /* Cancel the device's base physical address of host memory */
4555         dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4556
4557         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4558 }
4559
4560 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
4561 {
4562         struct asic_fixed_properties *prop = &hdev->asic_prop;
4563         u64 cur_addr = prop->dram_user_base_address;
4564         u32 chunk_size, busy;
4565         int rc, dma_id;
4566
4567         while (cur_addr < prop->dram_end_address) {
4568                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4569                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4570
4571                         chunk_size =
4572                         min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4573
4574                         dev_dbg(hdev->dev,
4575                                 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4576                                 cur_addr, cur_addr + chunk_size);
4577
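                             /* Program this DMA core to memset the current chunk
                              * with 'val' (linear transfer with MEM_SET commit)
                              */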
4578                         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
4579                                         lower_32_bits(val));
4580                         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
4581                                         upper_32_bits(val));
4582                         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4583                                                 lower_32_bits(cur_addr));
4584                         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4585                                                 upper_32_bits(cur_addr));
4586                         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4587                                         chunk_size);
4588                         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4589                                         ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4590                                         (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4591
4592                         cur_addr += chunk_size;
4593
4594                         if (cur_addr == prop->dram_end_address)
4595                                 break;
4596                 }
4597
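                     /* Wait for all DMA cores to finish scrubbing before moving
                      * to the next chunks
                      */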
4598                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4599                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4600
4601                         rc = hl_poll_timeout(
4602                                 hdev,
4603                                 mmDMA0_CORE_STS0 + dma_offset,
4604                                 busy,
4605                                 ((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
4606                                 1000,
4607                                 HBM_SCRUBBING_TIMEOUT_US);
4608
4609                         if (rc) {
4610                                 dev_err(hdev->dev,
4611                                         "DMA Timeout during HBM scrubbing of DMA #%d\n",
4612                                         dma_id);
4613                                 return -EIO;
4614                         }
4615                 }
4616         }
4617
4618         return 0;
4619 }
4620
4621 static int gaudi_scrub_device_mem(struct hl_device *hdev)
4622 {
4623         struct asic_fixed_properties *prop = &hdev->asic_prop;
4624         u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US;
4625         u64 addr, size, val = hdev->memory_scrub_val;
4626         ktime_t timeout;
4627         int rc = 0;
4628
4629         if (!hdev->memory_scrub)
4630                 return 0;
4631
4632         timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
4633         while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
4634                 if (ktime_compare(ktime_get(), timeout) > 0) {
4635                         dev_err(hdev->dev, "waiting for idle timeout\n");
4636                         return -ETIMEDOUT;
4637                 }
4638                 usleep_range((1000 >> 2) + 1, 1000);
4639         }
4640
4641         /* Scrub SRAM */
4642         addr = prop->sram_user_base_address;
4643         size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;
4644
4645         dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
4646                         addr, addr + size, val);
4647         rc = gaudi_memset_device_memory(hdev, addr, size, val);
4648         if (rc) {
4649                 dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
4650                 return rc;
4651         }
4652
4653         /* Scrub HBM using all DMA channels in parallel */
4654         rc = gaudi_scrub_device_dram(hdev, val);
4655         if (rc) {
4656                 dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
4657                 return rc;
4658         }
4659
4660         return 0;
4661 }
4662
4663 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4664                                 u32 queue_id, dma_addr_t *dma_handle,
4665                                 u16 *queue_len)
4666 {
4667         struct gaudi_device *gaudi = hdev->asic_specific;
4668         struct gaudi_internal_qman_info *q;
4669
4670         if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4671                         gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4672                 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4673                 return NULL;
4674         }
4675
4676         q = &gaudi->internal_qmans[queue_id];
4677         *dma_handle = q->pq_dma_addr;
4678         *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4679
4680         return q->pq_kernel_addr;
4681 }
4682
4683 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4684                                 u16 len, u32 timeout, u64 *result)
4685 {
4686         struct gaudi_device *gaudi = hdev->asic_specific;
4687
4688         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4689                 if (result)
4690                         *result = 0;
4691                 return 0;
4692         }
4693
4694         if (!timeout)
4695                 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4696
4697         return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4698                                                 timeout, result);
4699 }
4700
4701 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4702 {
4703         struct packet_msg_prot *fence_pkt;
4704         dma_addr_t pkt_dma_addr;
4705         u32 fence_val, tmp, timeout_usec;
4706         dma_addr_t fence_dma_addr;
4707         u32 *fence_ptr;
4708         int rc;
4709
4710         if (hdev->pldm)
4711                 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4712         else
4713                 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4714
4715         fence_val = GAUDI_QMAN0_FENCE_VAL;
4716
4717         fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
4718         if (!fence_ptr) {
4719                 dev_err(hdev->dev,
4720                         "Failed to allocate memory for H/W queue %d testing\n",
4721                         hw_queue_id);
4722                 return -ENOMEM;
4723         }
4724
4725         *fence_ptr = 0;
4726
4727         fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
4728                                                 &pkt_dma_addr);
4729         if (!fence_pkt) {
4730                 dev_err(hdev->dev,
4731                         "Failed to allocate packet for H/W queue %d testing\n",
4732                         hw_queue_id);
4733                 rc = -ENOMEM;
4734                 goto free_fence_ptr;
4735         }
4736
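             /* Build a MSG_PROT packet that writes the fence value to the fence
              * buffer when the queue processes it
              */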
4737         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4738         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4739         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4740
4741         fence_pkt->ctl = cpu_to_le32(tmp);
4742         fence_pkt->value = cpu_to_le32(fence_val);
4743         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4744
4745         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4746                                         sizeof(struct packet_msg_prot),
4747                                         pkt_dma_addr);
4748         if (rc) {
4749                 dev_err(hdev->dev,
4750                         "Failed to send fence packet to H/W queue %d\n",
4751                         hw_queue_id);
4752                 goto free_pkt;
4753         }
4754
4755         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4756                                         1000, timeout_usec, true);
4757
4758         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4759
4760         if (rc == -ETIMEDOUT) {
4761                 dev_err(hdev->dev,
4762                         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4763                         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4764                 rc = -EIO;
4765         }
4766
4767 free_pkt:
4768         hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
4769 free_fence_ptr:
4770         hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
4771         return rc;
4772 }
4773
4774 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4775 {
4776         struct gaudi_device *gaudi = hdev->asic_specific;
4777
4778         /*
4779          * Check the capability here, as send_cpu_message() won't update the
4780          * result value if the capability isn't set
4781          */
4782         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4783                 return 0;
4784
4785         return hl_fw_test_cpu_queue(hdev);
4786 }
4787
4788 static int gaudi_test_queues(struct hl_device *hdev)
4789 {
4790         int i, rc, ret_val = 0;
4791
4792         for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4793                 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4794                         rc = gaudi_test_queue(hdev, i);
4795                         if (rc)
4796                                 ret_val = -EINVAL;
4797                 }
4798         }
4799
4800         rc = gaudi_test_cpu_queue(hdev);
4801         if (rc)
4802                 ret_val = -EINVAL;
4803
4804         return ret_val;
4805 }
4806
4807 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4808                 gfp_t mem_flags, dma_addr_t *dma_handle)
4809 {
4810         void *kernel_addr;
4811
4812         if (size > GAUDI_DMA_POOL_BLK_SIZE)
4813                 return NULL;
4814
4815         kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4816
4817         /* Shift to the device's base physical address of host memory */
4818         if (kernel_addr)
4819                 *dma_handle += HOST_PHYS_BASE;
4820
4821         return kernel_addr;
4822 }
4823
4824 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4825                         dma_addr_t dma_addr)
4826 {
4827         /* Cancel the device's base physical address of host memory */
4828         dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4829
4830         dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4831 }
4832
4833 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4834                                         size_t size, dma_addr_t *dma_handle)
4835 {
4836         return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4837 }
4838
4839 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4840                                                 size_t size, void *vaddr)
4841 {
4842         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4843 }
4844
4845 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
4846 {
4847         struct scatterlist *sg, *sg_next_iter;
4848         u32 count, dma_desc_cnt;
4849         u64 len, len_next;
4850         dma_addr_t addr, addr_next;
4851
4852         dma_desc_cnt = 0;
4853
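             /* Merge DMA-contiguous SG entries (up to DMA_MAX_TRANSFER_SIZE per
              * descriptor) and count how many LIN_DMA descriptors are needed
              */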
4854         for_each_sgtable_dma_sg(sgt, sg, count) {
4855                 len = sg_dma_len(sg);
4856                 addr = sg_dma_address(sg);
4857
4858                 if (len == 0)
4859                         break;
4860
4861                 while ((count + 1) < sgt->nents) {
4862                         sg_next_iter = sg_next(sg);
4863                         len_next = sg_dma_len(sg_next_iter);
4864                         addr_next = sg_dma_address(sg_next_iter);
4865
4866                         if (len_next == 0)
4867                                 break;
4868
4869                         if ((addr + len == addr_next) &&
4870                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4871                                 len += len_next;
4872                                 count++;
4873                                 sg = sg_next_iter;
4874                         } else {
4875                                 break;
4876                         }
4877                 }
4878
4879                 dma_desc_cnt++;
4880         }
4881
4882         return dma_desc_cnt * sizeof(struct packet_lin_dma);
4883 }
4884
4885 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4886                                 struct hl_cs_parser *parser,
4887                                 struct packet_lin_dma *user_dma_pkt,
4888                                 u64 addr, enum dma_data_direction dir)
4889 {
4890         struct hl_userptr *userptr;
4891         int rc;
4892
4893         if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4894                         parser->job_userptr_list, &userptr))
4895                 goto already_pinned;
4896
4897         userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4898         if (!userptr)
4899                 return -ENOMEM;
4900
4901         rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4902                                 userptr);
4903         if (rc)
4904                 goto free_userptr;
4905
4906         list_add_tail(&userptr->job_node, parser->job_userptr_list);
4907
4908         rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
4909         if (rc) {
4910                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4911                 goto unpin_memory;
4912         }
4913
4914         userptr->dma_mapped = true;
4915         userptr->dir = dir;
4916
4917 already_pinned:
4918         parser->patched_cb_size +=
4919                         gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4920
4921         return 0;
4922
4923 unpin_memory:
4924         list_del(&userptr->job_node);
4925         hl_unpin_host_memory(hdev, userptr);
4926 free_userptr:
4927         kfree(userptr);
4928         return rc;
4929 }
4930
4931 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4932                                 struct hl_cs_parser *parser,
4933                                 struct packet_lin_dma *user_dma_pkt,
4934                                 bool src_in_host)
4935 {
4936         enum dma_data_direction dir;
4937         bool skip_host_mem_pin = false, user_memset;
4938         u64 addr;
4939         int rc = 0;
4940
4941         user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
4942                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
4943                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
4944
4945         if (src_in_host) {
4946                 if (user_memset)
4947                         skip_host_mem_pin = true;
4948
4949                 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
4950                 dir = DMA_TO_DEVICE;
4951                 addr = le64_to_cpu(user_dma_pkt->src_addr);
4952         } else {
4953                 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
4954                 dir = DMA_FROM_DEVICE;
4955                 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4956                                 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4957                                 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4958         }
4959
4960         if (skip_host_mem_pin)
4961                 parser->patched_cb_size += sizeof(*user_dma_pkt);
4962         else
4963                 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
4964                                                 addr, dir);
4965
4966         return rc;
4967 }
4968
4969 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
4970                                 struct hl_cs_parser *parser,
4971                                 struct packet_lin_dma *user_dma_pkt)
4972 {
4973         bool src_in_host = false;
4974         u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4975                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4976                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4977
4978         dev_dbg(hdev->dev, "DMA packet details:\n");
4979         dev_dbg(hdev->dev, "source == 0x%llx\n",
4980                                 le64_to_cpu(user_dma_pkt->src_addr));
4981         dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
4982         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
4983
4984         /*
4985          * Special handling for DMA with size 0. Bypass all validations
4986          * because no transactions will be done except for WR_COMP, which
4987          * is not a security issue
4988          */
4989         if (!le32_to_cpu(user_dma_pkt->tsize)) {
4990                 parser->patched_cb_size += sizeof(*user_dma_pkt);
4991                 return 0;
4992         }
4993
4994         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
4995                 src_in_host = true;
4996
4997         return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
4998                                                 src_in_host);
4999 }
5000
5001 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5002                                         struct hl_cs_parser *parser,
5003                                         struct packet_load_and_exe *user_pkt)
5004 {
5005         u32 cfg;
5006
5007         cfg = le32_to_cpu(user_pkt->cfg);
5008
5009         if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5010                 dev_err(hdev->dev,
5011                         "User not allowed to use Load and Execute\n");
5012                 return -EPERM;
5013         }
5014
5015         parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5016
5017         return 0;
5018 }
5019
5020 static int gaudi_validate_cb(struct hl_device *hdev,
5021                         struct hl_cs_parser *parser, bool is_mmu)
5022 {
5023         u32 cb_parsed_length = 0;
5024         int rc = 0;
5025
5026         parser->patched_cb_size = 0;
5027
5028         /* user_cb_size is more than 0, so the loop will always be executed */
5029         while (cb_parsed_length < parser->user_cb_size) {
5030                 enum packet_id pkt_id;
5031                 u16 pkt_size;
5032                 struct gaudi_packet *user_pkt;
5033
5034                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5035
5036                 pkt_id = (enum packet_id) (
5037                                 (le64_to_cpu(user_pkt->header) &
5038                                 PACKET_HEADER_PACKET_ID_MASK) >>
5039                                         PACKET_HEADER_PACKET_ID_SHIFT);
5040
5041                 if (!validate_packet_id(pkt_id)) {
5042                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5043                         rc = -EINVAL;
5044                         break;
5045                 }
5046
5047                 pkt_size = gaudi_packet_sizes[pkt_id];
5048                 cb_parsed_length += pkt_size;
5049                 if (cb_parsed_length > parser->user_cb_size) {
5050                         dev_err(hdev->dev,
5051                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5052                         rc = -EINVAL;
5053                         break;
5054                 }
5055
5056                 switch (pkt_id) {
5057                 case PACKET_MSG_PROT:
5058                         dev_err(hdev->dev,
5059                                 "User not allowed to use MSG_PROT\n");
5060                         rc = -EPERM;
5061                         break;
5062
5063                 case PACKET_CP_DMA:
5064                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5065                         rc = -EPERM;
5066                         break;
5067
5068                 case PACKET_STOP:
5069                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5070                         rc = -EPERM;
5071                         break;
5072
5073                 case PACKET_WREG_BULK:
5074                         dev_err(hdev->dev,
5075                                 "User not allowed to use WREG_BULK\n");
5076                         rc = -EPERM;
5077                         break;
5078
5079                 case PACKET_LOAD_AND_EXE:
5080                         rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5081                                 (struct packet_load_and_exe *) user_pkt);
5082                         break;
5083
5084                 case PACKET_LIN_DMA:
5085                         parser->contains_dma_pkt = true;
5086                         if (is_mmu)
5087                                 parser->patched_cb_size += pkt_size;
5088                         else
5089                                 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5090                                         (struct packet_lin_dma *) user_pkt);
5091                         break;
5092
5093                 case PACKET_WREG_32:
5094                 case PACKET_MSG_LONG:
5095                 case PACKET_MSG_SHORT:
5096                 case PACKET_REPEAT:
5097                 case PACKET_FENCE:
5098                 case PACKET_NOP:
5099                 case PACKET_ARB_POINT:
5100                         parser->patched_cb_size += pkt_size;
5101                         break;
5102
5103                 default:
5104                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5105                                 pkt_id);
5106                         rc = -EINVAL;
5107                         break;
5108                 }
5109
5110                 if (rc)
5111                         break;
5112         }
5113
5114         /*
5115          * The new CB should have space at the end for:
5116          * 1. Optional NOP padding for cacheline alignment
5117          * 2. A MSG_PROT packet that will act as a completion packet
5118          * 3. A MSG_PROT packet that will generate an MSI interrupt
5119          */
5120         if (parser->completion)
5121                 parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
5122                         parser->patched_cb_size);
5123
5124         return rc;
5125 }
5126
5127 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5128                                 struct hl_cs_parser *parser,
5129                                 struct packet_lin_dma *user_dma_pkt,
5130                                 struct packet_lin_dma *new_dma_pkt,
5131                                 u32 *new_dma_pkt_size)
5132 {
5133         struct hl_userptr *userptr;
5134         struct scatterlist *sg, *sg_next_iter;
5135         u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5136         u64 len, len_next;
5137         dma_addr_t dma_addr, dma_addr_next;
5138         u64 device_memory_addr, addr;
5139         enum dma_data_direction dir;
5140         struct sg_table *sgt;
5141         bool src_in_host = false;
5142         bool skip_host_mem_pin = false;
5143         bool user_memset;
5144
5145         ctl = le32_to_cpu(user_dma_pkt->ctl);
5146
5147         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5148                 src_in_host = true;
5149
5150         user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5151                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5152
5153         if (src_in_host) {
5154                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5155                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5156                 dir = DMA_TO_DEVICE;
5157                 if (user_memset)
5158                         skip_host_mem_pin = true;
5159         } else {
5160                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5161                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5162                 dir = DMA_FROM_DEVICE;
5163         }
5164
5165         if ((!skip_host_mem_pin) &&
5166                 (!hl_userptr_is_pinned(hdev, addr,
5167                                         le32_to_cpu(user_dma_pkt->tsize),
5168                                         parser->job_userptr_list, &userptr))) {
5169                 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5170                                 addr, le32_to_cpu(user_dma_pkt->tsize));
5171                 return -EFAULT;
5172         }
5173
5174         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5175                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5176                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5177                 return 0;
5178         }
5179
5180         user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5181
5182         sgt = userptr->sgt;
5183         dma_desc_cnt = 0;
5184
5185         for_each_sgtable_dma_sg(sgt, sg, count) {
5186                 len = sg_dma_len(sg);
5187                 dma_addr = sg_dma_address(sg);
5188
5189                 if (len == 0)
5190                         break;
5191
5192                 while ((count + 1) < sgt->nents) {
5193                         sg_next_iter = sg_next(sg);
5194                         len_next = sg_dma_len(sg_next_iter);
5195                         dma_addr_next = sg_dma_address(sg_next_iter);
5196
5197                         if (len_next == 0)
5198                                 break;
5199
5200                         if ((dma_addr + len == dma_addr_next) &&
5201                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5202                                 len += len_next;
5203                                 count++;
5204                                 sg = sg_next_iter;
5205                         } else {
5206                                 break;
5207                         }
5208                 }
5209
5210                 ctl = le32_to_cpu(user_dma_pkt->ctl);
5211                 if (likely(dma_desc_cnt))
5212                         ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5213                 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5214                 new_dma_pkt->ctl = cpu_to_le32(ctl);
5215                 new_dma_pkt->tsize = cpu_to_le32(len);
5216
5217                 if (dir == DMA_TO_DEVICE) {
5218                         new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5219                         new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5220                 } else {
5221                         new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5222                         new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5223                 }
5224
5225                 if (!user_memset)
5226                         device_memory_addr += len;
5227                 dma_desc_cnt++;
5228                 new_dma_pkt++;
5229         }
5230
5231         if (!dma_desc_cnt) {
5232                 dev_err(hdev->dev,
5233                         "Error of 0 SG entries when patching DMA packet\n");
5234                 return -EFAULT;
5235         }
5236
5237         /* Fix the last dma packet - wrcomp must be as user set it */
5238         new_dma_pkt--;
5239         new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5240
5241         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5242
5243         return 0;
5244 }
5245
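/*
 * Copy the user CB into the patched CB packet by packet: LIN_DMA packets are
 * expanded via gaudi_patch_dma_packet(), privileged packets (MSG_PROT,
 * CP_DMA, STOP) are rejected, and all other packet types are copied verbatim.
 */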
5246 static int gaudi_patch_cb(struct hl_device *hdev,
5247                                 struct hl_cs_parser *parser)
5248 {
5249         u32 cb_parsed_length = 0;
5250         u32 cb_patched_cur_length = 0;
5251         int rc = 0;
5252
5253         /* user_cb_size is more than 0 so the loop will always be executed */
5254         while (cb_parsed_length < parser->user_cb_size) {
5255                 enum packet_id pkt_id;
5256                 u16 pkt_size;
5257                 u32 new_pkt_size = 0;
5258                 struct gaudi_packet *user_pkt, *kernel_pkt;
5259
5260                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5261                 kernel_pkt = parser->patched_cb->kernel_address +
5262                                         cb_patched_cur_length;
5263
5264                 pkt_id = (enum packet_id) (
5265                                 (le64_to_cpu(user_pkt->header) &
5266                                 PACKET_HEADER_PACKET_ID_MASK) >>
5267                                         PACKET_HEADER_PACKET_ID_SHIFT);
5268
5269                 if (!validate_packet_id(pkt_id)) {
5270                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5271                         rc = -EINVAL;
5272                         break;
5273                 }
5274
5275                 pkt_size = gaudi_packet_sizes[pkt_id];
5276                 cb_parsed_length += pkt_size;
5277                 if (cb_parsed_length > parser->user_cb_size) {
5278                         dev_err(hdev->dev,
5279                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5280                         rc = -EINVAL;
5281                         break;
5282                 }
5283
5284                 switch (pkt_id) {
5285                 case PACKET_LIN_DMA:
5286                         rc = gaudi_patch_dma_packet(hdev, parser,
5287                                         (struct packet_lin_dma *) user_pkt,
5288                                         (struct packet_lin_dma *) kernel_pkt,
5289                                         &new_pkt_size);
5290                         cb_patched_cur_length += new_pkt_size;
5291                         break;
5292
5293                 case PACKET_MSG_PROT:
5294                         dev_err(hdev->dev,
5295                                 "User not allowed to use MSG_PROT\n");
5296                         rc = -EPERM;
5297                         break;
5298
5299                 case PACKET_CP_DMA:
5300                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5301                         rc = -EPERM;
5302                         break;
5303
5304                 case PACKET_STOP:
5305                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5306                         rc = -EPERM;
5307                         break;
5308
5309                 case PACKET_WREG_32:
5310                 case PACKET_WREG_BULK:
5311                 case PACKET_MSG_LONG:
5312                 case PACKET_MSG_SHORT:
5313                 case PACKET_REPEAT:
5314                 case PACKET_FENCE:
5315                 case PACKET_NOP:
5316                 case PACKET_ARB_POINT:
5317                 case PACKET_LOAD_AND_EXE:
5318                         memcpy(kernel_pkt, user_pkt, pkt_size);
5319                         cb_patched_cur_length += pkt_size;
5320                         break;
5321
5322                 default:
5323                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5324                                 pkt_id);
5325                         rc = -EINVAL;
5326                         break;
5327                 }
5328
5329                 if (rc)
5330                         break;
5331         }
5332
5333         return rc;
5334 }
5335
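/*
 * MMU-enabled flow: allocate a kernel patched CB large enough for the user CB
 * plus the end-of-CB packets, copy the user CB into it and run the validation
 * pass on the patched copy rather than on the user CB.
 */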
5336 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5337                 struct hl_cs_parser *parser)
5338 {
5339         u64 handle;
5340         u32 patched_cb_size;
5341         struct hl_cb *user_cb;
5342         int rc;
5343
5344         /*
5345          * The new CB should have space at the end for optional NOP padding
5346          * (for cacheline alignment) and two MSG_PROT packets:
5347          * 1. A packet that will act as a completion packet
5348          * 2. A packet that will generate an MSI interrupt
5349          */
5350         if (parser->completion)
5351                 parser->patched_cb_size = parser->user_cb_size +
5352                                 gaudi_get_patched_cb_extra_size(parser->user_cb_size);
5353         else
5354                 parser->patched_cb_size = parser->user_cb_size;
5355
5356         rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5357                                 parser->patched_cb_size, false, false,
5358                                 &handle);
5359
5360         if (rc) {
5361                 dev_err(hdev->dev,
5362                         "Failed to allocate patched CB for DMA CS %d\n",
5363                         rc);
5364                 return rc;
5365         }
5366
5367         parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5368         /* hl_cb_get should never fail */
5369         if (!parser->patched_cb) {
5370                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5371                 rc = -EFAULT;
5372                 goto out;
5373         }
5374
5375         /*
5376          * We are protected from overflow because the check
5377          * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
5378          * in the common code. That check is done only if is_kernel_allocated_cb is true.
5379          *
5380          * There is no option to reach here without going through that check because:
5381          * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
5382          *    an external queue.
5383          * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
5384          */
5385         memcpy(parser->patched_cb->kernel_address,
5386                 parser->user_cb->kernel_address,
5387                 parser->user_cb_size);
5388
5389         patched_cb_size = parser->patched_cb_size;
5390
5391         /* Validate patched CB instead of user CB */
5392         user_cb = parser->user_cb;
5393         parser->user_cb = parser->patched_cb;
5394         rc = gaudi_validate_cb(hdev, parser, true);
5395         parser->user_cb = user_cb;
5396
5397         if (rc) {
5398                 hl_cb_put(parser->patched_cb);
5399                 goto out;
5400         }
5401
5402         if (patched_cb_size != parser->patched_cb_size) {
5403                 dev_err(hdev->dev, "patched CB size mismatch\n");
5404                 hl_cb_put(parser->patched_cb);
5405                 rc = -EINVAL;
5406                 goto out;
5407         }
5408
5409 out:
5410         /*
5411          * Always call cb destroy here because we still have 1 reference
5412          * to it from calling cb_get earlier. After the job completes,
5413          * cb_put will release it, but here we want to remove it from the
5414          * idr.
5415          */
5416         hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5417
5418         return rc;
5419 }
5420
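/*
 * No-MMU flow: validate the user CB first to pin host buffers and compute the
 * patched CB size, then allocate the patched CB and fill it through
 * gaudi_patch_cb(). The job userptr list is released on any failure.
 */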
5421 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5422                 struct hl_cs_parser *parser)
5423 {
5424         u64 handle;
5425         int rc;
5426
5427         rc = gaudi_validate_cb(hdev, parser, false);
5428
5429         if (rc)
5430                 goto free_userptr;
5431
5432         rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5433                                 parser->patched_cb_size, false, false,
5434                                 &handle);
5435         if (rc) {
5436                 dev_err(hdev->dev,
5437                         "Failed to allocate patched CB for DMA CS %d\n", rc);
5438                 goto free_userptr;
5439         }
5440
5441         parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5442         /* hl_cb_get should never fail here */
5443         if (!parser->patched_cb) {
5444                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5445                 rc = -EFAULT;
5446                 goto out;
5447         }
5448
5449         rc = gaudi_patch_cb(hdev, parser);
5450
5451         if (rc)
5452                 hl_cb_put(parser->patched_cb);
5453
5454 out:
5455         /*
5456          * Always call cb destroy here because we still have 1 reference
5457          * to it from calling cb_get earlier. After the job completes,
5458          * cb_put will release it, but here we want to remove it from the
5459          * idr.
5460          */
5461         hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5462
5463 free_userptr:
5464         if (rc)
5465                 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5466         return rc;
5467 }
5468
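/*
 * Jobs on internal queues are not patched; only verify that the target NIC
 * queue (if any) is enabled and that the CB address range falls entirely
 * inside SRAM, DRAM or the PMMU virtual address range.
 */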
5469 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5470                                         struct hl_cs_parser *parser)
5471 {
5472         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5473         struct gaudi_device *gaudi = hdev->asic_specific;
5474         u32 nic_queue_offset, nic_mask_q_id;
5475
5476         if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5477                         (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5478                 nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5479                 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5480
5481                 if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5482                         dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5483                         return -EINVAL;
5484                 }
5485         }
5486
5487         /* For internal queue jobs just check if CB address is valid */
5488         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5489                                         parser->user_cb_size,
5490                                         asic_prop->sram_user_base_address,
5491                                         asic_prop->sram_end_address))
5492                 return 0;
5493
5494         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5495                                         parser->user_cb_size,
5496                                         asic_prop->dram_user_base_address,
5497                                         asic_prop->dram_end_address))
5498                 return 0;
5499
5500         /* PMMU and HPMMU addresses are equal, check only one of them */
5501         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5502                                         parser->user_cb_size,
5503                                         asic_prop->pmmu.start_addr,
5504                                         asic_prop->pmmu.end_addr))
5505                 return 0;
5506
5507         dev_err(hdev->dev,
5508                 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5509                 parser->user_cb, parser->user_cb_size);
5510
5511         return -EFAULT;
5512 }
5513
5514 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5515 {
5516         struct gaudi_device *gaudi = hdev->asic_specific;
5517
5518         if (parser->queue_type == QUEUE_TYPE_INT)
5519                 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5520
5521         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5522                 return gaudi_parse_cb_mmu(hdev, parser);
5523         else
5524                 return gaudi_parse_cb_no_mmu(hdev, parser);
5525 }
5526
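/*
 * Append the end-of-CB packets: NOP padding from the original CB end up to
 * the last two MSG_PROT packets, the first of which writes cq_val to cq_addr
 * (completion) and the second of which triggers MSI interrupt msi_vec.
 */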
5527 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
5528                                 u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
5529                                 u32 msi_vec, bool eb)
5530 {
5531         struct packet_msg_prot *cq_pkt;
5532         struct packet_nop *cq_padding;
5533         u64 msi_addr;
5534         u32 tmp;
5535
5536         cq_padding = kernel_address + original_len;
5537         cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5538
5539         while ((void *)cq_padding < (void *)cq_pkt) {
5540                 cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
5541                 cq_padding++;
5542         }
5543
5544         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5545         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5546
5547         if (eb)
5548                 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5549
5550         cq_pkt->ctl = cpu_to_le32(tmp);
5551         cq_pkt->value = cpu_to_le32(cq_val);
5552         cq_pkt->addr = cpu_to_le64(cq_addr);
5553
5554         cq_pkt++;
5555
5556         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5557         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5558         cq_pkt->ctl = cpu_to_le32(tmp);
5559         cq_pkt->value = cpu_to_le32(1);
5560         msi_addr = hdev->pdev ? mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4;
5561         cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5562 }
5563
5564 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5565 {
5566         WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5567 }
5568
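/*
 * Fill a device memory region with a 64-bit value by building a single
 * memset LIN_DMA packet in a kernel CB and running it on DMA channel 0
 * through QMAN0, clearing and checking the DMA0 error cause around the job.
 */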
5569 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5570                                         u32 size, u64 val)
5571 {
5572         struct packet_lin_dma *lin_dma_pkt;
5573         struct hl_cs_job *job;
5574         u32 cb_size, ctl, err_cause;
5575         struct hl_cb *cb;
5576         int rc;
5577
5578         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5579         if (!cb)
5580                 return -EFAULT;
5581
5582         lin_dma_pkt = cb->kernel_address;
5583         memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5584         cb_size = sizeof(*lin_dma_pkt);
5585
5586         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5587         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5588         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5589         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5590         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5591
5592         lin_dma_pkt->ctl = cpu_to_le32(ctl);
5593         lin_dma_pkt->src_addr = cpu_to_le64(val);
5594         lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5595         lin_dma_pkt->tsize = cpu_to_le32(size);
5596
5597         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5598         if (!job) {
5599                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5600                 rc = -ENOMEM;
5601                 goto release_cb;
5602         }
5603
5604         /* Verify DMA is OK */
5605         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5606         if (err_cause && !hdev->init_done) {
5607                 dev_dbg(hdev->dev,
5608                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
5609                         err_cause);
5610                 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5611         }
5612
5613         job->id = 0;
5614         job->user_cb = cb;
5615         atomic_inc(&job->user_cb->cs_cnt);
5616         job->user_cb_size = cb_size;
5617         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5618         job->patched_cb = job->user_cb;
5619         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5620
5621         hl_debugfs_add_job(hdev, job);
5622
5623         rc = gaudi_send_job_on_qman0(hdev, job);
5624         hl_debugfs_remove_job(hdev, job);
5625         kfree(job);
5626         atomic_dec(&cb->cs_cnt);
5627
5628         /* Verify DMA is OK */
5629         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5630         if (err_cause) {
5631                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5632                 rc = -EIO;
5633                 if (!hdev->init_done) {
5634                         dev_dbg(hdev->dev,
5635                                 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5636                                 err_cause);
5637                         WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5638                 }
5639         }
5640
5641 release_cb:
5642         hl_cb_put(cb);
5643         hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5644
5645         return rc;
5646 }
5647
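/*
 * Write the same 32-bit value to num_regs consecutive registers by building
 * a CB of MSG_LONG packets (one per register) and running it on QMAN0.
 */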
5648 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5649                                         u32 num_regs, u32 val)
5650 {
5651         struct packet_msg_long *pkt;
5652         struct hl_cs_job *job;
5653         u32 cb_size, ctl;
5654         struct hl_cb *cb;
5655         int i, rc;
5656
5657         cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5658
5659         if (cb_size > SZ_2M) {
5660                 dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M >> 20);
5661                 return -ENOMEM;
5662         }
5663
5664         cb = hl_cb_kernel_create(hdev, cb_size, false);
5665         if (!cb)
5666                 return -EFAULT;
5667
5668         pkt = cb->kernel_address;
5669
5670         ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5671         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5672         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5673         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5674         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5675
5676         for (i = 0; i < num_regs ; i++, pkt++) {
5677                 pkt->ctl = cpu_to_le32(ctl);
5678                 pkt->value = cpu_to_le32(val);
5679                 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5680         }
5681
5682         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5683         if (!job) {
5684                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5685                 rc = -ENOMEM;
5686                 goto release_cb;
5687         }
5688
5689         job->id = 0;
5690         job->user_cb = cb;
5691         atomic_inc(&job->user_cb->cs_cnt);
5692         job->user_cb_size = cb_size;
5693         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5694         job->patched_cb = job->user_cb;
5695         job->job_cb_size = cb_size;
5696
5697         hl_debugfs_add_job(hdev, job);
5698
5699         rc = gaudi_send_job_on_qman0(hdev, job);
5700         hl_debugfs_remove_job(hdev, job);
5701         kfree(job);
5702         atomic_dec(&cb->cs_cnt);
5703
5704 release_cb:
5705         hl_cb_put(cb);
5706         hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5707
5708         return rc;
5709 }
5710
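/*
 * Zero the sync manager SOB and monitor status registers that are exposed to
 * the user: the full east-north, east-south and west-north blocks, and the
 * west-south block only from the first available (non driver-reserved) sync
 * object / monitor onward.
 */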
5711 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5712 {
5713         u64 base_addr;
5714         u32 num_regs;
5715         int rc;
5716
5717         base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5718         num_regs = NUM_OF_SOB_IN_BLOCK;
5719         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5720         if (rc) {
5721                 dev_err(hdev->dev, "failed resetting SM registers\n");
5722                 return -ENOMEM;
5723         }
5724
5725         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5726         num_regs = NUM_OF_SOB_IN_BLOCK;
5727         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5728         if (rc) {
5729                 dev_err(hdev->dev, "failed resetting SM registers\n");
5730                 return -ENOMEM;
5731         }
5732
5733         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5734         num_regs = NUM_OF_SOB_IN_BLOCK;
5735         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5736         if (rc) {
5737                 dev_err(hdev->dev, "failed resetting SM registers\n");
5738                 return -ENOMEM;
5739         }
5740
5741         base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5742         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5743         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5744         if (rc) {
5745                 dev_err(hdev->dev, "failed resetting SM registers\n");
5746                 return -ENOMEM;
5747         }
5748
5749         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5750         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5751         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5752         if (rc) {
5753                 dev_err(hdev->dev, "failed resetting SM registers\n");
5754                 return -ENOMEM;
5755         }
5756
5757         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5758         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5759         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5760         if (rc) {
5761                 dev_err(hdev->dev, "failed resetting SM registers\n");
5762                 return -ENOMEM;
5763         }
5764
5765         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5766                         (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5767         num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5768         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5769         if (rc) {
5770                 dev_err(hdev->dev, "failed resetting SM registers\n");
5771                 return -ENOMEM;
5772         }
5773
5774         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5775                         (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5776         num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5777         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5778         if (rc) {
5779                 dev_err(hdev->dev, "failed resetting SM registers\n");
5780                 return -ENOMEM;
5781         }
5782
5783         return 0;
5784 }
5785
5786 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5787 {
5788         u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5789                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5790         int i;
5791
5792         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5793                 u64 sob_addr = CFG_BASE +
5794                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5795                                 (i * sob_delta);
5796                 u32 dma_offset = i * DMA_CORE_OFFSET;
5797
5798                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5799                                 lower_32_bits(sob_addr));
5800                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5801                                 upper_32_bits(sob_addr));
5802                 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5803
5804                 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5805                  * modified by the user for SRAM reduction
5806                  */
5807                 if (i > 1)
5808                         WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5809                                                                 0x00000001);
5810         }
5811 }
5812
5813 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5814 {
5815         u32 qman_offset;
5816         int i;
5817
5818         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5819                 qman_offset = i * DMA_QMAN_OFFSET;
5820                 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5821         }
5822
5823         for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5824                 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5825                 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5826         }
5827
5828         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5829                 qman_offset = i * TPC_QMAN_OFFSET;
5830                 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5831         }
5832
5833         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5834                 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5835                                 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5836                 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5837         }
5838 }
5839
5840 static int gaudi_restore_user_registers(struct hl_device *hdev)
5841 {
5842         int rc;
5843
5844         rc = gaudi_restore_sm_registers(hdev);
5845         if (rc)
5846                 return rc;
5847
5848         gaudi_restore_dma_registers(hdev);
5849         gaudi_restore_qm_registers(hdev);
5850
5851         return 0;
5852 }
5853
5854 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5855 {
5856         return 0;
5857 }
5858
5859 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5860 {
5861         u32 size = hdev->asic_prop.mmu_pgt_size +
5862                         hdev->asic_prop.mmu_cache_mng_size;
5863         struct gaudi_device *gaudi = hdev->asic_specific;
5864         u64 addr = hdev->asic_prop.mmu_pgt_addr;
5865
5866         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5867                 return 0;
5868
5869         return gaudi_memset_device_memory(hdev, addr, size, 0);
5870 }
5871
5872 static void gaudi_restore_phase_topology(struct hl_device *hdev)
5873 {
5874
5875 }
5876
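/*
 * Perform a single transfer on a DMA core directly, without going through its
 * QMAN: program source, destination and size, commit, poll until the engine
 * is no longer busy and then check its error cause register.
 */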
5877 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
5878                                         u32 size_to_dma, dma_addr_t dma_addr)
5879 {
5880         u32 err_cause, val;
5881         u64 dma_offset;
5882         int rc;
5883
5884         dma_offset = dma_id * DMA_CORE_OFFSET;
5885
5886         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
5887         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
5888         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
5889         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
5890         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
5891         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
5892                         (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
5893
5894         rc = hl_poll_timeout(
5895                 hdev,
5896                 mmDMA0_CORE_STS0 + dma_offset,
5897                 val,
5898                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
5899                 0,
5900                 1000000);
5901
5902         if (rc) {
5903                 dev_err(hdev->dev,
5904                         "DMA %d timed out while reading 0x%llx\n",
5905                         dma_id, addr);
5906                 return -EIO;
5907         }
5908
5909         /* Verify DMA is OK */
5910         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5911         if (err_cause) {
5912                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5913                 dev_dbg(hdev->dev,
5914                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
5915                         err_cause);
5916                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5917
5918                 return -EIO;
5919         }
5920
5921         return 0;
5922 }
5923
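/*
 * Read an arbitrary device address range for debugfs by bouncing it through a
 * 2MB host buffer using one of the PCI DMA engines. The engine must be idle;
 * its QMAN CPs are stopped and the DMA core PROT register is temporarily
 * modified (see the TODO below) while the transfer runs, and both are
 * restored afterwards.
 */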
5924 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
5925                                 void *blob_addr)
5926 {
5927         u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
5928         u32 qm_glbl_sts0, qm_cgm_sts;
5929         u64 dma_offset, qm_offset;
5930         dma_addr_t dma_addr;
5931         void *kernel_addr;
5932         bool is_eng_idle;
5933         int rc = 0, dma_id;
5934
5935         kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);
5936
5937         if (!kernel_addr)
5938                 return -ENOMEM;
5939
5940         hdev->asic_funcs->hw_queues_lock(hdev);
5941
5942         dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
5943         dma_offset = dma_id * DMA_CORE_OFFSET;
5944         qm_offset = dma_id * DMA_QMAN_OFFSET;
5945         dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
5946         qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
5947         qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
5948         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
5949                       IS_DMA_IDLE(dma_core_sts0);
5950
5951         if (!is_eng_idle) {
5952                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
5953                 dma_offset = dma_id * DMA_CORE_OFFSET;
5954                 qm_offset = dma_id * DMA_QMAN_OFFSET;
5955                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
5956                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
5957                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
5958                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
5959                               IS_DMA_IDLE(dma_core_sts0);
5960
5961                 if (!is_eng_idle) {
5962                         dev_err_ratelimited(hdev->dev,
5963                                 "Can't read via DMA because it is BUSY\n");
5964                         rc = -EAGAIN;
5965                         goto out;
5966                 }
5967         }
5968
5969         cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
5970         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
5971                         0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
5972
5973         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
5974          * using the compute ctx ASID, if it exists. If not, use the kernel ctx
5975          * ASID.
5976          */
5977         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
5978
5979         /* Verify DMA is OK */
5980         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5981         if (err_cause) {
5982                 dev_dbg(hdev->dev,
5983                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
5984                         err_cause);
5985                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5986         }
5987
5988         pos = 0;
5989         size_left = size;
5990         size_to_dma = SZ_2M;
5991
5992         while (size_left > 0) {
5993
5994                 if (size_left < SZ_2M)
5995                         size_to_dma = size_left;
5996
5997                 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
5998                                                 dma_addr);
5999                 if (rc)
6000                         break;
6001
6002                 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6003
6004                 if (size_left <= SZ_2M)
6005                         break;
6006
6007                 pos += SZ_2M;
6008                 addr += SZ_2M;
6009                 size_left -= SZ_2M;
6010         }
6011
6012         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6013          * using the compute ctx ASID, if it exists. If not, use the kernel ctx
6014          * ASID.
6015          */
6016         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6017                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6018
6019         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6020
6021 out:
6022         hdev->asic_funcs->hw_queues_unlock(hdev);
6023
6024         hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);
6025
6026         return rc;
6027 }
6028
6029 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6030 {
6031         struct gaudi_device *gaudi = hdev->asic_specific;
6032
6033         if (hdev->reset_info.hard_reset_pending)
6034                 return U64_MAX;
6035
6036         return readq(hdev->pcie_bar[HBM_BAR_ID] +
6037                         (addr - gaudi->hbm_bar_cur_addr));
6038 }
6039
6040 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6041 {
6042         struct gaudi_device *gaudi = hdev->asic_specific;
6043
6044         if (hdev->reset_info.hard_reset_pending)
6045                 return;
6046
6047         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6048                         (addr - gaudi->hbm_bar_cur_addr));
6049 }
6050
6051 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6052 {
6053         /* mask to zero the MMBP and ASID bits */
6054         WREG32_AND(reg, ~0x7FF);
6055         WREG32_OR(reg, asid);
6056 }
6057
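/*
 * Bind the device engines to the given ASID by programming it into the
 * non-secure AXI user properties of every DMA, TPC and MME QMAN, the DMA
 * cores, the MME SBAB/ACC blocks, the initialized NIC QMANs and the PSOC
 * trace units.
 */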
6058 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6059 {
6060         struct gaudi_device *gaudi = hdev->asic_specific;
6061
6062         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6063                 return;
6064
6065         if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6066                 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6067                 return;
6068         }
6069
6070         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6071         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6072         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6073         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6074         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6075
6076         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6077         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6078         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6079         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6080         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6081
6082         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6083         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6084         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6085         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6086         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6087
6088         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6089         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6090         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6091         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6092         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6093
6094         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6095         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6096         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6097         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6098         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6099
6100         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6101         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6102         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6103         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6104         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6105
6106         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6107         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6108         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6109         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6110         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6111
6112         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6113         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6114         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6115         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6116         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6117
6118         gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6119         gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6120         gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6121         gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6122         gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6123         gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6124         gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6125         gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6126
6127         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6128         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6129         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6130         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6131         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6132         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6133         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6134
6135         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6136         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6137         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6138         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6139         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6140         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6141         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6142
6143         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6144         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6145         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6146         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6147         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6148         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6149         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6150
6151         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6152         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6153         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6154         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6155         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6156         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6157         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6158
6159         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6160         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6161         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6162         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6163         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6164         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6165         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6166
6167         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6168         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6169         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6170         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6171         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6172         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6173         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6174
6175         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6176         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6177         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6178         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6179         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6180         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6181         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6182
6183         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6184         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6185         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6186         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6187         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6188         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6189         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6190
6191         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6192         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6193         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6194         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6195         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6196         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6197         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6198         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6199         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6200         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6201
6202         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6203         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6204         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6205         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6206         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6207         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6208         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6209         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6210         gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6211         gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6212         gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6213         gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6214
6215         if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6216                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6217                                 asid);
6218                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6219                                 asid);
6220                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6221                                 asid);
6222                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6223                                 asid);
6224                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6225                                 asid);
6226         }
6227
6228         if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6229                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6230                                 asid);
6231                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6232                                 asid);
6233                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6234                                 asid);
6235                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6236                                 asid);
6237                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6238                                 asid);
6239         }
6240
6241         if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6242                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6243                                 asid);
6244                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6245                                 asid);
6246                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6247                                 asid);
6248                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6249                                 asid);
6250                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6251                                 asid);
6252         }
6253
6254         if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6255                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6256                                 asid);
6257                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6258                                 asid);
6259                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6260                                 asid);
6261                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6262                                 asid);
6263                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6264                                 asid);
6265         }
6266
6267         if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6268                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6269                                 asid);
6270                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6271                                 asid);
6272                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6273                                 asid);
6274                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6275                                 asid);
6276                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6277                                 asid);
6278         }
6279
6280         if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6281                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6282                                 asid);
6283                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6284                                 asid);
6285                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6286                                 asid);
6287                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6288                                 asid);
6289                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6290                                 asid);
6291         }
6292
6293         if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6294                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6295                                 asid);
6296                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6297                                 asid);
6298                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6299                                 asid);
6300                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6301                                 asid);
6302                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6303                                 asid);
6304         }
6305
6306         if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6307                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6308                                 asid);
6309                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6310                                 asid);
6311                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6312                                 asid);
6313                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6314                                 asid);
6315                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6316                                 asid);
6317         }
6318
6319         if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6320                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6321                                 asid);
6322                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6323                                 asid);
6324                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6325                                 asid);
6326                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6327                                 asid);
6328                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6329                                 asid);
6330         }
6331
6332         if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6333                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6334                                 asid);
6335                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6336                                 asid);
6337                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6338                                 asid);
6339                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6340                                 asid);
6341                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6342                                 asid);
6343         }
6344
6345         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6346         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6347 }
6348
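/*
 * Run a kernel-owned CB on the DMA0 QMAN: place a MSG_PROT fence packet at
 * the end of the patched CB, send it without a completion queue entry, and
 * poll a host fence buffer until the QMAN writes the expected value or the
 * timeout expires. The DMA0 core PROT register is adjusted for the duration
 * of the job and restored afterwards.
 */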
6349 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6350                 struct hl_cs_job *job)
6351 {
6352         struct packet_msg_prot *fence_pkt;
6353         u32 *fence_ptr;
6354         dma_addr_t fence_dma_addr;
6355         struct hl_cb *cb;
6356         u32 tmp, timeout, dma_offset;
6357         int rc;
6358
6359         if (hdev->pldm)
6360                 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6361         else
6362                 timeout = HL_DEVICE_TIMEOUT_USEC;
6363
6364         fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
6365         if (!fence_ptr) {
6366                 dev_err(hdev->dev,
6367                         "Failed to allocate fence memory for QMAN0\n");
6368                 return -ENOMEM;
6369         }
6370
6371         cb = job->patched_cb;
6372
6373         fence_pkt = cb->kernel_address +
6374                         job->job_cb_size - sizeof(struct packet_msg_prot);
6375
6376         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6377         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6378         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6379
6380         fence_pkt->ctl = cpu_to_le32(tmp);
6381         fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6382         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6383
6384         dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6385
6386         WREG32(mmDMA0_CORE_PROT + dma_offset,
6387                         BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6388
6389         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6390                                         job->job_cb_size, cb->bus_address);
6391         if (rc) {
6392                 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6393                 goto free_fence_ptr;
6394         }
6395
6396         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6397                                 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6398                                 timeout, true);
6399
6400         hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6401
6402         if (rc == -ETIMEDOUT) {
6403                 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6404                 goto free_fence_ptr;
6405         }
6406
6407 free_fence_ptr:
6408         WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6409
6410         hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
6411         return rc;
6412 }
6413
6414 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6415 {
6416         if (event_type >= GAUDI_EVENT_SIZE)
6417                 goto event_not_supported;
6418
6419         if (!gaudi_irq_map_table[event_type].valid)
6420                 goto event_not_supported;
6421
6422         snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6423
6424         return;
6425
6426 event_not_supported:
6427         snprintf(desc, size, "N/A");
6428 }
6429
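/*
 * Each DMA_IF initiator ID below is shared by two DMA engines, so the RAZWI
 * source is disambiguated by reading both DMA cores' ERR_CAUSE registers: if
 * exactly one of them has the relevant HBW read/write error bit set, that
 * engine is returned; otherwise both candidates are reported.
 */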
6430 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6431                                                         bool is_write, u16 *engine_id_1,
6432                                                         u16 *engine_id_2)
6433 {
6434         u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6435
6436         mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6437                                 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6438
6439         switch (x_y) {
6440         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6441         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6442                 dma_id[0] = 0;
6443                 dma_id[1] = 2;
6444                 break;
6445         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6446         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6447                 dma_id[0] = 1;
6448                 dma_id[1] = 3;
6449                 break;
6450         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6451         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6452                 dma_id[0] = 4;
6453                 dma_id[1] = 6;
6454                 break;
6455         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6456         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6457                 dma_id[0] = 5;
6458                 dma_id[1] = 7;
6459                 break;
6460         default:
6461                 goto unknown_initiator;
6462         }
6463
6464         for (i = 0 ; i < 2 ; i++) {
6465                 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6466                 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6467         }
6468
6469         switch (x_y) {
6470         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6471         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6472                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6473                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6474                         return "DMA0";
6475                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6476                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6477                         return "DMA2";
6478                 } else {
6479                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6480                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6481                         return "DMA0 or DMA2";
6482                 }
6483         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6484         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6485                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6486                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6487                         return "DMA1";
6488                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6489                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6490                         return "DMA3";
6491                 } else {
6492                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6493                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6494                         return "DMA1 or DMA3";
6495                 }
6496         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6497         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6498                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6499                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6500                         return "DMA4";
6501                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6502                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6503                         return "DMA6";
6504                 } else {
6505                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6506                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6507                         return "DMA4 or DMA6";
6508                 }
6509         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6510         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6511                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6512                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6513                         return "DMA5";
6514                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6515                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6516                         return "DMA7";
6517                 } else {
6518                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6519                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6520                         return "DMA5 or DMA7";
6521                 }
6522         }
6523
6524 unknown_initiator:
6525         return "unknown initiator";
6526 }
6527
6528 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6529                                                         u16 *engine_id_1, u16 *engine_id_2)
6530 {
6531         u32 val, x_y, axi_id;
6532
6533         val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6534                                 RREG32(mmMMU_UP_RAZWI_READ_ID);
6535         x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6536                         (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6537         axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6538                         RAZWI_INITIATOR_AXI_ID_SHIFT);
6539
6540         switch (x_y) {
6541         case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6542                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6543                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6544                         return "TPC0";
6545                 }
6546                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6547                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6548                         return "NIC0";
6549                 }
6550                 break;
6551         case RAZWI_INITIATOR_ID_X_Y_TPC1:
6552                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
6553                 return "TPC1";
6554         case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6555         case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6556                 *engine_id_1 = GAUDI_ENGINE_ID_MME_0;
6557                 return "MME0";
6558         case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6559         case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6560                 *engine_id_1 = GAUDI_ENGINE_ID_MME_1;
6561                 return "MME1";
6562         case RAZWI_INITIATOR_ID_X_Y_TPC2:
6563                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
6564                 return "TPC2";
6565         case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6566                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6567                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
6568                         return "TPC3";
6569                 }
6570                 /* PCI, CPU or PSOC does not have an engine id */
6571                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6572                         return "PCI";
6573                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6574                         return "CPU";
6575                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6576                         return "PSOC";
6577                 break;
6578         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6579         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6580         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6581         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6582         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6583         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6584         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6585         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6586                 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
6587                                 engine_id_1, engine_id_2);
6588         case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6589                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6590                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
6591                         return "TPC4";
6592                 }
6593                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6594                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
6595                         return "NIC1";
6596                 }
6597                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6598                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
6599                         return "NIC2";
6600                 }
6601                 break;
6602         case RAZWI_INITIATOR_ID_X_Y_TPC5:
6603                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
6604                 return "TPC5";
6605         case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6606         case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6607                 *engine_id_1 = GAUDI_ENGINE_ID_MME_2;
6608                 return "MME2";
6609         case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6610         case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6611                 *engine_id_1 = GAUDI_ENGINE_ID_MME_3;
6612                 return "MME3";
6613         case RAZWI_INITIATOR_ID_X_Y_TPC6:
6614                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
6615                 return "TPC6";
6616         case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6617                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6618                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
6619                         return "TPC7";
6620                 }
6621                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6622                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
6623                         return "NIC4";
6624                 }
6625                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6626                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
6627                         return "NIC5";
6628                 }
6629                 break;
6630         default:
6631                 break;
6632         }
6633
6634         dev_err(hdev->dev,
6635                 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6636                 val,
6637                 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6638                 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6639                 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6640                         RAZWI_INITIATOR_AXI_ID_MASK);
6641
6642         return "unknown initiator";
6643 }
6644
6645 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1,
6646                                                 u16 *engine_id_2, bool *is_read, bool *is_write)
6647 {
6649         if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6650                 dev_err_ratelimited(hdev->dev,
6651                         "RAZWI event caused by illegal write of %s\n",
6652                         gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
6653                 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6654                 *is_write = true;
6655         }
6656
6657         if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6658                 dev_err_ratelimited(hdev->dev,
6659                         "RAZWI event caused by illegal read of %s\n",
6660                         gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
6661                 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6662                 *is_read = true;
6663         }
6664 }
6665
6666 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
6667 {
6668         struct gaudi_device *gaudi = hdev->asic_specific;
6669         u32 val;
6670
6671         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6672                 return;
6673
6674         val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6675         if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6676                 *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6677                 *addr <<= 32;
6678                 *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6679
6680                 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
6681                 hl_handle_page_fault(hdev, *addr, 0, true, event_mask);
6682
6683                 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6684         }
6685
6686         val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6687         if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6688                 *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6689                 *addr <<= 32;
6690                 *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6691
6692                 dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
6693
6694                 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6695         }
6696 }
6697
6698 /*
6699  *  +-------------------+------------------------------------------------------+
6700  *  | Configuration Reg |                     Description                      |
6701  *  |      Address      |                                                      |
6702  *  +-------------------+------------------------------------------------------+
6703  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
6704  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
6705  *  |                   |0xF34 memory wrappers 63:32                           |
6706  *  |                   |0xF38 memory wrappers 95:64                           |
6707  *  |                   |0xF3C memory wrappers 127:96                          |
6708  *  +-------------------+------------------------------------------------------+
6709  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
6710  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
6711  *  |                   |0xF44 memory wrappers 63:32                           |
6712  *  |                   |0xF48 memory wrappers 95:64                           |
6713  *  |                   |0xF4C memory wrappers 127:96                          |
6714  *  +-------------------+------------------------------------------------------+
6715  */
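/*
 * Worked example (illustrative): a single-bit error in memory wrapper 70 sets
 * bit 6 of register 0xF38 (wrappers 95:64), so the scan below recovers
 * *memory_wrapper_idx = 6 + 2 * 32 = 70, selects that wrapper through
 * GAUDI_ECC_MEM_SEL_OFFSET and then reads its error address and syndrome.
 */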
6716 static int gaudi_extract_ecc_info(struct hl_device *hdev,
6717                 struct ecc_info_extract_params *params, u64 *ecc_address,
6718                 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
6719 {
6720         u32 i, num_mem_regs, reg, err_bit;
6721         u64 err_addr, err_word = 0;
6722
6723         num_mem_regs = params->num_memories / 32 +
6724                         ((params->num_memories % 32) ? 1 : 0);
6725
6726         if (params->block_address >= CFG_BASE)
6727                 params->block_address -= CFG_BASE;
6728
6729         if (params->derr)
6730                 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
6731         else
6732                 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
6733
6734         /* Set invalid wrapper index */
6735         *memory_wrapper_idx = 0xFF;
6736
6737         /* Iterate through memory wrappers, a single bit must be set */
6738         for (i = 0 ; i < num_mem_regs ; i++) {
6739                 /* indication registers are consecutive 4-byte words */
6740                 err_word = RREG32(err_addr + i * 4);
6741                 if (err_word) {
6742                         err_bit = __ffs(err_word);
6743                         *memory_wrapper_idx = err_bit + (32 * i);
6744                         break;
6745                 }
6746         }
6747
6748         if (*memory_wrapper_idx == 0xFF) {
6749                 dev_err(hdev->dev, "ECC error information cannot be found\n");
6750                 return -EINVAL;
6751         }
6752
6753         WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6754                         *memory_wrapper_idx);
6755
6756         *ecc_address =
6757                 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6758         *ecc_syndrom =
6759                 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6760
6761         /* Clear error indication */
6762         reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6763         if (params->derr)
6764                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6765         else
6766                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6767
6768         WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6769
6770         return 0;
6771 }
6772
6773 /*
6774  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6775  *
6776  * @idx: the current pi/ci value
6777  * @q_len: the queue length (power of 2)
6778  *
6779  * @return the cyclically decremented index
6780  */
6781 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
6782 {
6783         u32 mask = q_len - 1;
6784
6785         /*
6786          * modular decrement is equivalent to adding (q_len - 1);
6787          * we then keep only the LSBs to make sure the value stays in
6788          * the range [0, q_len - 1]
6789          */
6790         return (idx + q_len - 1) & mask;
6791 }
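/*
 * Worked example (illustrative): for q_len = 8 the mask is 7, so
 * gaudi_queue_idx_dec(5, 8) = (5 + 7) & 7 = 4 and
 * gaudi_queue_idx_dec(0, 8) = (0 + 7) & 7 = 7, i.e. index 0 wraps around to
 * the last entry of the queue.
 */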
6792
6793 /**
6794  * gaudi_handle_sw_config_stream_data - print SW config stream data
6795  *
6796  * @hdev: pointer to the habanalabs device structure
6797  * @stream: the QMAN's stream
6798  * @qman_base: base address of QMAN registers block
6799  * @event_mask: mask of the last events occurred
6800  * @event_mask: mask of the events that have occurred
6801 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
6802                                                 u64 qman_base, u64 event_mask)
6803 {
6804         u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6805         u32 cq_ptr_lo_off, size;
6806
6807         cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
6808
6809         cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
6810                                                 stream * cq_ptr_lo_off;
6811         cq_ptr_hi = cq_ptr_lo +
6812                                 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
6813         cq_tsize = cq_ptr_lo +
6814                                 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
6815
6816         cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6817         size = RREG32(cq_tsize);
6818         dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
6819                                                         stream, cq_ptr, size);
6820
6821         if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6822                 hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
6823                 hdev->captured_err_info.undef_opcode.cq_size = size;
6824                 hdev->captured_err_info.undef_opcode.stream_id = stream;
6825         }
6826 }
6827
6828 /**
6829  * gaudi_handle_last_pqes_on_err - print last PQEs on error
6830  *
6831  * @hdev: pointer to the habanalabs device structure
6832  * @qid_base: first QID of the QMAN (out of 4 streams)
6833  * @stream: the QMAN's stream
6834  * @qman_base: base address of QMAN registers block
6835  * @event_mask: mask of the last events occurred
6836  * @event_mask: mask of the events that have occurred
6837  */
6838 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
6839                                                 u32 stream, u64 qman_base,
6840                                                 u64 event_mask,
6841                                                 bool pr_sw_conf)
6842 {
6843         u32 ci, qm_ci_stream_off, queue_len;
6844         struct hl_hw_queue *q;
6845         u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
6846         int i;
6847
6848         q = &hdev->kernel_queues[qid_base + stream];
6849
6850         qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
6851         pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
6852                                                 stream * qm_ci_stream_off;
6853
6854         queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
6855                                         q->int_queue_len : HL_QUEUE_LENGTH;
6856
6857         hdev->asic_funcs->hw_queues_lock(hdev);
6858
6859         if (pr_sw_conf)
6860                 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6861
6862         ci = RREG32(pq_ci);
6863
6864         /* we should start printing from ci - 1 */
6865         ci = gaudi_queue_idx_dec(ci, queue_len);
6866         memset(addr, 0, sizeof(addr));
6867
6868         for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6869                 struct hl_bd *bd;
6870                 u32 len;
6871
6872                 bd = q->kernel_address;
6873                 bd += ci;
6874
6875                 len = le32_to_cpu(bd->len);
6876                 /* len 0 means an uninitialized entry - break */
6877                 if (!len)
6878                         break;
6879
6880                 addr[i] = le64_to_cpu(bd->ptr);
6881
6882                 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
6883                                                         stream, ci, addr[i], len);
6884
6885                 /* get previous ci, wrap if needed */
6886                 ci = gaudi_queue_idx_dec(ci, queue_len);
6887         }
6888
6889         if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6890                 struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
6891                 u32 arr_idx = undef_opcode->cb_addr_streams_len;
6892
6893                 if (arr_idx == 0) {
6894                         undef_opcode->timestamp = ktime_get();
6895                         undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
6896                 }
6897
6898                 memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
6899                 undef_opcode->cb_addr_streams_len++;
6900         }
6901
6902         hdev->asic_funcs->hw_queues_unlock(hdev);
6903 }
6904
6905 /**
6906  * handle_qman_data_on_err - extract QMAN data on error
6907  *
6908  * @hdev: pointer to the habanalabs device structure
6909  * @qid_base: first QID of the QMAN (out of 4 streams)
6910  * @stream: the QMAN's stream
6911  * @qman_base: base address of QMAN registers block
6912  * @event_mask: mask of the events that have occurred
6913  *
6914  * This function attempts to extract as much data as possible on a QMAN error.
6915  * For an upper CP, print the SW config stream data and the last 8 PQEs.
6916  * For the lower CP, print the SW config data and the last PQEs of ALL 4 upper CPs.
6917  */
6918 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
6919                                    u32 stream, u64 qman_base, u64 event_mask)
6920 {
6921         u32 i;
6922
6923         if (stream != QMAN_STREAMS) {
6924                 gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
6925                         qman_base, event_mask, true);
6926                 return;
6927         }
6928
6929         /* handle Lower-CP */
6930         gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6931
6932         for (i = 0; i < QMAN_STREAMS; i++)
6933                 gaudi_handle_last_pqes_on_err(hdev, qid_base, i,
6934                         qman_base, event_mask, false);
6935 }
6936
6937 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
6938                                           const char *qm_name,
6939                                           u64 qman_base,
6940                                           u32 qid_base,
6941                                           u64 *event_mask)
6942 {
6943         u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
6944         u64 glbl_sts_addr, arb_err_addr;
6945         char reg_desc[32];
6946
6947         glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
6948         arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
6949
6950         /* Iterate through all stream GLBL_STS1 registers + Lower CP */
6951         for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
6952                 glbl_sts_clr_val = 0;
6953                 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
6954
6955                 if (!glbl_sts_val)
6956                         continue;
6957
6958                 if (i == QMAN_STREAMS)
6959                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
6960                 else
6961                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
6962
6963                 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
6964                         if (glbl_sts_val & BIT(j)) {
6965                                 dev_err_ratelimited(hdev->dev,
6966                                                 "%s %s. err cause: %s\n",
6967                                                 qm_name, reg_desc,
6968                                                 gaudi_qman_error_cause[j]);
6969                                 glbl_sts_clr_val |= BIT(j);
6970                         }
6971                 }
6972                 /* check for undefined opcode */
6973                 if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
6974                                 hdev->captured_err_info.undef_opcode.write_enable) {
6975                         memset(&hdev->captured_err_info.undef_opcode, 0,
6976                                                 sizeof(hdev->captured_err_info.undef_opcode));
6977
6978                         hdev->captured_err_info.undef_opcode.write_enable = false;
6979                         *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
6980                 }
6981
6982                 /* Write 1 to clear errors */
6983                 if (!hdev->stop_on_err)
6984                         WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
6985                 else
6986                         handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
6987         }
6988
6989         arb_err_val = RREG32(arb_err_addr);
6990
6991         if (!arb_err_val)
6992                 return;
6993
6994         for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
6995                 if (arb_err_val & BIT(j)) {
6996                         dev_err_ratelimited(hdev->dev,
6997                                         "%s ARB_ERR. err cause: %s\n",
6998                                         qm_name,
6999                                         gaudi_qman_arb_error_cause[j]);
7000                 }
7001         }
7002 }
7003
7004 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7005                 struct hl_eq_sm_sei_data *sei_data)
7006 {
7007         u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7008
7009         /* Flip the bits as the enum is ordered in the opposite way */
7010         index = (index ^ 0x3) & 0x3;
7011         /* Flip the bits as the enum is ordered in the opposite way (0 <-> 3, 1 <-> 2) */
7012         switch (sei_data->sei_cause) {
7013         case SM_SEI_SO_OVERFLOW:
7014                 dev_err_ratelimited(hdev->dev,
7015                         "%s SEI Error: SOB Group %u overflow/underflow",
7016                         gaudi_sync_manager_names[index],
7017                         le32_to_cpu(sei_data->sei_log));
7018                 break;
7019         case SM_SEI_LBW_4B_UNALIGNED:
7020                 dev_err_ratelimited(hdev->dev,
7021                         "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7022                         gaudi_sync_manager_names[index],
7023                         le32_to_cpu(sei_data->sei_log));
7024                 break;
7025         case SM_SEI_AXI_RESPONSE_ERR:
7026                 dev_err_ratelimited(hdev->dev,
7027                         "%s SEI Error: AXI ID %u response error",
7028                         gaudi_sync_manager_names[index],
7029                         le32_to_cpu(sei_data->sei_log));
7030                 break;
7031         default:
7032                 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7033                                 le32_to_cpu(sei_data->sei_log));
7034                 break;
7035         }
7036 }
7037
7038 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7039                 struct hl_eq_ecc_data *ecc_data)
7040 {
7041         struct ecc_info_extract_params params;
7042         u64 ecc_address = 0, ecc_syndrom = 0;
7043         u8 index, memory_wrapper_idx = 0;
7044         bool extract_info_from_fw;
7045         int rc;
7046
7047         if (hdev->asic_prop.fw_security_enabled) {
7048                 extract_info_from_fw = true;
7049                 goto extract_ecc_info;
7050         }
7051
7052         switch (event_type) {
7053         case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7054         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7055                 extract_info_from_fw = true;
7056                 break;
7057         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7058                 index = event_type - GAUDI_EVENT_TPC0_SERR;
7059                 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7060                 params.num_memories = 90;
7061                 params.derr = false;
7062                 extract_info_from_fw = false;
7063                 break;
7064         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7065                 index = event_type - GAUDI_EVENT_TPC0_DERR;
7066                 params.block_address =
7067                         mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7068                 params.num_memories = 90;
7069                 params.derr = true;
7070                 extract_info_from_fw = false;
7071                 break;
7072         case GAUDI_EVENT_MME0_ACC_SERR:
7073         case GAUDI_EVENT_MME1_ACC_SERR:
7074         case GAUDI_EVENT_MME2_ACC_SERR:
7075         case GAUDI_EVENT_MME3_ACC_SERR:
7076                 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7077                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7078                 params.num_memories = 128;
7079                 params.derr = false;
7080                 extract_info_from_fw = false;
7081                 break;
7082         case GAUDI_EVENT_MME0_ACC_DERR:
7083         case GAUDI_EVENT_MME1_ACC_DERR:
7084         case GAUDI_EVENT_MME2_ACC_DERR:
7085         case GAUDI_EVENT_MME3_ACC_DERR:
7086                 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7087                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7088                 params.num_memories = 128;
7089                 params.derr = true;
7090                 extract_info_from_fw = false;
7091                 break;
7092         case GAUDI_EVENT_MME0_SBAB_SERR:
7093         case GAUDI_EVENT_MME1_SBAB_SERR:
7094         case GAUDI_EVENT_MME2_SBAB_SERR:
7095         case GAUDI_EVENT_MME3_SBAB_SERR:
7096                 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7097                 params.block_address =
7098                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7099                 params.num_memories = 33;
7100                 params.derr = false;
7101                 extract_info_from_fw = false;
7102                 break;
7103         case GAUDI_EVENT_MME0_SBAB_DERR:
7104         case GAUDI_EVENT_MME1_SBAB_DERR:
7105         case GAUDI_EVENT_MME2_SBAB_DERR:
7106         case GAUDI_EVENT_MME3_SBAB_DERR:
7107                 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7108                 params.block_address =
7109                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7110                 params.num_memories = 33;
7111                 params.derr = true;
7112                 extract_info_from_fw = false;
7113                 break;
7114         default:
7115                 return;
7116         }
7117
7118 extract_ecc_info:
7119         if (extract_info_from_fw) {
7120                 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7121                 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7122                 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7123         } else {
7124                 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7125                                 &ecc_syndrom, &memory_wrapper_idx);
7126                 if (rc)
7127                         return;
7128         }
7129
7130         dev_err(hdev->dev,
7131                 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
7132                 ecc_address, ecc_syndrom, memory_wrapper_idx);
7133 }
7134
7135 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7136 {
7137         u64 qman_base;
7138         char desc[32];
7139         u32 qid_base;
7140         u8 index;
7141
7142         switch (event_type) {
7143         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7144                 index = event_type - GAUDI_EVENT_TPC0_QM;
7145                 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7146                 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7147                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7148                 break;
7149         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7150                 if (event_type == GAUDI_EVENT_MME0_QM) {
7151                         index = 0;
7152                         qid_base = GAUDI_QUEUE_ID_MME_0_0;
7153                 } else { /* event_type == GAUDI_EVENT_MME2_QM */
7154                         index = 2;
7155                         qid_base = GAUDI_QUEUE_ID_MME_1_0;
7156                 }
7157                 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7158                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7159                 break;
7160         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7161                 index = event_type - GAUDI_EVENT_DMA0_QM;
7162                 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7163                 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7164                 if (index > 1)
7165                         qid_base++;
7166                 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7167                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7168                 break;
7169         case GAUDI_EVENT_NIC0_QM0:
7170                 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7171                 qman_base = mmNIC0_QM0_BASE;
7172                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7173                 break;
7174         case GAUDI_EVENT_NIC0_QM1:
7175                 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7176                 qman_base = mmNIC0_QM1_BASE;
7177                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7178                 break;
7179         case GAUDI_EVENT_NIC1_QM0:
7180                 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7181                 qman_base = mmNIC1_QM0_BASE;
7182                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7183                 break;
7184         case GAUDI_EVENT_NIC1_QM1:
7185                 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7186                 qman_base = mmNIC1_QM1_BASE;
7187                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7188                 break;
7189         case GAUDI_EVENT_NIC2_QM0:
7190                 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7191                 qman_base = mmNIC2_QM0_BASE;
7192                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7193                 break;
7194         case GAUDI_EVENT_NIC2_QM1:
7195                 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7196                 qman_base = mmNIC2_QM1_BASE;
7197                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7198                 break;
7199         case GAUDI_EVENT_NIC3_QM0:
7200                 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7201                 qman_base = mmNIC3_QM0_BASE;
7202                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7203                 break;
7204         case GAUDI_EVENT_NIC3_QM1:
7205                 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7206                 qman_base = mmNIC3_QM1_BASE;
7207                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7208                 break;
7209         case GAUDI_EVENT_NIC4_QM0:
7210                 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7211                 qman_base = mmNIC4_QM0_BASE;
7212                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7213                 break;
7214         case GAUDI_EVENT_NIC4_QM1:
7215                 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7216                 qman_base = mmNIC4_QM1_BASE;
7217                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7218                 break;
7219         default:
7220                 return;
7221         }
7222
7223         gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
7224 }
7225
7226 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7227                                         bool check_razwi, u64 *event_mask)
7228 {
7229         bool is_read = false, is_write = false;
7230         u16 engine_id[2], num_of_razwi_eng = 0;
7231         char desc[64] = "";
7232         u64 razwi_addr = 0;
7233         u8 razwi_flags = 0;
7234
7235         /*
7236          * Initialize the engine ids as invalid by default; they get a valid value only if
7237          * the razwi was initiated by an engine that has an engine id.
7238          */
7239         engine_id[0] = HL_RAZWI_NA_ENG_ID;
7240         engine_id[1] = HL_RAZWI_NA_ENG_ID;
7241
7242         gaudi_get_event_desc(event_type, desc, sizeof(desc));
7243         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7244                 event_type, desc);
7245
7246         if (check_razwi) {
7247                 gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,
7248                                                 &is_write);
7249                 gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);
7250
7251                 if (is_read)
7252                         razwi_flags |= HL_RAZWI_READ;
7253                 if (is_write)
7254                         razwi_flags |= HL_RAZWI_WRITE;
7255
7256                 if (engine_id[0] != HL_RAZWI_NA_ENG_ID) {
7257                         if (engine_id[1] != HL_RAZWI_NA_ENG_ID)
7258                                 num_of_razwi_eng = 2;
7259                         else
7260                                 num_of_razwi_eng = 1;
7261                 }
7262
7263                 if (razwi_flags)
7264                         hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng,
7265                                         razwi_flags, event_mask);
7266         }
7267 }
7268
7269 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7270                                         struct cpucp_pkt_sync_err *sync_err)
7271 {
7272         struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7273
7274         dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
7275                 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
7276 }
7277
7278 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7279                                         struct hl_eq_fw_alive *fw_alive)
7280 {
7281         dev_err(hdev->dev,
7282                 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7283                 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical",
7284                 le32_to_cpu(fw_alive->process_id),
7285                 le32_to_cpu(fw_alive->thread_id),
7286                 le64_to_cpu(fw_alive->uptime_seconds));
7287 }
7288
7289 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
7290                                                 void *data)
7291 {
7292         char desc[64] = "", *type;
7293         struct eq_nic_sei_event *eq_nic_sei = data;
7294         u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
7295
7296         switch (eq_nic_sei->axi_error_cause) {
7297         case RXB:
7298                 type = "RXB";
7299                 break;
7300         case RXE:
7301                 type = "RXE";
7302                 break;
7303         case TXS:
7304                 type = "TXS";
7305                 break;
7306         case TXE:
7307                 type = "TXE";
7308                 break;
7309         case QPC_RESP:
7310                 type = "QPC_RESP";
7311                 break;
7312         case NON_AXI_ERR:
7313                 type = "NON_AXI_ERR";
7314                 break;
7315         case TMR:
7316                 type = "TMR";
7317                 break;
7318         default:
7319                 dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
7320                         eq_nic_sei->axi_error_cause);
7321                 type = "N/A";
7322                 break;
7323         }
7324
7325         snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
7326                         eq_nic_sei->id);
7327         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7328                 event_type, desc);
7329 }
7330
7331 static int gaudi_compute_reset_late_init(struct hl_device *hdev)
7332 {
7333         /* GAUDI doesn't support any reset except hard-reset */
7334         return -EPERM;
7335 }
7336
7337 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7338                         struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7339 {
7340         u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7341         int rc = 0;
7342
7343         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7344                                         CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7345                 if (!hbm_ecc_data) {
7346                         dev_err(hdev->dev, "No FW ECC data");
7347                         return 0;
7348                 }
7349
7350                 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7351                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7352                 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7353                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7354                 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7355                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7356                 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7357                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7358                 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7359                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7360                 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7361                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7362                 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7363                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7364
7365                 dev_err(hdev->dev,
7366                         "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7367                         device, ch, wr_par, rd_par, ca_par, serr, derr);
7368                 dev_err(hdev->dev,
7369                         "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7370                         device, ch, hbm_ecc_data->first_addr, type,
7371                         hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7372                         hbm_ecc_data->dec_cnt);
7373                 return 0;
7374         }
7375
7376         if (hdev->asic_prop.fw_security_enabled) {
7377                 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7378                 return 0;
7379         }
7380
7381         base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7382         for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7383                 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7384                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7385                 if (val) {
7386                         rc = -EIO;
7387                         dev_err(hdev->dev,
7388                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7389                                 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7390                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7391                                 (val >> 4) & 0x1);
7392
7393                         val2 = RREG32(base + ch * 0x1000 + 0x060);
7394                         dev_err(hdev->dev,
7395                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7396                                 device, ch * 2,
7397                                 RREG32(base + ch * 0x1000 + 0x064),
7398                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7399                                 (val2 & 0xFF0000) >> 16,
7400                                 (val2 & 0xFF000000) >> 24);
7401                 }
7402
7403                 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7404                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7405                 if (val) {
7406                         rc = -EIO;
7407                         dev_err(hdev->dev,
7408                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7409                                 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7410                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7411                                 (val >> 4) & 0x1);
7412
7413                         val2 = RREG32(base + ch * 0x1000 + 0x070);
7414                         dev_err(hdev->dev,
7415                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7416                                 device, ch * 2 + 1,
7417                                 RREG32(base + ch * 0x1000 + 0x074),
7418                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7419                                 (val2 & 0xFF0000) >> 16,
7420                                 (val2 & 0xFF000000) >> 24);
7421                 }
7422
7423                 /* Clear interrupts */
7424                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7425                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7426                 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7427                 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7428                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7429                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7430         }
7431
7432         val  = RREG32(base + 0x8F30);
7433         val2 = RREG32(base + 0x8F34);
7434         if (val | val2) {
7435                 rc = -EIO;
7436                 dev_err(hdev->dev,
7437                         "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7438                         device, val, val2);
7439         }
7440         val  = RREG32(base + 0x8F40);
7441         val2 = RREG32(base + 0x8F44);
7442         if (val | val2) {
7443                 rc = -EIO;
7444                 dev_err(hdev->dev,
7445                         "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7446                         device, val, val2);
7447         }
7448
7449         return rc;
7450 }
7451
7452 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7453 {
7454         switch (hbm_event_type) {
7455         case GAUDI_EVENT_HBM0_SPI_0:
7456         case GAUDI_EVENT_HBM0_SPI_1:
7457                 return 0;
7458         case GAUDI_EVENT_HBM1_SPI_0:
7459         case GAUDI_EVENT_HBM1_SPI_1:
7460                 return 1;
7461         case GAUDI_EVENT_HBM2_SPI_0:
7462         case GAUDI_EVENT_HBM2_SPI_1:
7463                 return 2;
7464         case GAUDI_EVENT_HBM3_SPI_0:
7465         case GAUDI_EVENT_HBM3_SPI_1:
7466                 return 3;
7467         default:
7468                 break;
7469         }
7470
7471         /* Should never happen */
7472         return 0;
7473 }
7474
7475 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7476                                         char *interrupt_name)
7477 {
7478         u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7479         bool soft_reset_required = false;
7480
7481         tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7482                                 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7483
7484         for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7485                 if (tpc_interrupts_cause & BIT(i)) {
7486                         dev_err_ratelimited(hdev->dev,
7487                                         "TPC%d_%s interrupt cause: %s\n",
7488                                         tpc_id, interrupt_name,
7489                                         gaudi_tpc_interrupts_cause[i]);
7490                         /* If this is a QM error, we need to soft-reset */
7491                         if (i == 15)
7492                                 soft_reset_required = true;
7493                 }
7494
7495         /* Clear interrupts */
7496         WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7497
7498         return soft_reset_required;
7499 }
7500
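/*
 * The TPCx_DEC event IDs are evidently spaced two apart per TPC and the
 * TPCx_KRN_ERR IDs six apart, hence the divisions by 2 and 6 below when
 * recovering the TPC index from the event type.
 */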
7501 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7502 {
7503         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7504 }
7505
7506 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7507 {
7508         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7509 }
7510
7511 static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7512 {
7513         ktime_t zero_time = ktime_set(0, 0);
7514
7515         mutex_lock(&hdev->clk_throttling.lock);
7516
7517         switch (event_type) {
7518         case GAUDI_EVENT_FIX_POWER_ENV_S:
7519                 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7520                 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7521                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7522                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7523                 dev_info_ratelimited(hdev->dev,
7524                         "Clock throttling due to power consumption\n");
7525                 break;
7526
7527         case GAUDI_EVENT_FIX_POWER_ENV_E:
7528                 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7529                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7530                 dev_info_ratelimited(hdev->dev,
7531                         "Power envelope is safe, back to optimal clock\n");
7532                 break;
7533
7534         case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7535                 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7536                 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7537                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7538                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7539                 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7540                 dev_info_ratelimited(hdev->dev,
7541                         "Clock throttling due to overheating\n");
7542                 break;
7543
7544         case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7545                 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7546                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7547                 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7548                 dev_info_ratelimited(hdev->dev,
7549                         "Thermal envelope is safe, back to optimal clock\n");
7550                 break;
7551
7552         default:
7553                 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7554                         event_type);
7555                 break;
7556         }
7557
7558         mutex_unlock(&hdev->clk_throttling.lock);
7559 }
7560
7561 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
7562 {
7563         struct gaudi_device *gaudi = hdev->asic_specific;
7564         struct hl_info_fw_err_info fw_err_info;
7565         u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
7566         u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7567         u32 fw_fatal_err_flag = 0, flags = 0;
7568         u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7569                         >> EQ_CTL_EVENT_TYPE_SHIFT);
7570         bool reset_required, reset_direct = false;
7571         u8 cause;
7572         int rc;
7573
7574         if (event_type >= GAUDI_EVENT_SIZE) {
7575                 dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7576                                 event_type, GAUDI_EVENT_SIZE - 1);
7577                 return;
7578         }
7579
7580         gaudi->events_stat[event_type]++;
7581         gaudi->events_stat_aggregate[event_type]++;
7582
7583         switch (event_type) {
7584         case GAUDI_EVENT_PCIE_CORE_DERR:
7585         case GAUDI_EVENT_PCIE_IF_DERR:
7586         case GAUDI_EVENT_PCIE_PHY_DERR:
7587         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7588         case GAUDI_EVENT_MME0_ACC_DERR:
7589         case GAUDI_EVENT_MME0_SBAB_DERR:
7590         case GAUDI_EVENT_MME1_ACC_DERR:
7591         case GAUDI_EVENT_MME1_SBAB_DERR:
7592         case GAUDI_EVENT_MME2_ACC_DERR:
7593         case GAUDI_EVENT_MME2_SBAB_DERR:
7594         case GAUDI_EVENT_MME3_ACC_DERR:
7595         case GAUDI_EVENT_MME3_SBAB_DERR:
7596         case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7597                 fallthrough;
7598         case GAUDI_EVENT_CPU_IF_ECC_DERR:
7599         case GAUDI_EVENT_PSOC_MEM_DERR:
7600         case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7601         case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7602         case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
7603         case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7604         case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7605         case GAUDI_EVENT_MMU_DERR:
7606         case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7607                 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7608                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7609                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7610                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7611                 goto reset_device;
7612
7613         case GAUDI_EVENT_GIC500:
7614         case GAUDI_EVENT_AXI_ECC:
7615         case GAUDI_EVENT_L2_RAM_ECC:
7616         case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7617                 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7618                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7619                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7620                 goto reset_device;
7621
7622         case GAUDI_EVENT_HBM0_SPI_0:
7623         case GAUDI_EVENT_HBM1_SPI_0:
7624         case GAUDI_EVENT_HBM2_SPI_0:
7625         case GAUDI_EVENT_HBM3_SPI_0:
7626                 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7627                 gaudi_hbm_read_interrupts(hdev,
7628                                 gaudi_hbm_event_to_dev(event_type),
7629                                 &eq_entry->hbm_ecc_data);
7630                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7631                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7632                 goto reset_device;
7633
7634         case GAUDI_EVENT_HBM0_SPI_1:
7635         case GAUDI_EVENT_HBM1_SPI_1:
7636         case GAUDI_EVENT_HBM2_SPI_1:
7637         case GAUDI_EVENT_HBM3_SPI_1:
7638                 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7639                 gaudi_hbm_read_interrupts(hdev,
7640                                 gaudi_hbm_event_to_dev(event_type),
7641                                 &eq_entry->hbm_ecc_data);
7642                 hl_fw_unmask_irq(hdev, event_type);
7643                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7644                 break;
7645
7646         case GAUDI_EVENT_TPC0_DEC:
7647         case GAUDI_EVENT_TPC1_DEC:
7648         case GAUDI_EVENT_TPC2_DEC:
7649         case GAUDI_EVENT_TPC3_DEC:
7650         case GAUDI_EVENT_TPC4_DEC:
7651         case GAUDI_EVENT_TPC5_DEC:
7652         case GAUDI_EVENT_TPC6_DEC:
7653         case GAUDI_EVENT_TPC7_DEC:
7654                 /* On a TPC DEC event, also notify about a TPC assertion. There is no
7655                  * dedicated assertion event yet, so the FW reports it as a TPC DEC event.
7656                  * The SW upper layer inspects an internally mapped area to determine
7657                  * whether the event is a TPC assertion or a "real" TPC DEC.
7658                  */
7659                 event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
7660                 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7661                 reset_required = gaudi_tpc_read_interrupts(hdev,
7662                                         tpc_dec_event_to_tpc_id(event_type),
7663                                         "AXI_SLV_DEC_Error");
7664                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7665                 if (reset_required) {
7666                         dev_err(hdev->dev, "reset required due to %s\n",
7667                                 gaudi_irq_map_table[event_type].name);
7668
7669                         reset_direct = true;
7670                         goto reset_device;
7671                 } else {
7672                         hl_fw_unmask_irq(hdev, event_type);
7673                         event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7674                 }
7675                 break;
7676
7677         case GAUDI_EVENT_TPC0_KRN_ERR:
7678         case GAUDI_EVENT_TPC1_KRN_ERR:
7679         case GAUDI_EVENT_TPC2_KRN_ERR:
7680         case GAUDI_EVENT_TPC3_KRN_ERR:
7681         case GAUDI_EVENT_TPC4_KRN_ERR:
7682         case GAUDI_EVENT_TPC5_KRN_ERR:
7683         case GAUDI_EVENT_TPC6_KRN_ERR:
7684         case GAUDI_EVENT_TPC7_KRN_ERR:
7685                 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7686                 reset_required = gaudi_tpc_read_interrupts(hdev,
7687                                         tpc_krn_event_to_tpc_id(event_type),
7688                                         "KRN_ERR");
7689                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7690                 if (reset_required) {
7691                         dev_err(hdev->dev, "reset required due to %s\n",
7692                                 gaudi_irq_map_table[event_type].name);
7693
7694                         reset_direct = true;
7695                         goto reset_device;
7696                 } else {
7697                         hl_fw_unmask_irq(hdev, event_type);
7698                         event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7699                 }
7700                 break;
7701
7702         case GAUDI_EVENT_PCIE_CORE_SERR:
7703         case GAUDI_EVENT_PCIE_IF_SERR:
7704         case GAUDI_EVENT_PCIE_PHY_SERR:
7705         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7706         case GAUDI_EVENT_MME0_ACC_SERR:
7707         case GAUDI_EVENT_MME0_SBAB_SERR:
7708         case GAUDI_EVENT_MME1_ACC_SERR:
7709         case GAUDI_EVENT_MME1_SBAB_SERR:
7710         case GAUDI_EVENT_MME2_ACC_SERR:
7711         case GAUDI_EVENT_MME2_SBAB_SERR:
7712         case GAUDI_EVENT_MME3_ACC_SERR:
7713         case GAUDI_EVENT_MME3_SBAB_SERR:
7714         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7715         case GAUDI_EVENT_CPU_IF_ECC_SERR:
7716         case GAUDI_EVENT_PSOC_MEM_SERR:
7717         case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7718         case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7719         case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
7720         case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7721         case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7722                 fallthrough;
7723         case GAUDI_EVENT_MMU_SERR:
7724                 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7725                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7726                 hl_fw_unmask_irq(hdev, event_type);
7727                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7728                 break;
7729
7730         case GAUDI_EVENT_PCIE_DEC:
7731         case GAUDI_EVENT_CPU_AXI_SPLITTER:
7732         case GAUDI_EVENT_PSOC_AXI_DEC:
7733         case GAUDI_EVENT_PSOC_PRSTN_FALL:
7734                 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7735                 hl_fw_unmask_irq(hdev, event_type);
7736                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7737                 break;
7738
7739         case GAUDI_EVENT_MMU_PAGE_FAULT:
7740         case GAUDI_EVENT_MMU_WR_PERM:
7741                 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7742                 hl_fw_unmask_irq(hdev, event_type);
7743                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7744                 break;
7745
7746         case GAUDI_EVENT_MME0_WBC_RSP:
7747         case GAUDI_EVENT_MME0_SBAB0_RSP:
7748         case GAUDI_EVENT_MME1_WBC_RSP:
7749         case GAUDI_EVENT_MME1_SBAB0_RSP:
7750         case GAUDI_EVENT_MME2_WBC_RSP:
7751         case GAUDI_EVENT_MME2_SBAB0_RSP:
7752         case GAUDI_EVENT_MME3_WBC_RSP:
7753         case GAUDI_EVENT_MME3_SBAB0_RSP:
7754         case GAUDI_EVENT_RAZWI_OR_ADC:
7755         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7756         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7757                 fallthrough;
7758         case GAUDI_EVENT_NIC0_QM0:
7759         case GAUDI_EVENT_NIC0_QM1:
7760         case GAUDI_EVENT_NIC1_QM0:
7761         case GAUDI_EVENT_NIC1_QM1:
7762         case GAUDI_EVENT_NIC2_QM0:
7763         case GAUDI_EVENT_NIC2_QM1:
7764         case GAUDI_EVENT_NIC3_QM0:
7765         case GAUDI_EVENT_NIC3_QM1:
7766         case GAUDI_EVENT_NIC4_QM0:
7767         case GAUDI_EVENT_NIC4_QM1:
7768         case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7769         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7770                 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7771                 gaudi_handle_qman_err(hdev, event_type, &event_mask);
7772                 hl_fw_unmask_irq(hdev, event_type);
7773                 event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
7774                 break;
7775
7776         case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7777                 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7778                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7779                 goto reset_device;
7780
7781         case GAUDI_EVENT_TPC0_BMON_SPMU:
7782         case GAUDI_EVENT_TPC1_BMON_SPMU:
7783         case GAUDI_EVENT_TPC2_BMON_SPMU:
7784         case GAUDI_EVENT_TPC3_BMON_SPMU:
7785         case GAUDI_EVENT_TPC4_BMON_SPMU:
7786         case GAUDI_EVENT_TPC5_BMON_SPMU:
7787         case GAUDI_EVENT_TPC6_BMON_SPMU:
7788         case GAUDI_EVENT_TPC7_BMON_SPMU:
7789         case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7790                 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7791                 hl_fw_unmask_irq(hdev, event_type);
7792                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7793                 break;
7794
7795         case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
7796                 gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
7797                 hl_fw_unmask_irq(hdev, event_type);
7798                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7799                 break;
7800
7801         case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7802                 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7803                 gaudi_print_sm_sei_info(hdev, event_type,
7804                                         &eq_entry->sm_sei_data);
7805                 rc = hl_state_dump(hdev);
7806                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7807                 if (rc)
7808                         dev_err(hdev->dev,
7809                                 "Error during system state dump %d\n", rc);
7810                 hl_fw_unmask_irq(hdev, event_type);
7811                 break;
7812
7813         case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
7814                 break;
7815
7816         case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7817                 gaudi_print_clk_change_info(hdev, event_type, &event_mask);
7818                 hl_fw_unmask_irq(hdev, event_type);
7819                 break;
7820
7821         case GAUDI_EVENT_PSOC_GPIO_U16_0:
7822                 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7823                 dev_err(hdev->dev,
7824                         "Received high temp H/W interrupt %d (cause %d)\n",
7825                         event_type, cause);
7826                 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7827                 break;
7828
7829         case GAUDI_EVENT_DEV_RESET_REQ:
7830                 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7831                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7832                 goto reset_device;
7833
7834         case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
7835                 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7836                 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
7837                 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7838                 goto reset_device;
7839
7840         case GAUDI_EVENT_FW_ALIVE_S:
7841                 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7842                 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
7843                 fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR;
7844                 fw_err_info.event_id = event_type;
7845                 fw_err_info.event_mask = &event_mask;
7846                 hl_handle_fw_err(hdev, &fw_err_info);
7847                 goto reset_device;
7848
7849         default:
7850                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7851                                 event_type);
7852                 break;
7853         }
7854
7855         if (event_mask)
7856                 hl_notifier_event_send_all(hdev, event_mask);
7857
7858         return;
7859
7860 reset_device:
7861         reset_required = true;
7862
7863         if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
7864                 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;
7865
7866                 /* notify on device unavailable while the reset is triggered by FW */
7867                 event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
7868                                         HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
7869         } else if (hdev->hard_reset_on_fw_events) {
7870                 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
7871                 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7872         } else {
7873                 reset_required = false;
7874         }
7875
7876         if (reset_required) {
7877                 /* escalate general hw errors to critical/fatal error */
7878                 if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
7879                         hl_handle_critical_hw_err(hdev, event_type, &event_mask);
7880
7881                 hl_device_cond_reset(hdev, flags, event_mask);
7882         } else {
7883                 hl_fw_unmask_irq(hdev, event_type);
7884                 /* The notification about the event must be sent even though no reset is executed */
7885                 if (event_mask)
7886                         hl_notifier_event_send_all(hdev, event_mask);
7887         }
7888 }
7889
7890 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7891 {
7892         struct gaudi_device *gaudi = hdev->asic_specific;
7893
7894         if (aggregate) {
7895                 *size = (u32) sizeof(gaudi->events_stat_aggregate);
7896                 return gaudi->events_stat_aggregate;
7897         }
7898
7899         *size = (u32) sizeof(gaudi->events_stat);
7900         return gaudi->events_stat;
7901 }
7902
7903 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
7904 {
7905         struct gaudi_device *gaudi = hdev->asic_specific;
7906         u32 status, timeout_usec;
7907         int rc;
7908
7909         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7910                 hdev->reset_info.hard_reset_pending)
7911                 return 0;
7912
7913         if (hdev->pldm)
7914                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7915         else
7916                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7917
7918         /* L0 & L1 invalidation */
7919         WREG32(mmSTLB_INV_PS, 3);
7920         WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
7921         WREG32(mmSTLB_INV_PS, 2);
7922
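        /* Wait for the MMU to report invalidation completion (INV_PS returns to 0) */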
7923         rc = hl_poll_timeout(
7924                 hdev,
7925                 mmSTLB_INV_PS,
7926                 status,
7927                 !status,
7928                 1000,
7929                 timeout_usec);
7930
7931         WREG32(mmSTLB_INV_SET, 0);
7932
7933         return rc;
7934 }
7935
7936 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
7937                                                 bool is_hard, u32 flags,
7938                                                 u32 asid, u64 va, u64 size)
7939 {
7940         /* Treat as invalidate all because there is no range invalidation
7941          * in Gaudi
7942          */
7943         return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
7944 }
7945
7946 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
7947 {
7948         u32 status, timeout_usec;
7949         int rc;
7950
7951         if (hdev->pldm)
7952                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7953         else
7954                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7955
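        /*
         * Program the hop0 page-table base for this ASID. The physical address
         * is split into its 43..12 and 49..44 bit fields; e.g. (assuming the
         * shifts match the field names) 0x123456000 would be written as
         * PA43_12 = 0x123456 and PA49_44 = 0x0.
         */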
7956         WREG32(MMU_ASID, asid);
7957         WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
7958         WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
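        /* Kick the MMU by setting the busy bit; HW clears it once the new config is applied */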
7959         WREG32(MMU_BUSY, 0x80000000);
7960
7961         rc = hl_poll_timeout(
7962                 hdev,
7963                 MMU_BUSY,
7964                 status,
7965                 !(status & 0x80000000),
7966                 1000,
7967                 timeout_usec);
7968
7969         if (rc) {
7970                 dev_err(hdev->dev,
7971                         "Timeout during MMU hop0 config of asid %d\n", asid);
7972                 return rc;
7973         }
7974
7975         return 0;
7976 }
7977
7978 static int gaudi_send_heartbeat(struct hl_device *hdev)
7979 {
7980         struct gaudi_device *gaudi = hdev->asic_specific;
7981
7982         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7983                 return 0;
7984
7985         return hl_fw_send_heartbeat(hdev);
7986 }
7987
7988 static int gaudi_cpucp_info_get(struct hl_device *hdev)
7989 {
7990         struct gaudi_device *gaudi = hdev->asic_specific;
7991         struct asic_fixed_properties *prop = &hdev->asic_prop;
7992         int rc;
7993
7994         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7995                 return 0;
7996
7997         rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
7998                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
7999                                         mmCPU_BOOT_ERR1);
8000         if (rc)
8001                 return rc;
8002
8003         if (!strlen(prop->cpucp_info.card_name))
8004                 strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8005                                 CARD_NAME_MAX_LEN);
8006
8007         hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8008
8009         set_default_power_values(hdev);
8010
8011         return 0;
8012 }
8013
8014 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
8015                 struct engines_data *e)
8016 {
8017         struct gaudi_device *gaudi = hdev->asic_specific;
8018         const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8019         const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8020         const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8021         unsigned long *mask = (unsigned long *)mask_arr;
8022         u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8023         bool is_idle = true, is_eng_idle, is_slave;
8024         u64 offset;
8025         int i, dma_id, port;
8026
8027         if (e)
8028                 hl_engine_data_sprintf(e,
8029                         "\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8030                         "---  -------  ------------  ----------  -------------\n");
8031
8032         for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8033                 dma_id = gaudi_dma_assignment[i];
8034                 offset = dma_id * DMA_QMAN_OFFSET;
8035
8036                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8037                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8038                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8039                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8040                                 IS_DMA_IDLE(dma_core_sts0);
8041                 is_idle &= is_eng_idle;
8042
8043                 if (mask && !is_eng_idle)
8044                         set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8045                 if (e)
8046                         hl_engine_data_sprintf(e, fmt, dma_id,
8047                                 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8048                                 qm_cgm_sts, dma_core_sts0);
8049         }
8050
8051         if (e)
8052                 hl_engine_data_sprintf(e,
8053                         "\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8054                         "---  -------  ------------  ----------  ----------\n");
8055
8056         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8057                 offset = i * TPC_QMAN_OFFSET;
8058                 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8059                 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8060                 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8061                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8062                                 IS_TPC_IDLE(tpc_cfg_sts);
8063                 is_idle &= is_eng_idle;
8064
8065                 if (mask && !is_eng_idle)
8066                         set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8067                 if (e)
8068                         hl_engine_data_sprintf(e, fmt, i,
8069                                 is_eng_idle ? "Y" : "N",
8070                                 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8071         }
8072
8073         if (e)
8074                 hl_engine_data_sprintf(e,
8075                         "\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8076                         "---  -------  ------------  ----------  -----------\n");
8077
8078         for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8079                 offset = i * MME_QMAN_OFFSET;
8080                 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8081                 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8082
8083                 /* MME 1 & 3 are slaves, no need to check their QMANs */
8084                 is_slave = i % 2;
8085                 if (!is_slave) {
8086                         qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8087                         qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8088                         is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8089                 }
8090
8091                 is_idle &= is_eng_idle;
8092
8093                 if (mask && !is_eng_idle)
8094                         set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8095                 if (e) {
8096                         if (!is_slave)
8097                                 hl_engine_data_sprintf(e, fmt, i,
8098                                         is_eng_idle ? "Y" : "N",
8099                                         qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8100                         else
8101                                 hl_engine_data_sprintf(e, mme_slave_fmt, i,
8102                                         is_eng_idle ? "Y" : "N", "-",
8103                                         "-", mme_arch_sts);
8104                 }
8105         }
8106
8107         if (e)
8108                 hl_engine_data_sprintf(e,
8109                                 "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8110                                 "---  -------  ------------  ----------\n");
8111
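        /*
         * Each NIC macro hosts two engines (QM0/QM1), so iterate per macro and
         * check both ports, skipping ports whose NIC capability was never
         * initialized.
         */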
8112         for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8113                 offset = i * NIC_MACRO_QMAN_OFFSET;
8114                 port = 2 * i;
8115                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8116                         qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8117                         qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8118                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8119                         is_idle &= is_eng_idle;
8120
8121                         if (mask && !is_eng_idle)
8122                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8123                         if (e)
8124                                 hl_engine_data_sprintf(e, nic_fmt, port,
8125                                                 is_eng_idle ? "Y" : "N",
8126                                                 qm_glbl_sts0, qm_cgm_sts);
8127                 }
8128
8129                 port = 2 * i + 1;
8130                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8131                         qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8132                         qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8133                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8134                         is_idle &= is_eng_idle;
8135
8136                         if (mask && !is_eng_idle)
8137                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8138                         if (e)
8139                                 hl_engine_data_sprintf(e, nic_fmt, port,
8140                                                 is_eng_idle ? "Y" : "N",
8141                                                 qm_glbl_sts0, qm_cgm_sts);
8142                 }
8143         }
8144
8145         if (e)
8146                 hl_engine_data_sprintf(e, "\n");
8147
8148         return is_idle;
8149 }
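
/*
 * Illustrative usage sketch (not part of the driver; names and sizes assumed):
 * a caller that only needs the per-engine busy bits can pass a mask array and
 * skip the textual report:
 *
 *	u64 busy[8] = {0};
 *
 *	if (!gaudi_is_device_idle(hdev, busy, ARRAY_SIZE(busy), NULL))
 *		dev_dbg(hdev->dev, "busy engines mask[0]: 0x%llx\n", busy[0]);
 */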
8150
8151 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8152         __acquires(&gaudi->hw_queues_lock)
8153 {
8154         struct gaudi_device *gaudi = hdev->asic_specific;
8155
8156         spin_lock(&gaudi->hw_queues_lock);
8157 }
8158
8159 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8160         __releases(&gaudi->hw_queues_lock)
8161 {
8162         struct gaudi_device *gaudi = hdev->asic_specific;
8163
8164         spin_unlock(&gaudi->hw_queues_lock);
8165 }
8166
8167 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8168 {
8169         return hdev->pdev->device;
8170 }
8171
8172 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8173                                 size_t max_size)
8174 {
8175         struct gaudi_device *gaudi = hdev->asic_specific;
8176
8177         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8178                 return 0;
8179
8180         return hl_fw_get_eeprom_data(hdev, data, max_size);
8181 }
8182
8183 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8184 {
8185         struct gaudi_device *gaudi = hdev->asic_specific;
8186
8187         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8188                 return 0;
8189
8190         return hl_fw_get_monitor_dump(hdev, data);
8191 }
8192
8193 /*
8194  * this function should be used only during initialization and/or after reset,
8195  * when there are no active users.
8196  */
8197 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id)
8198 {
8199         u64 kernel_timeout;
8200         u32 status, offset;
8201         int rc;
8202
8203         offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8204
8205         if (hdev->pldm)
8206                 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8207         else
8208                 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8209
8210         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8211                         lower_32_bits(tpc_kernel));
8212         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8213                         upper_32_bits(tpc_kernel));
8214
8215         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8216                         lower_32_bits(tpc_kernel));
8217         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8218                         upper_32_bits(tpc_kernel));
8219         /* set a valid LUT pointer, content is of no significance */
8220         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8221                         lower_32_bits(tpc_kernel));
8222         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8223                         upper_32_bits(tpc_kernel));
8224
8225         WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8226                         lower_32_bits(CFG_BASE +
8227                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8228
8229         WREG32(mmTPC0_CFG_TPC_CMD + offset,
8230                         (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8231                         1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8232         /* wait a bit for the engine to start executing */
8233         usleep_range(1000, 1500);
8234
8235         /* wait until engine has finished executing */
8236         rc = hl_poll_timeout(
8237                 hdev,
8238                 mmTPC0_CFG_STATUS + offset,
8239                 status,
8240                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8241                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8242                 1000,
8243                 kernel_timeout);
8244
8245         if (rc) {
8246                 dev_err(hdev->dev,
8247                         "Timeout while waiting for TPC%d icache prefetch\n",
8248                         tpc_id);
8249                 return -EIO;
8250         }
8251
8252         WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8253                         1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8254
8255         /* wait a bit for the engine to start executing */
8256         usleep_range(1000, 1500);
8257
8258         /* wait until engine has finished executing */
8259         rc = hl_poll_timeout(
8260                 hdev,
8261                 mmTPC0_CFG_STATUS + offset,
8262                 status,
8263                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8264                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8265                 1000,
8266                 kernel_timeout);
8267
8268         if (rc) {
8269                 dev_err(hdev->dev,
8270                         "Timeout while waiting for TPC%d vector pipe\n",
8271                         tpc_id);
8272                 return -EIO;
8273         }
8274
8275         rc = hl_poll_timeout(
8276                 hdev,
8277                 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8278                 status,
8279                 (status == 0),
8280                 1000,
8281                 kernel_timeout);
8282
8283         if (rc) {
8284                 dev_err(hdev->dev,
8285                         "Timeout while waiting for TPC%d kernel to execute\n",
8286                         tpc_id);
8287                 return -EIO;
8288         }
8289
8290         return 0;
8291 }
8292
8293 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8294                 struct hl_ctx *ctx)
8295 {
8296         struct gaudi_device *gaudi = hdev->asic_specific;
8297         int min_alloc_order, rc, collective_cb_size;
8298
8299         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8300                 return 0;
8301
8302         hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
8303                                                         HOST_SPACE_INTERNAL_CB_SZ,
8304                                                         &hdev->internal_cb_pool_dma_addr,
8305                                                         GFP_KERNEL | __GFP_ZERO);
8306
8307         if (!hdev->internal_cb_pool_virt_addr)
8308                 return -ENOMEM;
8309
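        /*
         * The pool's minimum allocation order is derived from the size of one
         * collective CB: five MSG_SHORT packets plus a FENCE packet.
         */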
8310         collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8311                         sizeof(struct packet_fence);
8312         min_alloc_order = ilog2(collective_cb_size);
8313
8314         hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8315         if (!hdev->internal_cb_pool) {
8316                 dev_err(hdev->dev,
8317                         "Failed to create internal CB pool\n");
8318                 rc = -ENOMEM;
8319                 goto free_internal_cb_pool;
8320         }
8321
8322         rc = gen_pool_add(hdev->internal_cb_pool,
8323                                 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8324                                 HOST_SPACE_INTERNAL_CB_SZ, -1);
8325         if (rc) {
8326                 dev_err(hdev->dev,
8327                         "Failed to add memory to internal CB pool\n");
8328                 rc = -EFAULT;
8329                 goto destroy_internal_cb_pool;
8330         }
8331
8332         hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8333                         HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8334                         HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8335
8336         if (!hdev->internal_cb_va_base) {
8337                 rc = -ENOMEM;
8338                 goto destroy_internal_cb_pool;
8339         }
8340
8341         mutex_lock(&hdev->mmu_lock);
8342
8343         rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8344                         hdev->internal_cb_pool_dma_addr,
8345                         HOST_SPACE_INTERNAL_CB_SZ);
8346         if (rc)
8347                 goto unreserve_internal_cb_pool;
8348
8349         rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8350         if (rc)
8351                 goto unmap_internal_cb_pool;
8352
8353         mutex_unlock(&hdev->mmu_lock);
8354
8355         return 0;
8356
8357 unmap_internal_cb_pool:
8358         hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8359                         HOST_SPACE_INTERNAL_CB_SZ);
8360 unreserve_internal_cb_pool:
8361         mutex_unlock(&hdev->mmu_lock);
8362         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8363                         HOST_SPACE_INTERNAL_CB_SZ);
8364 destroy_internal_cb_pool:
8365         gen_pool_destroy(hdev->internal_cb_pool);
8366 free_internal_cb_pool:
8367         hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8368                                         hdev->internal_cb_pool_dma_addr);
8369
8370         return rc;
8371 }
8372
8373 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8374                 struct hl_ctx *ctx)
8375 {
8376         struct gaudi_device *gaudi = hdev->asic_specific;
8377
8378         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8379                 return;
8380
8381         mutex_lock(&hdev->mmu_lock);
8382         hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8383                         HOST_SPACE_INTERNAL_CB_SZ);
8384         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8385                         HOST_SPACE_INTERNAL_CB_SZ);
8386         hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8387         mutex_unlock(&hdev->mmu_lock);
8388
8389         gen_pool_destroy(hdev->internal_cb_pool);
8390
8391         hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8392                                         hdev->internal_cb_pool_dma_addr);
8393 }
8394
8395 static int gaudi_ctx_init(struct hl_ctx *ctx)
8396 {
8397         int rc;
8398
8399         if (ctx->asid == HL_KERNEL_ASID_ID)
8400                 return 0;
8401
8402         rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8403         if (rc)
8404                 return rc;
8405
8406         rc = gaudi_restore_user_registers(ctx->hdev);
8407         if (rc)
8408                 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8409
8410         return rc;
8411 }
8412
8413 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8414 {
8415         if (ctx->asid == HL_KERNEL_ASID_ID)
8416                 return;
8417
8418         gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8419 }
8420
8421 static int gaudi_pre_schedule_cs(struct hl_cs *cs)
8422 {
8423         return 0;
8424 }
8425
8426 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8427 {
8428         return gaudi_cq_assignment[cq_idx];
8429 }
8430
8431 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8432 {
8433         return sizeof(struct packet_msg_short) +
8434                         sizeof(struct packet_msg_prot) * 2;
8435 }
8436
8437 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8438 {
8439         return sizeof(struct packet_msg_short) * 4 +
8440                         sizeof(struct packet_fence) +
8441                         sizeof(struct packet_msg_prot) * 2;
8442 }
8443
8444 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8445 {
8446         return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8447 }
8448
8449 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8450                                 u32 size, bool eb)
8451 {
8452         struct hl_cb *cb = (struct hl_cb *) data;
8453         struct packet_msg_short *pkt;
8454         u32 value, ctl, pkt_size = sizeof(*pkt);
8455
8456         pkt = cb->kernel_address + size;
8457         memset(pkt, 0, pkt_size);
8458
8459         /* Inc by 1, Mode ADD */
8460         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8461         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8462
8463         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8464         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8465         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8466         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8467         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8468         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8469         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8470
8471         pkt->value = cpu_to_le32(value);
8472         pkt->ctl = cpu_to_le32(ctl);
8473
8474         return size + pkt_size;
8475 }
8476
8477 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8478                                         u16 addr)
8479 {
8480         u32 ctl, pkt_size = sizeof(*pkt);
8481
8482         memset(pkt, 0, pkt_size);
8483
8484         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8485         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8486         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8487         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8488         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8489         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8490
8491         pkt->value = cpu_to_le32(value);
8492         pkt->ctl = cpu_to_le32(ctl);
8493
8494         return pkt_size;
8495 }
8496
8497 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8498                 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8499                 u16 sob_val, u16 mon_id)
8500 {
8501         u64 monitor_base;
8502         u32 ctl, value, pkt_size = sizeof(*pkt);
8503         u16 msg_addr_offset;
8504         u8 mask;
8505
8506         if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8507                 dev_err(hdev->dev,
8508                         "sob_base %u (mask %#x) is not valid\n",
8509                         sob_base, sob_mask);
8510                 return 0;
8511         }
8512
8513         /*
8514          * monitor_base should be the content of the base0 address registers,
8515          * so it will be added to the msg short offsets
8516          */
8517         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8518
8519         msg_addr_offset =
8520                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8521                                 monitor_base;
8522
8523         memset(pkt, 0, pkt_size);
8524
8525         /* Monitor config packet: bind the monitor to a sync object */
8526         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8527         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8528         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8529                         0); /* GREATER OR EQUAL */
8530         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8531
8532         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8533         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8534         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8535         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8536         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8537         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8538         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8539
8540         pkt->value = cpu_to_le32(value);
8541         pkt->ctl = cpu_to_le32(ctl);
8542
8543         return pkt_size;
8544 }
8545
8546 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8547 {
8548         u32 ctl, cfg, pkt_size = sizeof(*pkt);
8549
8550         memset(pkt, 0, pkt_size);
8551
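        /* Arm fence ID 2: wait for its counter to reach 1, then decrement it by 1 */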
8552         cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8553         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8554         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8555
8556         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8557         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8558         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8559         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8560
8561         pkt->cfg = cpu_to_le32(cfg);
8562         pkt->ctl = cpu_to_le32(ctl);
8563
8564         return pkt_size;
8565 }
8566
8567 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8568 {
8569         u32 offset, nic_index;
8570
8571         switch (queue_id) {
8572         case GAUDI_QUEUE_ID_DMA_0_0:
8573                 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8574                 break;
8575         case GAUDI_QUEUE_ID_DMA_0_1:
8576                 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8577                 break;
8578         case GAUDI_QUEUE_ID_DMA_0_2:
8579                 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8580                 break;
8581         case GAUDI_QUEUE_ID_DMA_0_3:
8582                 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8583                 break;
8584         case GAUDI_QUEUE_ID_DMA_1_0:
8585                 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8586                 break;
8587         case GAUDI_QUEUE_ID_DMA_1_1:
8588                 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8589                 break;
8590         case GAUDI_QUEUE_ID_DMA_1_2:
8591                 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8592                 break;
8593         case GAUDI_QUEUE_ID_DMA_1_3:
8594                 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8595                 break;
8596         case GAUDI_QUEUE_ID_DMA_5_0:
8597                 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8598                 break;
8599         case GAUDI_QUEUE_ID_DMA_5_1:
8600                 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8601                 break;
8602         case GAUDI_QUEUE_ID_DMA_5_2:
8603                 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8604                 break;
8605         case GAUDI_QUEUE_ID_DMA_5_3:
8606                 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8607                 break;
8608         case GAUDI_QUEUE_ID_TPC_7_0:
8609                 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8610                 break;
8611         case GAUDI_QUEUE_ID_TPC_7_1:
8612                 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8613                 break;
8614         case GAUDI_QUEUE_ID_TPC_7_2:
8615                 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8616                 break;
8617         case GAUDI_QUEUE_ID_TPC_7_3:
8618                 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8619                 break;
8620         case GAUDI_QUEUE_ID_NIC_0_0:
8621         case GAUDI_QUEUE_ID_NIC_1_0:
8622         case GAUDI_QUEUE_ID_NIC_2_0:
8623         case GAUDI_QUEUE_ID_NIC_3_0:
8624         case GAUDI_QUEUE_ID_NIC_4_0:
8625         case GAUDI_QUEUE_ID_NIC_5_0:
8626         case GAUDI_QUEUE_ID_NIC_6_0:
8627         case GAUDI_QUEUE_ID_NIC_7_0:
8628         case GAUDI_QUEUE_ID_NIC_8_0:
8629         case GAUDI_QUEUE_ID_NIC_9_0:
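                /*
                 * Each NIC engine exposes 4 streams, so the engine index is the
                 * queue distance divided by 4; the even/odd engine within a NIC
                 * macro selects QM0/QM1.
                 */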
8630                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8631                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8632                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8633                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8634                 break;
8635         case GAUDI_QUEUE_ID_NIC_0_1:
8636         case GAUDI_QUEUE_ID_NIC_1_1:
8637         case GAUDI_QUEUE_ID_NIC_2_1:
8638         case GAUDI_QUEUE_ID_NIC_3_1:
8639         case GAUDI_QUEUE_ID_NIC_4_1:
8640         case GAUDI_QUEUE_ID_NIC_5_1:
8641         case GAUDI_QUEUE_ID_NIC_6_1:
8642         case GAUDI_QUEUE_ID_NIC_7_1:
8643         case GAUDI_QUEUE_ID_NIC_8_1:
8644         case GAUDI_QUEUE_ID_NIC_9_1:
8645                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8646                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8647                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8648                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8649                 break;
8650         case GAUDI_QUEUE_ID_NIC_0_2:
8651         case GAUDI_QUEUE_ID_NIC_1_2:
8652         case GAUDI_QUEUE_ID_NIC_2_2:
8653         case GAUDI_QUEUE_ID_NIC_3_2:
8654         case GAUDI_QUEUE_ID_NIC_4_2:
8655         case GAUDI_QUEUE_ID_NIC_5_2:
8656         case GAUDI_QUEUE_ID_NIC_6_2:
8657         case GAUDI_QUEUE_ID_NIC_7_2:
8658         case GAUDI_QUEUE_ID_NIC_8_2:
8659         case GAUDI_QUEUE_ID_NIC_9_2:
8660                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8661                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8662                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8663                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8664                 break;
8665         case GAUDI_QUEUE_ID_NIC_0_3:
8666         case GAUDI_QUEUE_ID_NIC_1_3:
8667         case GAUDI_QUEUE_ID_NIC_2_3:
8668         case GAUDI_QUEUE_ID_NIC_3_3:
8669         case GAUDI_QUEUE_ID_NIC_4_3:
8670         case GAUDI_QUEUE_ID_NIC_5_3:
8671         case GAUDI_QUEUE_ID_NIC_6_3:
8672         case GAUDI_QUEUE_ID_NIC_7_3:
8673         case GAUDI_QUEUE_ID_NIC_8_3:
8674         case GAUDI_QUEUE_ID_NIC_9_3:
8675                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8676                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8677                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8678                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8679                 break;
8680         default:
8681                 return -EINVAL;
8682         }
8683
8684         *addr = CFG_BASE + offset;
8685
8686         return 0;
8687 }
8688
8689 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8690 {
8691         u64 monitor_base;
8692         u32 size = 0;
8693         u16 msg_addr_offset;
8694
8695         /*
8696          * monitor_base should be the content of the base0 address registers,
8697          * so it will be added to the msg short offsets
8698          */
8699         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8700
8701         /* First monitor config packet: low address of the sync */
8702         msg_addr_offset =
8703                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8704                                 monitor_base;
8705
8706         size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8707                                         msg_addr_offset);
8708
8709         /* Second monitor config packet: high address of the sync */
8710         msg_addr_offset =
8711                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8712                                 monitor_base;
8713
8714         size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8715                                         msg_addr_offset);
8716
8717         /*
8718          * Third monitor config packet: the payload, i.e. what to write when the
8719          * sync triggers
8720          */
8721         msg_addr_offset =
8722                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8723                                 monitor_base;
8724
8725         size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8726
8727         return size;
8728 }
8729
8730 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8731                                 struct hl_gen_wait_properties *prop)
8732 {
8733         struct hl_cb *cb = (struct hl_cb *) prop->data;
8734         void *buf = cb->kernel_address;
8735         u64 fence_addr = 0;
8736         u32 size = prop->size;
8737
8738         if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8739                 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8740                                 prop->q_idx);
8741                 return 0;
8742         }
8743
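        /*
         * The wait CB is composed of three monitor config packets (payload
         * address low/high and payload data), an ARM packet that binds the
         * monitor to the SOB group and target value, and a FENCE packet that
         * stalls the stream until the monitor fires.
         */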
8744         size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8745         size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8746                         prop->sob_mask, prop->sob_val, prop->mon_id);
8747         size += gaudi_add_fence_pkt(buf + size);
8748
8749         return size;
8750 }
8751
8752 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8753 {
8754         struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8755
8756         dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8757                 hw_sob->sob_id);
8758
8759         WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8760                         hw_sob->sob_id * 4, 0);
8761
8762         kref_init(&hw_sob->kref);
8763 }
8764
8765 static u64 gaudi_get_device_time(struct hl_device *hdev)
8766 {
8767         u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8768
8769         return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8770 }
8771
8772 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
8773                                 u32 *block_size, u32 *block_id)
8774 {
8775         return -EPERM;
8776 }
8777
8778 static int gaudi_block_mmap(struct hl_device *hdev,
8779                                 struct vm_area_struct *vma,
8780                                 u32 block_id, u32 block_size)
8781 {
8782         return -EPERM;
8783 }
8784
8785 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8786 {
8787         struct cpu_dyn_regs *dyn_regs =
8788                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
8789         u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
8790                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
8791                         le32_to_cpu(dyn_regs->gic_host_ints_irq);
8792
8793         WREG32(irq_handler_offset,
8794                 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
8795 }
8796
8797 static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
8798 {
8799         return -EINVAL;
8800 }
8801
8802 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
8803 {
8804         switch (pll_idx) {
8805         case HL_GAUDI_CPU_PLL: return CPU_PLL;
8806         case HL_GAUDI_PCI_PLL: return PCI_PLL;
8807         case HL_GAUDI_NIC_PLL: return NIC_PLL;
8808         case HL_GAUDI_DMA_PLL: return DMA_PLL;
8809         case HL_GAUDI_MESH_PLL: return MESH_PLL;
8810         case HL_GAUDI_MME_PLL: return MME_PLL;
8811         case HL_GAUDI_TPC_PLL: return TPC_PLL;
8812         case HL_GAUDI_IF_PLL: return IF_PLL;
8813         case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
8814         case HL_GAUDI_HBM_PLL: return HBM_PLL;
8815         default: return -EINVAL;
8816         }
8817 }
8818
8819 static int gaudi_add_sync_to_engine_map_entry(
8820         struct hl_sync_to_engine_map *map, u32 reg_value,
8821         enum hl_sync_engine_type engine_type, u32 engine_id)
8822 {
8823         struct hl_sync_to_engine_map_entry *entry;
8824
8825         /* The register value holds a partial address of the sync object and
8826          * is used as a unique identifier. For that purpose we need to strip
8827          * the CFG base bits from the value.
8828          */
8829         if (reg_value == 0 || reg_value == 0xffffffff)
8830                 return 0;
8831         reg_value -= lower_32_bits(CFG_BASE);
8832
8833         /* create a new hash entry */
8834         entry = kzalloc(sizeof(*entry), GFP_KERNEL);
8835         if (!entry)
8836                 return -ENOMEM;
8837         entry->engine_type = engine_type;
8838         entry->engine_id = engine_id;
8839         entry->sync_id = reg_value;
8840         hash_add(map->tb, &entry->node, reg_value);
8841
8842         return 0;
8843 }
8844
8845 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
8846                                 struct hl_sync_to_engine_map *map)
8847 {
8848         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8849         int i, j, rc;
8850         u32 reg_value;
8851
8852         /* Iterate over TPC engines */
8853         for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
8854
8855                 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
8856                                         sds->props[SP_NEXT_TPC] * i);
8857
8858                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8859                                                         ENGINE_TPC, i);
8860                 if (rc)
8861                         goto free_sync_to_engine_map;
8862         }
8863
8864         /* Iterate over MME engines */
8865         for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
8866                 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
8867
8868                         reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
8869                                                 sds->props[SP_NEXT_MME] * i +
8870                                                 j * sizeof(u32));
8871
8872                         rc = gaudi_add_sync_to_engine_map_entry(
8873                                 map, reg_value, ENGINE_MME,
8874                                 i * sds->props[SP_SUB_MME_ENG_NUM] + j);
8875                         if (rc)
8876                                 goto free_sync_to_engine_map;
8877                 }
8878         }
8879
8880         /* Iterate over DMA engines */
8881         for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
8882                 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
8883                                         sds->props[SP_DMA_QUEUES_OFFSET] * i);
8884                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8885                                                         ENGINE_DMA, i);
8886                 if (rc)
8887                         goto free_sync_to_engine_map;
8888         }
8889
8890         return 0;
8891
8892 free_sync_to_engine_map:
8893         hl_state_dump_free_sync_to_engine_map(map);
8894
8895         return rc;
8896 }
8897
8898 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
8899 {
8900         return FIELD_GET(
8901                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
8902                 mon->status);
8903 }
8904
8905 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
8906 {
8907         const size_t max_write = 10;
8908         u32 gid, mask, sob;
8909         int i, offset;
8910
8911         /* Sync object ID is calculated as follows:
8912          * (8 * group_id + cleared bits in mask)
8913          */
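        /*
         * Worked example (illustrative, assuming MONITOR_MAX_SOBS is 8): with
         * arm_data giving group id 2 and mask 0xFE (only bit 0 cleared), the
         * single monitored SOB ID is 8 * 2 + 0 = 16.
         */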
8914         gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8915                         mon->arm_data);
8916         mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8917                         mon->arm_data);
8918
8919         for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
8920                 max_write; mask >>= 1, i++) {
8921                 if (!(mask & 1)) {
8922                         sob = gid * MONITOR_MAX_SOBS + i;
8923
8924                         if (offset > 0)
8925                                 offset += snprintf(sobs + offset, max_write,
8926                                                         ", ");
8927
8928                         offset += snprintf(sobs + offset, max_write, "%u", sob);
8929                 }
8930         }
8931 }
8932
8933 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
8934                                 struct hl_device *hdev,
8935                                 struct hl_mon_state_dump *mon)
8936 {
8937         const char *name;
8938         char scratch_buf1[BIN_REG_STRING_SIZE],
8939                 scratch_buf2[BIN_REG_STRING_SIZE];
8940         char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
8941
8942         name = hl_state_dump_get_monitor_name(hdev, mon);
8943         if (!name)
8944                 name = "";
8945
8946         gaudi_fill_sobs_from_mon(monitored_sobs, mon);
8947
8948         return hl_snprintf_resize(
8949                 buf, size, offset,
8950                 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
8951                 mon->id, name,
8952                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8953                                 mon->arm_data),
8954                 hl_format_as_binary(
8955                         scratch_buf1, sizeof(scratch_buf1),
8956                         FIELD_GET(
8957                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8958                                 mon->arm_data)),
8959                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
8960                                 mon->arm_data),
8961                 mon->wr_data,
8962                 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
8963                 hl_format_as_binary(
8964                         scratch_buf2, sizeof(scratch_buf2),
8965                         FIELD_GET(
8966                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
8967                                 mon->status)),
8968                 monitored_sobs);
8969 }
8970
8971
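/*
 * gaudi_print_fences_single_engine - dump the active fences of one engine's
 * QMAN. The per-stream CP status registers and the fence counter registers
 * are sampled, then a line is printed for each stream whose status indicates
 * a fence is in progress, including the addresses of the matching
 * CP_FENCE*_CNT and CP_FENCE*_RDATA registers. The CNT registers are laid
 * out as SP_ENGINE_NUM_OF_QUEUES consecutive u32s per fence id, hence the
 * address arithmetic below; the RDATA registers mirror that layout at a
 * fixed offset from the CNT registers.
 */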
8972 static int gaudi_print_fences_single_engine(
8973         struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
8974         enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
8975         size_t *size, size_t *offset)
8976 {
8977         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8978         int rc = -ENOMEM, i;
8979         u32 *statuses, *fences;
8980
8981         statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
8982                         sizeof(*statuses), GFP_KERNEL);
8983         if (!statuses)
8984                 goto out;
8985
8986         fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
8987                                 sds->props[SP_ENGINE_NUM_OF_QUEUES],
8988                          sizeof(*fences), GFP_KERNEL);
8989         if (!fences)
8990                 goto free_status;
8991
8992         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
8993                 statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
8994
8995         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
8996                                 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
8997                 fences[i] = RREG32(base_offset + i * sizeof(u32));
8998
8999         /* Print one line for every stream that has a fence in progress */
9000         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9001                 u32 fence_id;
9002                 u64 fence_cnt, fence_rdata;
9003                 const char *engine_name;
9004
9005                 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9006                         statuses[i]))
9007                         continue;
9008
9009                 fence_id =
9010                         FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9011                 fence_cnt = base_offset + CFG_BASE +
9012                         sizeof(u32) *
9013                         (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9014                 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9015                                 sds->props[SP_FENCE0_RDATA_OFFSET];
9016                 engine_name = hl_sync_engine_to_string(engine_type);
9017
9018                 rc = hl_snprintf_resize(
9019                         buf, size, offset,
9020                         "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9021                         engine_name, engine_id,
9022                         i, fence_id,
9023                         fence_cnt, engine_name, engine_id, fence_id, i,
9024                         fence_rdata, engine_name, engine_id, fence_id, i,
9025                         fences[fence_id],
9026                         statuses[i]);
9027                 if (rc)
9028                         goto free_fences;
9029         }
9030
9031         rc = 0;
9032
9033 free_fences:
9034         kfree(fences);
9035 free_status:
9036         kfree(statuses);
9037 out:
9038         return rc;
9039 }
9040
9041
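/* Gaudi callbacks used by the common state dump infrastructure */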
9042 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9043         .monitor_valid = gaudi_monitor_valid,
9044         .print_single_monitor = gaudi_print_single_monitor,
9045         .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9046         .print_fences_single_engine = gaudi_print_fences_single_engine,
9047 };
9048
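/*
 * gaudi_state_dump_init - register the Gaudi-specific state dump data:
 * hash tables mapping SOB and monitor ids to printable names, the property
 * table, the sync manager names and the callback functions above.
 */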
9049 static void gaudi_state_dump_init(struct hl_device *hdev)
9050 {
9051         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9052         int i;
9053
9054         for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9055                 hash_add(sds->so_id_to_str_tb,
9056                         &gaudi_so_id_to_str[i].node,
9057                         gaudi_so_id_to_str[i].id);
9058
9059         for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9060                 hash_add(sds->monitor_id_to_str_tb,
9061                         &gaudi_monitor_id_to_str[i].node,
9062                         gaudi_monitor_id_to_str[i].id);
9063
9064         sds->props = gaudi_state_dump_specs_props;
9065
9066         sds->sync_namager_names = gaudi_sync_manager_names;
9067
9068         sds->funcs = gaudi_state_dump_funcs;
9069 }
9070
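/* Return the table of Gaudi stream master queue IDs */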
9071 static u32 *gaudi_get_stream_master_qid_arr(void)
9072 {
9073         return gaudi_stream_master;
9074 }
9075
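/* No-op on Gaudi: DRAM properties require no runtime adjustment */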
9076 static int gaudi_set_dram_properties(struct hl_device *hdev)
9077 {
9078         return 0;
9079 }
9080
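/* No-op on Gaudi: engine binning is not supported, so no masks are set */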
9081 static int gaudi_set_binning_masks(struct hl_device *hdev)
9082 {
9083         return 0;
9084 }
9085
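/* No-op on Gaudi: RAZWI information is handled through the event queue flow */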
9086 static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
9087 {
9088 }
9089
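/*
 * infineon_ver_show - sysfs show callback exposing the Infineon VRM
 * controller version reported by the CPU-CP firmware (cpucp_info).
 */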
9090 static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
9091 {
9092         struct hl_device *hdev = dev_get_drvdata(dev);
9093         struct cpucp_info *cpucp_info;
9094
9095         cpucp_info = &hdev->asic_prop.cpucp_info;
9096
9097         return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
9098 }
9099
9100 static DEVICE_ATTR_RO(infineon_ver);
9101
9102 static struct attribute *gaudi_vrm_dev_attrs[] = {
9103         &dev_attr_infineon_ver.attr,
9104         NULL,
9105 };
9106
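/*
 * gaudi_add_device_attr - register the common clock sysfs attributes and
 * attach the Gaudi VRM attributes (infineon_ver) to the VRM attribute group.
 */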
9107 static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
9108                                         struct attribute_group *dev_vrm_attr_grp)
9109 {
9110         hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
9111         dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
9112 }
9113
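/* Gaudi does not report device open/close activity to the firmware */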
9114 static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
9115 {
9116         return 0;
9117 }
9118
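/* ASIC function table for Gaudi, installed by gaudi_set_asic_funcs() */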
9119 static const struct hl_asic_funcs gaudi_funcs = {
9120         .early_init = gaudi_early_init,
9121         .early_fini = gaudi_early_fini,
9122         .late_init = gaudi_late_init,
9123         .late_fini = gaudi_late_fini,
9124         .sw_init = gaudi_sw_init,
9125         .sw_fini = gaudi_sw_fini,
9126         .hw_init = gaudi_hw_init,
9127         .hw_fini = gaudi_hw_fini,
9128         .halt_engines = gaudi_halt_engines,
9129         .suspend = gaudi_suspend,
9130         .resume = gaudi_resume,
9131         .mmap = gaudi_mmap,
9132         .ring_doorbell = gaudi_ring_doorbell,
9133         .pqe_write = gaudi_pqe_write,
9134         .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9135         .asic_dma_free_coherent = gaudi_dma_free_coherent,
9136         .scrub_device_mem = gaudi_scrub_device_mem,
9137         .scrub_device_dram = gaudi_scrub_device_dram,
9138         .get_int_queue_base = gaudi_get_int_queue_base,
9139         .test_queues = gaudi_test_queues,
9140         .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9141         .asic_dma_pool_free = gaudi_dma_pool_free,
9142         .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9143         .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9144         .dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
9145         .cs_parser = gaudi_cs_parser,
9146         .dma_map_sgtable = hl_asic_dma_map_sgtable,
9147         .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9148         .update_eq_ci = gaudi_update_eq_ci,
9149         .context_switch = gaudi_context_switch,
9150         .restore_phase_topology = gaudi_restore_phase_topology,
9151         .debugfs_read_dma = gaudi_debugfs_read_dma,
9152         .add_device_attr = gaudi_add_device_attr,
9153         .handle_eqe = gaudi_handle_eqe,
9154         .get_events_stat = gaudi_get_events_stat,
9155         .read_pte = gaudi_read_pte,
9156         .write_pte = gaudi_write_pte,
9157         .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9158         .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9159         .mmu_prefetch_cache_range = NULL,
9160         .send_heartbeat = gaudi_send_heartbeat,
9161         .debug_coresight = gaudi_debug_coresight,
9162         .is_device_idle = gaudi_is_device_idle,
9163         .compute_reset_late_init = gaudi_compute_reset_late_init,
9164         .hw_queues_lock = gaudi_hw_queues_lock,
9165         .hw_queues_unlock = gaudi_hw_queues_unlock,
9166         .get_pci_id = gaudi_get_pci_id,
9167         .get_eeprom_data = gaudi_get_eeprom_data,
9168         .get_monitor_dump = gaudi_get_monitor_dump,
9169         .send_cpu_message = gaudi_send_cpu_message,
9170         .pci_bars_map = gaudi_pci_bars_map,
9171         .init_iatu = gaudi_init_iatu,
9172         .rreg = hl_rreg,
9173         .wreg = hl_wreg,
9174         .halt_coresight = gaudi_halt_coresight,
9175         .ctx_init = gaudi_ctx_init,
9176         .ctx_fini = gaudi_ctx_fini,
9177         .pre_schedule_cs = gaudi_pre_schedule_cs,
9178         .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9179         .load_firmware_to_device = gaudi_load_firmware_to_device,
9180         .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9181         .get_signal_cb_size = gaudi_get_signal_cb_size,
9182         .get_wait_cb_size = gaudi_get_wait_cb_size,
9183         .gen_signal_cb = gaudi_gen_signal_cb,
9184         .gen_wait_cb = gaudi_gen_wait_cb,
9185         .reset_sob = gaudi_reset_sob,
9186         .reset_sob_group = gaudi_reset_sob_group,
9187         .get_device_time = gaudi_get_device_time,
9188         .pb_print_security_errors = NULL,
9189         .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9190         .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9191         .get_dec_base_addr = NULL,
9192         .scramble_addr = hl_mmu_scramble_addr,
9193         .descramble_addr = hl_mmu_descramble_addr,
9194         .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9195         .get_hw_block_id = gaudi_get_hw_block_id,
9196         .hw_block_mmap = gaudi_block_mmap,
9197         .enable_events_from_fw = gaudi_enable_events_from_fw,
9198         .ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
9199         .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9200         .init_firmware_preload_params = gaudi_init_firmware_preload_params,
9201         .init_firmware_loader = gaudi_init_firmware_loader,
9202         .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9203         .state_dump_init = gaudi_state_dump_init,
9204         .get_sob_addr = gaudi_get_sob_addr,
9205         .set_pci_memory_regions = gaudi_set_pci_memory_regions,
9206         .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
9207         .check_if_razwi_happened = gaudi_check_if_razwi_happened,
9208         .mmu_get_real_page_size = hl_mmu_get_real_page_size,
9209         .access_dev_mem = hl_access_dev_mem,
9210         .set_dram_bar_base = gaudi_set_hbm_bar_base,
9211         .send_device_activity = gaudi_send_device_activity,
9212         .set_dram_properties = gaudi_set_dram_properties,
9213         .set_binning_masks = gaudi_set_binning_masks,
9214 };
9215
9216 /**
9217  * gaudi_set_asic_funcs - set GAUDI function pointers
9218  *
9219  * @hdev: pointer to hl_device structure
9220  *
9221  */
9222 void gaudi_set_asic_funcs(struct hl_device *hdev)
9223 {
9224         hdev->asic_funcs = &gaudi_funcs;
9225 }