// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
 *                      because of TDMA (tensor DMA). Hence, WREG is always not
 *                      secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to not secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
 * CQ, CP and the engine are not secured
 */
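
/*
 * A minimal sketch of the internal-DMA flow described above (pseudo-code;
 * the helper names are illustrative only, not actual driver entry points):
 *
 *	if (asic_is_idle(hdev)) {
 *		set_dma_channel_0_secured(hdev);
 *		run_internal_dma_job(hdev);
 *		set_dma_channel_0_not_secured(hdev);
 *	}
 */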

#define GAUDI_BOOT_FIT_FILE	"/*(DEBLOBBED)*/"
#define GAUDI_LINUX_FW_FILE	"/*(DEBLOBBED)*/"
#define GAUDI_TPC_FW_FILE	"/*(DEBLOBBED)*/"

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */

#define GAUDI_QMAN0_FENCE_VAL	0x72E91AB9

#define GAUDI_MAX_STRING_LEN	20

#define GAUDI_CB_POOL_CB_CNT	512
#define GAUDI_CB_POOL_CB_SIZE	0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT		0xEE6B27FF /* 8 seconds */
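
/*
 * The watchdog value above is a cycle count: 0xEE6B27FF == 4,000,000,000 - 1,
 * which matches the "8 seconds" annotation if the arbiter counts at roughly
 * 500 MHz (an assumption derived from the annotation, not from a datasheet).
 */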

#define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */

#define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")
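
/*
 * Note: sizeof() of a string literal includes the terminating NUL, so the
 * size above is "0b" (2) + 32 binary digits + '\0' = 35 bytes.
 */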

#define MONITOR_SOB_STRING_SIZE		256

static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
	GAUDI_QUEUE_ID_DMA_0_0,
	GAUDI_QUEUE_ID_DMA_0_1,
	GAUDI_QUEUE_ID_DMA_0_2,
	GAUDI_QUEUE_ID_DMA_0_3,
	GAUDI_QUEUE_ID_DMA_1_0,
	GAUDI_QUEUE_ID_DMA_1_1,
	GAUDI_QUEUE_ID_DMA_1_2,
	GAUDI_QUEUE_ID_DMA_1_3
};

static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};

static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
};

static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};

static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_REPEAT:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
	case PACKET_ARB_POINT:
	case PACKET_WAIT:
	case PACKET_LOAD_AND_EXE:
		return true;
	default:
		return false;
	}
}

static const char * const
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};

static const char * const
gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped"
};

static const char * const
gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};

static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};

static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
};

static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
};

static s64 gaudi_state_dump_specs_props[] = {
	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
	[SP_MON_OBJ_WR_ADDR_LOW] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
	[SP_MON_OBJ_WR_ADDR_HIGH] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
	[SP_FENCE0_CNT_OFFSET] =
		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_FENCE0_RDATA_OFFSET] =
		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
};

static const int gaudi_queue_id_to_engine_id[] = {
	[GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
	[GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
	[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
	[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
	[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
	[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
	[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
	[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
	[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
	[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
	[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
	[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
	[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
	[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
	[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
	[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
	[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
	[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
	[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
	[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
	[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
	[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
};

/* The order here is opposite to the order of the indexing in the h/w.
 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
 */
static const char * const gaudi_sync_manager_names[] = {
	"SYNC_MGR_E_N",
	"SYNC_MGR_E_S",
	"SYNC_MGR_W_N",
	"SYNC_MGR_W_S",
	NULL
};

struct ecc_info_extract_params {
	u64 block_address;
	u32 num_memories;
	bool derr;
};

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
				u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val);
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
					u32 num_regs, u32 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop);

static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
{
	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
		return HL_COLLECTIVE_MASTER;

	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
		return HL_COLLECTIVE_SLAVE;

	return HL_COLLECTIVE_NOT_SUPPORTED;
}

static inline void set_default_power_values(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	if (hdev->card_type == cpucp_card_type_pmc) {
		prop->max_power_default = MAX_POWER_DEFAULT_PMC;

		if (prop->fw_security_enabled)
			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
		else
			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
	} else {
		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
	}
}

static int gaudi_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < prop->max_queues ; i++) {
		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 1;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
			num_sync_stream_queues++;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_USER;
		}
		prop->hw_queues_props[i].collective_mode =
						get_collective_mode(hdev, i);
	}

	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
	prop->cfg_base_address = CFG_BASE;
	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
	prop->host_base_address = HOST_PHYS_BASE;
	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->completion_mode = HL_COMPLETION_MODE_JOB;
	prop->collective_first_sob = 0;
	prop->collective_first_mon = 0;

	/* 2 SOBs per internal queue stream are reserved for collective */
	prop->sync_stream_first_sob =
			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
			* QMAN_STREAMS * HL_RSVD_SOBS;

	/* 1 monitor per internal queue stream is reserved for collective
	 * 2 monitors per external queue stream are reserved for collective
	 */
	prop->sync_stream_first_mon =
			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
			(NUMBER_OF_EXT_HW_QUEUES * 2);
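
	/*
	 * Worked example (illustrative values, assuming NUMBER_OF_SOBS_IN_GRP
	 * is 11 and HL_MAX_SOBS_PER_MONITOR is 8): each collective SOB group
	 * then reserves ALIGN(11, 8) = 16 SOBs, so the first sync-stream SOB
	 * lands after 16 * QMAN_STREAMS * HL_RSVD_SOBS collective SOBs.
	 */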

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address =
			prop->sram_base_address + SRAM_USER_BASE_OFFSET;

	prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
	prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
	prop->dram_supports_virtual_memory = false;

	prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
	prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
	prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
	prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
	prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
	prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr =
			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
	prop->pmmu.last_mask = LAST_MASK;
	/* TODO: will be duplicated until implementing per-MMU props */
	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;

	/* PMMU and HPMMU are the same except of page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
	prop->dmmu.end_addr = VA_HOST_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;

	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI_EVENT_SIZE;
	prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	set_default_power_values(hdev);

	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
					CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_sob +
			(num_sync_stream_queues * HL_RSVD_SOBS);
	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_mon +
			(num_sync_stream_queues * HL_RSVD_MONS);

	prop->first_available_user_interrupt = USHRT_MAX;
	prop->tpc_interrupt_id = USHRT_MAX;

	/* single msi */
	prop->eq_interrupt_id = 0;

	for (i = 0 ; i < HL_MAX_DCORES ; i++)
		prop->first_available_cq[i] = USHRT_MAX;

	prop->fw_cpu_boot_dev_sts0_valid = false;
	prop->fw_cpu_boot_dev_sts1_valid = false;
	prop->hard_reset_done_by_fw = false;
	prop->gic_interrupts_enable = true;

	prop->server_type = HL_SERVER_TYPE_UNKNOWN;

	prop->clk_pll_index = HL_GAUDI_MME_PLL;
	prop->max_freq_value = GAUDI_MAX_CLK_FREQ;

	prop->use_get_power_for_reset_history = true;

	prop->configurable_stop_on_err = true;

	prop->set_max_power_on_device_init = true;

	prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;

	return 0;
}

static int gaudi_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM", "CFG", "HBM"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
			(CFG_BASE - SPI_FLASH_BASE_ADDR);

	return 0;
}

static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
		return old_addr;

	if (hdev->asic_prop.iatu_done_by_fw)
		return U64_MAX;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = HBM_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
	if (rc)
		return U64_MAX;

	if (gaudi) {
		old_addr = gaudi->hbm_bar_cur_addr;
		gaudi->hbm_bar_cur_addr = addr;
	}

	return old_addr;
}

static int gaudi_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	if (hdev->asic_prop.iatu_done_by_fw)
		return 0;

	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = CFG_BAR_ID;
	inbound_region.addr = SPI_FLASH_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = HBM_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
	if (rc)
		goto done;

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}

static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}

static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	resource_size_t pci_bar_size;
	u32 fw_boot_status;
	int rc;

	rc = gaudi_set_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed setting fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);

	if (pci_bar_size != SRAM_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);

	if (pci_bar_size != CFG_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);

	/* If FW security is enabled at this point it means no access to ELBI */
	if (hdev->asic_prop.fw_security_enabled) {
		hdev->asic_prop.iatu_done_by_fw = true;

		/*
		 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
		 * decision can only be taken based on PCI ID security.
		 */
		hdev->asic_prop.gic_interrupts_enable = false;
		goto pci_init;
	}

	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
				&fw_boot_status);
	if (rc)
		goto free_queue_props;

	/* Check whether FW is configuring iATU */
	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
		hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			/* we are already on failure flow, so don't check if hw_fini fails. */
			hdev->asic_funcs->hw_fini(hdev, true, false);
		goto pci_fini;
	}

	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
		if (rc) {
			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
			goto pci_fini;
		}
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

static int gaudi_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);

	hl_pci_fini(hdev);

	return 0;
}

/**
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 */
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	if ((hdev->fw_components & FW_TYPE_LINUX) &&
			(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
		struct gaudi_device *gaudi = hdev->asic_specific;

		if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
			return 0;

		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);

		if (rc)
			return rc;

		freq = pll_freq_arr[2];
	} else {
		/* Backward compatibility */
		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
		nr = RREG32(mmPSOC_CPU_PLL_NR);
		nf = RREG32(mmPSOC_CPU_PLL_NF);
		od = RREG32(mmPSOC_CPU_PLL_OD);

		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
			freq = 0;
		}
	}

	prop->psoc_timestamp_frequency = freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;

	return 0;
}

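/*
 * Worked example for the legacy PLL readout above (illustrative register
 * values, taking a 50 MHz PLL reference for the sake of the arithmetic):
 * nf = 99, nr = 0, od = 1 gives pll_clk = 50 * 100 / (1 * 2) = 2500 MHz;
 * with div_sel = DIV_SEL_DIVIDED_PLL and div_fctr = 1 the PSOC timestamp
 * frequency becomes 2500 / 2 = 1250 MHz.
 */
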
static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	init_tpc_mem_pkt = cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);

	/* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
	dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
				round_up(prop->sram_user_base_address, SZ_8K));
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	return rc;
}

/*
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
	const struct firmware *fw;
	size_t fw_size;
	void *cpu_addr;
	dma_addr_t dma_handle;
	int rc, count = 5;

again:
	rc = reject_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
	if (rc == -EINTR && count-- > 0) {
		msleep(50);
		goto again;
	}

	if (rc) {
		dev_err(hdev->dev, "Failed to load firmware file %s\n",
				GAUDI_TPC_FW_FILE);
		goto out;
	}

	fw_size = fw->size;
	cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
	if (!cpu_addr) {
		dev_err(hdev->dev,
			"Failed to allocate %zu of dma memory for TPC kernel\n",
			fw_size);
		rc = -ENOMEM;
		goto out;
	}

	memcpy(cpu_addr, fw->data, fw_size);

	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

	hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);

out:
	release_firmware(fw);
	return rc;
}

static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *prop = &gaudi->collective_props;
	struct hl_hw_queue *q;
	u32 i, sob_id, sob_group_id, queue_id;

	/* Iterate through SOB groups and assign a SOB for each slave queue */
	sob_group_id =
		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;

	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		q = &hdev->kernel_queues[queue_id + (4 * i)];
		q->sync_stream_prop.collective_sob_id = sob_id + i;
	}

	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine needs to participate in the reduction process
	 */
	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;

	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;
}

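/*
 * Mapping example for the function above (illustrative, assuming
 * HL_RSVD_SOBS = 2): for stream 1 with curr_sob_group_idx[1] = 0,
 * sob_group_id = 1 * 2 + 0 = 2, and NIC engine i gets SOB (base_sob_id + i)
 * on queue GAUDI_QUEUE_ID_NIC_0_0 + 1 + (4 * i), since every NIC engine
 * exposes four stream queues.
 */
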
static void gaudi_sob_group_hw_reset(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;
	int i;

	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);

	kref_init(&hw_sob_group->kref);
}

static void gaudi_sob_group_reset_error(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, base_sob_id: %d\n",
		hw_sob_group->base_sob_id);
}

static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
{
	struct gaudi_collective_properties *prop;
	int i;

	prop = &gaudi->collective_props;

	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
					BIT(i % HL_MAX_SOBS_PER_MONITOR);

	/* Set collective engine bit */
	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
				BIT(i % HL_MAX_SOBS_PER_MONITOR);
}

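/*
 * Resulting mask layout (illustrative, assuming NIC_NUMBER_OF_ENGINES = 10
 * and HL_MAX_SOBS_PER_MONITOR = 8): NICs 0-7 occupy bits 0-7 of
 * mstr_sob_mask[0], NICs 8-9 occupy bits 0-1 of mstr_sob_mask[1], and the
 * loop exits with i == 10, so the collective (reduction) engine bit set
 * after the loop is bit 2 of mstr_sob_mask[1].
 */
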
static int gaudi_collective_init(struct hl_device *hdev)
{
	u32 i, sob_id, reserved_sobs_per_group;
	struct gaudi_collective_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	prop = &gaudi->collective_props;
	sob_id = hdev->asic_prop.collective_first_sob;

	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
	reserved_sobs_per_group =
		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);

	/* Init SOB groups */
	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
		prop->hw_sob_group[i].hdev = hdev;
		prop->hw_sob_group[i].base_sob_id = sob_id;
		sob_id += reserved_sobs_per_group;
		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
	}

	for (i = 0 ; i < QMAN_STREAMS; i++) {
		prop->next_sob_group_val[i] = 1;
		prop->curr_sob_group_idx[i] = 0;
		gaudi_collective_map_sobs(hdev, i);
	}

	gaudi_collective_mstr_sob_mask_set(gaudi);

	return 0;
}

static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;

	kref_put(&cprop->hw_sob_group[sob_group].kref,
					gaudi_sob_group_hw_reset);
}

static void gaudi_collective_master_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
	struct gaudi_collective_properties *cprop;
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;
	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	master_sob_base =
		cprop->hw_sob_group[sob_group_offset].base_sob_id;
	master_monitor = prop->collective_mstr_mon_id[0];

	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[0],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
	wait_prop.sob_val = cprop->next_sob_group_val[stream];
	wait_prop.mon_id = master_monitor;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
	master_monitor = prop->collective_mstr_mon_id[1];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[1],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
	wait_prop.mon_id = master_monitor;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
}

static void gaudi_collective_slave_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	u32 queue_id, cb_size = 0;

	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	if (job->cs->encaps_signals) {
		/* use the encaps signal handle stored earlier in the flow
		 * and set the SOB information from the encaps
		 * signal handle
		 */
		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
						cs_cmpl);

		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n",
				job->cs->sequence,
				cs_cmpl->hw_sob->sob_id,
				cs_cmpl->sob_val);
	}

	/* Add to wait CBs using slave monitor */
	wait_prop.data = (void *) job->user_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->collective_slave_mon_id;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;

	dev_dbg(hdev->dev,
		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->collective_slave_mon_id, queue_id);

	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
		prop->collective_sob_id, queue_id);

	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
			prop->collective_sob_id, cb_size, false);
}

static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
	struct hl_cs_compl *signal_cs_cmpl =
		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
	struct hl_cs_compl *cs_cmpl =
		container_of(cs->fence, struct hl_cs_compl, base_fence);
	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
	struct gaudi_collective_properties *cprop;
	u32 stream, queue_id, sob_group_offset;
	struct gaudi_device *gaudi;
	struct hl_device *hdev;
	struct hl_cs_job *job;
	struct hl_ctx *ctx;

	ctx = cs->ctx;
	hdev = ctx->hdev;
	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;

	if (cs->encaps_signals) {
		cs_cmpl->hw_sob = handle->hw_sob;
		/* at this checkpoint we only need the hw_sob pointer
		 * for the completion check before starting to go over the
		 * jobs of the master/slaves; the sob_value will be taken
		 * later on in gaudi_collective_slave_init_job, depending on
		 * each job's wait offset value.
		 */
		cs_cmpl->sob_val = 0;
	} else {
		/* copy the SOB id and value of the signal CS */
		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
	}

	/* check again if the signal cs already completed.
	 * if yes then don't send any wait cs since the hw_sob
	 * could be in reset already. if signal is not completed
	 * then get refcount to hw_sob to prevent resetting the sob
	 * while wait cs is not submitted.
	 * note that this check is protected by two locks,
	 * hw queue lock and completion object lock,
	 * and the same completion object lock also protects
	 * the hw_sob reset handler function.
	 * The hw_queue lock prevents out-of-sync hw_sob
	 * refcount values, changed by the signal/wait flows.
	 */
	spin_lock(&signal_cs_cmpl->lock);

	if (completion_done(&cs->signal_fence->completion)) {
		spin_unlock(&signal_cs_cmpl->lock);
		goto put_cs;
	}

	/* Increment kref since all slave queues are now waiting on it */
	kref_get(&cs_cmpl->hw_sob->kref);

	spin_unlock(&signal_cs_cmpl->lock);

	/* Calculate the stream from collective master queue (1st job) */
	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
	stream = job->hw_queue_id % 4;
	sob_group_offset =
		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];

	list_for_each_entry(job, &cs->job_list, cs_node) {
		queue_id = job->hw_queue_id;

		if (hdev->kernel_queues[queue_id].collective_mode ==
				HL_COLLECTIVE_MASTER)
			gaudi_collective_master_init_job(hdev, job, stream,
						sob_group_offset);
		else
			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
	}

	cs_cmpl->sob_group = sob_group_offset;

	/* Handle sob group kref and wraparound */
	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
	cprop->next_sob_group_val[stream]++;

	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount.
		 */
		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
				gaudi_sob_group_reset_error);
		cprop->next_sob_group_val[stream] = 1;
		/* only two SOBs are currently in use */
		cprop->curr_sob_group_idx[stream] =
			(cprop->curr_sob_group_idx[stream] + 1) &
							(HL_RSVD_SOBS - 1);

		gaudi_collective_map_sobs(hdev, stream);

		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
				cprop->curr_sob_group_idx[stream], stream);
	}

put_cs:
	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;

	return 0;
}

static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
{
	u32 cacheline_end, additional_commands;

	cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
	additional_commands = sizeof(struct packet_msg_prot) * 2;

	if (user_cb_size + additional_commands > cacheline_end)
		return cacheline_end - user_cb_size + additional_commands;

	return additional_commands;
}

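/*
 * Worked example (illustrative, assuming a 128-byte DEVICE_CACHE_LINE_SIZE
 * and a 16-byte packet_msg_prot): user_cb_size = 120 rounds up to
 * cacheline_end = 128; 120 + 32 > 128, so the patched CB gets
 * 128 - 120 + 32 = 40 extra bytes, which keeps the two appended MSG_PROT
 * packets off the cache line holding the tail of the user CB.
 */
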
static int gaudi_collective_wait_create_job(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
		u32 encaps_signal_offset)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;
	bool patched_cb;

	cntr = &hdev->aggregated_cs_counters;

	if (mode == HL_COLLECTIVE_MASTER) {
		/* CB size of collective master queue contains
		 * 4 msg short packets for monitor 1 configuration
		 * 1 fence packet
		 * 4 msg short packets for monitor 2 configuration
		 * 1 fence packet
		 * 2 msg prot packets for completion and MSI
		 */
		cb_size = sizeof(struct packet_msg_short) * 8 +
				sizeof(struct packet_fence) * 2 +
				sizeof(struct packet_msg_prot) * 2;
		patched_cb = true;
	} else {
		/* CB size of collective slave queues contains
		 * 4 msg short packets for monitor configuration
		 * 1 fence packet
		 * 1 additional msg short packet for sob signal
		 */
		cb_size = sizeof(struct packet_msg_short) * 5 +
				sizeof(struct packet_fence);
		patched_cb = false;
	}

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	/* Allocate internal mapped CB for non patched CBs */
	cb = hl_cb_kernel_create(hdev, cb_size, !patched_cb);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = queue_id;

	/* since it's guaranteed to have only one chunk in the collective wait
	 * cs, we can use this chunk to set the encapsulated signal offset
	 * in the jobs.
	 */
	if (cs->encaps_signals)
		job->encaps_sig_wait_offset = encaps_signal_offset;

	/*
	 * No need in parsing, user CB is the patched CB.
	 * We call hl_cb_destroy() out of two reasons - we don't need
	 * the CB in the CB idr anymore and to decrement its refcount as
	 * it was incremented inside hl_cb_kernel_create().
	 */
	if (patched_cb)
		job->patched_cb = job->user_cb;
	else
		job->patched_cb = NULL;

	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	/* increment refcount as for external queues we get completion */
	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
		cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}

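/*
 * Size example for the CBs built above (illustrative, assuming 8-byte
 * msg_short and fence packets and a 16-byte msg_prot packet): a master CB
 * is 8 * 8 + 2 * 8 + 2 * 16 = 112 bytes, and a slave CB is 5 * 8 + 8 = 48
 * bytes before the signal packet is appended.
 */
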
static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		u32 wait_queue_id, u32 collective_engine_id,
		u32 encaps_signal_offset)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hw_queue_properties *hw_queue_prop;
	u32 queue_id, collective_queue, num_jobs;
	u32 stream, nic_queue, nic_idx = 0;
	bool skip;
	int i, rc = 0;

	/* Verify wait queue id is configured as master */
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
		dev_err(hdev->dev,
			"Queue %d is not configured as collective master\n",
			wait_queue_id);
		return -EINVAL;
	}

	/* Verify engine id is supported */
	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
		dev_err(hdev->dev,
			"Collective wait does not support engine %u\n",
			collective_engine_id);
		return -EINVAL;
	}

	stream = wait_queue_id % 4;

	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	else
		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;

	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;

	/* First job goes to the collective master queue, it will wait for
	 * the collective slave queues to finish execution.
	 * The synchronization is done using two monitors:
	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
	 * reduction engine (DMA5/TPC7).
	 *
	 * The rest of the jobs go to the collective slave queues, which will
	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
	 */
	for (i = 0 ; i < num_jobs ; i++) {
		if (i == 0) {
			queue_id = wait_queue_id;
			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_MASTER, queue_id,
				wait_queue_id, encaps_signal_offset);
		} else {
			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
				if (gaudi->hw_cap_initialized &
						BIT(HW_CAP_NIC_SHIFT + nic_idx))
					skip = false;
				else
					skip = true;

				queue_id = nic_queue;
				nic_queue += 4;
				nic_idx++;

				if (skip)
					continue;
			} else {
				queue_id = collective_queue;
			}

			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_SLAVE, queue_id,
				wait_queue_id, encaps_signal_offset);
		}

		if (rc)
			return rc;
	}

	return rc;
}

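/*
 * Job count example (illustrative, assuming NUMBER_OF_SOBS_IN_GRP = 11):
 * num_jobs = 12, i.e. one master job on the wait queue plus eleven slave
 * jobs: ten NIC queues (some possibly skipped when the NIC is disabled)
 * and one reduction engine queue (DMA5 or TPC7).
 */
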
static int gaudi_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	rc = gaudi->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	if ((hdev->card_type == cpucp_card_type_pci) &&
			(hdev->nic_ports_mask & 0x3)) {
		dev_info(hdev->dev,
			"PCI card detected, only 8 ports are enabled\n");
		hdev->nic_ports_mask &= ~0x3;

		/* Stop and disable unused NIC QMANs */
		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);

		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
	}

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
	if (rc) {
		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
		return rc;
	}

	/* Scrub both SRAM and DRAM */
	rc = hdev->asic_funcs->scrub_device_mem(hdev);
	if (rc)
		goto disable_pci_access;

	rc = gaudi_fetch_psoc_frequency(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
		goto disable_pci_access;
	}

	rc = gaudi_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	rc = gaudi_init_tpc_mem(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
		goto disable_pci_access;
	}

	rc = gaudi_collective_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to init collective\n");
		goto disable_pci_access;
	}

	/* We only support a single ASID for the user, so for the sake of optimization, just
	 * initialize the ASID one time during device initialization with the fixed value of 1
	 */
	gaudi_mmu_prepare(hdev, 1);

	hl_fw_set_pll_profile(hdev);

	return 0;

disable_pci_access:
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);

	return rc;
}

static void gaudi_late_fini(struct hl_device *hdev)
{
	hl_hwmon_release_resources(hdev);
}

static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
{
	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
	int i, j, rc = 0;

	/*
	 * The device CPU works with 40-bit addresses, while bit 39 must be set
	 * to '1' when accessing the host.
	 * Bits 49:39 of the full host address are saved for a later
	 * configuration of the HW to perform extension to 50 bits.
	 * Because there is a single HW register that holds the extension bits,
	 * these bits must be identical across the whole allocated range.
	 */

	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
								&dma_addr_arr[i],
								GFP_KERNEL | __GFP_ZERO);
		if (!virt_addr_arr[i]) {
			rc = -ENOMEM;
			goto free_dma_mem_arr;
		}

		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
			break;
	}

	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
		dev_err(hdev->dev,
			"MSB of CPU accessible DMA memory are not identical in all range\n");
		rc = -EFAULT;
		goto free_dma_mem_arr;
	}

	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
	hdev->cpu_pci_msb_addr =
		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);

	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);

free_dma_mem_arr:
	for (j = 0 ; j < i ; j++)
		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
						dma_addr_arr[j]);

	return rc;
}

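/*
 * Boundary example for the retry loop above: GAUDI_CPU_PCI_MSB_ADDR()
 * extracts the host address bits above bit 38, so an allocation that starts
 * just below 0x80_0000_0000 and ends at or above it straddles a bit-39
 * boundary; its start and end MSBs differ and the allocation is retried.
 */
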
static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u32 i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		q = &gaudi->internal_qmans[i];
		if (!q->pq_kernel_addr)
			continue;
		hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
	}
}

static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	int rc, i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
			continue;

		q = &gaudi->internal_qmans[i];

		switch (i) {
		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
			break;
		default:
			dev_err(hdev->dev, "Bad internal queue index %d", i);
			rc = -EINVAL;
			goto free_internal_qmans_pq_mem;
		}

		q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
								GFP_KERNEL | __GFP_ZERO);
		if (!q->pq_kernel_addr) {
			rc = -ENOMEM;
			goto free_internal_qmans_pq_mem;
		}
	}

	return 0;

free_internal_qmans_pq_mem:
	gaudi_free_internal_qmans_pq_mem(hdev);
	return rc;
}

static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_mem_region *region;

	/* CFG */
	region = &hdev->pci_mem_region[PCI_REGION_CFG];
	region->region_base = CFG_BASE;
	region->region_size = CFG_SIZE;
	region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
	region->bar_size = CFG_BAR_SIZE;
	region->bar_id = CFG_BAR_ID;
	region->used = 1;

	/* SRAM */
	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
	region->region_base = SRAM_BASE_ADDR;
	region->region_size = SRAM_SIZE;
	region->offset_in_bar = 0;
	region->bar_size = SRAM_BAR_SIZE;
	region->bar_id = SRAM_BAR_ID;
	region->used = 1;

	/* DRAM */
	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
	region->region_base = DRAM_PHYS_BASE;
	region->region_size = hdev->asic_prop.dram_size;
	region->offset_in_bar = 0;
	region->bar_size = prop->dram_pci_bar_size;
	region->bar_id = HBM_BAR_ID;
	region->used = 1;

	/* SP SRAM */
	region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
	region->region_base = PSOC_SCRATCHPAD_ADDR;
	region->region_size = PSOC_SCRATCHPAD_SIZE;
	region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
	region->bar_size = CFG_BAR_SIZE;
	region->bar_id = CFG_BAR_ID;
	region->used = 1;
}
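/*
 * Editorial sketch of how the region table above is typically consumed; the
 * helper name is hypothetical and not part of this driver. A device address
 * that falls inside a region maps to a BAR offset by rebasing it at the
 * region start and shifting by where the region begins inside its BAR:
 */
static inline u64 gaudi_example_addr_to_bar_offset(struct pci_mem_region *region,
						u64 addr)
{
	/* offset of addr within the region, plus the region's own offset
	 * inside the BAR that exposes it
	 */
	return (addr - region->region_base) + region->offset_in_bar;
}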
static int gaudi_sw_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi;
	u32 i, event_id = 0;
	int rc;

	/* Allocate device structure */
	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
	if (!gaudi)
		return -ENOMEM;

	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
		if (gaudi_irq_map_table[i].valid) {
			if (event_id == GAUDI_EVENT_SIZE) {
				dev_err(hdev->dev,
					"Event array exceeds the limit of %u events\n",
					GAUDI_EVENT_SIZE);
				rc = -EINVAL;
				goto free_gaudi_device;
			}

			gaudi->events[event_id++] =
					gaudi_irq_map_table[i].fc_id;
		}
	}

	gaudi->cpucp_info_get = gaudi_cpucp_info_get;

	hdev->asic_specific = gaudi;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_gaudi_device;
	}

	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
	if (rc)
		goto free_dma_pool;

	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
	if (rc)
		goto free_cpu_accessible_dma_pool;

	spin_lock_init(&gaudi->hw_queues_lock);

	hdev->supports_sync_stream = true;
	hdev->supports_coresight = true;
	hdev->supports_staged_submission = true;
	hdev->supports_wait_for_multi_cs = true;

	hdev->asic_funcs->set_pci_memory_regions(hdev);
	hdev->stream_master_qid_arr =
				hdev->asic_funcs->get_stream_master_qid_arr();
	hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);
	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
					hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_gaudi_device:
	kfree(gaudi);
	return rc;
}
static int gaudi_sw_fini(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	gaudi_free_internal_qmans_pq_mem(hdev);

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);

	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
					hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	kfree(gaudi);

	return 0;
}
static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
{
	struct hl_device *hdev = arg;
	int i;

	if (hdev->disabled)
		return IRQ_HANDLED;

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);

	hl_irq_handler_eq(irq, &hdev->event_queue);

	return IRQ_HANDLED;
}
/*
 * For backward compatibility, new MSI interrupts should be set after the
 * existing CPU and NIC interrupts.
 */
static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
				bool cpu_eq)
{
	int msi_vec;

	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
				GAUDI_EVENT_QUEUE_MSI_IDX);

	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
			(nr + NIC_NUMBER_OF_ENGINES + 1);

	return pci_irq_vector(hdev->pdev, msi_vec);
}
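/*
 * Editorial worked example (the constants here are assumptions for
 * illustration, e.g. GAUDI_EVENT_QUEUE_MSI_IDX == 8 and
 * NIC_NUMBER_OF_ENGINES == 10):
 *	nr = 0..7, cpu_eq = false -> msi_vec = nr         (completion queues)
 *	nr = 8,    cpu_eq = true  -> msi_vec = 8          (CPU event queue)
 *	nr = 8,    cpu_eq = false -> msi_vec = 8 + 10 + 1 (new IRQs land after
 *							   the CPU EQ and the
 *							   NIC block)
 */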
static int gaudi_enable_msi_single(struct hl_device *hdev)
{
	int rc, irq;

	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");

	irq = gaudi_pci_irq_vector(hdev, 0, false);
	rc = request_irq(irq, gaudi_irq_handler_single, 0,
			"gaudi single msi", hdev);
	if (rc)
		dev_err(hdev->dev,
			"Failed to request single MSI IRQ\n");

	return rc;
}
static int gaudi_enable_msi(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
		return 0;

	rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
	if (rc < 0) {
		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
		return rc;
	}

	rc = gaudi_enable_msi_single(hdev);
	if (rc)
		goto free_pci_irq_vectors;

	gaudi->hw_cap_initialized |= HW_CAP_MSI;

	return 0;

free_pci_irq_vectors:
	pci_free_irq_vectors(hdev->pdev);
	return rc;
}
static void gaudi_sync_irqs(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
		return;

	/* Wait for all pending IRQs to be finished */
	synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
}

static void gaudi_disable_msi(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
		return;

	gaudi_sync_irqs(hdev);
	free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
	pci_free_irq_vectors(hdev->pdev);

	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
}
static void gaudi_init_scrambler_sram(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
			CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
		return;

	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
}
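/*
 * Editorial note: the unrolled writes above (and in the HBM variant below)
 * could be collapsed into a loop only if the RTR_CTRL blocks sit at a
 * uniform stride, which is an assumption this file never relies on, e.g.:
 *
 *	u32 stride = mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN -
 *			mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN;
 *	for (i = 0 ; i < 8 ; i++)
 *		WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN + i * stride,
 *			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
 *
 * The driver keeps the writes explicit, which is also easier to diff against
 * the register list.
 */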
static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
			CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
		return;

	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
}
static void gaudi_init_e2e(struct hl_device *hdev)
{
	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
			CPU_BOOT_DEV_STS0_E2E_CRED_EN)
		return;

	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);

	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);

	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);

	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);

	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);

	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);

	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);

	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
}
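/*
 * Editorial note on the literals above: expressions such as 247 >> 3 (== 30)
 * and 1000 >> 3 (== 125) keep the raw tuning numbers visible in the source
 * while writing the register in a coarser unit. That the credit sizes are
 * programmed in units of 8 is inferred here only from the consistent ">> 3"
 * and should be treated as an assumption; the numbers themselves are
 * vendor-provided tuning values.
 */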
static void gaudi_init_hbm_cred(struct hl_device *hdev)
{
	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
			CPU_BOOT_DEV_STS0_HBM_CRED_EN)
		return;

	hbm0_wr = 0x33333333;
	hbm0_rd = 0x77777777;
	hbm1_wr = 0x55555555;
	hbm1_rd = 0xDDDDDDDD;

	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));

	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
}
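/*
 * Editorial note: the credit words above are nibble-packed, e.g. 0x33333333
 * is the value 3 replicated in eight 4-bit fields and 0xDDDDDDDD the value
 * 13. Assuming each nibble is one requestor's credit count (an assumption,
 * not stated in this file), the packing is just:
 *
 *	u32 packed = 0;
 *	for (i = 0 ; i < 8 ; i++)
 *		packed |= (credit & 0xF) << (i * 4);
 */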
static void gaudi_init_golden_registers(struct hl_device *hdev)
{
	u32 tpc_offset;
	int tpc_id, i;

	gaudi_init_e2e(hdev);
	gaudi_init_hbm_cred(hdev);

	for (tpc_id = 0, tpc_offset = 0;
			tpc_id < TPC_NUMBER_OF_ENGINES;
			tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
		/* Mask all arithmetic interrupts from TPC */
		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
		/* Set 16 cache lines */
		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
				ICACHE_FETCH_LINE_NUM, 2);
	}

	/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
	for (i = 0 ; i < 128 ; i += 8)
		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);

	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
}
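/*
 * Editorial note: 0x8FFE is 0b1000111111111110, i.e. bits 1..11 and bit 15
 * set. Assuming each bit of TPC0_CFG_TPC_INTR_MASK masks one interrupt
 * cause, this masks the arithmetic causes while leaving bit 0 unmasked.
 */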
static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
				int qman_id, dma_addr_t qman_pq_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 q_off, dma_qm_offset;
	u32 dma_qm_err_cfg, irq_handler_offset;

	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;

	mtr_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	q_off = dma_qm_offset + qman_id * 4;

	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));

	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);

	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_SRC_OFFSET);
	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_DST_OFFSET);

	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);

	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);

	/* The following configuration is needed only once per QMAN */
	if (qman_id == 0) {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);

		/* Configure RAZWI IRQ */
		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			dma_qm_err_cfg |=
				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);

		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
									dma_id);

		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
				QMAN_EXTERNAL_MAKE_TRUSTED);

		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
	}
}
static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
	u32 irq_handler_offset;

	/* Set to maximum possible according to physical size */
	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);

	/* WA for H/W bug H3-2116 */
	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);

	/* STOP_ON bit implies no completion to operation in case of RAZWI */
	if (hdev->stop_on_err)
		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;

	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);

	irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);

	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
		lower_32_bits(CFG_BASE + irq_handler_offset));
	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
		upper_32_bits(CFG_BASE + irq_handler_offset));

	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
	WREG32(mmDMA0_CORE_PROT + dma_offset,
			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
	/* If the channel is secured, it should be in MMU bypass mode */
	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
}
static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
				u32 enable_mask)
{
	u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;

	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
}
static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_hw_queue *q;
	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;

	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
		return;

	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[i];
		/*
		 * For queues after the CPU Q, add 1 to get the correct queue
		 * index. In addition, the CPU EQ and NIC IRQs must be skipped
		 * in order to reach the correct MSI register.
		 */
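		/*
		 * Editorial worked example (QMAN_STREAMS == 4 assumed): for
		 * DMA channel 0 the four streams map to kernel queues 0..3
		 * and MSI vectors 0..3. For any later channel, cpu_skip
		 * shifts q_idx past the CPU queue and nic_skip additionally
		 * moves the MSI vector past the NIC IRQ block, e.g.
		 * q_idx = 4 * dma_id + j + 1 and
		 * msi_vec = NIC_NUMBER_OF_ENGINES + 1 + <running index>.
		 */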
		if (dma_id) {
			cpu_skip = 1;
			nic_skip = NIC_NUMBER_OF_ENGINES;
		} else {
			cpu_skip = 0;
			nic_skip = 0;
		}

		for (j = 0 ; j < QMAN_STREAMS ; j++) {
			q_idx = 4 * dma_id + j + cpu_skip;
			q = &hdev->kernel_queues[q_idx];
			q->cq_id = cq_id++;
			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
			gaudi_init_pci_dma_qman(hdev, dma_id, j,
						q->bus_address);
		}

		gaudi_init_dma_core(hdev, dma_id);

		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
	}

	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
}
static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
				int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 dma_qm_err_cfg, irq_handler_offset;
	u32 q_off, dma_qm_offset;

	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;

	mtr_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	q_off = dma_qm_offset + qman_id * 4;

	if (qman_id < 4) {
		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
					lower_32_bits(qman_base_addr));
		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
					upper_32_bits(qman_base_addr));

		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
				QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
				QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
				QMAN_CPDMA_DST_OFFSET);
	} else {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);

		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
				QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
				QMAN_LDMA_SRC_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
				QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			dma_qm_err_cfg |=
				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);

		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
									dma_id);

		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
	if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
				mtr_base_ws_lo);
		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
				mtr_base_ws_hi);
		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
				so_base_ws_lo);
		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
				so_base_ws_hi);
	}
}
static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	int i, j, dma_id, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
		return;

	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];

		for (j = 0 ; j < QMAN_STREAMS ; j++) {
			/*
			 * Add the CPU queue in order to get the correct queue
			 * number, as all internal queues are placed after it
			 */
			internal_q_index = dma_id * QMAN_STREAMS + j + 1;

			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
						qman_base_addr);
		}

		/* Initializing lower CP for HBM DMA QMAN */
		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);

		gaudi_init_dma_core(hdev, dma_id);

		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
	}

	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
}
static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
				int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 irq_handler_offset;
	u32 q_off, mme_id;
	u32 mme_qm_err_cfg;

	mtr_base_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);

	q_off = mme_offset + qman_id * 4;

	if (qman_id < 4) {
		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
					lower_32_bits(qman_base_addr));
		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
					upper_32_bits(qman_base_addr));

		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
				QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
				QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
				QMAN_CPDMA_DST_OFFSET);
	} else {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);

		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
				QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
				QMAN_LDMA_SRC_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
				QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		mme_id = mme_offset /
				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;

		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			mme_qm_err_cfg |=
				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);

		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
									mme_id);

		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
}
static void gaudi_init_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 mme_offset;
	int i, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_MME)
		return;

	/*
	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
	 */

	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;

	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
		q = &gaudi->internal_qmans[internal_q_index];
		qman_base_addr = (u64) q->pq_dma_addr;
		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
					qman_base_addr);
		if (i == 3)
			mme_offset = 0;
	}

	/* Initializing lower CP for MME QMANs */
	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
	gaudi_init_mme_qman(hdev, 0, 4, 0);

	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);

	gaudi->hw_cap_initialized |= HW_CAP_MME;
}
static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
				int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 tpc_qm_err_cfg, irq_handler_offset;
	u32 q_off, tpc_id;

	mtr_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	q_off = tpc_offset + qman_id * 4;

	tpc_id = tpc_offset /
			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);

	if (qman_id < 4) {
		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
					lower_32_bits(qman_base_addr));
		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
					upper_32_bits(qman_base_addr));

		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
				QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
				QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
				QMAN_CPDMA_DST_OFFSET);
	} else {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);

		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
				QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
				QMAN_LDMA_SRC_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
				QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			tpc_qm_err_cfg |=
				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);

		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
									tpc_id);

		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
	if (tpc_id == 6) {
		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
				mtr_base_ws_lo);
		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
				mtr_base_ws_hi);
		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
				so_base_ws_lo);
		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
				so_base_ws_hi);
	}
}
static void gaudi_init_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 so_base_hi, tpc_offset = 0;
	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
	int i, tpc_id, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
		return;

	so_base_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		for (i = 0 ; i < QMAN_STREAMS ; i++) {
			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
						tpc_id * QMAN_STREAMS + i;
			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_tpc_qman(hdev, tpc_offset, i,
						qman_base_addr);
		}

		/* Initializing lower CP for TPC QMAN */
		gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);

		/* Enable the QMAN and TPC channel */
		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
				QMAN_TPC_ENABLE);

		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
				so_base_hi);

		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;

		gaudi->hw_cap_initialized |=
				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
	}
}
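/*
 * Editorial note: FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id) shifts the
 * per-engine bit into the TPC field of hw_cap_initialized. For example, if
 * HW_CAP_TPC_MASK were GENMASK(31, 24) (the real position is defined
 * elsewhere and only assumed here), tpc_id 2 would set bit 26.
 */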
static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
				int qman_id, u64 qman_base_addr, int nic_id)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 nic_qm_err_cfg, irq_handler_offset;
	u32 q_off;

	mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	q_off = nic_offset + qman_id * 4;

	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));

	WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);

	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
			QMAN_LDMA_SIZE_OFFSET);
	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_SRC_OFFSET);
	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_DST_OFFSET);

	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);

	if (qman_id == 0) {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);

		/* Configure RAZWI IRQ */
		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			nic_qm_err_cfg |=
				NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);

		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
									nic_id);

		WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
		WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}
}
static void gaudi_init_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 nic_offset = 0;
	u32 nic_delta_between_qmans =
			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	u32 nic_delta_between_nics =
			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	int i, nic_id, internal_q_index;

	if (!hdev->nic_ports_mask)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
		return;

	dev_dbg(hdev->dev, "Initializing NIC QMANs\n");

	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
		if (!(hdev->nic_ports_mask & (1 << nic_id))) {
			nic_offset += nic_delta_between_qmans;
			if (nic_id & 1) {
				nic_offset -= (nic_delta_between_qmans * 2);
				nic_offset += nic_delta_between_nics;
			}

			continue;
		}

		for (i = 0 ; i < QMAN_STREAMS ; i++) {
			internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
						nic_id * QMAN_STREAMS + i;
			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
						qman_base_addr, nic_id);
		}

		/* Enable the QMAN */
		WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);

		nic_offset += nic_delta_between_qmans;
		if (nic_id & 1) {
			nic_offset -= (nic_delta_between_qmans * 2);
			nic_offset += nic_delta_between_nics;
		}

		gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
	}
}
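/*
 * Editorial worked example of the nic_offset walk above: each NIC block
 * hosts two QMANs, so the loop advances one QMAN stride per port and, after
 * every odd port, rewinds two QMAN strides and jumps one NIC stride:
 *	nic_id 0 -> offset 0
 *	nic_id 1 -> offset = delta_qmans                (second QMAN of NIC0)
 *	nic_id 2 -> offset = delta_nics                 (first QMAN of NIC1)
 *	nic_id 3 -> offset = delta_nics + delta_qmans   (second QMAN of NIC1)
 */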
static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
		return;

	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
}

static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
		return;

	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
}
static void gaudi_disable_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
		return;

	WREG32(mmMME2_QM_GLBL_CFG0, 0);
	WREG32(mmMME0_QM_GLBL_CFG0, 0);
}

static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 tpc_offset = 0;
	int tpc_id;

	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
		return;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
	}
}
static void gaudi_disable_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 nic_mask, nic_offset = 0;
	u32 nic_delta_between_qmans =
			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	u32 nic_delta_between_nics =
			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	int nic_id;

	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
		nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);

		if (gaudi->hw_cap_initialized & nic_mask)
			WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);

		nic_offset += nic_delta_between_qmans;
		if (nic_id & 1) {
			nic_offset -= (nic_delta_between_qmans * 2);
			nic_offset += nic_delta_between_nics;
		}
	}
}
static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
		return;

	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}

static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
		return;

	/* Stop CPs of HBM DMA QMANs */

	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}

static void gaudi_stop_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
		return;

	/* Stop CPs of MME QMANs */
	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}

static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
		return;

	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}
static void gaudi_stop_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	/* Stop upper CPs of QMANs */

	if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
		WREG32(mmNIC0_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
		WREG32(mmNIC0_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
		WREG32(mmNIC1_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
		WREG32(mmNIC1_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
		WREG32(mmNIC2_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
		WREG32(mmNIC2_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
		WREG32(mmNIC3_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
		WREG32(mmNIC3_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
		WREG32(mmNIC4_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
		WREG32(mmNIC4_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
}
static void gaudi_pci_dma_stall(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
		return;

	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
}

static void gaudi_hbm_dma_stall(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
		return;

	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
}

static void gaudi_mme_stall(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
		return;

	/* WA for H3-1800 bug: do ACC and SBAB writes twice */
	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
}
3538 static void gaudi_tpc_stall(struct hl_device *hdev)
3540 struct gaudi_device *gaudi = hdev->asic_specific;
3542 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3545 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3546 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3547 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3548 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3549 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3550 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3551 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3552 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
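/* Writing 0 to a QMAN's CGM_CFG/CGM_CFG1 pair below shuts off its
 * clock-gating state machine. When FW security is enabled these
 * registers belong to the firmware and are not writable by the driver,
 * hence the early bail-out.
 */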
3555 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3560 if (hdev->asic_prop.fw_security_enabled)
3563 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3564 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3565 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3567 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3570 WREG32(mmMME0_QM_CGM_CFG, 0);
3571 WREG32(mmMME0_QM_CGM_CFG1, 0);
3572 WREG32(mmMME2_QM_CGM_CFG, 0);
3573 WREG32(mmMME2_QM_CGM_CFG1, 0);
3575 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3576 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3577 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3579 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3583 static void gaudi_enable_timestamp(struct hl_device *hdev)
3585 /* Disable the timestamp counter */
3586 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3588 /* Zero the lower/upper parts of the 64-bit counter */
3589 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3590 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3592 /* Enable the counter */
3593 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3596 static void gaudi_disable_timestamp(struct hl_device *hdev)
3598 /* Disable the timestamp counter */
3599 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3602 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3604 u32 wait_timeout_ms;
3607 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3609 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3614 gaudi_stop_nic_qmans(hdev);
3615 gaudi_stop_mme_qmans(hdev);
3616 gaudi_stop_tpc_qmans(hdev);
3617 gaudi_stop_hbm_dma_qmans(hdev);
3618 gaudi_stop_pci_dma_qmans(hdev);
3620 msleep(wait_timeout_ms);
3622 gaudi_pci_dma_stall(hdev);
3623 gaudi_hbm_dma_stall(hdev);
3624 gaudi_tpc_stall(hdev);
3625 gaudi_mme_stall(hdev);
3627 msleep(wait_timeout_ms);
3629 gaudi_disable_nic_qmans(hdev);
3630 gaudi_disable_mme_qmans(hdev);
3631 gaudi_disable_tpc_qmans(hdev);
3632 gaudi_disable_hbm_dma_qmans(hdev);
3633 gaudi_disable_pci_dma_qmans(hdev);
3635 gaudi_disable_timestamp(hdev);
3638 gaudi_disable_msi(hdev);
3641 static int gaudi_mmu_init(struct hl_device *hdev)
3643 struct asic_fixed_properties *prop = &hdev->asic_prop;
3644 struct gaudi_device *gaudi = hdev->asic_specific;
3648 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3651 for (i = 0 ; i < prop->max_asid ; i++) {
3652 hop0_addr = prop->mmu_pgt_addr +
3653 (i * prop->mmu_hop_table_size);
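/* Each ASID gets its own hop0 table, laid out back-to-back starting at
 * mmu_pgt_addr. Illustrative example (the actual size comes from
 * asic_prop): with a 4KB hop table, ASID 3's hop0 would sit at
 * mmu_pgt_addr + 0x3000.
 */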
3655 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3658 "failed to set hop0 addr for asid %d\n", i);
3663 /* init MMU cache management page */
3664 WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
3665 WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);
3667 /* mem cache invalidation */
3668 WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3670 rc = hl_mmu_invalidate_cache(hdev, true, 0);
3674 WREG32(mmMMU_UP_MMU_ENABLE, 1);
3675 WREG32(mmMMU_UP_SPI_MASK, 0xF);
3677 WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);
3680 * The H/W expects the first PI after init to be 1. After wraparound we'll write 0.
3683 gaudi->mmu_cache_inv_pi = 1;
3685 gaudi->hw_cap_initialized |= HW_CAP_MMU;
3690 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3694 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3696 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3699 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3703 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3705 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3708 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3710 struct dynamic_fw_load_mgr *dynamic_loader;
3711 struct cpu_dyn_regs *dyn_regs;
3713 dynamic_loader = &hdev->fw_loader.dynamic_loader;
3716 * Here we update initial values for a few specific dynamic regs (as,
3717 * before reading the first descriptor from FW, those values have to be
3718 * hard-coded). In later stages of the protocol those values will be
3719 * updated automatically by reading the FW descriptor, so the data there
3720 * will always be up-to-date.
3722 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3723 dyn_regs->kmd_msg_to_cpu =
3724 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3725 dyn_regs->cpu_cmd_status_to_host =
3726 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3728 dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
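/* The static loader below is the legacy protocol: every communication
 * register is hard-coded on both sides, whereas in the dynamic (COMMS)
 * protocol above they are negotiated via FW descriptors after the
 * initial hand-shake.
 */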
3731 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3733 struct static_fw_load_mgr *static_loader;
3735 static_loader = &hdev->fw_loader.static_loader;
3737 static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3738 static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3739 static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3740 static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3741 static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3742 static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3743 static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3744 static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3745 static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3746 static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3747 static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3748 static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3749 static_loader->cpu_reset_wait_msec = hdev->pldm ?
3750 GAUDI_PLDM_RESET_WAIT_MSEC :
3751 GAUDI_CPU_RESET_WAIT_MSEC;
3754 static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3756 struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3758 pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3759 pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3760 pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3761 pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3762 pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3763 pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3766 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3768 struct asic_fixed_properties *prop = &hdev->asic_prop;
3769 struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3771 /* fill common fields */
3772 fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3773 fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3774 fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3775 fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3776 fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3777 fw_loader->skip_bmc = !hdev->bmc_enable;
3778 fw_loader->sram_bar_id = SRAM_BAR_ID;
3779 fw_loader->dram_bar_id = HBM_BAR_ID;
3781 if (prop->dynamic_fw_load)
3782 gaudi_init_dynamic_firmware_loader(hdev);
3784 gaudi_init_static_firmware_loader(hdev);
3787 static int gaudi_init_cpu(struct hl_device *hdev)
3789 struct gaudi_device *gaudi = hdev->asic_specific;
3792 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3795 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3799 * The device CPU works with 40-bit addresses.
3800 * This register sets the extension to 50 bits.
3802 if (!hdev->asic_prop.fw_security_enabled)
3803 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3805 rc = hl_fw_init_cpu(hdev);
3810 gaudi->hw_cap_initialized |= HW_CAP_CPU;
3815 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3817 struct cpu_dyn_regs *dyn_regs =
3818 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3819 struct asic_fixed_properties *prop = &hdev->asic_prop;
3820 struct gaudi_device *gaudi = hdev->asic_specific;
3821 u32 status, irq_handler_offset;
3823 struct hl_hw_queue *cpu_pq =
3824 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3827 if (!hdev->cpu_queues_enable)
3830 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3833 eq = &hdev->event_queue;
3835 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3836 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3838 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3839 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3841 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3842 lower_32_bits(hdev->cpu_accessible_dma_address));
3843 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3844 upper_32_bits(hdev->cpu_accessible_dma_address));
3846 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3847 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3848 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3850 /* Used for EQ CI */
3851 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3853 WREG32(mmCPU_IF_PF_PQ_PI, 0);
3855 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3857 irq_handler_offset = prop->gic_interrupts_enable ?
3858 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3859 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3861 WREG32(irq_handler_offset,
3862 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3864 err = hl_poll_timeout(
3866 mmCPU_IF_QUEUE_INIT,
3868 (status == PQ_INIT_STATUS_READY_FOR_HOST),
3874 "Failed to communicate with Device CPU (CPU-CP timeout)\n");
3878 /* update FW application security bits */
3879 if (prop->fw_cpu_boot_dev_sts0_valid)
3880 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3881 if (prop->fw_cpu_boot_dev_sts1_valid)
3882 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3884 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3888 static void gaudi_pre_hw_init(struct hl_device *hdev)
3890 /* Perform read from the device to make sure device is up */
3893 if (!hdev->asic_prop.fw_security_enabled) {
3894 /* Set the access through PCI bars (Linux driver only) as secured */
3897 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3898 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3899 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3901 /* Perform read to flush the waiting writes to ensure
3902 * configuration was set in the device
3904 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3908 * Let's mark in the H/W that we have reached this point. We check
3909 * this value in the reset_before_init function to understand whether
3910 * we need to reset the chip before doing H/W init. This register is
3911 * cleared by the H/W upon H/W reset
3913 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3916 static int gaudi_hw_init(struct hl_device *hdev)
3918 struct gaudi_device *gaudi = hdev->asic_specific;
3921 gaudi_pre_hw_init(hdev);
3923 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
3924 * So we set it here and if anyone tries to move it later to
3925 * a different address, there will be an error
3927 if (hdev->asic_prop.iatu_done_by_fw)
3928 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
3931 * Before pushing u-boot/linux to the device, we need to set the HBM
3932 * BAR to the base address of the DRAM.
3934 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
3936 "failed to map HBM bar to DRAM base address\n");
3940 rc = gaudi_init_cpu(hdev);
3942 dev_err(hdev->dev, "failed to initialize CPU\n");
3946 /* In case the clock gating was enabled in preboot we need to disable
3947 * it here before touching the MME/TPC registers.
3949 gaudi_disable_clock_gating(hdev);
3951 /* SRAM scrambler must be initialized after CPU is running from HBM */
3952 gaudi_init_scrambler_sram(hdev);
3954 /* This is here just in case we are working without CPU */
3955 gaudi_init_scrambler_hbm(hdev);
3957 gaudi_init_golden_registers(hdev);
3959 rc = gaudi_mmu_init(hdev);
3963 gaudi_init_security(hdev);
3965 gaudi_init_pci_dma_qmans(hdev);
3967 gaudi_init_hbm_dma_qmans(hdev);
3969 gaudi_init_mme_qmans(hdev);
3971 gaudi_init_tpc_qmans(hdev);
3973 gaudi_init_nic_qmans(hdev);
3975 gaudi_enable_timestamp(hdev);
3977 /* MSI must be enabled before CPU queues and NIC are initialized */
3978 rc = gaudi_enable_msi(hdev);
3980 goto disable_queues;
3982 /* must be called after MSI was enabled */
3983 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
3985 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3990 /* Perform read from the device to flush all configuration */
3996 gaudi_disable_msi(hdev);
3998 gaudi_disable_mme_qmans(hdev);
3999 gaudi_disable_pci_dma_qmans(hdev);
4004 static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4006 struct cpu_dyn_regs *dyn_regs =
4007 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4008 u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4009 struct gaudi_device *gaudi = hdev->asic_specific;
4010 bool driver_performs_reset;
4013 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4018 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4019 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4021 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4022 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4027 "Firmware performs HARD reset, going to wait %dms\n",
4033 driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4034 !hdev->asic_prop.hard_reset_done_by_fw);
4036 /* Set device to handle FLR by H/W as we will put the device CPU to halt mode */
4039 if (driver_performs_reset)
4040 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4041 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4043 /* If linux is loaded in the device CPU we need to communicate with it
4044 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4045 * registers in case of old F/Ws
4047 if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4048 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4049 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4050 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4052 WREG32(irq_handler_offset,
4053 gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4055 /* This is a hail-mary attempt to revive the card in the small chance that the
4056 * f/w has experienced a watchdog event, which caused it to return to preboot.
4057 * In that case, triggering reset through GIC won't help. We need to trigger the
4058 * reset as if Linux wasn't loaded.
4060 * We do it only if the reset cause was HB, because that would be the indication of such an event.
4063 * In case watchdog hasn't expired but we still got HB, then this won't do any damage.
4066 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4067 if (hdev->asic_prop.hard_reset_done_by_fw)
4068 hl_fw_ask_hard_reset_without_linux(hdev);
4070 hl_fw_ask_halt_machine_without_linux(hdev);
4073 if (hdev->asic_prop.hard_reset_done_by_fw)
4074 hl_fw_ask_hard_reset_without_linux(hdev);
4076 hl_fw_ask_halt_machine_without_linux(hdev);
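/* Driver-performed reset sequence: program the soft-reset and all-reset
 * unit masks first, give the device CPU time to quiesce, tell preboot to
 * skip PCIe re-initialization, re-arm the boot loader, and only then
 * assert SW_ALL_RST. The ordering below matters; see the per-step
 * comments.
 */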
4079 if (driver_performs_reset) {
4081 /* Configure the reset registers. Must be done as early as
4082 * possible in case we fail during H/W initialization
4084 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4085 (CFG_RST_H_DMA_MASK |
4086 CFG_RST_H_MME_MASK |
4088 CFG_RST_H_TPC_7_MASK));
4090 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4092 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4093 (CFG_RST_H_HBM_MASK |
4094 CFG_RST_H_TPC_7_MASK |
4095 CFG_RST_H_NIC_MASK |
4097 CFG_RST_H_DMA_MASK |
4098 CFG_RST_H_MME_MASK |
4099 CFG_RST_H_CPU_MASK |
4100 CFG_RST_H_MMU_MASK));
4102 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4103 (CFG_RST_L_IF_MASK |
4104 CFG_RST_L_PSOC_MASK |
4105 CFG_RST_L_TPC_MASK));
4107 msleep(cpu_timeout_ms);
4109 /* Tell ASIC not to re-initialize PCIe */
4110 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4112 /* Restart BTL/BLR upon hard-reset */
4113 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4115 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4116 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4119 "Issued HARD reset command, going to wait %dms\n",
4123 "Firmware performs HARD reset, going to wait %dms\n",
4129 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4130 * itself is in reset. Need to wait until the reset is deasserted
4132 msleep(reset_timeout_ms);
4134 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4135 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) {
4136 dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status);
4141 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4142 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4143 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4144 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4145 HW_CAP_HBM_SCRAMBLER);
4147 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4149 hdev->device_cpu_is_halted = false;
4154 static int gaudi_suspend(struct hl_device *hdev)
4158 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
4160 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4165 static int gaudi_resume(struct hl_device *hdev)
4167 return gaudi_init_iatu(hdev);
4170 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4171 void *cpu_addr, dma_addr_t dma_addr, size_t size)
4175 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4176 VM_DONTCOPY | VM_NORESERVE);
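/* dma_addr was shifted by HOST_PHYS_BASE when it was handed out (see
 * gaudi_dma_alloc_coherent() below), i.e. it is the device's view of
 * host memory, so strip that base before mapping the real CPU-side
 * address.
 */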
4178 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4179 (dma_addr - HOST_PHYS_BASE), size);
4181 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4186 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4188 struct cpu_dyn_regs *dyn_regs =
4189 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4190 u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4191 struct gaudi_device *gaudi = hdev->asic_specific;
4192 bool invalid_queue = false;
4195 switch (hw_queue_id) {
4196 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4197 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4198 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4199 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4200 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4203 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4204 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4205 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4206 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4207 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4210 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4211 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4212 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4213 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4214 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4217 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4218 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4219 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4220 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4221 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4224 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4225 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4226 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4227 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4228 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4231 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4232 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4233 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4234 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4235 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4238 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4239 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4240 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4241 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4242 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4245 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4246 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4247 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4248 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4249 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4252 case GAUDI_QUEUE_ID_CPU_PQ:
4253 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4254 db_reg_offset = mmCPU_IF_PF_PQ_PI;
4256 invalid_queue = true;
4259 case GAUDI_QUEUE_ID_MME_0_0:
4260 db_reg_offset = mmMME2_QM_PQ_PI_0;
4263 case GAUDI_QUEUE_ID_MME_0_1:
4264 db_reg_offset = mmMME2_QM_PQ_PI_1;
4267 case GAUDI_QUEUE_ID_MME_0_2:
4268 db_reg_offset = mmMME2_QM_PQ_PI_2;
4271 case GAUDI_QUEUE_ID_MME_0_3:
4272 db_reg_offset = mmMME2_QM_PQ_PI_3;
4275 case GAUDI_QUEUE_ID_MME_1_0:
4276 db_reg_offset = mmMME0_QM_PQ_PI_0;
4279 case GAUDI_QUEUE_ID_MME_1_1:
4280 db_reg_offset = mmMME0_QM_PQ_PI_1;
4283 case GAUDI_QUEUE_ID_MME_1_2:
4284 db_reg_offset = mmMME0_QM_PQ_PI_2;
4287 case GAUDI_QUEUE_ID_MME_1_3:
4288 db_reg_offset = mmMME0_QM_PQ_PI_3;
4291 case GAUDI_QUEUE_ID_TPC_0_0:
4292 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4295 case GAUDI_QUEUE_ID_TPC_0_1:
4296 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4299 case GAUDI_QUEUE_ID_TPC_0_2:
4300 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4303 case GAUDI_QUEUE_ID_TPC_0_3:
4304 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4307 case GAUDI_QUEUE_ID_TPC_1_0:
4308 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4311 case GAUDI_QUEUE_ID_TPC_1_1:
4312 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4315 case GAUDI_QUEUE_ID_TPC_1_2:
4316 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4319 case GAUDI_QUEUE_ID_TPC_1_3:
4320 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4323 case GAUDI_QUEUE_ID_TPC_2_0:
4324 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4327 case GAUDI_QUEUE_ID_TPC_2_1:
4328 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4331 case GAUDI_QUEUE_ID_TPC_2_2:
4332 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4335 case GAUDI_QUEUE_ID_TPC_2_3:
4336 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4339 case GAUDI_QUEUE_ID_TPC_3_0:
4340 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4343 case GAUDI_QUEUE_ID_TPC_3_1:
4344 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4347 case GAUDI_QUEUE_ID_TPC_3_2:
4348 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4351 case GAUDI_QUEUE_ID_TPC_3_3:
4352 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4355 case GAUDI_QUEUE_ID_TPC_4_0:
4356 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4359 case GAUDI_QUEUE_ID_TPC_4_1:
4360 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4363 case GAUDI_QUEUE_ID_TPC_4_2:
4364 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4367 case GAUDI_QUEUE_ID_TPC_4_3:
4368 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4371 case GAUDI_QUEUE_ID_TPC_5_0:
4372 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4375 case GAUDI_QUEUE_ID_TPC_5_1:
4376 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4379 case GAUDI_QUEUE_ID_TPC_5_2:
4380 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4383 case GAUDI_QUEUE_ID_TPC_5_3:
4384 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4387 case GAUDI_QUEUE_ID_TPC_6_0:
4388 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4391 case GAUDI_QUEUE_ID_TPC_6_1:
4392 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4395 case GAUDI_QUEUE_ID_TPC_6_2:
4396 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4399 case GAUDI_QUEUE_ID_TPC_6_3:
4400 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4403 case GAUDI_QUEUE_ID_TPC_7_0:
4404 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4407 case GAUDI_QUEUE_ID_TPC_7_1:
4408 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4411 case GAUDI_QUEUE_ID_TPC_7_2:
4412 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4415 case GAUDI_QUEUE_ID_TPC_7_3:
4416 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4419 case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4420 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4421 invalid_queue = true;
4423 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4424 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4427 case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4428 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4429 invalid_queue = true;
4431 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4432 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4435 case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4436 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4437 invalid_queue = true;
4439 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4440 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4443 case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4444 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4445 invalid_queue = true;
4447 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4448 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4451 case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4452 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4453 invalid_queue = true;
4455 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4456 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4459 case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4460 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4461 invalid_queue = true;
4463 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4464 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4467 case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4468 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4469 invalid_queue = true;
4471 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4472 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4475 case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4476 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4477 invalid_queue = true;
4479 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4480 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4483 case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4484 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4485 invalid_queue = true;
4487 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4488 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4491 case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4492 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4493 invalid_queue = true;
4495 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4496 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4500 invalid_queue = true;
4503 if (invalid_queue) {
4504 /* Should never get here */
4505 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4512 /* ring the doorbell */
4513 WREG32(db_reg_offset, db_value);
4515 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4516 /* make sure device CPU will read latest data from host */
4519 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4520 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4521 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4523 WREG32(irq_handler_offset,
4524 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4528 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4531 __le64 *pbd = (__le64 *) bd;
4533 /* The QMANs are on the host memory so a simple copy suffices */
4538 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4539 dma_addr_t *dma_handle, gfp_t flags)
4541 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4544 /* Shift to the device's base physical address of host memory */
4546 *dma_handle += HOST_PHYS_BASE;
4551 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4552 void *cpu_addr, dma_addr_t dma_handle)
4554 /* Cancel the device's base physical address of host memory */
4555 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4557 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4560 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
4562 struct asic_fixed_properties *prop = &hdev->asic_prop;
4563 u64 cur_addr = prop->dram_user_base_address;
4564 u32 chunk_size, busy;
4567 while (cur_addr < prop->dram_end_address) {
4568 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4569 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4572 min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4575 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4576 cur_addr, cur_addr + chunk_size);
4578 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
4579 lower_32_bits(val));
4580 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
4581 upper_32_bits(val));
4582 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4583 lower_32_bits(cur_addr));
4584 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4585 upper_32_bits(cur_addr));
4586 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4588 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4589 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4590 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
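/* The COMMIT above sets both LIN and MEM_SET, which puts the DMA core
 * in memset mode: SRC_BASE_LO/HI is then interpreted as the 64-bit fill
 * pattern rather than as a source address.
 */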
4592 cur_addr += chunk_size;
4594 if (cur_addr == prop->dram_end_address)
4598 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4599 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4601 rc = hl_poll_timeout(
4603 mmDMA0_CORE_STS0 + dma_offset,
4605 ((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
4607 HBM_SCRUBBING_TIMEOUT_US);
4611 "DMA Timeout during HBM scrubbing of DMA #%d\n",
4621 static int gaudi_scrub_device_mem(struct hl_device *hdev)
4623 struct asic_fixed_properties *prop = &hdev->asic_prop;
4624 u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US;
4625 u64 addr, size, val = hdev->memory_scrub_val;
4629 if (!hdev->memory_scrub)
4632 timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
4633 while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
4634 if (ktime_compare(ktime_get(), timeout) > 0) {
4635 dev_err(hdev->dev, "waiting for idle timeout\n");
4638 usleep_range((1000 >> 2) + 1, 1000);
4642 addr = prop->sram_user_base_address;
4643 size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;
4645 dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
4646 addr, addr + size, val);
4647 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4649 dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
4653 /* Scrub HBM using all DMA channels in parallel */
4654 rc = gaudi_scrub_device_dram(hdev, val);
4656 dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
4663 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4664 u32 queue_id, dma_addr_t *dma_handle,
4667 struct gaudi_device *gaudi = hdev->asic_specific;
4668 struct gaudi_internal_qman_info *q;
4670 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4671 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4672 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4676 q = &gaudi->internal_qmans[queue_id];
4677 *dma_handle = q->pq_dma_addr;
4678 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4680 return q->pq_kernel_addr;
4683 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4684 u16 len, u32 timeout, u64 *result)
4686 struct gaudi_device *gaudi = hdev->asic_specific;
4688 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4695 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4697 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4701 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4703 struct packet_msg_prot *fence_pkt;
4704 dma_addr_t pkt_dma_addr;
4705 u32 fence_val, tmp, timeout_usec;
4706 dma_addr_t fence_dma_addr;
4711 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4713 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4715 fence_val = GAUDI_QMAN0_FENCE_VAL;
4717 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
4720 "Failed to allocate memory for H/W queue %d testing\n",
4727 fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
4731 "Failed to allocate packet for H/W queue %d testing\n",
4734 goto free_fence_ptr;
4737 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4738 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4739 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4741 fence_pkt->ctl = cpu_to_le32(tmp);
4742 fence_pkt->value = cpu_to_le32(fence_val);
4743 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4745 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4746 sizeof(struct packet_msg_prot),
4750 "Failed to send fence packet to H/W queue %d\n",
4755 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4756 1000, timeout_usec, true);
4758 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4760 if (rc == -ETIMEDOUT) {
4762 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4763 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4768 hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
4770 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
4774 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4776 struct gaudi_device *gaudi = hdev->asic_specific;
4779 * check capability here as send_cpu_message() won't update the result
4780 * value if the capability is not set
4782 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4785 return hl_fw_test_cpu_queue(hdev);
4788 static int gaudi_test_queues(struct hl_device *hdev)
4790 int i, rc, ret_val = 0;
4792 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4793 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4794 rc = gaudi_test_queue(hdev, i);
4800 rc = gaudi_test_cpu_queue(hdev);
4807 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4808 gfp_t mem_flags, dma_addr_t *dma_handle)
4812 if (size > GAUDI_DMA_POOL_BLK_SIZE)
4815 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4817 /* Shift to the device's base physical address of host memory */
4819 *dma_handle += HOST_PHYS_BASE;
4824 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4825 dma_addr_t dma_addr)
4827 /* Cancel the device's base physical address of host memory */
4828 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4830 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4833 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4834 size_t size, dma_addr_t *dma_handle)
4836 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4839 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4840 size_t size, void *vaddr)
4842 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4845 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
4847 struct scatterlist *sg, *sg_next_iter;
4848 u32 count, dma_desc_cnt;
4850 dma_addr_t addr, addr_next;
4854 for_each_sgtable_dma_sg(sgt, sg, count) {
4855 len = sg_dma_len(sg);
4856 addr = sg_dma_address(sg);
4861 while ((count + 1) < sgt->nents) {
4862 sg_next_iter = sg_next(sg);
4863 len_next = sg_dma_len(sg_next_iter);
4864 addr_next = sg_dma_address(sg_next_iter);
4869 if ((addr + len == addr_next) &&
4870 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4882 return dma_desc_cnt * sizeof(struct packet_lin_dma);
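/* Illustrative example: three DMA-mapped SG entries of 1MB each, where
 * only the first two are physically contiguous, merge into two
 * descriptors (2MB + 1MB), provided each merged run stays within
 * DMA_MAX_TRANSFER_SIZE. The function above would then return
 * 2 * sizeof(struct packet_lin_dma).
 */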
4885 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4886 struct hl_cs_parser *parser,
4887 struct packet_lin_dma *user_dma_pkt,
4888 u64 addr, enum dma_data_direction dir)
4890 struct hl_userptr *userptr;
4893 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4894 parser->job_userptr_list, &userptr))
4895 goto already_pinned;
4897 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4901 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4906 list_add_tail(&userptr->job_node, parser->job_userptr_list);
4908 rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
4910 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4914 userptr->dma_mapped = true;
4918 parser->patched_cb_size +=
4919 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4924 list_del(&userptr->job_node);
4925 hl_unpin_host_memory(hdev, userptr);
4931 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4932 struct hl_cs_parser *parser,
4933 struct packet_lin_dma *user_dma_pkt,
4936 enum dma_data_direction dir;
4937 bool skip_host_mem_pin = false, user_memset;
4941 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
4942 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
4943 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
4947 skip_host_mem_pin = true;
4949 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
4950 dir = DMA_TO_DEVICE;
4951 addr = le64_to_cpu(user_dma_pkt->src_addr);
4953 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
4954 dir = DMA_FROM_DEVICE;
4955 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4956 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4957 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4960 if (skip_host_mem_pin)
4961 parser->patched_cb_size += sizeof(*user_dma_pkt);
4963 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
4969 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
4970 struct hl_cs_parser *parser,
4971 struct packet_lin_dma *user_dma_pkt)
4973 bool src_in_host = false;
4974 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4975 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4976 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4978 dev_dbg(hdev->dev, "DMA packet details:\n");
4979 dev_dbg(hdev->dev, "source == 0x%llx\n",
4980 le64_to_cpu(user_dma_pkt->src_addr));
4981 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
4982 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
4985 * Special handling for DMA with size 0. Bypass all validations
4986 * because no transactions will be done except for WR_COMP, which
4987 * is not a security issue
4989 if (!le32_to_cpu(user_dma_pkt->tsize)) {
4990 parser->patched_cb_size += sizeof(*user_dma_pkt);
4994 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
4997 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5001 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5002 struct hl_cs_parser *parser,
5003 struct packet_load_and_exe *user_pkt)
5007 cfg = le32_to_cpu(user_pkt->cfg);
5009 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5011 "User not allowed to use Load and Execute\n");
5015 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5020 static int gaudi_validate_cb(struct hl_device *hdev,
5021 struct hl_cs_parser *parser, bool is_mmu)
5023 u32 cb_parsed_length = 0;
5026 parser->patched_cb_size = 0;
5028 /* user_cb_size is more than 0 so the loop will always be executed */
5029 while (cb_parsed_length < parser->user_cb_size) {
5030 enum packet_id pkt_id;
5032 struct gaudi_packet *user_pkt;
5034 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5036 pkt_id = (enum packet_id) (
5037 (le64_to_cpu(user_pkt->header) &
5038 PACKET_HEADER_PACKET_ID_MASK) >>
5039 PACKET_HEADER_PACKET_ID_SHIFT);
5041 if (!validate_packet_id(pkt_id)) {
5042 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5047 pkt_size = gaudi_packet_sizes[pkt_id];
5048 cb_parsed_length += pkt_size;
5049 if (cb_parsed_length > parser->user_cb_size) {
5051 "packet 0x%x is out of CB boundary\n", pkt_id);
5057 case PACKET_MSG_PROT:
5059 "User not allowed to use MSG_PROT\n");
5064 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5069 dev_err(hdev->dev, "User not allowed to use STOP\n");
5073 case PACKET_WREG_BULK:
5075 "User not allowed to use WREG_BULK\n");
5079 case PACKET_LOAD_AND_EXE:
5080 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5081 (struct packet_load_and_exe *) user_pkt);
5084 case PACKET_LIN_DMA:
5085 parser->contains_dma_pkt = true;
5087 parser->patched_cb_size += pkt_size;
5089 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5090 (struct packet_lin_dma *) user_pkt);
5093 case PACKET_WREG_32:
5094 case PACKET_MSG_LONG:
5095 case PACKET_MSG_SHORT:
5099 case PACKET_ARB_POINT:
5100 parser->patched_cb_size += pkt_size;
5104 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5115 * The new CB should have space at the end for:
5116 * 1. Optional NOP padding for cacheline alignment
5117 * 2. A MSG_PROT packet that will act as a completion packet
5118 * 3. A MSG_PROT packet that will generate an MSI interrupt
5120 if (parser->completion)
5121 parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
5122 parser->patched_cb_size);
5127 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5128 struct hl_cs_parser *parser,
5129 struct packet_lin_dma *user_dma_pkt,
5130 struct packet_lin_dma *new_dma_pkt,
5131 u32 *new_dma_pkt_size)
5133 struct hl_userptr *userptr;
5134 struct scatterlist *sg, *sg_next_iter;
5135 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5137 dma_addr_t dma_addr, dma_addr_next;
5138 u64 device_memory_addr, addr;
5139 enum dma_data_direction dir;
5140 struct sg_table *sgt;
5141 bool src_in_host = false;
5142 bool skip_host_mem_pin = false;
5145 ctl = le32_to_cpu(user_dma_pkt->ctl);
5147 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5150 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5151 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5154 addr = le64_to_cpu(user_dma_pkt->src_addr);
5155 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5156 dir = DMA_TO_DEVICE;
5158 skip_host_mem_pin = true;
5160 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5161 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5162 dir = DMA_FROM_DEVICE;
5165 if ((!skip_host_mem_pin) &&
5166 (!hl_userptr_is_pinned(hdev, addr,
5167 le32_to_cpu(user_dma_pkt->tsize),
5168 parser->job_userptr_list, &userptr))) {
5169 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5170 addr, le32_to_cpu(user_dma_pkt->tsize));
5174 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5175 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5176 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5180 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5185 for_each_sgtable_dma_sg(sgt, sg, count) {
5186 len = sg_dma_len(sg);
5187 dma_addr = sg_dma_address(sg);
5192 while ((count + 1) < sgt->nents) {
5193 sg_next_iter = sg_next(sg);
5194 len_next = sg_dma_len(sg_next_iter);
5195 dma_addr_next = sg_dma_address(sg_next_iter);
5200 if ((dma_addr + len == dma_addr_next) &&
5201 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5210 ctl = le32_to_cpu(user_dma_pkt->ctl);
5211 if (likely(dma_desc_cnt))
5212 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5213 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5214 new_dma_pkt->ctl = cpu_to_le32(ctl);
5215 new_dma_pkt->tsize = cpu_to_le32(len);
5217 if (dir == DMA_TO_DEVICE) {
5218 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5219 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5221 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5222 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5226 device_memory_addr += len;
5231 if (!dma_desc_cnt) {
5233 "Error of 0 SG entries when patching DMA packet\n");
5237 /* Fix the last dma packet - wrcomp must be as user set it */
5239 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5241 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
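/* Note the barrier/completion handling above: every descriptor after
 * the first has its EB (engine barrier) bit cleared, and WR_COMP is
 * masked off on all descriptors and restored only on the last one, so a
 * multi-descriptor transfer still generates a single completion exactly
 * as the user requested.
 */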
5246 static int gaudi_patch_cb(struct hl_device *hdev,
5247 struct hl_cs_parser *parser)
5249 u32 cb_parsed_length = 0;
5250 u32 cb_patched_cur_length = 0;
5253 /* user_cb_size is more than 0 so the loop will always be executed */
5254 while (cb_parsed_length < parser->user_cb_size) {
5255 enum packet_id pkt_id;
5257 u32 new_pkt_size = 0;
5258 struct gaudi_packet *user_pkt, *kernel_pkt;
5260 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5261 kernel_pkt = parser->patched_cb->kernel_address +
5262 cb_patched_cur_length;
5264 pkt_id = (enum packet_id) (
5265 (le64_to_cpu(user_pkt->header) &
5266 PACKET_HEADER_PACKET_ID_MASK) >>
5267 PACKET_HEADER_PACKET_ID_SHIFT);
5269 if (!validate_packet_id(pkt_id)) {
5270 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5275 pkt_size = gaudi_packet_sizes[pkt_id];
5276 cb_parsed_length += pkt_size;
5277 if (cb_parsed_length > parser->user_cb_size) {
5279 "packet 0x%x is out of CB boundary\n", pkt_id);
5285 case PACKET_LIN_DMA:
5286 rc = gaudi_patch_dma_packet(hdev, parser,
5287 (struct packet_lin_dma *) user_pkt,
5288 (struct packet_lin_dma *) kernel_pkt,
5290 cb_patched_cur_length += new_pkt_size;
5293 case PACKET_MSG_PROT:
5295 "User not allowed to use MSG_PROT\n");
5300 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5305 dev_err(hdev->dev, "User not allowed to use STOP\n");
5309 case PACKET_WREG_32:
5310 case PACKET_WREG_BULK:
5311 case PACKET_MSG_LONG:
5312 case PACKET_MSG_SHORT:
5316 case PACKET_ARB_POINT:
5317 case PACKET_LOAD_AND_EXE:
5318 memcpy(kernel_pkt, user_pkt, pkt_size);
5319 cb_patched_cur_length += pkt_size;
5323 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5336 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5337 struct hl_cs_parser *parser)
5340 u32 patched_cb_size;
5341 struct hl_cb *user_cb;
5345 * The new CB should have space at the end for:
5346 * 1. Optional NOP padding for cacheline alignment
5347 * 2. A MSG_PROT packet that will act as a completion packet
5348 * 3. A MSG_PROT packet that will generate an MSI interrupt
5350 if (parser->completion)
5351 parser->patched_cb_size = parser->user_cb_size +
5352 gaudi_get_patched_cb_extra_size(parser->user_cb_size);
5354 parser->patched_cb_size = parser->user_cb_size;
5356 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5357 parser->patched_cb_size, false, false,
5362 "Failed to allocate patched CB for DMA CS %d\n",
5367 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5368 /* hl_cb_get should never fail */
5369 if (!parser->patched_cb) {
5370 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5376 * We are protected from overflow because the check
5377 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
5378 * in the common code. That check is done only if is_kernel_allocated_cb is true.
5380 * There is no option to reach here without going through that check because:
5381 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
5382 * an external queue.
5383 * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
5385 memcpy(parser->patched_cb->kernel_address,
5386 parser->user_cb->kernel_address,
5387 parser->user_cb_size);
5389 patched_cb_size = parser->patched_cb_size;
5391 /* Validate patched CB instead of user CB */
5392 user_cb = parser->user_cb;
5393 parser->user_cb = parser->patched_cb;
5394 rc = gaudi_validate_cb(hdev, parser, true);
5395 parser->user_cb = user_cb;
5398 hl_cb_put(parser->patched_cb);
5402 if (patched_cb_size != parser->patched_cb_size) {
5403 dev_err(hdev->dev, "user CB size mismatch\n");
5404 hl_cb_put(parser->patched_cb);
5411 * Always call cb destroy here because we still have 1 reference
5412 * to it by calling cb_get earlier. After the job is completed,
5413 * cb_put will release it, but here we want to remove it from the memory manager.
5416 hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5421 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5422 struct hl_cs_parser *parser)
5427 rc = gaudi_validate_cb(hdev, parser, false);
5432 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5433 parser->patched_cb_size, false, false,
5437 "Failed to allocate patched CB for DMA CS %d\n", rc);
5441 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5442 /* hl_cb_get should never fail here */
5443 if (!parser->patched_cb) {
5444 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5449 rc = gaudi_patch_cb(hdev, parser);
5452 hl_cb_put(parser->patched_cb);
5456 * Always call cb destroy here because we still have 1 reference
5457 * to it by calling cb_get earlier. After the job is completed,
5458 * cb_put will release it, but here we want to remove it from the memory manager.
5461 hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5465 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5469 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5470 struct hl_cs_parser *parser)
5472 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5473 struct gaudi_device *gaudi = hdev->asic_specific;
5474 u32 nic_queue_offset, nic_mask_q_id;
5476 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5477 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5478 nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5479 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5481 if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5482 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5487 /* For internal queue jobs just check if CB address is valid */
5488 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5489 parser->user_cb_size,
5490 asic_prop->sram_user_base_address,
5491 asic_prop->sram_end_address))
5494 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5495 parser->user_cb_size,
5496 asic_prop->dram_user_base_address,
5497 asic_prop->dram_end_address))
5500 /* PMMU and HPMMU addresses are equal, check only one of them */
5501 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5502 parser->user_cb_size,
5503 asic_prop->pmmu.start_addr,
5504 asic_prop->pmmu.end_addr))
5508 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5509 parser->user_cb, parser->user_cb_size);
5514 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5516 struct gaudi_device *gaudi = hdev->asic_specific;
5518 if (parser->queue_type == QUEUE_TYPE_INT)
5519 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5521 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5522 return gaudi_parse_cb_mmu(hdev, parser);
5524 return gaudi_parse_cb_no_mmu(hdev, parser);
5527 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
5528 u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
5529 u32 msi_vec, bool eb)
5531 struct packet_msg_prot *cq_pkt;
5532 struct packet_nop *cq_padding;
5536 cq_padding = kernel_address + original_len;
5537 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5539 while ((void *)cq_padding < (void *)cq_pkt) {
5540 cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
5544 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5545 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5548 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5550 cq_pkt->ctl = cpu_to_le32(tmp);
5551 cq_pkt->value = cpu_to_le32(cq_val);
5552 cq_pkt->addr = cpu_to_le64(cq_addr);
5556 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5557 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5558 cq_pkt->ctl = cpu_to_le32(tmp);
5559 cq_pkt->value = cpu_to_le32(1);
5560 msi_addr = hdev->pdev ? mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4;
5561 cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
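/* The two MSG_PROT packets built above form the completion path: the
 * first writes cq_val into the completion queue entry at cq_addr, and
 * the second writes to the PCIe MSI request register so that interrupt
 * msi_vec fires towards the host.
 */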
5564 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5566 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5569 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5572 struct packet_lin_dma *lin_dma_pkt;
5573 struct hl_cs_job *job;
5574 u32 cb_size, ctl, err_cause;
5578 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5582 lin_dma_pkt = cb->kernel_address;
5583 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5584 cb_size = sizeof(*lin_dma_pkt);
5586 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5587 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5588 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5589 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5590 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5592 lin_dma_pkt->ctl = cpu_to_le32(ctl);
5593 lin_dma_pkt->src_addr = cpu_to_le64(val);
5594 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5595 lin_dma_pkt->tsize = cpu_to_le32(size);
5597 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5599 dev_err(hdev->dev, "Failed to allocate a new job\n");
5604 /* Verify DMA is OK */
5605 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5606 if (err_cause && !hdev->init_done) {
5608 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5610 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5615 atomic_inc(&job->user_cb->cs_cnt);
5616 job->user_cb_size = cb_size;
5617 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5618 job->patched_cb = job->user_cb;
5619 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5621 hl_debugfs_add_job(hdev, job);
5623 rc = gaudi_send_job_on_qman0(hdev, job);
5624 hl_debugfs_remove_job(hdev, job);
5626 atomic_dec(&cb->cs_cnt);
5628 /* Verify DMA is OK */
5629 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5631 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5633 if (!hdev->init_done) {
5635 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5637 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5643 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5648 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5649 u32 num_regs, u32 val)
5651 struct packet_msg_long *pkt;
5652 struct hl_cs_job *job;
5657 cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5659 if (cb_size > SZ_2M) {
5660 dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M);
5664 cb = hl_cb_kernel_create(hdev, cb_size, false);
5668 pkt = cb->kernel_address;
5670 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5671 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5672 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5673 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5674 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5676 for (i = 0; i < num_regs ; i++, pkt++) {
5677 pkt->ctl = cpu_to_le32(ctl);
5678 pkt->value = cpu_to_le32(val);
5679 pkt->addr = cpu_to_le64(reg_base + (i * 4));
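/* One MSG_LONG packet per register: every packet shares the same ctl
 * and value, only the target address advances by 4 bytes per iteration.
 */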
5682 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5684 dev_err(hdev->dev, "Failed to allocate a new job\n");
5691 atomic_inc(&job->user_cb->cs_cnt);
5692 job->user_cb_size = cb_size;
5693 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5694 job->patched_cb = job->user_cb;
5695 job->job_cb_size = cb_size;
5697 hl_debugfs_add_job(hdev, job);
5699 rc = gaudi_send_job_on_qman0(hdev, job);
5700 hl_debugfs_remove_job(hdev, job);
5702 atomic_dec(&cb->cs_cnt);
5706 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5711 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5717 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5718 num_regs = NUM_OF_SOB_IN_BLOCK;
5719 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5721 dev_err(hdev->dev, "failed resetting SM registers");
5725 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5726 num_regs = NUM_OF_SOB_IN_BLOCK;
5727 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5729 dev_err(hdev->dev, "failed resetting SM registers");
5733 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5734 num_regs = NUM_OF_SOB_IN_BLOCK;
5735 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5737 dev_err(hdev->dev, "failed resetting SM registers");
5741 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5742 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5743 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5745 dev_err(hdev->dev, "failed resetting SM registers");
5749 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5750 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5751 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5753 dev_err(hdev->dev, "failed resetting SM registers");
5757 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5758 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5759 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5761 dev_err(hdev->dev, "failed resetting SM registers");
5765 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5766 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5767 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5768 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5770 dev_err(hdev->dev, "failed resetting SM registers");
5774 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5775 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5776 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5777 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5779 dev_err(hdev->dev, "failed resetting SM registers");
5786 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5788 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5789 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5792 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5793 u64 sob_addr = CFG_BASE +
5794 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5796 u32 dma_offset = i * DMA_CORE_OFFSET;
5798 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5799 lower_32_bits(sob_addr));
5800 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5801 upper_32_bits(sob_addr));
5802 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5804 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5805 * modified by the user for SRAM reduction
5808 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5813 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5818 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5819 qman_offset = i * DMA_QMAN_OFFSET;
5820 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5823 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5824 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5825 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5828 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5829 qman_offset = i * TPC_QMAN_OFFSET;
5830 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5833 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5834 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5835 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5836 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5840 static int gaudi_restore_user_registers(struct hl_device *hdev)
5841 {
5842 int rc;
5844 rc = gaudi_restore_sm_registers(hdev);
5845 if (rc)
5846 return rc;
5848 gaudi_restore_dma_registers(hdev);
5849 gaudi_restore_qm_registers(hdev);
5851 return 0;
5852 }
5854 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5855 {
5856 return 0;
5857 }
5859 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5860 {
5861 u32 size = hdev->asic_prop.mmu_pgt_size +
5862 hdev->asic_prop.mmu_cache_mng_size;
5863 struct gaudi_device *gaudi = hdev->asic_specific;
5864 u64 addr = hdev->asic_prop.mmu_pgt_addr;
5866 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5867 return 0;
5869 return gaudi_memset_device_memory(hdev, addr, size, 0);
5870 }
5872 static void gaudi_restore_phase_topology(struct hl_device *hdev)
5873 {
5875 }
5877 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
5878 u32 size_to_dma, dma_addr_t dma_addr)
5879 {
5880 u32 err_cause, val;
5881 u64 dma_offset;
5882 int rc;
5884 dma_offset = dma_id * DMA_CORE_OFFSET;
5886 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
5887 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
5888 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
5889 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
5890 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
5891 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
5892 (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
5894 rc = hl_poll_timeout(
5895 hdev,
5896 mmDMA0_CORE_STS0 + dma_offset,
5897 val,
5898 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
5899 0,
5900 1000000);
5902 if (rc) {
5903 dev_err(hdev->dev,
5904 "DMA %d timed-out during reading of 0x%llx\n",
5905 dma_id, addr);
5906 return -EIO;
5907 }
5909 /* Verify DMA is OK */
5910 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5911 if (err_cause) {
5912 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5913 dev_dbg(hdev->dev,
5914 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5915 err_cause);
5916 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5918 return -EIO;
5919 }
5921 return 0;
5922 }
5924 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
5925 void *blob_addr)
5926 {
5927 u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
5928 u32 qm_glbl_sts0, qm_cgm_sts;
5929 u64 dma_offset, qm_offset;
5930 dma_addr_t dma_addr;
5931 void *kernel_addr;
5932 bool is_eng_idle;
5933 int rc = 0, dma_id;
5935 kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);
5937 if (!kernel_addr)
5938 return -ENOMEM;
5940 hdev->asic_funcs->hw_queues_lock(hdev);
5942 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
5943 dma_offset = dma_id * DMA_CORE_OFFSET;
5944 qm_offset = dma_id * DMA_QMAN_OFFSET;
5945 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
5946 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
5947 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
5948 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
5949 IS_DMA_IDLE(dma_core_sts0);
5951 if (!is_eng_idle) {
5952 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
5953 dma_offset = dma_id * DMA_CORE_OFFSET;
5954 qm_offset = dma_id * DMA_QMAN_OFFSET;
5955 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
5956 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
5957 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
5958 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
5959 IS_DMA_IDLE(dma_core_sts0);
5961 if (!is_eng_idle) {
5962 dev_err_ratelimited(hdev->dev,
5963 "Can't read via DMA because it is BUSY\n");
5964 rc = -EAGAIN;
5965 goto out;
5966 }
5967 }
5969 cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
5970 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
5971 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
5973 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
5974 * using the compute ctx ASID, if exists. If not, use the kernel ctx
5975 * ASID
5976 */
5977 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
5979 /* Verify DMA is OK */
5980 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5981 if (err_cause) {
5982 dev_dbg(hdev->dev,
5983 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5984 err_cause);
5985 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5986 }
5988 pos = 0;
5989 size_left = size;
5990 size_to_dma = SZ_2M;
5992 while (size_left > 0) {
5994 if (size_left < SZ_2M)
5995 size_to_dma = size_left;
5997 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
5998 dma_addr);
5999 if (rc)
6000 break;
6002 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6004 if (size_left <= SZ_2M)
6005 break;
6007 pos += SZ_2M;
6008 addr += SZ_2M;
6009 size_left -= SZ_2M;
6010 }
6012 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6013 * using the compute ctx ASID, if exists. If not, use the kernel ctx
6014 * ASID
6015 */
6016 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6017 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6019 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6021 out:
6022 hdev->asic_funcs->hw_queues_unlock(hdev);
6024 hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);
6026 return rc;
6027 }
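/*
 * Chunking example for the loop above: the bounce buffer is SZ_2M, so a
 * 5MB request is served as 2MB + 2MB + 1MB transfers, with pos/addr
 * advancing by SZ_2M after each full chunk.
 */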
6029 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6030 {
6031 struct gaudi_device *gaudi = hdev->asic_specific;
6033 if (hdev->reset_info.hard_reset_pending)
6034 return U64_MAX;
6036 return readq(hdev->pcie_bar[HBM_BAR_ID] +
6037 (addr - gaudi->hbm_bar_cur_addr));
6038 }
6040 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6041 {
6042 struct gaudi_device *gaudi = hdev->asic_specific;
6044 if (hdev->reset_info.hard_reset_pending)
6045 return;
6047 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6048 (addr - gaudi->hbm_bar_cur_addr));
6049 }
6051 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6052 {
6053 /* mask to zero the MMBP and ASID bits */
6054 WREG32_AND(reg, ~0x7FF);
6055 WREG32_OR(reg, asid);
6056 }
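/*
 * Assumed field layout behind the ~0x7FF mask above: bits [9:0] hold the
 * ASID and bit 10 is MMBP (MMU bypass), so the AND clears all eleven bits
 * and the OR installs the new ASID with MMBP left at 0, i.e. translation
 * enabled.
 */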
6058 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6059 {
6060 struct gaudi_device *gaudi = hdev->asic_specific;
6062 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6063 return;
6065 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6066 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6067 return;
6068 }
6070 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6071 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6072 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6073 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6074 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6076 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6077 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6078 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6079 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6080 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6082 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6083 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6084 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6085 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6086 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6088 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6089 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6090 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6091 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6092 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6094 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6095 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6096 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6097 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6098 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6100 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6101 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6102 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6103 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6104 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6106 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6107 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6108 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6109 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6110 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6112 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6113 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6114 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6115 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6116 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6118 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6119 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6120 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6121 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6122 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6123 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6124 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6125 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6127 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6128 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6129 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6130 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6131 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6132 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6133 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6135 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6136 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6137 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6138 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6139 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6140 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6141 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6143 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6144 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6145 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6146 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6147 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6148 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6149 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6151 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6152 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6153 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6154 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6155 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6156 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6157 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6159 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6160 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6161 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6162 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6163 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6164 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6165 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6167 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6168 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6169 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6170 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6171 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6172 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6173 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6175 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6176 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6177 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6178 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6179 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6180 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6181 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6183 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6184 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6185 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6186 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6187 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6188 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6189 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6191 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6192 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6193 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6194 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6195 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6196 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6197 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6198 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6199 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6200 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6202 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6203 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6204 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6205 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6206 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6207 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6208 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6209 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6210 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6211 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6212 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6213 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6215 if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6216 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0, asid);
6218 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1, asid);
6220 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2, asid);
6222 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3, asid);
6224 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4, asid);
6226 }
6228 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6229 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0, asid);
6231 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1, asid);
6233 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2, asid);
6235 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3, asid);
6237 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4, asid);
6239 }
6241 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6242 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0, asid);
6244 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1, asid);
6246 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2, asid);
6248 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3, asid);
6250 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4, asid);
6252 }
6254 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6255 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0, asid);
6257 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1, asid);
6259 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2, asid);
6261 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3, asid);
6263 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4, asid);
6265 }
6267 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6268 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0, asid);
6270 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1, asid);
6272 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2, asid);
6274 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3, asid);
6276 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4, asid);
6278 }
6280 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6281 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0, asid);
6283 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1, asid);
6285 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2, asid);
6287 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3, asid);
6289 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4, asid);
6291 }
6293 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6294 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0, asid);
6296 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1, asid);
6298 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2, asid);
6300 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3, asid);
6302 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4, asid);
6304 }
6306 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6307 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0, asid);
6309 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1, asid);
6311 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2, asid);
6313 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3, asid);
6315 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4, asid);
6317 }
6319 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6320 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0, asid);
6322 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1, asid);
6324 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2, asid);
6326 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3, asid);
6328 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4, asid);
6330 }
6332 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6333 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0, asid);
6335 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1, asid);
6337 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2, asid);
6339 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3, asid);
6341 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4, asid);
6343 }
6345 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6346 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6347 }
6349 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6350 struct hl_cs_job *job)
6351 {
6352 struct packet_msg_prot *fence_pkt;
6353 u32 *fence_ptr;
6354 dma_addr_t fence_dma_addr;
6355 struct hl_cb *cb;
6356 u32 tmp, timeout, dma_offset;
6357 int rc;
6359 if (hdev->pldm)
6360 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6361 else
6362 timeout = HL_DEVICE_TIMEOUT_USEC;
6364 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
6365 if (!fence_ptr) {
6366 dev_err(hdev->dev,
6367 "Failed to allocate fence memory for QMAN0\n");
6368 return -ENOMEM;
6369 }
6371 cb = job->patched_cb;
6373 fence_pkt = cb->kernel_address +
6374 job->job_cb_size - sizeof(struct packet_msg_prot);
6376 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6377 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6378 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6380 fence_pkt->ctl = cpu_to_le32(tmp);
6381 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6382 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6384 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6386 WREG32(mmDMA0_CORE_PROT + dma_offset,
6387 BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6389 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6390 job->job_cb_size, cb->bus_address);
6391 if (rc) {
6392 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6393 goto free_fence_ptr;
6394 }
6396 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6397 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6398 timeout, true);
6400 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6402 if (rc == -ETIMEDOUT) {
6403 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6404 goto free_fence_ptr;
6405 }
6407 free_fence_ptr:
6408 WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6410 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
6412 return rc;
6413 }
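/*
 * Completion here is fence-based rather than CQ-based: the CB's final
 * MSG_PROT packet writes GAUDI_QMAN0_FENCE_VAL to a small host buffer,
 * and the driver simply polls that buffer until the value appears or the
 * timeout expires.
 */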
6414 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6415 {
6416 if (event_type >= GAUDI_EVENT_SIZE)
6417 goto event_not_supported;
6419 if (!gaudi_irq_map_table[event_type].valid)
6420 goto event_not_supported;
6422 snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6423 return;
6426 event_not_supported:
6427 snprintf(desc, size, "N/A");
6428 }
6430 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6431 bool is_write, u16 *engine_id_1,
6432 u16 *engine_id_2)
6433 {
6434 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6436 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6437 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6439 switch (x_y) {
6440 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6441 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6442 dma_id[0] = 0;
6443 dma_id[1] = 2;
6444 break;
6445 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6446 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6447 dma_id[0] = 1;
6448 dma_id[1] = 3;
6449 break;
6450 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6451 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6452 dma_id[0] = 4;
6453 dma_id[1] = 6;
6454 break;
6455 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6456 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6457 dma_id[0] = 5;
6458 dma_id[1] = 7;
6459 break;
6460 default:
6461 goto unknown_initiator;
6462 }
6464 for (i = 0 ; i < 2 ; i++) {
6465 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6466 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6467 }
6469 switch (x_y) {
6470 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6471 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6472 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6473 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6474 return "DMA0";
6475 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6476 *engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6477 return "DMA2";
6478 } else {
6479 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6480 *engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6481 return "DMA0 or DMA2";
6482 }
6483 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6484 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6485 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6486 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6487 return "DMA1";
6488 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6489 *engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6490 return "DMA3";
6491 } else {
6492 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6493 *engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6494 return "DMA1 or DMA3";
6495 }
6496 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6497 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6498 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6499 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6500 return "DMA4";
6501 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6502 *engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6503 return "DMA6";
6504 } else {
6505 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6506 *engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6507 return "DMA4 or DMA6";
6508 }
6509 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6510 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6511 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6512 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6513 return "DMA5";
6514 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6515 *engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6516 return "DMA7";
6517 } else {
6518 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6519 *engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6520 return "DMA5 or DMA7";
6521 }
6522 }
6524 unknown_initiator:
6525 return "unknown initiator";
6526 }
6528 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6529 u16 *engine_id_1, u16 *engine_id_2)
6530 {
6531 u32 val, x_y, axi_id;
6533 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6534 RREG32(mmMMU_UP_RAZWI_READ_ID);
6535 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6536 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6537 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6538 RAZWI_INITIATOR_AXI_ID_SHIFT);
6540 switch (x_y) {
6541 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6542 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6543 *engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6544 return "TPC0";
6545 }
6546 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6547 *engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6548 return "NIC0";
6549 }
6550 break;
6551 case RAZWI_INITIATOR_ID_X_Y_TPC1:
6552 *engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
6553 return "TPC1";
6554 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6555 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6556 *engine_id_1 = GAUDI_ENGINE_ID_MME_0;
6557 return "MME0";
6558 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6559 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6560 *engine_id_1 = GAUDI_ENGINE_ID_MME_1;
6561 return "MME1";
6562 case RAZWI_INITIATOR_ID_X_Y_TPC2:
6563 *engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
6564 return "TPC2";
6565 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6566 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6567 *engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
6568 return "TPC3";
6569 }
6570 /* PCI, CPU or PSOC does not have engine id */
6571 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6572 return "PCI";
6573 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6574 return "CPU";
6575 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6576 return "PSOC";
6577 break;
6578 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6579 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6580 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6581 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6582 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6583 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6584 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6585 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6586 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
6587 engine_id_1, engine_id_2);
6588 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6589 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6590 *engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
6591 return "TPC4";
6592 }
6593 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6594 *engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
6595 return "NIC1";
6596 }
6597 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6598 *engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
6599 return "NIC2";
6600 }
6601 break;
6602 case RAZWI_INITIATOR_ID_X_Y_TPC5:
6603 *engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
6604 return "TPC5";
6605 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6606 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6607 *engine_id_1 = GAUDI_ENGINE_ID_MME_2;
6608 return "MME2";
6609 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6610 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6611 *engine_id_1 = GAUDI_ENGINE_ID_MME_3;
6612 return "MME3";
6613 case RAZWI_INITIATOR_ID_X_Y_TPC6:
6614 *engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
6615 return "TPC6";
6616 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6617 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6618 *engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
6619 return "TPC7";
6620 }
6621 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6622 *engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
6623 return "NIC4";
6624 }
6625 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6626 *engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
6627 return "NIC5";
6628 }
6629 break;
6630 default:
6631 break;
6632 }
6634 dev_err_ratelimited(hdev->dev,
6635 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6636 val,
6637 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6638 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6639 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6640 RAZWI_INITIATOR_AXI_ID_MASK);
6642 return "unknown initiator";
6643 }
6645 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1,
6646 u16 *engine_id_2, bool *is_read, bool *is_write)
6647 {
6649 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6650 dev_err_ratelimited(hdev->dev,
6651 "RAZWI event caused by illegal write of %s\n",
6652 gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
6653 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6654 *is_write = true;
6655 }
6657 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6658 dev_err_ratelimited(hdev->dev,
6659 "RAZWI event caused by illegal read of %s\n",
6660 gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
6661 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6662 *is_read = true;
6663 }
6664 }
6666 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
6667 {
6668 struct gaudi_device *gaudi = hdev->asic_specific;
6669 u32 val;
6671 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6672 return;
6674 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6675 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6676 *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6677 *addr <<= 32;
6678 *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6680 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
6681 hl_handle_page_fault(hdev, *addr, 0, true, event_mask);
6683 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6684 }
6686 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6687 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6688 *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6689 *addr <<= 32;
6690 *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6692 dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
6694 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6695 }
6696 }
6698 /*
6699 * +-------------------+------------------------------------------------------+
6700 * | Configuration Reg |                     Description                      |
6701 * |      Address      |                                                      |
6702 * +-------------------+------------------------------------------------------+
6703 * |   0xF30 - 0xF3F   |ECC single error indication (1 bit per memory wrapper)|
6704 * |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
6705 * |                   |0xF34 memory wrappers 63:32                           |
6706 * |                   |0xF38 memory wrappers 95:64                           |
6707 * |                   |0xF3C memory wrappers 127:96                          |
6708 * +-------------------+------------------------------------------------------+
6709 * |   0xF40 - 0xF4F   |ECC double error indication (1 bit per memory wrapper)|
6710 * |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
6711 * |                   |0xF44 memory wrappers 63:32                           |
6712 * |                   |0xF48 memory wrappers 95:64                           |
6713 * |                   |0xF4C memory wrappers 127:96                          |
6714 * +-------------------+------------------------------------------------------+
6715 */
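/*
 * Worked example of the scheme above: for num_memories = 90 the driver
 * scans ceil(90 / 32) = 3 indication registers; a set bit 5 in the second
 * register (0xF34 or 0xF44) identifies memory wrapper 32 + 5 = 37.
 */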
6716 static int gaudi_extract_ecc_info(struct hl_device *hdev,
6717 struct ecc_info_extract_params *params, u64 *ecc_address,
6718 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
6719 {
6720 u32 i, num_mem_regs, reg, err_bit;
6721 u64 err_addr, err_word = 0;
6723 num_mem_regs = params->num_memories / 32 +
6724 ((params->num_memories % 32) ? 1 : 0);
6726 if (params->block_address >= CFG_BASE)
6727 params->block_address -= CFG_BASE;
6729 if (params->derr)
6730 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
6731 else
6732 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
6734 /* Set invalid wrapper index */
6735 *memory_wrapper_idx = 0xFF;
6737 /* Iterate through memory wrappers, a single bit must be set */
6738 for (i = 0 ; i < num_mem_regs ; i++) {
6739 err_addr += i * 4;
6740 err_word = RREG32(err_addr);
6741 if (err_word) {
6742 err_bit = __ffs(err_word);
6743 *memory_wrapper_idx = err_bit + (32 * i);
6744 break;
6745 }
6746 }
6748 if (*memory_wrapper_idx == 0xFF) {
6749 dev_err(hdev->dev, "ECC error information cannot be found\n");
6750 return -EINVAL;
6751 }
6753 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6754 *memory_wrapper_idx);
6756 *ecc_address =
6757 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6758 *ecc_syndrom =
6759 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6761 /* Clear error indication */
6762 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6763 if (params->derr)
6764 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6765 else
6766 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6768 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6770 return 0;
6771 }
6773 /**
6774 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6775 *
6776 * @idx: the current pi/ci value
6777 * @q_len: the queue length (power of 2)
6778 *
6779 * @return the cyclically decremented index
6780 */
6781 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
6782 {
6783 u32 mask = q_len - 1;
6785 /*
6786 * modular decrement is equivalent to adding (queue_size - 1)
6787 * later we take LSBs to make sure the value is in the
6788 * range [0, queue_len - 1]
6789 */
6790 return (idx + q_len - 1) & mask;
6791 }
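/*
 * Example: with q_len = 1024 (mask = 0x3FF), idx = 5 decrements to 4 and
 * idx = 0 wraps to (0 + 1023) & 0x3FF = 1023.
 */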
6793 /**
6794 * gaudi_handle_sw_config_stream_data - print SW config stream data
6795 *
6796 * @hdev: pointer to the habanalabs device structure
6797 * @stream: the QMAN's stream
6798 * @qman_base: base address of QMAN registers block
6799 * @event_mask: mask of the last events occurred
6800 */
6801 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
6802 u64 qman_base, u64 event_mask)
6803 {
6804 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6805 u32 cq_ptr_lo_off, size;
6807 cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
6809 cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
6810 stream * cq_ptr_lo_off;
6811 cq_ptr_hi = cq_ptr_lo +
6812 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
6813 cq_tsize = cq_ptr_lo +
6814 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
6816 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6817 size = RREG32(cq_tsize);
6818 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
6819 stream, cq_ptr, size);
6821 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6822 hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
6823 hdev->captured_err_info.undef_opcode.cq_size = size;
6824 hdev->captured_err_info.undef_opcode.stream_id = stream;
6825 }
6826 }
6828 /**
6829 * gaudi_handle_last_pqes_on_err - print last PQEs on error
6830 *
6831 * @hdev: pointer to the habanalabs device structure
6832 * @qid_base: first QID of the QMAN (out of 4 streams)
6833 * @stream: the QMAN's stream
6834 * @qman_base: base address of QMAN registers block
6835 * @event_mask: mask of the last events occurred
6836 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6837 */
6838 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
6839 u32 stream, u64 qman_base,
6840 u64 event_mask,
6841 bool pr_sw_conf)
6842 {
6843 u32 ci, qm_ci_stream_off, queue_len;
6844 struct hl_hw_queue *q;
6845 u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
6846 int i;
6848 q = &hdev->kernel_queues[qid_base + stream];
6850 qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
6851 pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
6852 stream * qm_ci_stream_off;
6854 queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
6855 q->int_queue_len : HL_QUEUE_LENGTH;
6857 hdev->asic_funcs->hw_queues_lock(hdev);
6859 if (pr_sw_conf)
6860 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6862 ci = RREG32(pq_ci);
6864 /* we should start printing from ci - 1 */
6865 ci = gaudi_queue_idx_dec(ci, queue_len);
6866 memset(addr, 0, sizeof(addr));
6868 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6869 struct hl_bd *bd;
6870 u32 len;
6872 bd = q->kernel_address;
6873 bd += ci;
6875 len = le32_to_cpu(bd->len);
6876 /* len 0 means uninitialized entry - break */
6877 if (!len)
6878 break;
6880 addr[i] = le64_to_cpu(bd->ptr);
6882 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
6883 stream, ci, addr[i], len);
6885 /* get previous ci, wrap if needed */
6886 ci = gaudi_queue_idx_dec(ci, queue_len);
6887 }
6889 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6890 struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
6891 u32 arr_idx = undef_opcode->cb_addr_streams_len;
6893 if (arr_idx == 0) {
6894 undef_opcode->timestamp = ktime_get();
6895 undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
6896 }
6898 memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
6899 undef_opcode->cb_addr_streams_len++;
6900 }
6902 hdev->asic_funcs->hw_queues_unlock(hdev);
6903 }
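/*
 * Walk summary: starting from ci - 1 and decrementing with wrap-around,
 * up to PQ_FETCHER_CACHE_SIZE descriptors are dumped; a zero-length BD
 * marks an uninitialized entry and ends the walk early.
 */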
6905 /**
6906 * handle_qman_data_on_err - extract QMAN data on error
6907 *
6908 * @hdev: pointer to the habanalabs device structure
6909 * @qid_base: first QID of the QMAN (out of 4 streams)
6910 * @stream: the QMAN's stream
6911 * @qman_base: base address of QMAN registers block
6912 * @event_mask: mask of the last events occurred
6913 *
6914 * This function attempts to extract as much data as possible on QMAN error.
6915 * On upper CP print the SW config stream data and last 8 PQEs.
6916 * On lower CP print SW config data and last PQEs of ALL 4 upper CPs.
6917 */
6918 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
6919 u32 stream, u64 qman_base, u64 event_mask)
6920 {
6921 u32 i;
6923 if (stream != QMAN_STREAMS) {
6924 gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
6925 qman_base, event_mask, true);
6926 return;
6927 }
6929 /* handle Lower-CP */
6930 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6932 for (i = 0; i < QMAN_STREAMS; i++)
6933 gaudi_handle_last_pqes_on_err(hdev, qid_base, i,
6934 qman_base, event_mask, false);
6935 }
6937 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
6938 const char *qm_name,
6939 u64 qman_base,
6940 u32 qid_base,
6941 u64 *event_mask)
6942 {
6943 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
6944 u64 glbl_sts_addr, arb_err_addr;
6945 char reg_desc[32];
6947 glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
6948 arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
6950 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
6951 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
6952 glbl_sts_clr_val = 0;
6953 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
6955 if (!glbl_sts_val)
6956 continue;
6958 if (i == QMAN_STREAMS)
6959 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
6960 else
6961 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
6963 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
6964 if (glbl_sts_val & BIT(j)) {
6965 dev_err_ratelimited(hdev->dev,
6966 "%s %s. err cause: %s\n",
6967 qm_name, reg_desc,
6968 gaudi_qman_error_cause[j]);
6969 glbl_sts_clr_val |= BIT(j);
6970 }
6971 }
6972 /* check for undefined opcode */
6973 if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
6974 hdev->captured_err_info.undef_opcode.write_enable) {
6975 memset(&hdev->captured_err_info.undef_opcode, 0,
6976 sizeof(hdev->captured_err_info.undef_opcode));
6978 hdev->captured_err_info.undef_opcode.write_enable = false;
6979 *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
6980 }
6982 /* Write 1 clear errors */
6983 if (!hdev->stop_on_err)
6984 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
6985 else
6986 handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
6987 }
6989 arb_err_val = RREG32(arb_err_addr);
6991 if (!arb_err_val)
6992 return;
6994 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
6995 if (arb_err_val & BIT(j)) {
6996 dev_err_ratelimited(hdev->dev,
6997 "%s ARB_ERR. err cause: %s\n",
6998 qm_name,
6999 gaudi_qman_arb_error_cause[j]);
7000 }
7001 }
7002 }
7004 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7005 struct hl_eq_sm_sei_data *sei_data)
7006 {
7007 u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7009 /* Flip the bits as the enum is ordered in the opposite way */
7010 index = (index ^ 0x3) & 0x3;
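/*
 * The XOR flip maps index 0 <-> 3 and 1 <-> 2, compensating for the
 * reversed ordering of the DMA_IF SEI events relative to
 * gaudi_sync_manager_names.
 */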
7012 switch (sei_data->sei_cause) {
7013 case SM_SEI_SO_OVERFLOW:
7014 dev_err_ratelimited(hdev->dev,
7015 "%s SEI Error: SOB Group %u overflow/underflow",
7016 gaudi_sync_manager_names[index],
7017 le32_to_cpu(sei_data->sei_log));
7018 break;
7019 case SM_SEI_LBW_4B_UNALIGNED:
7020 dev_err_ratelimited(hdev->dev,
7021 "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7022 gaudi_sync_manager_names[index],
7023 le32_to_cpu(sei_data->sei_log));
7024 break;
7025 case SM_SEI_AXI_RESPONSE_ERR:
7026 dev_err_ratelimited(hdev->dev,
7027 "%s SEI Error: AXI ID %u response error",
7028 gaudi_sync_manager_names[index],
7029 le32_to_cpu(sei_data->sei_log));
7030 break;
7031 default:
7032 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7033 le32_to_cpu(sei_data->sei_log));
7034 break;
7035 }
7036 }
7038 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7039 struct hl_eq_ecc_data *ecc_data)
7040 {
7041 struct ecc_info_extract_params params;
7042 u64 ecc_address = 0, ecc_syndrom = 0;
7043 u8 index, memory_wrapper_idx = 0;
7044 bool extract_info_from_fw;
7045 int rc;
7047 if (hdev->asic_prop.fw_security_enabled) {
7048 extract_info_from_fw = true;
7049 goto extract_ecc_info;
7050 }
7052 switch (event_type) {
7053 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7054 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7055 extract_info_from_fw = true;
7056 break;
7057 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7058 index = event_type - GAUDI_EVENT_TPC0_SERR;
7059 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7060 params.num_memories = 90;
7061 params.derr = false;
7062 extract_info_from_fw = false;
7063 break;
7064 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7065 index = event_type - GAUDI_EVENT_TPC0_DERR;
7066 params.block_address =
7067 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7068 params.num_memories = 90;
7069 params.derr = true;
7070 extract_info_from_fw = false;
7071 break;
7072 case GAUDI_EVENT_MME0_ACC_SERR:
7073 case GAUDI_EVENT_MME1_ACC_SERR:
7074 case GAUDI_EVENT_MME2_ACC_SERR:
7075 case GAUDI_EVENT_MME3_ACC_SERR:
7076 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7077 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7078 params.num_memories = 128;
7079 params.derr = false;
7080 extract_info_from_fw = false;
7081 break;
7082 case GAUDI_EVENT_MME0_ACC_DERR:
7083 case GAUDI_EVENT_MME1_ACC_DERR:
7084 case GAUDI_EVENT_MME2_ACC_DERR:
7085 case GAUDI_EVENT_MME3_ACC_DERR:
7086 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7087 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7088 params.num_memories = 128;
7089 params.derr = true;
7090 extract_info_from_fw = false;
7091 break;
7092 case GAUDI_EVENT_MME0_SBAB_SERR:
7093 case GAUDI_EVENT_MME1_SBAB_SERR:
7094 case GAUDI_EVENT_MME2_SBAB_SERR:
7095 case GAUDI_EVENT_MME3_SBAB_SERR:
7096 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7097 params.block_address =
7098 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7099 params.num_memories = 33;
7100 params.derr = false;
7101 extract_info_from_fw = false;
7102 break;
7103 case GAUDI_EVENT_MME0_SBAB_DERR:
7104 case GAUDI_EVENT_MME1_SBAB_DERR:
7105 case GAUDI_EVENT_MME2_SBAB_DERR:
7106 case GAUDI_EVENT_MME3_SBAB_DERR:
7107 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7108 params.block_address =
7109 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7110 params.num_memories = 33;
7111 params.derr = true;
7112 extract_info_from_fw = false;
7113 break;
7114 default:
7115 return;
7116 }
7118 extract_ecc_info:
7119 if (extract_info_from_fw) {
7120 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7121 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7122 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7123 } else {
7124 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7125 &ecc_syndrom, &memory_wrapper_idx);
7126 if (rc)
7127 return;
7128 }
7130 dev_err(hdev->dev,
7131 "ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7132 ecc_address, ecc_syndrom, memory_wrapper_idx);
7133 }
7135 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7136 {
7137 u64 qman_base;
7138 char desc[32];
7139 u32 qid_base;
7140 u8 index;
7142 switch (event_type) {
7143 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7144 index = event_type - GAUDI_EVENT_TPC0_QM;
7145 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7146 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7147 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7148 break;
7149 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7150 if (event_type == GAUDI_EVENT_MME0_QM) {
7151 index = 0;
7152 qid_base = GAUDI_QUEUE_ID_MME_0_0;
7153 } else { /* event_type == GAUDI_EVENT_MME2_QM */
7154 index = 2;
7155 qid_base = GAUDI_QUEUE_ID_MME_1_0;
7156 }
7157 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7158 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7159 break;
7160 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7161 index = event_type - GAUDI_EVENT_DMA0_QM;
7162 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7163 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7164 if (index > 1)
7165 qid_base++;
7166 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7167 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7168 break;
7169 case GAUDI_EVENT_NIC0_QM0:
7170 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7171 qman_base = mmNIC0_QM0_BASE;
7172 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7173 break;
7174 case GAUDI_EVENT_NIC0_QM1:
7175 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7176 qman_base = mmNIC0_QM1_BASE;
7177 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7178 break;
7179 case GAUDI_EVENT_NIC1_QM0:
7180 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7181 qman_base = mmNIC1_QM0_BASE;
7182 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7183 break;
7184 case GAUDI_EVENT_NIC1_QM1:
7185 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7186 qman_base = mmNIC1_QM1_BASE;
7187 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7188 break;
7189 case GAUDI_EVENT_NIC2_QM0:
7190 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7191 qman_base = mmNIC2_QM0_BASE;
7192 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7193 break;
7194 case GAUDI_EVENT_NIC2_QM1:
7195 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7196 qman_base = mmNIC2_QM1_BASE;
7197 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7198 break;
7199 case GAUDI_EVENT_NIC3_QM0:
7200 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7201 qman_base = mmNIC3_QM0_BASE;
7202 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7203 break;
7204 case GAUDI_EVENT_NIC3_QM1:
7205 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7206 qman_base = mmNIC3_QM1_BASE;
7207 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7208 break;
7209 case GAUDI_EVENT_NIC4_QM0:
7210 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7211 qman_base = mmNIC4_QM0_BASE;
7212 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7213 break;
7214 case GAUDI_EVENT_NIC4_QM1:
7215 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7216 qman_base = mmNIC4_QM1_BASE;
7217 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7218 break;
7219 default:
7220 return;
7221 }
7223 gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
7224 }
7226 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7227 bool check_razwi, u64 *event_mask)
7228 {
7229 bool is_read = false, is_write = false;
7230 u16 engine_id[2], num_of_razwi_eng = 0;
7231 char desc[64] = "";
7232 u64 razwi_addr = 0;
7233 u8 razwi_flags = 0;
7235 /*
7236 * Init engine id by default as not valid and only if razwi initiated from engine with
7237 * engine id it will get valid value.
7238 */
7239 engine_id[0] = HL_RAZWI_NA_ENG_ID;
7240 engine_id[1] = HL_RAZWI_NA_ENG_ID;
7242 gaudi_get_event_desc(event_type, desc, sizeof(desc));
7243 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7244 event_type, desc);
7246 if (check_razwi) {
7247 gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,
7248 &is_write);
7249 gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);
7251 if (is_read)
7252 razwi_flags |= HL_RAZWI_READ;
7253 if (is_write)
7254 razwi_flags |= HL_RAZWI_WRITE;
7256 if (engine_id[0] != HL_RAZWI_NA_ENG_ID) {
7257 if (engine_id[1] != HL_RAZWI_NA_ENG_ID)
7258 num_of_razwi_eng = 2;
7259 else
7260 num_of_razwi_eng = 1;
7261 }
7263 if (razwi_flags)
7264 hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng,
7265 razwi_flags, event_mask);
7266 }
7267 }
7269 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7270 struct cpucp_pkt_sync_err *sync_err)
7271 {
7272 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7274 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
7275 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
7276 }
7278 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7279 struct hl_eq_fw_alive *fw_alive)
7280 {
7281 dev_err(hdev->dev,
7282 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7283 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical",
7284 le32_to_cpu(fw_alive->process_id),
7285 le32_to_cpu(fw_alive->thread_id),
7286 le64_to_cpu(fw_alive->uptime_seconds));
7287 }
7289 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
7290 void *data)
7291 {
7292 char desc[64] = "", *type;
7293 struct eq_nic_sei_event *eq_nic_sei = data;
7294 u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
7296 switch (eq_nic_sei->axi_error_cause) {
7297 case RXB:
7298 type = "RXB";
7299 break;
7300 case RXE:
7301 type = "RXE";
7302 break;
7303 case TXS:
7304 type = "TXS";
7305 break;
7306 case TXE:
7307 type = "TXE";
7308 break;
7309 case QPC_RESP:
7310 type = "QPC_RESP";
7311 break;
7312 case NON_AXI_ERR:
7313 type = "NON_AXI_ERR";
7314 break;
7315 case TMR:
7316 type = "TMR";
7317 break;
7318 default:
7319 dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
7320 eq_nic_sei->axi_error_cause);
7321 type = "N/A";
7322 break;
7323 }
7325 snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
7326 eq_nic_sei->id);
7327 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7328 event_type, desc);
7329 }
7331 static int gaudi_compute_reset_late_init(struct hl_device *hdev)
7332 {
7333 /* GAUDI doesn't support any reset except hard-reset */
7334 return 0;
7335 }
7337 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7338 struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7339 {
7340 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7341 int rc = 0;
7343 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7344 CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7345 if (!hbm_ecc_data) {
7346 dev_err(hdev->dev, "No FW ECC data");
7347 return 0;
7348 }
7350 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7351 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7352 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7353 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7354 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7355 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7356 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7357 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7358 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7359 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7360 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7361 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7362 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7363 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7365 dev_err_ratelimited(hdev->dev,
7366 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7367 device, ch, wr_par, rd_par, ca_par, serr, derr);
7368 dev_err_ratelimited(hdev->dev,
7369 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7370 device, ch, hbm_ecc_data->first_addr, type,
7371 hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7372 hbm_ecc_data->dec_cnt);
7374 return 0;
7375 }
7376 if (hdev->asic_prop.fw_security_enabled) {
7377 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7378 return 0;
7379 }
7381 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7382 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7383 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7384 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7385 if (val) {
7386 rc = -EIO;
7387 dev_err_ratelimited(hdev->dev,
7388 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7389 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7390 (val >> 2) & 0x1, (val >> 3) & 0x1,
7391 (val >> 4) & 0x1);
7393 val2 = RREG32(base + ch * 0x1000 + 0x060);
7394 dev_err_ratelimited(hdev->dev,
7395 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7396 device, ch * 2,
7397 RREG32(base + ch * 0x1000 + 0x064),
7398 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7399 (val2 & 0xFF0000) >> 16,
7400 (val2 & 0xFF000000) >> 24);
7401 }
7403 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7404 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7405 if (val) {
7406 rc = -EIO;
7407 dev_err_ratelimited(hdev->dev,
7408 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7409 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7410 (val >> 2) & 0x1, (val >> 3) & 0x1,
7411 (val >> 4) & 0x1);
7413 val2 = RREG32(base + ch * 0x1000 + 0x070);
7414 dev_err_ratelimited(hdev->dev,
7415 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7416 device, ch * 2 + 1,
7417 RREG32(base + ch * 0x1000 + 0x074),
7418 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7419 (val2 & 0xFF0000) >> 16,
7420 (val2 & 0xFF000000) >> 24);
7421 }
7423 /* Clear interrupts */
7424 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7425 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7426 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7427 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7428 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7429 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7430 }
7432 val = RREG32(base + 0x8F30);
7433 val2 = RREG32(base + 0x8F34);
7434 if (val | val2) {
7435 rc = -EIO;
7436 dev_err_ratelimited(hdev->dev,
7437 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7438 device, val, val2);
7439 }
7440 val = RREG32(base + 0x8F40);
7441 val2 = RREG32(base + 0x8F44);
7442 if (val | val2) {
7443 rc = -EIO;
7444 dev_err_ratelimited(hdev->dev,
7445 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7446 device, val, val2);
7447 }
7449 return rc;
7450 }
7452 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7453 {
7454 switch (hbm_event_type) {
7455 case GAUDI_EVENT_HBM0_SPI_0:
7456 case GAUDI_EVENT_HBM0_SPI_1:
7457 return 0;
7458 case GAUDI_EVENT_HBM1_SPI_0:
7459 case GAUDI_EVENT_HBM1_SPI_1:
7460 return 1;
7461 case GAUDI_EVENT_HBM2_SPI_0:
7462 case GAUDI_EVENT_HBM2_SPI_1:
7463 return 2;
7464 case GAUDI_EVENT_HBM3_SPI_0:
7465 case GAUDI_EVENT_HBM3_SPI_1:
7466 return 3;
7467 default:
7468 break;
7469 }
7471 /* Should never happen */
7472 return 0;
7473 }
7475 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7476 char *interrupt_name)
7477 {
7478 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7479 bool soft_reset_required = false;
7481 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7482 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7484 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7485 if (tpc_interrupts_cause & BIT(i)) {
7486 dev_err_ratelimited(hdev->dev,
7487 "TPC%d_%s interrupt cause: %s\n",
7488 tpc_id, interrupt_name,
7489 gaudi_tpc_interrupts_cause[i]);
7490 /* If this is QM error, we need to soft-reset */
7491 if (i == 15)
7492 soft_reset_required = true;
7493 }
7495 /* Clear interrupts */
7496 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7498 return soft_reset_required;
7499 }
7501 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7502 {
7503 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7504 }
7506 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7507 {
7508 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7509 }
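/*
 * Spacing behind the math above (as implied by the event numbering): the
 * per-TPC DEC events appear to be two event IDs apart, hence the >> 1,
 * while consecutive TPCs' KRN_ERR events are six IDs apart, hence the / 6.
 */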
7511 static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7512 {
7513 ktime_t zero_time = ktime_set(0, 0);
7515 mutex_lock(&hdev->clk_throttling.lock);
7517 switch (event_type) {
7518 case GAUDI_EVENT_FIX_POWER_ENV_S:
7519 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7520 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7521 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7522 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7523 dev_info_ratelimited(hdev->dev,
7524 "Clock throttling due to power consumption\n");
7525 break;
7527 case GAUDI_EVENT_FIX_POWER_ENV_E:
7528 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7529 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7530 dev_info_ratelimited(hdev->dev,
7531 "Power envelope is safe, back to optimal clock\n");
7532 break;
7534 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7535 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7536 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7537 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7538 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7539 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7540 dev_info_ratelimited(hdev->dev,
7541 "Clock throttling due to overheating\n");
7542 break;
7544 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7545 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7546 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7547 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7548 dev_info_ratelimited(hdev->dev,
7549 "Thermal envelope is safe, back to optimal clock\n");
7550 break;
7552 default:
7553 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7554 event_type);
7555 break;
7556 }
7558 mutex_unlock(&hdev->clk_throttling.lock);
7559 }
7561 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
7562 {
7563 struct gaudi_device *gaudi = hdev->asic_specific;
7564 struct hl_info_fw_err_info fw_err_info;
7565 u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
7566 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7567 u32 fw_fatal_err_flag = 0, flags = 0;
7568 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7569 >> EQ_CTL_EVENT_TYPE_SHIFT);
7570 bool reset_required, reset_direct = false;
7571 u8 cause;
7572 int rc;
7574 if (event_type >= GAUDI_EVENT_SIZE) {
7575 dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7576 event_type, GAUDI_EVENT_SIZE - 1);
7577 return;
7578 }
7580 gaudi->events_stat[event_type]++;
7581 gaudi->events_stat_aggregate[event_type]++;
7583 switch (event_type) {
7584 case GAUDI_EVENT_PCIE_CORE_DERR:
7585 case GAUDI_EVENT_PCIE_IF_DERR:
7586 case GAUDI_EVENT_PCIE_PHY_DERR:
7587 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7588 case GAUDI_EVENT_MME0_ACC_DERR:
7589 case GAUDI_EVENT_MME0_SBAB_DERR:
7590 case GAUDI_EVENT_MME1_ACC_DERR:
7591 case GAUDI_EVENT_MME1_SBAB_DERR:
7592 case GAUDI_EVENT_MME2_ACC_DERR:
7593 case GAUDI_EVENT_MME2_SBAB_DERR:
7594 case GAUDI_EVENT_MME3_ACC_DERR:
7595 case GAUDI_EVENT_MME3_SBAB_DERR:
7596 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7597 fallthrough;
7598 case GAUDI_EVENT_CPU_IF_ECC_DERR:
7599 case GAUDI_EVENT_PSOC_MEM_DERR:
7600 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7601 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7602 case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
7603 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7604 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7605 case GAUDI_EVENT_MMU_DERR:
7606 case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7607 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7608 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7609 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7610 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7611 goto reset_device;
7613 case GAUDI_EVENT_GIC500:
7614 case GAUDI_EVENT_AXI_ECC:
7615 case GAUDI_EVENT_L2_RAM_ECC:
7616 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7617 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7618 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7619 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7620 goto reset_device;
7622 case GAUDI_EVENT_HBM0_SPI_0:
7623 case GAUDI_EVENT_HBM1_SPI_0:
7624 case GAUDI_EVENT_HBM2_SPI_0:
7625 case GAUDI_EVENT_HBM3_SPI_0:
7626 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7627 gaudi_hbm_read_interrupts(hdev,
7628 gaudi_hbm_event_to_dev(event_type),
7629 &eq_entry->hbm_ecc_data);
7630 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7631 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7632 goto reset_device;
7634 case GAUDI_EVENT_HBM0_SPI_1:
7635 case GAUDI_EVENT_HBM1_SPI_1:
7636 case GAUDI_EVENT_HBM2_SPI_1:
7637 case GAUDI_EVENT_HBM3_SPI_1:
7638 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7639 gaudi_hbm_read_interrupts(hdev,
7640 gaudi_hbm_event_to_dev(event_type),
7641 &eq_entry->hbm_ecc_data);
7642 hl_fw_unmask_irq(hdev, event_type);
7643 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7644 break;
7646 case GAUDI_EVENT_TPC0_DEC:
7647 case GAUDI_EVENT_TPC1_DEC:
7648 case GAUDI_EVENT_TPC2_DEC:
7649 case GAUDI_EVENT_TPC3_DEC:
7650 case GAUDI_EVENT_TPC4_DEC:
7651 case GAUDI_EVENT_TPC5_DEC:
7652 case GAUDI_EVENT_TPC6_DEC:
7653 case GAUDI_EVENT_TPC7_DEC:
7654 /* In TPC DEC event, notify on TPC assertion. While there isn't
7655 * a specific event for assertion yet, the FW generates TPC DEC event.
7656 * The SW upper layer will inspect an internal mapped area to indicate
7657 * if the event is a TPC Assertion or a "real" TPC DEC.
7658 */
7659 event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
7660 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7661 reset_required = gaudi_tpc_read_interrupts(hdev,
7662 tpc_dec_event_to_tpc_id(event_type),
7663 "AXI_SLV_DEC_Error");
7664 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7665 if (reset_required) {
7666 dev_err(hdev->dev, "reset required due to %s\n",
7667 gaudi_irq_map_table[event_type].name);
7669 reset_direct = true;
7670 goto reset_device;
7671 } else {
7672 hl_fw_unmask_irq(hdev, event_type);
7673 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7677 case GAUDI_EVENT_TPC0_KRN_ERR:
7678 case GAUDI_EVENT_TPC1_KRN_ERR:
7679 case GAUDI_EVENT_TPC2_KRN_ERR:
7680 case GAUDI_EVENT_TPC3_KRN_ERR:
7681 case GAUDI_EVENT_TPC4_KRN_ERR:
7682 case GAUDI_EVENT_TPC5_KRN_ERR:
7683 case GAUDI_EVENT_TPC6_KRN_ERR:
7684 case GAUDI_EVENT_TPC7_KRN_ERR:
7685 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7686 reset_required = gaudi_tpc_read_interrupts(hdev,
7687 tpc_krn_event_to_tpc_id(event_type),
7689 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7690 if (reset_required) {
7691 dev_err(hdev->dev, "reset required due to %s\n",
7692 gaudi_irq_map_table[event_type].name);
7694 reset_direct = true;
7695 goto reset_device;
7696 } else {
7697 hl_fw_unmask_irq(hdev, event_type);
7698 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7702 case GAUDI_EVENT_PCIE_CORE_SERR:
7703 case GAUDI_EVENT_PCIE_IF_SERR:
7704 case GAUDI_EVENT_PCIE_PHY_SERR:
7705 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7706 case GAUDI_EVENT_MME0_ACC_SERR:
7707 case GAUDI_EVENT_MME0_SBAB_SERR:
7708 case GAUDI_EVENT_MME1_ACC_SERR:
7709 case GAUDI_EVENT_MME1_SBAB_SERR:
7710 case GAUDI_EVENT_MME2_ACC_SERR:
7711 case GAUDI_EVENT_MME2_SBAB_SERR:
7712 case GAUDI_EVENT_MME3_ACC_SERR:
7713 case GAUDI_EVENT_MME3_SBAB_SERR:
7714 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7715 case GAUDI_EVENT_CPU_IF_ECC_SERR:
7716 case GAUDI_EVENT_PSOC_MEM_SERR:
7717 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7718 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7719 case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
7720 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7721 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7723 case GAUDI_EVENT_MMU_SERR:
7724 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7725 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7726 hl_fw_unmask_irq(hdev, event_type);
7727 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7730 case GAUDI_EVENT_PCIE_DEC:
7731 case GAUDI_EVENT_CPU_AXI_SPLITTER:
7732 case GAUDI_EVENT_PSOC_AXI_DEC:
7733 case GAUDI_EVENT_PSOC_PRSTN_FALL:
7734 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7735 hl_fw_unmask_irq(hdev, event_type);
7736 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7739 case GAUDI_EVENT_MMU_PAGE_FAULT:
7740 case GAUDI_EVENT_MMU_WR_PERM:
7741 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7742 hl_fw_unmask_irq(hdev, event_type);
7743 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7746 case GAUDI_EVENT_MME0_WBC_RSP:
7747 case GAUDI_EVENT_MME0_SBAB0_RSP:
7748 case GAUDI_EVENT_MME1_WBC_RSP:
7749 case GAUDI_EVENT_MME1_SBAB0_RSP:
7750 case GAUDI_EVENT_MME2_WBC_RSP:
7751 case GAUDI_EVENT_MME2_SBAB0_RSP:
7752 case GAUDI_EVENT_MME3_WBC_RSP:
7753 case GAUDI_EVENT_MME3_SBAB0_RSP:
7754 case GAUDI_EVENT_RAZWI_OR_ADC:
7755 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7756 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7758 case GAUDI_EVENT_NIC0_QM0:
7759 case GAUDI_EVENT_NIC0_QM1:
7760 case GAUDI_EVENT_NIC1_QM0:
7761 case GAUDI_EVENT_NIC1_QM1:
7762 case GAUDI_EVENT_NIC2_QM0:
7763 case GAUDI_EVENT_NIC2_QM1:
7764 case GAUDI_EVENT_NIC3_QM0:
7765 case GAUDI_EVENT_NIC3_QM1:
7766 case GAUDI_EVENT_NIC4_QM0:
7767 case GAUDI_EVENT_NIC4_QM1:
7768 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7769 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7770 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7771 gaudi_handle_qman_err(hdev, event_type, &event_mask);
7772 hl_fw_unmask_irq(hdev, event_type);
7773 event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
7776 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7777 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7778 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7781 case GAUDI_EVENT_TPC0_BMON_SPMU:
7782 case GAUDI_EVENT_TPC1_BMON_SPMU:
7783 case GAUDI_EVENT_TPC2_BMON_SPMU:
7784 case GAUDI_EVENT_TPC3_BMON_SPMU:
7785 case GAUDI_EVENT_TPC4_BMON_SPMU:
7786 case GAUDI_EVENT_TPC5_BMON_SPMU:
7787 case GAUDI_EVENT_TPC6_BMON_SPMU:
7788 case GAUDI_EVENT_TPC7_BMON_SPMU:
7789 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7790 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7791 hl_fw_unmask_irq(hdev, event_type);
7792 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7795 case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
7796 gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
7797 hl_fw_unmask_irq(hdev, event_type);
7798 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7801 case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7802 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7803 gaudi_print_sm_sei_info(hdev, event_type,
7804 &eq_entry->sm_sei_data);
7805 rc = hl_state_dump(hdev);
7806 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7809 "Error during system state dump %d\n", rc);
7810 hl_fw_unmask_irq(hdev, event_type);
7813 case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
7816 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7817 gaudi_print_clk_change_info(hdev, event_type, &event_mask);
7818 hl_fw_unmask_irq(hdev, event_type);
7821 case GAUDI_EVENT_PSOC_GPIO_U16_0:
7822 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7824 "Received high temp H/W interrupt %d (cause %d)\n",
7826 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7829 case GAUDI_EVENT_DEV_RESET_REQ:
7830 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7831 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7834 case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
7835 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7836 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
7837 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7840 case GAUDI_EVENT_FW_ALIVE_S:
7841 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7842 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
7843 fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR;
7844 fw_err_info.event_id = event_type;
7845 fw_err_info.event_mask = &event_mask;
7846 hl_handle_fw_err(hdev, &fw_err_info);
7850 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7851 event_type);
7856 hl_notifier_event_send_all(hdev, event_mask);
7858 return;
7860 reset_device:
7861 reset_required = true;
7863 if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
7864 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;
7866 /* notify on device unavailable while the reset is triggered by FW */
7867 event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
7868 HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
7869 } else if (hdev->hard_reset_on_fw_events) {
7870 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
7871 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7872 } else {
7873 reset_required = false;
7876 if (reset_required) {
7877 /* escalate general hw errors to critical/fatal error */
7878 if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
7879 hl_handle_critical_hw_err(hdev, event_type, &event_mask);
7881 hl_device_cond_reset(hdev, flags, event_mask);
7882 } else {
7883 hl_fw_unmask_irq(hdev, event_type);
7884 /* A notification about the event must be sent even though no reset is executed */
7886 hl_notifier_event_send_all(hdev, event_mask);
7890 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7892 struct gaudi_device *gaudi = hdev->asic_specific;
7894 if (aggregate) {
7895 *size = (u32) sizeof(gaudi->events_stat_aggregate);
7896 return gaudi->events_stat_aggregate;
7897 }
7899 *size = (u32) sizeof(gaudi->events_stat);
7900 return gaudi->events_stat;
7903 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
7905 struct gaudi_device *gaudi = hdev->asic_specific;
7906 u32 status, timeout_usec;
7909 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7910 hdev->reset_info.hard_reset_pending)
7911 return 0;
7913 if (hdev->pldm)
7914 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7915 else
7916 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7918 /* L0 & L1 invalidation */
7919 WREG32(mmSTLB_INV_PS, 3);
7920 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
7921 WREG32(mmSTLB_INV_PS, 2);
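/* The invalidation is kicked by bumping the STLB cache-invalidation
 * producer index; the surrounding INV_PS writes presumably select the
 * full (L0 + L1) invalidation scope. The poll below waits for the HW
 * to report completion before mmSTLB_INV_SET is cleared.
 */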
7923 rc = hl_poll_timeout(
7931 WREG32(mmSTLB_INV_SET, 0);
7936 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
7937 bool is_hard, u32 flags,
7938 u32 asid, u64 va, u64 size)
7940 /* Treat as invalidate all because there is no range invalidation
7941 * capability in Gaudi
7942 */
7943 return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
7946 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
7948 u32 status, timeout_usec;
7951 if (hdev->pldm)
7952 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7953 else
7954 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7956 WREG32(MMU_ASID, asid);
7957 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
7958 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
7959 WREG32(MMU_BUSY, 0x80000000);
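/* Writing the MSB of MMU_BUSY latches the new hop0 address for this
 * ASID; the HW clears the bit when it is done, which is exactly what
 * the poll below waits for (!(status & 0x80000000)).
 */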
7961 rc = hl_poll_timeout(
7965 !(status & 0x80000000),
7971 "Timeout during MMU hop0 config of asid %d\n", asid);
7978 static int gaudi_send_heartbeat(struct hl_device *hdev)
7980 struct gaudi_device *gaudi = hdev->asic_specific;
7982 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7983 return 0;
7985 return hl_fw_send_heartbeat(hdev);
7988 static int gaudi_cpucp_info_get(struct hl_device *hdev)
7990 struct gaudi_device *gaudi = hdev->asic_specific;
7991 struct asic_fixed_properties *prop = &hdev->asic_prop;
7994 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7995 return 0;
7997 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
7998 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
7999 mmCPU_BOOT_ERR1);
8000 if (rc)
8001 return rc;
8003 if (!strlen(prop->cpucp_info.card_name))
8004 strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8005 CARD_NAME_MAX_LEN);
8007 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8009 set_default_power_values(hdev);
8014 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
8015 struct engines_data *e)
8017 struct gaudi_device *gaudi = hdev->asic_specific;
8018 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8019 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8020 const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8021 unsigned long *mask = (unsigned long *)mask_arr;
8022 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8023 bool is_idle = true, is_eng_idle, is_slave;
8025 int i, dma_id, port;
8028 hl_engine_data_sprintf(e,
8029 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
8030 "--- ------- ------------ ---------- -------------\n");
8032 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8033 dma_id = gaudi_dma_assignment[i];
8034 offset = dma_id * DMA_QMAN_OFFSET;
8036 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8037 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8038 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8039 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8040 IS_DMA_IDLE(dma_core_sts0);
8041 is_idle &= is_eng_idle;
8043 if (mask && !is_eng_idle)
8044 set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8046 hl_engine_data_sprintf(e, fmt, dma_id,
8047 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8048 qm_cgm_sts, dma_core_sts0);
8052 hl_engine_data_sprintf(e,
8053 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
8054 "--- ------- ------------ ---------- ----------\n");
8056 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8057 offset = i * TPC_QMAN_OFFSET;
8058 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8059 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8060 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8061 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8062 IS_TPC_IDLE(tpc_cfg_sts);
8063 is_idle &= is_eng_idle;
8065 if (mask && !is_eng_idle)
8066 set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8068 hl_engine_data_sprintf(e, fmt, i,
8069 is_eng_idle ? "Y" : "N",
8070 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8074 hl_engine_data_sprintf(e,
8075 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
8076 "--- ------- ------------ ---------- -----------\n");
8078 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8079 offset = i * MME_QMAN_OFFSET;
8080 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8081 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8083 /* MME 1 & 3 are slaves, no need to check their QMANs */
8084 is_slave = i == 1 || i == 3;
8085 if (!is_slave) {
8086 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8087 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8088 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8091 is_idle &= is_eng_idle;
8093 if (mask && !is_eng_idle)
8094 set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8097 hl_engine_data_sprintf(e, fmt, i,
8098 is_eng_idle ? "Y" : "N",
8099 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8100 else
8101 hl_engine_data_sprintf(e, mme_slave_fmt, i,
8102 is_eng_idle ? "Y" : "N", "-",
8103 "-", mme_arch_sts);
8108 hl_engine_data_sprintf(e,
8109 "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
8110 "--- ------- ------------ ----------\n");
8112 for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8113 offset = i * NIC_MACRO_QMAN_OFFSET;
8114 port = 2 * i;
8115 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8116 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8117 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8118 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8119 is_idle &= is_eng_idle;
8121 if (mask && !is_eng_idle)
8122 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8124 hl_engine_data_sprintf(e, nic_fmt, port,
8125 is_eng_idle ? "Y" : "N",
8126 qm_glbl_sts0, qm_cgm_sts);
8129 port = 2 * i + 1;
8130 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8131 qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8132 qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8133 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8134 is_idle &= is_eng_idle;
8136 if (mask && !is_eng_idle)
8137 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8139 hl_engine_data_sprintf(e, nic_fmt, port,
8140 is_eng_idle ? "Y" : "N",
8141 qm_glbl_sts0, qm_cgm_sts);
8146 hl_engine_data_sprintf(e, "\n");
8148 return is_idle;
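/* The aggregated is_idle result above is ANDed across all engines,
 * while the optional caller-supplied mask gets one bit set per busy
 * engine (GAUDI_ENGINE_ID_*), letting callers report exactly which
 * engines are still active.
 */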
8151 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8152 __acquires(&gaudi->hw_queues_lock)
8154 struct gaudi_device *gaudi = hdev->asic_specific;
8156 spin_lock(&gaudi->hw_queues_lock);
8159 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8160 __releases(&gaudi->hw_queues_lock)
8162 struct gaudi_device *gaudi = hdev->asic_specific;
8164 spin_unlock(&gaudi->hw_queues_lock);
8167 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8169 return hdev->pdev->device;
8172 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8175 struct gaudi_device *gaudi = hdev->asic_specific;
8177 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8178 return 0;
8180 return hl_fw_get_eeprom_data(hdev, data, max_size);
8183 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8185 struct gaudi_device *gaudi = hdev->asic_specific;
8187 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8188 return 0;
8190 return hl_fw_get_monitor_dump(hdev, data);
8193 /*
8194 * This function should be used only during initialization and/or after reset,
8195 * when there are no active users.
8196 */
8197 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id)
8203 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8205 if (hdev->pldm)
8206 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8207 else
8208 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8210 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8211 lower_32_bits(tpc_kernel));
8212 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8213 upper_32_bits(tpc_kernel));
8215 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8216 lower_32_bits(tpc_kernel));
8217 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8218 upper_32_bits(tpc_kernel));
8219 /* set a valid LUT pointer, content is of no significance */
8220 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8221 lower_32_bits(tpc_kernel));
8222 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8223 upper_32_bits(tpc_kernel));
8225 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8226 lower_32_bits(CFG_BASE +
8227 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8229 WREG32(mmTPC0_CFG_TPC_CMD + offset,
8230 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8231 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8232 /* wait a bit for the engine to start executing */
8233 usleep_range(1000, 1500);
8235 /* wait until engine has finished executing */
8236 rc = hl_poll_timeout(
8238 mmTPC0_CFG_STATUS + offset,
8240 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8241 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8247 "Timeout while waiting for TPC%d icache prefetch\n",
8252 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8253 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8255 /* wait a bit for the engine to start executing */
8256 usleep_range(1000, 1500);
8258 /* wait until engine has finished executing */
8259 rc = hl_poll_timeout(
8261 mmTPC0_CFG_STATUS + offset,
8263 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8264 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8270 "Timeout while waiting for TPC%d vector pipe\n",
8275 rc = hl_poll_timeout(
8277 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8285 "Timeout while waiting for TPC%d kernel to execute\n",
8293 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8296 struct gaudi_device *gaudi = hdev->asic_specific;
8297 int min_alloc_order, rc, collective_cb_size;
8299 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8300 return 0;
8302 hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
8303 HOST_SPACE_INTERNAL_CB_SZ,
8304 &hdev->internal_cb_pool_dma_addr,
8305 GFP_KERNEL | __GFP_ZERO);
8307 if (!hdev->internal_cb_pool_virt_addr)
8308 return -ENOMEM;
8310 collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8311 sizeof(struct packet_fence);
8312 min_alloc_order = ilog2(collective_cb_size);
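/* The pool's minimum allocation order is derived from the footprint of
 * a single collective CB (5 MSG_SHORT packets plus a fence packet), so
 * the pool hands out memory in collective-CB-sized granules.
 */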
8314 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8315 if (!hdev->internal_cb_pool) {
8317 "Failed to create internal CB pool\n");
8319 goto free_internal_cb_pool;
8322 rc = gen_pool_add(hdev->internal_cb_pool,
8323 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8324 HOST_SPACE_INTERNAL_CB_SZ, -1);
8327 "Failed to add memory to internal CB pool\n");
8329 goto destroy_internal_cb_pool;
8332 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8333 HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8334 HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8336 if (!hdev->internal_cb_va_base) {
8337 rc = -ENOMEM;
8338 goto destroy_internal_cb_pool;
8341 mutex_lock(&hdev->mmu_lock);
8343 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8344 hdev->internal_cb_pool_dma_addr,
8345 HOST_SPACE_INTERNAL_CB_SZ);
8346 if (rc)
8347 goto unreserve_internal_cb_pool;
8349 rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8350 if (rc)
8351 goto unmap_internal_cb_pool;
8353 mutex_unlock(&hdev->mmu_lock);
8355 return 0;
8357 unmap_internal_cb_pool:
8358 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8359 HOST_SPACE_INTERNAL_CB_SZ);
8360 unreserve_internal_cb_pool:
8361 mutex_unlock(&hdev->mmu_lock);
8362 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8363 HOST_SPACE_INTERNAL_CB_SZ);
8364 destroy_internal_cb_pool:
8365 gen_pool_destroy(hdev->internal_cb_pool);
8366 free_internal_cb_pool:
8367 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8368 hdev->internal_cb_pool_dma_addr);
8373 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8376 struct gaudi_device *gaudi = hdev->asic_specific;
8378 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8379 return;
8381 mutex_lock(&hdev->mmu_lock);
8382 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8383 HOST_SPACE_INTERNAL_CB_SZ);
8384 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8385 HOST_SPACE_INTERNAL_CB_SZ);
8386 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8387 mutex_unlock(&hdev->mmu_lock);
8389 gen_pool_destroy(hdev->internal_cb_pool);
8391 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8392 hdev->internal_cb_pool_dma_addr);
8395 static int gaudi_ctx_init(struct hl_ctx *ctx)
8399 if (ctx->asid == HL_KERNEL_ASID_ID)
8400 return 0;
8402 rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8403 if (rc)
8404 return rc;
8406 rc = gaudi_restore_user_registers(ctx->hdev);
8407 if (rc)
8408 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8410 return rc;
8413 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8415 if (ctx->asid == HL_KERNEL_ASID_ID)
8416 return;
8418 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8421 static int gaudi_pre_schedule_cs(struct hl_cs *cs)
8426 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8428 return gaudi_cq_assignment[cq_idx];
8431 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8433 return sizeof(struct packet_msg_short) +
8434 sizeof(struct packet_msg_prot) * 2;
8437 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8439 return sizeof(struct packet_msg_short) * 4 +
8440 sizeof(struct packet_fence) +
8441 sizeof(struct packet_msg_prot) * 2;
8444 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8446 return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
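/* Each sync object is a 32-bit register, hence the sob_id * 4 byte
 * offset from the first SOB of the west-south sync manager block.
 */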
8449 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8452 struct hl_cb *cb = (struct hl_cb *) data;
8453 struct packet_msg_short *pkt;
8454 u32 value, ctl, pkt_size = sizeof(*pkt);
8456 pkt = cb->kernel_address + size;
8457 memset(pkt, 0, pkt_size);
8459 /* Inc by 1, Mode ADD */
8460 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8461 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8463 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8464 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8465 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8466 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8467 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8468 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8469 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8471 pkt->value = cpu_to_le32(value);
8472 pkt->ctl = cpu_to_le32(ctl);
8474 return size + pkt_size;
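/* A signal CB is thus a single MSG_SHORT that atomically adds 1 to the
 * given SOB (mode ADD) relative to the W_S SOB base; the eb argument
 * controls whether the packet requests an engine barrier.
 */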
8477 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8480 u32 ctl, pkt_size = sizeof(*pkt);
8482 memset(pkt, 0, pkt_size);
8484 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8485 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8486 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8487 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8488 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8489 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* only the last pkt needs MB */
8491 pkt->value = cpu_to_le32(value);
8492 pkt->ctl = cpu_to_le32(ctl);
8494 return pkt_size;
8497 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8498 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8499 u16 sob_val, u16 mon_id)
8501 u64 monitor_base;
8502 u32 ctl, value, pkt_size = sizeof(*pkt);
8503 u16 msg_addr_offset;
8504 u8 mask;
8506 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8508 "sob_base %u (mask %#x) is not valid\n",
8509 sob_base, sob_mask);
8514 * monitor_base should hold the content of the base0 address registers,
8515 * as it is added to the msg_short offsets
8516 */
8517 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8519 msg_addr_offset =
8520 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8521 monitor_base;
8523 memset(pkt, 0, pkt_size);
8525 /* Monitor config packet: bind the monitor to a sync object */
8526 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8527 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8528 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8529 0); /* GREATER OR EQUAL */
8530 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8532 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8533 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8534 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8535 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8536 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8537 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8538 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8540 pkt->value = cpu_to_le32(value);
8541 pkt->ctl = cpu_to_le32(ctl);
8543 return pkt_size;
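/* The ARM register value packs the SOB group (8 SOBs per group, hence
 * sob_base / 8), the target value, compare mode 0 (greater-or-equal)
 * and the per-SOB mask, so this single MSG_SHORT both binds and arms
 * the monitor.
 */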
8546 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8548 u32 ctl, cfg, pkt_size = sizeof(*pkt);
8550 memset(pkt, 0, pkt_size);
8552 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8553 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8554 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8556 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8557 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8558 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8559 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8561 pkt->cfg = cpu_to_le32(cfg);
8562 pkt->ctl = cpu_to_le32(ctl);
8564 return pkt_size;
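/* The fence packet blocks the stream until fence counter ID 2 of the
 * queue reaches a target of 1, then decrements it by 1 to re-arm it;
 * the matching increment is the monitor payload write programmed in
 * gaudi_add_mon_pkts().
 */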
8567 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8569 u32 offset, nic_index;
8572 case GAUDI_QUEUE_ID_DMA_0_0:
8573 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8575 case GAUDI_QUEUE_ID_DMA_0_1:
8576 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8578 case GAUDI_QUEUE_ID_DMA_0_2:
8579 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8581 case GAUDI_QUEUE_ID_DMA_0_3:
8582 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8584 case GAUDI_QUEUE_ID_DMA_1_0:
8585 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8587 case GAUDI_QUEUE_ID_DMA_1_1:
8588 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8590 case GAUDI_QUEUE_ID_DMA_1_2:
8591 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8593 case GAUDI_QUEUE_ID_DMA_1_3:
8594 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8596 case GAUDI_QUEUE_ID_DMA_5_0:
8597 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8599 case GAUDI_QUEUE_ID_DMA_5_1:
8600 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8602 case GAUDI_QUEUE_ID_DMA_5_2:
8603 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8605 case GAUDI_QUEUE_ID_DMA_5_3:
8606 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8608 case GAUDI_QUEUE_ID_TPC_7_0:
8609 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8611 case GAUDI_QUEUE_ID_TPC_7_1:
8612 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8614 case GAUDI_QUEUE_ID_TPC_7_2:
8615 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8617 case GAUDI_QUEUE_ID_TPC_7_3:
8618 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8620 case GAUDI_QUEUE_ID_NIC_0_0:
8621 case GAUDI_QUEUE_ID_NIC_1_0:
8622 case GAUDI_QUEUE_ID_NIC_2_0:
8623 case GAUDI_QUEUE_ID_NIC_3_0:
8624 case GAUDI_QUEUE_ID_NIC_4_0:
8625 case GAUDI_QUEUE_ID_NIC_5_0:
8626 case GAUDI_QUEUE_ID_NIC_6_0:
8627 case GAUDI_QUEUE_ID_NIC_7_0:
8628 case GAUDI_QUEUE_ID_NIC_8_0:
8629 case GAUDI_QUEUE_ID_NIC_9_0:
8630 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8631 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8632 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8633 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
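/* NIC queues come two engines per NIC macro: nic_index >> 1 selects
 * the macro block and nic_index & 0x1 selects QM0/QM1 inside it. The
 * same arithmetic repeats for streams 1-3 below.
 */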
8635 case GAUDI_QUEUE_ID_NIC_0_1:
8636 case GAUDI_QUEUE_ID_NIC_1_1:
8637 case GAUDI_QUEUE_ID_NIC_2_1:
8638 case GAUDI_QUEUE_ID_NIC_3_1:
8639 case GAUDI_QUEUE_ID_NIC_4_1:
8640 case GAUDI_QUEUE_ID_NIC_5_1:
8641 case GAUDI_QUEUE_ID_NIC_6_1:
8642 case GAUDI_QUEUE_ID_NIC_7_1:
8643 case GAUDI_QUEUE_ID_NIC_8_1:
8644 case GAUDI_QUEUE_ID_NIC_9_1:
8645 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8646 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8647 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8648 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8650 case GAUDI_QUEUE_ID_NIC_0_2:
8651 case GAUDI_QUEUE_ID_NIC_1_2:
8652 case GAUDI_QUEUE_ID_NIC_2_2:
8653 case GAUDI_QUEUE_ID_NIC_3_2:
8654 case GAUDI_QUEUE_ID_NIC_4_2:
8655 case GAUDI_QUEUE_ID_NIC_5_2:
8656 case GAUDI_QUEUE_ID_NIC_6_2:
8657 case GAUDI_QUEUE_ID_NIC_7_2:
8658 case GAUDI_QUEUE_ID_NIC_8_2:
8659 case GAUDI_QUEUE_ID_NIC_9_2:
8660 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8661 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8662 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8663 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8665 case GAUDI_QUEUE_ID_NIC_0_3:
8666 case GAUDI_QUEUE_ID_NIC_1_3:
8667 case GAUDI_QUEUE_ID_NIC_2_3:
8668 case GAUDI_QUEUE_ID_NIC_3_3:
8669 case GAUDI_QUEUE_ID_NIC_4_3:
8670 case GAUDI_QUEUE_ID_NIC_5_3:
8671 case GAUDI_QUEUE_ID_NIC_6_3:
8672 case GAUDI_QUEUE_ID_NIC_7_3:
8673 case GAUDI_QUEUE_ID_NIC_8_3:
8674 case GAUDI_QUEUE_ID_NIC_9_3:
8675 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8676 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8677 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8678 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8680 default:
8681 return -EINVAL;
8684 *addr = CFG_BASE + offset;
8686 return 0;
8689 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8691 u32 size = 0;
8692 u64 monitor_base;
8693 u16 msg_addr_offset;
8696 * monitor_base should hold the content of the base0 address registers,
8697 * as it is added to the msg_short offsets
8698 */
8699 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8701 /* First monitor config packet: low address of the sync */
8702 msg_addr_offset =
8703 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8704 monitor_base;
8706 size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8709 /* Second monitor config packet: high address of the sync */
8710 msg_addr_offset =
8711 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8712 monitor_base;
8714 size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8717 /*
8718 * Third monitor config packet: the payload, i.e. what to write when the
8719 * sync triggers
8720 */
8721 msg_addr_offset =
8722 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8723 monitor_base;
8725 size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8727 return size;
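/* Together, the three MSG_SHORTs program the monitor's payload address
 * (low/high) and payload data, so once the monitor is armed and its
 * SOB condition is met, the sync manager writes the value 1 to
 * fence_addr.
 */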
8730 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8731 struct hl_gen_wait_properties *prop)
8733 struct hl_cb *cb = (struct hl_cb *) prop->data;
8734 void *buf = cb->kernel_address;
8735 u64 fence_addr;
8736 u32 size = prop->size;
8738 if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8739 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8740 prop->q_idx);
8741 return 0;
8742 }
8744 size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8745 size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8746 prop->sob_mask, prop->sob_val, prop->mon_id);
8747 size += gaudi_add_fence_pkt(buf + size);
8749 return size;
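/* The wait CB built here is 4 MSG_SHORTs (3 monitor setup + 1 arm)
 * plus a fence packet, matching the msg_short/fence portion of
 * gaudi_get_wait_cb_size(); the two MSG_PROT packets counted there are
 * presumably appended later by the common completion logic.
 */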
8752 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8754 struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8756 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8757 hw_sob->sob_id);
8759 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8760 hw_sob->sob_id * 4, 0);
8762 kref_init(&hw_sob->kref);
8765 static u64 gaudi_get_device_time(struct hl_device *hdev)
8767 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8769 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
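/* The 64-bit timestamp is composed from the upper and lower 32-bit
 * halves of the free-running PSOC counter. The high word is sampled
 * first without a re-read, so a carry between the two reads could in
 * principle skew a sample; presumably acceptable for this use case.
 */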
8772 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
8773 u32 *block_size, u32 *block_id)
8778 static int gaudi_block_mmap(struct hl_device *hdev,
8779 struct vm_area_struct *vma,
8780 u32 block_id, u32 block_size)
8785 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8787 struct cpu_dyn_regs *dyn_regs =
8788 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
8789 u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
8790 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
8791 le32_to_cpu(dyn_regs->gic_host_ints_irq);
8793 WREG32(irq_handler_offset,
8794 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
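/* Depending on whether the driver may touch the GIC directly, the
 * INTS_REGISTER event ID is written either to the GIC SETSPI register
 * or to the host-interrupts mailbox offset that the FW publishes in
 * its dynamic registers descriptor.
 */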
8797 static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
8802 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
8805 case HL_GAUDI_CPU_PLL: return CPU_PLL;
8806 case HL_GAUDI_PCI_PLL: return PCI_PLL;
8807 case HL_GAUDI_NIC_PLL: return NIC_PLL;
8808 case HL_GAUDI_DMA_PLL: return DMA_PLL;
8809 case HL_GAUDI_MESH_PLL: return MESH_PLL;
8810 case HL_GAUDI_MME_PLL: return MME_PLL;
8811 case HL_GAUDI_TPC_PLL: return TPC_PLL;
8812 case HL_GAUDI_IF_PLL: return IF_PLL;
8813 case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
8814 case HL_GAUDI_HBM_PLL: return HBM_PLL;
8815 default: return -EINVAL;
8819 static int gaudi_add_sync_to_engine_map_entry(
8820 struct hl_sync_to_engine_map *map, u32 reg_value,
8821 enum hl_sync_engine_type engine_type, u32 engine_id)
8823 struct hl_sync_to_engine_map_entry *entry;
8825 /* The register value represents a partial address of the sync object;
8826 * it is used as a unique identifier, so the CFG base bits must be
8827 * cleared from the value first.
8828 */
8829 if (reg_value == 0 || reg_value == 0xffffffff)
8830 return 0;
8831 reg_value -= lower_32_bits(CFG_BASE);
8833 /* create a new hash entry */
8834 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
8835 if (!entry)
8836 return -ENOMEM;
8837 entry->engine_type = engine_type;
8838 entry->engine_id = engine_id;
8839 entry->sync_id = reg_value;
8840 hash_add(map->tb, &entry->node, reg_value);
8842 return 0;
8845 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
8846 struct hl_sync_to_engine_map *map)
8848 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8852 /* Iterate over TPC engines */
8853 for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
8855 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
8856 sds->props[SP_NEXT_TPC] * i);
8858 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8859 ENGINE_TPC, i);
8860 if (rc)
8861 goto free_sync_to_engine_map;
8864 /* Iterate over MME engines */
8865 for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
8866 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
8868 reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
8869 sds->props[SP_NEXT_MME] * i +
8872 rc = gaudi_add_sync_to_engine_map_entry(
8873 map, reg_value, ENGINE_MME,
8874 i * sds->props[SP_SUB_MME_ENG_NUM] + j);
8875 if (rc)
8876 goto free_sync_to_engine_map;
8880 /* Iterate over DMA engines */
8881 for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
8882 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
8883 sds->props[SP_DMA_QUEUES_OFFSET] * i);
8884 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8885 ENGINE_DMA, i);
8886 if (rc)
8887 goto free_sync_to_engine_map;
8890 return 0;
8892 free_sync_to_engine_map:
8893 hl_state_dump_free_sync_to_engine_map(map);
8895 return rc;
8898 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
8900 return FIELD_GET(
8901 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
8902 mon->status);
8905 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
8907 const size_t max_write = 10;
8911 /* Sync object ID is calculated as follows:
8912 * (8 * group_id + cleared bits in mask)
8913 */
8914 gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8915 mon->arm_data);
8916 mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8917 mon->arm_data);
8919 for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
8920 max_write; mask >>= 1, i++) {
8922 sob = gid * MONITOR_MAX_SOBS + i;
8924 if (offset)
8925 offset += snprintf(sobs + offset, max_write,
8926 ", ");
8928 offset += snprintf(sobs + offset, max_write, "%u", sob);
8933 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
8934 struct hl_device *hdev,
8935 struct hl_mon_state_dump *mon)
8938 char scratch_buf1[BIN_REG_STRING_SIZE],
8939 scratch_buf2[BIN_REG_STRING_SIZE];
8940 char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
8942 name = hl_state_dump_get_monitor_name(hdev, mon);
8943 if (!name)
8944 return -ENOMEM;
8946 gaudi_fill_sobs_from_mon(monitored_sobs, mon);
8948 return hl_snprintf_resize(
8950 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
8952 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8954 hl_format_as_binary(
8955 scratch_buf1, sizeof(scratch_buf1),
8957 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8959 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
8962 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
8963 hl_format_as_binary(
8964 scratch_buf2, sizeof(scratch_buf2),
8966 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
8972 static int gaudi_print_fences_single_engine(
8973 struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
8974 enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
8975 size_t *size, size_t *offset)
8977 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8978 int rc = -ENOMEM, i;
8979 u32 *statuses, *fences;
8981 statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
8982 sizeof(*statuses), GFP_KERNEL);
8986 fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
8987 sds->props[SP_ENGINE_NUM_OF_QUEUES],
8988 sizeof(*fences), GFP_KERNEL);
8992 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
8993 statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
8995 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
8996 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
8997 fences[i] = RREG32(base_offset + i * sizeof(u32));
8999 /* The actual print */
9000 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9001 u32 fence_id;
9002 u64 fence_cnt, fence_rdata;
9003 const char *engine_name;
9005 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9006 statuses[i]))
9007 continue;
9009 fence_id =
9010 FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9011 fence_cnt = base_offset + CFG_BASE +
9012 sizeof(u32) *
9013 (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9014 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9015 sds->props[SP_FENCE0_RDATA_OFFSET];
9016 engine_name = hl_sync_engine_to_string(engine_type);
9018 rc = hl_snprintf_resize(
9020 "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9021 engine_name, engine_id,
9023 fence_cnt, engine_name, engine_id, fence_id, i,
9024 fence_rdata, engine_name, engine_id, fence_id, i,
9042 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9043 .monitor_valid = gaudi_monitor_valid,
9044 .print_single_monitor = gaudi_print_single_monitor,
9045 .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9046 .print_fences_single_engine = gaudi_print_fences_single_engine,
9049 static void gaudi_state_dump_init(struct hl_device *hdev)
9051 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9054 for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9055 hash_add(sds->so_id_to_str_tb,
9056 &gaudi_so_id_to_str[i].node,
9057 gaudi_so_id_to_str[i].id);
9059 for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9060 hash_add(sds->monitor_id_to_str_tb,
9061 &gaudi_monitor_id_to_str[i].node,
9062 gaudi_monitor_id_to_str[i].id);
9064 sds->props = gaudi_state_dump_specs_props;
9066 sds->sync_namager_names = gaudi_sync_manager_names;
9068 sds->funcs = gaudi_state_dump_funcs;
9071 static u32 *gaudi_get_stream_master_qid_arr(void)
9073 return gaudi_stream_master;
9076 static int gaudi_set_dram_properties(struct hl_device *hdev)
9081 static int gaudi_set_binning_masks(struct hl_device *hdev)
9086 static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
9090 static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
9092 struct hl_device *hdev = dev_get_drvdata(dev);
9093 struct cpucp_info *cpucp_info;
9095 cpucp_info = &hdev->asic_prop.cpucp_info;
9097 return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
9100 static DEVICE_ATTR_RO(infineon_ver);
9102 static struct attribute *gaudi_vrm_dev_attrs[] = {
9103 &dev_attr_infineon_ver.attr,
9107 static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
9108 struct attribute_group *dev_vrm_attr_grp)
9110 hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
9111 dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
9114 static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
9119 static const struct hl_asic_funcs gaudi_funcs = {
9120 .early_init = gaudi_early_init,
9121 .early_fini = gaudi_early_fini,
9122 .late_init = gaudi_late_init,
9123 .late_fini = gaudi_late_fini,
9124 .sw_init = gaudi_sw_init,
9125 .sw_fini = gaudi_sw_fini,
9126 .hw_init = gaudi_hw_init,
9127 .hw_fini = gaudi_hw_fini,
9128 .halt_engines = gaudi_halt_engines,
9129 .suspend = gaudi_suspend,
9130 .resume = gaudi_resume,
9132 .ring_doorbell = gaudi_ring_doorbell,
9133 .pqe_write = gaudi_pqe_write,
9134 .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9135 .asic_dma_free_coherent = gaudi_dma_free_coherent,
9136 .scrub_device_mem = gaudi_scrub_device_mem,
9137 .scrub_device_dram = gaudi_scrub_device_dram,
9138 .get_int_queue_base = gaudi_get_int_queue_base,
9139 .test_queues = gaudi_test_queues,
9140 .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9141 .asic_dma_pool_free = gaudi_dma_pool_free,
9142 .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9143 .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9144 .dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
9145 .cs_parser = gaudi_cs_parser,
9146 .dma_map_sgtable = hl_asic_dma_map_sgtable,
9147 .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9148 .update_eq_ci = gaudi_update_eq_ci,
9149 .context_switch = gaudi_context_switch,
9150 .restore_phase_topology = gaudi_restore_phase_topology,
9151 .debugfs_read_dma = gaudi_debugfs_read_dma,
9152 .add_device_attr = gaudi_add_device_attr,
9153 .handle_eqe = gaudi_handle_eqe,
9154 .get_events_stat = gaudi_get_events_stat,
9155 .read_pte = gaudi_read_pte,
9156 .write_pte = gaudi_write_pte,
9157 .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9158 .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9159 .mmu_prefetch_cache_range = NULL,
9160 .send_heartbeat = gaudi_send_heartbeat,
9161 .debug_coresight = gaudi_debug_coresight,
9162 .is_device_idle = gaudi_is_device_idle,
9163 .compute_reset_late_init = gaudi_compute_reset_late_init,
9164 .hw_queues_lock = gaudi_hw_queues_lock,
9165 .hw_queues_unlock = gaudi_hw_queues_unlock,
9166 .get_pci_id = gaudi_get_pci_id,
9167 .get_eeprom_data = gaudi_get_eeprom_data,
9168 .get_monitor_dump = gaudi_get_monitor_dump,
9169 .send_cpu_message = gaudi_send_cpu_message,
9170 .pci_bars_map = gaudi_pci_bars_map,
9171 .init_iatu = gaudi_init_iatu,
9174 .halt_coresight = gaudi_halt_coresight,
9175 .ctx_init = gaudi_ctx_init,
9176 .ctx_fini = gaudi_ctx_fini,
9177 .pre_schedule_cs = gaudi_pre_schedule_cs,
9178 .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9179 .load_firmware_to_device = gaudi_load_firmware_to_device,
9180 .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9181 .get_signal_cb_size = gaudi_get_signal_cb_size,
9182 .get_wait_cb_size = gaudi_get_wait_cb_size,
9183 .gen_signal_cb = gaudi_gen_signal_cb,
9184 .gen_wait_cb = gaudi_gen_wait_cb,
9185 .reset_sob = gaudi_reset_sob,
9186 .reset_sob_group = gaudi_reset_sob_group,
9187 .get_device_time = gaudi_get_device_time,
9188 .pb_print_security_errors = NULL,
9189 .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9190 .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9191 .get_dec_base_addr = NULL,
9192 .scramble_addr = hl_mmu_scramble_addr,
9193 .descramble_addr = hl_mmu_descramble_addr,
9194 .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9195 .get_hw_block_id = gaudi_get_hw_block_id,
9196 .hw_block_mmap = gaudi_block_mmap,
9197 .enable_events_from_fw = gaudi_enable_events_from_fw,
9198 .ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
9199 .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9200 .init_firmware_preload_params = gaudi_init_firmware_preload_params,
9201 .init_firmware_loader = gaudi_init_firmware_loader,
9202 .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9203 .state_dump_init = gaudi_state_dump_init,
9204 .get_sob_addr = gaudi_get_sob_addr,
9205 .set_pci_memory_regions = gaudi_set_pci_memory_regions,
9206 .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
9207 .check_if_razwi_happened = gaudi_check_if_razwi_happened,
9208 .mmu_get_real_page_size = hl_mmu_get_real_page_size,
9209 .access_dev_mem = hl_access_dev_mem,
9210 .set_dram_bar_base = gaudi_set_hbm_bar_base,
9211 .send_device_activity = gaudi_send_device_activity,
9212 .set_dram_properties = gaudi_set_dram_properties,
9213 .set_binning_masks = gaudi_set_binning_masks,
9217 * gaudi_set_asic_funcs - set GAUDI function pointers
9219 * @hdev: pointer to hl_device structure
9222 void gaudi_set_asic_funcs(struct hl_device *hdev)
9224 hdev->asic_funcs = &gaudi_funcs;