3 * This file is provided under a dual BSD/GPLv2 license. When using or
4 * redistributing this file, you may do so under either license.
8 * Copyright(c) 2015 Intel Corporation.
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of version 2 of the GNU General Public License as
12 * published by the Free Software Foundation.
14 * This program is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
21 * Copyright(c) 2015 Intel Corporation.
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
27 * - Redistributions of source code must retain the above copyright
28 * notice, this list of conditions and the following disclaimer.
29 * - Redistributions in binary form must reproduce the above copyright
30 * notice, this list of conditions and the following disclaimer in
31 * the documentation and/or other materials provided with the
33 * - Neither the name of Intel Corporation nor the names of its
34 * contributors may be used to endorse or promote products derived
35 * from this software without specific prior written permission.
37 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
38 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
52 * This file contains all of the code that is specific to the HFI chip
55 #include <linux/pci.h>
56 #include <linux/delay.h>
57 #include <linux/interrupt.h>
58 #include <linux/module.h>
67 #define NUM_IB_PORTS 1
70 module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO);
71 MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix");
73 uint num_vls = HFI1_MAX_VLS_SUPPORTED;
74 module_param(num_vls, uint, S_IRUGO);
75 MODULE_PARM_DESC(num_vls, "Set number of Virtual Lanes to use (1-8)");
78 * Default time to aggregate two 10K packets from the idle state
79 * (timer not running). The timer starts at the end of the first packet,
80 * so only the time for one 10K packet and header plus a bit extra is needed.
81 * 10 * 1024 + 64 header bytes = 10304 bytes
82 * 10304 bytes / 12.5 GB/s = 824.32ns
84 uint rcv_intr_timeout = (824 + 16); /* 16 is for coalescing interrupt */
85 module_param(rcv_intr_timeout, uint, S_IRUGO);
86 MODULE_PARM_DESC(rcv_intr_timeout, "Receive interrupt mitigation timeout in ns");
88 uint rcv_intr_count = 16; /* same as qib */
89 module_param(rcv_intr_count, uint, S_IRUGO);
90 MODULE_PARM_DESC(rcv_intr_count, "Receive interrupt mitigation count");
92 ushort link_crc_mask = SUPPORTED_CRCS;
93 module_param(link_crc_mask, ushort, S_IRUGO);
94 MODULE_PARM_DESC(link_crc_mask, "CRCs to use on the link");
97 module_param_named(loopback, loopback, uint, S_IRUGO); /* S_IRUGO: read-only after load */
98 MODULE_PARM_DESC(loopback, "Put into loopback mode (1 = serdes, 3 = external cable)");
100 /* Other driver tunables */
101 uint rcv_intr_dynamic = 1; /* enable dynamic mode for rcv int mitigation*/
102 static ushort crc_14b_sideband = 1;
103 static uint use_flr = 1;
104 uint quick_linkup; /* skip LNI */
107 u64 flag; /* the flag */
108 char *str; /* description string */
109 u16 extra; /* extra information */
114 /* str must be a string constant */
115 #define FLAG_ENTRY(str, extra, flag) {flag, str, extra}
116 #define FLAG_ENTRY0(str, flag) {flag, str, 0}
118 /* Send Error Consequences */
119 #define SEC_WRITE_DROPPED 0x1
120 #define SEC_PACKET_DROPPED 0x2
121 #define SEC_SC_HALTED 0x4 /* per-context only */
122 #define SEC_SPC_FREEZE 0x8 /* per-HFI only */
125 #define MIN_KERNEL_KCTXTS 2
126 #define NUM_MAP_REGS 32
128 /* Bit offset into the GUID which carries HFI id information */
129 #define GUID_HFI_INDEX_SHIFT 39
131 /* extract the emulation revision */
132 #define emulator_rev(dd) ((dd)->irev >> 8)
133 /* parallel and serial emulation versions are 3 and 4 respectively */
134 #define is_emulator_p(dd) ((((dd)->irev) & 0xf) == 3)
135 #define is_emulator_s(dd) ((((dd)->irev) & 0xf) == 4)
140 #define IB_PACKET_TYPE 2ull
141 #define QW_SHIFT 6ull
143 #define QPN_WIDTH 7ull
145 /* LRH.BTH: QW 0, OFFSET 48 - for match */
146 #define LRH_BTH_QW 0ull
147 #define LRH_BTH_BIT_OFFSET 48ull
148 #define LRH_BTH_OFFSET(off) ((LRH_BTH_QW << QW_SHIFT) | (off))
149 #define LRH_BTH_MATCH_OFFSET LRH_BTH_OFFSET(LRH_BTH_BIT_OFFSET)
150 #define LRH_BTH_SELECT
151 #define LRH_BTH_MASK 3ull
152 #define LRH_BTH_VALUE 2ull
154 /* LRH.SC[3..0] QW 0, OFFSET 56 - for match */
155 #define LRH_SC_QW 0ull
156 #define LRH_SC_BIT_OFFSET 56ull
157 #define LRH_SC_OFFSET(off) ((LRH_SC_QW << QW_SHIFT) | (off))
158 #define LRH_SC_MATCH_OFFSET LRH_SC_OFFSET(LRH_SC_BIT_OFFSET)
159 #define LRH_SC_MASK 128ull
160 #define LRH_SC_VALUE 0ull
162 /* SC[n..0] QW 0, OFFSET 60 - for select */
163 #define LRH_SC_SELECT_OFFSET ((LRH_SC_QW << QW_SHIFT) | (60ull))
165 /* QPN[m+n:1] QW 1, OFFSET 1 */
166 #define QPN_SELECT_OFFSET ((1ull << QW_SHIFT) | (1ull))
168 /* defines to build power on SC2VL table */
180 ((u64)(sc0val) << SEND_SC2VLT##num##_SC##sc0##_SHIFT) | \
181 ((u64)(sc1val) << SEND_SC2VLT##num##_SC##sc1##_SHIFT) | \
182 ((u64)(sc2val) << SEND_SC2VLT##num##_SC##sc2##_SHIFT) | \
183 ((u64)(sc3val) << SEND_SC2VLT##num##_SC##sc3##_SHIFT) | \
184 ((u64)(sc4val) << SEND_SC2VLT##num##_SC##sc4##_SHIFT) | \
185 ((u64)(sc5val) << SEND_SC2VLT##num##_SC##sc5##_SHIFT) | \
186 ((u64)(sc6val) << SEND_SC2VLT##num##_SC##sc6##_SHIFT) | \
187 ((u64)(sc7val) << SEND_SC2VLT##num##_SC##sc7##_SHIFT) \
190 #define DC_SC_VL_VAL( \
209 ((u64)(e0val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e0##_SHIFT) | \
210 ((u64)(e1val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e1##_SHIFT) | \
211 ((u64)(e2val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e2##_SHIFT) | \
212 ((u64)(e3val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e3##_SHIFT) | \
213 ((u64)(e4val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e4##_SHIFT) | \
214 ((u64)(e5val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e5##_SHIFT) | \
215 ((u64)(e6val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e6##_SHIFT) | \
216 ((u64)(e7val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e7##_SHIFT) | \
217 ((u64)(e8val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e8##_SHIFT) | \
218 ((u64)(e9val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e9##_SHIFT) | \
219 ((u64)(e10val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e10##_SHIFT) | \
220 ((u64)(e11val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e11##_SHIFT) | \
221 ((u64)(e12val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e12##_SHIFT) | \
222 ((u64)(e13val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e13##_SHIFT) | \
223 ((u64)(e14val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e14##_SHIFT) | \
224 ((u64)(e15val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e15##_SHIFT) \
227 /* all CceStatus sub-block freeze bits */
228 #define ALL_FROZE (CCE_STATUS_SDMA_FROZE_SMASK \
229 | CCE_STATUS_RXE_FROZE_SMASK \
230 | CCE_STATUS_TXE_FROZE_SMASK \
231 | CCE_STATUS_TXE_PIO_FROZE_SMASK)
232 /* all CceStatus sub-block TXE pause bits */
233 #define ALL_TXE_PAUSE (CCE_STATUS_TXE_PIO_PAUSED_SMASK \
234 | CCE_STATUS_TXE_PAUSED_SMASK \
235 | CCE_STATUS_SDMA_PAUSED_SMASK)
236 /* all CceStatus sub-block RXE pause bits */
237 #define ALL_RXE_PAUSE CCE_STATUS_RXE_PAUSED_SMASK
242 static struct flag_table cce_err_status_flags[] = {
243 /* 0*/ FLAG_ENTRY0("CceCsrParityErr",
244 CCE_ERR_STATUS_CCE_CSR_PARITY_ERR_SMASK),
245 /* 1*/ FLAG_ENTRY0("CceCsrReadBadAddrErr",
246 CCE_ERR_STATUS_CCE_CSR_READ_BAD_ADDR_ERR_SMASK),
247 /* 2*/ FLAG_ENTRY0("CceCsrWriteBadAddrErr",
248 CCE_ERR_STATUS_CCE_CSR_WRITE_BAD_ADDR_ERR_SMASK),
249 /* 3*/ FLAG_ENTRY0("CceTrgtAsyncFifoParityErr",
250 CCE_ERR_STATUS_CCE_TRGT_ASYNC_FIFO_PARITY_ERR_SMASK),
251 /* 4*/ FLAG_ENTRY0("CceTrgtAccessErr",
252 CCE_ERR_STATUS_CCE_TRGT_ACCESS_ERR_SMASK),
253 /* 5*/ FLAG_ENTRY0("CceRspdDataParityErr",
254 CCE_ERR_STATUS_CCE_RSPD_DATA_PARITY_ERR_SMASK),
255 /* 6*/ FLAG_ENTRY0("CceCli0AsyncFifoParityErr",
256 CCE_ERR_STATUS_CCE_CLI0_ASYNC_FIFO_PARITY_ERR_SMASK),
257 /* 7*/ FLAG_ENTRY0("CceCsrCfgBusParityErr",
258 CCE_ERR_STATUS_CCE_CSR_CFG_BUS_PARITY_ERR_SMASK),
259 /* 8*/ FLAG_ENTRY0("CceCli2AsyncFifoParityErr",
260 CCE_ERR_STATUS_CCE_CLI2_ASYNC_FIFO_PARITY_ERR_SMASK),
261 /* 9*/ FLAG_ENTRY0("CceCli1AsyncFifoPioCrdtParityErr",
262 CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_PIO_CRDT_PARITY_ERR_SMASK),
263 /*10*/ FLAG_ENTRY0("CceCli1AsyncFifoSdmaHdParityErr",
264 CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_SDMA_HD_PARITY_ERR_SMASK),
265 /*11*/ FLAG_ENTRY0("CceCli1AsyncFifoRxdmaParityError",
266 CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_RXDMA_PARITY_ERROR_SMASK),
267 /*12*/ FLAG_ENTRY0("CceCli1AsyncFifoDbgParityError",
268 CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_DBG_PARITY_ERROR_SMASK),
269 /*13*/ FLAG_ENTRY0("PcicRetryMemCorErr",
270 CCE_ERR_STATUS_PCIC_RETRY_MEM_COR_ERR_SMASK),
271 /*14*/ FLAG_ENTRY0("PcicRetrySotMemCorErr",
272 CCE_ERR_STATUS_PCIC_RETRY_SOT_MEM_COR_ERR_SMASK),
273 /*15*/ FLAG_ENTRY0("PcicPostHdQCorErr",
274 CCE_ERR_STATUS_PCIC_POST_HD_QCOR_ERR_SMASK),
275 /*16*/ FLAG_ENTRY0("PcicPostDatQCorErr",
276 CCE_ERR_STATUS_PCIC_POST_DAT_QCOR_ERR_SMASK),
277 /*17*/ FLAG_ENTRY0("PcicCplHdQCorErr",
278 CCE_ERR_STATUS_PCIC_CPL_HD_QCOR_ERR_SMASK),
279 /*18*/ FLAG_ENTRY0("PcicCplDatQCorErr",
280 CCE_ERR_STATUS_PCIC_CPL_DAT_QCOR_ERR_SMASK),
281 /*19*/ FLAG_ENTRY0("PcicNPostHQParityErr",
282 CCE_ERR_STATUS_PCIC_NPOST_HQ_PARITY_ERR_SMASK),
283 /*20*/ FLAG_ENTRY0("PcicNPostDatQParityErr",
284 CCE_ERR_STATUS_PCIC_NPOST_DAT_QPARITY_ERR_SMASK),
285 /*21*/ FLAG_ENTRY0("PcicRetryMemUncErr",
286 CCE_ERR_STATUS_PCIC_RETRY_MEM_UNC_ERR_SMASK),
287 /*22*/ FLAG_ENTRY0("PcicRetrySotMemUncErr",
288 CCE_ERR_STATUS_PCIC_RETRY_SOT_MEM_UNC_ERR_SMASK),
289 /*23*/ FLAG_ENTRY0("PcicPostHdQUncErr",
290 CCE_ERR_STATUS_PCIC_POST_HD_QUNC_ERR_SMASK),
291 /*24*/ FLAG_ENTRY0("PcicPostDatQUncErr",
292 CCE_ERR_STATUS_PCIC_POST_DAT_QUNC_ERR_SMASK),
293 /*25*/ FLAG_ENTRY0("PcicCplHdQUncErr",
294 CCE_ERR_STATUS_PCIC_CPL_HD_QUNC_ERR_SMASK),
295 /*26*/ FLAG_ENTRY0("PcicCplDatQUncErr",
296 CCE_ERR_STATUS_PCIC_CPL_DAT_QUNC_ERR_SMASK),
297 /*27*/ FLAG_ENTRY0("PcicTransmitFrontParityErr",
298 CCE_ERR_STATUS_PCIC_TRANSMIT_FRONT_PARITY_ERR_SMASK),
299 /*28*/ FLAG_ENTRY0("PcicTransmitBackParityErr",
300 CCE_ERR_STATUS_PCIC_TRANSMIT_BACK_PARITY_ERR_SMASK),
301 /*29*/ FLAG_ENTRY0("PcicReceiveParityErr",
302 CCE_ERR_STATUS_PCIC_RECEIVE_PARITY_ERR_SMASK),
303 /*30*/ FLAG_ENTRY0("CceTrgtCplTimeoutErr",
304 CCE_ERR_STATUS_CCE_TRGT_CPL_TIMEOUT_ERR_SMASK),
305 /*31*/ FLAG_ENTRY0("LATriggered",
306 CCE_ERR_STATUS_LA_TRIGGERED_SMASK),
307 /*32*/ FLAG_ENTRY0("CceSegReadBadAddrErr",
308 CCE_ERR_STATUS_CCE_SEG_READ_BAD_ADDR_ERR_SMASK),
309 /*33*/ FLAG_ENTRY0("CceSegWriteBadAddrErr",
310 CCE_ERR_STATUS_CCE_SEG_WRITE_BAD_ADDR_ERR_SMASK),
311 /*34*/ FLAG_ENTRY0("CceRcplAsyncFifoParityErr",
312 CCE_ERR_STATUS_CCE_RCPL_ASYNC_FIFO_PARITY_ERR_SMASK),
313 /*35*/ FLAG_ENTRY0("CceRxdmaConvFifoParityErr",
314 CCE_ERR_STATUS_CCE_RXDMA_CONV_FIFO_PARITY_ERR_SMASK),
315 /*36*/ FLAG_ENTRY0("CceMsixTableCorErr",
316 CCE_ERR_STATUS_CCE_MSIX_TABLE_COR_ERR_SMASK),
317 /*37*/ FLAG_ENTRY0("CceMsixTableUncErr",
318 CCE_ERR_STATUS_CCE_MSIX_TABLE_UNC_ERR_SMASK),
319 /*38*/ FLAG_ENTRY0("CceIntMapCorErr",
320 CCE_ERR_STATUS_CCE_INT_MAP_COR_ERR_SMASK),
321 /*39*/ FLAG_ENTRY0("CceIntMapUncErr",
322 CCE_ERR_STATUS_CCE_INT_MAP_UNC_ERR_SMASK),
323 /*40*/ FLAG_ENTRY0("CceMsixCsrParityErr",
324 CCE_ERR_STATUS_CCE_MSIX_CSR_PARITY_ERR_SMASK),
331 #define MES(text) MISC_ERR_STATUS_MISC_##text##_ERR_SMASK
332 static struct flag_table misc_err_status_flags[] = {
333 /* 0*/ FLAG_ENTRY0("CSR_PARITY", MES(CSR_PARITY)),
334 /* 1*/ FLAG_ENTRY0("CSR_READ_BAD_ADDR", MES(CSR_READ_BAD_ADDR)),
335 /* 2*/ FLAG_ENTRY0("CSR_WRITE_BAD_ADDR", MES(CSR_WRITE_BAD_ADDR)),
336 /* 3*/ FLAG_ENTRY0("SBUS_WRITE_FAILED", MES(SBUS_WRITE_FAILED)),
337 /* 4*/ FLAG_ENTRY0("KEY_MISMATCH", MES(KEY_MISMATCH)),
338 /* 5*/ FLAG_ENTRY0("FW_AUTH_FAILED", MES(FW_AUTH_FAILED)),
339 /* 6*/ FLAG_ENTRY0("EFUSE_CSR_PARITY", MES(EFUSE_CSR_PARITY)),
340 /* 7*/ FLAG_ENTRY0("EFUSE_READ_BAD_ADDR", MES(EFUSE_READ_BAD_ADDR)),
341 /* 8*/ FLAG_ENTRY0("EFUSE_WRITE", MES(EFUSE_WRITE)),
342 /* 9*/ FLAG_ENTRY0("EFUSE_DONE_PARITY", MES(EFUSE_DONE_PARITY)),
343 /*10*/ FLAG_ENTRY0("INVALID_EEP_CMD", MES(INVALID_EEP_CMD)),
344 /*11*/ FLAG_ENTRY0("MBIST_FAIL", MES(MBIST_FAIL)),
345 /*12*/ FLAG_ENTRY0("PLL_LOCK_FAIL", MES(PLL_LOCK_FAIL))
349 * TXE PIO Error flags and consequences
351 static struct flag_table pio_err_status_flags[] = {
352 /* 0*/ FLAG_ENTRY("PioWriteBadCtxt",
354 SEND_PIO_ERR_STATUS_PIO_WRITE_BAD_CTXT_ERR_SMASK),
355 /* 1*/ FLAG_ENTRY("PioWriteAddrParity",
357 SEND_PIO_ERR_STATUS_PIO_WRITE_ADDR_PARITY_ERR_SMASK),
358 /* 2*/ FLAG_ENTRY("PioCsrParity",
360 SEND_PIO_ERR_STATUS_PIO_CSR_PARITY_ERR_SMASK),
361 /* 3*/ FLAG_ENTRY("PioSbMemFifo0",
363 SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO0_ERR_SMASK),
364 /* 4*/ FLAG_ENTRY("PioSbMemFifo1",
366 SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO1_ERR_SMASK),
367 /* 5*/ FLAG_ENTRY("PioPccFifoParity",
369 SEND_PIO_ERR_STATUS_PIO_PCC_FIFO_PARITY_ERR_SMASK),
370 /* 6*/ FLAG_ENTRY("PioPecFifoParity",
372 SEND_PIO_ERR_STATUS_PIO_PEC_FIFO_PARITY_ERR_SMASK),
373 /* 7*/ FLAG_ENTRY("PioSbrdctlCrrelParity",
375 SEND_PIO_ERR_STATUS_PIO_SBRDCTL_CRREL_PARITY_ERR_SMASK),
376 /* 8*/ FLAG_ENTRY("PioSbrdctrlCrrelFifoParity",
378 SEND_PIO_ERR_STATUS_PIO_SBRDCTRL_CRREL_FIFO_PARITY_ERR_SMASK),
379 /* 9*/ FLAG_ENTRY("PioPktEvictFifoParityErr",
381 SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_FIFO_PARITY_ERR_SMASK),
382 /*10*/ FLAG_ENTRY("PioSmPktResetParity",
384 SEND_PIO_ERR_STATUS_PIO_SM_PKT_RESET_PARITY_ERR_SMASK),
385 /*11*/ FLAG_ENTRY("PioVlLenMemBank0Unc",
387 SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_UNC_ERR_SMASK),
388 /*12*/ FLAG_ENTRY("PioVlLenMemBank1Unc",
390 SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_UNC_ERR_SMASK),
391 /*13*/ FLAG_ENTRY("PioVlLenMemBank0Cor",
393 SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_COR_ERR_SMASK),
394 /*14*/ FLAG_ENTRY("PioVlLenMemBank1Cor",
396 SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_COR_ERR_SMASK),
397 /*15*/ FLAG_ENTRY("PioCreditRetFifoParity",
399 SEND_PIO_ERR_STATUS_PIO_CREDIT_RET_FIFO_PARITY_ERR_SMASK),
400 /*16*/ FLAG_ENTRY("PioPpmcPblFifo",
402 SEND_PIO_ERR_STATUS_PIO_PPMC_PBL_FIFO_ERR_SMASK),
403 /*17*/ FLAG_ENTRY("PioInitSmIn",
405 SEND_PIO_ERR_STATUS_PIO_INIT_SM_IN_ERR_SMASK),
406 /*18*/ FLAG_ENTRY("PioPktEvictSmOrArbSm",
408 SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_SM_OR_ARB_SM_ERR_SMASK),
409 /*19*/ FLAG_ENTRY("PioHostAddrMemUnc",
411 SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_UNC_ERR_SMASK),
412 /*20*/ FLAG_ENTRY("PioHostAddrMemCor",
414 SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_COR_ERR_SMASK),
415 /*21*/ FLAG_ENTRY("PioWriteDataParity",
417 SEND_PIO_ERR_STATUS_PIO_WRITE_DATA_PARITY_ERR_SMASK),
418 /*22*/ FLAG_ENTRY("PioStateMachine",
420 SEND_PIO_ERR_STATUS_PIO_STATE_MACHINE_ERR_SMASK),
421 /*23*/ FLAG_ENTRY("PioWriteQwValidParity",
422 SEC_WRITE_DROPPED|SEC_SPC_FREEZE,
423 SEND_PIO_ERR_STATUS_PIO_WRITE_QW_VALID_PARITY_ERR_SMASK),
424 /*24*/ FLAG_ENTRY("PioBlockQwCountParity",
425 SEC_WRITE_DROPPED|SEC_SPC_FREEZE,
426 SEND_PIO_ERR_STATUS_PIO_BLOCK_QW_COUNT_PARITY_ERR_SMASK),
427 /*25*/ FLAG_ENTRY("PioVlfVlLenParity",
429 SEND_PIO_ERR_STATUS_PIO_VLF_VL_LEN_PARITY_ERR_SMASK),
430 /*26*/ FLAG_ENTRY("PioVlfSopParity",
432 SEND_PIO_ERR_STATUS_PIO_VLF_SOP_PARITY_ERR_SMASK),
433 /*27*/ FLAG_ENTRY("PioVlFifoParity",
435 SEND_PIO_ERR_STATUS_PIO_VL_FIFO_PARITY_ERR_SMASK),
436 /*28*/ FLAG_ENTRY("PioPpmcBqcMemParity",
438 SEND_PIO_ERR_STATUS_PIO_PPMC_BQC_MEM_PARITY_ERR_SMASK),
439 /*29*/ FLAG_ENTRY("PioPpmcSopLen",
441 SEND_PIO_ERR_STATUS_PIO_PPMC_SOP_LEN_ERR_SMASK),
443 /*32*/ FLAG_ENTRY("PioCurrentFreeCntParity",
445 SEND_PIO_ERR_STATUS_PIO_CURRENT_FREE_CNT_PARITY_ERR_SMASK),
446 /*33*/ FLAG_ENTRY("PioLastReturnedCntParity",
448 SEND_PIO_ERR_STATUS_PIO_LAST_RETURNED_CNT_PARITY_ERR_SMASK),
449 /*34*/ FLAG_ENTRY("PioPccSopHeadParity",
451 SEND_PIO_ERR_STATUS_PIO_PCC_SOP_HEAD_PARITY_ERR_SMASK),
452 /*35*/ FLAG_ENTRY("PioPecSopHeadParityErr",
454 SEND_PIO_ERR_STATUS_PIO_PEC_SOP_HEAD_PARITY_ERR_SMASK),
458 /* TXE PIO errors that cause an SPC freeze */
459 #define ALL_PIO_FREEZE_ERR \
460 (SEND_PIO_ERR_STATUS_PIO_WRITE_ADDR_PARITY_ERR_SMASK \
461 | SEND_PIO_ERR_STATUS_PIO_CSR_PARITY_ERR_SMASK \
462 | SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO0_ERR_SMASK \
463 | SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO1_ERR_SMASK \
464 | SEND_PIO_ERR_STATUS_PIO_PCC_FIFO_PARITY_ERR_SMASK \
465 | SEND_PIO_ERR_STATUS_PIO_PEC_FIFO_PARITY_ERR_SMASK \
466 | SEND_PIO_ERR_STATUS_PIO_SBRDCTL_CRREL_PARITY_ERR_SMASK \
467 | SEND_PIO_ERR_STATUS_PIO_SBRDCTRL_CRREL_FIFO_PARITY_ERR_SMASK \
468 | SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_FIFO_PARITY_ERR_SMASK \
469 | SEND_PIO_ERR_STATUS_PIO_SM_PKT_RESET_PARITY_ERR_SMASK \
470 | SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_UNC_ERR_SMASK \
471 | SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_UNC_ERR_SMASK \
472 | SEND_PIO_ERR_STATUS_PIO_CREDIT_RET_FIFO_PARITY_ERR_SMASK \
473 | SEND_PIO_ERR_STATUS_PIO_PPMC_PBL_FIFO_ERR_SMASK \
474 | SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_SM_OR_ARB_SM_ERR_SMASK \
475 | SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_UNC_ERR_SMASK \
476 | SEND_PIO_ERR_STATUS_PIO_WRITE_DATA_PARITY_ERR_SMASK \
477 | SEND_PIO_ERR_STATUS_PIO_STATE_MACHINE_ERR_SMASK \
478 | SEND_PIO_ERR_STATUS_PIO_WRITE_QW_VALID_PARITY_ERR_SMASK \
479 | SEND_PIO_ERR_STATUS_PIO_BLOCK_QW_COUNT_PARITY_ERR_SMASK \
480 | SEND_PIO_ERR_STATUS_PIO_VLF_VL_LEN_PARITY_ERR_SMASK \
481 | SEND_PIO_ERR_STATUS_PIO_VLF_SOP_PARITY_ERR_SMASK \
482 | SEND_PIO_ERR_STATUS_PIO_VL_FIFO_PARITY_ERR_SMASK \
483 | SEND_PIO_ERR_STATUS_PIO_PPMC_BQC_MEM_PARITY_ERR_SMASK \
484 | SEND_PIO_ERR_STATUS_PIO_PPMC_SOP_LEN_ERR_SMASK \
485 | SEND_PIO_ERR_STATUS_PIO_CURRENT_FREE_CNT_PARITY_ERR_SMASK \
486 | SEND_PIO_ERR_STATUS_PIO_LAST_RETURNED_CNT_PARITY_ERR_SMASK \
487 | SEND_PIO_ERR_STATUS_PIO_PCC_SOP_HEAD_PARITY_ERR_SMASK \
488 | SEND_PIO_ERR_STATUS_PIO_PEC_SOP_HEAD_PARITY_ERR_SMASK)
491 * TXE SDMA Error flags
493 static struct flag_table sdma_err_status_flags[] = {
494 /* 0*/ FLAG_ENTRY0("SDmaRpyTagErr",
495 SEND_DMA_ERR_STATUS_SDMA_RPY_TAG_ERR_SMASK),
496 /* 1*/ FLAG_ENTRY0("SDmaCsrParityErr",
497 SEND_DMA_ERR_STATUS_SDMA_CSR_PARITY_ERR_SMASK),
498 /* 2*/ FLAG_ENTRY0("SDmaPcieReqTrackingUncErr",
499 SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_UNC_ERR_SMASK),
500 /* 3*/ FLAG_ENTRY0("SDmaPcieReqTrackingCorErr",
501 SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_COR_ERR_SMASK),
505 /* TXE SDMA errors that cause an SPC freeze */
506 #define ALL_SDMA_FREEZE_ERR \
507 (SEND_DMA_ERR_STATUS_SDMA_RPY_TAG_ERR_SMASK \
508 | SEND_DMA_ERR_STATUS_SDMA_CSR_PARITY_ERR_SMASK \
509 | SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_UNC_ERR_SMASK)
512 * TXE Egress Error flags
514 #define SEES(text) SEND_EGRESS_ERR_STATUS_##text##_ERR_SMASK
515 static struct flag_table egress_err_status_flags[] = {
516 /* 0*/ FLAG_ENTRY0("TxPktIntegrityMemCorErr", SEES(TX_PKT_INTEGRITY_MEM_COR)),
517 /* 1*/ FLAG_ENTRY0("TxPktIntegrityMemUncErr", SEES(TX_PKT_INTEGRITY_MEM_UNC)),
519 /* 3*/ FLAG_ENTRY0("TxEgressFifoUnderrunOrParityErr",
520 SEES(TX_EGRESS_FIFO_UNDERRUN_OR_PARITY)),
521 /* 4*/ FLAG_ENTRY0("TxLinkdownErr", SEES(TX_LINKDOWN)),
522 /* 5*/ FLAG_ENTRY0("TxIncorrectLinkStateErr", SEES(TX_INCORRECT_LINK_STATE)),
524 /* 7*/ FLAG_ENTRY0("TxPioLaunchIntfParityErr",
525 SEES(TX_PIO_LAUNCH_INTF_PARITY)),
526 /* 8*/ FLAG_ENTRY0("TxSdmaLaunchIntfParityErr",
527 SEES(TX_SDMA_LAUNCH_INTF_PARITY)),
529 /*11*/ FLAG_ENTRY0("TxSbrdCtlStateMachineParityErr",
530 SEES(TX_SBRD_CTL_STATE_MACHINE_PARITY)),
531 /*12*/ FLAG_ENTRY0("TxIllegalVLErr", SEES(TX_ILLEGAL_VL)),
532 /*13*/ FLAG_ENTRY0("TxLaunchCsrParityErr", SEES(TX_LAUNCH_CSR_PARITY)),
533 /*14*/ FLAG_ENTRY0("TxSbrdCtlCsrParityErr", SEES(TX_SBRD_CTL_CSR_PARITY)),
534 /*15*/ FLAG_ENTRY0("TxConfigParityErr", SEES(TX_CONFIG_PARITY)),
535 /*16*/ FLAG_ENTRY0("TxSdma0DisallowedPacketErr",
536 SEES(TX_SDMA0_DISALLOWED_PACKET)),
537 /*17*/ FLAG_ENTRY0("TxSdma1DisallowedPacketErr",
538 SEES(TX_SDMA1_DISALLOWED_PACKET)),
539 /*18*/ FLAG_ENTRY0("TxSdma2DisallowedPacketErr",
540 SEES(TX_SDMA2_DISALLOWED_PACKET)),
541 /*19*/ FLAG_ENTRY0("TxSdma3DisallowedPacketErr",
542 SEES(TX_SDMA3_DISALLOWED_PACKET)),
543 /*20*/ FLAG_ENTRY0("TxSdma4DisallowedPacketErr",
544 SEES(TX_SDMA4_DISALLOWED_PACKET)),
545 /*21*/ FLAG_ENTRY0("TxSdma5DisallowedPacketErr",
546 SEES(TX_SDMA5_DISALLOWED_PACKET)),
547 /*22*/ FLAG_ENTRY0("TxSdma6DisallowedPacketErr",
548 SEES(TX_SDMA6_DISALLOWED_PACKET)),
549 /*23*/ FLAG_ENTRY0("TxSdma7DisallowedPacketErr",
550 SEES(TX_SDMA7_DISALLOWED_PACKET)),
551 /*24*/ FLAG_ENTRY0("TxSdma8DisallowedPacketErr",
552 SEES(TX_SDMA8_DISALLOWED_PACKET)),
553 /*25*/ FLAG_ENTRY0("TxSdma9DisallowedPacketErr",
554 SEES(TX_SDMA9_DISALLOWED_PACKET)),
555 /*26*/ FLAG_ENTRY0("TxSdma10DisallowedPacketErr",
556 SEES(TX_SDMA10_DISALLOWED_PACKET)),
557 /*27*/ FLAG_ENTRY0("TxSdma11DisallowedPacketErr",
558 SEES(TX_SDMA11_DISALLOWED_PACKET)),
559 /*28*/ FLAG_ENTRY0("TxSdma12DisallowedPacketErr",
560 SEES(TX_SDMA12_DISALLOWED_PACKET)),
561 /*29*/ FLAG_ENTRY0("TxSdma13DisallowedPacketErr",
562 SEES(TX_SDMA13_DISALLOWED_PACKET)),
563 /*30*/ FLAG_ENTRY0("TxSdma14DisallowedPacketErr",
564 SEES(TX_SDMA14_DISALLOWED_PACKET)),
565 /*31*/ FLAG_ENTRY0("TxSdma15DisallowedPacketErr",
566 SEES(TX_SDMA15_DISALLOWED_PACKET)),
567 /*32*/ FLAG_ENTRY0("TxLaunchFifo0UncOrParityErr",
568 SEES(TX_LAUNCH_FIFO0_UNC_OR_PARITY)),
569 /*33*/ FLAG_ENTRY0("TxLaunchFifo1UncOrParityErr",
570 SEES(TX_LAUNCH_FIFO1_UNC_OR_PARITY)),
571 /*34*/ FLAG_ENTRY0("TxLaunchFifo2UncOrParityErr",
572 SEES(TX_LAUNCH_FIFO2_UNC_OR_PARITY)),
573 /*35*/ FLAG_ENTRY0("TxLaunchFifo3UncOrParityErr",
574 SEES(TX_LAUNCH_FIFO3_UNC_OR_PARITY)),
575 /*36*/ FLAG_ENTRY0("TxLaunchFifo4UncOrParityErr",
576 SEES(TX_LAUNCH_FIFO4_UNC_OR_PARITY)),
577 /*37*/ FLAG_ENTRY0("TxLaunchFifo5UncOrParityErr",
578 SEES(TX_LAUNCH_FIFO5_UNC_OR_PARITY)),
579 /*38*/ FLAG_ENTRY0("TxLaunchFifo6UncOrParityErr",
580 SEES(TX_LAUNCH_FIFO6_UNC_OR_PARITY)),
581 /*39*/ FLAG_ENTRY0("TxLaunchFifo7UncOrParityErr",
582 SEES(TX_LAUNCH_FIFO7_UNC_OR_PARITY)),
583 /*40*/ FLAG_ENTRY0("TxLaunchFifo8UncOrParityErr",
584 SEES(TX_LAUNCH_FIFO8_UNC_OR_PARITY)),
585 /*41*/ FLAG_ENTRY0("TxCreditReturnParityErr", SEES(TX_CREDIT_RETURN_PARITY)),
586 /*42*/ FLAG_ENTRY0("TxSbHdrUncErr", SEES(TX_SB_HDR_UNC)),
587 /*43*/ FLAG_ENTRY0("TxReadSdmaMemoryUncErr", SEES(TX_READ_SDMA_MEMORY_UNC)),
588 /*44*/ FLAG_ENTRY0("TxReadPioMemoryUncErr", SEES(TX_READ_PIO_MEMORY_UNC)),
589 /*45*/ FLAG_ENTRY0("TxEgressFifoUncErr", SEES(TX_EGRESS_FIFO_UNC)),
590 /*46*/ FLAG_ENTRY0("TxHcrcInsertionErr", SEES(TX_HCRC_INSERTION)),
591 /*47*/ FLAG_ENTRY0("TxCreditReturnVLErr", SEES(TX_CREDIT_RETURN_VL)),
592 /*48*/ FLAG_ENTRY0("TxLaunchFifo0CorErr", SEES(TX_LAUNCH_FIFO0_COR)),
593 /*49*/ FLAG_ENTRY0("TxLaunchFifo1CorErr", SEES(TX_LAUNCH_FIFO1_COR)),
594 /*50*/ FLAG_ENTRY0("TxLaunchFifo2CorErr", SEES(TX_LAUNCH_FIFO2_COR)),
595 /*51*/ FLAG_ENTRY0("TxLaunchFifo3CorErr", SEES(TX_LAUNCH_FIFO3_COR)),
596 /*52*/ FLAG_ENTRY0("TxLaunchFifo4CorErr", SEES(TX_LAUNCH_FIFO4_COR)),
597 /*53*/ FLAG_ENTRY0("TxLaunchFifo5CorErr", SEES(TX_LAUNCH_FIFO5_COR)),
598 /*54*/ FLAG_ENTRY0("TxLaunchFifo6CorErr", SEES(TX_LAUNCH_FIFO6_COR)),
599 /*55*/ FLAG_ENTRY0("TxLaunchFifo7CorErr", SEES(TX_LAUNCH_FIFO7_COR)),
600 /*56*/ FLAG_ENTRY0("TxLaunchFifo8CorErr", SEES(TX_LAUNCH_FIFO8_COR)),
601 /*57*/ FLAG_ENTRY0("TxCreditOverrunErr", SEES(TX_CREDIT_OVERRUN)),
602 /*58*/ FLAG_ENTRY0("TxSbHdrCorErr", SEES(TX_SB_HDR_COR)),
603 /*59*/ FLAG_ENTRY0("TxReadSdmaMemoryCorErr", SEES(TX_READ_SDMA_MEMORY_COR)),
604 /*60*/ FLAG_ENTRY0("TxReadPioMemoryCorErr", SEES(TX_READ_PIO_MEMORY_COR)),
605 /*61*/ FLAG_ENTRY0("TxEgressFifoCorErr", SEES(TX_EGRESS_FIFO_COR)),
606 /*62*/ FLAG_ENTRY0("TxReadSdmaMemoryCsrUncErr",
607 SEES(TX_READ_SDMA_MEMORY_CSR_UNC)),
608 /*63*/ FLAG_ENTRY0("TxReadPioMemoryCsrUncErr",
609 SEES(TX_READ_PIO_MEMORY_CSR_UNC)),
613 * TXE Egress Error Info flags
615 #define SEEI(text) SEND_EGRESS_ERR_INFO_##text##_ERR_SMASK
616 static struct flag_table egress_err_info_flags[] = {
617 /* 0*/ FLAG_ENTRY0("Reserved", 0ull),
618 /* 1*/ FLAG_ENTRY0("VLErr", SEEI(VL)),
619 /* 2*/ FLAG_ENTRY0("JobKeyErr", SEEI(JOB_KEY)),
620 /* 3*/ FLAG_ENTRY0("JobKeyErr", SEEI(JOB_KEY)),
621 /* 4*/ FLAG_ENTRY0("PartitionKeyErr", SEEI(PARTITION_KEY)),
622 /* 5*/ FLAG_ENTRY0("SLIDErr", SEEI(SLID)),
623 /* 6*/ FLAG_ENTRY0("OpcodeErr", SEEI(OPCODE)),
624 /* 7*/ FLAG_ENTRY0("VLMappingErr", SEEI(VL_MAPPING)),
625 /* 8*/ FLAG_ENTRY0("RawErr", SEEI(RAW)),
626 /* 9*/ FLAG_ENTRY0("RawIPv6Err", SEEI(RAW_IPV6)),
627 /*10*/ FLAG_ENTRY0("GRHErr", SEEI(GRH)),
628 /*11*/ FLAG_ENTRY0("BypassErr", SEEI(BYPASS)),
629 /*12*/ FLAG_ENTRY0("KDETHPacketsErr", SEEI(KDETH_PACKETS)),
630 /*13*/ FLAG_ENTRY0("NonKDETHPacketsErr", SEEI(NON_KDETH_PACKETS)),
631 /*14*/ FLAG_ENTRY0("TooSmallIBPacketsErr", SEEI(TOO_SMALL_IB_PACKETS)),
632 /*15*/ FLAG_ENTRY0("TooSmallBypassPacketsErr", SEEI(TOO_SMALL_BYPASS_PACKETS)),
633 /*16*/ FLAG_ENTRY0("PbcTestErr", SEEI(PBC_TEST)),
634 /*17*/ FLAG_ENTRY0("BadPktLenErr", SEEI(BAD_PKT_LEN)),
635 /*18*/ FLAG_ENTRY0("TooLongIBPacketErr", SEEI(TOO_LONG_IB_PACKET)),
636 /*19*/ FLAG_ENTRY0("TooLongBypassPacketsErr", SEEI(TOO_LONG_BYPASS_PACKETS)),
637 /*20*/ FLAG_ENTRY0("PbcStaticRateControlErr", SEEI(PBC_STATIC_RATE_CONTROL)),
638 /*21*/ FLAG_ENTRY0("BypassBadPktLenErr", SEEI(BAD_PKT_LEN)),
641 /* TXE Egress errors that cause an SPC freeze */
642 #define ALL_TXE_EGRESS_FREEZE_ERR \
643 (SEES(TX_EGRESS_FIFO_UNDERRUN_OR_PARITY) \
644 | SEES(TX_PIO_LAUNCH_INTF_PARITY) \
645 | SEES(TX_SDMA_LAUNCH_INTF_PARITY) \
646 | SEES(TX_SBRD_CTL_STATE_MACHINE_PARITY) \
647 | SEES(TX_LAUNCH_CSR_PARITY) \
648 | SEES(TX_SBRD_CTL_CSR_PARITY) \
649 | SEES(TX_CONFIG_PARITY) \
650 | SEES(TX_LAUNCH_FIFO0_UNC_OR_PARITY) \
651 | SEES(TX_LAUNCH_FIFO1_UNC_OR_PARITY) \
652 | SEES(TX_LAUNCH_FIFO2_UNC_OR_PARITY) \
653 | SEES(TX_LAUNCH_FIFO3_UNC_OR_PARITY) \
654 | SEES(TX_LAUNCH_FIFO4_UNC_OR_PARITY) \
655 | SEES(TX_LAUNCH_FIFO5_UNC_OR_PARITY) \
656 | SEES(TX_LAUNCH_FIFO6_UNC_OR_PARITY) \
657 | SEES(TX_LAUNCH_FIFO7_UNC_OR_PARITY) \
658 | SEES(TX_LAUNCH_FIFO8_UNC_OR_PARITY) \
659 | SEES(TX_CREDIT_RETURN_PARITY))
662 * TXE Send error flags
664 #define SES(name) SEND_ERR_STATUS_SEND_##name##_ERR_SMASK
665 static struct flag_table send_err_status_flags[] = {
666 /* 0*/ FLAG_ENTRY0("SendCsrParityErr", SES(CSR_PARITY)),
667 /* 1*/ FLAG_ENTRY0("SendCsrReadBadAddrErr", SES(CSR_READ_BAD_ADDR)),
668 /* 2*/ FLAG_ENTRY0("SendCsrWriteBadAddrErr", SES(CSR_WRITE_BAD_ADDR))
672 * TXE Send Context Error flags and consequences
674 static struct flag_table sc_err_status_flags[] = {
675 /* 0*/ FLAG_ENTRY("InconsistentSop",
676 SEC_PACKET_DROPPED | SEC_SC_HALTED,
677 SEND_CTXT_ERR_STATUS_PIO_INCONSISTENT_SOP_ERR_SMASK),
678 /* 1*/ FLAG_ENTRY("DisallowedPacket",
679 SEC_PACKET_DROPPED | SEC_SC_HALTED,
680 SEND_CTXT_ERR_STATUS_PIO_DISALLOWED_PACKET_ERR_SMASK),
681 /* 2*/ FLAG_ENTRY("WriteCrossesBoundary",
682 SEC_WRITE_DROPPED | SEC_SC_HALTED,
683 SEND_CTXT_ERR_STATUS_PIO_WRITE_CROSSES_BOUNDARY_ERR_SMASK),
684 /* 3*/ FLAG_ENTRY("WriteOverflow",
685 SEC_WRITE_DROPPED | SEC_SC_HALTED,
686 SEND_CTXT_ERR_STATUS_PIO_WRITE_OVERFLOW_ERR_SMASK),
687 /* 4*/ FLAG_ENTRY("WriteOutOfBounds",
688 SEC_WRITE_DROPPED | SEC_SC_HALTED,
689 SEND_CTXT_ERR_STATUS_PIO_WRITE_OUT_OF_BOUNDS_ERR_SMASK),
694 * RXE Receive Error flags
696 #define RXES(name) RCV_ERR_STATUS_RX_##name##_ERR_SMASK
697 static struct flag_table rxe_err_status_flags[] = {
698 /* 0*/ FLAG_ENTRY0("RxDmaCsrCorErr", RXES(DMA_CSR_COR)),
699 /* 1*/ FLAG_ENTRY0("RxDcIntfParityErr", RXES(DC_INTF_PARITY)),
700 /* 2*/ FLAG_ENTRY0("RxRcvHdrUncErr", RXES(RCV_HDR_UNC)),
701 /* 3*/ FLAG_ENTRY0("RxRcvHdrCorErr", RXES(RCV_HDR_COR)),
702 /* 4*/ FLAG_ENTRY0("RxRcvDataUncErr", RXES(RCV_DATA_UNC)),
703 /* 5*/ FLAG_ENTRY0("RxRcvDataCorErr", RXES(RCV_DATA_COR)),
704 /* 6*/ FLAG_ENTRY0("RxRcvQpMapTableUncErr", RXES(RCV_QP_MAP_TABLE_UNC)),
705 /* 7*/ FLAG_ENTRY0("RxRcvQpMapTableCorErr", RXES(RCV_QP_MAP_TABLE_COR)),
706 /* 8*/ FLAG_ENTRY0("RxRcvCsrParityErr", RXES(RCV_CSR_PARITY)),
707 /* 9*/ FLAG_ENTRY0("RxDcSopEopParityErr", RXES(DC_SOP_EOP_PARITY)),
708 /*10*/ FLAG_ENTRY0("RxDmaFlagUncErr", RXES(DMA_FLAG_UNC)),
709 /*11*/ FLAG_ENTRY0("RxDmaFlagCorErr", RXES(DMA_FLAG_COR)),
710 /*12*/ FLAG_ENTRY0("RxRcvFsmEncodingErr", RXES(RCV_FSM_ENCODING)),
711 /*13*/ FLAG_ENTRY0("RxRbufFreeListUncErr", RXES(RBUF_FREE_LIST_UNC)),
712 /*14*/ FLAG_ENTRY0("RxRbufFreeListCorErr", RXES(RBUF_FREE_LIST_COR)),
713 /*15*/ FLAG_ENTRY0("RxRbufLookupDesRegUncErr", RXES(RBUF_LOOKUP_DES_REG_UNC)),
714 /*16*/ FLAG_ENTRY0("RxRbufLookupDesRegUncCorErr",
715 RXES(RBUF_LOOKUP_DES_REG_UNC_COR)),
716 /*17*/ FLAG_ENTRY0("RxRbufLookupDesUncErr", RXES(RBUF_LOOKUP_DES_UNC)),
717 /*18*/ FLAG_ENTRY0("RxRbufLookupDesCorErr", RXES(RBUF_LOOKUP_DES_COR)),
718 /*19*/ FLAG_ENTRY0("RxRbufBlockListReadUncErr",
719 RXES(RBUF_BLOCK_LIST_READ_UNC)),
720 /*20*/ FLAG_ENTRY0("RxRbufBlockListReadCorErr",
721 RXES(RBUF_BLOCK_LIST_READ_COR)),
722 /*21*/ FLAG_ENTRY0("RxRbufCsrQHeadBufNumParityErr",
723 RXES(RBUF_CSR_QHEAD_BUF_NUM_PARITY)),
724 /*22*/ FLAG_ENTRY0("RxRbufCsrQEntCntParityErr",
725 RXES(RBUF_CSR_QENT_CNT_PARITY)),
726 /*23*/ FLAG_ENTRY0("RxRbufCsrQNextBufParityErr",
727 RXES(RBUF_CSR_QNEXT_BUF_PARITY)),
728 /*24*/ FLAG_ENTRY0("RxRbufCsrQVldBitParityErr",
729 RXES(RBUF_CSR_QVLD_BIT_PARITY)),
730 /*25*/ FLAG_ENTRY0("RxRbufCsrQHdPtrParityErr", RXES(RBUF_CSR_QHD_PTR_PARITY)),
731 /*26*/ FLAG_ENTRY0("RxRbufCsrQTlPtrParityErr", RXES(RBUF_CSR_QTL_PTR_PARITY)),
732 /*27*/ FLAG_ENTRY0("RxRbufCsrQNumOfPktParityErr",
733 RXES(RBUF_CSR_QNUM_OF_PKT_PARITY)),
734 /*28*/ FLAG_ENTRY0("RxRbufCsrQEOPDWParityErr", RXES(RBUF_CSR_QEOPDW_PARITY)),
735 /*29*/ FLAG_ENTRY0("RxRbufCtxIdParityErr", RXES(RBUF_CTX_ID_PARITY)),
736 /*30*/ FLAG_ENTRY0("RxRBufBadLookupErr", RXES(RBUF_BAD_LOOKUP)),
737 /*31*/ FLAG_ENTRY0("RxRbufFullErr", RXES(RBUF_FULL)),
738 /*32*/ FLAG_ENTRY0("RxRbufEmptyErr", RXES(RBUF_EMPTY)),
739 /*33*/ FLAG_ENTRY0("RxRbufFlRdAddrParityErr", RXES(RBUF_FL_RD_ADDR_PARITY)),
740 /*34*/ FLAG_ENTRY0("RxRbufFlWrAddrParityErr", RXES(RBUF_FL_WR_ADDR_PARITY)),
741 /*35*/ FLAG_ENTRY0("RxRbufFlInitdoneParityErr",
742 RXES(RBUF_FL_INITDONE_PARITY)),
743 /*36*/ FLAG_ENTRY0("RxRbufFlInitWrAddrParityErr",
744 RXES(RBUF_FL_INIT_WR_ADDR_PARITY)),
745 /*37*/ FLAG_ENTRY0("RxRbufNextFreeBufUncErr", RXES(RBUF_NEXT_FREE_BUF_UNC)),
746 /*38*/ FLAG_ENTRY0("RxRbufNextFreeBufCorErr", RXES(RBUF_NEXT_FREE_BUF_COR)),
747 /*39*/ FLAG_ENTRY0("RxLookupDesPart1UncErr", RXES(LOOKUP_DES_PART1_UNC)),
748 /*40*/ FLAG_ENTRY0("RxLookupDesPart1UncCorErr",
749 RXES(LOOKUP_DES_PART1_UNC_COR)),
750 /*41*/ FLAG_ENTRY0("RxLookupDesPart2ParityErr",
751 RXES(LOOKUP_DES_PART2_PARITY)),
752 /*42*/ FLAG_ENTRY0("RxLookupRcvArrayUncErr", RXES(LOOKUP_RCV_ARRAY_UNC)),
753 /*43*/ FLAG_ENTRY0("RxLookupRcvArrayCorErr", RXES(LOOKUP_RCV_ARRAY_COR)),
754 /*44*/ FLAG_ENTRY0("RxLookupCsrParityErr", RXES(LOOKUP_CSR_PARITY)),
755 /*45*/ FLAG_ENTRY0("RxHqIntrCsrParityErr", RXES(HQ_INTR_CSR_PARITY)),
756 /*46*/ FLAG_ENTRY0("RxHqIntrFsmErr", RXES(HQ_INTR_FSM)),
757 /*47*/ FLAG_ENTRY0("RxRbufDescPart1UncErr", RXES(RBUF_DESC_PART1_UNC)),
758 /*48*/ FLAG_ENTRY0("RxRbufDescPart1CorErr", RXES(RBUF_DESC_PART1_COR)),
759 /*49*/ FLAG_ENTRY0("RxRbufDescPart2UncErr", RXES(RBUF_DESC_PART2_UNC)),
760 /*50*/ FLAG_ENTRY0("RxRbufDescPart2CorErr", RXES(RBUF_DESC_PART2_COR)),
761 /*51*/ FLAG_ENTRY0("RxDmaHdrFifoRdUncErr", RXES(DMA_HDR_FIFO_RD_UNC)),
762 /*52*/ FLAG_ENTRY0("RxDmaHdrFifoRdCorErr", RXES(DMA_HDR_FIFO_RD_COR)),
763 /*53*/ FLAG_ENTRY0("RxDmaDataFifoRdUncErr", RXES(DMA_DATA_FIFO_RD_UNC)),
764 /*54*/ FLAG_ENTRY0("RxDmaDataFifoRdCorErr", RXES(DMA_DATA_FIFO_RD_COR)),
765 /*55*/ FLAG_ENTRY0("RxRbufDataUncErr", RXES(RBUF_DATA_UNC)),
766 /*56*/ FLAG_ENTRY0("RxRbufDataCorErr", RXES(RBUF_DATA_COR)),
767 /*57*/ FLAG_ENTRY0("RxDmaCsrParityErr", RXES(DMA_CSR_PARITY)),
768 /*58*/ FLAG_ENTRY0("RxDmaEqFsmEncodingErr", RXES(DMA_EQ_FSM_ENCODING)),
769 /*59*/ FLAG_ENTRY0("RxDmaDqFsmEncodingErr", RXES(DMA_DQ_FSM_ENCODING)),
770 /*60*/ FLAG_ENTRY0("RxDmaCsrUncErr", RXES(DMA_CSR_UNC)),
771 /*61*/ FLAG_ENTRY0("RxCsrReadBadAddrErr", RXES(CSR_READ_BAD_ADDR)),
772 /*62*/ FLAG_ENTRY0("RxCsrWriteBadAddrErr", RXES(CSR_WRITE_BAD_ADDR)),
773 /*63*/ FLAG_ENTRY0("RxCsrParityErr", RXES(CSR_PARITY))
/*
 * RXE errors that will trigger an SPC freeze.
 *
 * The mask is the bitwise OR of the RcvErrStatus bit masks listed below.
 */
#define ALL_RXE_FREEZE_ERR ( \
	RCV_ERR_STATUS_RX_RCV_QP_MAP_TABLE_UNC_ERR_SMASK | \
	RCV_ERR_STATUS_RX_RCV_CSR_PARITY_ERR_SMASK | \
	RCV_ERR_STATUS_RX_DMA_FLAG_UNC_ERR_SMASK | \
	RCV_ERR_STATUS_RX_RCV_FSM_ENCODING_ERR_SMASK | \
	RCV_ERR_STATUS_RX_RBUF_FREE_LIST_UNC_ERR_SMASK | \
	RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_REG_UNC_ERR_SMASK | \
	RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_REG_UNC_COR_ERR_SMASK | \
	RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_UNC_ERR_SMASK | \
	RCV_ERR_STATUS_RX_RBUF_BLOCK_LIST_READ_UNC_ERR_SMASK | \
	RCV_ERR_STATUS_RX_RBUF_CSR_QHEAD_BUF_NUM_PARITY_ERR_SMASK | \
	RCV_ERR_STATUS_RX_RBUF_CSR_QENT_CNT_PARITY_ERR_SMASK | \
	RCV_ERR_STATUS_RX_RBUF_CSR_QNEXT_BUF_PARITY_ERR_SMASK | \
	RCV_ERR_STATUS_RX_RBUF_CSR_QVLD_BIT_PARITY_ERR_SMASK | \
	RCV_ERR_STATUS_RX_RBUF_CSR_QHD_PTR_PARITY_ERR_SMASK | \
	RCV_ERR_STATUS_RX_RBUF_CSR_QTL_PTR_PARITY_ERR_SMASK | \
	RCV_ERR_STATUS_RX_RBUF_CSR_QNUM_OF_PKT_PARITY_ERR_SMASK | \
	RCV_ERR_STATUS_RX_RBUF_CSR_QEOPDW_PARITY_ERR_SMASK | \
	RCV_ERR_STATUS_RX_RBUF_CTX_ID_PARITY_ERR_SMASK | \
	RCV_ERR_STATUS_RX_RBUF_BAD_LOOKUP_ERR_SMASK | \
	RCV_ERR_STATUS_RX_RBUF_FULL_ERR_SMASK | \
	RCV_ERR_STATUS_RX_RBUF_EMPTY_ERR_SMASK | \
	RCV_ERR_STATUS_RX_RBUF_FL_RD_ADDR_PARITY_ERR_SMASK | \
	RCV_ERR_STATUS_RX_RBUF_FL_WR_ADDR_PARITY_ERR_SMASK | \
	RCV_ERR_STATUS_RX_RBUF_FL_INITDONE_PARITY_ERR_SMASK | \
	RCV_ERR_STATUS_RX_RBUF_FL_INIT_WR_ADDR_PARITY_ERR_SMASK | \
	RCV_ERR_STATUS_RX_RBUF_NEXT_FREE_BUF_UNC_ERR_SMASK | \
	RCV_ERR_STATUS_RX_LOOKUP_DES_PART1_UNC_ERR_SMASK | \
	RCV_ERR_STATUS_RX_LOOKUP_DES_PART1_UNC_COR_ERR_SMASK | \
	RCV_ERR_STATUS_RX_LOOKUP_DES_PART2_PARITY_ERR_SMASK | \
	RCV_ERR_STATUS_RX_LOOKUP_RCV_ARRAY_UNC_ERR_SMASK | \
	RCV_ERR_STATUS_RX_LOOKUP_CSR_PARITY_ERR_SMASK | \
	RCV_ERR_STATUS_RX_HQ_INTR_CSR_PARITY_ERR_SMASK | \
	RCV_ERR_STATUS_RX_HQ_INTR_FSM_ERR_SMASK | \
	RCV_ERR_STATUS_RX_RBUF_DESC_PART1_UNC_ERR_SMASK | \
	RCV_ERR_STATUS_RX_RBUF_DESC_PART1_COR_ERR_SMASK | \
	RCV_ERR_STATUS_RX_RBUF_DESC_PART2_UNC_ERR_SMASK | \
	RCV_ERR_STATUS_RX_DMA_HDR_FIFO_RD_UNC_ERR_SMASK | \
	RCV_ERR_STATUS_RX_DMA_DATA_FIFO_RD_UNC_ERR_SMASK | \
	RCV_ERR_STATUS_RX_RBUF_DATA_UNC_ERR_SMASK | \
	RCV_ERR_STATUS_RX_DMA_CSR_PARITY_ERR_SMASK | \
	RCV_ERR_STATUS_RX_DMA_EQ_FSM_ENCODING_ERR_SMASK | \
	RCV_ERR_STATUS_RX_DMA_DQ_FSM_ENCODING_ERR_SMASK | \
	RCV_ERR_STATUS_RX_DMA_CSR_UNC_ERR_SMASK | \
	RCV_ERR_STATUS_RX_CSR_PARITY_ERR_SMASK)
/*
 * Mask of three RcvErrStatus bits: the uncorrectable DMA CSR, DMA header
 * FIFO read and DMA data FIFO read errors.
 * NOTE(review): the name suggests these are the freeze errors that cause an
 * abort; confirm against the RXE error handler.
 */
#define RXE_FREEZE_ABORT_MASK ( \
	RCV_ERR_STATUS_RX_DMA_CSR_UNC_ERR_SMASK | \
	RCV_ERR_STATUS_RX_DMA_HDR_FIFO_RD_UNC_ERR_SMASK | \
	RCV_ERR_STATUS_RX_DMA_DATA_FIFO_RD_UNC_ERR_SMASK)
/*
 * DCC error flag table.
 *
 * DCCE(name) expands to the DCC_ERR_FLG_<name>_SMASK mask. Each table entry
 * is built with FLAG_ENTRY0 from a name string and the matching mask.
 * NOTE(review): the closing line of the table is not visible in this listing.
 */
831 #define DCCE(name) DCC_ERR_FLG_##name##_SMASK
832 static struct flag_table dcc_err_flags[] = {
833 FLAG_ENTRY0("bad_l2_err", DCCE(BAD_L2_ERR)),
834 FLAG_ENTRY0("bad_sc_err", DCCE(BAD_SC_ERR)),
835 FLAG_ENTRY0("bad_mid_tail_err", DCCE(BAD_MID_TAIL_ERR)),
836 FLAG_ENTRY0("bad_preemption_err", DCCE(BAD_PREEMPTION_ERR)),
837 FLAG_ENTRY0("preemption_err", DCCE(PREEMPTION_ERR)),
838 FLAG_ENTRY0("preemptionvl15_err", DCCE(PREEMPTIONVL15_ERR)),
839 FLAG_ENTRY0("bad_vl_marker_err", DCCE(BAD_VL_MARKER_ERR)),
840 FLAG_ENTRY0("bad_dlid_target_err", DCCE(BAD_DLID_TARGET_ERR)),
841 FLAG_ENTRY0("bad_lver_err", DCCE(BAD_LVER_ERR)),
842 FLAG_ENTRY0("uncorrectable_err", DCCE(UNCORRECTABLE_ERR)),
843 FLAG_ENTRY0("bad_crdt_ack_err", DCCE(BAD_CRDT_ACK_ERR)),
844 FLAG_ENTRY0("unsup_pkt_type", DCCE(UNSUP_PKT_TYPE)),
845 FLAG_ENTRY0("bad_ctrl_flit_err", DCCE(BAD_CTRL_FLIT_ERR)),
846 FLAG_ENTRY0("event_cntr_parity_err", DCCE(EVENT_CNTR_PARITY_ERR)),
847 FLAG_ENTRY0("event_cntr_rollover_err", DCCE(EVENT_CNTR_ROLLOVER_ERR)),
848 FLAG_ENTRY0("link_err", DCCE(LINK_ERR)),
849 FLAG_ENTRY0("misc_cntr_rollover_err", DCCE(MISC_CNTR_ROLLOVER_ERR)),
850 FLAG_ENTRY0("bad_ctrl_dist_err", DCCE(BAD_CTRL_DIST_ERR)),
851 FLAG_ENTRY0("bad_tail_dist_err", DCCE(BAD_TAIL_DIST_ERR)),
852 FLAG_ENTRY0("bad_head_dist_err", DCCE(BAD_HEAD_DIST_ERR)),
853 FLAG_ENTRY0("nonvl15_state_err", DCCE(NONVL15_STATE_ERR)),
854 FLAG_ENTRY0("vl15_multi_err", DCCE(VL15_MULTI_ERR)),
855 FLAG_ENTRY0("bad_pkt_length_err", DCCE(BAD_PKT_LENGTH_ERR)),
856 FLAG_ENTRY0("unsup_vl_err", DCCE(UNSUP_VL_ERR)),
857 FLAG_ENTRY0("perm_nvl15_err", DCCE(PERM_NVL15_ERR)),
858 FLAG_ENTRY0("slid_zero_err", DCCE(SLID_ZERO_ERR)),
859 FLAG_ENTRY0("dlid_zero_err", DCCE(DLID_ZERO_ERR)),
860 FLAG_ENTRY0("length_mtu_err", DCCE(LENGTH_MTU_ERR)),
861 FLAG_ENTRY0("rx_early_drop_err", DCCE(RX_EARLY_DROP_ERR)),
862 FLAG_ENTRY0("late_short_err", DCCE(LATE_SHORT_ERR)),
863 FLAG_ENTRY0("late_long_err", DCCE(LATE_LONG_ERR)),
864 FLAG_ENTRY0("late_ebp_err", DCCE(LATE_EBP_ERR)),
865 FLAG_ENTRY0("fpe_tx_fifo_ovflw_err", DCCE(FPE_TX_FIFO_OVFLW_ERR)),
866 FLAG_ENTRY0("fpe_tx_fifo_unflw_err", DCCE(FPE_TX_FIFO_UNFLW_ERR)),
867 FLAG_ENTRY0("csr_access_blocked_host", DCCE(CSR_ACCESS_BLOCKED_HOST)),
868 FLAG_ENTRY0("csr_access_blocked_uc", DCCE(CSR_ACCESS_BLOCKED_UC)),
869 FLAG_ENTRY0("tx_ctrl_parity_err", DCCE(TX_CTRL_PARITY_ERR)),
870 FLAG_ENTRY0("tx_ctrl_parity_mbe_err", DCCE(TX_CTRL_PARITY_MBE_ERR)),
871 FLAG_ENTRY0("tx_sc_parity_err", DCCE(TX_SC_PARITY_ERR)),
872 FLAG_ENTRY0("rx_ctrl_parity_mbe_err", DCCE(RX_CTRL_PARITY_MBE_ERR)),
873 FLAG_ENTRY0("csr_parity_err", DCCE(CSR_PARITY_ERR)),
874 FLAG_ENTRY0("csr_inval_addr", DCCE(CSR_INVAL_ADDR)),
875 FLAG_ENTRY0("tx_byte_shft_parity_err", DCCE(TX_BYTE_SHFT_PARITY_ERR)),
876 FLAG_ENTRY0("rx_byte_shft_parity_err", DCCE(RX_BYTE_SHFT_PARITY_ERR)),
877 FLAG_ENTRY0("fmconfig_err", DCCE(FMCONFIG_ERR)),
878 FLAG_ENTRY0("rcvport_err", DCCE(RCVPORT_ERR)),
/*
 * LCB error flag table.
 *
 * LCBE(name) expands to the DC_LCB_ERR_FLG_<name>_SMASK mask. Each entry is
 * built with FLAG_ENTRY0 from a name string and the matching mask; the
 * leading numeric comments count the entries from 0.
 * NOTE(review): the closing line of the table is not visible in this listing.
 */
884 #define LCBE(name) DC_LCB_ERR_FLG_##name##_SMASK
885 static struct flag_table lcb_err_flags[] = {
886 /* 0*/ FLAG_ENTRY0("CSR_PARITY_ERR", LCBE(CSR_PARITY_ERR)),
887 /* 1*/ FLAG_ENTRY0("INVALID_CSR_ADDR", LCBE(INVALID_CSR_ADDR)),
888 /* 2*/ FLAG_ENTRY0("RST_FOR_FAILED_DESKEW", LCBE(RST_FOR_FAILED_DESKEW)),
889 /* 3*/ FLAG_ENTRY0("ALL_LNS_FAILED_REINIT_TEST",
890 LCBE(ALL_LNS_FAILED_REINIT_TEST)),
891 /* 4*/ FLAG_ENTRY0("LOST_REINIT_STALL_OR_TOS", LCBE(LOST_REINIT_STALL_OR_TOS)),
892 /* 5*/ FLAG_ENTRY0("TX_LESS_THAN_FOUR_LNS", LCBE(TX_LESS_THAN_FOUR_LNS)),
893 /* 6*/ FLAG_ENTRY0("RX_LESS_THAN_FOUR_LNS", LCBE(RX_LESS_THAN_FOUR_LNS)),
894 /* 7*/ FLAG_ENTRY0("SEQ_CRC_ERR", LCBE(SEQ_CRC_ERR)),
895 /* 8*/ FLAG_ENTRY0("REINIT_FROM_PEER", LCBE(REINIT_FROM_PEER)),
896 /* 9*/ FLAG_ENTRY0("REINIT_FOR_LN_DEGRADE", LCBE(REINIT_FOR_LN_DEGRADE)),
897 /*10*/ FLAG_ENTRY0("CRC_ERR_CNT_HIT_LIMIT", LCBE(CRC_ERR_CNT_HIT_LIMIT)),
898 /*11*/ FLAG_ENTRY0("RCLK_STOPPED", LCBE(RCLK_STOPPED)),
899 /*12*/ FLAG_ENTRY0("UNEXPECTED_REPLAY_MARKER", LCBE(UNEXPECTED_REPLAY_MARKER)),
900 /*13*/ FLAG_ENTRY0("UNEXPECTED_ROUND_TRIP_MARKER",
901 LCBE(UNEXPECTED_ROUND_TRIP_MARKER)),
902 /*14*/ FLAG_ENTRY0("ILLEGAL_NULL_LTP", LCBE(ILLEGAL_NULL_LTP)),
903 /*15*/ FLAG_ENTRY0("ILLEGAL_FLIT_ENCODING", LCBE(ILLEGAL_FLIT_ENCODING)),
904 /*16*/ FLAG_ENTRY0("FLIT_INPUT_BUF_OFLW", LCBE(FLIT_INPUT_BUF_OFLW)),
905 /*17*/ FLAG_ENTRY0("VL_ACK_INPUT_BUF_OFLW", LCBE(VL_ACK_INPUT_BUF_OFLW)),
906 /*18*/ FLAG_ENTRY0("VL_ACK_INPUT_PARITY_ERR", LCBE(VL_ACK_INPUT_PARITY_ERR)),
907 /*19*/ FLAG_ENTRY0("VL_ACK_INPUT_WRONG_CRC_MODE",
908 LCBE(VL_ACK_INPUT_WRONG_CRC_MODE)),
909 /*20*/ FLAG_ENTRY0("FLIT_INPUT_BUF_MBE", LCBE(FLIT_INPUT_BUF_MBE)),
910 /*21*/ FLAG_ENTRY0("FLIT_INPUT_BUF_SBE", LCBE(FLIT_INPUT_BUF_SBE)),
911 /*22*/ FLAG_ENTRY0("REPLAY_BUF_MBE", LCBE(REPLAY_BUF_MBE)),
912 /*23*/ FLAG_ENTRY0("REPLAY_BUF_SBE", LCBE(REPLAY_BUF_SBE)),
913 /*24*/ FLAG_ENTRY0("CREDIT_RETURN_FLIT_MBE", LCBE(CREDIT_RETURN_FLIT_MBE)),
914 /*25*/ FLAG_ENTRY0("RST_FOR_LINK_TIMEOUT", LCBE(RST_FOR_LINK_TIMEOUT)),
915 /*26*/ FLAG_ENTRY0("RST_FOR_INCOMPLT_RND_TRIP",
916 LCBE(RST_FOR_INCOMPLT_RND_TRIP)),
917 /*27*/ FLAG_ENTRY0("HOLD_REINIT", LCBE(HOLD_REINIT)),
918 /*28*/ FLAG_ENTRY0("NEG_EDGE_LINK_TRANSFER_ACTIVE",
919 LCBE(NEG_EDGE_LINK_TRANSFER_ACTIVE)),
920 /*29*/ FLAG_ENTRY0("REDUNDANT_FLIT_PARITY_ERR",
921 LCBE(REDUNDANT_FLIT_PARITY_ERR))
/*
 * DC8051 error flag table.
 *
 * D8E(name) expands to the DC_DC8051_ERR_FLG_<name>_SMASK mask. Each entry is
 * built with FLAG_ENTRY0 from a name string and the matching mask.
 * NOTE(review): the closing line of the table is not visible in this listing.
 */
927 #define D8E(name) DC_DC8051_ERR_FLG_##name##_SMASK
928 static struct flag_table dc8051_err_flags[] = {
929 FLAG_ENTRY0("SET_BY_8051", D8E(SET_BY_8051)),
930 FLAG_ENTRY0("LOST_8051_HEART_BEAT", D8E(LOST_8051_HEART_BEAT)),
931 FLAG_ENTRY0("CRAM_MBE", D8E(CRAM_MBE)),
932 FLAG_ENTRY0("CRAM_SBE", D8E(CRAM_SBE)),
933 FLAG_ENTRY0("DRAM_MBE", D8E(DRAM_MBE)),
934 FLAG_ENTRY0("DRAM_SBE", D8E(DRAM_SBE)),
935 FLAG_ENTRY0("IRAM_MBE", D8E(IRAM_MBE)),
936 FLAG_ENTRY0("IRAM_SBE", D8E(IRAM_SBE)),
937 FLAG_ENTRY0("UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES",
938 D8E(UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES)),
939 FLAG_ENTRY0("INVALID_CSR_ADDR", D8E(INVALID_CSR_ADDR)),
943 * DC8051 Information Error flags
945 * Flags in DC8051_DBG_ERR_INFO_SET_BY_8051.ERROR field.
/*
 * Table pairing a human-readable description with each named error constant
 * (SPICO_ROM_FAILED ... FAILED_LNI_CONFIGLT), built with FLAG_ENTRY0.
 * NOTE(review): the closing line of the table is not visible in this listing.
 */
947 static struct flag_table dc8051_info_err_flags[] = {
948 FLAG_ENTRY0("Spico ROM check failed", SPICO_ROM_FAILED),
949 FLAG_ENTRY0("Unknown frame received", UNKNOWN_FRAME),
950 FLAG_ENTRY0("Target BER not met", TARGET_BER_NOT_MET),
951 FLAG_ENTRY0("Serdes internal loopback failure",
952 FAILED_SERDES_INTERNAL_LOOPBACK),
953 FLAG_ENTRY0("Failed SerDes init", FAILED_SERDES_INIT),
954 FLAG_ENTRY0("Failed LNI(Polling)", FAILED_LNI_POLLING),
955 FLAG_ENTRY0("Failed LNI(Debounce)", FAILED_LNI_DEBOUNCE),
956 FLAG_ENTRY0("Failed LNI(EstbComm)", FAILED_LNI_ESTBCOMM),
957 FLAG_ENTRY0("Failed LNI(OptEq)", FAILED_LNI_OPTEQ),
958 FLAG_ENTRY0("Failed LNI(VerifyCap_1)", FAILED_LNI_VERIFY_CAP1),
959 FLAG_ENTRY0("Failed LNI(VerifyCap_2)", FAILED_LNI_VERIFY_CAP2),
960 FLAG_ENTRY0("Failed LNI(ConfigLT)", FAILED_LNI_CONFIGLT)
964 * DC8051 Information Host Message flags
966 * Flags in DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG field.
/*
 * Table pairing a description with each host message bit. The masks are
 * literal single-bit values, 0x0001 through 0x0100, one per entry.
 * NOTE(review): the closing line of the table is not visible in this listing.
 */
968 static struct flag_table dc8051_info_host_msg_flags[] = {
969 FLAG_ENTRY0("Host request done", 0x0001),
970 FLAG_ENTRY0("BC SMA message", 0x0002),
971 FLAG_ENTRY0("BC PWR_MGM message", 0x0004),
972 FLAG_ENTRY0("BC Unknown message (BCC)", 0x0008),
973 FLAG_ENTRY0("BC Unknown message (LCB)", 0x0010),
974 FLAG_ENTRY0("External device config request", 0x0020),
975 FLAG_ENTRY0("VerifyCap all frames received", 0x0040),
976 FLAG_ENTRY0("LinkUp achieved", 0x0080),
977 FLAG_ENTRY0("Link going down", 0x0100),
/*
 * Forward declarations of file-local (static) functions.
 * NOTE(review): several prototypes are missing a continuation line in this
 * listing (e.g. read_vc_remote_phy, read_remote_device_id,
 * link_state_reason_name, do_8051_command, wait_logical_linkstate).
 */
981 static u32 encoded_size(u32 size);
982 static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate);
983 static int set_physical_link_state(struct hfi1_devdata *dd, u64 state);
984 static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management,
986 static void read_vc_remote_fabric(struct hfi1_devdata *dd, u8 *vau, u8 *z,
987 u8 *vcu, u16 *vl15buf, u8 *crc_sizes);
988 static void read_vc_remote_link_width(struct hfi1_devdata *dd,
989 u8 *remote_tx_rate, u16 *link_widths);
990 static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
991 u8 *flag_bits, u16 *link_widths);
992 static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
994 static void read_mgmt_allowed(struct hfi1_devdata *dd, u8 *mgmt_allowed);
995 static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx);
996 static int read_tx_settings(struct hfi1_devdata *dd, u8 *enable_lane_tx,
997 u8 *tx_polarity_inversion,
998 u8 *rx_polarity_inversion, u8 *max_rate);
999 static void handle_sdma_eng_err(struct hfi1_devdata *dd,
1000 unsigned int context, u64 err_status);
1001 static void handle_qsfp_int(struct hfi1_devdata *dd, u32 source, u64 reg);
1002 static void handle_dcc_err(struct hfi1_devdata *dd,
1003 unsigned int context, u64 err_status);
1004 static void handle_lcb_err(struct hfi1_devdata *dd,
1005 unsigned int context, u64 err_status);
1006 static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg);
1007 static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1008 static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1009 static void handle_misc_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1010 static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1011 static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1012 static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1013 static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1014 static void set_partition_keys(struct hfi1_pportdata *);
1015 static const char *link_state_name(u32 state);
1016 static const char *link_state_reason_name(struct hfi1_pportdata *ppd,
1018 static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
1020 static int read_idle_sma(struct hfi1_devdata *dd, u64 *data);
1021 static int thermal_init(struct hfi1_devdata *dd);
1023 static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
1025 static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc);
1026 static void handle_temp_err(struct hfi1_devdata *);
1027 static void dc_shutdown(struct hfi1_devdata *);
1028 static void dc_start(struct hfi1_devdata *);
1031 * Error interrupt table entry. This is used as input to the interrupt
1032 * "clear down" routine used for all second tier error interrupt registers.
1033 * Second tier interrupt registers have a single bit representing them
1034 * in the top-level CceIntStatus.
/*
 * One second-tier error interrupt source: the CSR offsets of its status,
 * clear and mask registers plus the handler invoked with the device, the
 * interrupt source and a register value.
 * NOTE(review): the initializer macros in this file supply a fifth value
 * (desc); that member and the closing brace are not visible in this listing.
 */
1036 struct err_reg_info {
1037 u32 status; /* status CSR offset */
1038 u32 clear; /* clear CSR offset */
1039 u32 mask; /* mask CSR offset */
1040 void (*handler)(struct hfi1_devdata *dd, u32 source, u64 reg);
/*
 * Sizes of the interrupt source ranges, each computed as the distance
 * between the range's START and END markers.
 */
#define NUM_MISC_ERRS	(IS_GENERAL_ERR_END - IS_GENERAL_ERR_START)
#define NUM_DC_ERRS	(IS_DC_END - IS_DC_START)
#define NUM_VARIOUS	(IS_VARIOUS_END - IS_VARIOUS_START)
1049 * Helpers for building HFI and DC error interrupt table entries. Different
1050 * helpers are needed because of inconsistent register names.
/*
 * Initializer helpers for struct err_reg_info entries. Each pastes a register
 * name prefix onto a set of suffixes: EE uses _STATUS/_CLEAR/_MASK, DC_EE1
 * uses _FLG/_FLG_CLR/_FLG_EN and DC_EE2 uses _FLG/_CLR/_EN.
 * NOTE(review): the final line of EE is not visible in this listing.
 */
1052 #define EE(reg, handler, desc) \
1053 { reg##_STATUS, reg##_CLEAR, reg##_MASK, \
1055 #define DC_EE1(reg, handler, desc) \
1056 { reg##_FLG, reg##_FLG_CLR, reg##_FLG_EN, handler, desc }
1057 #define DC_EE2(reg, handler, desc) \
1058 { reg##_FLG, reg##_CLR, reg##_EN, handler, desc }
1061 * Table of the "misc" grouping of error interrupts. Each entry refers to
1062 * another register containing more information.
/*
 * Error register table for the "misc" interrupt group, sized NUM_MISC_ERRS.
 * Slot 3 is reserved (all-zero entry with a NULL handler); slots after 7 are
 * left to default initialization.
 * NOTE(review): the closing line of the table is not visible in this listing.
 */
1064 static const struct err_reg_info misc_errs[NUM_MISC_ERRS] = {
1065 /* 0*/ EE(CCE_ERR, handle_cce_err, "CceErr"),
1066 /* 1*/ EE(RCV_ERR, handle_rxe_err, "RxeErr"),
1067 /* 2*/ EE(MISC_ERR, handle_misc_err, "MiscErr"),
1068 /* 3*/ { 0, 0, 0, NULL }, /* reserved */
1069 /* 4*/ EE(SEND_PIO_ERR, handle_pio_err, "PioErr"),
1070 /* 5*/ EE(SEND_DMA_ERR, handle_sdma_err, "SDmaErr"),
1071 /* 6*/ EE(SEND_EGRESS_ERR, handle_egress_err, "EgressErr"),
1072 /* 7*/ EE(SEND_ERR, handle_txe_err, "TxeErr")
1073 /* the rest are reserved */
/*
 * Index into the Various section of the interrupt sources
 * corresponding to the Critical Temperature interrupt.
 */
#define TCRIT_INT_SOURCE 4
1083 * SDMA error interrupt entry - refers to another register containing more
1086 static const struct err_reg_info sdma_eng_err =
1087 EE(SEND_DMA_ENG_ERR, handle_sdma_eng_err, "SDmaEngErr");
/*
 * Error register table for the "various" interrupt group, sized NUM_VARIOUS.
 * Only the two QSFP sources (slots 2 and 3) have a handler here; slots 0, 1
 * and 4 are all-zero entries with a NULL handler.
 * NOTE(review): the closing line of the table is not visible in this listing.
 */
1089 static const struct err_reg_info various_err[NUM_VARIOUS] = {
1090 /* 0*/ { 0, 0, 0, NULL }, /* PbcInt */
1091 /* 1*/ { 0, 0, 0, NULL }, /* GpioAssertInt */
1092 /* 2*/ EE(ASIC_QSFP1, handle_qsfp_int, "QSFP1"),
1093 /* 3*/ EE(ASIC_QSFP2, handle_qsfp_int, "QSFP2"),
1094 /* 4*/ { 0, 0, 0, NULL }, /* TCritInt */
1095 /* rest are reserved */
1099 * The DC encoding of mtu_cap for 10K MTU in the DCC_CFG_PORT_CONFIG
1100 * register cannot be derived from the MTU value because 10K is not
1101 * a power of 2. Therefore, we need a constant. Everything else can
/* mtu_cap encoding for the 10K (10240-byte) MTU. */
#define DCC_CFG_PORT_MTU_CAP_10240	7
1107 * Table of the DC grouping of error interrupts. Each entry refers to
1108 * another register containing more information.
/*
 * Error register table for the DC interrupt group, sized NUM_DC_ERRS.
 * Slots 0-2 cover the DCC, LCB and DC8051 sources; slot 3 has no entry here
 * and is handled specially (see is_dc_int()).
 * NOTE(review): the closing line of the table is not visible in this listing.
 */
1110 static const struct err_reg_info dc_errs[NUM_DC_ERRS] = {
1111 /* 0*/ DC_EE1(DCC_ERR, handle_dcc_err, "DCC Err"),
1112 /* 1*/ DC_EE2(DC_LCB_ERR, handle_lcb_err, "LCB Err"),
1113 /* 2*/ DC_EE2(DC_DC8051_ERR, handle_8051_interrupt, "DC8051 Interrupt"),
1114 /* 3*/ /* dc_lbm_int - special, see is_dc_int() */
1115 /* the rest are reserved */
1125 * csr to read for name (if applicable)
1130 * offset into dd or ppd to store the counter's value
1140 * accessor for stat element, context either dd or ppd
1142 u64 (*rw_cntr)(const struct cntr_entry *,
/* First and last indices of the per-context C_RCV_HDR_OVF_<n> counters. */
#define C_RCV_HDR_OVF_FIRST	C_RCV_HDR_OVF_0
#define C_RCV_HDR_OVF_LAST	C_RCV_HDR_OVF_159
/*
 * Counter entry builders for the receive (RXE) counters. In the visible
 * lines each RXE macro computes the CSR offset as counter * 8 plus the
 * RCV_COUNTER_ARRAY32 or RCV_COUNTER_ARRAY64 base; the 32-bit variants OR
 * CNTR_32BIT into the flags.
 * NOTE(review): the body of CNTR_ELEM and several continuation lines of the
 * other macros are not visible in this listing.
 */
1152 #define CNTR_ELEM(name, csr, offset, flags, accessor) \
1162 #define RXE32_PORT_CNTR_ELEM(name, counter, flags) \
1164 (counter * 8 + RCV_COUNTER_ARRAY32), \
1165 0, flags | CNTR_32BIT, \
1166 port_access_u32_csr)
1168 #define RXE32_DEV_CNTR_ELEM(name, counter, flags) \
1170 (counter * 8 + RCV_COUNTER_ARRAY32), \
1171 0, flags | CNTR_32BIT, \
1175 #define RXE64_PORT_CNTR_ELEM(name, counter, flags) \
1177 (counter * 8 + RCV_COUNTER_ARRAY64), \
1179 port_access_u64_csr)
1181 #define RXE64_DEV_CNTR_ELEM(name, counter, flags) \
1183 (counter * 8 + RCV_COUNTER_ARRAY64), \
/*
 * Per-context receive header overflow counters.
 *
 * OVR_LBL(ctx) names the counter index C_RCV_HDR_OVF_<ctx>.
 * OVR_ELM(ctx) builds the matching counter entry, named "RcvHdrOvr<ctx>",
 * whose CSR is RCV_HDR_OVFL_CNT plus 0x100 per context.
 *
 * ctx is parenthesized in the offset arithmetic so that an expression
 * argument cannot be split by operator precedence.
 */
#define OVR_LBL(ctx) C_RCV_HDR_OVF_ ## ctx
#define OVR_ELM(ctx) \
CNTR_ELEM("RcvHdrOvr" #ctx, \
	  (RCV_HDR_OVFL_CNT + (ctx) * 0x100), \
	  0, CNTR_NORMAL, port_access_u64_csr)
/*
 * Counter entry builders for the send (TXE), CCE, DC and software counters.
 * In the visible lines the CSR offset is counter * 8 plus the relevant
 * counter array base, and the 32-bit variants OR CNTR_32BIT into the flags.
 * NOTE(review): most bodies are only partly visible in this listing; nothing
 * of DC_PERF_CNTR, DC_PERF_CNTR_LCB and SW_IBP_CNTR beyond the #define line
 * is shown.
 */
1194 #define TXE32_PORT_CNTR_ELEM(name, counter, flags) \
1196 (counter * 8 + SEND_COUNTER_ARRAY32), \
1197 0, flags | CNTR_32BIT, \
1198 port_access_u32_csr)
1201 #define TXE64_PORT_CNTR_ELEM(name, counter, flags) \
1203 (counter * 8 + SEND_COUNTER_ARRAY64), \
1205 port_access_u64_csr)
1207 # define TX64_DEV_CNTR_ELEM(name, counter, flags) \
1209 counter * 8 + SEND_COUNTER_ARRAY64, \
1215 #define CCE_PERF_DEV_CNTR_ELEM(name, counter, flags) \
1217 (counter * 8 + CCE_COUNTER_ARRAY32), \
1218 0, flags | CNTR_32BIT, \
1221 #define CCE_INT_DEV_CNTR_ELEM(name, counter, flags) \
1223 (counter * 8 + CCE_INT_COUNTER_ARRAY32), \
1224 0, flags | CNTR_32BIT, \
1228 #define DC_PERF_CNTR(name, counter, flags) \
1235 #define DC_PERF_CNTR_LCB(name, counter, flags) \
1243 #define SW_IBP_CNTR(name, cntr) \
/*
 * Read a 64-bit CSR. When HFI1_PRESENT is set in dd->flags the value is
 * fetched with readq() from dd->kregbase + offset.
 * NOTE(review): the declaration of val, the not-present path and the return
 * are not visible in this listing.
 */
1250 u64 read_csr(const struct hfi1_devdata *dd, u32 offset)
1254 if (dd->flags & HFI1_PRESENT) {
1255 val = readq((void __iomem *)dd->kregbase + offset);
/*
 * Write a 64-bit value to the CSR at dd->kregbase + offset with writeq().
 * The write is issued only when HFI1_PRESENT is set in dd->flags.
 */
1261 void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value)
1263 if (dd->flags & HFI1_PRESENT)
1264 writeq(value, (void __iomem *)dd->kregbase + offset);
/*
 * Return the mapped address of a CSR, computed as dd->kregbase + offset.
 * NOTE(review): the declaration of the offset parameter is not visible in
 * this listing.
 */
1267 void __iomem *get_csr_addr(
1268 struct hfi1_devdata *dd,
1271 return (void __iomem *)dd->kregbase + offset;
/*
 * Dispatch a counter CSR access by mode: CNTR_MODE_R reads the CSR into ret,
 * CNTR_MODE_W writes value to it, and any other mode logs an error. The
 * access is then traced with hfi1_cdbg().
 * NOTE(review): the declaration of ret, what ret holds after a write, and the
 * return statements are not visible in this listing.
 */
1274 static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr,
1275 int mode, u64 value)
1280 if (mode == CNTR_MODE_R) {
1281 ret = read_csr(dd, csr);
1282 } else if (mode == CNTR_MODE_W) {
1283 write_csr(dd, csr, value);
1286 dd_dev_err(dd, "Invalid cntr register access mode");
1290 hfi1_cdbg(CNTR, "csr 0x%x val 0x%llx mode %d", csr, ret, mode);
/*
 * Device counter accessor: context is a struct hfi1_devdata and the access
 * is forwarded to read_write_csr() on entry->csr.
 * NOTE(review): the statement executed when vl != CNTR_INVALID_VL is not
 * visible in this listing.
 */
1295 static u64 dev_access_u32_csr(const struct cntr_entry *entry,
1296 void *context, int vl, int mode, u64 data)
1298 struct hfi1_devdata *dd = context;
1300 if (vl != CNTR_INVALID_VL)
1302 return read_write_csr(dd, entry->csr, mode, data);
/*
 * Device counter accessor that distinguishes per-VL entries (CNTR_VL flag
 * set) from non-VL entries before calling read_write_csr() on csr.
 * NOTE(review): the bodies of the vl checks, any adjustment of csr for the
 * VL, and the return are not visible in this listing.
 */
1305 static u64 dev_access_u64_csr(const struct cntr_entry *entry, void *context,
1306 int vl, int mode, u64 data)
1308 struct hfi1_devdata *dd = context;
1311 u64 csr = entry->csr;
1313 if (entry->flags & CNTR_VL) {
1314 if (vl == CNTR_INVALID_VL)
1318 if (vl != CNTR_INVALID_VL)
1322 val = read_write_csr(dd, csr, mode, data);
/*
 * Accessor for LCB counters: reads go through read_lcb_csr() (result stored
 * in data) and writes through write_lcb_csr(). An error message and a debug
 * trace are present.
 * NOTE(review): the condition guarding the "Could not acquire LCB" message,
 * the vl check body and the return are not visible in this listing.
 */
1326 static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context,
1327 int vl, int mode, u64 data)
1329 struct hfi1_devdata *dd = context;
1330 u32 csr = entry->csr;
1333 if (vl != CNTR_INVALID_VL)
1335 if (mode == CNTR_MODE_R)
1336 ret = read_lcb_csr(dd, csr, &data);
1337 else if (mode == CNTR_MODE_W)
1338 ret = write_lcb_csr(dd, csr, data);
1341 dd_dev_err(dd, "Could not acquire LCB for counter 0x%x", csr);
1345 hfi1_cdbg(CNTR, "csr 0x%x val 0x%llx mode %d", csr, data, mode);
/*
 * Port counter accessor: context is a struct hfi1_pportdata and the access
 * is forwarded to read_write_csr() on the owning device (ppd->dd).
 * NOTE(review): the statement executed when vl != CNTR_INVALID_VL is not
 * visible in this listing.
 */
1350 static u64 port_access_u32_csr(const struct cntr_entry *entry, void *context,
1351 int vl, int mode, u64 data)
1353 struct hfi1_pportdata *ppd = context;
1355 if (vl != CNTR_INVALID_VL)
1357 return read_write_csr(ppd->dd, entry->csr, mode, data);
/*
 * Port counter accessor that distinguishes per-VL entries (CNTR_VL flag set)
 * from non-VL entries before calling read_write_csr() on ppd->dd.
 * NOTE(review): the bodies of the vl checks, any adjustment of csr for the
 * VL, and the return are not visible in this listing.
 */
1360 static u64 port_access_u64_csr(const struct cntr_entry *entry,
1361 void *context, int vl, int mode, u64 data)
1363 struct hfi1_pportdata *ppd = context;
1365 u64 csr = entry->csr;
1367 if (entry->flags & CNTR_VL) {
1368 if (vl == CNTR_INVALID_VL)
1372 if (vl != CNTR_INVALID_VL)
1375 val = read_write_csr(ppd->dd, csr, mode, data);
1379 /* Software defined */
/*
 * Access a software-maintained counter through cntr, dispatching on mode
 * (CNTR_MODE_R or CNTR_MODE_W); other modes log an error. The access is
 * traced with hfi1_cdbg().
 * NOTE(review): the last parameter, the read/write statements and the return
 * are not visible in this listing.
 */
1380 static inline u64 read_write_sw(struct hfi1_devdata *dd, u64 *cntr, int mode,
1385 if (mode == CNTR_MODE_R) {
1387 } else if (mode == CNTR_MODE_W) {
1391 dd_dev_err(dd, "Invalid cntr sw access mode");
1395 hfi1_cdbg(CNTR, "val 0x%llx mode %d", ret, mode);
/*
 * Accessor for the software counter ppd->link_downed, via read_write_sw().
 * NOTE(review): the statement executed when vl != CNTR_INVALID_VL is not
 * visible in this listing.
 */
1400 static u64 access_sw_link_dn_cnt(const struct cntr_entry *entry, void *context,
1401 int vl, int mode, u64 data)
1403 struct hfi1_pportdata *ppd = context;
1405 if (vl != CNTR_INVALID_VL)
1407 return read_write_sw(ppd->dd, &ppd->link_downed, mode, data);
/*
 * Accessor for the software counter ppd->link_up, via read_write_sw().
 * NOTE(review): the statement executed when vl != CNTR_INVALID_VL is not
 * visible in this listing.
 */
1410 static u64 access_sw_link_up_cnt(const struct cntr_entry *entry, void *context,
1411 int vl, int mode, u64 data)
1413 struct hfi1_pportdata *ppd = context;
1415 if (vl != CNTR_INVALID_VL)
1417 return read_write_sw(ppd->dd, &ppd->link_up, mode, data);
/*
 * Accessor for the software counter ppd->port_xmit_discards, via
 * read_write_sw().
 * NOTE(review): the statement executed when vl != CNTR_INVALID_VL is not
 * visible in this listing.
 */
1420 static u64 access_sw_xmit_discards(const struct cntr_entry *entry,
1421 void *context, int vl, int mode, u64 data)
1423 struct hfi1_pportdata *ppd = context;
1425 if (vl != CNTR_INVALID_VL)
1428 return read_write_sw(ppd->dd, &ppd->port_xmit_discards, mode, data);
/*
 * Accessor for the software counter ppd->port_xmit_constraint_errors, via
 * read_write_sw().
 * NOTE(review): the vl check body and the tail of the read_write_sw() call
 * are not visible in this listing.
 */
1431 static u64 access_xmit_constraint_errs(const struct cntr_entry *entry,
1432 void *context, int vl, int mode, u64 data)
1434 struct hfi1_pportdata *ppd = context;
1436 if (vl != CNTR_INVALID_VL)
1439 return read_write_sw(ppd->dd, &ppd->port_xmit_constraint_errors,
/*
 * Accessor for the software counter ppd->port_rcv_constraint_errors, via
 * read_write_sw().
 * NOTE(review): the vl check body and the tail of the read_write_sw() call
 * are not visible in this listing.
 */
1443 static u64 access_rcv_constraint_errs(const struct cntr_entry *entry,
1444 void *context, int vl, int mode, u64 data)
1446 struct hfi1_pportdata *ppd = context;
1448 if (vl != CNTR_INVALID_VL)
1451 return read_write_sw(ppd->dd, &ppd->port_rcv_constraint_errors,
/*
 * Sum a per-CPU u64 counter over every possible CPU.
 * NOTE(review): the declarations and the return of the accumulated value are
 * not visible in this listing.
 */
1455 u64 get_all_cpu_total(u64 __percpu *cntr)
1460 for_each_possible_cpu(cpu)
1461 counter += *per_cpu_ptr(cntr, cpu);
/*
 * Access a per-CPU counter relative to a zero baseline. A read returns the
 * sum over all CPUs minus *z_val; a write can only zero the counter, which is
 * done by storing the current sum in *z_val.
 * NOTE(review): one parameter line, the vl check body, the condition that
 * selects the "can only be zeroed" error and the return are not visible in
 * this listing.
 */
1465 static u64 read_write_cpu(struct hfi1_devdata *dd, u64 *z_val,
1467 int vl, int mode, u64 data)
1472 if (vl != CNTR_INVALID_VL)
1475 if (mode == CNTR_MODE_R) {
1476 ret = get_all_cpu_total(cntr) - *z_val;
1477 } else if (mode == CNTR_MODE_W) {
1478 /* A write can only zero the counter */
1480 *z_val = get_all_cpu_total(cntr);
1482 dd_dev_err(dd, "Per CPU cntrs can only be zeroed");
1484 dd_dev_err(dd, "Invalid cntr sw cpu access mode");
/*
 * Accessor for the per-CPU interrupt counter: forwards dd->int_counter and
 * its zero baseline dd->z_int_counter to read_write_cpu().
 * NOTE(review): the tail of the call is not visible in this listing.
 */
1491 static u64 access_sw_cpu_intr(const struct cntr_entry *entry,
1492 void *context, int vl, int mode, u64 data)
1494 struct hfi1_devdata *dd = context;
1496 return read_write_cpu(dd, &dd->z_int_counter, dd->int_counter, vl,
/*
 * Accessor for the per-CPU receive limit counter: forwards dd->rcv_limit and
 * its zero baseline dd->z_rcv_limit to read_write_cpu().
 * NOTE(review): the tail of the call is not visible in this listing.
 */
1500 static u64 access_sw_cpu_rcv_limit(const struct cntr_entry *entry,
1501 void *context, int vl, int mode, u64 data)
1503 struct hfi1_devdata *dd = context;
1505 return read_write_cpu(dd, &dd->z_rcv_limit, dd->rcv_limit, vl,
/*
 * Return dd->verbs_dev.n_piowait. In the visible lines vl, mode and data are
 * not consulted.
 */
1509 static u64 access_sw_pio_wait(const struct cntr_entry *entry,
1510 void *context, int vl, int mode, u64 data)
1512 struct hfi1_devdata *dd = context;
1514 return dd->verbs_dev.n_piowait;
/*
 * Return dd->verbs_dev.n_txwait. In the visible lines vl, mode and data are
 * not consulted.
 */
1517 static u64 access_sw_vtx_wait(const struct cntr_entry *entry,
1518 void *context, int vl, int mode, u64 data)
1520 struct hfi1_devdata *dd = context;
1522 return dd->verbs_dev.n_txwait;
/*
 * Return dd->verbs_dev.n_kmem_wait. In the visible lines vl, mode and data
 * are not consulted.
 */
1525 static u64 access_sw_kmem_wait(const struct cntr_entry *entry,
1526 void *context, int vl, int mode, u64 data)
1528 struct hfi1_devdata *dd = context;
1530 return dd->verbs_dev.n_kmem_wait;
/*
 * Return dd->verbs_dev.n_send_schedule. In the visible lines vl, mode and
 * data are not consulted.
 */
1533 static u64 access_sw_send_schedule(const struct cntr_entry *entry,
1534 void *context, int vl, int mode, u64 data)
1536 struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
1538 return dd->verbs_dev.n_send_schedule;
/*
 * def_access_sw_cpu(cntr) defines access_sw_cpu_<cntr>(), an accessor that
 * forwards the per-CPU counter ppd->ibport_data.<cntr> and its zero baseline
 * ppd->ibport_data.z_<cntr> to read_write_cpu(). It is instantiated below
 * for rc_acks, rc_qacks and rc_delayed_comp.
 * NOTE(review): the braces and the tail of the call are not visible in this
 * listing.
 */
1541 #define def_access_sw_cpu(cntr) \
1542 static u64 access_sw_cpu_##cntr(const struct cntr_entry *entry, \
1543 void *context, int vl, int mode, u64 data) \
1545 struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context; \
1546 return read_write_cpu(ppd->dd, &ppd->ibport_data.z_ ##cntr, \
1547 ppd->ibport_data.cntr, vl, \
1551 def_access_sw_cpu(rc_acks);
1552 def_access_sw_cpu(rc_qacks);
1553 def_access_sw_cpu(rc_delayed_comp);
/*
 * def_access_ibp_counter(cntr) defines access_ibp_<cntr>(), an accessor that
 * forwards the software counter ppd->ibport_data.n_<cntr> to read_write_sw().
 * It is instantiated below once per counter name.
 * NOTE(review): the braces, the vl check body and the tail of the call are
 * not visible in this listing.
 */
1555 #define def_access_ibp_counter(cntr) \
1556 static u64 access_ibp_##cntr(const struct cntr_entry *entry, \
1557 void *context, int vl, int mode, u64 data) \
1559 struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context; \
1561 if (vl != CNTR_INVALID_VL) \
1564 return read_write_sw(ppd->dd, &ppd->ibport_data.n_ ##cntr, \
1568 def_access_ibp_counter(loop_pkts);
1569 def_access_ibp_counter(rc_resends);
1570 def_access_ibp_counter(rnr_naks);
1571 def_access_ibp_counter(other_naks);
1572 def_access_ibp_counter(rc_timeouts);
1573 def_access_ibp_counter(pkt_drops);
1574 def_access_ibp_counter(dmawait);
1575 def_access_ibp_counter(rc_seqnak);
1576 def_access_ibp_counter(rc_dupreq);
1577 def_access_ibp_counter(rdma_seq);
1578 def_access_ibp_counter(unaligned);
1579 def_access_ibp_counter(seq_naks);
/*
 * Device counter table, sized DEV_CNTR_LAST and filled with designated
 * initializers indexed by the C_* counter identifiers. Entries are built with
 * the RXE32/CCE/DC counter macros, or with CNTR_ELEM plus an access_sw_*
 * accessor for the software counters at the end.
 * NOTE(review): many continuation lines (trailing flag arguments), one index
 * line (before DcTotCrc) and the closing line of the table are not visible in
 * this listing.
 */
1581 static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
1582 [C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH),
1583 [C_RX_TID_FULL] = RXE32_DEV_CNTR_ELEM(RxTIDFullEr, RCV_TID_FULL_ERR_CNT,
1585 [C_RX_TID_INVALID] = RXE32_DEV_CNTR_ELEM(RxTIDInvalid, RCV_TID_VALID_ERR_CNT,
1587 [C_RX_TID_FLGMS] = RXE32_DEV_CNTR_ELEM(RxTidFLGMs,
1588 RCV_TID_FLOW_GEN_MISMATCH_CNT,
1590 [C_RX_CTX_RHQS] = RXE32_DEV_CNTR_ELEM(RxCtxRHQS, RCV_CONTEXT_RHQ_STALL,
1592 [C_RX_CTX_EGRS] = RXE32_DEV_CNTR_ELEM(RxCtxEgrS, RCV_CONTEXT_EGR_STALL,
1594 [C_RCV_TID_FLSMS] = RXE32_DEV_CNTR_ELEM(RxTidFLSMs,
1595 RCV_TID_FLOW_SEQ_MISMATCH_CNT, CNTR_NORMAL),
1596 [C_CCE_PCI_CR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePciCrSt,
1597 CCE_PCIE_POSTED_CRDT_STALL_CNT, CNTR_NORMAL),
1598 [C_CCE_PCI_TR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePciTrSt, CCE_PCIE_TRGT_STALL_CNT,
1600 [C_CCE_PIO_WR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePioWrSt, CCE_PIO_WR_STALL_CNT,
1602 [C_CCE_ERR_INT] = CCE_INT_DEV_CNTR_ELEM(CceErrInt, CCE_ERR_INT_CNT,
1604 [C_CCE_SDMA_INT] = CCE_INT_DEV_CNTR_ELEM(CceSdmaInt, CCE_SDMA_INT_CNT,
1606 [C_CCE_MISC_INT] = CCE_INT_DEV_CNTR_ELEM(CceMiscInt, CCE_MISC_INT_CNT,
1608 [C_CCE_RCV_AV_INT] = CCE_INT_DEV_CNTR_ELEM(CceRcvAvInt, CCE_RCV_AVAIL_INT_CNT,
1610 [C_CCE_RCV_URG_INT] = CCE_INT_DEV_CNTR_ELEM(CceRcvUrgInt,
1611 CCE_RCV_URGENT_INT_CNT, CNTR_NORMAL),
1612 [C_CCE_SEND_CR_INT] = CCE_INT_DEV_CNTR_ELEM(CceSndCrInt,
1613 CCE_SEND_CREDIT_INT_CNT, CNTR_NORMAL),
1614 [C_DC_UNC_ERR] = DC_PERF_CNTR(DcUnctblErr, DCC_ERR_UNCORRECTABLE_CNT,
1616 [C_DC_RCV_ERR] = DC_PERF_CNTR(DcRecvErr, DCC_ERR_PORTRCV_ERR_CNT, CNTR_SYNTH),
1617 [C_DC_FM_CFG_ERR] = DC_PERF_CNTR(DcFmCfgErr, DCC_ERR_FMCONFIG_ERR_CNT,
1619 [C_DC_RMT_PHY_ERR] = DC_PERF_CNTR(DcRmtPhyErr, DCC_ERR_RCVREMOTE_PHY_ERR_CNT,
1621 [C_DC_DROPPED_PKT] = DC_PERF_CNTR(DcDroppedPkt, DCC_ERR_DROPPED_PKT_CNT,
1623 [C_DC_MC_XMIT_PKTS] = DC_PERF_CNTR(DcMcXmitPkts,
1624 DCC_PRF_PORT_XMIT_MULTICAST_CNT, CNTR_SYNTH),
1625 [C_DC_MC_RCV_PKTS] = DC_PERF_CNTR(DcMcRcvPkts,
1626 DCC_PRF_PORT_RCV_MULTICAST_PKT_CNT,
1628 [C_DC_XMIT_CERR] = DC_PERF_CNTR(DcXmitCorr,
1629 DCC_PRF_PORT_XMIT_CORRECTABLE_CNT, CNTR_SYNTH),
1630 [C_DC_RCV_CERR] = DC_PERF_CNTR(DcRcvCorrCnt, DCC_PRF_PORT_RCV_CORRECTABLE_CNT,
1632 [C_DC_RCV_FCC] = DC_PERF_CNTR(DcRxFCntl, DCC_PRF_RX_FLOW_CRTL_CNT,
1634 [C_DC_XMIT_FCC] = DC_PERF_CNTR(DcXmitFCntl, DCC_PRF_TX_FLOW_CRTL_CNT,
1636 [C_DC_XMIT_FLITS] = DC_PERF_CNTR(DcXmitFlits, DCC_PRF_PORT_XMIT_DATA_CNT,
1638 [C_DC_RCV_FLITS] = DC_PERF_CNTR(DcRcvFlits, DCC_PRF_PORT_RCV_DATA_CNT,
1640 [C_DC_XMIT_PKTS] = DC_PERF_CNTR(DcXmitPkts, DCC_PRF_PORT_XMIT_PKTS_CNT,
1642 [C_DC_RCV_PKTS] = DC_PERF_CNTR(DcRcvPkts, DCC_PRF_PORT_RCV_PKTS_CNT,
1644 [C_DC_RX_FLIT_VL] = DC_PERF_CNTR(DcRxFlitVl, DCC_PRF_PORT_VL_RCV_DATA_CNT,
1645 CNTR_SYNTH | CNTR_VL),
1646 [C_DC_RX_PKT_VL] = DC_PERF_CNTR(DcRxPktVl, DCC_PRF_PORT_VL_RCV_PKTS_CNT,
1647 CNTR_SYNTH | CNTR_VL),
1648 [C_DC_RCV_FCN] = DC_PERF_CNTR(DcRcvFcn, DCC_PRF_PORT_RCV_FECN_CNT, CNTR_SYNTH),
1649 [C_DC_RCV_FCN_VL] = DC_PERF_CNTR(DcRcvFcnVl, DCC_PRF_PORT_VL_RCV_FECN_CNT,
1650 CNTR_SYNTH | CNTR_VL),
1651 [C_DC_RCV_BCN] = DC_PERF_CNTR(DcRcvBcn, DCC_PRF_PORT_RCV_BECN_CNT, CNTR_SYNTH),
1652 [C_DC_RCV_BCN_VL] = DC_PERF_CNTR(DcRcvBcnVl, DCC_PRF_PORT_VL_RCV_BECN_CNT,
1653 CNTR_SYNTH | CNTR_VL),
1654 [C_DC_RCV_BBL] = DC_PERF_CNTR(DcRcvBbl, DCC_PRF_PORT_RCV_BUBBLE_CNT,
1656 [C_DC_RCV_BBL_VL] = DC_PERF_CNTR(DcRcvBblVl, DCC_PRF_PORT_VL_RCV_BUBBLE_CNT,
1657 CNTR_SYNTH | CNTR_VL),
1658 [C_DC_MARK_FECN] = DC_PERF_CNTR(DcMarkFcn, DCC_PRF_PORT_MARK_FECN_CNT,
1660 [C_DC_MARK_FECN_VL] = DC_PERF_CNTR(DcMarkFcnVl, DCC_PRF_PORT_VL_MARK_FECN_CNT,
1661 CNTR_SYNTH | CNTR_VL),
1663 DC_PERF_CNTR_LCB(DcTotCrc, DC_LCB_ERR_INFO_TOTAL_CRC_ERR,
1665 [C_DC_CRC_LN0] = DC_PERF_CNTR_LCB(DcCrcLn0, DC_LCB_ERR_INFO_CRC_ERR_LN0,
1667 [C_DC_CRC_LN1] = DC_PERF_CNTR_LCB(DcCrcLn1, DC_LCB_ERR_INFO_CRC_ERR_LN1,
1669 [C_DC_CRC_LN2] = DC_PERF_CNTR_LCB(DcCrcLn2, DC_LCB_ERR_INFO_CRC_ERR_LN2,
1671 [C_DC_CRC_LN3] = DC_PERF_CNTR_LCB(DcCrcLn3, DC_LCB_ERR_INFO_CRC_ERR_LN3,
1673 [C_DC_CRC_MULT_LN] =
1674 DC_PERF_CNTR_LCB(DcMultLn, DC_LCB_ERR_INFO_CRC_ERR_MULTI_LN,
1676 [C_DC_TX_REPLAY] = DC_PERF_CNTR_LCB(DcTxReplay, DC_LCB_ERR_INFO_TX_REPLAY_CNT,
1678 [C_DC_RX_REPLAY] = DC_PERF_CNTR_LCB(DcRxReplay, DC_LCB_ERR_INFO_RX_REPLAY_CNT,
1680 [C_DC_SEQ_CRC_CNT] =
1681 DC_PERF_CNTR_LCB(DcLinkSeqCrc, DC_LCB_ERR_INFO_SEQ_CRC_CNT,
1683 [C_DC_ESC0_ONLY_CNT] =
1684 DC_PERF_CNTR_LCB(DcEsc0, DC_LCB_ERR_INFO_ESCAPE_0_ONLY_CNT,
1686 [C_DC_ESC0_PLUS1_CNT] =
1687 DC_PERF_CNTR_LCB(DcEsc1, DC_LCB_ERR_INFO_ESCAPE_0_PLUS1_CNT,
1689 [C_DC_ESC0_PLUS2_CNT] =
1690 DC_PERF_CNTR_LCB(DcEsc0Plus2, DC_LCB_ERR_INFO_ESCAPE_0_PLUS2_CNT,
1692 [C_DC_REINIT_FROM_PEER_CNT] =
1693 DC_PERF_CNTR_LCB(DcReinitPeer, DC_LCB_ERR_INFO_REINIT_FROM_PEER_CNT,
1695 [C_DC_SBE_CNT] = DC_PERF_CNTR_LCB(DcSbe, DC_LCB_ERR_INFO_SBE_CNT,
1697 [C_DC_MISC_FLG_CNT] =
1698 DC_PERF_CNTR_LCB(DcMiscFlg, DC_LCB_ERR_INFO_MISC_FLG_CNT,
1700 [C_DC_PRF_GOOD_LTP_CNT] =
1701 DC_PERF_CNTR_LCB(DcGoodLTP, DC_LCB_PRF_GOOD_LTP_CNT, CNTR_SYNTH),
1702 [C_DC_PRF_ACCEPTED_LTP_CNT] =
1703 DC_PERF_CNTR_LCB(DcAccLTP, DC_LCB_PRF_ACCEPTED_LTP_CNT,
1705 [C_DC_PRF_RX_FLIT_CNT] =
1706 DC_PERF_CNTR_LCB(DcPrfRxFlit, DC_LCB_PRF_RX_FLIT_CNT, CNTR_SYNTH),
1707 [C_DC_PRF_TX_FLIT_CNT] =
1708 DC_PERF_CNTR_LCB(DcPrfTxFlit, DC_LCB_PRF_TX_FLIT_CNT, CNTR_SYNTH),
1709 [C_DC_PRF_CLK_CNTR] =
1710 DC_PERF_CNTR_LCB(DcPrfClk, DC_LCB_PRF_CLK_CNTR, CNTR_SYNTH),
1711 [C_DC_PG_DBG_FLIT_CRDTS_CNT] =
1712 DC_PERF_CNTR_LCB(DcFltCrdts, DC_LCB_PG_DBG_FLIT_CRDTS_CNT, CNTR_SYNTH),
1713 [C_DC_PG_STS_PAUSE_COMPLETE_CNT] =
1714 DC_PERF_CNTR_LCB(DcPauseComp, DC_LCB_PG_STS_PAUSE_COMPLETE_CNT,
1716 [C_DC_PG_STS_TX_SBE_CNT] =
1717 DC_PERF_CNTR_LCB(DcStsTxSbe, DC_LCB_PG_STS_TX_SBE_CNT, CNTR_SYNTH),
1718 [C_DC_PG_STS_TX_MBE_CNT] =
1719 DC_PERF_CNTR_LCB(DcStsTxMbe, DC_LCB_PG_STS_TX_MBE_CNT,
1721 [C_SW_CPU_INTR] = CNTR_ELEM("Intr", 0, 0, CNTR_NORMAL,
1722 access_sw_cpu_intr),
1723 [C_SW_CPU_RCV_LIM] = CNTR_ELEM("RcvLimit", 0, 0, CNTR_NORMAL,
1724 access_sw_cpu_rcv_limit),
1725 [C_SW_VTX_WAIT] = CNTR_ELEM("vTxWait", 0, 0, CNTR_NORMAL,
1726 access_sw_vtx_wait),
1727 [C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL,
1728 access_sw_pio_wait),
1729 [C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL,
1730 access_sw_kmem_wait),
1731 [C_SW_SEND_SCHED] = CNTR_ELEM("SendSched", 0, 0, CNTR_NORMAL,
1732 access_sw_send_schedule),
1735 static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = {
1736 [C_TX_UNSUP_VL] = TXE32_PORT_CNTR_ELEM(TxUnVLErr, SEND_UNSUP_VL_ERR_CNT,
1738 [C_TX_INVAL_LEN] = TXE32_PORT_CNTR_ELEM(TxInvalLen, SEND_LEN_ERR_CNT,
1740 [C_TX_MM_LEN_ERR] = TXE32_PORT_CNTR_ELEM(TxMMLenErr, SEND_MAX_MIN_LEN_ERR_CNT,
1742 [C_TX_UNDERRUN] = TXE32_PORT_CNTR_ELEM(TxUnderrun, SEND_UNDERRUN_CNT,
1744 [C_TX_FLOW_STALL] = TXE32_PORT_CNTR_ELEM(TxFlowStall, SEND_FLOW_STALL_CNT,
1746 [C_TX_DROPPED] = TXE32_PORT_CNTR_ELEM(TxDropped, SEND_DROPPED_PKT_CNT,
1748 [C_TX_HDR_ERR] = TXE32_PORT_CNTR_ELEM(TxHdrErr, SEND_HEADERS_ERR_CNT,
1750 [C_TX_PKT] = TXE64_PORT_CNTR_ELEM(TxPkt, SEND_DATA_PKT_CNT, CNTR_NORMAL),
1751 [C_TX_WORDS] = TXE64_PORT_CNTR_ELEM(TxWords, SEND_DWORD_CNT, CNTR_NORMAL),
1752 [C_TX_WAIT] = TXE64_PORT_CNTR_ELEM(TxWait, SEND_WAIT_CNT, CNTR_SYNTH),
1753 [C_TX_FLIT_VL] = TXE64_PORT_CNTR_ELEM(TxFlitVL, SEND_DATA_VL0_CNT,
1754 CNTR_SYNTH | CNTR_VL),
1755 [C_TX_PKT_VL] = TXE64_PORT_CNTR_ELEM(TxPktVL, SEND_DATA_PKT_VL0_CNT,
1756 CNTR_SYNTH | CNTR_VL),
1757 [C_TX_WAIT_VL] = TXE64_PORT_CNTR_ELEM(TxWaitVL, SEND_WAIT_VL0_CNT,
1758 CNTR_SYNTH | CNTR_VL),
1759 [C_RX_PKT] = RXE64_PORT_CNTR_ELEM(RxPkt, RCV_DATA_PKT_CNT, CNTR_NORMAL),
1760 [C_RX_WORDS] = RXE64_PORT_CNTR_ELEM(RxWords, RCV_DWORD_CNT, CNTR_NORMAL),
1761 [C_SW_LINK_DOWN] = CNTR_ELEM("SwLinkDown", 0, 0, CNTR_SYNTH | CNTR_32BIT,
1762 access_sw_link_dn_cnt),
1763 [C_SW_LINK_UP] = CNTR_ELEM("SwLinkUp", 0, 0, CNTR_SYNTH | CNTR_32BIT,
1764 access_sw_link_up_cnt),
1765 [C_SW_XMIT_DSCD] = CNTR_ELEM("XmitDscd", 0, 0, CNTR_SYNTH | CNTR_32BIT,
1766 access_sw_xmit_discards),
1767 [C_SW_XMIT_DSCD_VL] = CNTR_ELEM("XmitDscdVl", 0, 0,
1768 CNTR_SYNTH | CNTR_32BIT | CNTR_VL,
1769 access_sw_xmit_discards),
1770 [C_SW_XMIT_CSTR_ERR] = CNTR_ELEM("XmitCstrErr", 0, 0, CNTR_SYNTH,
1771 access_xmit_constraint_errs),
1772 [C_SW_RCV_CSTR_ERR] = CNTR_ELEM("RcvCstrErr", 0, 0, CNTR_SYNTH,
1773 access_rcv_constraint_errs),
1774 [C_SW_IBP_LOOP_PKTS] = SW_IBP_CNTR(LoopPkts, loop_pkts),
1775 [C_SW_IBP_RC_RESENDS] = SW_IBP_CNTR(RcResend, rc_resends),
1776 [C_SW_IBP_RNR_NAKS] = SW_IBP_CNTR(RnrNak, rnr_naks),
1777 [C_SW_IBP_OTHER_NAKS] = SW_IBP_CNTR(OtherNak, other_naks),
1778 [C_SW_IBP_RC_TIMEOUTS] = SW_IBP_CNTR(RcTimeOut, rc_timeouts),
1779 [C_SW_IBP_PKT_DROPS] = SW_IBP_CNTR(PktDrop, pkt_drops),
1780 [C_SW_IBP_DMA_WAIT] = SW_IBP_CNTR(DmaWait, dmawait),
1781 [C_SW_IBP_RC_SEQNAK] = SW_IBP_CNTR(RcSeqNak, rc_seqnak),
1782 [C_SW_IBP_RC_DUPREQ] = SW_IBP_CNTR(RcDupRew, rc_dupreq),
1783 [C_SW_IBP_RDMA_SEQ] = SW_IBP_CNTR(RdmaSeq, rdma_seq),
1784 [C_SW_IBP_UNALIGNED] = SW_IBP_CNTR(Unaligned, unaligned),
1785 [C_SW_IBP_SEQ_NAK] = SW_IBP_CNTR(SeqNak, seq_naks),
1786 [C_SW_CPU_RC_ACKS] = CNTR_ELEM("RcAcks", 0, 0, CNTR_NORMAL,
1787 access_sw_cpu_rc_acks),
1788 [C_SW_CPU_RC_QACKS] = CNTR_ELEM("RcQacks", 0, 0, CNTR_NORMAL,
1789 access_sw_cpu_rc_qacks),
1790 [C_SW_CPU_RC_DELAYED_COMP] = CNTR_ELEM("RcDelayComp", 0, 0, CNTR_NORMAL,
1791 access_sw_cpu_rc_delayed_comp),
1792 [OVR_LBL(0)] = OVR_ELM(0), [OVR_LBL(1)] = OVR_ELM(1),
1793 [OVR_LBL(2)] = OVR_ELM(2), [OVR_LBL(3)] = OVR_ELM(3),
1794 [OVR_LBL(4)] = OVR_ELM(4), [OVR_LBL(5)] = OVR_ELM(5),
1795 [OVR_LBL(6)] = OVR_ELM(6), [OVR_LBL(7)] = OVR_ELM(7),
1796 [OVR_LBL(8)] = OVR_ELM(8), [OVR_LBL(9)] = OVR_ELM(9),
1797 [OVR_LBL(10)] = OVR_ELM(10), [OVR_LBL(11)] = OVR_ELM(11),
1798 [OVR_LBL(12)] = OVR_ELM(12), [OVR_LBL(13)] = OVR_ELM(13),
1799 [OVR_LBL(14)] = OVR_ELM(14), [OVR_LBL(15)] = OVR_ELM(15),
1800 [OVR_LBL(16)] = OVR_ELM(16), [OVR_LBL(17)] = OVR_ELM(17),
1801 [OVR_LBL(18)] = OVR_ELM(18), [OVR_LBL(19)] = OVR_ELM(19),
1802 [OVR_LBL(20)] = OVR_ELM(20), [OVR_LBL(21)] = OVR_ELM(21),
1803 [OVR_LBL(22)] = OVR_ELM(22), [OVR_LBL(23)] = OVR_ELM(23),
1804 [OVR_LBL(24)] = OVR_ELM(24), [OVR_LBL(25)] = OVR_ELM(25),
1805 [OVR_LBL(26)] = OVR_ELM(26), [OVR_LBL(27)] = OVR_ELM(27),
1806 [OVR_LBL(28)] = OVR_ELM(28), [OVR_LBL(29)] = OVR_ELM(29),
1807 [OVR_LBL(30)] = OVR_ELM(30), [OVR_LBL(31)] = OVR_ELM(31),
1808 [OVR_LBL(32)] = OVR_ELM(32), [OVR_LBL(33)] = OVR_ELM(33),
1809 [OVR_LBL(34)] = OVR_ELM(34), [OVR_LBL(35)] = OVR_ELM(35),
1810 [OVR_LBL(36)] = OVR_ELM(36), [OVR_LBL(37)] = OVR_ELM(37),
1811 [OVR_LBL(38)] = OVR_ELM(38), [OVR_LBL(39)] = OVR_ELM(39),
1812 [OVR_LBL(40)] = OVR_ELM(40), [OVR_LBL(41)] = OVR_ELM(41),
1813 [OVR_LBL(42)] = OVR_ELM(42), [OVR_LBL(43)] = OVR_ELM(43),
1814 [OVR_LBL(44)] = OVR_ELM(44), [OVR_LBL(45)] = OVR_ELM(45),
1815 [OVR_LBL(46)] = OVR_ELM(46), [OVR_LBL(47)] = OVR_ELM(47),
1816 [OVR_LBL(48)] = OVR_ELM(48), [OVR_LBL(49)] = OVR_ELM(49),
1817 [OVR_LBL(50)] = OVR_ELM(50), [OVR_LBL(51)] = OVR_ELM(51),
1818 [OVR_LBL(52)] = OVR_ELM(52), [OVR_LBL(53)] = OVR_ELM(53),
1819 [OVR_LBL(54)] = OVR_ELM(54), [OVR_LBL(55)] = OVR_ELM(55),
1820 [OVR_LBL(56)] = OVR_ELM(56), [OVR_LBL(57)] = OVR_ELM(57),
1821 [OVR_LBL(58)] = OVR_ELM(58), [OVR_LBL(59)] = OVR_ELM(59),
1822 [OVR_LBL(60)] = OVR_ELM(60), [OVR_LBL(61)] = OVR_ELM(61),
1823 [OVR_LBL(62)] = OVR_ELM(62), [OVR_LBL(63)] = OVR_ELM(63),
1824 [OVR_LBL(64)] = OVR_ELM(64), [OVR_LBL(65)] = OVR_ELM(65),
1825 [OVR_LBL(66)] = OVR_ELM(66), [OVR_LBL(67)] = OVR_ELM(67),
1826 [OVR_LBL(68)] = OVR_ELM(68), [OVR_LBL(69)] = OVR_ELM(69),
1827 [OVR_LBL(70)] = OVR_ELM(70), [OVR_LBL(71)] = OVR_ELM(71),
1828 [OVR_LBL(72)] = OVR_ELM(72), [OVR_LBL(73)] = OVR_ELM(73),
1829 [OVR_LBL(74)] = OVR_ELM(74), [OVR_LBL(75)] = OVR_ELM(75),
1830 [OVR_LBL(76)] = OVR_ELM(76), [OVR_LBL(77)] = OVR_ELM(77),
1831 [OVR_LBL(78)] = OVR_ELM(78), [OVR_LBL(79)] = OVR_ELM(79),
1832 [OVR_LBL(80)] = OVR_ELM(80), [OVR_LBL(81)] = OVR_ELM(81),
1833 [OVR_LBL(82)] = OVR_ELM(82), [OVR_LBL(83)] = OVR_ELM(83),
1834 [OVR_LBL(84)] = OVR_ELM(84), [OVR_LBL(85)] = OVR_ELM(85),
1835 [OVR_LBL(86)] = OVR_ELM(86), [OVR_LBL(87)] = OVR_ELM(87),
1836 [OVR_LBL(88)] = OVR_ELM(88), [OVR_LBL(89)] = OVR_ELM(89),
1837 [OVR_LBL(90)] = OVR_ELM(90), [OVR_LBL(91)] = OVR_ELM(91),
1838 [OVR_LBL(92)] = OVR_ELM(92), [OVR_LBL(93)] = OVR_ELM(93),
1839 [OVR_LBL(94)] = OVR_ELM(94), [OVR_LBL(95)] = OVR_ELM(95),
1840 [OVR_LBL(96)] = OVR_ELM(96), [OVR_LBL(97)] = OVR_ELM(97),
1841 [OVR_LBL(98)] = OVR_ELM(98), [OVR_LBL(99)] = OVR_ELM(99),
1842 [OVR_LBL(100)] = OVR_ELM(100), [OVR_LBL(101)] = OVR_ELM(101),
1843 [OVR_LBL(102)] = OVR_ELM(102), [OVR_LBL(103)] = OVR_ELM(103),
1844 [OVR_LBL(104)] = OVR_ELM(104), [OVR_LBL(105)] = OVR_ELM(105),
1845 [OVR_LBL(106)] = OVR_ELM(106), [OVR_LBL(107)] = OVR_ELM(107),
1846 [OVR_LBL(108)] = OVR_ELM(108), [OVR_LBL(109)] = OVR_ELM(109),
1847 [OVR_LBL(110)] = OVR_ELM(110), [OVR_LBL(111)] = OVR_ELM(111),
1848 [OVR_LBL(112)] = OVR_ELM(112), [OVR_LBL(113)] = OVR_ELM(113),
1849 [OVR_LBL(114)] = OVR_ELM(114), [OVR_LBL(115)] = OVR_ELM(115),
1850 [OVR_LBL(116)] = OVR_ELM(116), [OVR_LBL(117)] = OVR_ELM(117),
1851 [OVR_LBL(118)] = OVR_ELM(118), [OVR_LBL(119)] = OVR_ELM(119),
1852 [OVR_LBL(120)] = OVR_ELM(120), [OVR_LBL(121)] = OVR_ELM(121),
1853 [OVR_LBL(122)] = OVR_ELM(122), [OVR_LBL(123)] = OVR_ELM(123),
1854 [OVR_LBL(124)] = OVR_ELM(124), [OVR_LBL(125)] = OVR_ELM(125),
1855 [OVR_LBL(126)] = OVR_ELM(126), [OVR_LBL(127)] = OVR_ELM(127),
1856 [OVR_LBL(128)] = OVR_ELM(128), [OVR_LBL(129)] = OVR_ELM(129),
1857 [OVR_LBL(130)] = OVR_ELM(130), [OVR_LBL(131)] = OVR_ELM(131),
1858 [OVR_LBL(132)] = OVR_ELM(132), [OVR_LBL(133)] = OVR_ELM(133),
1859 [OVR_LBL(134)] = OVR_ELM(134), [OVR_LBL(135)] = OVR_ELM(135),
1860 [OVR_LBL(136)] = OVR_ELM(136), [OVR_LBL(137)] = OVR_ELM(137),
1861 [OVR_LBL(138)] = OVR_ELM(138), [OVR_LBL(139)] = OVR_ELM(139),
1862 [OVR_LBL(140)] = OVR_ELM(140), [OVR_LBL(141)] = OVR_ELM(141),
1863 [OVR_LBL(142)] = OVR_ELM(142), [OVR_LBL(143)] = OVR_ELM(143),
1864 [OVR_LBL(144)] = OVR_ELM(144), [OVR_LBL(145)] = OVR_ELM(145),
1865 [OVR_LBL(146)] = OVR_ELM(146), [OVR_LBL(147)] = OVR_ELM(147),
1866 [OVR_LBL(148)] = OVR_ELM(148), [OVR_LBL(149)] = OVR_ELM(149),
1867 [OVR_LBL(150)] = OVR_ELM(150), [OVR_LBL(151)] = OVR_ELM(151),
1868 [OVR_LBL(152)] = OVR_ELM(152), [OVR_LBL(153)] = OVR_ELM(153),
1869 [OVR_LBL(154)] = OVR_ELM(154), [OVR_LBL(155)] = OVR_ELM(155),
1870 [OVR_LBL(156)] = OVR_ELM(156), [OVR_LBL(157)] = OVR_ELM(157),
1871 [OVR_LBL(158)] = OVR_ELM(158), [OVR_LBL(159)] = OVR_ELM(159),
1874 /* ======================================================================== */
1876 /* return true if this is chip revision revision a0 */
1877 int is_a0(struct hfi1_devdata *dd)
1879 return ((dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
1880 & CCE_REVISION_CHIP_REV_MINOR_MASK) == 0;
1883 /* return true if this is chip revision revision a */
1884 int is_ax(struct hfi1_devdata *dd)
1887 dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT
1888 & CCE_REVISION_CHIP_REV_MINOR_MASK;
1889 return (chip_rev_minor & 0xf0) == 0;
1892 /* return true if this is chip revision revision b */
1893 int is_bx(struct hfi1_devdata *dd)
1896 dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT
1897 & CCE_REVISION_CHIP_REV_MINOR_MASK;
1898 return !!(chip_rev_minor & 0x10);
/*
 * Append string s to buffer buf.  Arguments curp and lenp point to the
 * current write position and the remaining length, respectively; both
 * are updated before returning, including on failure.
 *
 * A ',' separator is written first unless the buffer is still empty
 * (*curp == buf).  No nul terminator is written.
 *
 * return 0 on success, 1 on out of room
 */
static int append_str(char *buf, char **curp, int *lenp, const char *s)
{
	char *p = *curp;
	int len = *lenp;
	int result = 0; /* success */
	char c;

	/* add a comma, unless this is the first string in the buffer */
	if (p != buf) {
		/* <= 0: a negative remaining length must never allow a write */
		if (len <= 0) {
			result = 1; /* out of room */
			goto done;
		}
		*p++ = ',';
		len--;
	}

	/* copy the string */
	while ((c = *s++) != 0) {
		if (len <= 0) {
			result = 1; /* out of room */
			goto done;
		}
		*p++ = c;
		len--;
	}

done:
	/* write return values */
	*curp = p;
	*lenp = len;

	return result;
}
1943 * Using the given flag table, print a comma separated string into
1944 * the buffer. End in '*' if the buffer is too short.
1946 static char *flag_string(char *buf, int buf_len, u64 flags,
1947 struct flag_table *table, int table_size)
1955 /* make sure there is at least 2 so we can form "*" */
1959 len--; /* leave room for a nul */
1960 for (i = 0; i < table_size; i++) {
1961 if (flags & table[i].flag) {
1962 no_room = append_str(buf, &p, &len, table[i].str);
1965 flags &= ~table[i].flag;
1969 /* any undocumented bits left? */
1970 if (!no_room && flags) {
1971 snprintf(extra, sizeof(extra), "bits 0x%llx", flags);
1972 no_room = append_str(buf, &p, &len, extra);
1975 /* add * if ran out of room */
1977 /* may need to back up to add space for a '*' */
1983 /* add final nul - space already allocated above */
1988 /* first 8 CCE error interrupt source names */
1989 static const char * const cce_misc_names[] = {
1990 "CceErrInt", /* 0 */
1991 "RxeErrInt", /* 1 */
1992 "MiscErrInt", /* 2 */
1993 "Reserved3", /* 3 */
1994 "PioErrInt", /* 4 */
1995 "SDmaErrInt", /* 5 */
1996 "EgressErrInt", /* 6 */
2001 * Return the miscellaneous error interrupt name.
2003 static char *is_misc_err_name(char *buf, size_t bsize, unsigned int source)
2005 if (source < ARRAY_SIZE(cce_misc_names))
2006 strncpy(buf, cce_misc_names[source], bsize);
2011 source + IS_GENERAL_ERR_START);
/*
 * Return the SDMA engine error interrupt name.
 *
 * Formats "SDmaEngErrInt<source>" into buf, writing at most bsize bytes,
 * and returns buf.
 */
static char *is_sdma_eng_err_name(char *buf, size_t bsize, unsigned int source)
{
	snprintf(buf, bsize, "SDmaEngErrInt%u", source);

	return buf;
}
/*
 * Return the send context error interrupt name.
 *
 * Formats "SendCtxtErrInt<source>" into buf, writing at most bsize bytes,
 * and returns buf.
 */
static char *is_sendctxt_err_name(char *buf, size_t bsize, unsigned int source)
{
	snprintf(buf, bsize, "SendCtxtErrInt%u", source);

	return buf;
}
2034 static const char * const various_names[] = {
2043 * Return the various interrupt name.
2045 static char *is_various_name(char *buf, size_t bsize, unsigned int source)
2047 if (source < ARRAY_SIZE(various_names))
2048 strncpy(buf, various_names[source], bsize);
2050 snprintf(buf, bsize, "Reserved%u", source+IS_VARIOUS_START);
2055 * Return the DC interrupt name.
2057 static char *is_dc_name(char *buf, size_t bsize, unsigned int source)
2059 static const char * const dc_int_names[] = {
2063 "lbm" /* local block merge */
2066 if (source < ARRAY_SIZE(dc_int_names))
2067 snprintf(buf, bsize, "dc_%s_int", dc_int_names[source]);
2069 snprintf(buf, bsize, "DCInt%u", source);
2073 static const char * const sdma_int_names[] = {
2080 * Return the SDMA engine interrupt name.
2082 static char *is_sdma_eng_name(char *buf, size_t bsize, unsigned int source)
2084 /* what interrupt */
2085 unsigned int what = source / TXE_NUM_SDMA_ENGINES;
2087 unsigned int which = source % TXE_NUM_SDMA_ENGINES;
2089 if (likely(what < 3))
2090 snprintf(buf, bsize, "%s%u", sdma_int_names[what], which);
2092 snprintf(buf, bsize, "Invalid SDMA interrupt %u", source);
/*
 * Return the receive available interrupt name.
 *
 * Formats "RcvAvailInt<source>" into buf, writing at most bsize bytes,
 * and returns buf.
 */
static char *is_rcv_avail_name(char *buf, size_t bsize, unsigned int source)
{
	snprintf(buf, bsize, "RcvAvailInt%u", source);

	return buf;
}
/*
 * Return the receive urgent interrupt name.
 *
 * Formats "RcvUrgentInt<source>" into buf, writing at most bsize bytes,
 * and returns buf.
 */
static char *is_rcv_urgent_name(char *buf, size_t bsize, unsigned int source)
{
	snprintf(buf, bsize, "RcvUrgentInt%u", source);

	return buf;
}
/*
 * Return the send credit interrupt name.
 *
 * Formats "SendCreditInt<source>" into buf, writing at most bsize bytes,
 * and returns buf.
 */
static char *is_send_credit_name(char *buf, size_t bsize, unsigned int source)
{
	snprintf(buf, bsize, "SendCreditInt%u", source);

	return buf;
}
2124 * Return the reserved interrupt name.
2126 static char *is_reserved_name(char *buf, size_t bsize, unsigned int source)
2128 snprintf(buf, bsize, "Reserved%u", source + IS_RESERVED_START);
2132 static char *cce_err_status_string(char *buf, int buf_len, u64 flags)
2134 return flag_string(buf, buf_len, flags,
2135 cce_err_status_flags, ARRAY_SIZE(cce_err_status_flags));
2138 static char *rxe_err_status_string(char *buf, int buf_len, u64 flags)
2140 return flag_string(buf, buf_len, flags,
2141 rxe_err_status_flags, ARRAY_SIZE(rxe_err_status_flags));
2144 static char *misc_err_status_string(char *buf, int buf_len, u64 flags)
2146 return flag_string(buf, buf_len, flags, misc_err_status_flags,
2147 ARRAY_SIZE(misc_err_status_flags));
2150 static char *pio_err_status_string(char *buf, int buf_len, u64 flags)
2152 return flag_string(buf, buf_len, flags,
2153 pio_err_status_flags, ARRAY_SIZE(pio_err_status_flags));
2156 static char *sdma_err_status_string(char *buf, int buf_len, u64 flags)
2158 return flag_string(buf, buf_len, flags,
2159 sdma_err_status_flags,
2160 ARRAY_SIZE(sdma_err_status_flags));
2163 static char *egress_err_status_string(char *buf, int buf_len, u64 flags)
2165 return flag_string(buf, buf_len, flags,
2166 egress_err_status_flags, ARRAY_SIZE(egress_err_status_flags));
2169 static char *egress_err_info_string(char *buf, int buf_len, u64 flags)
2171 return flag_string(buf, buf_len, flags,
2172 egress_err_info_flags, ARRAY_SIZE(egress_err_info_flags));
2175 static char *send_err_status_string(char *buf, int buf_len, u64 flags)
2177 return flag_string(buf, buf_len, flags,
2178 send_err_status_flags,
2179 ARRAY_SIZE(send_err_status_flags));
2182 static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2187 * For most these errors, there is nothing that can be done except
2188 * report or record it.
2190 dd_dev_info(dd, "CCE Error: %s\n",
2191 cce_err_status_string(buf, sizeof(buf), reg));
2193 if ((reg & CCE_ERR_STATUS_CCE_CLI2_ASYNC_FIFO_PARITY_ERR_SMASK)
2195 && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)) {
2196 /* this error requires a manual drop into SPC freeze mode */
2198 start_freeze_handling(dd->pport, FREEZE_SELF);
2203 * Check counters for receive errors that do not have an interrupt
2204 * associated with them.
2206 #define RCVERR_CHECK_TIME 10
2207 static void update_rcverr_timer(unsigned long opaque)
2209 struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
2210 struct hfi1_pportdata *ppd = dd->pport;
2211 u32 cur_ovfl_cnt = read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
2213 if (dd->rcv_ovfl_cnt < cur_ovfl_cnt &&
2214 ppd->port_error_action & OPA_PI_MASK_EX_BUFFER_OVERRUN) {
2215 dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__);
2216 set_link_down_reason(ppd,
2217 OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0,
2218 OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN);
2219 queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
2221 dd->rcv_ovfl_cnt = (u32) cur_ovfl_cnt;
2223 mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME);
2226 static int init_rcverr(struct hfi1_devdata *dd)
2228 setup_timer(&dd->rcverr_timer, update_rcverr_timer, (unsigned long)dd);
2229 /* Assume the hardware counter has been reset */
2230 dd->rcv_ovfl_cnt = 0;
2231 return mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME);
2234 static void free_rcverr(struct hfi1_devdata *dd)
2236 if (dd->rcverr_timer.data)
2237 del_timer_sync(&dd->rcverr_timer);
2238 dd->rcverr_timer.data = 0;
2241 static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2245 dd_dev_info(dd, "Receive Error: %s\n",
2246 rxe_err_status_string(buf, sizeof(buf), reg));
2248 if (reg & ALL_RXE_FREEZE_ERR) {
2252 * Freeze mode recovery is disabled for the errors
2253 * in RXE_FREEZE_ABORT_MASK
2255 if (is_a0(dd) && (reg & RXE_FREEZE_ABORT_MASK))
2256 flags = FREEZE_ABORT;
2258 start_freeze_handling(dd->pport, flags);
2262 static void handle_misc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2266 dd_dev_info(dd, "Misc Error: %s",
2267 misc_err_status_string(buf, sizeof(buf), reg));
2270 static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2274 dd_dev_info(dd, "PIO Error: %s\n",
2275 pio_err_status_string(buf, sizeof(buf), reg));
2277 if (reg & ALL_PIO_FREEZE_ERR)
2278 start_freeze_handling(dd->pport, 0);
2281 static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2285 dd_dev_info(dd, "SDMA Error: %s\n",
2286 sdma_err_status_string(buf, sizeof(buf), reg));
2288 if (reg & ALL_SDMA_FREEZE_ERR)
2289 start_freeze_handling(dd->pport, 0);
2292 static void count_port_inactive(struct hfi1_devdata *dd)
2294 struct hfi1_pportdata *ppd = dd->pport;
2296 if (ppd->port_xmit_discards < ~(u64)0)
2297 ppd->port_xmit_discards++;
2301 * We have had a "disallowed packet" error during egress. Determine the
2302 * integrity check which failed, and update relevant error counter, etc.
2304 * Note that the SEND_EGRESS_ERR_INFO register has only a single
2305 * bit of state per integrity check, and so we can miss the reason for an
2306 * egress error if more than one packet fails the same integrity check
2307 * since we cleared the corresponding bit in SEND_EGRESS_ERR_INFO.
2309 static void handle_send_egress_err_info(struct hfi1_devdata *dd)
2311 struct hfi1_pportdata *ppd = dd->pport;
2312 u64 src = read_csr(dd, SEND_EGRESS_ERR_SOURCE); /* read first */
2313 u64 info = read_csr(dd, SEND_EGRESS_ERR_INFO);
2316 /* clear down all observed info as quickly as possible after read */
2317 write_csr(dd, SEND_EGRESS_ERR_INFO, info);
2320 "Egress Error Info: 0x%llx, %s Egress Error Src 0x%llx\n",
2321 info, egress_err_info_string(buf, sizeof(buf), info), src);
2323 /* Eventually add other counters for each bit */
2325 if (info & SEND_EGRESS_ERR_INFO_TOO_LONG_IB_PACKET_ERR_SMASK) {
2326 if (ppd->port_xmit_discards < ~(u64)0)
2327 ppd->port_xmit_discards++;
2332 * Input value is a bit position within the SEND_EGRESS_ERR_STATUS
2333 * register. Does it represent a 'port inactive' error?
2335 static inline int port_inactive_err(u64 posn)
2337 return (posn >= SEES(TX_LINKDOWN) &&
2338 posn <= SEES(TX_INCORRECT_LINK_STATE));
2342 * Input value is a bit position within the SEND_EGRESS_ERR_STATUS
2343 * register. Does it represent a 'disallowed packet' error?
2345 static inline int disallowed_pkt_err(u64 posn)
2347 return (posn >= SEES(TX_SDMA0_DISALLOWED_PACKET) &&
2348 posn <= SEES(TX_SDMA15_DISALLOWED_PACKET));
2351 static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2353 u64 reg_copy = reg, handled = 0;
2356 if (reg & ALL_TXE_EGRESS_FREEZE_ERR)
2357 start_freeze_handling(dd->pport, 0);
2358 if (is_a0(dd) && (reg &
2359 SEND_EGRESS_ERR_STATUS_TX_CREDIT_RETURN_VL_ERR_SMASK)
2360 && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR))
2361 start_freeze_handling(dd->pport, 0);
2364 int posn = fls64(reg_copy);
2366 * fls64() returns a 1-based offset, but we generally
2367 * want 0-based offsets.
2369 int shift = posn - 1;
2371 if (port_inactive_err(shift)) {
2372 count_port_inactive(dd);
2373 handled |= (1ULL << shift);
2374 } else if (disallowed_pkt_err(shift)) {
2375 handle_send_egress_err_info(dd);
2376 handled |= (1ULL << shift);
2378 clear_bit(shift, (unsigned long *)®_copy);
2384 dd_dev_info(dd, "Egress Error: %s\n",
2385 egress_err_status_string(buf, sizeof(buf), reg));
2388 static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2392 dd_dev_info(dd, "Send Error: %s\n",
2393 send_err_status_string(buf, sizeof(buf), reg));
2398 * The maximum number of times the error clear down will loop before
2399 * blocking a repeating error. This value is arbitrary.
2401 #define MAX_CLEAR_COUNT 20
2404 * Clear and handle an error register. All error interrupts are funneled
2405 * through here to have a central location to correctly handle single-
2406 * or multi-shot errors.
2408 * For non per-context registers, call this routine with a context value
2409 * of 0 so the per-context offset is zero.
2411 * If the handler loops too many times, assume that something is wrong
2412 * and can't be fixed, so mask the error bits.
2414 static void interrupt_clear_down(struct hfi1_devdata *dd,
2416 const struct err_reg_info *eri)
2421 /* read in a loop until no more errors are seen */
2424 reg = read_kctxt_csr(dd, context, eri->status);
2427 write_kctxt_csr(dd, context, eri->clear, reg);
2428 if (likely(eri->handler))
2429 eri->handler(dd, context, reg);
2431 if (count > MAX_CLEAR_COUNT) {
2434 dd_dev_err(dd, "Repeating %s bits 0x%llx - masking\n",
2437 * Read-modify-write so any other masked bits
2440 mask = read_kctxt_csr(dd, context, eri->mask);
2442 write_kctxt_csr(dd, context, eri->mask, mask);
2449 * CCE block "misc" interrupt. Source is < 16.
2451 static void is_misc_err_int(struct hfi1_devdata *dd, unsigned int source)
2453 const struct err_reg_info *eri = &misc_errs[source];
2456 interrupt_clear_down(dd, 0, eri);
2458 dd_dev_err(dd, "Unexpected misc interrupt (%u) - reserved\n",
2463 static char *send_context_err_status_string(char *buf, int buf_len, u64 flags)
2465 return flag_string(buf, buf_len, flags,
2466 sc_err_status_flags, ARRAY_SIZE(sc_err_status_flags));
2470 * Send context error interrupt. Source (hw_context) is < 160.
2472 * All send context errors cause the send context to halt. The normal
2473 * clear-down mechanism cannot be used because we cannot clear the
2474 * error bits until several other long-running items are done first.
2475 * This is OK because with the context halted, nothing else is going
2476 * to happen on it anyway.
2478 static void is_sendctxt_err_int(struct hfi1_devdata *dd,
2479 unsigned int hw_context)
2481 struct send_context_info *sci;
2482 struct send_context *sc;
2487 sw_index = dd->hw_to_sw[hw_context];
2488 if (sw_index >= dd->num_send_contexts) {
2490 "out of range sw index %u for send context %u\n",
2491 sw_index, hw_context);
2494 sci = &dd->send_contexts[sw_index];
2497 dd_dev_err(dd, "%s: context %u(%u): no sc?\n", __func__,
2498 sw_index, hw_context);
2502 /* tell the software that a halt has begun */
2503 sc_stop(sc, SCF_HALTED);
2505 status = read_kctxt_csr(dd, hw_context, SEND_CTXT_ERR_STATUS);
2507 dd_dev_info(dd, "Send Context %u(%u) Error: %s\n", sw_index, hw_context,
2508 send_context_err_status_string(flags, sizeof(flags), status));
2510 if (status & SEND_CTXT_ERR_STATUS_PIO_DISALLOWED_PACKET_ERR_SMASK)
2511 handle_send_egress_err_info(dd);
2514 * Automatically restart halted kernel contexts out of interrupt
2515 * context. User contexts must ask the driver to restart the context.
2517 if (sc->type != SC_USER)
2518 queue_work(dd->pport->hfi1_wq, &sc->halt_work);
2521 static void handle_sdma_eng_err(struct hfi1_devdata *dd,
2522 unsigned int source, u64 status)
2524 struct sdma_engine *sde;
2526 sde = &dd->per_sdma[source];
2527 #ifdef CONFIG_SDMA_VERBOSITY
2528 dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
2529 slashstrip(__FILE__), __LINE__, __func__);
2530 dd_dev_err(sde->dd, "CONFIG SDMA(%u) source: %u status 0x%llx\n",
2531 sde->this_idx, source, (unsigned long long)status);
2533 sdma_engine_error(sde, status);
2537 * CCE block SDMA error interrupt. Source is < 16.
2539 static void is_sdma_eng_err_int(struct hfi1_devdata *dd, unsigned int source)
2541 #ifdef CONFIG_SDMA_VERBOSITY
2542 struct sdma_engine *sde = &dd->per_sdma[source];
2544 dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
2545 slashstrip(__FILE__), __LINE__, __func__);
2546 dd_dev_err(dd, "CONFIG SDMA(%u) source: %u\n", sde->this_idx,
2548 sdma_dumpstate(sde);
2550 interrupt_clear_down(dd, source, &sdma_eng_err);
2554 * CCE block "various" interrupt. Source is < 8.
2556 static void is_various_int(struct hfi1_devdata *dd, unsigned int source)
2558 const struct err_reg_info *eri = &various_err[source];
2561 * TCritInt cannot go through interrupt_clear_down()
2562 * because it is not a second tier interrupt. The handler
2563 * should be called directly.
2565 if (source == TCRIT_INT_SOURCE)
2566 handle_temp_err(dd);
2567 else if (eri->handler)
2568 interrupt_clear_down(dd, 0, eri);
2571 "%s: Unimplemented/reserved interrupt %d\n",
2575 static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg)
2577 /* source is always zero */
2578 struct hfi1_pportdata *ppd = dd->pport;
2579 unsigned long flags;
2580 u64 qsfp_int_mgmt = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
2582 if (reg & QSFP_HFI0_MODPRST_N) {
2584 dd_dev_info(dd, "%s: ModPresent triggered QSFP interrupt\n",
2587 if (!qsfp_mod_present(ppd)) {
2588 ppd->driver_link_ready = 0;
2590 * Cable removed, reset all our information about the
2591 * cache and cable capabilities
2594 spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
2596 * We don't set cache_refresh_required here as we expect
2597 * an interrupt when a cable is inserted
2599 ppd->qsfp_info.cache_valid = 0;
2600 ppd->qsfp_info.qsfp_interrupt_functional = 0;
2601 spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
2608 if (ppd->host_link_state == HLS_DN_POLL) {
2610 * The link is still in POLL. This means
2611 * that the normal link down processing
2612 * will not happen. We have to do it here
2613 * before turning the DC off.
2615 queue_work(ppd->hfi1_wq, &ppd->link_down_work);
2618 spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
2619 ppd->qsfp_info.cache_valid = 0;
2620 ppd->qsfp_info.cache_refresh_required = 1;
2621 spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
2624 qsfp_int_mgmt &= ~(u64)QSFP_HFI0_MODPRST_N;
2633 if (reg & QSFP_HFI0_INT_N) {
2635 dd_dev_info(dd, "%s: IntN triggered QSFP interrupt\n",
2637 spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
2638 ppd->qsfp_info.check_interrupt_flags = 1;
2639 ppd->qsfp_info.qsfp_interrupt_functional = 1;
2640 spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags);
2643 /* Schedule the QSFP work only if there is a cable attached. */
2644 if (qsfp_mod_present(ppd))
2645 queue_work(ppd->hfi1_wq, &ppd->qsfp_info.qsfp_work);
2648 static int request_host_lcb_access(struct hfi1_devdata *dd)
2652 ret = do_8051_command(dd, HCMD_MISC,
2653 (u64)HCMD_MISC_REQUEST_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT,
2655 if (ret != HCMD_SUCCESS) {
2656 dd_dev_err(dd, "%s: command failed with error %d\n",
2659 return ret == HCMD_SUCCESS ? 0 : -EBUSY;
2662 static int request_8051_lcb_access(struct hfi1_devdata *dd)
2666 ret = do_8051_command(dd, HCMD_MISC,
2667 (u64)HCMD_MISC_GRANT_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT,
2669 if (ret != HCMD_SUCCESS) {
2670 dd_dev_err(dd, "%s: command failed with error %d\n",
2673 return ret == HCMD_SUCCESS ? 0 : -EBUSY;
2677 * Set the LCB selector - allow host access. The DCC selector always
2678 * points to the host.
2680 static inline void set_host_lcb_access(struct hfi1_devdata *dd)
2682 write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL,
2683 DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK
2684 | DC_DC8051_CFG_CSR_ACCESS_SEL_LCB_SMASK);
2688 * Clear the LCB selector - allow 8051 access. The DCC selector always
2689 * points to the host.
2691 static inline void set_8051_lcb_access(struct hfi1_devdata *dd)
2693 write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL,
2694 DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK);
2698 * Acquire LCB access from the 8051. If the host already has access,
2699 * just increment a counter. Otherwise, inform the 8051 that the
2700 * host is taking access.
2704 * -EBUSY if the 8051 has control and cannot be disturbed
2705 * -errno if unable to acquire access from the 8051
2707 int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
2709 struct hfi1_pportdata *ppd = dd->pport;
2713 * Use the host link state lock so the operation of this routine
2714 * { link state check, selector change, count increment } can occur
2715 * as a unit against a link state change. Otherwise there is a
2716 * race between the state change and the count increment.
2719 mutex_lock(&ppd->hls_lock);
2721 while (!mutex_trylock(&ppd->hls_lock))
2725 /* this access is valid only when the link is up */
2726 if ((ppd->host_link_state & HLS_UP) == 0) {
2727 dd_dev_info(dd, "%s: link state %s not up\n",
2728 __func__, link_state_name(ppd->host_link_state));
2733 if (dd->lcb_access_count == 0) {
2734 ret = request_host_lcb_access(dd);
2737 "%s: unable to acquire LCB access, err %d\n",
2741 set_host_lcb_access(dd);
2743 dd->lcb_access_count++;
2745 mutex_unlock(&ppd->hls_lock);
2750 * Release LCB access by decrementing the use count. If the count is moving
2751 * from 1 to 0, inform 8051 that it has control back.
2755 * -errno if unable to release access to the 8051
2757 int release_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
2762 * Use the host link state lock because the acquire needed it.
2763 * Here, we only need to keep { selector change, count decrement }
2767 mutex_lock(&dd->pport->hls_lock);
2769 while (!mutex_trylock(&dd->pport->hls_lock))
2773 if (dd->lcb_access_count == 0) {
2774 dd_dev_err(dd, "%s: LCB access count is zero. Skipping.\n",
2779 if (dd->lcb_access_count == 1) {
2780 set_8051_lcb_access(dd);
2781 ret = request_8051_lcb_access(dd);
2784 "%s: unable to release LCB access, err %d\n",
2786 /* restore host access if the grant didn't work */
2787 set_host_lcb_access(dd);
2791 dd->lcb_access_count--;
2793 mutex_unlock(&dd->pport->hls_lock);
2798 * Initialize LCB access variables and state. Called during driver load,
2799 * after most of the initialization is finished.
2801 * The DC default is LCB access on for the host. The driver defaults to
2802 * leaving access to the 8051. Assign access now - this constrains the call
2803 * to this routine to be after all LCB set-up is done. In particular, after
2804 * hf1_init_dd() -> set_up_interrupts() -> clear_all_interrupts()
2806 static void init_lcb_access(struct hfi1_devdata *dd)
2808 dd->lcb_access_count = 0;
2812 * Write a response back to a 8051 request.
2814 static void hreq_response(struct hfi1_devdata *dd, u8 return_code, u16 rsp_data)
2816 write_csr(dd, DC_DC8051_CFG_EXT_DEV_0,
2817 DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK
2818 | (u64)return_code << DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT
2819 | (u64)rsp_data << DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT);
/*
 * handle_8051_request() - answer one pending request from the 8051.
 *
 * Reads DC_DC8051_CFG_EXT_DEV_1 and returns at once if the REQ_NEW bit is
 * clear.  Otherwise DC_DC8051_CFG_EXT_DEV_0 is zeroed, the request type
 * and data are extracted from the register value, and a reply is posted
 * through hreq_response():
 *   - config load/save/read and TX EQ requests are logged and answered
 *     with HREQ_NOT_SUPPORTED
 *   - HREQ_CONFIG_DONE is answered with HREQ_SUCCESS and zero data
 *   - HREQ_INTERFACE_TEST is answered with HREQ_SUCCESS and the request
 *     data echoed back
 *   - an unknown request is logged as an error and answered with
 *     HREQ_NOT_SUPPORTED
 */
2823 * Handle requests from the 8051.
2825 static void handle_8051_request(struct hfi1_devdata *dd)
2831 reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_1);
2832 if ((reg & DC_DC8051_CFG_EXT_DEV_1_REQ_NEW_SMASK) == 0)
2833 return; /* no request */
2835 /* zero out COMPLETED so the response is seen */
2836 write_csr(dd, DC_DC8051_CFG_EXT_DEV_0, 0);
2838 /* extract request details */
2839 type = (reg >> DC_DC8051_CFG_EXT_DEV_1_REQ_TYPE_SHIFT)
2840 & DC_DC8051_CFG_EXT_DEV_1_REQ_TYPE_MASK;
2841 data = (reg >> DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SHIFT)
2842 & DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_MASK;
/* request types the host recognizes but does not implement */
2845 case HREQ_LOAD_CONFIG:
2846 case HREQ_SAVE_CONFIG:
2847 case HREQ_READ_CONFIG:
2848 case HREQ_SET_TX_EQ_ABS:
2849 case HREQ_SET_TX_EQ_REL:
2851 dd_dev_info(dd, "8051 request: request 0x%x not supported\n",
2853 hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
2856 case HREQ_CONFIG_DONE:
2857 hreq_response(dd, HREQ_SUCCESS, 0);
/* interface test: hand the request data straight back */
2860 case HREQ_INTERFACE_TEST:
2861 hreq_response(dd, HREQ_SUCCESS, data);
2865 dd_dev_err(dd, "8051 request: unknown request 0x%x\n", type);
2866 hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
2871 static void write_global_credit(struct hfi1_devdata *dd,
2872 u8 vau, u16 total, u16 shared)
2874 write_csr(dd, SEND_CM_GLOBAL_CREDIT,
2876 << SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT)
2878 << SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT)
2879 | ((u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT));
2883 * Set up initial VL15 credits of the remote. Assumes the rest of
2884 * the CM credit registers are zero from a previous global or credit reset .
2886 void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf)
2888 /* leave shared count at zero for both global and VL15 */
2889 write_global_credit(dd, vau, vl15buf, 0);
2891 /* We may need some credits for another VL when sending packets
2892 * with the snoop interface. Dividing it down the middle for VL15
2893 * and VL0 should suffice.
2895 if (unlikely(dd->hfi1_snoop.mode_flag == HFI1_PORT_SNOOP_MODE)) {
2896 write_csr(dd, SEND_CM_CREDIT_VL15, (u64)(vl15buf >> 1)
2897 << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
2898 write_csr(dd, SEND_CM_CREDIT_VL, (u64)(vl15buf >> 1)
2899 << SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT);
2901 write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf
2902 << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
2907 * Zero all credit details from the previous connection and
2908 * reset the CM manager's internal counters.
2910 void reset_link_credits(struct hfi1_devdata *dd)
2914 /* remove all previous VL credit limits */
2915 for (i = 0; i < TXE_NUM_DATA_VL; i++)
2916 write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
2917 write_csr(dd, SEND_CM_CREDIT_VL15, 0);
2918 write_global_credit(dd, 0, 0, 0);
2919 /* reset the CM block */
2920 pio_send_control(dd, PSC_CM_RESET);
2923 /* convert a vCU to a CU */
/* NOTE(review): the conversion expression is not visible in this listing - confirm it against the full source */
2924 static u32 vcu_to_cu(u8 vcu)
2929 /* convert a CU to a vCU */
/* NOTE(review): the conversion expression is not visible in this listing - confirm it against the full source */
2930 static u8 cu_to_vcu(u32 cu)
2935 /* convert a vAU to an AU */
2936 static u32 vau_to_au(u8 vau)
2938 return 8 * (1 << vau);
/*
 * Put per-port values back to their link-up defaults.
 * NOTE(review): this listing omits short lines, so further assignments
 * may follow the one shown - confirm against the full source.
 */
2941 static void set_linkup_defaults(struct hfi1_pportdata *ppd)
2943 ppd->sm_trap_qp = 0x0;
2948 * Graceful LCB shutdown. This leaves the LCB FIFOs in reset.
2950 static void lcb_shutdown(struct hfi1_devdata *dd, int abort)
2954 /* clear lcb run: LCB_CFG_RUN.EN = 0 */
2955 write_csr(dd, DC_LCB_CFG_RUN, 0);
2956 /* set tx fifo reset: LCB_CFG_TX_FIFOS_RESET.VAL = 1 */
2957 write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET,
2958 1ull << DC_LCB_CFG_TX_FIFOS_RESET_VAL_SHIFT);
2959 /* set dcc reset csr: DCC_CFG_RESET.{reset_lcb,reset_rx_fpe} = 1 */
2960 dd->lcb_err_en = read_csr(dd, DC_LCB_ERR_EN);
2961 reg = read_csr(dd, DCC_CFG_RESET);
2962 write_csr(dd, DCC_CFG_RESET,
2964 | (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT)
2965 | (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT));
2966 (void) read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */
2968 udelay(1); /* must hold for the longer of 16cclks or 20ns */
2969 write_csr(dd, DCC_CFG_RESET, reg);
2970 write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
2975 * This routine should be called after the link has been transitioned to
2976 * OFFLINE (OFFLINE state has the side effect of putting the SerDes into
2979 * The expectation is that the caller of this routine would have taken
2980 * care of properly transitioning the link into the correct state.
2982 static void dc_shutdown(struct hfi1_devdata *dd)
2984 unsigned long flags;
2986 spin_lock_irqsave(&dd->dc8051_lock, flags);
2987 if (dd->dc_shutdown) {
2988 spin_unlock_irqrestore(&dd->dc8051_lock, flags);
2991 dd->dc_shutdown = 1;
2992 spin_unlock_irqrestore(&dd->dc8051_lock, flags);
2993 /* Shutdown the LCB */
2994 lcb_shutdown(dd, 1);
2995 /* Going to OFFLINE would have causes the 8051 to put the
2996 * SerDes into reset already. Just need to shut down the 8051,
2998 write_csr(dd, DC_DC8051_CFG_RST, 0x1);
3001 /* Calling this after the DC has been brought out of reset should not
3003 static void dc_start(struct hfi1_devdata *dd)
3005 unsigned long flags;
3008 spin_lock_irqsave(&dd->dc8051_lock, flags);
3009 if (!dd->dc_shutdown)
3011 spin_unlock_irqrestore(&dd->dc8051_lock, flags);
3012 /* Take the 8051 out of reset */
3013 write_csr(dd, DC_DC8051_CFG_RST, 0ull);
3014 /* Wait until 8051 is ready */
3015 ret = wait_fm_ready(dd, TIMEOUT_8051_START);
3017 dd_dev_err(dd, "%s: timeout starting 8051 firmware\n",
3020 /* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
3021 write_csr(dd, DCC_CFG_RESET, 0x10);
3022 /* lcb_shutdown() with abort=1 does not restore these */
3023 write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
3024 spin_lock_irqsave(&dd->dc8051_lock, flags);
3025 dd->dc_shutdown = 0;
3027 spin_unlock_irqrestore(&dd->dc8051_lock, flags);
/*
 * adjust_lcb_for_fpga_serdes() - adjust the LCB FIFO RADR settings on FPGA
 * emulation.
 *
 * The routine tests for ICODE_FPGA_EMULATION and for the "_s" emulator
 * before doing any work.  It then selects an rx_radr/tx_radr pair from
 * the emulator release number and writes DC_LCB_CFG_RX_FIFOS_RADR,
 * DC_LCB_CFG_IGNORE_LOST_RCLK (EN bit set) and DC_LCB_CFG_TX_FIFOS_RADR,
 * in that order.  Release 0x1a additionally writes 1 to
 * DC_LCB_CFG_LN_DCLK.
 * NOTE(review): the condition that forces version to 0x2d is not visible
 * in this listing - confirm it against the full source.
 */
3031 * These LCB adjustments are for the Aurora SerDes core in the FPGA.
3033 static void adjust_lcb_for_fpga_serdes(struct hfi1_devdata *dd)
3035 u64 rx_radr, tx_radr;
3038 if (dd->icode != ICODE_FPGA_EMULATION)
3042 * These LCB defaults on emulator _s are good, nothing to do here:
3043 * LCB_CFG_TX_FIFOS_RADR
3044 * LCB_CFG_RX_FIFOS_RADR
3046 * LCB_CFG_IGNORE_LOST_RCLK
3048 if (is_emulator_s(dd))
3050 /* else this is _p */
3052 version = emulator_rev(dd);
3054 version = 0x2d; /* all B0 use 0x2d or higher settings */
3056 if (version <= 0x12) {
3057 /* release 0x12 and below */
3060 * LCB_CFG_RX_FIFOS_RADR.RST_VAL = 0x9
3061 * LCB_CFG_RX_FIFOS_RADR.OK_TO_JUMP_VAL = 0x9
3062 * LCB_CFG_RX_FIFOS_RADR.DO_NOT_JUMP_VAL = 0xa
3065 0xaull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3066 | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3067 | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3069 * LCB_CFG_TX_FIFOS_RADR.ON_REINIT = 0 (default)
3070 * LCB_CFG_TX_FIFOS_RADR.RST_VAL = 6
3072 tx_radr = 6ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3073 } else if (version <= 0x18) {
3074 /* release 0x13 up to 0x18 */
3075 /* LCB_CFG_RX_FIFOS_RADR = 0x988 */
3077 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3078 | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3079 | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3080 tx_radr = 7ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3081 } else if (version == 0x19) {
3083 /* LCB_CFG_RX_FIFOS_RADR = 0xa99 */
3085 0xAull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3086 | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3087 | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3088 tx_radr = 3ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3089 } else if (version == 0x1a) {
3091 /* LCB_CFG_RX_FIFOS_RADR = 0x988 */
3093 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3094 | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3095 | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3096 tx_radr = 7ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3097 write_csr(dd, DC_LCB_CFG_LN_DCLK, 1ull);
3099 /* release 0x1b and higher */
3100 /* LCB_CFG_RX_FIFOS_RADR = 0x877 */
3102 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3103 | 0x7ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3104 | 0x7ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3105 tx_radr = 3ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
/* apply the values chosen above */
3108 write_csr(dd, DC_LCB_CFG_RX_FIFOS_RADR, rx_radr);
3109 /* LCB_CFG_IGNORE_LOST_RCLK.EN = 1 */
3110 write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK,
3111 DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK);
3112 write_csr(dd, DC_LCB_CFG_TX_FIFOS_RADR, tx_radr);
/*
 * handle_sma_message() - work handler for an SMA idle message.
 *
 * Recovers the port from the embedded work item, reads the message with
 * read_idle_sma(), logs it, and dispatches on the low byte:
 *   - in the first visible branch, neighbor_normal is set when the host
 *     link state is HLS_UP_INIT or HLS_UP_ARMED
 *   - for SMA_IDLE_ACTIVE, when the link is HLS_UP_ARMED and
 *     is_active_optimize_enabled is set, neighbor_normal is set and the
 *     link is asked to go to HLS_UP_ACTIVE
 * The two message strings at the end report a failed move to Active and
 * an unexpected message respectively.
 */
3116 * Handle a SMA idle message
3118 * This is a work-queue function outside of the interrupt.
3120 void handle_sma_message(struct work_struct *work)
3122 struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3124 struct hfi1_devdata *dd = ppd->dd;
3128 /* msg is bytes 1-4 of the 40-bit idle message - the command code
3130 ret = read_idle_sma(dd, &msg);
3133 dd_dev_info(dd, "%s: SMA message 0x%llx\n", __func__, msg);
3135 * React to the SMA message. Byte[1] (0 for us) is the command.
3137 switch (msg & 0xff) {
3140 * See OPAv1 table 9-14 - HFI and External Switch Ports Key
3143 * Only expected in INIT or ARMED, discard otherwise.
3145 if (ppd->host_link_state & (HLS_UP_INIT | HLS_UP_ARMED))
3146 ppd->neighbor_normal = 1;
3148 case SMA_IDLE_ACTIVE:
3150 * See OPAv1 table 9-14 - HFI and External Switch Ports Key
3153 * Can activate the node. Discard otherwise.
3155 if (ppd->host_link_state == HLS_UP_ARMED
3156 && ppd->is_active_optimize_enabled) {
3157 ppd->neighbor_normal = 1;
3158 ret = set_link_state(ppd, HLS_UP_ACTIVE);
3162 "%s: received Active SMA idle message, couldn't set link to Active\n",
3168 "%s: received unexpected SMA idle message 0x%llx\n",
/*
 * Read-modify-write of the RCV_CTRL CSR, performed with rcvctrl_lock held
 * via spin_lock_irqsave()/spin_unlock_irqrestore().
 * NOTE(review): the lines that apply @add and @clear to the value read
 * are not visible in this listing - confirm their order in the full
 * source.
 */
3174 static void adjust_rcvctrl(struct hfi1_devdata *dd, u64 add, u64 clear)
3177 unsigned long flags;
3179 spin_lock_irqsave(&dd->rcvctrl_lock, flags);
3180 rcvctrl = read_csr(dd, RCV_CTRL);
3183 write_csr(dd, RCV_CTRL, rcvctrl);
3184 spin_unlock_irqrestore(&dd->rcvctrl_lock, flags);
3187 static inline void add_rcvctrl(struct hfi1_devdata *dd, u64 add)
3189 adjust_rcvctrl(dd, add, 0);
3192 static inline void clear_rcvctrl(struct hfi1_devdata *dd, u64 clear)
3194 adjust_rcvctrl(dd, 0, clear);
/*
 * start_freeze_handling() - interrupt-side start of SPC freeze handling.
 *
 * @ppd:   port whose device is freezing
 * @flags: FREEZE_* bits; FREEZE_SELF, FREEZE_LINK_DOWN and FREEZE_ABORT
 *         are examined here
 *
 * With FREEZE_SELF the SPC freeze bit is written to CCE_CTRL.  The device
 * is then marked HFI1_FROZEN, the SDMA engines are notified, every
 * enabled send context is stopped with SCF_FROZEN | SCF_HALTED, and the
 * frozen event bit is raised for user space.  With FREEZE_ABORT a
 * "Please REBOOT" message is emitted.  The remaining work is handed to
 * the freeze_work item on the port's workqueue.
 */
3198 * Called from all interrupt handlers to start handling an SPC freeze.
3200 void start_freeze_handling(struct hfi1_pportdata *ppd, int flags)
3202 struct hfi1_devdata *dd = ppd->dd;
3203 struct send_context *sc;
3206 if (flags & FREEZE_SELF)
3207 write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_FREEZE_SMASK);
3209 /* enter frozen mode */
3210 dd->flags |= HFI1_FROZEN;
3212 /* notify all SDMA engines that they are going into a freeze */
3213 sdma_freeze_notify(dd, !!(flags & FREEZE_LINK_DOWN));
3215 /* do halt pre-handling on all enabled send contexts */
3216 for (i = 0; i < dd->num_send_contexts; i++) {
3217 sc = dd->send_contexts[i].sc;
3218 if (sc && (sc->flags & SCF_ENABLED))
3219 sc_stop(sc, SCF_FROZEN | SCF_HALTED);
3222 /* Send contexts are frozen. Notify user space */
3223 hfi1_set_uevent_bits(ppd, _HFI1_EVENT_FROZEN_BIT);
3225 if (flags & FREEZE_ABORT) {
3227 "Aborted freeze recovery. Please REBOOT system\n");
3230 /* queue non-interrupt handler */
3231 queue_work(ppd->hfi1_wq, &ppd->freeze_work);
/*
 * wait_for_freeze_status() - poll CCE_STATUS for the freeze indicators.
 *
 * @dd:     device to poll
 * @freeze: non-zero to wait for all ALL_FROZE bits to be set, zero to
 *          wait for all of them to be clear
 *
 * Gives up once FREEZE_STATUS_TIMEOUT milliseconds have passed, reporting
 * the bits seen and the bits expected.  Sleeps 80-120 microseconds
 * between polls, so it must be called from a context that may sleep.
 */
3235 * Wait until all 4 sub-blocks indicate that they have frozen or unfrozen,
3236 * depending on the "freeze" parameter.
3238 * No need to return an error if it times out, our only option
3239 * is to proceed anyway.
3241 static void wait_for_freeze_status(struct hfi1_devdata *dd, int freeze)
3243 unsigned long timeout;
/* absolute deadline, in jiffies, for the whole wait */
3246 timeout = jiffies + msecs_to_jiffies(FREEZE_STATUS_TIMEOUT);
3248 reg = read_csr(dd, CCE_STATUS);
3250 /* waiting until all indicators are set */
3251 if ((reg & ALL_FROZE) == ALL_FROZE)
3252 return; /* all done */
3254 /* waiting until all indicators are clear */
3255 if ((reg & ALL_FROZE) == 0)
3256 return; /* all done */
3259 if (time_after(jiffies, timeout)) {
3261 "Time out waiting for SPC %sfreeze, bits 0x%llx, expecting 0x%llx, continuing",
3264 freeze ? ALL_FROZE : 0ull);
/* back off briefly before polling again */
3267 usleep_range(80, 120);
3272 * Do all freeze handling for the RXE block.
3274 static void rxe_freeze(struct hfi1_devdata *dd)
3279 clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
3281 /* disable all receive contexts */
3282 for (i = 0; i < dd->num_rcv_contexts; i++)
3283 hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS, i);
3287 * Unfreeze handling for the RXE block - kernel contexts only.
3288 * This will also enable the port. User contexts will do unfreeze
3289 * handling on a per-context basis as they call into the driver.
3292 static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
3296 /* enable all kernel contexts */
3297 for (i = 0; i < dd->n_krcv_queues; i++)
3298 hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, i);
3301 add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
/*
 * handle_freeze() - workqueue half of SPC freeze handling.
 *
 * Waits for the freeze indicators, runs the per-block freeze steps, then
 * writes SPC_UNFREEZE to CCE_CTRL and waits for the indicators to clear.
 * The kernel send (PIO) and receive contexts are then unfrozen, and only
 * after that is HFI1_FROZEN cleared and dd->event_queue woken.
 * NOTE(review): an additional freeze/unfreeze cycle follows the first
 * unfreeze; the condition guarding it, and several of the per-block step
 * calls, are not visible in this listing.
 */
3305 * Non-interrupt SPC freeze handling.
3307 * This is a work-queue function outside of the triggering interrupt.
3309 void handle_freeze(struct work_struct *work)
3311 struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3313 struct hfi1_devdata *dd = ppd->dd;
3315 /* wait for freeze indicators on all affected blocks */
3316 dd_dev_info(dd, "Entering SPC freeze\n");
3317 wait_for_freeze_status(dd, 1);
3319 /* SPC is now frozen */
3321 /* do send PIO freeze steps */
3324 /* do send DMA freeze steps */
3327 /* do send egress freeze steps - nothing to do */
3329 /* do receive freeze steps */
3333 * Unfreeze the hardware - clear the freeze, wait for each
3334 * block's frozen bit to clear, then clear the frozen flag.
3336 write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_UNFREEZE_SMASK);
3337 wait_for_freeze_status(dd, 0);
3340 write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_FREEZE_SMASK);
3341 wait_for_freeze_status(dd, 1);
3342 write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_UNFREEZE_SMASK);
3343 wait_for_freeze_status(dd, 0);
3346 /* do send PIO unfreeze steps for kernel contexts */
3347 pio_kernel_unfreeze(dd);
3349 /* do send DMA unfreeze steps */
3352 /* do send egress unfreeze steps - nothing to do */
3354 /* do receive unfreeze steps for kernel contexts */
3355 rxe_kernel_unfreeze(dd);
3358 * The unfreeze procedure touches global device registers when
3359 * it disables and re-enables RXE. Mark the device unfrozen
3360 * after all that is done so other parts of the driver waiting
3361 * for the device to unfreeze don't do things out of order.
3363 * The above implies that the meaning of HFI1_FROZEN flag is
3364 * "Device has gone into freeze mode and freeze mode handling
3365 * is still in progress."
3367 * The flag will be removed when freeze mode processing has
3370 dd->flags &= ~HFI1_FROZEN;
3371 wake_up(&dd->event_queue);
3373 /* no longer frozen */
3374 dd_dev_err(dd, "Exiting SPC freeze\n");
/*
 * handle_link_up() - work handler for the 8051 link-up notification.
 *
 * Moves the host link state to HLS_UP_INIT, caches the LTP round trip
 * count, clears the link-up counters and re-applies the link-up defaults.
 * If the active link speed is not among the enabled speeds, the link-down
 * reason is set to OPA_LINKDOWN_REASON_SPEED_POLICY and the link is taken
 * to HLS_DN_OFFLINE.
 */
3378 * Handle a link up interrupt from the 8051.
3380 * This is a work-queue function outside of the interrupt.
3382 void handle_link_up(struct work_struct *work)
3384 struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3386 set_link_state(ppd, HLS_UP_INIT);
3388 /* cache the read of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
3389 read_ltp_rtt(ppd->dd);
3391 * OPA specifies that certain counters are cleared on a transition
3392 * to link up, so do that.
3394 clear_linkup_counters(ppd->dd);
3396 * And (re)set link up default values.
3398 set_linkup_defaults(ppd);
3400 /* enforce link speed enabled */
3401 if ((ppd->link_speed_active & ppd->link_speed_enabled) == 0) {
3402 /* oops - current speed is not enabled, bounce */
3404 "Link speed active 0x%x is outside enabled 0x%x, downing link\n",
3405 ppd->link_speed_active, ppd->link_speed_enabled);
3406 set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0,
3407 OPA_LINKDOWN_REASON_SPEED_POLICY);
3408 set_link_state(ppd, HLS_DN_OFFLINE);
3413 /* Several pieces of LNI information were cached for SMA in ppd.
3414 * Reset these on link down */
3415 static void reset_neighbor_info(struct hfi1_pportdata *ppd)
3417 ppd->neighbor_guid = 0;
3418 ppd->neighbor_port_number = 0;
3419 ppd->neighbor_type = 0;
3420 ppd->neighbor_fm_security = 0;
/*
 * handle_link_down() - work handler for the 8051 link-down notification.
 *
 * Takes the link to HLS_DN_OFFLINE, reads the neighbor's planned-down
 * reason code, records the link-down reason (using
 * OPA_LINKDOWN_REASON_NEIGHBOR_UNKNOWN as the local reason when the
 * neighbor gave none), clears the cached neighbor information and
 * disables the receive port.  When no QSFP module is present the DC is
 * shut down.
 */
3424 * Handle a link down interrupt from the 8051.
3426 * This is a work-queue function outside of the interrupt.
3428 void handle_link_down(struct work_struct *work)
3430 u8 lcl_reason, neigh_reason = 0;
3431 struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3434 /* go offline first, then deal with reasons */
3435 set_link_state(ppd, HLS_DN_OFFLINE);
3438 read_planned_down_reason_code(ppd->dd, &neigh_reason);
3441 * If no reason, assume peer-initiated but missed
3442 * LinkGoingDown idle flits.
3444 if (neigh_reason == 0)
3445 lcl_reason = OPA_LINKDOWN_REASON_NEIGHBOR_UNKNOWN;
3447 set_link_down_reason(ppd, lcl_reason, neigh_reason, 0);
3449 reset_neighbor_info(ppd);
3451 /* disable the port */
3452 clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
3454 /* If there is no cable attached, turn the DC off. Otherwise,
3455 * start the link bring up. */
3456 if (!qsfp_mod_present(ppd))
3457 dc_shutdown(ppd->dd);
/*
 * handle_link_bounce() - work handler that bounces the link.
 *
 * Acts only when the host link state has an HLS_UP bit set, in which case
 * the link is taken to HLS_DN_OFFLINE.  Otherwise it just logs that the
 * link is not up, naming the current state.
 * NOTE(review): the line following the offline transition is not visible
 * in this listing - confirm against the full source.
 */
3462 void handle_link_bounce(struct work_struct *work)
3464 struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3468 * Only do something if the link is currently up.
3470 if (ppd->host_link_state & HLS_UP) {
3471 set_link_state(ppd, HLS_DN_OFFLINE);
3474 dd_dev_info(ppd->dd, "%s: link not up (%s), nothing to do\n",
3475 __func__, link_state_name(ppd->host_link_state));
3480 * Mask conversion: Capability exchange to Port LTP. The capability
3481 * exchange has an implicit 16b CRC that is mandatory.
3483 static int cap_to_port_ltp(int cap)
3485 int port_ltp = PORT_LTP_CRC_MODE_16; /* this mode is mandatory */
3487 if (cap & CAP_CRC_14B)
3488 port_ltp |= PORT_LTP_CRC_MODE_14;
3489 if (cap & CAP_CRC_48B)
3490 port_ltp |= PORT_LTP_CRC_MODE_48;
3491 if (cap & CAP_CRC_12B_16B_PER_LANE)
3492 port_ltp |= PORT_LTP_CRC_MODE_PER_LANE;
3498 * Convert an OPA Port LTP mask to capability mask
3500 int port_ltp_to_cap(int port_ltp)
3504 if (port_ltp & PORT_LTP_CRC_MODE_14)
3505 cap_mask |= CAP_CRC_14B;
3506 if (port_ltp & PORT_LTP_CRC_MODE_48)
3507 cap_mask |= CAP_CRC_48B;
3508 if (port_ltp & PORT_LTP_CRC_MODE_PER_LANE)
3509 cap_mask |= CAP_CRC_12B_16B_PER_LANE;
3515 * Convert a single DC LCB CRC mode to an OPA Port LTP mask.
3517 static int lcb_to_port_ltp(int lcb_crc)
3521 if (lcb_crc == LCB_CRC_12B_16B_PER_LANE)
3522 port_ltp = PORT_LTP_CRC_MODE_PER_LANE;
3523 else if (lcb_crc == LCB_CRC_48B)
3524 port_ltp = PORT_LTP_CRC_MODE_48;
3525 else if (lcb_crc == LCB_CRC_14B)
3526 port_ltp = PORT_LTP_CRC_MODE_14;
3528 port_ltp = PORT_LTP_CRC_MODE_16;
3534 * Our neighbor has indicated that we are allowed to act as a fabric
3535 * manager, so place the full management partition key in the second
3536 * (0-based) pkey array position (see OPAv1, section 20.2.2.6.8). Note
3537 * that we should already have the limited management partition key in
3538 * array element 1, and also that the port is not yet up when
3539 * add_full_mgmt_pkey() is invoked.
3541 static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
3543 struct hfi1_devdata *dd = ppd->dd;
3545 /* Sanity check - ppd->pkeys[2] should be 0 */
3546 if (ppd->pkeys[2] != 0)
3547 dd_dev_err(dd, "%s pkey[2] already set to 0x%x, resetting it to 0x%x\n",
3548 __func__, ppd->pkeys[2], FULL_MGMT_P_KEY);
3549 ppd->pkeys[2] = FULL_MGMT_P_KEY;
3550 (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
3554 * Convert the given link width to the OPA link width bitmask.
3556 static u16 link_width_to_bits(struct hfi1_devdata *dd, u16 width)
3561 * Simulator and quick linkup do not set the width.
3562 * Just set it to 4x without complaint.
3564 if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR || quick_linkup)
3565 return OPA_LINK_WIDTH_4X;
3566 return 0; /* no lanes up */
3567 case 1: return OPA_LINK_WIDTH_1X;
3568 case 2: return OPA_LINK_WIDTH_2X;
3569 case 3: return OPA_LINK_WIDTH_3X;
3571 dd_dev_info(dd, "%s: invalid width %d, using 4\n",
3574 case 4: return OPA_LINK_WIDTH_4X;
3579 * Do a population count on the bottom nibble.
3581 static const u8 bit_counts[16] = {
3582 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4
3584 static inline u8 nibble_to_count(u8 nibble)
3586 return bit_counts[nibble & 0xf];
/*
 * get_link_widths() - report the currently active tx and rx link widths.
 *
 * @dd:       device to query
 * @tx_width: set to the OPA width bit for the active transmit lanes
 *
 * The tx lane enables come from read_tx_settings() and the rx lane
 * enables from read_local_lni(); each is reduced to a lane count and then
 * converted with link_width_to_bits().  The lane masks and counts are
 * also logged.
 *
 * Side effect: on RTL silicon with 8051 firmware older than 0.19,
 * dd->pport[0].link_speed_active is overwritten from max_rate.
 */
3590 * Read the active lane information from the 8051 registers and return
3593 * Active lane information is found in these 8051 registers:
3597 static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
3603 u8 tx_polarity_inversion;
3604 u8 rx_polarity_inversion;
3607 /* read the active lanes */
3608 read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
3609 &rx_polarity_inversion, &max_rate);
3610 read_local_lni(dd, &enable_lane_rx);
3612 /* convert to counts */
3613 tx = nibble_to_count(enable_lane_tx);
3614 rx = nibble_to_count(enable_lane_rx);
3617 * Set link_speed_active here, overriding what was set in
3618 * handle_verify_cap(). The ASIC 8051 firmware does not correctly
3619 * set the max_rate field in handle_verify_cap until v0.19.
3621 if ((dd->icode == ICODE_RTL_SILICON)
3622 && (dd->dc8051_ver < dc8051_ver(0, 19))) {
3623 /* max_rate: 0 = 12.5G, 1 = 25G */
3626 dd->pport[0].link_speed_active = OPA_LINK_SPEED_12_5G;
3630 "%s: unexpected max rate %d, using 25Gb\n",
3631 __func__, (int)max_rate);
3634 dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
3640 "Fabric active lanes (width): tx 0x%x (%d), rx 0x%x (%d)\n",
3641 enable_lane_tx, tx, enable_lane_rx, rx);
3642 *tx_width = link_width_to_bits(dd, tx);
3643 *rx_width = link_width_to_bits(dd, rx);
/*
 * get_linkup_widths() - report the link widths negotiated by end of LNI.
 *
 * @dd:       device to query
 * @tx_width: set to the OPA width bit for the transmit side
 *
 * Reads the local VerifyCap link width data with
 * read_vc_local_link_width() and converts the tx and rx lane counts with
 * link_width_to_bits().  get_link_widths() is then called only for the
 * logging it does; the values it returns are discarded here.
 * NOTE(review): the line that extracts tx from widths is not visible in
 * this listing - confirm it against the full source.
 */
3647 * Read verify_cap_local_fm_link_width[1] to obtain the link widths.
3648 * Valid after the end of VerifyCap and during LinkUp. Does not change
3649 * after link up. I.e. look elsewhere for downgrade information.
3652 * + bits [7:4] contain the number of active transmitters
3653 * + bits [3:0] contain the number of active receivers
3654 * These are numbers 1 through 4 and can be different values if the
3655 * link is asymmetric.
3657 * verify_cap_local_fm_link_width[0] retains its original value.
3659 static void get_linkup_widths(struct hfi1_devdata *dd, u16 *tx_width,
3663 u8 misc_bits, local_flags;
3664 u16 active_tx, active_rx;
3666 read_vc_local_link_width(dd, &misc_bits, &local_flags, &widths);
3668 rx = (widths >> 8) & 0xf;
3670 *tx_width = link_width_to_bits(dd, tx);
3671 *rx_width = link_width_to_bits(dd, rx);
3673 /* print the active widths */
3674 get_link_widths(dd, &active_tx, &active_rx);
3678 * Set ppd->link_width_active and ppd->link_width_downgrade_active using
3679 * hardware information when the link first comes up.
3681 * The link width is not available until after VerifyCap.AllFramesReceived
3682 * (the trigger for handle_verify_cap), so this is outside that routine
3683 * and should be called when the 8051 signals linkup.
3685 void get_linkup_link_widths(struct hfi1_pportdata *ppd)
3687 u16 tx_width, rx_width;
3689 /* get end-of-LNI link widths */
3690 get_linkup_widths(ppd->dd, &tx_width, &rx_width);
3692 /* use tx_width as the link is supposed to be symmetric on link up */
3693 ppd->link_width_active = tx_width;
3694 /* link width downgrade active (LWD.A) starts out matching LW.A */
3695 ppd->link_width_downgrade_tx_active = ppd->link_width_active;
3696 ppd->link_width_downgrade_rx_active = ppd->link_width_active;
3697 /* per OPA spec, on link up LWD.E resets to LWD.S */
3698 ppd->link_width_downgrade_enabled = ppd->link_width_downgrade_supported;
3699 /* cache the active egress rate (units {10^6 bits/sec]) */
3700 ppd->current_egress_rate = active_egress_rate(ppd);
/*
 * handle_verify_cap() - work handler run when the 8051 signals VerifyCap.
 *
 * In order, the visible steps are: move the host link state to
 * HLS_VERIFY_CAP; shut down the LCB and apply the FPGA SerDes
 * adjustments; read and log the peer's VerifyCap data; set up the VL15
 * credits; choose the LCB CRC mode from the locally enabled modes that
 * the partner also supports; set or clear the forced (sideband) credit
 * mode; derive link_speed_active; cache the LTP CRC modes for portinfo
 * queries; program the remote credit return table; take the LCB FIFOs out
 * of reset; mask LCB errors and hand LCB CSR access to the 8051; cache
 * the neighbor identity CSRs; add the full management pkey when
 * mgmt_allowed is set; and finally request HLS_GOING_UP.
 */
3704 * Handle a verify capabilities interrupt from the 8051.
3706 * This is a work-queue function outside of the interrupt.
3708 void handle_verify_cap(struct work_struct *work)
3710 struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3712 struct hfi1_devdata *dd = ppd->dd;
3714 u8 power_management;
3724 u16 active_tx, active_rx;
3725 u8 partner_supported_crc;
3729 set_link_state(ppd, HLS_VERIFY_CAP);
3731 lcb_shutdown(dd, 0);
3732 adjust_lcb_for_fpga_serdes(dd);
3735 * These are now valid:
3736 * remote VerifyCap fields in the general LNI config
3737 * CSR DC8051_STS_REMOTE_GUID
3738 * CSR DC8051_STS_REMOTE_NODE_TYPE
3739 * CSR DC8051_STS_REMOTE_FM_SECURITY
3740 * CSR DC8051_STS_REMOTE_PORT_NO
3743 read_vc_remote_phy(dd, &power_management, &continious);
3744 read_vc_remote_fabric(
3750 &partner_supported_crc);
3751 read_vc_remote_link_width(dd, &remote_tx_rate, &link_widths);
3752 read_remote_device_id(dd, &device_id, &device_rev);
3754 * And the 'MgmtAllowed' information, which is exchanged during
3755 * LNI, is also available at this point.
3757 read_mgmt_allowed(dd, &ppd->mgmt_allowed);
3758 /* print the active widths */
3759 get_link_widths(dd, &active_tx, &active_rx);
3761 "Peer PHY: power management 0x%x, continuous updates 0x%x\n",
3762 (int)power_management, (int)continious);
3764 "Peer Fabric: vAU %d, Z %d, vCU %d, vl15 credits 0x%x, CRC sizes 0x%x\n",
3769 (int)partner_supported_crc);
3770 dd_dev_info(dd, "Peer Link Width: tx rate 0x%x, widths 0x%x\n",
3771 (u32)remote_tx_rate, (u32)link_widths);
3772 dd_dev_info(dd, "Peer Device ID: 0x%04x, Revision 0x%02x\n",
3773 (u32)device_id, (u32)device_rev);
3775 * The peer vAU value just read is the peer receiver value. HFI does
3776 * not support a transmit vAU of 0 (AU == 8). We advertised that
3777 * with Z=1 in the fabric capabilities sent to the peer. The peer
3778 * will see our Z=1, and, if it advertised a vAU of 0, will move its
3779 * receive to vAU of 1 (AU == 16). Do the same here. We do not care
3780 * about the peer Z value - our sent vAU is 3 (hardwired) and is not
3781 * subject to the Z value exception.
3785 set_up_vl15(dd, vau, vl15buf);
3787 /* set up the LCB CRC mode */
3788 crc_mask = ppd->port_crc_mode_enabled & partner_supported_crc;
3790 /* order is important: use the lowest bit in common */
3791 if (crc_mask & CAP_CRC_14B)
3792 crc_val = LCB_CRC_14B;
3793 else if (crc_mask & CAP_CRC_48B)
3794 crc_val = LCB_CRC_48B;
3795 else if (crc_mask & CAP_CRC_12B_16B_PER_LANE)
3796 crc_val = LCB_CRC_12B_16B_PER_LANE;
3798 crc_val = LCB_CRC_16B;
3800 dd_dev_info(dd, "Final LCB CRC mode: %d\n", (int)crc_val);
3801 write_csr(dd, DC_LCB_CFG_CRC_MODE,
3802 (u64)crc_val << DC_LCB_CFG_CRC_MODE_TX_VAL_SHIFT);
3804 /* set (14b only) or clear sideband credit */
3805 reg = read_csr(dd, SEND_CM_CTRL);
3806 if (crc_val == LCB_CRC_14B && crc_14b_sideband) {
3807 write_csr(dd, SEND_CM_CTRL,
3808 reg | SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
3810 write_csr(dd, SEND_CM_CTRL,
3811 reg & ~SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
3814 ppd->link_speed_active = 0; /* invalid value */
3815 if (dd->dc8051_ver < dc8051_ver(0, 20)) {
3816 /* remote_tx_rate: 0 = 12.5G, 1 = 25G */
3817 switch (remote_tx_rate) {
3819 ppd->link_speed_active = OPA_LINK_SPEED_12_5G;
3822 ppd->link_speed_active = OPA_LINK_SPEED_25G;
3826 /* actual rate is highest bit of the ANDed rates */
3827 u8 rate = remote_tx_rate & ppd->local_tx_rate;
3830 ppd->link_speed_active = OPA_LINK_SPEED_25G;
3832 ppd->link_speed_active = OPA_LINK_SPEED_12_5G;
3834 if (ppd->link_speed_active == 0) {
3835 dd_dev_err(dd, "%s: unexpected remote tx rate %d, using 25Gb\n",
3836 __func__, (int)remote_tx_rate);
3837 ppd->link_speed_active = OPA_LINK_SPEED_25G;
3841 * Cache the values of the supported, enabled, and active
3842 * LTP CRC modes to return in 'portinfo' queries. But the bit
3843 * flags that are returned in the portinfo query differ from
3844 * what's in the link_crc_mask, crc_sizes, and crc_val
3845 * variables. Convert these here.
3847 ppd->port_ltp_crc_mode = cap_to_port_ltp(link_crc_mask) << 8;
3848 /* supported crc modes */
3849 ppd->port_ltp_crc_mode |=
3850 cap_to_port_ltp(ppd->port_crc_mode_enabled) << 4;
3851 /* enabled crc modes */
3852 ppd->port_ltp_crc_mode |= lcb_to_port_ltp(crc_val);
3853 /* active crc mode */
3855 /* set up the remote credit return table */
3856 assign_remote_cm_au_table(dd, vcu);
3859 * The LCB is reset on entry to handle_verify_cap(), so this must
3860 * be applied on every link up.
3862 * Adjust LCB error kill enable to kill the link if
3863 * these RBUF errors are seen:
3864 * REPLAY_BUF_MBE_SMASK
3865 * FLIT_INPUT_BUF_MBE_SMASK
3867 if (is_a0(dd)) { /* fixed in B0 */
3868 reg = read_csr(dd, DC_LCB_CFG_LINK_KILL_EN);
3869 reg |= DC_LCB_CFG_LINK_KILL_EN_REPLAY_BUF_MBE_SMASK
3870 | DC_LCB_CFG_LINK_KILL_EN_FLIT_INPUT_BUF_MBE_SMASK;
3871 write_csr(dd, DC_LCB_CFG_LINK_KILL_EN, reg);
3874 /* pull LCB fifos out of reset - all fifo clocks must be stable */
3875 write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0);
3877 /* give 8051 access to the LCB CSRs */
3878 write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
3879 set_8051_lcb_access(dd);
3881 ppd->neighbor_guid =
3882 read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
3883 ppd->neighbor_port_number = read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
3884 DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
3885 ppd->neighbor_type =
3886 read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
3887 DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
3888 ppd->neighbor_fm_security =
3889 read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) &
3890 DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK;
3892 "Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n",
3893 ppd->neighbor_guid, ppd->neighbor_type,
3894 ppd->mgmt_allowed, ppd->neighbor_fm_security);
3895 if (ppd->mgmt_allowed)
3896 add_full_mgmt_pkey(ppd);
3898 /* tell the 8051 to go to LinkUp */
3899 set_link_state(ppd, HLS_GOING_UP);
/*
 * apply_link_downgrade_policy() - check the active link widths against the
 * enabled link width downgrade policy.
 *
 * @ppd:            port to check
 * @refresh_widths: when non-zero, re-read the active tx/rx widths with
 *                  get_link_widths() and store them in
 *                  link_width_downgrade_{tx,rx}_active first
 *
 * The host link state is sampled under hls_lock.  Two violations are
 * reported: downgrade disabled while a direction no longer matches
 * link_width_active, and an active tx or rx width that shares no bit with
 * the enabled mask.  The link-down reason is set to
 * OPA_LINKDOWN_REASON_WIDTH_POLICY and the link is taken to
 * HLS_DN_OFFLINE.
 * NOTE(review): the control flow joining the checks to the link-down
 * action is not visible in this listing - confirm against the full
 * source.
 */
3903 * Apply the link width downgrade enabled policy against the current active
3906 * Called when the enabled policy changes or the active link widths change.
3908 void apply_link_downgrade_policy(struct hfi1_pportdata *ppd, int refresh_widths)
3912 u16 lwde = ppd->link_width_downgrade_enabled;
3915 mutex_lock(&ppd->hls_lock);
3916 /* only apply if the link is up */
3917 if (ppd->host_link_state & HLS_UP)
3919 mutex_unlock(&ppd->hls_lock);
3923 if (refresh_widths) {
3924 get_link_widths(ppd->dd, &tx, &rx);
3925 ppd->link_width_downgrade_tx_active = tx;
3926 ppd->link_width_downgrade_rx_active = rx;
3930 /* downgrade is disabled */
3932 /* bounce if not at starting active width */
3933 if ((ppd->link_width_active !=
3934 ppd->link_width_downgrade_tx_active)
3935 || (ppd->link_width_active !=
3936 ppd->link_width_downgrade_rx_active)) {
3938 "Link downgrade is disabled and link has downgraded, downing link\n");
3940 " original 0x%x, tx active 0x%x, rx active 0x%x\n",
3941 ppd->link_width_active,
3942 ppd->link_width_downgrade_tx_active,
3943 ppd->link_width_downgrade_rx_active);
3946 } else if ((lwde & ppd->link_width_downgrade_tx_active) == 0
3947 || (lwde & ppd->link_width_downgrade_rx_active) == 0) {
3948 /* Tx or Rx is outside the enabled policy */
3950 "Link is outside of downgrade allowed, downing link\n");
3952 " enabled 0x%x, tx active 0x%x, rx active 0x%x\n",
3954 ppd->link_width_downgrade_tx_active,
3955 ppd->link_width_downgrade_rx_active);
3960 set_link_down_reason(ppd, OPA_LINKDOWN_REASON_WIDTH_POLICY, 0,
3961 OPA_LINKDOWN_REASON_WIDTH_POLICY);
3962 set_link_state(ppd, HLS_DN_OFFLINE);
3968 * Handle a link downgrade interrupt from the 8051.
3970 * This is a work-queue function outside of the interrupt.
3972 void handle_link_downgrade(struct work_struct *work)
3974 struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3975 link_downgrade_work);
3977 dd_dev_info(ppd->dd, "8051: Link width downgrade\n");
3978 apply_link_downgrade_policy(ppd, 1);
3981 static char *dcc_err_string(char *buf, int buf_len, u64 flags)
3983 return flag_string(buf, buf_len, flags, dcc_err_flags,
3984 ARRAY_SIZE(dcc_err_flags));
3987 static char *lcb_err_string(char *buf, int buf_len, u64 flags)
3989 return flag_string(buf, buf_len, flags, lcb_err_flags,
3990 ARRAY_SIZE(lcb_err_flags));
3993 static char *dc8051_err_string(char *buf, int buf_len, u64 flags)
3995 return flag_string(buf, buf_len, flags, dc8051_err_flags,
3996 ARRAY_SIZE(dc8051_err_flags));
3999 static char *dc8051_info_err_string(char *buf, int buf_len, u64 flags)
4001 return flag_string(buf, buf_len, flags, dc8051_info_err_flags,
4002 ARRAY_SIZE(dc8051_info_err_flags));
4005 static char *dc8051_info_host_msg_string(char *buf, int buf_len, u64 flags)
4007 return flag_string(buf, buf_len, flags, dc8051_info_host_msg_flags,
4008 ARRAY_SIZE(dc8051_info_host_msg_flags));
4011 static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
4013 struct hfi1_pportdata *ppd = dd->pport;
4014 u64 info, err, host_msg;
4015 int queue_link_down = 0;
4018 /* look at the flags */
4019 if (reg & DC_DC8051_ERR_FLG_SET_BY_8051_SMASK) {
4020 /* 8051 information set by firmware */
4021 /* read DC8051_DBG_ERR_INFO_SET_BY_8051 for details */
4022 info = read_csr(dd, DC_DC8051_DBG_ERR_INFO_SET_BY_8051);
4023 err = (info >> DC_DC8051_DBG_ERR_INFO_SET_BY_8051_ERROR_SHIFT)
4024 & DC_DC8051_DBG_ERR_INFO_SET_BY_8051_ERROR_MASK;
4026 DC_DC8051_DBG_ERR_INFO_SET_BY_8051_HOST_MSG_SHIFT)
4027 & DC_DC8051_DBG_ERR_INFO_SET_BY_8051_HOST_MSG_MASK;
4030 * Handle error flags.
4032 if (err & FAILED_LNI) {
4034 * LNI error indications are cleared by the 8051
4035 * only when starting polling. Only pay attention
4036 * to them when in the states that occur during
4039 if (ppd->host_link_state
4040 & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
4041 queue_link_down = 1;
4042 dd_dev_info(dd, "Link error: %s\n",
4043 dc8051_info_err_string(buf,
4047 err &= ~(u64)FAILED_LNI;
4050 /* report remaining errors, but do not do anything */
4051 dd_dev_err(dd, "8051 info error: %s\n",
4052 dc8051_info_err_string(buf, sizeof(buf), err));
4056 * Handle host message flags.
4058 if (host_msg & HOST_REQ_DONE) {
4060 * Presently, the driver does a busy wait for
4061 * host requests to complete. This is only an
4062 * informational message.
4063 * NOTE: The 8051 clears the host message
4064 * information *on the next 8051 command*.
4065 * Therefore, when linkup is achieved,
4066 * this flag will still be set.
4068 host_msg &= ~(u64)HOST_REQ_DONE;
4070 if (host_msg & BC_SMA_MSG) {
4071 queue_work(ppd->hfi1_wq, &ppd->sma_message_work);
4072 host_msg &= ~(u64)BC_SMA_MSG;
4074 if (host_msg & LINKUP_ACHIEVED) {
4075 dd_dev_info(dd, "8051: Link up\n");
4076 queue_work(ppd->hfi1_wq, &ppd->link_up_work);
4077 host_msg &= ~(u64)LINKUP_ACHIEVED;
4079 if (host_msg & EXT_DEVICE_CFG_REQ) {
4080 handle_8051_request(dd);
4081 host_msg &= ~(u64)EXT_DEVICE_CFG_REQ;
4083 if (host_msg & VERIFY_CAP_FRAME) {
4084 queue_work(ppd->hfi1_wq, &ppd->link_vc_work);
4085 host_msg &= ~(u64)VERIFY_CAP_FRAME;
4087 if (host_msg & LINK_GOING_DOWN) {
4088 const char *extra = "";
4089 /* no downgrade action needed if going down */
4090 if (host_msg & LINK_WIDTH_DOWNGRADED) {
4091 host_msg &= ~(u64)LINK_WIDTH_DOWNGRADED;
4092 extra = " (ignoring downgrade)";
4094 dd_dev_info(dd, "8051: Link down%s\n", extra);
4095 queue_link_down = 1;
4096 host_msg &= ~(u64)LINK_GOING_DOWN;
4098 if (host_msg & LINK_WIDTH_DOWNGRADED) {
4099 queue_work(ppd->hfi1_wq, &ppd->link_downgrade_work);
4100 host_msg &= ~(u64)LINK_WIDTH_DOWNGRADED;
4103 /* report remaining messages, but do not do anything */
4104 dd_dev_info(dd, "8051 info host message: %s\n",
4105 dc8051_info_host_msg_string(buf, sizeof(buf),
4109 reg &= ~DC_DC8051_ERR_FLG_SET_BY_8051_SMASK;
4111 if (reg & DC_DC8051_ERR_FLG_LOST_8051_HEART_BEAT_SMASK) {
4113 * Lost the 8051 heartbeat. If this happens, we
4114 * receive constant interrupts about it. Disable
4115 * the interrupt after the first.
4117 dd_dev_err(dd, "Lost 8051 heartbeat\n");
4118 write_csr(dd, DC_DC8051_ERR_EN,
4119 read_csr(dd, DC_DC8051_ERR_EN)
4120 & ~DC_DC8051_ERR_EN_LOST_8051_HEART_BEAT_SMASK);
4122 reg &= ~DC_DC8051_ERR_FLG_LOST_8051_HEART_BEAT_SMASK;
4125 /* report the error, but do not do anything */
4126 dd_dev_err(dd, "8051 error: %s\n",
4127 dc8051_err_string(buf, sizeof(buf), reg));
4130 if (queue_link_down) {
4131 /* if the link is already going down or disabled, do not
4133 if ((ppd->host_link_state
4134 & (HLS_GOING_OFFLINE|HLS_LINK_COOLDOWN))
4135 || ppd->link_enabled == 0) {
4136 dd_dev_info(dd, "%s: not queuing link down\n",
4139 queue_work(ppd->hfi1_wq, &ppd->link_down_work);
4144 static const char * const fm_config_txt[] = {
4146 "BadHeadDist: Distance violation between two head flits",
4148 "BadTailDist: Distance violation between two tail flits",
4150 "BadCtrlDist: Distance violation between two credit control flits",
4152 "BadCrdAck: Credits return for unsupported VL",
4154 "UnsupportedVLMarker: Received VL Marker",
4156 "BadPreempt: Exceeded the preemption nesting level",
4158 "BadControlFlit: Received unsupported control flit",
4161 "UnsupportedVLMarker: Received VL Marker for unconfigured or disabled VL",
4164 static const char * const port_rcv_txt[] = {
4166 "BadPktLen: Illegal PktLen",
4168 "PktLenTooLong: Packet longer than PktLen",
4170 "PktLenTooShort: Packet shorter than PktLen",
4172 "BadSLID: Illegal SLID (0, using multicast as SLID, does not include security validation of SLID)",
4174 "BadDLID: Illegal DLID (0, doesn't match HFI)",
4176 "BadL2: Illegal L2 opcode",
4178 "BadSC: Unsupported SC",
4180 "BadRC: Illegal RC",
4182 "PreemptError: Preempting with same VL",
4184 "PreemptVL15: Preempting a VL15 packet",
4187 #define OPA_LDR_FMCONFIG_OFFSET 16
4188 #define OPA_LDR_PORTRCV_OFFSET 0
4189 static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
4191 u64 info, hdr0, hdr1;
4194 struct hfi1_pportdata *ppd = dd->pport;
4198 if (reg & DCC_ERR_FLG_UNCORRECTABLE_ERR_SMASK) {
4199 if (!(dd->err_info_uncorrectable & OPA_EI_STATUS_SMASK)) {
4200 info = read_csr(dd, DCC_ERR_INFO_UNCORRECTABLE);
4201 dd->err_info_uncorrectable = info & OPA_EI_CODE_SMASK;
4202 /* set status bit */
4203 dd->err_info_uncorrectable |= OPA_EI_STATUS_SMASK;
4205 reg &= ~DCC_ERR_FLG_UNCORRECTABLE_ERR_SMASK;
4208 if (reg & DCC_ERR_FLG_LINK_ERR_SMASK) {
4209 struct hfi1_pportdata *ppd = dd->pport;
4210 /* this counter saturates at (2^32) - 1 */
4211 if (ppd->link_downed < (u32)UINT_MAX)
4213 reg &= ~DCC_ERR_FLG_LINK_ERR_SMASK;
4216 if (reg & DCC_ERR_FLG_FMCONFIG_ERR_SMASK) {
4217 u8 reason_valid = 1;
4219 info = read_csr(dd, DCC_ERR_INFO_FMCONFIG);
4220 if (!(dd->err_info_fmconfig & OPA_EI_STATUS_SMASK)) {
4221 dd->err_info_fmconfig = info & OPA_EI_CODE_SMASK;
4222 /* set status bit */
4223 dd->err_info_fmconfig |= OPA_EI_STATUS_SMASK;
4233 extra = fm_config_txt[info];
4236 extra = fm_config_txt[info];
4237 if (ppd->port_error_action &
4238 OPA_PI_MASK_FM_CFG_UNSUPPORTED_VL_MARKER) {
4241 * lcl_reason cannot be derived from info
4245 OPA_LINKDOWN_REASON_UNSUPPORTED_VL_MARKER;
4250 snprintf(buf, sizeof(buf), "reserved%lld", info);
4255 if (reason_valid && !do_bounce) {
4256 do_bounce = ppd->port_error_action &
4257 (1 << (OPA_LDR_FMCONFIG_OFFSET + info));
4258 lcl_reason = info + OPA_LINKDOWN_REASON_BAD_HEAD_DIST;
4261 /* just report this */
4262 dd_dev_info(dd, "DCC Error: fmconfig error: %s\n", extra);
4263 reg &= ~DCC_ERR_FLG_FMCONFIG_ERR_SMASK;
4266 if (reg & DCC_ERR_FLG_RCVPORT_ERR_SMASK) {
4267 u8 reason_valid = 1;
4269 info = read_csr(dd, DCC_ERR_INFO_PORTRCV);
4270 hdr0 = read_csr(dd, DCC_ERR_INFO_PORTRCV_HDR0);
4271 hdr1 = read_csr(dd, DCC_ERR_INFO_PORTRCV_HDR1);
4272 if (!(dd->err_info_rcvport.status_and_code &
4273 OPA_EI_STATUS_SMASK)) {
4274 dd->err_info_rcvport.status_and_code =
4275 info & OPA_EI_CODE_SMASK;
4276 /* set status bit */
4277 dd->err_info_rcvport.status_and_code |=
4278 OPA_EI_STATUS_SMASK;
4279 /* save first 2 flits in the packet that caused
4281 dd->err_info_rcvport.packet_flit1 = hdr0;
4282 dd->err_info_rcvport.packet_flit2 = hdr1;
4295 extra = port_rcv_txt[info];
4299 snprintf(buf, sizeof(buf), "reserved%lld", info);
4304 if (reason_valid && !do_bounce) {
4305 do_bounce = ppd->port_error_action &
4306 (1 << (OPA_LDR_PORTRCV_OFFSET + info));
4307 lcl_reason = info + OPA_LINKDOWN_REASON_RCV_ERROR_0;
4310 /* just report this */
4311 dd_dev_info(dd, "DCC Error: PortRcv error: %s\n", extra);
4312 dd_dev_info(dd, " hdr0 0x%llx, hdr1 0x%llx\n",
4315 reg &= ~DCC_ERR_FLG_RCVPORT_ERR_SMASK;
4318 if (reg & DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_UC_SMASK) {
4319 /* informative only */
4320 dd_dev_info(dd, "8051 access to LCB blocked\n");
4321 reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_UC_SMASK;
4323 if (reg & DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK) {
4324 /* informative only */
4325 dd_dev_info(dd, "host access to LCB blocked\n");
4326 reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK;
4329 /* report any remaining errors */
4331 dd_dev_info(dd, "DCC Error: %s\n",
4332 dcc_err_string(buf, sizeof(buf), reg));
4334 if (lcl_reason == 0)
4335 lcl_reason = OPA_LINKDOWN_REASON_UNKNOWN;
4338 dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__);
4339 set_link_down_reason(ppd, lcl_reason, 0, lcl_reason);
4340 queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
4344 static void handle_lcb_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
4348 dd_dev_info(dd, "LCB Error: %s\n",
4349 lcb_err_string(buf, sizeof(buf), reg));
4353 * CCE block DC interrupt. Source is < 8.
4355 static void is_dc_int(struct hfi1_devdata *dd, unsigned int source)
4357 const struct err_reg_info *eri = &dc_errs[source];
4360 interrupt_clear_down(dd, 0, eri);
4361 } else if (source == 3 /* dc_lbm_int */) {
4363 * This indicates that a parity error has occurred on the
4364 * address/control lines presented to the LBM. The error
4365 * is a single pulse, there is no associated error flag,
4366 * and it is non-maskable. This is because if a parity
4367 * error occurs on the request the request is dropped.
4368 * This should never occur, but it is nice to know if it
4371 dd_dev_err(dd, "Parity error in DC LBM block\n");
4373 dd_dev_err(dd, "Invalid DC interrupt %u\n", source);
4378 * TX block send credit interrupt. Source is < 160.
4380 static void is_send_credit_int(struct hfi1_devdata *dd, unsigned int source)
4382 sc_group_release_update(dd, source);
4386 * TX block SDMA interrupt. Source is < 48.
4388 * SDMA interrupts are grouped by type:
4391 * N - 2N-1 = SDmaProgress
4392 * 2N - 3N-1 = SDmaIdle
4394 static void is_sdma_eng_int(struct hfi1_devdata *dd, unsigned int source)
4396 /* what interrupt */
4397 unsigned int what = source / TXE_NUM_SDMA_ENGINES;
4399 unsigned int which = source % TXE_NUM_SDMA_ENGINES;
4401 #ifdef CONFIG_SDMA_VERBOSITY
4402 dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", which,
4403 slashstrip(__FILE__), __LINE__, __func__);
4404 sdma_dumpstate(&dd->per_sdma[which]);
4407 if (likely(what < 3 && which < dd->num_sdma)) {
4408 sdma_engine_interrupt(&dd->per_sdma[which], 1ull << source);
4410 /* should not happen */
4411 dd_dev_err(dd, "Invalid SDMA interrupt 0x%x\n", source);
4416 * RX block receive available interrupt. Source is < 160.
4418 static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
4420 struct hfi1_ctxtdata *rcd;
4423 if (likely(source < dd->num_rcv_contexts)) {
4424 rcd = dd->rcd[source];
4426 if (source < dd->first_user_ctxt)
4427 rcd->do_interrupt(rcd, 0);
4429 handle_user_interrupt(rcd);
4432 /* received an interrupt, but no rcd */
4433 err_detail = "dataless";
4435 /* received an interrupt, but are not using that context */
4436 err_detail = "out of range";
4438 dd_dev_err(dd, "unexpected %s receive available context interrupt %u\n",
4439 err_detail, source);
4443 * RX block receive urgent interrupt. Source is < 160.
4445 static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source)
4447 struct hfi1_ctxtdata *rcd;
4450 if (likely(source < dd->num_rcv_contexts)) {
4451 rcd = dd->rcd[source];
4453 /* only pay attention to user urgent interrupts */
4454 if (source >= dd->first_user_ctxt)
4455 handle_user_interrupt(rcd);
4458 /* received an interrupt, but no rcd */
4459 err_detail = "dataless";
4461 /* received an interrupt, but are not using that context */
4462 err_detail = "out of range";
4464 dd_dev_err(dd, "unexpected %s receive urgent context interrupt %u\n",
4465 err_detail, source);
4469 * Reserved range interrupt. Should not be called in normal operation.
4471 static void is_reserved_int(struct hfi1_devdata *dd, unsigned int source)
4475 dd_dev_err(dd, "unexpected %s interrupt\n",
4476 is_reserved_name(name, sizeof(name), source));
4479 static const struct is_table is_table[] = {
4481 name func interrupt func */
4482 { IS_GENERAL_ERR_START, IS_GENERAL_ERR_END,
4483 is_misc_err_name, is_misc_err_int },
4484 { IS_SDMAENG_ERR_START, IS_SDMAENG_ERR_END,
4485 is_sdma_eng_err_name, is_sdma_eng_err_int },
4486 { IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END,
4487 is_sendctxt_err_name, is_sendctxt_err_int },
4488 { IS_SDMA_START, IS_SDMA_END,
4489 is_sdma_eng_name, is_sdma_eng_int },
4490 { IS_VARIOUS_START, IS_VARIOUS_END,
4491 is_various_name, is_various_int },
4492 { IS_DC_START, IS_DC_END,
4493 is_dc_name, is_dc_int },
4494 { IS_RCVAVAIL_START, IS_RCVAVAIL_END,
4495 is_rcv_avail_name, is_rcv_avail_int },
4496 { IS_RCVURGENT_START, IS_RCVURGENT_END,
4497 is_rcv_urgent_name, is_rcv_urgent_int },
4498 { IS_SENDCREDIT_START, IS_SENDCREDIT_END,
4499 is_send_credit_name, is_send_credit_int},
4500 { IS_RESERVED_START, IS_RESERVED_END,
4501 is_reserved_name, is_reserved_int},
4505 * Interrupt source interrupt - called when the given source has an interrupt.
4506 * Source is a bit index into an array of 64-bit integers.
4508 static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
4510 const struct is_table *entry;
4512 /* avoids a double compare by walking the table in-order */
4513 for (entry = &is_table[0]; entry->is_name; entry++) {
4514 if (source < entry->end) {
4515 trace_hfi1_interrupt(dd, entry, source);
4516 entry->is_int(dd, source - entry->start);
4520 /* fell off the end */
4521 dd_dev_err(dd, "invalid interrupt source %u\n", source);
4525 * General interrupt handler. This is able to correctly handle
4526 * all interrupts in case INTx is used.
4528 static irqreturn_t general_interrupt(int irq, void *data)
4530 struct hfi1_devdata *dd = data;
4531 u64 regs[CCE_NUM_INT_CSRS];
4535 this_cpu_inc(*dd->int_counter);
4537 /* phase 1: scan and clear all handled interrupts */
4538 for (i = 0; i < CCE_NUM_INT_CSRS; i++) {
4539 if (dd->gi_mask[i] == 0) {
4540 regs[i] = 0; /* used later */
4543 regs[i] = read_csr(dd, CCE_INT_STATUS + (8 * i)) &
4545 /* only clear if anything is set */
4547 write_csr(dd, CCE_INT_CLEAR + (8 * i), regs[i]);
4550 /* phase 2: call the appropriate handler */
4551 for_each_set_bit(bit, (unsigned long *)&regs[0],
4552 CCE_NUM_INT_CSRS*64) {
4553 is_interrupt(dd, bit);
4559 static irqreturn_t sdma_interrupt(int irq, void *data)
4561 struct sdma_engine *sde = data;
4562 struct hfi1_devdata *dd = sde->dd;
4565 #ifdef CONFIG_SDMA_VERBOSITY
4566 dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
4567 slashstrip(__FILE__), __LINE__, __func__);
4568 sdma_dumpstate(sde);
4571 this_cpu_inc(*dd->int_counter);
4573 /* This read_csr is really bad in the hot path */
4574 status = read_csr(dd,
4575 CCE_INT_STATUS + (8*(IS_SDMA_START/64)))
4577 if (likely(status)) {
4578 /* clear the interrupt(s) */
4580 CCE_INT_CLEAR + (8*(IS_SDMA_START/64)),
4583 /* handle the interrupt(s) */
4584 sdma_engine_interrupt(sde, status);
4586 dd_dev_err(dd, "SDMA engine %u interrupt, but no status bits set\n",
4593 * Clear the receive interrupt, forcing the write and making sure
4594 * we have data from the chip, pushing everything in front of it
4597 static inline void clear_recv_intr(struct hfi1_ctxtdata *rcd)
4599 struct hfi1_devdata *dd = rcd->dd;
4600 u32 addr = CCE_INT_CLEAR + (8 * rcd->ireg);
4602 mmiowb(); /* make sure everything before is written */
4603 write_csr(dd, addr, rcd->imask);
4604 /* force the above write on the chip and get a value back */
4605 (void)read_csr(dd, addr);
4608 /* force the receive interrupt */
4609 static inline void force_recv_intr(struct hfi1_ctxtdata *rcd)
4611 write_csr(rcd->dd, CCE_INT_FORCE + (8 * rcd->ireg), rcd->imask);
4614 /* return non-zero if a packet is present */
4615 static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
4617 if (!HFI1_CAP_IS_KSET(DMA_RTAIL))
4618 return (rcd->seq_cnt ==
4619 rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))));
4621 /* else is RDMA rtail */
4622 return (rcd->head != get_rcvhdrtail(rcd));
4626 * Receive packet IRQ handler. This routine expects to be on its own IRQ.
4627 * This routine will try to handle packets immediately (latency), but if
4628 * it finds too many, it will invoke the thread handler (bandwidth). The
4629 * chip receive interrupt is *not* cleared down until this or the thread (if
4630 * invoked) is finished. The intent is to avoid extra interrupts while we
4631 * are processing packets anyway.
4633 static irqreturn_t receive_context_interrupt(int irq, void *data)
4635 struct hfi1_ctxtdata *rcd = data;
4636 struct hfi1_devdata *dd = rcd->dd;
4640 trace_hfi1_receive_interrupt(dd, rcd->ctxt);
4641 this_cpu_inc(*dd->int_counter);
4643 /* receive interrupt remains blocked while processing packets */
4644 disposition = rcd->do_interrupt(rcd, 0);
4647 * Too many packets were seen while processing packets in this
4648 * IRQ handler. Invoke the handler thread. The receive interrupt
4651 if (disposition == RCV_PKT_LIMIT)
4652 return IRQ_WAKE_THREAD;
4655 * The packet processor detected no more packets. Clear the receive
4656 * interrupt and recheck for a packet that may have arrived
4657 * after the previous check and interrupt clear. If a packet arrived,
4658 * force another interrupt.
4660 clear_recv_intr(rcd);
4661 present = check_packet_present(rcd);
4663 force_recv_intr(rcd);
4669 * Receive packet thread handler. This expects to be invoked with the
4670 * receive interrupt still blocked.
4672 static irqreturn_t receive_context_thread(int irq, void *data)
4674 struct hfi1_ctxtdata *rcd = data;
4677 /* receive interrupt is still blocked from the IRQ handler */
4678 (void)rcd->do_interrupt(rcd, 1);
4681 * The packet processor will only return if it detected no more
4682 * packets. Hold IRQs here so we can safely clear the interrupt and
4683 * recheck for a packet that may have arrived after the previous
4684 * check and the interrupt clear. If a packet arrived, force another
4687 local_irq_disable();
4688 clear_recv_intr(rcd);
4689 present = check_packet_present(rcd);
4691 force_recv_intr(rcd);
4697 /* ========================================================================= */
4699 u32 read_physical_state(struct hfi1_devdata *dd)
4703 reg = read_csr(dd, DC_DC8051_STS_CUR_STATE);
4704 return (reg >> DC_DC8051_STS_CUR_STATE_PORT_SHIFT)
4705 & DC_DC8051_STS_CUR_STATE_PORT_MASK;
4708 static u32 read_logical_state(struct hfi1_devdata *dd)
4712 reg = read_csr(dd, DCC_CFG_PORT_CONFIG);
4713 return (reg >> DCC_CFG_PORT_CONFIG_LINK_STATE_SHIFT)
4714 & DCC_CFG_PORT_CONFIG_LINK_STATE_MASK;
4717 static void set_logical_state(struct hfi1_devdata *dd, u32 chip_lstate)
4721 reg = read_csr(dd, DCC_CFG_PORT_CONFIG);
4722 /* clear current state, set new state */
4723 reg &= ~DCC_CFG_PORT_CONFIG_LINK_STATE_SMASK;
4724 reg |= (u64)chip_lstate << DCC_CFG_PORT_CONFIG_LINK_STATE_SHIFT;
4725 write_csr(dd, DCC_CFG_PORT_CONFIG, reg);
4729 * Use the 8051 to read an LCB CSR.
4731 static int read_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 *data)
4736 if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
4737 if (acquire_lcb_access(dd, 0) == 0) {
4738 *data = read_csr(dd, addr);
4739 release_lcb_access(dd, 0);
4745 /* register is an index of LCB registers: (offset - base) / 8 */
4746 regno = (addr - DC_LCB_CFG_RUN) >> 3;
4747 ret = do_8051_command(dd, HCMD_READ_LCB_CSR, regno, data);
4748 if (ret != HCMD_SUCCESS)
4754 * Read an LCB CSR. Access may not be in host control, so check.
4755 * Return 0 on success, -EBUSY on failure.
4757 int read_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 *data)
4759 struct hfi1_pportdata *ppd = dd->pport;
4761 /* if up, go through the 8051 for the value */
4762 if (ppd->host_link_state & HLS_UP)
4763 return read_lcb_via_8051(dd, addr, data);
4764 /* if going up or down, no access */
4765 if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE))
4767 /* otherwise, host has access */
4768 *data = read_csr(dd, addr);
4773 * Use the 8051 to write an LCB CSR.
4775 static int write_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 data)
4778 if (acquire_lcb_access(dd, 0) == 0) {
4779 write_csr(dd, addr, data);
4780 release_lcb_access(dd, 0);
4787 * Write an LCB CSR. Access may not be in host control, so check.
4788 * Return 0 on success, -EBUSY on failure.
4790 int write_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 data)
4792 struct hfi1_pportdata *ppd = dd->pport;
4794 /* if up, go through the 8051 to write the value */
4795 if (ppd->host_link_state & HLS_UP)
4796 return write_lcb_via_8051(dd, addr, data);
4797 /* if going up or down, no access */
4798 if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE))
4800 /* otherwise, host has access */
4801 write_csr(dd, addr, data);
4807 * < 0 = Linux error, not able to get access
4808 * > 0 = 8051 command RETURN_CODE
4810 static int do_8051_command(
4811 struct hfi1_devdata *dd,
4818 unsigned long flags;
4819 unsigned long timeout;
4821 hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data);
4824 * Alternative to holding the lock for a long time:
4825 * - keep busy wait - have other users bounce off
4827 spin_lock_irqsave(&dd->dc8051_lock, flags);
4829 /* We can't send any commands to the 8051 if it's in reset */
4830 if (dd->dc_shutdown) {
4831 return_code = -ENODEV;
4836 * If an 8051 host command timed out previously, then the 8051 is
4839 * On first timeout, attempt to reset and restart the entire DC
4840 * block (including 8051). (Is this too big of a hammer?)
4842 * If the 8051 times out a second time, the reset did not bring it
4843 * back to healthy life. In that case, fail any subsequent commands.
4845 if (dd->dc8051_timed_out) {
4846 if (dd->dc8051_timed_out > 1) {
4848 "Previous 8051 host command timed out, skipping command %u\n",
4850 return_code = -ENXIO;
4853 spin_unlock_irqrestore(&dd->dc8051_lock, flags);
4856 spin_lock_irqsave(&dd->dc8051_lock, flags);
4860 * If there is no timeout, then the 8051 command interface is
4861 * waiting for a command.
4865 * Do two writes: the first to stabilize the type and req_data, the
4866 * second to activate.
4868 reg = ((u64)type & DC_DC8051_CFG_HOST_CMD_0_REQ_TYPE_MASK)
4869 << DC_DC8051_CFG_HOST_CMD_0_REQ_TYPE_SHIFT
4870 | (in_data & DC_DC8051_CFG_HOST_CMD_0_REQ_DATA_MASK)
4871 << DC_DC8051_CFG_HOST_CMD_0_REQ_DATA_SHIFT;
4872 write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, reg);
4873 reg |= DC_DC8051_CFG_HOST_CMD_0_REQ_NEW_SMASK;
4874 write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, reg);
4876 /* wait for completion, alternate: interrupt */
4877 timeout = jiffies + msecs_to_jiffies(DC8051_COMMAND_TIMEOUT);
4879 reg = read_csr(dd, DC_DC8051_CFG_HOST_CMD_1);
4880 completed = reg & DC_DC8051_CFG_HOST_CMD_1_COMPLETED_SMASK;
4883 if (time_after(jiffies, timeout)) {
4884 dd->dc8051_timed_out++;
4885 dd_dev_err(dd, "8051 host command %u timeout\n", type);
4888 return_code = -ETIMEDOUT;
4895 *out_data = (reg >> DC_DC8051_CFG_HOST_CMD_1_RSP_DATA_SHIFT)
4896 & DC_DC8051_CFG_HOST_CMD_1_RSP_DATA_MASK;
4897 if (type == HCMD_READ_LCB_CSR) {
4898 /* top 16 bits are in a different register */
4899 *out_data |= (read_csr(dd, DC_DC8051_CFG_EXT_DEV_1)
4900 & DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SMASK)
4902 - DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SHIFT);
4905 return_code = (reg >> DC_DC8051_CFG_HOST_CMD_1_RETURN_CODE_SHIFT)
4906 & DC_DC8051_CFG_HOST_CMD_1_RETURN_CODE_MASK;
4907 dd->dc8051_timed_out = 0;
4909 * Clear command for next user.
4911 write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0);
4914 spin_unlock_irqrestore(&dd->dc8051_lock, flags);
4919 static int set_physical_link_state(struct hfi1_devdata *dd, u64 state)
4921 return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL);
4924 static int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
4925 u8 lane_id, u32 config_data)
4930 data = (u64)field_id << LOAD_DATA_FIELD_ID_SHIFT
4931 | (u64)lane_id << LOAD_DATA_LANE_ID_SHIFT
4932 | (u64)config_data << LOAD_DATA_DATA_SHIFT;
4933 ret = do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL);
4934 if (ret != HCMD_SUCCESS) {
4936 "load 8051 config: field id %d, lane %d, err %d\n",
4937 (int)field_id, (int)lane_id, ret);
4943 * Read the 8051 firmware "registers". Use the RAM directly. Always
4944 * set the result, even on error.
4945 * Return 0 on success, -errno on failure
4947 static int read_8051_config(struct hfi1_devdata *dd, u8 field_id, u8 lane_id,
4954 /* address start depends on the lane_id */
4956 addr = (4 * NUM_GENERAL_FIELDS)
4957 + (lane_id * 4 * NUM_LANE_FIELDS);
4960 addr += field_id * 4;
4962 /* read is in 8-byte chunks, hardware will truncate the address down */
4963 ret = read_8051_data(dd, addr, 8, &big_data);
4966 /* extract the 4 bytes we want */
4968 *result = (u32)(big_data >> 32);
4970 *result = (u32)big_data;
4973 dd_dev_err(dd, "%s: direct read failed, lane %d, field %d!\n",
4974 __func__, lane_id, field_id);
4980 static int write_vc_local_phy(struct hfi1_devdata *dd, u8 power_management,
4985 frame = continuous << CONTINIOUS_REMOTE_UPDATE_SUPPORT_SHIFT
4986 | power_management << POWER_MANAGEMENT_SHIFT;
4987 return load_8051_config(dd, VERIFY_CAP_LOCAL_PHY,
4988 GENERAL_CONFIG, frame);
4991 static int write_vc_local_fabric(struct hfi1_devdata *dd, u8 vau, u8 z, u8 vcu,
4992 u16 vl15buf, u8 crc_sizes)
4996 frame = (u32)vau << VAU_SHIFT
4998 | (u32)vcu << VCU_SHIFT
4999 | (u32)vl15buf << VL15BUF_SHIFT
5000 | (u32)crc_sizes << CRC_SIZES_SHIFT;
5001 return load_8051_config(dd, VERIFY_CAP_LOCAL_FABRIC,
5002 GENERAL_CONFIG, frame);
5005 static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
5006 u8 *flag_bits, u16 *link_widths)
5010 read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
5012 *misc_bits = (frame >> MISC_CONFIG_BITS_SHIFT) & MISC_CONFIG_BITS_MASK;
5013 *flag_bits = (frame >> LOCAL_FLAG_BITS_SHIFT) & LOCAL_FLAG_BITS_MASK;
5014 *link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
5017 static int write_vc_local_link_width(struct hfi1_devdata *dd,
5024 frame = (u32)misc_bits << MISC_CONFIG_BITS_SHIFT
5025 | (u32)flag_bits << LOCAL_FLAG_BITS_SHIFT
5026 | (u32)link_widths << LINK_WIDTH_SHIFT;
5027 return load_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
5031 static int write_local_device_id(struct hfi1_devdata *dd, u16 device_id,
5036 frame = ((u32)device_id << LOCAL_DEVICE_ID_SHIFT)
5037 | ((u32)device_rev << LOCAL_DEVICE_REV_SHIFT);
5038 return load_8051_config(dd, LOCAL_DEVICE_ID, GENERAL_CONFIG, frame);
5041 static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
5046 read_8051_config(dd, REMOTE_DEVICE_ID, GENERAL_CONFIG, &frame);
5047 *device_id = (frame >> REMOTE_DEVICE_ID_SHIFT) & REMOTE_DEVICE_ID_MASK;
5048 *device_rev = (frame >> REMOTE_DEVICE_REV_SHIFT)
5049 & REMOTE_DEVICE_REV_MASK;
5052 void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b)
5056 read_8051_config(dd, MISC_STATUS, GENERAL_CONFIG, &frame);
5057 *ver_a = (frame >> STS_FM_VERSION_A_SHIFT) & STS_FM_VERSION_A_MASK;
5058 *ver_b = (frame >> STS_FM_VERSION_B_SHIFT) & STS_FM_VERSION_B_MASK;
5061 static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management,
5066 read_8051_config(dd, VERIFY_CAP_REMOTE_PHY, GENERAL_CONFIG, &frame);
5067 *power_management = (frame >> POWER_MANAGEMENT_SHIFT)
5068 & POWER_MANAGEMENT_MASK;
5069 *continuous = (frame >> CONTINIOUS_REMOTE_UPDATE_SUPPORT_SHIFT)
5070 & CONTINIOUS_REMOTE_UPDATE_SUPPORT_MASK;
5073 static void read_vc_remote_fabric(struct hfi1_devdata *dd, u8 *vau, u8 *z,
5074 u8 *vcu, u16 *vl15buf, u8 *crc_sizes)
5078 read_8051_config(dd, VERIFY_CAP_REMOTE_FABRIC, GENERAL_CONFIG, &frame);
5079 *vau = (frame >> VAU_SHIFT) & VAU_MASK;
5080 *z = (frame >> Z_SHIFT) & Z_MASK;
5081 *vcu = (frame >> VCU_SHIFT) & VCU_MASK;
5082 *vl15buf = (frame >> VL15BUF_SHIFT) & VL15BUF_MASK;
5083 *crc_sizes = (frame >> CRC_SIZES_SHIFT) & CRC_SIZES_MASK;
5086 static void read_vc_remote_link_width(struct hfi1_devdata *dd,
5092 read_8051_config(dd, VERIFY_CAP_REMOTE_LINK_WIDTH, GENERAL_CONFIG,
5094 *remote_tx_rate = (frame >> REMOTE_TX_RATE_SHIFT)
5095 & REMOTE_TX_RATE_MASK;
5096 *link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
5099 static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx)
5103 read_8051_config(dd, LOCAL_LNI_INFO, GENERAL_CONFIG, &frame);
5104 *enable_lane_rx = (frame >> ENABLE_LANE_RX_SHIFT) & ENABLE_LANE_RX_MASK;
5107 static void read_mgmt_allowed(struct hfi1_devdata *dd, u8 *mgmt_allowed)
5111 read_8051_config(dd, REMOTE_LNI_INFO, GENERAL_CONFIG, &frame);
5112 *mgmt_allowed = (frame >> MGMT_ALLOWED_SHIFT) & MGMT_ALLOWED_MASK;
5115 static void read_last_local_state(struct hfi1_devdata *dd, u32 *lls)
5117 read_8051_config(dd, LAST_LOCAL_STATE_COMPLETE, GENERAL_CONFIG, lls);
5120 static void read_last_remote_state(struct hfi1_devdata *dd, u32 *lrs)
5122 read_8051_config(dd, LAST_REMOTE_STATE_COMPLETE, GENERAL_CONFIG, lrs);
5125 void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality)
5131 if (dd->pport->host_link_state & HLS_UP) {
5132 ret = read_8051_config(dd, LINK_QUALITY_INFO, GENERAL_CONFIG,
5135 *link_quality = (frame >> LINK_QUALITY_SHIFT)
5136 & LINK_QUALITY_MASK;
5140 static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc)
5144 read_8051_config(dd, LINK_QUALITY_INFO, GENERAL_CONFIG, &frame);
5145 *pdrrc = (frame >> DOWN_REMOTE_REASON_SHIFT) & DOWN_REMOTE_REASON_MASK;
5148 static int read_tx_settings(struct hfi1_devdata *dd,
5150 u8 *tx_polarity_inversion,
5151 u8 *rx_polarity_inversion,
5157 ret = read_8051_config(dd, TX_SETTINGS, GENERAL_CONFIG, &frame);
5158 *enable_lane_tx = (frame >> ENABLE_LANE_TX_SHIFT)
5159 & ENABLE_LANE_TX_MASK;
5160 *tx_polarity_inversion = (frame >> TX_POLARITY_INVERSION_SHIFT)
5161 & TX_POLARITY_INVERSION_MASK;
5162 *rx_polarity_inversion = (frame >> RX_POLARITY_INVERSION_SHIFT)
5163 & RX_POLARITY_INVERSION_MASK;
5164 *max_rate = (frame >> MAX_RATE_SHIFT) & MAX_RATE_MASK;
5168 static int write_tx_settings(struct hfi1_devdata *dd,
5170 u8 tx_polarity_inversion,
5171 u8 rx_polarity_inversion,
5176 /* no need to mask, all variable sizes match field widths */
5177 frame = enable_lane_tx << ENABLE_LANE_TX_SHIFT
5178 | tx_polarity_inversion << TX_POLARITY_INVERSION_SHIFT
5179 | rx_polarity_inversion << RX_POLARITY_INVERSION_SHIFT
5180 | max_rate << MAX_RATE_SHIFT;
5181 return load_8051_config(dd, TX_SETTINGS, GENERAL_CONFIG, frame);
5184 static void check_fabric_firmware_versions(struct hfi1_devdata *dd)
5186 u32 frame, version, prod_id;
5190 for (lane = 0; lane < 4; lane++) {
5191 ret = read_8051_config(dd, SPICO_FW_VERSION, lane, &frame);
5195 "Unable to read lane %d firmware details\n",
5199 version = (frame >> SPICO_ROM_VERSION_SHIFT)
5200 & SPICO_ROM_VERSION_MASK;
5201 prod_id = (frame >> SPICO_ROM_PROD_ID_SHIFT)
5202 & SPICO_ROM_PROD_ID_MASK;
5204 "Lane %d firmware: version 0x%04x, prod_id 0x%04x\n",
5205 lane, version, prod_id);
5210 * Read an idle LCB message.
5212 * Returns 0 on success, -EINVAL on error
5214 static int read_idle_message(struct hfi1_devdata *dd, u64 type, u64 *data_out)
5218 ret = do_8051_command(dd, HCMD_READ_LCB_IDLE_MSG,
5220 if (ret != HCMD_SUCCESS) {
5221 dd_dev_err(dd, "read idle message: type %d, err %d\n",
5225 dd_dev_info(dd, "%s: read idle message 0x%llx\n", __func__, *data_out);
5226 /* return only the payload as we already know the type */
5227 *data_out >>= IDLE_PAYLOAD_SHIFT;
5232 * Read an idle SMA message. To be done in response to a notification from
5235 * Returns 0 on success, -EINVAL on error
5237 static int read_idle_sma(struct hfi1_devdata *dd, u64 *data)
5239 return read_idle_message(dd,
5240 (u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT, data);
5244 * Send an idle LCB message.
5246 * Returns 0 on success, -EINVAL on error
5248 static int send_idle_message(struct hfi1_devdata *dd, u64 data)
5252 dd_dev_info(dd, "%s: sending idle message 0x%llx\n", __func__, data);
5253 ret = do_8051_command(dd, HCMD_SEND_LCB_IDLE_MSG, data, NULL);
5254 if (ret != HCMD_SUCCESS) {
5255 dd_dev_err(dd, "send idle message: data 0x%llx, err %d\n",
5263 * Send an idle SMA message.
5265 * Returns 0 on success, -EINVAL on error
5267 int send_idle_sma(struct hfi1_devdata *dd, u64 message)
5271 data = ((message & IDLE_PAYLOAD_MASK) << IDLE_PAYLOAD_SHIFT)
5272 | ((u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT);
5273 return send_idle_message(dd, data);
5277 * Initialize the LCB then do a quick link up. This may or may not be
5280 * return 0 on success, -errno on error
/*
 * do_quick_linkup() - set up the LCB and request the "quick" LinkUp state.
 * @dd: device to bring up
 *
 * Visible sequence: shut the LCB down, program the loopback and lane-width
 * CSRs, release the TX FIFO reset, run the simulator-only loopback steps
 * when applicable, mask LCB errors and switch LCB access to the 8051, then
 * ask for PLS_QUICK_LINKUP.  If that request does not return HCMD_SUCCESS
 * the host takes LCB access back and LCB error reporting is re-enabled.
 * Returns 0 on success.
 */
5282 static int do_quick_linkup(struct hfi1_devdata *dd)
5285 unsigned long timeout;
5288 lcb_shutdown(dd, 0);
5291 /* LCB_CFG_LOOPBACK.VAL = 2 */
5292 /* LCB_CFG_LANE_WIDTH.VAL = 0 */
/*
 * NOTE(review): the value written below is IB_PACKET_TYPE; the comment
 * above says 2 - presumably the constant equals 2, confirm.
 */
5293 write_csr(dd, DC_LCB_CFG_LOOPBACK,
5294 IB_PACKET_TYPE << DC_LCB_CFG_LOOPBACK_VAL_SHIFT);
5295 write_csr(dd, DC_LCB_CFG_LANE_WIDTH, 0);
5298 /* start the LCBs */
5299 /* LCB_CFG_TX_FIFOS_RESET.VAL = 0 */
5300 write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0);
5302 /* simulator only loopback steps */
5303 if (loopback && dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
5304 /* LCB_CFG_RUN.EN = 1 */
5305 write_csr(dd, DC_LCB_CFG_RUN,
5306 1ull << DC_LCB_CFG_RUN_EN_SHIFT);
5308 /* watch LCB_STS_LINK_TRANSFER_ACTIVE */
/* the wait below is bounded to 10 ms worth of jiffies */
5309 timeout = jiffies + msecs_to_jiffies(10);
5312 DC_LCB_STS_LINK_TRANSFER_ACTIVE);
5315 if (time_after(jiffies, timeout)) {
5317 "timeout waiting for LINK_TRANSFER_ACTIVE\n");
5323 write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP,
5324 1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT);
5329 * When doing quick linkup and not in loopback, both
5330 * sides must be done with LCB set-up before either
5331 * starts the quick linkup. Put a delay here so that
5332 * both sides can be started and have a chance to be
5333 * done with LCB set up before resuming.
5336 "Pausing for peer to be finished with LCB set up\n");
5339 "Continuing with quick linkup\n");
5342 write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
5343 set_8051_lcb_access(dd);
5346 * State "quick" LinkUp request sets the physical link state to
5347 * LinkUp without a verify capability sequence.
5348 * This state is in simulator v37 and later.
5350 ret = set_physical_link_state(dd, PLS_QUICK_LINKUP);
5351 if (ret != HCMD_SUCCESS) {
5353 "%s: set physical link state to quick LinkUp failed with return %d\n",
/* failure: undo the error masking and the LCB access switch made above */
5356 set_host_lcb_access(dd);
5357 write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
5364 return 0; /* success */
5368 * Set the SerDes to internal loopback mode.
5369 * Returns 0 on success, -errno on error.
/*
 * set_serdes_loopback_mode() - request PLS_INTERNAL_SERDES_LOOPBACK.
 * @dd: device to switch
 *
 * HCMD_SUCCESS from set_physical_link_state() is the success case; any
 * other value is reported together with that return code.
 */
5371 static int set_serdes_loopback_mode(struct hfi1_devdata *dd)
5375 ret = set_physical_link_state(dd, PLS_INTERNAL_SERDES_LOOPBACK);
5376 if (ret == HCMD_SUCCESS)
5379 "Set physical link state to SerDes Loopback failed with return %d\n",
5387 * Do all special steps to set up loopback.
/*
 * init_loopback() - apply the set-up required by the selected loopback mode.
 * @dd: device being configured
 *
 * Reads the file-scope `loopback` setting.  Every mode first sets
 * DISABLE_SELF_GUID_CHECK in DC_DC8051_CFG_MODE with a read-modify-write.
 * On the functional simulator any of the three valid modes is rewritten to
 * LOOPBACK_LCB.  The remaining checks handle SerDes, LCB and cable
 * loopback in turn; a value matching none of them is logged as invalid.
 */
5389 static int init_loopback(struct hfi1_devdata *dd)
5391 dd_dev_info(dd, "Entering loopback mode\n");
5393 /* all loopbacks should disable self GUID check */
5394 write_csr(dd, DC_DC8051_CFG_MODE,
5395 (read_csr(dd, DC_DC8051_CFG_MODE) | DISABLE_SELF_GUID_CHECK));
5398 * The simulator has only one loopback option - LCB. Switch
5399 * to that option, which includes quick link up.
5401 * Accept all valid loopback values.
5403 if ((dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
5404 && (loopback == LOOPBACK_SERDES
5405 || loopback == LOOPBACK_LCB
5406 || loopback == LOOPBACK_CABLE)) {
5407 loopback = LOOPBACK_LCB;
5412 /* handle serdes loopback */
5413 if (loopback == LOOPBACK_SERDES) {
5414 /* internal serdes loopback needs quick linkup on RTL */
5415 if (dd->icode == ICODE_RTL_SILICON)
5417 return set_serdes_loopback_mode(dd);
5420 /* LCB loopback - handled at poll time */
5421 if (loopback == LOOPBACK_LCB) {
5422 quick_linkup = 1; /* LCB is always quick linkup */
5424 /* not supported in emulation due to emulation RTL changes */
5425 if (dd->icode == ICODE_FPGA_EMULATION) {
5427 "LCB loopback not supported in emulation\n");
5433 /* external cable loopback requires no extra steps */
5434 if (loopback == LOOPBACK_CABLE)
5437 dd_dev_err(dd, "Invalid loopback mode %d\n", loopback);
5442 * Translate from the OPA_LINK_WIDTH handed to us by the FM to bits
5443 * used in the Verify Capability link width attribute.
/*
 * opa_to_vc_link_widths() - convert an OPA_LINK_WIDTH_* mask.
 * @opa_widths: mask of OPA_LINK_WIDTH_nX bits
 *
 * For every OPA_LINK_WIDTH_nX bit set in @opa_widths (n = 1..4), bit
 * (n - 1) is set in the accumulated result; bits without a table entry
 * contribute nothing.
 */
5445 static u16 opa_to_vc_link_widths(u16 opa_widths)
/* translation table: { OPA width bit, Verify Capability width bit } */
5450 static const struct link_bits {
5453 } opa_link_xlate[] = {
5454 { OPA_LINK_WIDTH_1X, 1 << (1-1) },
5455 { OPA_LINK_WIDTH_2X, 1 << (2-1) },
5456 { OPA_LINK_WIDTH_3X, 1 << (3-1) },
5457 { OPA_LINK_WIDTH_4X, 1 << (4-1) },
5460 for (i = 0; i < ARRAY_SIZE(opa_link_xlate); i++) {
5461 if (opa_widths & opa_link_xlate[i].from)
5462 result |= opa_link_xlate[i].to;
5468 * Set link attributes before moving to polling.
/*
 * set_local_link_attributes() - program the local link attributes.
 * @ppd: port whose attributes are written
 *
 * Resets the fabric SerDes, rewrites the TX settings with the wanted rate
 * and all four lanes enabled, then writes the local PHY, fabric,
 * link-width and device-id Verify Capability values.  Each write is
 * checked against HCMD_SUCCESS; a failing step jumps to the shared
 * failure label, where the return code is logged.
 */
5470 static int set_local_link_attributes(struct hfi1_pportdata *ppd)
5472 struct hfi1_devdata *dd = ppd->dd;
5474 u8 tx_polarity_inversion;
5475 u8 rx_polarity_inversion;
5478 /* reset our fabric serdes to clear any lingering problems */
5479 fabric_serdes_reset(dd);
5481 /* set the local tx rate - need to read-modify-write */
5482 ret = read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
5483 &rx_polarity_inversion, &ppd->local_tx_rate);
5485 goto set_local_link_attributes_fail;
/*
 * The rate encoding depends on the 8051 version: below dc8051_ver(0, 20)
 * it is a single selector (1 when 25G is enabled, otherwise 0); from that
 * version on it is a mask (value 2 for 25G, value 1 for 12.5G).
 */
5487 if (dd->dc8051_ver < dc8051_ver(0, 20)) {
5488 /* set the tx rate to the fastest enabled */
5489 if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)
5490 ppd->local_tx_rate = 1;
5492 ppd->local_tx_rate = 0;
5494 /* set the tx rate to all enabled */
5495 ppd->local_tx_rate = 0;
5496 if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)
5497 ppd->local_tx_rate |= 2;
5498 if (ppd->link_speed_enabled & OPA_LINK_SPEED_12_5G)
5499 ppd->local_tx_rate |= 1;
/* the polarity values read above are written back unchanged */
5502 enable_lane_tx = 0xF; /* enable all four lanes */
5503 ret = write_tx_settings(dd, enable_lane_tx, tx_polarity_inversion,
5504 rx_polarity_inversion, ppd->local_tx_rate);
5505 if (ret != HCMD_SUCCESS)
5506 goto set_local_link_attributes_fail;
5509 * DC supports continuous updates.
5511 ret = write_vc_local_phy(dd, 0 /* no power management */,
5512 1 /* continuous updates */);
5513 if (ret != HCMD_SUCCESS)
5514 goto set_local_link_attributes_fail;
5516 /* z=1 in the next call: AU of 0 is not supported by the hardware */
5517 ret = write_vc_local_fabric(dd, dd->vau, 1, dd->vcu, dd->vl15_init,
5518 ppd->port_crc_mode_enabled);
5519 if (ret != HCMD_SUCCESS)
5520 goto set_local_link_attributes_fail;
/* enabled widths are converted from OPA bits to Verify Capability bits */
5522 ret = write_vc_local_link_width(dd, 0, 0,
5523 opa_to_vc_link_widths(ppd->link_width_enabled));
5524 if (ret != HCMD_SUCCESS)
5525 goto set_local_link_attributes_fail;
5527 /* let peer know who we are */
5528 ret = write_local_device_id(dd, dd->pcidev->device, dd->minrev);
5529 if (ret == HCMD_SUCCESS)
5532 set_local_link_attributes_fail:
5534 "Failed to set local link attributes, return 0x%x\n",
5540 * Call this to start the link. Schedule a retry if the cable is not
5541 * present or if unable to start polling. Do not do anything if the
5542 * link is disabled. Returns 0 if link is disabled or moved to polling
/*
 * start_link() - move the port to polling if it is allowed to start.
 * @ppd: port to start
 *
 * Nothing is started while the link is disabled or while the driver has
 * not marked the link ready; both cases only log the reason.  Otherwise
 * the port is sent to HLS_DN_POLL when a QSFP module is present, when
 * SerDes or LCB loopback is selected, or when running on the functional
 * simulator.  When none of those hold, a "no cable" message is logged.
 */
5544 int start_link(struct hfi1_pportdata *ppd)
5546 if (!ppd->link_enabled) {
5547 dd_dev_info(ppd->dd,
5548 "%s: stopping link start because link is disabled\n",
5552 if (!ppd->driver_link_ready) {
5553 dd_dev_info(ppd->dd,
5554 "%s: stopping link start because driver is not ready\n",
/* these configurations do not depend on a physical cable being detected */
5559 if (qsfp_mod_present(ppd) || loopback == LOOPBACK_SERDES ||
5560 loopback == LOOPBACK_LCB ||
5561 ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
5562 return set_link_state(ppd, HLS_DN_POLL);
5564 dd_dev_info(ppd->dd,
5565 "%s: stopping link start because no cable is present\n",
/*
 * reset_qsfp() - drive the QSFP reset pin through the ASIC QSFP registers.
 * @ppd: port whose module is reset
 *
 * Works on the QSFP2 register set when dd->hfi1_id is non-zero and on the
 * QSFP1 set otherwise.  The visible accesses are a read and write of the
 * OE register followed by a read and two writes of the OUT register, all
 * using the QSFP_HFI0_RESET_N bit as the mask.
 * NOTE(review): how the mask is applied between those accesses, and any
 * delay between the two OUT writes, is not visible here - confirm.
 */
5570 static void reset_qsfp(struct hfi1_pportdata *ppd)
5572 struct hfi1_devdata *dd = ppd->dd;
5573 u64 mask, qsfp_mask;
5575 mask = (u64)QSFP_HFI0_RESET_N;
5576 qsfp_mask = read_csr(dd,
5577 dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE);
5580 dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE,
5583 qsfp_mask = read_csr(dd,
5584 dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT);
5587 dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
5594 dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
/*
 * handle_qsfp_error_conditions() - report QSFP alarm and warning flags.
 * @ppd: port the module belongs to
 * @qsfp_interrupt_status: status bytes read from the module; bytes 0, 1
 *                         and 3 through 8 are examined here
 *
 * Each check treats the alarm bit and the warning bit of a condition the
 * same way and emits one message for it.  Byte 0 carries temperature,
 * byte 1 supply voltage, bytes 3-4 RX power, bytes 5-6 TX bias and
 * bytes 7-8 TX power; the remaining bytes are skipped as reserved or
 * vendor specific.
 */
5598 static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
5599 u8 *qsfp_interrupt_status)
5601 struct hfi1_devdata *dd = ppd->dd;
5603 if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) ||
5604 (qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
5606 "%s: QSFP cable on fire\n",
5609 if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) ||
5610 (qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING))
5612 "%s: QSFP cable temperature too low\n",
5615 if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) ||
5616 (qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
5618 "%s: QSFP supply voltage too high\n",
5621 if ((qsfp_interrupt_status[1] & QSFP_LOW_VCC_ALARM) ||
5622 (qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING))
5624 "%s: QSFP supply voltage too low\n",
5627 /* Byte 2 is vendor specific */
5629 if ((qsfp_interrupt_status[3] & QSFP_HIGH_POWER_ALARM) ||
5630 (qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING))
5632 "%s: Cable RX channel 1/2 power too high\n",
5635 if ((qsfp_interrupt_status[3] & QSFP_LOW_POWER_ALARM) ||
5636 (qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING))
5638 "%s: Cable RX channel 1/2 power too low\n",
5641 if ((qsfp_interrupt_status[4] & QSFP_HIGH_POWER_ALARM) ||
5642 (qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING))
5644 "%s: Cable RX channel 3/4 power too high\n",
5647 if ((qsfp_interrupt_status[4] & QSFP_LOW_POWER_ALARM) ||
5648 (qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING))
5650 "%s: Cable RX channel 3/4 power too low\n",
5653 if ((qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_ALARM) ||
5654 (qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING))
5656 "%s: Cable TX channel 1/2 bias too high\n",
5659 if ((qsfp_interrupt_status[5] & QSFP_LOW_BIAS_ALARM) ||
5660 (qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING))
5662 "%s: Cable TX channel 1/2 bias too low\n",
5665 if ((qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_ALARM) ||
5666 (qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING))
5668 "%s: Cable TX channel 3/4 bias too high\n",
5671 if ((qsfp_interrupt_status[6] & QSFP_LOW_BIAS_ALARM) ||
5672 (qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING))
5674 "%s: Cable TX channel 3/4 bias too low\n",
/* bytes 7 and 8 reuse the same power flag bits, this time for TX power */
5677 if ((qsfp_interrupt_status[7] & QSFP_HIGH_POWER_ALARM) ||
5678 (qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING))
5680 "%s: Cable TX channel 1/2 power too high\n",
5683 if ((qsfp_interrupt_status[7] & QSFP_LOW_POWER_ALARM) ||
5684 (qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING))
5686 "%s: Cable TX channel 1/2 power too low\n",
5689 if ((qsfp_interrupt_status[8] & QSFP_HIGH_POWER_ALARM) ||
5690 (qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING))
5692 "%s: Cable TX channel 3/4 power too high\n",
5695 if ((qsfp_interrupt_status[8] & QSFP_LOW_POWER_ALARM) ||
5696 (qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING))
5698 "%s: Cable TX channel 3/4 power too low\n",
5701 /* Bytes 9-10 and 11-12 are reserved */
5702 /* Bytes 13-15 are vendor specific */
/*
 * do_pre_lni_host_behaviors() - host-side work done before starting the link.
 * @ppd: port to prepare
 *
 * The only visible step refreshes the QSFP cache held in ppd->qsfp_info.
 */
5707 static int do_pre_lni_host_behaviors(struct hfi1_pportdata *ppd)
5709 refresh_qsfp_cache(ppd, &ppd->qsfp_info);
/*
 * do_qsfp_intr_fallback() - check the QSFP status byte directly.
 * @ppd: port whose module is checked
 *
 * Callers in this file use it when the module's interrupt is flagged as
 * not functional.  Reads one byte (third qsfp_read() argument 2,
 * presumably the byte offset - confirm) and, when QSFP_DATA_NOT_READY is
 * clear in it, runs do_pre_lni_host_behaviors().  A failed read is
 * logged.  Callers treat a negative return value as failure.
 */
5714 static int do_qsfp_intr_fallback(struct hfi1_pportdata *ppd)
5716 struct hfi1_devdata *dd = ppd->dd;
5717 u8 qsfp_interrupt_status = 0;
5719 if (qsfp_read(ppd, dd->hfi1_id, 2, &qsfp_interrupt_status, 1)
5722 "%s: Failed to read status of QSFP module\n",
5727 /* We don't care about alarms & warnings with a non-functional INT_N */
5728 if (!(qsfp_interrupt_status & QSFP_DATA_NOT_READY))
5729 do_pre_lni_host_behaviors(ppd);
5734 /* This routine will only be scheduled if the QSFP module is present */
/*
 * qsfp_event() - work handler for QSFP module events.
 * @work: the qsfp_work member embedded in a struct qsfp_data
 *
 * Recovers the owning qsfp_data with container_of() and checks that a
 * module is present.  Two flags in that structure select the work:
 *
 * cache_refresh_required: when the module interrupt is not flagged as
 * functional, do_qsfp_intr_fallback() is used instead, a failure of it
 * is only logged, and driver_link_ready is set to 1.
 *
 * check_interrupt_flags: 16 status bytes are read (third qsfp_read()
 * argument 6); a short read is logged.  Otherwise the flag is cleared
 * under qsfp_lock with interrupts disabled, one further status byte is
 * read, do_pre_lni_host_behaviors() runs when QSFP_DATA_NOT_READY is
 * clear, and the 16 bytes go to handle_qsfp_error_conditions().
 */
5735 static void qsfp_event(struct work_struct *work)
5737 struct qsfp_data *qd;
5738 struct hfi1_pportdata *ppd;
5739 struct hfi1_devdata *dd;
5741 qd = container_of(work, struct qsfp_data, qsfp_work);
5746 if (!qsfp_mod_present(ppd))
5750 * Turn DC back on after cables has been
5751 * re-inserted. Up until now, the DC has been in
5752 * reset to save power.
5756 if (qd->cache_refresh_required) {
5760 /* Check for QSFP interrupt after t_init (SFF 8679)
5764 if (!qd->qsfp_interrupt_functional) {
5765 if (do_qsfp_intr_fallback(ppd) < 0)
5766 dd_dev_info(dd, "%s: QSFP fallback failed\n",
5768 ppd->driver_link_ready = 1;
5773 if (qd->check_interrupt_flags) {
5774 u8 qsfp_interrupt_status[16] = {0,};
5776 if (qsfp_read(ppd, dd->hfi1_id, 6,
5777 &qsfp_interrupt_status[0], 16) != 16) {
5779 "%s: Failed to read status of QSFP module\n",
5782 unsigned long flags;
5785 spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
5786 ppd->qsfp_info.check_interrupt_flags = 0;
5787 spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
5790 if (qsfp_read(ppd, dd->hfi1_id, 2, &data_status, 1)
5793 "%s: Failed to read status of QSFP module\n",
5796 if (!(data_status & QSFP_DATA_NOT_READY)) {
5797 do_pre_lni_host_behaviors(ppd);
5800 handle_qsfp_error_conditions(ppd,
5801 qsfp_interrupt_status);
/*
 * init_qsfp() - prepare QSFP event handling for a port.
 * @ppd: port to initialise
 *
 * With SerDes or LCB loopback selected, or on the functional simulator,
 * the port is simply marked driver_link_ready.  Otherwise the qsfp_info
 * back pointer and its work item (handler qsfp_event) are set up and the
 * ASIC QSFP registers are programmed around the INT_N and MODPRST_N bits:
 * the MODPRST_N bit is dropped from the INVERT value when a module is
 * already present and put back before the MASK register is written.
 * When a module is present and its interrupt is not flagged as
 * functional, do_qsfp_intr_fallback() runs (a failure is only logged) and
 * driver_link_ready is set to 1.
 */
5806 void init_qsfp(struct hfi1_pportdata *ppd)
5808 struct hfi1_devdata *dd = ppd->dd;
5811 if (loopback == LOOPBACK_SERDES || loopback == LOOPBACK_LCB ||
5812 ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
5813 ppd->driver_link_ready = 1;
5817 ppd->qsfp_info.ppd = ppd;
5818 INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
5820 qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
5821 /* Clear current status to avoid spurious interrupts */
5828 /* Handle active low nature of INT_N and MODPRST_N pins */
5829 if (qsfp_mod_present(ppd))
5830 qsfp_mask &= ~(u64)QSFP_HFI0_MODPRST_N;
5832 dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT,
5835 /* Allow only INT_N and MODPRST_N to trigger QSFP interrupts */
5836 qsfp_mask |= (u64)QSFP_HFI0_MODPRST_N;
5838 dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK,
5841 if (qsfp_mod_present(ppd)) {
5845 /* Check for QSFP interrupt after t_init (SFF 8679)
5849 if (!ppd->qsfp_info.qsfp_interrupt_functional) {
5850 if (do_qsfp_intr_fallback(ppd) < 0)
5852 "%s: QSFP fallback failed\n",
5854 ppd->driver_link_ready = 1;
/*
 * bringup_serdes() - initial link bring-up for a port.
 * @ppd: port to bring up
 *
 * Enables extended PSN reception when the EXTENDED_PSN kernel capability
 * is set, derives the port GUID from the device base GUID and the
 * 1-based port number, marks the link enabled with a LINKUP init reason,
 * applies loopback set-up, and finally returns start_link()'s result.
 * NOTE(review): the conditions around the GUID and init_loopback() steps
 * are not visible here - confirm.
 */
5859 int bringup_serdes(struct hfi1_pportdata *ppd)
5861 struct hfi1_devdata *dd = ppd->dd;
5865 if (HFI1_CAP_IS_KSET(EXTENDED_PSN))
5866 add_rcvctrl(dd, RCV_CTRL_RCV_EXTENDED_PSN_ENABLE_SMASK);
5871 guid = dd->base_guid + ppd->port - 1;
5875 /* the link defaults to enabled */
5876 ppd->link_enabled = 1;
5877 /* Set linkinit_reason on power up per OPA spec */
5878 ppd->linkinit_reason = OPA_LINKINIT_REASON_LINKUP;
5881 ret = init_loopback(dd);
5886 return start_link(ppd);
/*
 * hfi1_quiet_serdes() - take the link down and keep it down.
 * @ppd: port to quiesce
 *
 * Clears driver_link_ready and link_enabled first, records SMA_DISABLED
 * as the local and remote link-down reason (neighbor reason 0), moves the
 * port to HLS_DN_OFFLINE, disables packet reception on the port and waits
 * for any pending freeze work to finish.
 */
5889 void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
5891 struct hfi1_devdata *dd = ppd->dd;
5894 * Shut down the link and keep it down. First turn off that the
5895 * driver wants to allow the link to be up (driver_link_ready).
5896 * Then make sure the link is not automatically restarted
5897 * (link_enabled). Cancel any pending restart. And finally
5900 ppd->driver_link_ready = 0;
5901 ppd->link_enabled = 0;
5903 set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0,
5904 OPA_LINKDOWN_REASON_SMA_DISABLED);
5905 set_link_state(ppd, HLS_DN_OFFLINE);
5907 /* disable the port */
5908 clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
5909 cancel_work_sync(&ppd->freeze_work);
/*
 * init_cpu_counters() - allocate the per-CPU RC counters of every port.
 * @dd: device whose ports are initialised
 *
 * For each of the dd->num_pports ports, rc_acks, rc_qacks and
 * rc_delayed_comp are allocated as per-CPU u64 values.  A NULL result
 * from any of the three is detected after all three calls were made.
 * NOTE(review): nothing visible here releases the allocations that did
 * succeed when another one fails - confirm the caller cleans up.
 */
5912 static inline int init_cpu_counters(struct hfi1_devdata *dd)
5914 struct hfi1_pportdata *ppd;
/* the port structures start directly behind the device structure */
5917 ppd = (struct hfi1_pportdata *)(dd + 1);
5918 for (i = 0; i < dd->num_pports; i++, ppd++) {
5919 ppd->ibport_data.rc_acks = NULL;
5920 ppd->ibport_data.rc_qacks = NULL;
5921 ppd->ibport_data.rc_acks = alloc_percpu(u64);
5922 ppd->ibport_data.rc_qacks = alloc_percpu(u64);
5923 ppd->ibport_data.rc_delayed_comp = alloc_percpu(u64);
5924 if ((ppd->ibport_data.rc_acks == NULL) ||
5925 (ppd->ibport_data.rc_delayed_comp == NULL) ||
5926 (ppd->ibport_data.rc_qacks == NULL))
/* printable names of the receive array entry types, indexed by type value */
5933 static const char * const pt_names[] = {
/*
 * pt_name() - printable name of a receive array entry type.
 * @type: type value used as the index into pt_names[]
 *
 * Returns "unknown" for any value at or beyond the end of the table, so
 * the lookup never reads outside the array.
 */
5939 static const char *pt_name(u32 type)
5941 return type >= ARRAY_SIZE(pt_names) ? "unknown" : pt_names[type];
5945 * index is the index into the receive array
/*
 * hfi1_put_tid() - write one receive array entry.
 * @dd: device owning the receive array
 * @index: entry index; each entry is 8 bytes wide
 * @type: PT_* entry type; values above PT_INVALID are reported as
 *        unexpected
 * @pa: buffer address; it is stored shifted right by RT_ADDR_SHIFT (12)
 * @order: value stored in the entry's buffer size field
 *
 * The entry is written through the rcvarray_wc mapping when the device
 * has one, otherwise through the regular register mapping at RCV_ARRAY.
 * The HFI1_PRESENT device flag is checked before anything is written.
 */
5947 void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
5948 u32 type, unsigned long pa, u16 order)
5951 void __iomem *base = (dd->rcvarray_wc ? dd->rcvarray_wc :
5952 (dd->kregbase + RCV_ARRAY));
5954 if (!(dd->flags & HFI1_PRESENT))
5957 if (type == PT_INVALID) {
5959 } else if (type > PT_INVALID) {
5961 "unexpected receive array type %u for index %u, not handled\n",
5966 hfi1_cdbg(TID, "type %s, index 0x%x, pa 0x%lx, bsize 0x%lx",
5967 pt_name(type), index, pa, (unsigned long)order);
5969 #define RT_ADDR_SHIFT 12 /* 4KB kernel address boundary */
/* entry layout: write-enable bit, buffer size field, address field */
5970 reg = RCV_ARRAY_RT_WRITE_ENABLE_SMASK
5971 | (u64)order << RCV_ARRAY_RT_BUF_SIZE_SHIFT
5972 | ((pa >> RT_ADDR_SHIFT) & RCV_ARRAY_RT_ADDR_MASK)
5973 << RCV_ARRAY_RT_ADDR_SHIFT;
5974 writeq(reg, base + (index * 8));
5976 if (type == PT_EAGER)
5978 * Eager entries are written one-by-one so we have to push them
5979 * after we write the entry.
/*
 * hfi1_clear_tids() - invalidate every receive array entry of a context.
 * @rcd: receive context whose entries are cleared
 *
 * Writes a PT_INVALID entry (address 0, order 0) over the eager range
 * [eager_base, eager_base + egrbufs.alloced) and then over the expected
 * range [expected_base, expected_base + expected_count).
 */
5986 void hfi1_clear_tids(struct hfi1_ctxtdata *rcd)
5988 struct hfi1_devdata *dd = rcd->dd;
5991 /* this could be optimized */
5992 for (i = rcd->eager_base; i < rcd->eager_base +
5993 rcd->egrbufs.alloced; i++)
5994 hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
5996 for (i = rcd->expected_base;
5997 i < rcd->expected_base + rcd->expected_count; i++)
5998 hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
/*
 * hfi1_get_base_kinfo() - fill in the runtime flags of a context info.
 * @rcd: receive context (not referenced by the visible lines)
 * @kinfo: structure whose runtime_flags field is written
 *
 * The flags combine the misc bits shifted to HFI1_CAP_USER_SHIFT with the
 * values of HFI1_CAP_UGET(MASK) and HFI1_CAP_KGET(K2U).
 */
6001 int hfi1_get_base_kinfo(struct hfi1_ctxtdata *rcd,
6002 struct hfi1_ctxt_info *kinfo)
6004 kinfo->runtime_flags = (HFI1_MISC_GET() << HFI1_CAP_USER_SHIFT) |
6005 HFI1_CAP_UGET(MASK) | HFI1_CAP_KGET(K2U);
/*
 * hfi1_get_msgheader() - locate the message header belonging to an RHF.
 * @dd: device; supplies rhf_offset
 * @rhf_addr: address of the receive header flags inside a header entry
 *
 * Steps back by dd->rhf_offset from @rhf_addr and then forward by the
 * header offset decoded from the RHF itself.  The arithmetic is done on a
 * __le32 pointer, so both offsets count 32-bit words.
 */
6009 struct hfi1_message_header *hfi1_get_msgheader(
6010 struct hfi1_devdata *dd, __le32 *rhf_addr)
6012 u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));
6014 return (struct hfi1_message_header *)
6015 (rhf_addr - dd->rhf_offset + offset);
/*
 * Printable names of the HFI1_IB_CFG_* selectors.  ib_cfg_name() indexes
 * this table with the selector value itself, so the order of the strings
 * has to follow the numeric order of the selectors.
 */
6018 static const char * const ib_cfg_name_strings[] = {
6019 "HFI1_IB_CFG_LIDLMC",
6020 "HFI1_IB_CFG_LWID_DG_ENB",
6021 "HFI1_IB_CFG_LWID_ENB",
6023 "HFI1_IB_CFG_SPD_ENB",
6025 "HFI1_IB_CFG_RXPOL_ENB",
6026 "HFI1_IB_CFG_LREV_ENB",
6027 "HFI1_IB_CFG_LINKLATENCY",
6028 "HFI1_IB_CFG_HRTBT",
6029 "HFI1_IB_CFG_OP_VLS",
6030 "HFI1_IB_CFG_VL_HIGH_CAP",
6031 "HFI1_IB_CFG_VL_LOW_CAP",
6032 "HFI1_IB_CFG_OVERRUN_THRESH",
6033 "HFI1_IB_CFG_PHYERR_THRESH",
6034 "HFI1_IB_CFG_LINKDEFAULT",
6035 "HFI1_IB_CFG_PKEYS",
6037 "HFI1_IB_CFG_LSTATE",
6038 "HFI1_IB_CFG_VL_HIGH_LIMIT",
6039 "HFI1_IB_CFG_PMA_TICKS",
/*
 * ib_cfg_name() - printable name of an HFI1_IB_CFG_* selector.
 * @which: selector value
 *
 * Negative values and values beyond the end of ib_cfg_name_strings[] are
 * rejected before the table is indexed.
 */
6043 static const char *ib_cfg_name(int which)
6045 if (which < 0 || which >= ARRAY_SIZE(ib_cfg_name_strings))
6047 return ib_cfg_name_strings[which];
/*
 * hfi1_get_ib_cfg() - read one port configuration value.
 * @ppd: port to query
 * @which: HFI1_IB_CFG_* selector
 *
 * Most selectors return a field of @ppd; the two VL arbitration
 * capacities are compile-time table sizes and the link default comes from
 * the device structure.  The selectors grouped without a value of their
 * own share the "not implemented" handling, which logs the selector name
 * only when the PRINT_UNIMPL kernel capability is set.
 */
6050 int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which)
6052 struct hfi1_devdata *dd = ppd->dd;
6056 case HFI1_IB_CFG_LWID_ENB: /* allowed Link-width */
6057 val = ppd->link_width_enabled;
6059 case HFI1_IB_CFG_LWID: /* currently active Link-width */
6060 val = ppd->link_width_active;
6062 case HFI1_IB_CFG_SPD_ENB: /* allowed Link speeds */
6063 val = ppd->link_speed_enabled;
6065 case HFI1_IB_CFG_SPD: /* current Link speed */
6066 val = ppd->link_speed_active;
6069 case HFI1_IB_CFG_RXPOL_ENB: /* Auto-RX-polarity enable */
6070 case HFI1_IB_CFG_LREV_ENB: /* Auto-Lane-reversal enable */
6071 case HFI1_IB_CFG_LINKLATENCY:
6074 case HFI1_IB_CFG_OP_VLS:
6075 val = ppd->vls_operational;
6077 case HFI1_IB_CFG_VL_HIGH_CAP: /* VL arb high priority table size */
6078 val = VL_ARB_HIGH_PRIO_TABLE_SIZE;
6080 case HFI1_IB_CFG_VL_LOW_CAP: /* VL arb low priority table size */
6081 val = VL_ARB_LOW_PRIO_TABLE_SIZE;
6083 case HFI1_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */
6084 val = ppd->overrun_threshold;
6086 case HFI1_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */
6087 val = ppd->phy_error_threshold;
6089 case HFI1_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
6090 val = dd->link_default;
6093 case HFI1_IB_CFG_HRTBT: /* Heartbeat off/enable/auto */
6094 case HFI1_IB_CFG_PMA_TICKS:
6097 if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
6100 "%s: which %s: not implemented\n",
6102 ib_cfg_name(which));
6110 * The largest MAD packet size.
6112 #define MAX_MAD_PACKET 2048
6115 * Return the maximum header bytes that can go on the _wire_
6116 * for this device. This count includes the ICRC which is
6117 * not part of the packet held in memory but it is appended
6119 * This is dependent on the device's receive header entry size.
6120 * HFI allows this to be set per-receive context, but the
6121 * driver presently enforces a global value.
/*
 * lrh_max_header_bytes() - largest header size, in bytes, on the wire.
 * @dd: device; the entry size of receive context 0 is used
 *
 * Takes rcvhdrqentsize (in DWs) of dd->rcd[0], subtracts 2 DWs for the
 * PBC/RHF, adds 1 DW for the ICRC and converts the DW count to bytes with
 * a shift by 2.
 */
6123 u32 lrh_max_header_bytes(struct hfi1_devdata *dd)
6126 * The maximum non-payload (MTU) bytes in LRH.PktLen are
6127 * the Receive Header Entry Size minus the PBC (or RHF) size
6128 * plus one DW for the ICRC appended by HW.
6130 * dd->rcd[0].rcvhdrqentsize is in DW.
6131 * We use rcd[0] as all context will have the same value. Also,
6132 * the first kernel context would have been allocated by now so
6133 * we are guaranteed a valid value.
6135 return (dd->rcd[0]->rcvhdrqentsize - 2/*PBC/RHF*/ + 1/*ICRC*/) << 2;
6140 * @ppd - per port data
6142 * Set the MTU by limiting how many DWs may be sent. The SendLenCheck*
6143 * registers compare against LRH.PktLen, so use the max bytes included
6146 * This routine changes all VL values except VL15, which it maintains at
/*
 * set_send_length() - program the per-VL send length limits from the MTUs.
 * @ppd: port whose limits are updated
 *
 * Every limit is (VL MTU + lrh_max_header_bytes()) expressed in DWs.  The
 * limits are packed into SEND_LEN_CHECK0 and SEND_LEN_CHECK1; the VL15
 * field of SEND_LEN_CHECK1 is filled in up front from dd->vld[15].  The
 * credit return thresholds of the per-VL send contexts are then
 * recomputed, and the largest data-VL MTU is encoded into the MTU_CAP
 * field of DCC_CFG_PORT_CONFIG.
 */
6149 static void set_send_length(struct hfi1_pportdata *ppd)
6151 struct hfi1_devdata *dd = ppd->dd;
6152 u32 max_hb = lrh_max_header_bytes(dd), maxvlmtu = 0, dcmtu;
6153 u64 len1 = 0, len2 = (((dd->vld[15].mtu + max_hb) >> 2)
6154 & SEND_LEN_CHECK1_LEN_VL15_MASK) <<
6155 SEND_LEN_CHECK1_LEN_VL15_SHIFT;
/* track the largest MTU while packing one field per VL, four per register */
6158 for (i = 0; i < ppd->vls_supported; i++) {
6159 if (dd->vld[i].mtu > maxvlmtu)
6160 maxvlmtu = dd->vld[i].mtu;
6162 len1 |= (((dd->vld[i].mtu + max_hb) >> 2)
6163 & SEND_LEN_CHECK0_LEN_VL0_MASK) <<
6164 ((i % 4) * SEND_LEN_CHECK0_LEN_VL1_SHIFT);
6166 len2 |= (((dd->vld[i].mtu + max_hb) >> 2)
6167 & SEND_LEN_CHECK1_LEN_VL4_MASK) <<
6168 ((i % 4) * SEND_LEN_CHECK1_LEN_VL5_SHIFT);
6170 write_csr(dd, SEND_LEN_CHECK0, len1);
6171 write_csr(dd, SEND_LEN_CHECK1, len2);
6172 /* adjust kernel credit return thresholds based on new MTUs */
6173 /* all kernel receive contexts have the same hdrqentsize */
6174 for (i = 0; i < ppd->vls_supported; i++) {
6175 sc_set_cr_threshold(dd->vld[i].sc,
6176 sc_mtu_to_threshold(dd->vld[i].sc, dd->vld[i].mtu,
6177 dd->rcd[0]->rcvhdrqentsize));
6179 sc_set_cr_threshold(dd->vld[15].sc,
6180 sc_mtu_to_threshold(dd->vld[15].sc, dd->vld[15].mtu,
6181 dd->rcd[0]->rcvhdrqentsize));
6183 /* Adjust maximum MTU for the port in DC */
/* 10240 has a dedicated code; other sizes use ilog2(mtu / 256) + 1 */
6184 dcmtu = maxvlmtu == 10240 ? DCC_CFG_PORT_MTU_CAP_10240 :
6185 (ilog2(maxvlmtu >> 8) + 1);
/* len1 is reused as scratch for the read-modify-write of the DC register */
6186 len1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG);
6187 len1 &= ~DCC_CFG_PORT_CONFIG_MTU_CAP_SMASK;
6188 len1 |= ((u64)dcmtu & DCC_CFG_PORT_CONFIG_MTU_CAP_MASK) <<
6189 DCC_CFG_PORT_CONFIG_MTU_CAP_SHIFT;
6190 write_csr(ppd->dd, DCC_CFG_PORT_CONFIG, len1);
/*
 * set_lidlmc() - program the port LID and LMC into the hardware checks.
 * @ppd: port whose lid and lmc fields are applied
 *
 * Updates the target DLID and DLID mask fields of DCC_CFG_PORT_CONFIG1,
 * writes the same SLID check value to every chip send context, and hands
 * the mask and LID to the SDMA code.  A message is logged when the call
 * happens while snoop mode is flagged.
 */
6193 static void set_lidlmc(struct hfi1_pportdata *ppd)
6197 struct hfi1_devdata *dd = ppd->dd;
/* all ones except the low ppd->lmc bits, which are left unchecked */
6198 u32 mask = ~((1U << ppd->lmc) - 1);
6199 u64 c1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG1);
6201 if (dd->hfi1_snoop.mode_flag)
6202 dd_dev_info(dd, "Set lid/lmc while snooping");
6204 c1 &= ~(DCC_CFG_PORT_CONFIG1_TARGET_DLID_SMASK
6205 | DCC_CFG_PORT_CONFIG1_DLID_MASK_SMASK);
6206 c1 |= ((ppd->lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK)
6207 << DCC_CFG_PORT_CONFIG1_TARGET_DLID_SHIFT)|
6208 ((mask & DCC_CFG_PORT_CONFIG1_DLID_MASK_MASK)
6209 << DCC_CFG_PORT_CONFIG1_DLID_MASK_SHIFT);
6210 write_csr(ppd->dd, DCC_CFG_PORT_CONFIG1, c1);
6213 * Iterate over all the send contexts and set their SLID check
/* the value is identical for all contexts, so it is built once */
6215 sreg = ((mask & SEND_CTXT_CHECK_SLID_MASK_MASK) <<
6216 SEND_CTXT_CHECK_SLID_MASK_SHIFT) |
6217 (((ppd->lid & mask) & SEND_CTXT_CHECK_SLID_VALUE_MASK) <<
6218 SEND_CTXT_CHECK_SLID_VALUE_SHIFT);
6220 for (i = 0; i < dd->chip_send_contexts; i++) {
6221 hfi1_cdbg(LINKVERB, "SendContext[%d].SLID_CHECK = 0x%x",
6223 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, sreg);
6226 /* Now we have to do the same thing for the sdma engines */
6227 sdma_update_lmc(dd, mask, ppd->lid);
/*
 * wait_phy_linkstate() - poll until the physical link state matches.
 * @dd: device to poll
 * @state: physical state to wait for
 * @msecs: upper bound on the wait, in milliseconds
 *
 * Reads the physical state repeatedly, sleeping roughly 2 ms between
 * reads.  When the deadline passes first, the wanted and the current
 * state are logged.  The function sleeps, so it needs a context that is
 * allowed to sleep.
 */
6230 static int wait_phy_linkstate(struct hfi1_devdata *dd, u32 state, u32 msecs)
6232 unsigned long timeout;
6235 timeout = jiffies + msecs_to_jiffies(msecs);
6237 curr_state = read_physical_state(dd);
6238 if (curr_state == state)
6240 if (time_after(jiffies, timeout)) {
6242 "timeout waiting for phy link state 0x%x, current state is 0x%x\n",
6246 usleep_range(1950, 2050); /* sleep 2ms-ish */
6253 * Helper for set_link_state(). Do not call except from that routine.
6254 * Expects ppd->hls_mutex to be held.
6256 * @rem_reason value to be sent to the neighbor
6258 * LinkDownReasons only set if transition succeeds.
/*
 * goto_offline() - take the port to the offline state.
 * @ppd: port to take offline
 * @rem_reason: reason sent to the neighbor; it is placed in bits 8 and up
 *              of the state request
 *
 * The host state becomes HLS_GOING_OFFLINE, then HLS_LINK_COOLDOWN once
 * the host has LCB access again, and finally HLS_DN_OFFLINE.  Depending
 * on the state the link was in before, the link-up change handler is run
 * or the last local and remote LNI states are read and logged.  The
 * active width, both downgrade widths and the egress rate are zeroed.
 */
6260 static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
6262 struct hfi1_devdata *dd = ppd->dd;
6263 u32 pstate, previous_state;
6264 u32 last_local_state;
6265 u32 last_remote_state;
6270 previous_state = ppd->host_link_state;
6271 ppd->host_link_state = HLS_GOING_OFFLINE;
/*
 * Three cases: already exactly offline; low byte says offline but upper
 * bits differ (still settling); anything else needs an explicit request.
 */
6272 pstate = read_physical_state(dd);
6273 if (pstate == PLS_OFFLINE) {
6274 do_transition = 0; /* in right state */
6275 do_wait = 0; /* ...no need to wait */
6276 } else if ((pstate & 0xff) == PLS_OFFLINE) {
6277 do_transition = 0; /* in an offline transient state */
6278 do_wait = 1; /* ...wait for it to settle */
6280 do_transition = 1; /* need to move to offline */
6281 do_wait = 1; /* ...will need to wait */
6284 if (do_transition) {
6285 ret = set_physical_link_state(dd,
6286 PLS_OFFLINE | (rem_reason << 8));
6288 if (ret != HCMD_SUCCESS) {
6290 "Failed to transition to Offline link state, return %d\n",
/* record a transient reason only when no reason has been stored yet */
6294 if (ppd->offline_disabled_reason == OPA_LINKDOWN_REASON_NONE)
6295 ppd->offline_disabled_reason =
6296 OPA_LINKDOWN_REASON_TRANSIENT;
6300 /* it can take a while for the link to go down */
6301 ret = wait_phy_linkstate(dd, PLS_OFFLINE, 10000);
6306 /* make sure the logical state is also down */
6307 wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
6310 * Now in charge of LCB - must be after the physical state is
6311 * offline.quiet and before host_link_state is changed.
6313 set_host_lcb_access(dd);
6314 write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
6315 ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
6318 * The LNI has a mandatory wait time after the physical state
6319 * moves to Offline.Quiet. The wait time may be different
6320 * depending on how the link went down. The 8051 firmware
6321 * will observe the needed wait time and only move to ready
6322 * when that is completed. The largest of the quiet timeouts
6323 * is 2.5s, so wait that long and then a bit more.
6325 ret = wait_fm_ready(dd, 3000);
6328 "After going offline, timed out waiting for the 8051 to become ready to accept host requests\n");
6329 /* state is really offline, so make it so */
6330 ppd->host_link_state = HLS_DN_OFFLINE;
6335 * The state is now offline and the 8051 is ready to accept host
6337 * - change our state
6338 * - notify others if we were previously in a linkup state
6340 ppd->host_link_state = HLS_DN_OFFLINE;
6341 if (previous_state & HLS_UP) {
6342 /* went down while link was up */
6343 handle_linkup_change(dd, 0);
6344 } else if (previous_state
6345 & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
6346 /* went down while attempting link up */
6347 /* byte 1 of last_*_state is the failure reason */
6348 read_last_local_state(dd, &last_local_state);
6349 read_last_remote_state(dd, &last_remote_state);
6351 "LNI failure last states: local 0x%08x, remote 0x%08x\n",
6352 last_local_state, last_remote_state);
6355 /* the active link width (downgrade) is 0 on link down */
6356 ppd->link_width_active = 0;
6357 ppd->link_width_downgrade_tx_active = 0;
6358 ppd->link_width_downgrade_rx_active = 0;
6359 ppd->current_egress_rate = 0;
6363 /* return the link state name */
/*
 * link_state_name() - printable name of an HLS_* host link state.
 * @state: state value; its highest set bit selects the name
 *
 * The table is indexed by bit position, so positions without an
 * initializer hold NULL and are reported as "unknown", as are positions
 * beyond the end of the table.
 * NOTE(review): for state == 0 the result depends on what ilog2(0)
 * yields - confirm callers never pass 0, or that it maps to "unknown".
 */
6364 static const char *link_state_name(u32 state)
6367 int n = ilog2(state);
6368 static const char * const names[] = {
6369 [__HLS_UP_INIT_BP] = "INIT",
6370 [__HLS_UP_ARMED_BP] = "ARMED",
6371 [__HLS_UP_ACTIVE_BP] = "ACTIVE",
6372 [__HLS_DN_DOWNDEF_BP] = "DOWNDEF",
6373 [__HLS_DN_POLL_BP] = "POLL",
6374 [__HLS_DN_DISABLE_BP] = "DISABLE",
6375 [__HLS_DN_OFFLINE_BP] = "OFFLINE",
6376 [__HLS_VERIFY_CAP_BP] = "VERIFY_CAP",
6377 [__HLS_GOING_UP_BP] = "GOING_UP",
6378 [__HLS_GOING_OFFLINE_BP] = "GOING_OFFLINE",
6379 [__HLS_LINK_COOLDOWN_BP] = "LINK_COOLDOWN"
6382 name = n < ARRAY_SIZE(names) ? names[n] : NULL;
6383 return name ? name : "unknown";
6386 /* return the link state reason name */
/*
 * link_state_reason_name() - printable suffix naming the link-init reason.
 * @ppd: port; supplies linkinit_reason
 * @state: state being entered
 *
 * A reason is looked up only when @state is HLS_UP_INIT; the strings are
 * parenthesised so they can be appended to a state name in a log line.
 */
6387 static const char *link_state_reason_name(struct hfi1_pportdata *ppd, u32 state)
6389 if (state == HLS_UP_INIT) {
6390 switch (ppd->linkinit_reason) {
6391 case OPA_LINKINIT_REASON_LINKUP:
6393 case OPA_LINKINIT_REASON_FLAPPING:
6394 return "(FLAPPING)";
6395 case OPA_LINKINIT_OUTSIDE_POLICY:
6396 return "(OUTSIDE_POLICY)";
6397 case OPA_LINKINIT_QUARANTINED:
6398 return "(QUARANTINED)";
6399 case OPA_LINKINIT_INSUFIC_CAPABILITY:
6400 return "(INSUFIC_CAPABILITY)";
6409 * driver_physical_state - convert the driver's notion of a port's
6410 * state (an HLS_*) into a physical state (a {IB,OPA}_PORTPHYSSTATE_*).
6411 * Return -1 (converted to a u32) to indicate error.
/*
 * driver_physical_state() - physical port state implied by the host state.
 * @ppd: port; supplies host_link_state
 *
 * Several host states share one physical state: the going-offline and
 * cooldown states report OFFLINE just like HLS_DN_OFFLINE, and
 * HLS_VERIFY_CAP reports POLLING.  A state with no mapping is logged as
 * invalid.
 */
6413 u32 driver_physical_state(struct hfi1_pportdata *ppd)
6415 switch (ppd->host_link_state) {
6419 return IB_PORTPHYSSTATE_LINKUP;
6421 return IB_PORTPHYSSTATE_POLLING;
6422 case HLS_DN_DISABLE:
6423 return IB_PORTPHYSSTATE_DISABLED;
6424 case HLS_DN_OFFLINE:
6425 return OPA_PORTPHYSSTATE_OFFLINE;
6426 case HLS_VERIFY_CAP:
6427 return IB_PORTPHYSSTATE_POLLING;
6429 return IB_PORTPHYSSTATE_POLLING;
6430 case HLS_GOING_OFFLINE:
6431 return OPA_PORTPHYSSTATE_OFFLINE;
6432 case HLS_LINK_COOLDOWN:
6433 return OPA_PORTPHYSSTATE_OFFLINE;
6434 case HLS_DN_DOWNDEF:
6436 dd_dev_err(ppd->dd, "invalid host_link_state 0x%x\n",
6437 ppd->host_link_state);
6443 * driver_logical_state - convert the driver's notion of a port's
6444 * state (an HLS_*) into a logical state (a IB_PORT_*). Return -1
6445 * (converted to a u32) to indicate error.
/*
 * driver_logical_state() - logical port state implied by the host state.
 * @ppd: port; supplies host_link_state
 *
 * Any non-zero host state without an HLS_UP bit is IB_PORT_DOWN.  The
 * remaining values are decoded from the HLS_UP bits alone; a value that
 * matches none of the cases, including a host state of 0, is logged as
 * invalid.
 */
6447 u32 driver_logical_state(struct hfi1_pportdata *ppd)
6449 if (ppd->host_link_state && !(ppd->host_link_state & HLS_UP))
6450 return IB_PORT_DOWN;
6452 switch (ppd->host_link_state & HLS_UP) {
6454 return IB_PORT_INIT;
6456 return IB_PORT_ARMED;
6458 return IB_PORT_ACTIVE;
6460 dd_dev_err(ppd->dd, "invalid host_link_state 0x%x\n",
6461 ppd->host_link_state);
/*
 * set_link_down_reason() - record why the link is going down.
 * @ppd: port to update
 * @lcl_reason: stored as the latest local reason
 * @neigh_reason: stored as the latest neighbor reason
 * @rem_reason: stored as the reason to report to the remote side
 *
 * The three values are stored only while both latest reasons are still 0,
 * so an earlier recorded reason is never overwritten by a later call.
 */
6466 void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
6467 u8 neigh_reason, u8 rem_reason)
6469 if (ppd->local_link_down_reason.latest == 0 &&
6470 ppd->neigh_link_down_reason.latest == 0) {
6471 ppd->local_link_down_reason.latest = lcl_reason;
6472 ppd->neigh_link_down_reason.latest = neigh_reason;
6473 ppd->remote_link_down_reason = rem_reason;
6478 * Change the physical and/or logical link state.
6480 * Do not call this routine while inside an interrupt. It contains
6481 * calls to routines that can take multiple seconds to finish.
6483 * Returns 0 on success, -errno on failure.
/*
 * set_link_state() - move the port to the requested host link state.
 * @ppd:   port whose host_link_state is being changed
 * @state: requested HLS_* state; HLS_DN_DOWNDEF is replaced by
 *         dd->link_default before it is acted upon
 *
 * The transition runs with ppd->hls_lock held.  A request for
 * HLS_DN_POLL while already in HLS_DN_POLL is treated as a link bounce
 * rather than as matching states.  When the link was up before the
 * call and is in one of the down states afterwards, and neither SMA
 * link down reason has been recorded yet, the latest local and
 * neighbor link down reasons are copied into their .sma fields.  The
 * IB_EVENT_PORT_ACTIVE event prepared on the transition to active is
 * handed to ib_dispatch_event() after hls_lock has been dropped.
 */
6485 int set_link_state(struct hfi1_pportdata *ppd, u32 state)
6487 struct hfi1_devdata *dd = ppd->dd;
6488 struct ib_event event = {.device = NULL};
6490 int was_up, is_down;
6491 int orig_new_state, poll_bounce;
6493 mutex_lock(&ppd->hls_lock);
6495 orig_new_state = state;
6496 if (state == HLS_DN_DOWNDEF)
6497 state = dd->link_default;
6499 /* interpret poll -> poll as a link bounce */
6500 poll_bounce = ppd->host_link_state == HLS_DN_POLL
6501 && state == HLS_DN_POLL;
6503 dd_dev_info(dd, "%s: current %s, new %s %s%s\n", __func__,
6504 link_state_name(ppd->host_link_state),
6505 link_state_name(orig_new_state),
6506 poll_bounce ? "(bounce) " : "",
6507 link_state_reason_name(ppd, state));
6509 was_up = !!(ppd->host_link_state & HLS_UP);
6512 * If we're going to a (HLS_*) link state that implies the logical
6513 * link state is neither of (IB_PORT_ARMED, IB_PORT_ACTIVE), then
6514 * reset is_sm_config_started to 0.
6516 if (!(state & (HLS_UP_ARMED | HLS_UP_ACTIVE)))
6517 ppd->is_sm_config_started = 0;
6520 * Do nothing if the states match. Let a poll to poll link bounce
6523 if (ppd->host_link_state == state && !poll_bounce)
6528 if (ppd->host_link_state == HLS_DN_POLL && (quick_linkup
6529 || dd->icode == ICODE_FUNCTIONAL_SIMULATOR)) {
6531 * Quick link up jumps from polling to here.
6533 * Whether in normal or loopback mode, the
6534 * simulator jumps from polling to link up.
6538 } else if (ppd->host_link_state != HLS_GOING_UP) {
6542 ppd->host_link_state = HLS_UP_INIT;
6543 ret = wait_logical_linkstate(ppd, IB_PORT_INIT, 1000);
6545 /* logical state didn't change, stay at going_up */
6546 ppd->host_link_state = HLS_GOING_UP;
6548 "%s: logical state did not change to INIT\n",
6551 /* clear old transient LINKINIT_REASON code */
6552 if (ppd->linkinit_reason >= OPA_LINKINIT_REASON_CLEAR)
6553 ppd->linkinit_reason =
6554 OPA_LINKINIT_REASON_LINKUP;
6556 /* enable the port */
6557 add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
6559 handle_linkup_change(dd, 1);
6563 if (ppd->host_link_state != HLS_UP_INIT)
6566 ppd->host_link_state = HLS_UP_ARMED;
6567 set_logical_state(dd, LSTATE_ARMED);
6568 ret = wait_logical_linkstate(ppd, IB_PORT_ARMED, 1000);
6570 /* logical state didn't change, stay at init */
6571 ppd->host_link_state = HLS_UP_INIT;
6573 "%s: logical state did not change to ARMED\n",
6577 * The simulator does not currently implement SMA messages,
6578 * so neighbor_normal is not set. Set it here when we first
6581 if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
6582 ppd->neighbor_normal = 1;
6585 if (ppd->host_link_state != HLS_UP_ARMED)
6588 ppd->host_link_state = HLS_UP_ACTIVE;
6589 set_logical_state(dd, LSTATE_ACTIVE);
6590 ret = wait_logical_linkstate(ppd, IB_PORT_ACTIVE, 1000);
6592 /* logical state didn't change, stay at armed */
6593 ppd->host_link_state = HLS_UP_ARMED;
6595 "%s: logical state did not change to ACTIVE\n",
6599 /* tell all engines to go running */
6600 sdma_all_running(dd);
6602 /* Signal the IB layer that the port has gone active */
6603 event.device = &dd->verbs_dev.ibdev;
6604 event.element.port_num = ppd->port;
6605 event.event = IB_EVENT_PORT_ACTIVE;
6609 if ((ppd->host_link_state == HLS_DN_DISABLE ||
6610 ppd->host_link_state == HLS_DN_OFFLINE) &&
6613 /* Hand LED control to the DC */
6614 write_csr(dd, DCC_CFG_LED_CNTRL, 0);
6616 if (ppd->host_link_state != HLS_DN_OFFLINE) {
6617 u8 tmp = ppd->link_enabled;
6619 ret = goto_offline(ppd, ppd->remote_link_down_reason);
6621 ppd->link_enabled = tmp;
6624 ppd->remote_link_down_reason = 0;
6626 if (ppd->driver_link_ready)
6627 ppd->link_enabled = 1;
6630 ret = set_local_link_attributes(ppd);
6634 ppd->port_error_action = 0;
6635 ppd->host_link_state = HLS_DN_POLL;
6638 /* quick linkup does not go into polling */
6639 ret = do_quick_linkup(dd);
6641 ret1 = set_physical_link_state(dd, PLS_POLLING);
6642 if (ret1 != HCMD_SUCCESS) {
6644 "Failed to transition to Polling link state, return 0x%x\n",
6649 ppd->offline_disabled_reason = OPA_LINKDOWN_REASON_NONE;
6651 * If an error occurred above, go back to offline. The
6652 * caller may reschedule another attempt.
6655 goto_offline(ppd, 0);
6657 case HLS_DN_DISABLE:
6658 /* link is disabled */
6659 ppd->link_enabled = 0;
6661 /* allow any state to transition to disabled */
6663 /* must transition to offline first */
6664 if (ppd->host_link_state != HLS_DN_OFFLINE) {
6665 ret = goto_offline(ppd, ppd->remote_link_down_reason);
6668 ppd->remote_link_down_reason = 0;
6671 ret1 = set_physical_link_state(dd, PLS_DISABLED);
6672 if (ret1 != HCMD_SUCCESS) {
6674 "Failed to transition to Disabled link state, return 0x%x\n",
6679 ppd->host_link_state = HLS_DN_DISABLE;
6682 case HLS_DN_OFFLINE:
6683 if (ppd->host_link_state == HLS_DN_DISABLE)
6686 /* allow any state to transition to offline */
6687 ret = goto_offline(ppd, ppd->remote_link_down_reason);
6689 ppd->remote_link_down_reason = 0;
6691 case HLS_VERIFY_CAP:
6692 if (ppd->host_link_state != HLS_DN_POLL)
6694 ppd->host_link_state = HLS_VERIFY_CAP;
6697 if (ppd->host_link_state != HLS_VERIFY_CAP)
6700 ret1 = set_physical_link_state(dd, PLS_LINKUP);
6701 if (ret1 != HCMD_SUCCESS) {
6703 "Failed to transition to link up state, return 0x%x\n",
6708 ppd->host_link_state = HLS_GOING_UP;
6711 case HLS_GOING_OFFLINE: /* transient within goto_offline() */
6712 case HLS_LINK_COOLDOWN: /* transient within goto_offline() */
6714 dd_dev_info(dd, "%s: state 0x%x: not supported\n",
6720 is_down = !!(ppd->host_link_state & (HLS_DN_POLL |
6721 HLS_DN_DISABLE | HLS_DN_OFFLINE));
6723 if (was_up && is_down && ppd->local_link_down_reason.sma == 0 &&
6724 ppd->neigh_link_down_reason.sma == 0) {
6725 ppd->local_link_down_reason.sma =
6726 ppd->local_link_down_reason.latest;
6727 ppd->neigh_link_down_reason.sma =
6728 ppd->neigh_link_down_reason.latest;
6734 dd_dev_err(dd, "%s: unexpected state transition from %s to %s\n",
6735 __func__, link_state_name(ppd->host_link_state),
6736 link_state_name(state));
6740 mutex_unlock(&ppd->hls_lock);
6743 ib_dispatch_event(&event);
/*
 * hfi1_set_ib_cfg() - apply one IB port configuration item.
 * @ppd:   port being configured
 * @which: HFI1_IB_CFG_* selector naming the item
 * @val:   new value for that item
 *
 * Link width, link width downgrade and link speed enables are ANDed
 * with the matching *_supported field of @ppd before being stored.
 * The overrun and PHY error thresholds are saved in @ppd so they can
 * be reported later.  HFI1_IB_CFG_LINKDEFAULT checks @val against
 * HLS_DN_POLL, the only default link down state the HFI supports.
 * The "not implemented" message is printed only when the PRINT_UNIMPL
 * capability is set.
 */
6748 int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val)
6754 case HFI1_IB_CFG_LIDLMC:
6757 case HFI1_IB_CFG_VL_HIGH_LIMIT:
6759 * The VL Arbitrator high limit is sent in units of 4k
6760 * bytes, while HFI stores it in units of 64 bytes.
6763 reg = ((u64)val & SEND_HIGH_PRIORITY_LIMIT_LIMIT_MASK)
6764 << SEND_HIGH_PRIORITY_LIMIT_LIMIT_SHIFT;
6765 write_csr(ppd->dd, SEND_HIGH_PRIORITY_LIMIT, reg);
6767 case HFI1_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
6768 /* HFI only supports POLL as the default link down state */
6769 if (val != HLS_DN_POLL)
6772 case HFI1_IB_CFG_OP_VLS:
6773 if (ppd->vls_operational != val) {
6774 ppd->vls_operational = val;
6778 ret = sdma_map_init(
6786 * For link width, link width downgrade, and speed enable, always AND
6787 * the setting with what is actually supported. This has two benefits.
6788 * First, enabled can't have unsupported values, no matter what the
6789 * SM or FM might want. Second, the ALL_SUPPORTED wildcards that mean
6790 * "fill in with your supported value" have all the bits in the
6791 * field set, so simply ANDing with supported has the desired result.
6793 case HFI1_IB_CFG_LWID_ENB: /* set allowed Link-width */
6794 ppd->link_width_enabled = val & ppd->link_width_supported;
6796 case HFI1_IB_CFG_LWID_DG_ENB: /* set allowed link width downgrade */
6797 ppd->link_width_downgrade_enabled =
6798 val & ppd->link_width_downgrade_supported;
6800 case HFI1_IB_CFG_SPD_ENB: /* allowed Link speeds */
6801 ppd->link_speed_enabled = val & ppd->link_speed_supported;
6803 case HFI1_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */
6805 * HFI does not follow IB specs, save this value
6806 * so we can report it, if asked.
6808 ppd->overrun_threshold = val;
6810 case HFI1_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */
6812 * HFI does not follow IB specs, save this value
6813 * so we can report it, if asked.
6815 ppd->phy_error_threshold = val;
6818 case HFI1_IB_CFG_MTU:
6819 set_send_length(ppd);
6822 case HFI1_IB_CFG_PKEYS:
6823 if (HFI1_CAP_IS_KSET(PKEY_CHECK))
6824 set_partition_keys(ppd);
6828 if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
6829 dd_dev_info(ppd->dd,
6830 "%s: which %s, val 0x%x: not implemented\n",
6831 __func__, ib_cfg_name(which), val);
6837 /* begin functions related to vl arbitration table caching */
/*
 * Initialize the spinlock of each ppd->vl_arb_cache[] entry, indexes
 * 0 up to MAX_PRIO_TABLE - 1.  The BUILD_BUG_ON()s require at compile
 * time that the low and high priority table sizes both equal
 * VL_ARB_TABLE_SIZE, the element count the cache copy helpers use.
 */
6838 static void init_vl_arb_caches(struct hfi1_pportdata *ppd)
6842 BUILD_BUG_ON(VL_ARB_TABLE_SIZE !=
6843 VL_ARB_LOW_PRIO_TABLE_SIZE);
6844 BUILD_BUG_ON(VL_ARB_TABLE_SIZE !=
6845 VL_ARB_HIGH_PRIO_TABLE_SIZE);
6848 * Note that we always return values directly from the
6849 * 'vl_arb_cache' (and do no CSR reads) in response to a
6850 * 'Get(VLArbTable)'. This is obviously correct after a
6851 * 'Set(VLArbTable)', since the cache will then be up to
6852 * date. But it's also correct prior to any 'Set(VLArbTable)'
6853 * since then both the cache, and the relevant h/w registers
6857 for (i = 0; i < MAX_PRIO_TABLE; i++)
6858 spin_lock_init(&ppd->vl_arb_cache[i].lock);
6864 * All other vl_arb_* functions should be called only after locking
/*
 * Take the spinlock of ppd->vl_arb_cache[idx] and return a pointer to
 * that cache entry.  @idx is first compared against LO_PRIO_TABLE and
 * HI_PRIO_TABLE.  Pair with vl_arb_unlock_cache() on the same @idx.
 */
6867 static inline struct vl_arb_cache *
6868 vl_arb_lock_cache(struct hfi1_pportdata *ppd, int idx)
6870 if (idx != LO_PRIO_TABLE && idx != HI_PRIO_TABLE)
6872 spin_lock(&ppd->vl_arb_cache[idx].lock);
6873 return &ppd->vl_arb_cache[idx];
/* Release the spinlock of ppd->vl_arb_cache[idx]. */
6876 static inline void vl_arb_unlock_cache(struct hfi1_pportdata *ppd, int idx)
6878 spin_unlock(&ppd->vl_arb_cache[idx].lock);
/* Copy VL_ARB_TABLE_SIZE elements from cache->table into @vl. */
6881 static void vl_arb_get_cache(struct vl_arb_cache *cache,
6882 struct ib_vl_weight_elem *vl)
6884 memcpy(vl, cache->table, VL_ARB_TABLE_SIZE * sizeof(*vl));
/* Overwrite cache->table with VL_ARB_TABLE_SIZE elements taken from @vl. */
6887 static void vl_arb_set_cache(struct vl_arb_cache *cache,
6888 struct ib_vl_weight_elem *vl)
6890 memcpy(cache->table, vl, VL_ARB_TABLE_SIZE * sizeof(*vl));
/*
 * Return non-zero when cache->table and @vl compare equal byte for
 * byte over VL_ARB_TABLE_SIZE elements (memcmp() returned 0), and 0
 * otherwise.
 */
6893 static int vl_arb_match_cache(struct vl_arb_cache *cache,
6894 struct ib_vl_weight_elem *vl)
6896 return !memcmp(cache->table, vl, VL_ARB_TABLE_SIZE * sizeof(*vl));
6898 /* end functions related to vl arbitration table caching */
/*
 * set_vl_weights() - write a VL arbitration list to the hardware.
 * @ppd:    port; its hls_lock is held across the update
 * @target: CSR offset of element 0; element i is written at
 *          target + (i * 8)
 * @size:   number of elements to write from @vl
 * @vl:     VL/weight elements, consumed in order
 *
 * drain is set only when the link is up and !is_ax(dd).  The
 * stop_drain_data_vls() and open_fill_data_vls() calls bracket the CSR
 * writes so that the per-VL FIFOs are emptied before the weights
 * change; presumably both calls are conditional on drain - confirm.
 * PSC_GLOBAL_VLARB_ENABLE is sent after the list has been written.
 */
6900 static int set_vl_weights(struct hfi1_pportdata *ppd, u32 target,
6901 u32 size, struct ib_vl_weight_elem *vl)
6903 struct hfi1_devdata *dd = ppd->dd;
6905 unsigned int i, is_up = 0;
6908 mutex_lock(&ppd->hls_lock);
6910 if (ppd->host_link_state & HLS_UP)
6913 drain = !is_ax(dd) && is_up;
6917 * Before adjusting VL arbitration weights, empty per-VL
6918 * FIFOs, otherwise a packet whose VL weight is being
6919 * set to 0 could get stuck in a FIFO with no chance to
6922 ret = stop_drain_data_vls(dd);
6927 "%s: cannot stop/drain VLs - refusing to change VL arbitration weights\n",
6932 for (i = 0; i < size; i++, vl++) {
6934 * NOTE: The low priority shift and mask are used here, but
6935 * they are the same for both the low and high registers.
6937 reg = (((u64)vl->vl & SEND_LOW_PRIORITY_LIST_VL_MASK)
6938 << SEND_LOW_PRIORITY_LIST_VL_SHIFT)
6940 & SEND_LOW_PRIORITY_LIST_WEIGHT_MASK)
6941 << SEND_LOW_PRIORITY_LIST_WEIGHT_SHIFT);
6942 write_csr(dd, target + (i * 8), reg);
6944 pio_send_control(dd, PSC_GLOBAL_VLARB_ENABLE);
6947 open_fill_data_vls(dd); /* reopen all VLs */
6950 mutex_unlock(&ppd->hls_lock);
6956 * Read one credit merge VL register.
/*
 * Read @csr once and store its dedicated limit and shared limit
 * fields in @vll.  Each field is shifted down, masked, and converted
 * with cpu_to_be16() before being stored.
 */
6958 static void read_one_cm_vl(struct hfi1_devdata *dd, u32 csr,
6959 struct vl_limit *vll)
6961 u64 reg = read_csr(dd, csr);
6963 vll->dedicated = cpu_to_be16(
6964 (reg >> SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT)
6965 & SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_MASK);
6966 vll->shared = cpu_to_be16(
6967 (reg >> SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SHIFT)
6968 & SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_MASK);
6972 * Read the current credit merge limits.
/*
 * get_buffer_control() - snapshot the credit limits from the CSRs.
 * @dd:            device to read
 * @bc:            cleared with memset(), then filled in for the
 *                 TXE_NUM_DATA_VL data VLs, for VL15, and with the
 *                 overall shared limit; fields are stored via
 *                 cpu_to_be16()
 * @overall_limit: receives the total credit limit field of
 *                 SEND_CM_GLOBAL_CREDIT, without byte swapping
 *
 * Returns sizeof(struct buffer_control).
 */
6974 static int get_buffer_control(struct hfi1_devdata *dd,
6975 struct buffer_control *bc, u16 *overall_limit)
6980 /* not all entries are filled in */
6981 memset(bc, 0, sizeof(*bc));
6983 /* OPA and HFI have a 1-1 mapping */
6984 for (i = 0; i < TXE_NUM_DATA_VL; i++)
6985 read_one_cm_vl(dd, SEND_CM_CREDIT_VL + (8*i), &bc->vl[i]);
6987 /* NOTE: assumes that VL* and VL15 CSRs are bit-wise identical */
6988 read_one_cm_vl(dd, SEND_CM_CREDIT_VL15, &bc->vl[15]);
6990 reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
6991 bc->overall_shared_limit = cpu_to_be16(
6992 (reg >> SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT)
6993 & SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_MASK);
6995 *overall_limit = (reg
6996 >> SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT)
6997 & SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_MASK;
6998 return sizeof(struct buffer_control);
/*
 * Unpack the two SC->VLnt CSRs into dp->vlnt[0..31].  Each register
 * is walked one byte at a time through a u8 pointer to the local reg;
 * every byte holds two 4-bit entries, the low nibble for the even
 * index and the high nibble for the odd index that follows.
 * DCC_CFG_SC_VL_TABLE_15_0 fills entries 0-15 and
 * DCC_CFG_SC_VL_TABLE_31_16 fills entries 16-31.
 *
 * Returns sizeof(struct sc2vlnt).
 */
7001 static int get_sc2vlnt(struct hfi1_devdata *dd, struct sc2vlnt *dp)
7006 /* each register contains 16 SC->VLnt mappings, 4 bits each */
7007 reg = read_csr(dd, DCC_CFG_SC_VL_TABLE_15_0);
7008 for (i = 0; i < sizeof(u64); i++) {
7009 u8 byte = *(((u8 *)&reg) + i);
7011 dp->vlnt[2 * i] = byte & 0xf;
7012 dp->vlnt[(2 * i) + 1] = (byte & 0xf0) >> 4;
7015 reg = read_csr(dd, DCC_CFG_SC_VL_TABLE_31_16);
7016 for (i = 0; i < sizeof(u64); i++) {
7017 u8 byte = *(((u8 *)&reg) + i);
7019 dp->vlnt[16 + (2 * i)] = byte & 0xf;
7020 dp->vlnt[16 + (2 * i) + 1] = (byte & 0xf0) >> 4;
7022 return sizeof(struct sc2vlnt);
/* Visit @nelems consecutive elements starting at @vl, one per loop pass. */
7025 static void get_vlarb_preempt(struct hfi1_devdata *dd, u32 nelems,
7026 struct ib_vl_weight_elem *vl)
7030 for (i = 0; i < nelems; i++, vl++) {
/*
 * Write the 32 SC->VLnt entries of @dp to the hardware: entries 0-15
 * go to DCC_CFG_SC_VL_TABLE_15_0 and entries 16-31 go to
 * DCC_CFG_SC_VL_TABLE_31_16.  Every entry is masked to its low 4 bits
 * and passed together with its index.
 */
7036 static void set_sc2vlnt(struct hfi1_devdata *dd, struct sc2vlnt *dp)
7038 write_csr(dd, DCC_CFG_SC_VL_TABLE_15_0,
7040 0, dp->vlnt[0] & 0xf,
7041 1, dp->vlnt[1] & 0xf,
7042 2, dp->vlnt[2] & 0xf,
7043 3, dp->vlnt[3] & 0xf,
7044 4, dp->vlnt[4] & 0xf,
7045 5, dp->vlnt[5] & 0xf,
7046 6, dp->vlnt[6] & 0xf,
7047 7, dp->vlnt[7] & 0xf,
7048 8, dp->vlnt[8] & 0xf,
7049 9, dp->vlnt[9] & 0xf,
7050 10, dp->vlnt[10] & 0xf,
7051 11, dp->vlnt[11] & 0xf,
7052 12, dp->vlnt[12] & 0xf,
7053 13, dp->vlnt[13] & 0xf,
7054 14, dp->vlnt[14] & 0xf,
7055 15, dp->vlnt[15] & 0xf));
7056 write_csr(dd, DCC_CFG_SC_VL_TABLE_31_16,
7058 16, dp->vlnt[16] & 0xf,
7059 17, dp->vlnt[17] & 0xf,
7060 18, dp->vlnt[18] & 0xf,
7061 19, dp->vlnt[19] & 0xf,
7062 20, dp->vlnt[20] & 0xf,
7063 21, dp->vlnt[21] & 0xf,
7064 22, dp->vlnt[22] & 0xf,
7065 23, dp->vlnt[23] & 0xf,
7066 24, dp->vlnt[24] & 0xf,
7067 25, dp->vlnt[25] & 0xf,
7068 26, dp->vlnt[26] & 0xf,
7069 27, dp->vlnt[27] & 0xf,
7070 28, dp->vlnt[28] & 0xf,
7071 29, dp->vlnt[29] & 0xf,
7072 30, dp->vlnt[30] & 0xf,
7073 31, dp->vlnt[31] & 0xf));
/*
 * Report, through dd_dev_info(), an invalid limit that is being
 * ignored.  The message names the kind of limit (@what), the limit
 * value cast to int, and the VL index (@idx).
 */
7076 static void nonzero_msg(struct hfi1_devdata *dd, int idx, const char *what,
7080 dd_dev_info(dd, "Invalid %s limit %d on VL %d, ignoring\n",
7081 what, (int)limit, idx);
7084 /* change only the shared limit portion of SendCmGlobalCredit */
/*
 * Read-modify-write of SEND_CM_GLOBAL_CREDIT: the shared limit field
 * is cleared with its SMASK and @limit is inserted at
 * SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT; other bits are kept.
 */
7085 static void set_global_shared(struct hfi1_devdata *dd, u16 limit)
7089 reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
7090 reg &= ~SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SMASK;
7091 reg |= (u64)limit << SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT;
7092 write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg);
7095 /* change only the total credit limit portion of SendCmGlobalCredit */
/*
 * Read-modify-write of SEND_CM_GLOBAL_CREDIT: the total credit limit
 * field is cleared with its SMASK and @limit is inserted at
 * SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT; other bits are kept.
 */
7096 static void set_global_limit(struct hfi1_devdata *dd, u16 limit)
7100 reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
7101 reg &= ~SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SMASK;
7102 reg |= (u64)limit << SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT;
7103 write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg);
7106 /* set the given per-VL shared limit */
/*
 * Read-modify-write of one per-VL credit CSR, replacing only its
 * shared limit field with @limit.  A @vl below TXE_NUM_DATA_VL selects
 * SEND_CM_CREDIT_VL + (8 * vl); SEND_CM_CREDIT_VL15 is the alternative
 * address.
 */
7107 static void set_vl_shared(struct hfi1_devdata *dd, int vl, u16 limit)
7112 if (vl < TXE_NUM_DATA_VL)
7113 addr = SEND_CM_CREDIT_VL + (8 * vl);
7115 addr = SEND_CM_CREDIT_VL15;
7117 reg = read_csr(dd, addr);
7118 reg &= ~SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SMASK;
7119 reg |= (u64)limit << SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SHIFT;
7120 write_csr(dd, addr, reg);
7123 /* set the given per-VL dedicated limit */
/*
 * Read-modify-write of one per-VL credit CSR, replacing only its
 * dedicated limit field with @limit.  A @vl below TXE_NUM_DATA_VL
 * selects SEND_CM_CREDIT_VL + (8 * vl); SEND_CM_CREDIT_VL15 is the
 * alternative address.
 */
7124 static void set_vl_dedicated(struct hfi1_devdata *dd, int vl, u16 limit)
7129 if (vl < TXE_NUM_DATA_VL)
7130 addr = SEND_CM_CREDIT_VL + (8 * vl);
7132 addr = SEND_CM_CREDIT_VL15;
7134 reg = read_csr(dd, addr);
7135 reg &= ~SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SMASK;
7136 reg |= (u64)limit << SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT;
7137 write_csr(dd, addr, reg);
7140 /* spin until the given per-VL status mask bits clear */
/*
 * wait_for_vl_status_clear() - poll SEND_CM_CREDIT_USED_STATUS.
 * @dd:   device to poll
 * @mask: status bits of interest; only these bits of the CSR are kept
 *
 * The deadline is VL_STATUS_CLEAR_TIMEOUT milliseconds after entry,
 * tracked in jiffies.  When the deadline passes, the mask and the bits
 * that did not clear are logged and the function lets the caller
 * carry on; the log text recommends a link bounce.
 */
7141 static void wait_for_vl_status_clear(struct hfi1_devdata *dd, u64 mask,
7144 unsigned long timeout;
7147 timeout = jiffies + msecs_to_jiffies(VL_STATUS_CLEAR_TIMEOUT);
7149 reg = read_csr(dd, SEND_CM_CREDIT_USED_STATUS) & mask;
7152 return; /* success */
7153 if (time_after(jiffies, timeout))
7154 break; /* timed out */
7159 "%s credit change status not clearing after %dms, mask 0x%llx, not clear 0x%llx\n",
7160 which, VL_STATUS_CLEAR_TIMEOUT, mask, reg);
7162 * If this occurs, it is likely there was a credit loss on the link.
7163 * The only recovery from that is a link bounce.
7166 "Continuing anyway. A credit loss may occur. Suggest a link bounce\n");
7170 * The number of credits on the VLs may be changed while everything
7171 * is "live", but the following algorithm must be followed due to
7172 * how the hardware is actually implemented. In particular,
7173 * Return_Credit_Status[] is the only correct status check.
7175 * if (reducing Global_Shared_Credit_Limit or any shared limit changing)
7176 * set Global_Shared_Credit_Limit = 0
7178 * mask0 = all VLs that are changing either dedicated or shared limits
7179 * set Shared_Limit[mask0] = 0
7180 * spin until Return_Credit_Status[use_all_vl ? all VL : mask0] == 0
7181 * if (changing any dedicated limit)
7182 * mask1 = all VLs that are lowering dedicated limits
7183 * lower Dedicated_Limit[mask1]
7184 * spin until Return_Credit_Status[mask1] == 0
7185 * raise Dedicated_Limits
7186 * raise Shared_Limits
7187 * raise Global_Shared_Credit_Limit
7189 * lower = if the new limit is lower, set the limit to the new value
7190 * raise = if the new limit is higher than the current value (may be changed
7191 * earlier in the algorithm), set the new limit to the new value
/*
 * set_buffer_control() - apply a new set of credit limits.
 * @dd:     device to program
 * @new_bc: requested limits, stored big-endian; entries for unused
 *          VLs are reported through nonzero_msg() and zeroed in place
 *
 * The requested dedicated limits and the overall shared limit are
 * summed into new_total and compared against dd->link_credits before
 * the current values are fetched.  changing_mask collects the status
 * bits of every VL whose dedicated or shared limit differs from the
 * current one, and ld_mask those whose dedicated limit is going down;
 * both are later passed to wait_for_vl_status_clear().  The total
 * credit limit is raised before the per-VL changes when the new total
 * is larger and lowered after them when it is smaller, so the global
 * limit brackets the change.
 */
7193 static int set_buffer_control(struct hfi1_devdata *dd,
7194 struct buffer_control *new_bc)
7196 u64 changing_mask, ld_mask, stat_mask;
7198 int i, use_all_mask;
7199 int this_shared_changing;
7201 * A0: add the variable any_shared_limit_changing below and in the
7202 * algorithm above. If removing A0 support, it can be removed.
7204 int any_shared_limit_changing;
7205 struct buffer_control cur_bc;
7206 u8 changing[OPA_MAX_VLS];
7207 u8 lowering_dedicated[OPA_MAX_VLS];
7210 const u64 all_mask =
7211 SEND_CM_CREDIT_USED_STATUS_VL0_RETURN_CREDIT_STATUS_SMASK
7212 | SEND_CM_CREDIT_USED_STATUS_VL1_RETURN_CREDIT_STATUS_SMASK
7213 | SEND_CM_CREDIT_USED_STATUS_VL2_RETURN_CREDIT_STATUS_SMASK
7214 | SEND_CM_CREDIT_USED_STATUS_VL3_RETURN_CREDIT_STATUS_SMASK
7215 | SEND_CM_CREDIT_USED_STATUS_VL4_RETURN_CREDIT_STATUS_SMASK
7216 | SEND_CM_CREDIT_USED_STATUS_VL5_RETURN_CREDIT_STATUS_SMASK
7217 | SEND_CM_CREDIT_USED_STATUS_VL6_RETURN_CREDIT_STATUS_SMASK
7218 | SEND_CM_CREDIT_USED_STATUS_VL7_RETURN_CREDIT_STATUS_SMASK
7219 | SEND_CM_CREDIT_USED_STATUS_VL15_RETURN_CREDIT_STATUS_SMASK;
7221 #define valid_vl(idx) ((idx) < TXE_NUM_DATA_VL || (idx) == 15)
7222 #define NUM_USABLE_VLS 16 /* look at VL15 and less */
7225 /* find the new total credits, do sanity check on unused VLs */
7226 for (i = 0; i < OPA_MAX_VLS; i++) {
7228 new_total += be16_to_cpu(new_bc->vl[i].dedicated);
7231 nonzero_msg(dd, i, "dedicated",
7232 be16_to_cpu(new_bc->vl[i].dedicated));
7233 nonzero_msg(dd, i, "shared",
7234 be16_to_cpu(new_bc->vl[i].shared));
7235 new_bc->vl[i].dedicated = 0;
7236 new_bc->vl[i].shared = 0;
7238 new_total += be16_to_cpu(new_bc->overall_shared_limit);
7239 if (new_total > (u32)dd->link_credits)
7241 /* fetch the current values */
7242 get_buffer_control(dd, &cur_bc, &cur_total);
7245 * Create the masks we will use.
7247 memset(changing, 0, sizeof(changing));
7248 memset(lowering_dedicated, 0, sizeof(lowering_dedicated));
7249 /* NOTE: Assumes that the individual VL bits are adjacent and in
7252 SEND_CM_CREDIT_USED_STATUS_VL0_RETURN_CREDIT_STATUS_SMASK;
7256 any_shared_limit_changing = 0;
7257 for (i = 0; i < NUM_USABLE_VLS; i++, stat_mask <<= 1) {
7260 this_shared_changing = new_bc->vl[i].shared
7261 != cur_bc.vl[i].shared;
7262 if (this_shared_changing)
7263 any_shared_limit_changing = 1;
7264 if (new_bc->vl[i].dedicated != cur_bc.vl[i].dedicated
7265 || this_shared_changing) {
7267 changing_mask |= stat_mask;
7270 if (be16_to_cpu(new_bc->vl[i].dedicated) <
7271 be16_to_cpu(cur_bc.vl[i].dedicated)) {
7272 lowering_dedicated[i] = 1;
7273 ld_mask |= stat_mask;
7277 /* bracket the credit change with a total adjustment */
7278 if (new_total > cur_total)
7279 set_global_limit(dd, new_total);
7282 * Start the credit change algorithm.
7285 if ((be16_to_cpu(new_bc->overall_shared_limit) <
7286 be16_to_cpu(cur_bc.overall_shared_limit))
7287 || (is_a0(dd) && any_shared_limit_changing)) {
7288 set_global_shared(dd, 0);
7289 cur_bc.overall_shared_limit = 0;
7293 for (i = 0; i < NUM_USABLE_VLS; i++) {
7298 set_vl_shared(dd, i, 0);
7299 cur_bc.vl[i].shared = 0;
7303 wait_for_vl_status_clear(dd, use_all_mask ? all_mask : changing_mask,
7306 if (change_count > 0) {
7307 for (i = 0; i < NUM_USABLE_VLS; i++) {
7311 if (lowering_dedicated[i]) {
7312 set_vl_dedicated(dd, i,
7313 be16_to_cpu(new_bc->vl[i].dedicated));
7314 cur_bc.vl[i].dedicated =
7315 new_bc->vl[i].dedicated;
7319 wait_for_vl_status_clear(dd, ld_mask, "dedicated");
7321 /* now raise all dedicated that are going up */
7322 for (i = 0; i < NUM_USABLE_VLS; i++) {
7326 if (be16_to_cpu(new_bc->vl[i].dedicated) >
7327 be16_to_cpu(cur_bc.vl[i].dedicated))
7328 set_vl_dedicated(dd, i,
7329 be16_to_cpu(new_bc->vl[i].dedicated));
7333 /* next raise all shared that are going up */
7334 for (i = 0; i < NUM_USABLE_VLS; i++) {
7338 if (be16_to_cpu(new_bc->vl[i].shared) >
7339 be16_to_cpu(cur_bc.vl[i].shared))
7340 set_vl_shared(dd, i, be16_to_cpu(new_bc->vl[i].shared));
7343 /* finally raise the global shared */
7344 if (be16_to_cpu(new_bc->overall_shared_limit) >
7345 be16_to_cpu(cur_bc.overall_shared_limit))
7346 set_global_shared(dd,
7347 be16_to_cpu(new_bc->overall_shared_limit));
7349 /* bracket the credit change with a total adjustment */
7350 if (new_total < cur_total)
7351 set_global_limit(dd, new_total);
7356 * Read the given fabric manager table. Return the size of the
7357 * table (in bytes) on success, and a negative error code on
/*
 * fm_get_table() - copy one fabric manager table into @t.
 * @ppd:   port to read from
 * @which: FM_TBL_* selector
 * @t:     destination buffer, interpreted according to @which
 *
 * The high and low VL arbitration tables are copied out of the
 * software cache (vl_arb_cache) while its lock is held.  For buffer
 * control and SC2VLNT the helper reads the hardware and its return
 * value is used as the size.  The preempt elements are produced by
 * get_vlarb_preempt() for OPA_MAX_VLS entries.
 */
7360 int fm_get_table(struct hfi1_pportdata *ppd, int which, void *t)
7364 struct vl_arb_cache *vlc;
7367 case FM_TBL_VL_HIGH_ARB:
7370 * OPA specifies 128 elements (of 2 bytes each), though
7371 * HFI supports only 16 elements in h/w.
7373 vlc = vl_arb_lock_cache(ppd, HI_PRIO_TABLE);
7374 vl_arb_get_cache(vlc, t);
7375 vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
7377 case FM_TBL_VL_LOW_ARB:
7380 * OPA specifies 128 elements (of 2 bytes each), though
7381 * HFI supports only 16 elements in h/w.
7383 vlc = vl_arb_lock_cache(ppd, LO_PRIO_TABLE);
7384 vl_arb_get_cache(vlc, t);
7385 vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
7387 case FM_TBL_BUFFER_CONTROL:
7388 size = get_buffer_control(ppd->dd, t, NULL);
7390 case FM_TBL_SC2VLNT:
7391 size = get_sc2vlnt(ppd->dd, t);
7393 case FM_TBL_VL_PREEMPT_ELEMS:
7395 /* OPA specifies 128 elements, of 2 bytes each */
7396 get_vlarb_preempt(ppd->dd, OPA_MAX_VLS, t);
7398 case FM_TBL_VL_PREEMPT_MATRIX:
7401 * OPA specifies that this is the same size as the VL
7402 * arbitration tables (i.e., 256 bytes).
7412 * Write the given fabric manager table.
/*
 * fm_set_table() - store one fabric manager table taken from @t.
 * @ppd:   port to update
 * @which: FM_TBL_* selector
 * @t:     source buffer, interpreted according to @which
 *
 * For the high and low VL arbitration tables the new contents are
 * first compared with the cache under its lock.  On a match the lock
 * is released, presumably skipping the hardware update - confirm.
 * Otherwise the cache is overwritten, the lock is dropped, and
 * set_vl_weights() programs the list.  Buffer control and SC2VLNT are
 * passed straight to their set helpers.
 */
7414 int fm_set_table(struct hfi1_pportdata *ppd, int which, void *t)
7417 struct vl_arb_cache *vlc;
7420 case FM_TBL_VL_HIGH_ARB:
7421 vlc = vl_arb_lock_cache(ppd, HI_PRIO_TABLE);
7422 if (vl_arb_match_cache(vlc, t)) {
7423 vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
7426 vl_arb_set_cache(vlc, t);
7427 vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
7428 ret = set_vl_weights(ppd, SEND_HIGH_PRIORITY_LIST,
7429 VL_ARB_HIGH_PRIO_TABLE_SIZE, t);
7431 case FM_TBL_VL_LOW_ARB:
7432 vlc = vl_arb_lock_cache(ppd, LO_PRIO_TABLE);
7433 if (vl_arb_match_cache(vlc, t)) {
7434 vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
7437 vl_arb_set_cache(vlc, t);
7438 vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
7439 ret = set_vl_weights(ppd, SEND_LOW_PRIORITY_LIST,
7440 VL_ARB_LOW_PRIO_TABLE_SIZE, t);
7442 case FM_TBL_BUFFER_CONTROL:
7443 ret = set_buffer_control(ppd->dd, t);
7445 case FM_TBL_SC2VLNT:
7446 set_sc2vlnt(ppd->dd, t);
7455 * Disable all data VLs.
7457 * Return 0 if disabled, non-zero if the VLs cannot be disabled.
/* Issues PSC_DATA_VL_DISABLE to the device through pio_send_control(). */
7459 static int disable_data_vls(struct hfi1_devdata *dd)
7464 pio_send_control(dd, PSC_DATA_VL_DISABLE);
7470 * open_fill_data_vls() - the counterpart to stop_drain_data_vls().
7471 * Just re-enables all data VLs (the "fill" part happens
7472 * automatically - the name was chosen for symmetry with
7473 * stop_drain_data_vls()).
7475 * Return 0 if successful, non-zero if the VLs cannot be enabled.
/* Issues PSC_DATA_VL_ENABLE to the device through pio_send_control(). */
7477 int open_fill_data_vls(struct hfi1_devdata *dd)
7482 pio_send_control(dd, PSC_DATA_VL_ENABLE);
7488 * drain_data_vls() - assumes that disable_data_vls() has been called,
7489 * wait for occupancy (of per-VL FIFOs) for all contexts, and SDMA
7490 * engines to drop to 0.
/* Calls pause_for_credit_return() on @dd as part of the drain sequence. */
7492 static void drain_data_vls(struct hfi1_devdata *dd)
7496 pause_for_credit_return(dd);
7500 * stop_drain_data_vls() - disable, then drain all per-VL fifos.
7502 * Use open_fill_data_vls() to resume using data VLs. This pair is
7503 * meant to be used like this:
7505 * stop_drain_data_vls(dd);
7506 * // do things with per-VL resources
7507 * open_fill_data_vls(dd);
/* Begins by calling disable_data_vls() and keeping its result in ret. */
7509 int stop_drain_data_vls(struct hfi1_devdata *dd)
7513 ret = disable_data_vls(dd);
7521 * Convert a nanosecond time to a cclock count. No matter how slow
7522 * the cclock, a non-zero ns will always have a non-zero result.
/*
 * @ns is scaled to picoseconds (ns * 1000) and divided by the cclock
 * period: FPGA_CCLOCK_PS when dd->icode is ICODE_FPGA_EMULATION,
 * ASIC_CCLOCK_PS in every other case.
 *
 * NOTE(review): @ns is a u32, so ns * 1000 looks like it is evaluated
 * in 32 bits and would wrap for @ns above about 4.29 million - confirm
 * that callers stay below that.
 */
7524 u32 ns_to_cclock(struct hfi1_devdata *dd, u32 ns)
7528 if (dd->icode == ICODE_FPGA_EMULATION)
7529 cclocks = (ns * 1000) / FPGA_CCLOCK_PS;
7530 else /* simulation pretends to be ASIC */
7531 cclocks = (ns * 1000) / ASIC_CCLOCK_PS;
7532 if (ns && !cclocks) /* if ns nonzero, must be at least 1 */
7538 * Convert a cclock count to nanoseconds. No matter how slow
7539 * the cclock, a non-zero cclocks will always have a non-zero result.
/*
 * @cclocks is multiplied by the cclock period in picoseconds
 * (FPGA_CCLOCK_PS when dd->icode is ICODE_FPGA_EMULATION,
 * ASIC_CCLOCK_PS in every other case) and divided by 1000 to give
 * nanoseconds.
 */
7541 u32 cclock_to_ns(struct hfi1_devdata *dd, u32 cclocks)
7545 if (dd->icode == ICODE_FPGA_EMULATION)
7546 ns = (cclocks * FPGA_CCLOCK_PS) / 1000;
7547 else /* simulation pretends to be ASIC */
7548 ns = (cclocks * ASIC_CCLOCK_PS) / 1000;
7555 * Dynamically adjust the receive interrupt timeout for a context based on
7556 * incoming packet rate.
7558 * NOTE: Dynamic adjustment does not allow rcv_intr_count to be zero.
/*
 * @rcd:   receive context whose rcvavail_timeout is adjusted
 * @npkts: number of packets received in this interrupt
 *
 * With fewer than rcv_intr_count packets the timeout is on the
 * halving side, where a value below 2 already counts as the minimum.
 * Otherwise it is doubled and capped at dd->rcv_intr_timeout_csr,
 * unless it is already at that maximum.  The new value is cached in
 * rcd->rcvavail_timeout and written to the context's
 * RCV_AVAIL_TIME_OUT CSR.
 */
7560 static void adjust_rcv_timeout(struct hfi1_ctxtdata *rcd, u32 npkts)
7562 struct hfi1_devdata *dd = rcd->dd;
7563 u32 timeout = rcd->rcvavail_timeout;
7566 * This algorithm doubles or halves the timeout depending on whether
7567 * the number of packets received in this interrupt were less than or
7568 * greater equal the interrupt count.
7570 * The calculations below do not allow a steady state to be achieved.
7571 * Only at the endpoints it is possible to have an unchanging
7574 if (npkts < rcv_intr_count) {
7576 * Not enough packets arrived before the timeout, adjust
7579 if (timeout < 2) /* already at minimum? */
7584 * More than enough packets arrived before the timeout, adjust
7587 if (timeout >= dd->rcv_intr_timeout_csr) /* already at max? */
7589 timeout = min(timeout << 1, dd->rcv_intr_timeout_csr);
7592 rcd->rcvavail_timeout = timeout;
7593 /* timeout cannot be larger than rcv_intr_timeout_csr which has already
7594 been verified to be in range */
7595 write_kctxt_csr(dd, rcd->ctxt, RCV_AVAIL_TIME_OUT,
7596 (u64)timeout << RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
/*
 * update_usrhead() - publish new receive head values for a context.
 * @rcd:         receive context being updated
 * @hd:          new header queue head, masked with
 *               RCV_HDR_HEAD_HEAD_MASK
 * @updegr:      presumably selects whether RCV_EGR_INDEX_HEAD is
 *               written - confirm
 * @egrhd:       new eager index head, masked with
 *               RCV_EGR_INDEX_HEAD_HEAD_MASK
 * @intr_adjust: presumably selects whether adjust_rcv_timeout() is
 *               called - confirm
 * @npkts:       packet count handed to adjust_rcv_timeout()
 *
 * The timeout adjustment comes before the RCV_HDR_HEAD write so the
 * hardware picks up the new value when it restarts counting.
 * RCV_HDR_HEAD is written with rcv_intr_count in its counter field
 * together with @hd.
 */
7599 void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd,
7600 u32 intr_adjust, u32 npkts)
7602 struct hfi1_devdata *dd = rcd->dd;
7604 u32 ctxt = rcd->ctxt;
7607 * Need to write timeout register before updating RcvHdrHead to ensure
7608 * that a new value is used when the HW decides to restart counting.
7611 adjust_rcv_timeout(rcd, npkts);
7613 reg = (egrhd & RCV_EGR_INDEX_HEAD_HEAD_MASK)
7614 << RCV_EGR_INDEX_HEAD_HEAD_SHIFT;
7615 write_uctxt_csr(dd, ctxt, RCV_EGR_INDEX_HEAD, reg);
7618 reg = ((u64)rcv_intr_count << RCV_HDR_HEAD_COUNTER_SHIFT) |
7619 (((u64)hd & RCV_HDR_HEAD_HEAD_MASK)
7620 << RCV_HDR_HEAD_HEAD_SHIFT);
7621 write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, reg);
/*
 * Return non-zero when the context's receive header queue head equals
 * its tail.  The head is the head field of the RCV_HDR_HEAD CSR.  The
 * tail comes from get_rcvhdrtail() when rcd->rcvhdrtail_kvaddr is set;
 * the RCV_HDR_TAIL CSR is the other source.
 */
7625 u32 hdrqempty(struct hfi1_ctxtdata *rcd)
7629 head = (read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD)
7630 & RCV_HDR_HEAD_HEAD_SMASK) >> RCV_HDR_HEAD_HEAD_SHIFT;
7632 if (rcd->rcvhdrtail_kvaddr)
7633 tail = get_rcvhdrtail(rcd);
7635 tail = read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);
7637 return head == tail;
7641 * Context Control and Receive Array encoding for buffer size:
7650 * 0x8 512 KB (Receive Array only)
7651 * 0x9 1 MB (Receive Array only)
7652 * 0xa 2 MB (Receive Array only)
7654 * 0xB-0xF - reserved (Receive Array only)
7657 * This routine assumes that the value has already been sanity checked.
/*
 * Map a buffer size in bytes to its hardware encoding.  4 KB maps to
 * 0x1 and each doubling adds one, up to 2 MB at 0xa.  A size that is
 * not one of the listed values yields 0x1, the minimum.
 */
7659 static u32 encoded_size(u32 size)
7662 case 4*1024: return 0x1;
7663 case 8*1024: return 0x2;
7664 case 16*1024: return 0x3;
7665 case 32*1024: return 0x4;
7666 case 64*1024: return 0x5;
7667 case 128*1024: return 0x6;
7668 case 256*1024: return 0x7;
7669 case 512*1024: return 0x8;
7670 case 1*1024*1024: return 0x9;
7671 case 2*1024*1024: return 0xa;
7673 return 0x1; /* if invalid, go with the minimum size */
/*
 * hfi1_rcvctrl() - change the receive control settings of a context.
 * @dd:   device
 * @op:   bit mask of HFI1_RCVCTRL_* operations to apply
 * @ctxt: index into dd->rcd[]
 *
 * RCV_CTXT_CTRL is read, modified according to @op, cached in
 * rcd->rcvctrl and written back.  Enabling a context that is not
 * already enabled also reprograms the header queue addresses, zeroes
 * the header queue memory, sets the starting timeout, zeroes the
 * header and eager heads, and programs the eager and TID counts and
 * base indexes.  The interrupt timeout and RcvHdrHead.Counter are
 * written after the RCV_CTXT_CTRL write because they only take effect
 * once the context is enabled.
 *
 * NOTE(review): RCV_HDR_TAIL_ADDR is cleared when either
 * HFI1_RCVCTRL_TAILUPD_DIS or HFI1_RCVCTRL_CTXT_DIS is set in @op,
 * while the comment next to that test describes both conditions
 * together - confirm which is intended.
 */
7676 void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
7678 struct hfi1_ctxtdata *rcd;
7682 rcd = dd->rcd[ctxt];
7686 hfi1_cdbg(RCVCTRL, "ctxt %d op 0x%x", ctxt, op);
7688 rcvctrl = read_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL);
7689 /* if the context already enabled, don't do the extra steps */
7690 if ((op & HFI1_RCVCTRL_CTXT_ENB)
7691 && !(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) {
7692 /* reset the tail and hdr addresses, and sequence count */
7693 write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
7695 if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
7696 write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
7697 rcd->rcvhdrqtailaddr_phys);
7700 /* reset the cached receive header queue head value */
7704 * Zero the receive header queue so we don't get false
7705 * positives when checking the sequence number. The
7706 * sequence numbers could land exactly on the same spot.
7707 * E.g. a rcd restart before the receive header wrapped.
7709 memset(rcd->rcvhdrq, 0, rcd->rcvhdrq_size);
7711 /* starting timeout */
7712 rcd->rcvavail_timeout = dd->rcv_intr_timeout_csr;
7714 /* enable the context */
7715 rcvctrl |= RCV_CTXT_CTRL_ENABLE_SMASK;
7717 /* clean the egr buffer size first */
7718 rcvctrl &= ~RCV_CTXT_CTRL_EGR_BUF_SIZE_SMASK;
7719 rcvctrl |= ((u64)encoded_size(rcd->egrbufs.rcvtid_size)
7720 & RCV_CTXT_CTRL_EGR_BUF_SIZE_MASK)
7721 << RCV_CTXT_CTRL_EGR_BUF_SIZE_SHIFT;
7723 /* zero RcvHdrHead - set RcvHdrHead.Counter after enable */
7724 write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0);
7727 /* zero RcvEgrIndexHead */
7728 write_uctxt_csr(dd, ctxt, RCV_EGR_INDEX_HEAD, 0);
7730 /* set eager count and base index */
7731 reg = (((u64)(rcd->egrbufs.alloced >> RCV_SHIFT)
7732 & RCV_EGR_CTRL_EGR_CNT_MASK)
7733 << RCV_EGR_CTRL_EGR_CNT_SHIFT) |
7734 (((rcd->eager_base >> RCV_SHIFT)
7735 & RCV_EGR_CTRL_EGR_BASE_INDEX_MASK)
7736 << RCV_EGR_CTRL_EGR_BASE_INDEX_SHIFT);
7737 write_kctxt_csr(dd, ctxt, RCV_EGR_CTRL, reg);
7740 * Set TID (expected) count and base index.
7741 * rcd->expected_count is set to individual RcvArray entries,
7742 * not pairs, and the CSR takes a pair-count in groups of
7743 * four, so divide by 8.
7745 reg = (((rcd->expected_count >> RCV_SHIFT)
7746 & RCV_TID_CTRL_TID_PAIR_CNT_MASK)
7747 << RCV_TID_CTRL_TID_PAIR_CNT_SHIFT) |
7748 (((rcd->expected_base >> RCV_SHIFT)
7749 & RCV_TID_CTRL_TID_BASE_INDEX_MASK)
7750 << RCV_TID_CTRL_TID_BASE_INDEX_SHIFT);
7751 write_kctxt_csr(dd, ctxt, RCV_TID_CTRL, reg);
7752 if (ctxt == VL15CTXT)
7753 write_csr(dd, RCV_VL15, VL15CTXT);
7755 if (op & HFI1_RCVCTRL_CTXT_DIS) {
7756 write_csr(dd, RCV_VL15, 0);
7757 rcvctrl &= ~RCV_CTXT_CTRL_ENABLE_SMASK;
7759 if (op & HFI1_RCVCTRL_INTRAVAIL_ENB)
7760 rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
7761 if (op & HFI1_RCVCTRL_INTRAVAIL_DIS)
7762 rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
7763 if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_phys)
7764 rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
7765 if (op & HFI1_RCVCTRL_TAILUPD_DIS)
7766 rcvctrl &= ~RCV_CTXT_CTRL_TAIL_UPD_SMASK;
7767 if (op & HFI1_RCVCTRL_TIDFLOW_ENB)
7768 rcvctrl |= RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
7769 if (op & HFI1_RCVCTRL_TIDFLOW_DIS)
7770 rcvctrl &= ~RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
7771 if (op & HFI1_RCVCTRL_ONE_PKT_EGR_ENB) {
7772 /* In one-packet-per-eager mode, the size comes from
7773 the RcvArray entry. */
7774 rcvctrl &= ~RCV_CTXT_CTRL_EGR_BUF_SIZE_SMASK;
7775 rcvctrl |= RCV_CTXT_CTRL_ONE_PACKET_PER_EGR_BUFFER_SMASK;
7777 if (op & HFI1_RCVCTRL_ONE_PKT_EGR_DIS)
7778 rcvctrl &= ~RCV_CTXT_CTRL_ONE_PACKET_PER_EGR_BUFFER_SMASK;
7779 if (op & HFI1_RCVCTRL_NO_RHQ_DROP_ENB)
7780 rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK;
7781 if (op & HFI1_RCVCTRL_NO_RHQ_DROP_DIS)
7782 rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK;
7783 if (op & HFI1_RCVCTRL_NO_EGR_DROP_ENB)
7784 rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
7785 if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS)
7786 rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
7787 rcd->rcvctrl = rcvctrl;
7788 hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl);
7789 write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcd->rcvctrl);
7791 /* work around sticky RcvCtxtStatus.BlockedRHQFull */
7793 && (rcvctrl & RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK)) {
7794 reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
7796 dd_dev_info(dd, "ctxt %d status %lld (blocked)\n",
7798 read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
7799 write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x10);
7800 write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x00);
7801 read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
7802 reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
7803 dd_dev_info(dd, "ctxt %d status %lld (%s blocked)\n",
7804 ctxt, reg, reg == 0 ? "not" : "still");
7810 * The interrupt timeout and count must be set after
7811 * the context is enabled to take effect.
7813 /* set interrupt timeout */
7814 write_kctxt_csr(dd, ctxt, RCV_AVAIL_TIME_OUT,
7815 (u64)rcd->rcvavail_timeout <<
7816 RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
7818 /* set RcvHdrHead.Counter, zero RcvHdrHead.Head (again) */
7819 reg = (u64)rcv_intr_count << RCV_HDR_HEAD_COUNTER_SHIFT;
7820 write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, reg);
7823 if (op & (HFI1_RCVCTRL_TAILUPD_DIS | HFI1_RCVCTRL_CTXT_DIS))
7825 * If the context has been disabled and the Tail Update has
7826 * been cleared, clear the RCV_HDR_TAIL_ADDR CSR so
7827 * it doesn't contain an address that is invalid.
7829 write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR, 0);
/*
 * hfi1_read_cntrs - hand out the device counter names, or refresh and size
 * the device counter values.
 *
 * Two paths are visible below.  One sets ret to dd->cntrnameslen and stores
 * dd->cntrnames through namep.  The other sets ret to
 * dd->ndevcntrs * sizeof(u64) and re-reads every dev_cntrs[] entry through
 * its rw_cntr() hook in CNTR_MODE_R, caching the results in dd->cntrs[].
 * Both paths log an error stating that indexing is not supported.
 *
 * NOTE(review): the conditions that select a path, the test that triggers
 * the indexing error, any store through the value pointer and the return
 * statement sit on lines missing from this extract - confirm against the
 * complete file.
 */
7832 u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep,
7839 ret = dd->cntrnameslen;
7841 dd_dev_err(dd, "read_cntrs does not support indexing");
7844 *namep = dd->cntrnames;
7846 const struct cntr_entry *entry;
/* one u64 slot per device counter */
7849 ret = (dd->ndevcntrs) * sizeof(u64);
7851 dd_dev_err(dd, "read_cntrs does not support indexing");
7855 /* Get the start of the block of counters */
7859 * Now go and fill in each counter in the block.
7861 for (i = 0; i < DEV_CNTR_LAST; i++) {
7862 entry = &dev_cntrs[i];
7863 hfi1_cdbg(CNTR, "reading %s", entry->name);
/* entries flagged CNTR_DISABLED are only traced */
7864 if (entry->flags & CNTR_DISABLED) {
7866 hfi1_cdbg(CNTR, "\tDisabled\n");
/* CNTR_VL entries use C_VL_COUNT slots starting at entry->offset */
7868 if (entry->flags & CNTR_VL) {
7869 hfi1_cdbg(CNTR, "\tPer VL\n");
7870 for (j = 0; j < C_VL_COUNT; j++) {
7871 val = entry->rw_cntr(entry,
7877 "\t\tRead 0x%llx for %d\n",
7879 dd->cntrs[entry->offset + j] =
7883 val = entry->rw_cntr(entry, dd,
7886 dd->cntrs[entry->offset] = val;
7887 hfi1_cdbg(CNTR, "\tRead 0x%llx", val);
7896 * Used by sysfs to create files for hfi stats to read
7898 u32 hfi1_read_portcntrs(struct hfi1_devdata *dd, loff_t pos, u32 port,
7899 char **namep, u64 **cntrp)
7905 ret = dd->portcntrnameslen;
7907 dd_dev_err(dd, "index not supported");
7910 *namep = dd->portcntrnames;
7912 const struct cntr_entry *entry;
7913 struct hfi1_pportdata *ppd;
7916 ret = (dd->nportcntrs) * sizeof(u64);
7918 dd_dev_err(dd, "indexing not supported");
7921 ppd = (struct hfi1_pportdata *)(dd + 1 + port);
7922 *cntrp = ppd->cntrs;
7924 for (i = 0; i < PORT_CNTR_LAST; i++) {
7925 entry = &port_cntrs[i];
7926 hfi1_cdbg(CNTR, "reading %s", entry->name);
7927 if (entry->flags & CNTR_DISABLED) {
7929 hfi1_cdbg(CNTR, "\tDisabled\n");
7933 if (entry->flags & CNTR_VL) {
7934 hfi1_cdbg(CNTR, "\tPer VL");
7935 for (j = 0; j < C_VL_COUNT; j++) {
7936 val = entry->rw_cntr(entry, ppd, j,
7941 "\t\tRead 0x%llx for %d",
7943 ppd->cntrs[entry->offset + j] = val;
7946 val = entry->rw_cntr(entry, ppd,
7950 ppd->cntrs[entry->offset] = val;
7951 hfi1_cdbg(CNTR, "\tRead 0x%llx", val);
/*
 * free_cntrs - release the counter bookkeeping owned by dd.
 *
 * Visible steps: stop the synthetic stats timer if its data field is set,
 * walk the port structures that follow dd and free the per-cpu rc_acks,
 * rc_qacks and rc_delayed_comp counters, then free both name blocks.
 * Each pointer is set to NULL after it is freed.
 *
 * NOTE(review): lines are missing from this extract inside the port loop
 * and before the final kfree calls; further frees may live there.
 */
7958 static void free_cntrs(struct hfi1_devdata *dd)
7960 struct hfi1_pportdata *ppd;
/* a non-zero data field marks the timer as set up */
7963 if (dd->synth_stats_timer.data)
7964 del_timer_sync(&dd->synth_stats_timer);
7965 dd->synth_stats_timer.data = 0;
/* the port structures start directly after the device data */
7966 ppd = (struct hfi1_pportdata *)(dd + 1);
7967 for (i = 0; i < dd->num_pports; i++, ppd++) {
7970 free_percpu(ppd->ibport_data.rc_acks);
7971 free_percpu(ppd->ibport_data.rc_qacks);
7972 free_percpu(ppd->ibport_data.rc_delayed_comp);
7975 ppd->ibport_data.rc_acks = NULL;
7976 ppd->ibport_data.rc_qacks = NULL;
7977 ppd->ibport_data.rc_delayed_comp = NULL;
7979 kfree(dd->portcntrnames);
7980 dd->portcntrnames = NULL;
7985 kfree(dd->cntrnames);
7986 dd->cntrnames = NULL;
/* Saturation value of a 64-bit synthetic counter. */
#define CNTR_MAX 0xFFFFFFFFFFFFFFFFULL
/*
 * Largest value a 32-bit hardware counter can hold.  The ULL suffix keeps
 * the constant 64 bits wide, like CNTR_MAX, so shifts and arithmetic on it
 * are never carried out in a 32-bit type.
 */
#define CNTR_32BIT_MAX 0x00000000FFFFFFFFULL
/*
 * read_dev_port_cntr - read one counter and fold it into its saved value.
 *
 * dd is used for logging, entry describes the counter, psval points at the
 * saved value of the counter, context and vl are passed to the rw_cntr()
 * hook of the entry.
 *
 * Visible behaviour: a CNTR_DISABLED entry is reported as an error.
 * Otherwise the hardware value is read in CNTR_MODE_R.  For CNTR_SYNTH
 * entries a saved value equal to CNTR_MAX is treated as saturated; for
 * CNTR_32BIT entries the saved value is split into upper and lower halves,
 * a hardware value below the lower half is treated as a wrap, and the
 * result is rebuilt as (upper << 32) | val unless it is CNTR_MAX.
 *
 * NOTE(review): the statements executed on wrap and on saturation, the
 * store back through psval and the return statements are on lines missing
 * from this extract.
 * NOTE(review): the test val > CNTR_MAX can never be true if val is an
 * unsigned 64-bit quantity (its declaration is not visible) - confirm.
 */
7992 static u64 read_dev_port_cntr(struct hfi1_devdata *dd, struct cntr_entry *entry,
7993 u64 *psval, void *context, int vl)
7998 if (entry->flags & CNTR_DISABLED) {
7999 dd_dev_err(dd, "Counter %s not enabled", entry->name);
8003 hfi1_cdbg(CNTR, "cntr: %s vl %d psval 0x%llx", entry->name, vl, *psval);
/* raw value as reported by the counter hook */
8005 val = entry->rw_cntr(entry, context, vl, CNTR_MODE_R, 0);
8007 /* If its a synthetic counter there is more work we need to do */
8008 if (entry->flags & CNTR_SYNTH) {
8009 if (sval == CNTR_MAX) {
8010 /* No need to read already saturated */
8014 if (entry->flags & CNTR_32BIT) {
8015 /* 32bit counters can wrap multiple times */
8016 u64 upper = sval >> 32;
8017 u64 lower = (sval << 32) >> 32;
8019 if (lower > val) { /* hw wrapped */
8020 if (upper == CNTR_32BIT_MAX)
8026 if (val != CNTR_MAX)
8027 val = (upper << 32) | val;
8030 /* If we rolled we are saturated */
8031 if ((val < sval) || (val > CNTR_MAX))
8038 hfi1_cdbg(CNTR, "\tNew val=0x%llx", val);
/*
 * write_dev_port_cntr - write a value to one counter.
 *
 * dd is used for logging, entry describes the counter, psval points at the
 * saved value of the counter, context and vl are passed to the rw_cntr()
 * hook of the entry, data is the value to write.
 *
 * Visible behaviour: a CNTR_DISABLED entry is reported as an error.  For a
 * CNTR_SYNTH entry that is also CNTR_32BIT only the low 32 bits of data are
 * passed to the hook in CNTR_MODE_W, while val keeps the full 64-bit data.
 * The other visible paths pass the value to the hook in CNTR_MODE_W.
 *
 * NOTE(review): the update of *psval and the return statements are on
 * lines missing from this extract.
 */
8043 static u64 write_dev_port_cntr(struct hfi1_devdata *dd,
8044 struct cntr_entry *entry,
8045 u64 *psval, void *context, int vl, u64 data)
8049 if (entry->flags & CNTR_DISABLED) {
8050 dd_dev_err(dd, "Counter %s not enabled", entry->name);
8054 hfi1_cdbg(CNTR, "cntr: %s vl %d psval 0x%llx", entry->name, vl, *psval);
8056 if (entry->flags & CNTR_SYNTH) {
8058 if (entry->flags & CNTR_32BIT) {
/* the hook only receives the low 32 bits of data */
8059 val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W,
8060 (data << 32) >> 32);
8061 val = data; /* return the full 64bit value */
8063 val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W,
8067 val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W, data);
8072 hfi1_cdbg(CNTR, "\tNew val=0x%llx", val);
/*
 * read_dev_cntr - read the device counter dev_cntrs[index].
 *
 * The saved value slot is dd->scntrs + entry->offset; the read itself is
 * delegated to read_dev_port_cntr() with dd as the hook context.
 *
 * NOTE(review): the statement guarded by the vl != CNTR_INVALID_VL test is
 * on a line missing from this extract; index is not range checked in the
 * visible lines.
 */
8077 u64 read_dev_cntr(struct hfi1_devdata *dd, int index, int vl)
8079 struct cntr_entry *entry;
8082 entry = &dev_cntrs[index];
8083 sval = dd->scntrs + entry->offset;
8085 if (vl != CNTR_INVALID_VL)
8088 return read_dev_port_cntr(dd, entry, sval, dd, vl);
/*
 * write_dev_cntr - write data to the device counter dev_cntrs[index].
 *
 * The saved value slot is dd->scntrs + entry->offset; the write itself is
 * delegated to write_dev_port_cntr() with dd as the hook context.
 *
 * NOTE(review): the statement guarded by the vl != CNTR_INVALID_VL test is
 * on a line missing from this extract; index is not range checked in the
 * visible lines.
 */
8091 u64 write_dev_cntr(struct hfi1_devdata *dd, int index, int vl, u64 data)
8093 struct cntr_entry *entry;
8096 entry = &dev_cntrs[index];
8097 sval = dd->scntrs + entry->offset;
8099 if (vl != CNTR_INVALID_VL)
8102 return write_dev_port_cntr(dd, entry, sval, dd, vl, data);
/*
 * read_port_cntr - read the port counter port_cntrs[index] of ppd.
 *
 * The saved value slot is ppd->scntrs + entry->offset; the read itself is
 * delegated to read_dev_port_cntr() with ppd as the hook context.
 * Indexes from C_RCV_HDR_OVF_FIRST + num_rcv_contexts up to
 * C_RCV_HDR_OVF_LAST belong to contexts that are not in use and get
 * special treatment.
 *
 * NOTE(review): the statements guarded by the vl test and by the overflow
 * range test are on lines missing from this extract.
 */
8105 u64 read_port_cntr(struct hfi1_pportdata *ppd, int index, int vl)
8107 struct cntr_entry *entry;
8110 entry = &port_cntrs[index];
8111 sval = ppd->scntrs + entry->offset;
8113 if (vl != CNTR_INVALID_VL)
8116 if ((index >= C_RCV_HDR_OVF_FIRST + ppd->dd->num_rcv_contexts) &&
8117 (index <= C_RCV_HDR_OVF_LAST)) {
8118 /* We do not want to bother for disabled contexts */
8122 return read_dev_port_cntr(ppd->dd, entry, sval, ppd, vl);
/*
 * write_port_cntr - write data to the port counter port_cntrs[index] of ppd.
 *
 * The saved value slot is ppd->scntrs + entry->offset; the write itself is
 * delegated to write_dev_port_cntr() with ppd as the hook context.
 * Indexes from C_RCV_HDR_OVF_FIRST + num_rcv_contexts up to
 * C_RCV_HDR_OVF_LAST belong to contexts that are not in use and get
 * special treatment.
 *
 * NOTE(review): the statements guarded by the vl test and by the overflow
 * range test are on lines missing from this extract.
 */
8125 u64 write_port_cntr(struct hfi1_pportdata *ppd, int index, int vl, u64 data)
8127 struct cntr_entry *entry;
8130 entry = &port_cntrs[index];
8131 sval = ppd->scntrs + entry->offset;
8133 if (vl != CNTR_INVALID_VL)
8136 if ((index >= C_RCV_HDR_OVF_FIRST + ppd->dd->num_rcv_contexts) &&
8137 (index <= C_RCV_HDR_OVF_LAST)) {
8138 /* We do not want to bother for disabled contexts */
8142 return write_dev_port_cntr(ppd->dd, entry, sval, ppd, vl, data);
/*
 * update_synth_timer - timer callback that keeps the synthetic counters
 * from missing a hardware roll over.
 *
 * opaque carries the struct hfi1_devdata pointer.  The callback reads the
 * C_DC_RCV_FLITS and C_DC_XMIT_FLITS device counters directly through
 * their rw_cntr() hooks and compares them with dd->last_rx and dd->last_tx.
 * When either went backwards, or when the summed delta reaches
 * CNTR_32BIT_MAX, every device counter and every port counter is read
 * through read_dev_cntr() and read_port_cntr(), once per VL for CNTR_VL
 * entries, and the two tripwire values are sampled again into dd->last_tx
 * and dd->last_rx.  The timer is then re-armed HZ * SYNTH_CNT_TIME jiffies
 * ahead.
 *
 * NOTE(review): the flag that records whether an update is needed and the
 * branches around it are on lines missing from this extract.
 */
8145 static void update_synth_timer(unsigned long opaque)
8152 struct hfi1_pportdata *ppd;
8153 struct cntr_entry *entry;
8155 struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
8158 * Rather than keep beating on the CSRs pick a minimal set that we can
8159 * check to watch for potential roll over. We can do this by looking at
8160 * the number of flits sent/recv. If the total flits exceeds 32bits then
8161 * we have to iterate all the counters and update.
/* sample the two tripwire counters straight from the hooks */
8163 entry = &dev_cntrs[C_DC_RCV_FLITS];
8164 cur_rx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL, CNTR_MODE_R, 0);
8166 entry = &dev_cntrs[C_DC_XMIT_FLITS];
8167 cur_tx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL, CNTR_MODE_R, 0);
8171 "[%d] curr tx=0x%llx rx=0x%llx :: last tx=0x%llx rx=0x%llx\n",
8172 dd->unit, cur_tx, cur_rx, dd->last_tx, dd->last_rx);
/* a value below the previous sample means the register rolled */
8174 if ((cur_tx < dd->last_tx) || (cur_rx < dd->last_rx)) {
8176 * May not be strictly necessary to update but it won't hurt and
8177 * simplifies the logic here.
8180 hfi1_cdbg(CNTR, "[%d] Tripwire counter rolled, updating",
8183 total_flits = (cur_tx - dd->last_tx) + (cur_rx - dd->last_rx);
8185 "[%d] total flits 0x%llx limit 0x%llx\n", dd->unit,
8186 total_flits, (u64)CNTR_32BIT_MAX);
8187 if (total_flits >= CNTR_32BIT_MAX) {
8188 hfi1_cdbg(CNTR, "[%d] 32bit limit hit, updating",
8195 hfi1_cdbg(CNTR, "[%d] Updating dd and ppd counters", dd->unit);
8196 for (i = 0; i < DEV_CNTR_LAST; i++) {
8197 entry = &dev_cntrs[i];
8198 if (entry->flags & CNTR_VL) {
8199 for (vl = 0; vl < C_VL_COUNT; vl++)
8200 read_dev_cntr(dd, i, vl);
8202 read_dev_cntr(dd, i, CNTR_INVALID_VL);
8205 ppd = (struct hfi1_pportdata *)(dd + 1);
8206 for (i = 0; i < dd->num_pports; i++, ppd++) {
8207 for (j = 0; j < PORT_CNTR_LAST; j++) {
8208 entry = &port_cntrs[j];
8209 if (entry->flags & CNTR_VL) {
8210 for (vl = 0; vl < C_VL_COUNT; vl++)
8211 read_port_cntr(ppd, j, vl);
8213 read_port_cntr(ppd, j, CNTR_INVALID_VL);
8219 * We want the value in the register. The goal is to keep track
8220 * of the number of "ticks" not the counter value. In other
8221 * words if the register rolls we want to notice it and go ahead
8222 * and force an update.
8224 entry = &dev_cntrs[C_DC_XMIT_FLITS];
8225 dd->last_tx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL,
8228 entry = &dev_cntrs[C_DC_RCV_FLITS];
8229 dd->last_rx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL,
8232 hfi1_cdbg(CNTR, "[%d] setting last tx/rx to 0x%llx 0x%llx",
8233 dd->unit, dd->last_tx, dd->last_rx);
8236 hfi1_cdbg(CNTR, "[%d] No update necessary", dd->unit);
8239 mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
/*
 * init_cntrs - size, allocate and name the device and port counters.
 *
 * Visible steps, in order:
 *  - set up dd->synth_stats_timer with update_synth_timer as its handler;
 *  - walk dev_cntrs[], skip CNTR_DISABLED entries, record each entry offset
 *    and accumulate the size of the name block (CNTR_VL entries get one
 *    name per VL, built as name followed by the VL number);
 *  - allocate dd->cntrs and dd->scntrs (index u64 slots each) and
 *    dd->cntrnames (sz bytes), then copy the names in;
 *  - mark the receive header overflow counters of unused contexts as
 *    CNTR_DISABLED in port_cntrs[];
 *  - repeat the sizing, allocation and naming for the port counters, using
 *    dd->nportcntrs as the running slot index;
 *  - allocate cntrs and scntrs for every port structure after dd;
 *  - call init_cpu_counters() and arm the stats timer.
 *
 * NOTE(review): generated per-VL names are formatted with snprintf into a
 * C_MAX_NAME byte buffer, so a longer name would be cut short - confirm
 * that all counter names fit.
 * NOTE(review): allocation failure handling, the separator written after
 * each name and the return statements are on lines missing from this
 * extract.
 */
8242 #define C_MAX_NAME 13 /* 12 chars + one for /0 */
8243 static int init_cntrs(struct hfi1_devdata *dd)
8245 int i, rcv_ctxts, index, j;
8248 char name[C_MAX_NAME];
8249 struct hfi1_pportdata *ppd;
8251 /* set up the stats timer; the add_timer is done at the end */
8252 setup_timer(&dd->synth_stats_timer, update_synth_timer,
8255 /***********************/
8256 /* per device counters */
8257 /***********************/
8259 /* size names and determine how many we have*/
8264 for (i = 0; i < DEV_CNTR_LAST; i++) {
8265 hfi1_dbg_early("Init cntr %s\n", dev_cntrs[i].name);
8266 if (dev_cntrs[i].flags & CNTR_DISABLED) {
8267 hfi1_dbg_early("\tSkipping %s\n", dev_cntrs[i].name);
/* a per-VL counter starts at index and gets one name per VL */
8271 if (dev_cntrs[i].flags & CNTR_VL) {
8272 hfi1_dbg_early("\tProcessing VL cntr\n");
8273 dev_cntrs[i].offset = index;
8274 for (j = 0; j < C_VL_COUNT; j++) {
8275 memset(name, '\0', C_MAX_NAME);
8276 snprintf(name, C_MAX_NAME, "%s%d",
8281 hfi1_dbg_early("\t\t%s\n", name);
8286 /* +1 for newline */
8287 sz += strlen(dev_cntrs[i].name) + 1;
8289 dev_cntrs[i].offset = index;
8291 hfi1_dbg_early("\tAdding %s\n", dev_cntrs[i].name);
8295 /* allocate space for the counter values */
8296 dd->cntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
8300 dd->scntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
8305 /* allocate space for the counter names */
8306 dd->cntrnameslen = sz;
8307 dd->cntrnames = kmalloc(sz, GFP_KERNEL);
8311 /* fill in the names */
8312 for (p = dd->cntrnames, i = 0, index = 0; i < DEV_CNTR_LAST; i++) {
8313 if (dev_cntrs[i].flags & CNTR_DISABLED) {
8316 if (dev_cntrs[i].flags & CNTR_VL) {
8317 for (j = 0; j < C_VL_COUNT; j++) {
8318 memset(name, '\0', C_MAX_NAME);
8319 snprintf(name, C_MAX_NAME, "%s%d",
8322 memcpy(p, name, strlen(name));
8327 memcpy(p, dev_cntrs[i].name,
8328 strlen(dev_cntrs[i].name));
8329 p += strlen(dev_cntrs[i].name);
8336 /*********************/
8337 /* per port counters */
8338 /*********************/
8341 * Go through the counters for the overflows and disable the ones we
8342 * don't need. This varies based on platform so we need to do it
8345 rcv_ctxts = dd->num_rcv_contexts;
8346 for (i = C_RCV_HDR_OVF_FIRST + rcv_ctxts;
8347 i <= C_RCV_HDR_OVF_LAST; i++) {
8348 port_cntrs[i].flags |= CNTR_DISABLED;
8351 /* size port counter names and determine how many we have*/
8354 for (i = 0; i < PORT_CNTR_LAST; i++) {
8355 hfi1_dbg_early("Init pcntr %s\n", port_cntrs[i].name);
8356 if (port_cntrs[i].flags & CNTR_DISABLED) {
8357 hfi1_dbg_early("\tSkipping %s\n", port_cntrs[i].name);
8361 if (port_cntrs[i].flags & CNTR_VL) {
8362 hfi1_dbg_early("\tProcessing VL cntr\n");
8363 port_cntrs[i].offset = dd->nportcntrs;
8364 for (j = 0; j < C_VL_COUNT; j++) {
8365 memset(name, '\0', C_MAX_NAME);
8366 snprintf(name, C_MAX_NAME, "%s%d",
8371 hfi1_dbg_early("\t\t%s\n", name);
8375 /* +1 for newline */
8376 sz += strlen(port_cntrs[i].name) + 1;
8377 port_cntrs[i].offset = dd->nportcntrs;
8379 hfi1_dbg_early("\tAdding %s\n", port_cntrs[i].name);
8383 /* allocate space for the counter names */
8384 dd->portcntrnameslen = sz;
8385 dd->portcntrnames = kmalloc(sz, GFP_KERNEL);
8386 if (!dd->portcntrnames)
8389 /* fill in port cntr names */
8390 for (p = dd->portcntrnames, i = 0; i < PORT_CNTR_LAST; i++) {
8391 if (port_cntrs[i].flags & CNTR_DISABLED)
8394 if (port_cntrs[i].flags & CNTR_VL) {
8395 for (j = 0; j < C_VL_COUNT; j++) {
8396 memset(name, '\0', C_MAX_NAME);
8397 snprintf(name, C_MAX_NAME, "%s%d",
8400 memcpy(p, name, strlen(name));
8405 memcpy(p, port_cntrs[i].name,
8406 strlen(port_cntrs[i].name));
8407 p += strlen(port_cntrs[i].name);
8412 /* allocate per port storage for counter values */
8413 ppd = (struct hfi1_pportdata *)(dd + 1);
8414 for (i = 0; i < dd->num_pports; i++, ppd++) {
8415 ppd->cntrs = kcalloc(dd->nportcntrs, sizeof(u64), GFP_KERNEL);
8419 ppd->scntrs = kcalloc(dd->nportcntrs, sizeof(u64), GFP_KERNEL);
8424 /* CPU counters need to be allocated and zeroed */
8425 if (init_cpu_counters(dd))
8428 mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
/*
 * chip_to_opa_lstate - translate a chip logical link state into one of
 * IB_PORT_DOWN, IB_PORT_INIT, IB_PORT_ARMED or IB_PORT_ACTIVE.
 *
 * An unrecognised chip value is logged through dd; the message states that
 * IB_PORT_DOWN is reported in that case.
 *
 * NOTE(review): the case labels that pair chip values with the returns
 * below are on lines missing from this extract.
 */
8436 static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate)
8438 switch (chip_lstate) {
8441 "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
8445 return IB_PORT_DOWN;
8447 return IB_PORT_INIT;
8449 return IB_PORT_ARMED;
8451 return IB_PORT_ACTIVE;
/*
 * chip_to_opa_pstate - translate a chip physical link state into a port
 * physical state constant.
 *
 * Only bits 7:4 of chip_pstate take part in the switch (mask 0xf0).  An
 * unexpected value is logged through dd.
 *
 * NOTE(review): the case labels that pair chip values with the returns
 * below, and the value returned after the error message, are on lines
 * missing from this extract.
 */
8455 u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate)
8457 /* look at the HFI meta-states only */
8458 switch (chip_pstate & 0xf0) {
8460 dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n",
8464 return IB_PORTPHYSSTATE_DISABLED;
8466 return OPA_PORTPHYSSTATE_OFFLINE;
8468 return IB_PORTPHYSSTATE_POLLING;
8470 return IB_PORTPHYSSTATE_TRAINING;
8472 return IB_PORTPHYSSTATE_LINKUP;
8474 return IB_PORTPHYSSTATE_PHY_TEST;
/*
 * opa_lstate_name - printable name for an OPA logical port state.
 *
 * lstate indexes a static table of names; the lookup is bounds checked
 * with ARRAY_SIZE.
 *
 * NOTE(review): most table entries and the value returned for an
 * out-of-range lstate are on lines missing from this extract.
 */
8478 /* return the OPA port logical state name */
8479 const char *opa_lstate_name(u32 lstate)
8481 static const char * const port_logical_names[] = {
8487 "PORT_ACTIVE_DEFER",
8489 if (lstate < ARRAY_SIZE(port_logical_names))
8490 return port_logical_names[lstate];
/*
 * opa_pstate_name - printable name for an OPA physical port state.
 *
 * pstate indexes a static table of names; the lookup is bounds checked
 * with ARRAY_SIZE.
 *
 * NOTE(review): most table entries and the value returned for an
 * out-of-range pstate are on lines missing from this extract.
 */
8494 /* return the OPA port physical state name */
8495 const char *opa_pstate_name(u32 pstate)
8497 static const char * const port_physical_names[] = {
8504 "PHYS_LINK_ERR_RECOVER",
8511 if (pstate < ARRAY_SIZE(port_physical_names))
8512 return port_physical_names[pstate];
/*
 * get_logical_state - refresh the cached logical link state of a port.
 *
 * The hardware state comes from read_logical_state() and is converted with
 * chip_to_opa_lstate().  A change is logged and stored in ppd->lstate.
 * The status word behind ppd->statusp is then updated from ppd->lstate:
 * one visible arm clears HFI1_STATUS_IB_CONF and HFI1_STATUS_IB_READY,
 * another sets HFI1_STATUS_IB_CONF, and the IB_PORT_ACTIVE arm sets
 * HFI1_STATUS_IB_READY.
 *
 * NOTE(review): the remaining case labels, any break statements and the
 * return statement are on lines missing from this extract.
 */
8517 * Read the hardware link state and set the driver's cached value of it.
8518 * Return the (new) current value.
8520 u32 get_logical_state(struct hfi1_pportdata *ppd)
8524 new_state = chip_to_opa_lstate(ppd->dd, read_logical_state(ppd->dd));
8525 if (new_state != ppd->lstate) {
8526 dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n",
8527 opa_lstate_name(new_state), new_state);
8528 ppd->lstate = new_state;
8531 * Set port status flags in the page mapped into userspace
8532 * memory. Do it here to ensure a reliable state - this is
8533 * the only function called by all state handling code.
8534 * Always set the flags due to the fact that the cache value
8535 * might have been changed explicitly outside of this
8539 switch (ppd->lstate) {
8542 *ppd->statusp &= ~(HFI1_STATUS_IB_CONF |
8543 HFI1_STATUS_IB_READY);
8546 *ppd->statusp |= HFI1_STATUS_IB_CONF;
8548 case IB_PORT_ACTIVE:
8549 *ppd->statusp |= HFI1_STATUS_IB_READY;
/*
 * Polling helper: the deadline is computed once from msecs, then the
 * cached state is refreshed through get_logical_state() and compared with
 * the requested state until the deadline has passed.  A timeout is logged
 * through ppd->dd.
 *
 * NOTE(review): the loop construct, any delay between polls and the
 * return statements are on lines missing from this extract.
 */
8557 * wait_logical_linkstate - wait for an IB link state change to occur
8559 * @state: the state to wait for
8560 * @msecs: the number of milliseconds to wait
8562 * Wait up to msecs milliseconds for IB link state change to occur.
8563 * For now, take the easy polling route.
8564 * Returns 0 if state reached, otherwise -ETIMEDOUT.
8566 static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
8569 unsigned long timeout;
/* absolute deadline in jiffies */
8571 timeout = jiffies + msecs_to_jiffies(msecs);
8573 if (get_logical_state(ppd) == state)
8575 if (time_after(jiffies, timeout))
8579 dd_dev_err(ppd->dd, "timeout waiting for link state 0x%x\n", state);
/*
 * hfi1_ibphys_portstate - read the physical port state of ppd.
 *
 * The chip value from read_physical_state() is converted with
 * chip_to_opa_pstate(); a change relative to the previously seen value is
 * logged.
 *
 * NOTE(review): remembered_state is a function-local static, so it is one
 * value shared by every port and device that calls this function - confirm
 * that this is intended.
 * NOTE(review): the return statement is on a line missing from this
 * extract.
 */
8584 u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd)
8586 static u32 remembered_state = 0xff;
8590 pstate = read_physical_state(ppd->dd);
8591 ib_pstate = chip_to_opa_pstate(ppd->dd, pstate);
8592 if (remembered_state != ib_pstate) {
8593 dd_dev_info(ppd->dd,
8594 "%s: physical state changed to %s (0x%x), phy 0x%x\n",
8595 __func__, opa_pstate_name(ib_pstate), ib_pstate,
8597 remembered_state = ib_pstate;
/*
 * hfi1_gpio_mod - update the output-enable bits of a QSFP GPIO block and
 * read back its input register.
 *
 * A non-zero target selects the ASIC_QSFP2 registers, zero selects the
 * ASIC_QSFP1 registers.  The visible update replaces the bits selected by
 * mask in the OE register with dir.  The return value is the content of
 * the matching IN register.
 *
 * NOTE(review): data is not referenced on any line visible in this
 * extract, and the condition guarding the register update is on a missing
 * line - confirm against the complete file.
 */
8603 * Read/modify/write ASIC_QSFP register bits as selected by mask
8604 * data: 0 or 1 in the positions depending on what needs to be written
8605 * dir: 0 for read, 1 for write
8606 * mask: select by setting
8610 u64 hfi1_gpio_mod(struct hfi1_devdata *dd, u32 target, u32 data, u32 dir,
8613 u64 qsfp_oe, target_oe;
8615 target_oe = target ? ASIC_QSFP2_OE : ASIC_QSFP1_OE;
8617 /* We are writing register bits, so lock access */
8621 qsfp_oe = read_csr(dd, target_oe);
8622 qsfp_oe = (qsfp_oe & ~(u64)mask) | (u64)dir;
8623 write_csr(dd, target_oe, qsfp_oe);
8625 /* We are exclusively reading bits here, but it is unlikely
8626 * we'll get valid data when we set the direction of the pin
8627 * in the same call, so read should call this function again
8630 return read_csr(dd, target ? ASIC_QSFP2_IN : ASIC_QSFP1_IN);
/*
 * Clear or set the DISALLOW_PBC_STATIC_RATE_CONTROL bit in a
 * SEND_CTXT_CHECK_ENABLE register image.
 *
 * r must be a modifiable lvalue; it is updated in place and the macro
 * evaluates to the new value.  The argument is parenthesized so that any
 * lvalue expression can be passed safely.
 */
#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
	((r) &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)

#define SET_STATIC_RATE_CONTROL_SMASK(r) \
	((r) |= SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
/*
 * hfi1_init_ctxt - program the static rate control check of a send
 * context.
 *
 * The STATIC_RATE_CTRL capability is looked up in the user capability set
 * for SC_USER contexts and in the kernel set otherwise.  The
 * SEND_CTXT_CHECK_ENABLE CSR of sc->hw_context is read, the static rate
 * control bit is cleared or set, and the value is written back.
 *
 * NOTE(review): the test that chooses between the clear and the set, any
 * validation of sc and the return statement are on lines missing from this
 * extract.
 */
8639 int hfi1_init_ctxt(struct send_context *sc)
8642 struct hfi1_devdata *dd = sc->dd;
8644 u8 set = (sc->type == SC_USER ?
8645 HFI1_CAP_IS_USET(STATIC_RATE_CTRL) :
8646 HFI1_CAP_IS_KSET(STATIC_RATE_CTRL));
8647 reg = read_kctxt_csr(dd, sc->hw_context,
8648 SEND_CTXT_CHECK_ENABLE);
8650 CLEAR_STATIC_RATE_CONTROL_SMASK(reg);
8652 SET_STATIC_RATE_CONTROL_SMASK(reg);
8653 write_kctxt_csr(dd, sc->hw_context,
8654 SEND_CTXT_CHECK_ENABLE, reg);
/*
 * hfi1_tempsense_rd - decode the thermal status CSR into temp.
 *
 * On anything other than ICODE_RTL_SILICON the sensor is treated as
 * unsupported; a message is logged when the PRINT_UNIMPL kernel capability
 * is set.  Otherwise ASIC_STS_THERM is read once and its current, low,
 * high and critical temperature fields plus the 3-bit trigger field are
 * extracted with their shift and mask constants.
 *
 * NOTE(review): the return values are on lines missing from this extract.
 */
8659 int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp)
8664 if (dd->icode != ICODE_RTL_SILICON) {
8665 if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
8666 dd_dev_info(dd, "%s: tempsense not supported by HW\n",
/* a single CSR read supplies every field below */
8670 reg = read_csr(dd, ASIC_STS_THERM);
8671 temp->curr = ((reg >> ASIC_STS_THERM_CURR_TEMP_SHIFT) &
8672 ASIC_STS_THERM_CURR_TEMP_MASK);
8673 temp->lo_lim = ((reg >> ASIC_STS_THERM_LO_TEMP_SHIFT) &
8674 ASIC_STS_THERM_LO_TEMP_MASK);
8675 temp->hi_lim = ((reg >> ASIC_STS_THERM_HI_TEMP_SHIFT) &
8676 ASIC_STS_THERM_HI_TEMP_MASK);
8677 temp->crit_lim = ((reg >> ASIC_STS_THERM_CRIT_TEMP_SHIFT) &
8678 ASIC_STS_THERM_CRIT_TEMP_MASK);
8679 /* triggers is a 3-bit value - 1 bit per trigger. */
8680 temp->triggers = (u8)((reg >> ASIC_STS_THERM_LOW_SHIFT) & 0x7);
/*
 * set_intr_state - open or close the chip interrupt masks.
 *
 * Enabling writes all-ones to each of the CCE_NUM_INT_CSRS mask CSRs
 * (8 bytes apart) and then clears one of the two QSFP interrupt bits
 * again; disabling writes zero to every mask CSR.
 *
 * NOTE(review): the test on enable and the test that picks which QSFP bit
 * to clear are on lines missing from this extract.
 */
8685 /* ========================================================================= */
8688 * Enable/disable chip from delivering interrupts.
8690 void set_intr_state(struct hfi1_devdata *dd, u32 enable)
8695 * In HFI, the mask needs to be 1 to allow interrupts.
/* bit positions of the QSFP sources inside their 64-bit mask CSR */
8699 const int qsfp1_int_smask = QSFP1_INT % 64;
8700 const int qsfp2_int_smask = QSFP2_INT % 64;
8702 /* enable all interrupts */
8703 for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8704 write_csr(dd, CCE_INT_MASK + (8*i), ~(u64)0);
8707 * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
8708 * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
8709 * therefore just one of QSFP1_INT/QSFP2_INT can be used to find
8710 * the index of the appropriate CSR in the CCEIntMask CSR array
8712 cce_int_mask = read_csr(dd, CCE_INT_MASK +
8713 (8*(QSFP1_INT/64)));
8715 cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
8716 write_csr(dd, CCE_INT_MASK + (8*(QSFP1_INT/64)),
8719 cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
8720 write_csr(dd, CCE_INT_MASK + (8*(QSFP2_INT/64)),
/* disable path: every mask CSR is zeroed */
8724 for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8725 write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
8730 * Clear all interrupt sources on the chip.
8732 static void clear_all_interrupts(struct hfi1_devdata *dd)
8736 for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8737 write_csr(dd, CCE_INT_CLEAR + (8*i), ~(u64)0);
8739 write_csr(dd, CCE_ERR_CLEAR, ~(u64)0);
8740 write_csr(dd, MISC_ERR_CLEAR, ~(u64)0);
8741 write_csr(dd, RCV_ERR_CLEAR, ~(u64)0);
8742 write_csr(dd, SEND_ERR_CLEAR, ~(u64)0);
8743 write_csr(dd, SEND_PIO_ERR_CLEAR, ~(u64)0);
8744 write_csr(dd, SEND_DMA_ERR_CLEAR, ~(u64)0);
8745 write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~(u64)0);
8746 for (i = 0; i < dd->chip_send_contexts; i++)
8747 write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~(u64)0);
8748 for (i = 0; i < dd->chip_sdma_engines; i++)
8749 write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~(u64)0);
8751 write_csr(dd, DCC_ERR_FLG_CLR, ~(u64)0);
8752 write_csr(dd, DC_LCB_ERR_CLR, ~(u64)0);
8753 write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0);
/*
 * disable_intx - helper taking the PCI device whose INTx is to be turned
 * off.
 *
 * NOTE(review): the function body is on lines missing from this extract;
 * nothing about its implementation can be confirmed here.
 */
8756 /* Move to pcie.c? */
8757 static void disable_intx(struct pci_dev *pdev)
/*
 * clean_up_interrupts - release every interrupt resource held by dd.
 *
 * Order of the visible steps: free the requested IRQs (for MSI-X each
 * entry with a non-NULL arg has its affinity hint updated and its IRQ
 * freed; for INTx the PCI IRQ is freed and requested_intx_irq is reset),
 * turn the interrupt mechanism off, then free the per-entry cpumasks and
 * the msix_entries array and reset the bookkeeping fields.
 *
 * NOTE(review): the call made to switch MSI-X off and the branch structure
 * around disable_intx() are on lines missing from this extract.
 */
8762 static void clean_up_interrupts(struct hfi1_devdata *dd)
8766 /* remove irqs - must happen before disabling/turning off */
8767 if (dd->num_msix_entries) {
8769 struct hfi1_msix_entry *me = dd->msix_entries;
8771 for (i = 0; i < dd->num_msix_entries; i++, me++) {
8772 if (me->arg == NULL) /* => no irq, no affinity */
8774 irq_set_affinity_hint(dd->msix_entries[i].msix.vector,
8776 free_irq(me->msix.vector, me->arg);
8780 if (dd->requested_intx_irq) {
8781 free_irq(dd->pcidev->irq, dd);
8782 dd->requested_intx_irq = 0;
8786 /* turn off interrupts */
8787 if (dd->num_msix_entries) {
8792 disable_intx(dd->pcidev);
8795 /* clean structures */
8796 for (i = 0; i < dd->num_msix_entries; i++)
8797 free_cpumask_var(dd->msix_entries[i].mask);
8798 kfree(dd->msix_entries);
8799 dd->msix_entries = NULL;
8800 dd->num_msix_entries = 0;
/*
 * remap_intr - detach chip interrupt source isrc from the general handler
 * and route it to MSI-X interrupt msix_intr.
 *
 * Bit n of dd->gi_mask[m] is cleared, then the 8-bit field n of the
 * CCE_INT_MAP CSR number m is rewritten with the low 8 bits of msix_intr.
 *
 * NOTE(review): the computation of m and n for each of the two steps is on
 * lines missing from this extract.
 */
8804 * Remap the interrupt source from the general handler to the given MSI-X
8807 static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
8812 /* clear from the handled mask of the general interrupt */
8815 dd->gi_mask[m] &= ~((u64)1 << n);
8817 /* direct the chip source to the given MSI-X interrupt */
8820 reg = read_csr(dd, CCE_INT_MAP + (8*m));
8821 reg &= ~((u64)0xff << (8*n));
8822 reg |= ((u64)msix_intr & 0xff) << (8*n);
8823 write_csr(dd, CCE_INT_MAP + (8*m), reg);
8826 static void remap_sdma_interrupts(struct hfi1_devdata *dd,
8827 int engine, int msix_intr)
8830 * SDMA engine interrupt sources grouped by type, rather than
8831 * engine. Per-engine interrupts are as follows:
8836 remap_intr(dd, IS_SDMA_START + 0*TXE_NUM_SDMA_ENGINES + engine,
8838 remap_intr(dd, IS_SDMA_START + 1*TXE_NUM_SDMA_ENGINES + engine,
8840 remap_intr(dd, IS_SDMA_START + 2*TXE_NUM_SDMA_ENGINES + engine,
8844 static void remap_receive_available_interrupt(struct hfi1_devdata *dd,
8845 int rx, int msix_intr)
8847 remap_intr(dd, IS_RCVAVAIL_START + rx, msix_intr);
8850 static int request_intx_irq(struct hfi1_devdata *dd)
8854 snprintf(dd->intx_name, sizeof(dd->intx_name), DRIVER_NAME"_%d",
8856 ret = request_irq(dd->pcidev->irq, general_interrupt,
8857 IRQF_SHARED, dd->intx_name, dd);
8859 dd_dev_err(dd, "unable to request INTx interrupt, err %d\n",
8862 dd->requested_intx_irq = 1;
/*
 * request_msix_irqs - request one IRQ per MSI-X entry and give each an
 * affinity hint.
 *
 * The entries are split into consecutive ranges: one general interrupt,
 * dd->num_sdma SDMA interrupts and dd->n_krcv_queues receive context
 * interrupts.  Two scratch cpumasks are built first: def for the general
 * and SDMA interrupts and rcv for the receive contexts.  For every entry
 * the handler, name and chip source remapping are chosen from the range
 * the entry falls into, the IRQ is requested with request_threaded_irq(),
 * and a per-entry cpumask is allocated and passed to
 * irq_set_affinity_hint().  SDMA and receive entries walk their mask round
 * robin; every other entry uses the first CPU of def.  Both scratch masks
 * are freed before returning.
 *
 * NOTE(review): ret is initialised to 0 and the statements taken when one
 * of the cpumask allocations fails are on lines missing from this extract
 * - confirm that those paths report an error.
 * NOTE(review): the test nr_cpu_ids >= first_cpu holds for every valid CPU
 * number - confirm the intended direction of the comparison.
 */
8866 static int request_msix_irqs(struct hfi1_devdata *dd)
8868 const struct cpumask *local_mask;
8869 cpumask_var_t def, rcv;
8870 bool def_ret, rcv_ret;
8871 int first_general, last_general;
8872 int first_sdma, last_sdma;
8873 int first_rx, last_rx;
8874 int first_cpu, restart_cpu, curr_cpu;
8875 int rcv_cpu, sdma_cpu;
8876 int i, ret = 0, possible;
8879 /* calculate the ranges we are going to use */
8881 first_sdma = last_general = first_general + 1;
8882 first_rx = last_sdma = first_sdma + dd->num_sdma;
8883 last_rx = first_rx + dd->n_krcv_queues;
8886 * Interrupt affinity.
8888 * non-rcv avail gets a default mask that
8889 * starts as possible cpus with threads reset
8890 * and each rcv avail reset.
8892 * rcv avail gets node relative 1 wrapping back
8893 * to the node relative 1 as necessary.
8896 local_mask = cpumask_of_pcibus(dd->pcidev->bus);
8897 /* if first cpu is invalid, use NUMA 0 */
8898 if (cpumask_first(local_mask) >= nr_cpu_ids)
8899 local_mask = topology_core_cpumask(0);
8901 def_ret = zalloc_cpumask_var(&def, GFP_KERNEL);
8902 rcv_ret = zalloc_cpumask_var(&rcv, GFP_KERNEL);
8903 if (!def_ret || !rcv_ret)
8905 /* use local mask as default */
8906 cpumask_copy(def, local_mask);
8907 possible = cpumask_weight(def);
8908 /* disarm threads from default */
8909 ht = cpumask_weight(
8910 topology_sibling_cpumask(cpumask_first(local_mask)));
8911 for (i = possible/ht; i < possible; i++)
8912 cpumask_clear_cpu(i, def);
8913 /* reset possible */
8914 possible = cpumask_weight(def);
8915 /* def now has full cores on chosen node*/
8916 first_cpu = cpumask_first(def);
8917 if (nr_cpu_ids >= first_cpu)
8919 restart_cpu = first_cpu;
8920 curr_cpu = restart_cpu;
/* move one CPU per kernel receive queue from def into rcv */
8922 for (i = first_cpu; i < dd->n_krcv_queues + first_cpu; i++) {
8923 cpumask_clear_cpu(curr_cpu, def);
8924 cpumask_set_cpu(curr_cpu, rcv);
8925 if (curr_cpu >= possible)
8926 curr_cpu = restart_cpu;
8930 /* def mask has non-rcv, rcv has recv mask */
8931 rcv_cpu = cpumask_first(rcv);
8932 sdma_cpu = cpumask_first(def);
8935 * Sanity check - the code expects all SDMA chip source
8936 * interrupts to be in the same CSR, starting at bit 0. Verify
8937 * that this is true by checking the bit location of the start.
8939 BUILD_BUG_ON(IS_SDMA_START % 64);
8941 for (i = 0; i < dd->num_msix_entries; i++) {
8942 struct hfi1_msix_entry *me = &dd->msix_entries[i];
8943 const char *err_info;
8944 irq_handler_t handler;
8945 irq_handler_t thread = NULL;
8948 struct hfi1_ctxtdata *rcd = NULL;
8949 struct sdma_engine *sde = NULL;
8951 /* obtain the arguments to request_irq */
8952 if (first_general <= i && i < last_general) {
8953 idx = i - first_general;
8954 handler = general_interrupt;
8956 snprintf(me->name, sizeof(me->name),
8957 DRIVER_NAME"_%d", dd->unit);
8958 err_info = "general";
8959 } else if (first_sdma <= i && i < last_sdma) {
8960 idx = i - first_sdma;
8961 sde = &dd->per_sdma[idx];
8962 handler = sdma_interrupt;
8964 snprintf(me->name, sizeof(me->name),
8965 DRIVER_NAME"_%d sdma%d", dd->unit, idx);
8967 remap_sdma_interrupts(dd, idx, i);
8968 } else if (first_rx <= i && i < last_rx) {
8971 /* no interrupt if no rcd */
8975 * Set the interrupt register and mask for this
8976 * context's interrupt.
8978 rcd->ireg = (IS_RCVAVAIL_START+idx) / 64;
8979 rcd->imask = ((u64)1) <<
8980 ((IS_RCVAVAIL_START+idx) % 64);
8981 handler = receive_context_interrupt;
8982 thread = receive_context_thread;
8984 snprintf(me->name, sizeof(me->name),
8985 DRIVER_NAME"_%d kctxt%d", dd->unit, idx);
8986 err_info = "receive context";
8987 remap_receive_available_interrupt(dd, idx, i);
8989 /* not in our expected range - complain, then
8992 "Unexpected extra MSI-X interrupt %d\n", i);
8995 /* no argument, no interrupt */
8998 /* make sure the name is terminated */
8999 me->name[sizeof(me->name)-1] = 0;
9001 ret = request_threaded_irq(me->msix.vector, handler, thread, 0,
9005 "unable to allocate %s interrupt, vector %d, index %d, err %d\n",
9006 err_info, me->msix.vector, idx, ret);
9010 * assign arg after request_irq call, so it will be
9015 if (!zalloc_cpumask_var(
9016 &dd->msix_entries[i].mask,
9019 if (handler == sdma_interrupt) {
9020 dd_dev_info(dd, "sdma engine %d cpu %d\n",
9021 sde->this_idx, sdma_cpu);
9022 cpumask_set_cpu(sdma_cpu, dd->msix_entries[i].mask);
9023 sdma_cpu = cpumask_next(sdma_cpu, def);
9024 if (sdma_cpu >= nr_cpu_ids)
9025 sdma_cpu = cpumask_first(def);
9026 } else if (handler == receive_context_interrupt) {
9027 dd_dev_info(dd, "rcv ctxt %d cpu %d\n",
9028 rcd->ctxt, rcv_cpu);
9029 cpumask_set_cpu(rcv_cpu, dd->msix_entries[i].mask);
9030 rcv_cpu = cpumask_next(rcv_cpu, rcv);
9031 if (rcv_cpu >= nr_cpu_ids)
9032 rcv_cpu = cpumask_first(rcv);
9034 /* otherwise first def */
9035 dd_dev_info(dd, "%s cpu %d\n",
9036 err_info, cpumask_first(def));
9038 cpumask_first(def), dd->msix_entries[i].mask);
9040 irq_set_affinity_hint(
9041 dd->msix_entries[i].msix.vector,
9042 dd->msix_entries[i].mask);
9046 free_cpumask_var(def);
9047 free_cpumask_var(rcv);
9055 * Set the general handler to accept all interrupts, remap all
9056 * chip interrupts back to MSI-X 0.
9058 static void reset_interrupts(struct hfi1_devdata *dd)
9062 /* all interrupts handled by the general handler */
9063 for (i = 0; i < CCE_NUM_INT_CSRS; i++)
9064 dd->gi_mask[i] = ~(u64)0;
9066 /* all chip interrupts map to MSI-X 0 */
9067 for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
9068 write_csr(dd, CCE_INT_MAP + (8*i), 0);
/*
 * set_up_interrupts - allocate and wire up the device interrupts.
 *
 * The wanted MSI-X count is 1 general interrupt plus one per SDMA engine
 * in use plus one per kernel receive queue.  An entry table of that size
 * is allocated, numbered 1-1 and handed to request_msix().  One visible
 * path falls back to a single INTx interrupt; the other stores the
 * granted count and the table in dd and rejects a grant smaller than the
 * request.  All interrupts are then masked, pending sources cleared and
 * the chip mappings reset before either request_intx_irq() or
 * request_msix_irqs() is called.  clean_up_interrupts() undoes the work on
 * the visible failure path.
 *
 * NOTE(review): the conditions that choose between these paths, the
 * allocation failure handling and the return statements are on lines
 * missing from this extract.
 */
9071 static int set_up_interrupts(struct hfi1_devdata *dd)
9073 struct hfi1_msix_entry *entries;
9076 int single_interrupt = 0; /* we expect to have all the interrupts */
9080 * 1 general, "slow path" interrupt (includes the SDMA engines
9081 * slow source, SDMACleanupDone)
9082 * N interrupts - one per used SDMA engine
9083 * M interrupt - one per kernel receive context
9085 total = 1 + dd->num_sdma + dd->n_krcv_queues;
9087 entries = kcalloc(total, sizeof(*entries), GFP_KERNEL);
9092 /* 1-1 MSI-X entry assignment */
9093 for (i = 0; i < total; i++)
9094 entries[i].msix.entry = i;
9096 /* ask for MSI-X interrupts */
9098 request_msix(dd, &request, entries);
9102 /* dd->num_msix_entries already zero */
9104 single_interrupt = 1;
9105 dd_dev_err(dd, "MSI-X failed, using INTx interrupts\n");
9108 dd->num_msix_entries = request;
9109 dd->msix_entries = entries;
9111 if (request != total) {
9112 /* using MSI-X, with reduced interrupts */
9115 "cannot handle reduced interrupt case, want %u, got %u\n",
9120 dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
9123 /* mask all interrupts */
9124 set_intr_state(dd, 0);
9125 /* clear all pending interrupts */
9126 clear_all_interrupts(dd);
9128 /* reset general handler mask, chip MSI-X mappings */
9129 reset_interrupts(dd);
9131 if (single_interrupt)
9132 ret = request_intx_irq(dd);
9134 ret = request_msix_irqs(dd);
9141 clean_up_interrupts(dd);
9146 * Set up context values in dd. Sets:
9148 * num_rcv_contexts - number of contexts being used
9149 * n_krcv_queues - number of kernel contexts
9150 * first_user_ctxt - first non-kernel context in array of contexts
9151 * freectxts - number of free user contexts
9152 * num_send_contexts - number of PIO send contexts being used
9154 static int set_up_context_variables(struct hfi1_devdata *dd)
9156 int num_kernel_contexts;
9157 int num_user_contexts;
9163 * Kernel contexts: (to be fixed later):
9164 * - min or 2 or 1 context/numa
9165 * - Context 0 - default/errors
9166 * - Context 1 - VL15
9169 num_kernel_contexts = n_krcvqs + MIN_KERNEL_KCTXTS;
9171 num_kernel_contexts = num_online_nodes();
9172 num_kernel_contexts =
9173 max_t(int, MIN_KERNEL_KCTXTS, num_kernel_contexts);
9175 * Every kernel receive context needs an ACK send context.
9176 * one send context is allocated for each VL{0-7} and VL15
9178 if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) {
9180 "Reducing # kernel rcv contexts to: %d, from %d\n",
9181 (int)(dd->chip_send_contexts - num_vls - 1),
9182 (int)num_kernel_contexts);
9183 num_kernel_contexts = dd->chip_send_contexts - num_vls - 1;
9186 * User contexts: (to be fixed later)
9187 * - set to num_rcv_contexts if non-zero
9188 * - default to 1 user context per CPU
9190 if (num_rcv_contexts)
9191 num_user_contexts = num_rcv_contexts;
9193 num_user_contexts = num_online_cpus();
9195 total_contexts = num_kernel_contexts + num_user_contexts;
9198 * Adjust the counts given a global max.
9200 if (total_contexts > dd->chip_rcv_contexts) {
9202 "Reducing # user receive contexts to: %d, from %d\n",
9203 (int)(dd->chip_rcv_contexts - num_kernel_contexts),
9204 (int)num_user_contexts);
9205 num_user_contexts = dd->chip_rcv_contexts - num_kernel_contexts;
9207 total_contexts = num_kernel_contexts + num_user_contexts;
9210 /* the first N are kernel contexts, the rest are user contexts */
9211 dd->num_rcv_contexts = total_contexts;
9212 dd->n_krcv_queues = num_kernel_contexts;
9213 dd->first_user_ctxt = num_kernel_contexts;
9214 dd->freectxts = num_user_contexts;
9216 "rcv contexts: chip %d, used %d (kernel %d, user %d)\n",
9217 (int)dd->chip_rcv_contexts,
9218 (int)dd->num_rcv_contexts,
9219 (int)dd->n_krcv_queues,
9220 (int)dd->num_rcv_contexts - dd->n_krcv_queues);
9223 * Receive array allocation:
9224 * All RcvArray entries are divided into groups of 8. This
9225 * is required by the hardware and will speed up writes to
9226 * consecutive entries by using write-combining of the entire
9229 * The number of groups are evenly divided among all contexts.
9230 * any left over groups will be given to the first N user
9233 dd->rcv_entries.group_size = RCV_INCREMENT;
9234 ngroups = dd->chip_rcv_array_count / dd->rcv_entries.group_size;
9235 dd->rcv_entries.ngroups = ngroups / dd->num_rcv_contexts;
9236 dd->rcv_entries.nctxt_extra = ngroups -
9237 (dd->num_rcv_contexts * dd->rcv_entries.ngroups);
9238 dd_dev_info(dd, "RcvArray groups %u, ctxts extra %u\n",
9239 dd->rcv_entries.ngroups,
9240 dd->rcv_entries.nctxt_extra);
9241 if (dd->rcv_entries.ngroups * dd->rcv_entries.group_size >
9242 MAX_EAGER_ENTRIES * 2) {
9243 dd->rcv_entries.ngroups = (MAX_EAGER_ENTRIES * 2) /
9244 dd->rcv_entries.group_size;
9246 "RcvArray group count too high, change to %u\n",
9247 dd->rcv_entries.ngroups);
9248 dd->rcv_entries.nctxt_extra = 0;
9253 ret = init_sc_pools_and_sizes(dd);
9254 if (ret >= 0) { /* success */
9255 dd->num_send_contexts = ret;
9258 "send contexts: chip %d, used %d (kernel %d, ack %d, user %d)\n",
9259 dd->chip_send_contexts,
9260 dd->num_send_contexts,
9261 dd->sc_sizes[SC_KERNEL].count,
9262 dd->sc_sizes[SC_ACK].count,
9263 dd->sc_sizes[SC_USER].count);
9264 ret = 0; /* success */
9271 * Set the device/port partition key table. The MAD code
9272 * will ensure that, at least, the partial management
9273 * partition key is present in the table.
9275 static void set_partition_keys(struct hfi1_pportdata *ppd)
9277 struct hfi1_devdata *dd = ppd->dd;
9281 dd_dev_info(dd, "Setting partition keys\n");
9282 for (i = 0; i < hfi1_get_npkeys(dd); i++) {
9283 reg |= (ppd->pkeys[i] &
9284 RCV_PARTITION_KEY_PARTITION_KEY_A_MASK) <<
9286 RCV_PARTITION_KEY_PARTITION_KEY_B_SHIFT);
9287 /* Each register holds 4 PKey values. */
9289 write_csr(dd, RCV_PARTITION_KEY +
9290 ((i - 3) * 2), reg);
9295 /* Always enable HW pkeys check when pkeys table is set */
9296 add_rcvctrl(dd, RCV_CTRL_RCV_PARTITION_KEY_ENABLE_SMASK);
9300 * These CSRs and memories are uninitialized on reset and must be
9301 * written before reading to set the ECC/parity bits.
9303 * NOTE: All user context CSRs that are not mmaped write-only
9304 * (e.g. the TID flows) must be initialized even if the driver never
9307 static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
9312 for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
9313 write_csr(dd, CCE_INT_MAP+(8*i), 0);
9315 /* SendCtxtCreditReturnAddr */
9316 for (i = 0; i < dd->chip_send_contexts; i++)
9317 write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0);
9319 /* PIO Send buffers */
9320 /* SDMA Send buffers */
9321 /* These are not normally read, and (presently) have no method
9322 to be read, so are not pre-initialized */
9325 /* RcvHdrTailAddr */
9326 /* RcvTidFlowTable */
9327 for (i = 0; i < dd->chip_rcv_contexts; i++) {
9328 write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0);
9329 write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0);
9330 for (j = 0; j < RXE_NUM_TID_FLOWS; j++)
9331 write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE+(8*j), 0);
9335 for (i = 0; i < dd->chip_rcv_array_count; i++)
9336 write_csr(dd, RCV_ARRAY + (8*i),
9337 RCV_ARRAY_RT_WRITE_ENABLE_SMASK);
9340 for (i = 0; i < 32; i++)
9341 write_csr(dd, RCV_QP_MAP_TABLE + (8 * i), 0);
9345 * Use the ctrl_bits in CceCtrl to clear the status_bits in CceStatus.
9347 static void clear_cce_status(struct hfi1_devdata *dd, u64 status_bits,
9350 unsigned long timeout;
9353 /* is the condition present? */
9354 reg = read_csr(dd, CCE_STATUS);
9355 if ((reg & status_bits) == 0)
9358 /* clear the condition */
9359 write_csr(dd, CCE_CTRL, ctrl_bits);
9361 /* wait for the condition to clear */
9362 timeout = jiffies + msecs_to_jiffies(CCE_STATUS_TIMEOUT);
9364 reg = read_csr(dd, CCE_STATUS);
9365 if ((reg & status_bits) == 0)
9367 if (time_after(jiffies, timeout)) {
9369 "Timeout waiting for CceStatus to clear bits 0x%llx, remaining 0x%llx\n",
9370 status_bits, reg & status_bits);
9377 /* set CCE CSRs to chip reset defaults */
9378 static void reset_cce_csrs(struct hfi1_devdata *dd)
9382 /* CCE_REVISION read-only */
9383 /* CCE_REVISION2 read-only */
9384 /* CCE_CTRL - bits clear automatically */
9385 /* CCE_STATUS read-only, use CceCtrl to clear */
9386 clear_cce_status(dd, ALL_FROZE, CCE_CTRL_SPC_UNFREEZE_SMASK);
9387 clear_cce_status(dd, ALL_TXE_PAUSE, CCE_CTRL_TXE_RESUME_SMASK);
9388 clear_cce_status(dd, ALL_RXE_PAUSE, CCE_CTRL_RXE_RESUME_SMASK);
9389 for (i = 0; i < CCE_NUM_SCRATCH; i++)
9390 write_csr(dd, CCE_SCRATCH + (8 * i), 0);
9391 /* CCE_ERR_STATUS read-only */
9392 write_csr(dd, CCE_ERR_MASK, 0);
9393 write_csr(dd, CCE_ERR_CLEAR, ~0ull);
9394 /* CCE_ERR_FORCE leave alone */
9395 for (i = 0; i < CCE_NUM_32_BIT_COUNTERS; i++)
9396 write_csr(dd, CCE_COUNTER_ARRAY32 + (8 * i), 0);
9397 write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_RESETCSR);
9398 /* CCE_PCIE_CTRL leave alone */
9399 for (i = 0; i < CCE_NUM_MSIX_VECTORS; i++) {
9400 write_csr(dd, CCE_MSIX_TABLE_LOWER + (8 * i), 0);
9401 write_csr(dd, CCE_MSIX_TABLE_UPPER + (8 * i),
9402 CCE_MSIX_TABLE_UPPER_RESETCSR);
9404 for (i = 0; i < CCE_NUM_MSIX_PBAS; i++) {
9405 /* CCE_MSIX_PBA read-only */
9406 write_csr(dd, CCE_MSIX_INT_GRANTED, ~0ull);
9407 write_csr(dd, CCE_MSIX_VEC_CLR_WITHOUT_INT, ~0ull);
9409 for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
9410 write_csr(dd, CCE_INT_MAP, 0);
9411 for (i = 0; i < CCE_NUM_INT_CSRS; i++) {
9412 /* CCE_INT_STATUS read-only */
9413 write_csr(dd, CCE_INT_MASK + (8 * i), 0);
9414 write_csr(dd, CCE_INT_CLEAR + (8 * i), ~0ull);
9415 /* CCE_INT_FORCE leave alone */
9416 /* CCE_INT_BLOCKED read-only */
9418 for (i = 0; i < CCE_NUM_32_BIT_INT_COUNTERS; i++)
9419 write_csr(dd, CCE_INT_COUNTER_ARRAY32 + (8 * i), 0);
9422 /* set ASIC CSRs to chip reset defaults */
9423 static void reset_asic_csrs(struct hfi1_devdata *dd)
9428 * If the HFIs are shared between separate nodes or VMs,
9429 * then more will need to be done here. One idea is a module
9430 * parameter that returns early, letting the first power-on or
9431 * a known first load do the reset and blocking all others.
9434 if (!(dd->flags & HFI1_DO_INIT_ASIC))
9437 if (dd->icode != ICODE_FPGA_EMULATION) {
9438 /* emulation does not have an SBus - leave these alone */
9440 * All writes to ASIC_CFG_SBUS_REQUEST do something.
9442 * o The reset is not zero if aimed at the core. See the
9443 * SBus documentation for details.
9444 * o If the SBus firmware has been updated (e.g. by the BIOS),
9445 * will the reset revert that?
9447 /* ASIC_CFG_SBUS_REQUEST leave alone */
9448 write_csr(dd, ASIC_CFG_SBUS_EXECUTE, 0);
9450 /* ASIC_SBUS_RESULT read-only */
9451 write_csr(dd, ASIC_STS_SBUS_COUNTERS, 0);
9452 for (i = 0; i < ASIC_NUM_SCRATCH; i++)
9453 write_csr(dd, ASIC_CFG_SCRATCH + (8 * i), 0);
9454 write_csr(dd, ASIC_CFG_MUTEX, 0); /* this will clear it */
9456 /* We might want to retain this state across FLR if we ever use it */
9457 write_csr(dd, ASIC_CFG_DRV_STR, 0);
9459 write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0);
9460 /* ASIC_STS_THERM read-only */
9461 /* ASIC_CFG_RESET leave alone */
9463 write_csr(dd, ASIC_PCIE_SD_HOST_CMD, 0);
9464 /* ASIC_PCIE_SD_HOST_STATUS read-only */
9465 write_csr(dd, ASIC_PCIE_SD_INTRPT_DATA_CODE, 0);
9466 write_csr(dd, ASIC_PCIE_SD_INTRPT_ENABLE, 0);
9467 /* ASIC_PCIE_SD_INTRPT_PROGRESS read-only */
9468 write_csr(dd, ASIC_PCIE_SD_INTRPT_STATUS, ~0ull); /* clear */
9469 /* ASIC_HFI0_PCIE_SD_INTRPT_RSPD_DATA read-only */
9470 /* ASIC_HFI1_PCIE_SD_INTRPT_RSPD_DATA read-only */
9471 for (i = 0; i < 16; i++)
9472 write_csr(dd, ASIC_PCIE_SD_INTRPT_LIST + (8 * i), 0);
9474 /* ASIC_GPIO_IN read-only */
9475 write_csr(dd, ASIC_GPIO_OE, 0);
9476 write_csr(dd, ASIC_GPIO_INVERT, 0);
9477 write_csr(dd, ASIC_GPIO_OUT, 0);
9478 write_csr(dd, ASIC_GPIO_MASK, 0);
9479 /* ASIC_GPIO_STATUS read-only */
9480 write_csr(dd, ASIC_GPIO_CLEAR, ~0ull);
9481 /* ASIC_GPIO_FORCE leave alone */
9483 /* ASIC_QSFP1_IN read-only */
9484 write_csr(dd, ASIC_QSFP1_OE, 0);
9485 write_csr(dd, ASIC_QSFP1_INVERT, 0);
9486 write_csr(dd, ASIC_QSFP1_OUT, 0);
9487 write_csr(dd, ASIC_QSFP1_MASK, 0);
9488 /* ASIC_QSFP1_STATUS read-only */
9489 write_csr(dd, ASIC_QSFP1_CLEAR, ~0ull);
9490 /* ASIC_QSFP1_FORCE leave alone */
9492 /* ASIC_QSFP2_IN read-only */
9493 write_csr(dd, ASIC_QSFP2_OE, 0);
9494 write_csr(dd, ASIC_QSFP2_INVERT, 0);
9495 write_csr(dd, ASIC_QSFP2_OUT, 0);
9496 write_csr(dd, ASIC_QSFP2_MASK, 0);
9497 /* ASIC_QSFP2_STATUS read-only */
9498 write_csr(dd, ASIC_QSFP2_CLEAR, ~0ull);
9499 /* ASIC_QSFP2_FORCE leave alone */
9501 write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_RESETCSR);
9502 /* this also writes a NOP command, clearing paging mode */
9503 write_csr(dd, ASIC_EEP_ADDR_CMD, 0);
9504 write_csr(dd, ASIC_EEP_DATA, 0);
9507 /* set MISC CSRs to chip reset defaults */
9508 static void reset_misc_csrs(struct hfi1_devdata *dd)
9512 for (i = 0; i < 32; i++) {
9513 write_csr(dd, MISC_CFG_RSA_R2 + (8 * i), 0);
9514 write_csr(dd, MISC_CFG_RSA_SIGNATURE + (8 * i), 0);
9515 write_csr(dd, MISC_CFG_RSA_MODULUS + (8 * i), 0);
9517 /* MISC_CFG_SHA_PRELOAD leave alone - always reads 0 and can
9518 only be written 128-byte chunks */
9519 /* init RSA engine to clear lingering errors */
9520 write_csr(dd, MISC_CFG_RSA_CMD, 1);
9521 write_csr(dd, MISC_CFG_RSA_MU, 0);
9522 write_csr(dd, MISC_CFG_FW_CTRL, 0);
9523 /* MISC_STS_8051_DIGEST read-only */
9524 /* MISC_STS_SBM_DIGEST read-only */
9525 /* MISC_STS_PCIE_DIGEST read-only */
9526 /* MISC_STS_FAB_DIGEST read-only */
9527 /* MISC_ERR_STATUS read-only */
9528 write_csr(dd, MISC_ERR_MASK, 0);
9529 write_csr(dd, MISC_ERR_CLEAR, ~0ull);
9530 /* MISC_ERR_FORCE leave alone */
9533 /* set TXE CSRs to chip reset defaults */
9534 static void reset_txe_csrs(struct hfi1_devdata *dd)
9541 write_csr(dd, SEND_CTRL, 0);
9542 __cm_reset(dd, 0); /* reset CM internal state */
9543 /* SEND_CONTEXTS read-only */
9544 /* SEND_DMA_ENGINES read-only */
9545 /* SEND_PIO_MEM_SIZE read-only */
9546 /* SEND_DMA_MEM_SIZE read-only */
9547 write_csr(dd, SEND_HIGH_PRIORITY_LIMIT, 0);
9548 pio_reset_all(dd); /* SEND_PIO_INIT_CTXT */
9549 /* SEND_PIO_ERR_STATUS read-only */
9550 write_csr(dd, SEND_PIO_ERR_MASK, 0);
9551 write_csr(dd, SEND_PIO_ERR_CLEAR, ~0ull);
9552 /* SEND_PIO_ERR_FORCE leave alone */
9553 /* SEND_DMA_ERR_STATUS read-only */
9554 write_csr(dd, SEND_DMA_ERR_MASK, 0);
9555 write_csr(dd, SEND_DMA_ERR_CLEAR, ~0ull);
9556 /* SEND_DMA_ERR_FORCE leave alone */
9557 /* SEND_EGRESS_ERR_STATUS read-only */
9558 write_csr(dd, SEND_EGRESS_ERR_MASK, 0);
9559 write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~0ull);
9560 /* SEND_EGRESS_ERR_FORCE leave alone */
9561 write_csr(dd, SEND_BTH_QP, 0);
9562 write_csr(dd, SEND_STATIC_RATE_CONTROL, 0);
9563 write_csr(dd, SEND_SC2VLT0, 0);
9564 write_csr(dd, SEND_SC2VLT1, 0);
9565 write_csr(dd, SEND_SC2VLT2, 0);
9566 write_csr(dd, SEND_SC2VLT3, 0);
9567 write_csr(dd, SEND_LEN_CHECK0, 0);
9568 write_csr(dd, SEND_LEN_CHECK1, 0);
9569 /* SEND_ERR_STATUS read-only */
9570 write_csr(dd, SEND_ERR_MASK, 0);
9571 write_csr(dd, SEND_ERR_CLEAR, ~0ull);
9572 /* SEND_ERR_FORCE read-only */
9573 for (i = 0; i < VL_ARB_LOW_PRIO_TABLE_SIZE; i++)
9574 write_csr(dd, SEND_LOW_PRIORITY_LIST + (8*i), 0);
9575 for (i = 0; i < VL_ARB_HIGH_PRIO_TABLE_SIZE; i++)
9576 write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8*i), 0);
9577 for (i = 0; i < dd->chip_send_contexts/NUM_CONTEXTS_PER_SET; i++)
9578 write_csr(dd, SEND_CONTEXT_SET_CTRL + (8*i), 0);
9579 for (i = 0; i < TXE_NUM_32_BIT_COUNTER; i++)
9580 write_csr(dd, SEND_COUNTER_ARRAY32 + (8*i), 0);
9581 for (i = 0; i < TXE_NUM_64_BIT_COUNTER; i++)
9582 write_csr(dd, SEND_COUNTER_ARRAY64 + (8*i), 0);
9583 write_csr(dd, SEND_CM_CTRL, SEND_CM_CTRL_RESETCSR);
9584 write_csr(dd, SEND_CM_GLOBAL_CREDIT,
9585 SEND_CM_GLOBAL_CREDIT_RESETCSR);
9586 /* SEND_CM_CREDIT_USED_STATUS read-only */
9587 write_csr(dd, SEND_CM_TIMER_CTRL, 0);
9588 write_csr(dd, SEND_CM_LOCAL_AU_TABLE0_TO3, 0);
9589 write_csr(dd, SEND_CM_LOCAL_AU_TABLE4_TO7, 0);
9590 write_csr(dd, SEND_CM_REMOTE_AU_TABLE0_TO3, 0);
9591 write_csr(dd, SEND_CM_REMOTE_AU_TABLE4_TO7, 0);
9592 for (i = 0; i < TXE_NUM_DATA_VL; i++)
9593 write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
9594 write_csr(dd, SEND_CM_CREDIT_VL15, 0);
9595 /* SEND_CM_CREDIT_USED_VL read-only */
9596 /* SEND_CM_CREDIT_USED_VL15 read-only */
9597 /* SEND_EGRESS_CTXT_STATUS read-only */
9598 /* SEND_EGRESS_SEND_DMA_STATUS read-only */
9599 write_csr(dd, SEND_EGRESS_ERR_INFO, ~0ull);
9600 /* SEND_EGRESS_ERR_INFO read-only */
9601 /* SEND_EGRESS_ERR_SOURCE read-only */
9604 * TXE Per-Context CSRs
9606 for (i = 0; i < dd->chip_send_contexts; i++) {
9607 write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0);
9608 write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_CTRL, 0);
9609 write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0);
9610 write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_FORCE, 0);
9611 write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, 0);
9612 write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~0ull);
9613 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_ENABLE, 0);
9614 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_VL, 0);
9615 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_JOB_KEY, 0);
9616 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_PARTITION_KEY, 0);
9617 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, 0);
9618 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_OPCODE, 0);
9624 for (i = 0; i < dd->chip_sdma_engines; i++) {
9625 write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0);
9626 /* SEND_DMA_STATUS read-only */
9627 write_kctxt_csr(dd, i, SEND_DMA_BASE_ADDR, 0);
9628 write_kctxt_csr(dd, i, SEND_DMA_LEN_GEN, 0);
9629 write_kctxt_csr(dd, i, SEND_DMA_TAIL, 0);
9630 /* SEND_DMA_HEAD read-only */
9631 write_kctxt_csr(dd, i, SEND_DMA_HEAD_ADDR, 0);
9632 write_kctxt_csr(dd, i, SEND_DMA_PRIORITY_THLD, 0);
9633 /* SEND_DMA_IDLE_CNT read-only */
9634 write_kctxt_csr(dd, i, SEND_DMA_RELOAD_CNT, 0);
9635 write_kctxt_csr(dd, i, SEND_DMA_DESC_CNT, 0);
9636 /* SEND_DMA_DESC_FETCHED_CNT read-only */
9637 /* SEND_DMA_ENG_ERR_STATUS read-only */
9638 write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, 0);
9639 write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~0ull);
9640 /* SEND_DMA_ENG_ERR_FORCE leave alone */
9641 write_kctxt_csr(dd, i, SEND_DMA_CHECK_ENABLE, 0);
9642 write_kctxt_csr(dd, i, SEND_DMA_CHECK_VL, 0);
9643 write_kctxt_csr(dd, i, SEND_DMA_CHECK_JOB_KEY, 0);
9644 write_kctxt_csr(dd, i, SEND_DMA_CHECK_PARTITION_KEY, 0);
9645 write_kctxt_csr(dd, i, SEND_DMA_CHECK_SLID, 0);
9646 write_kctxt_csr(dd, i, SEND_DMA_CHECK_OPCODE, 0);
9647 write_kctxt_csr(dd, i, SEND_DMA_MEMORY, 0);
9653 * o Packet ingress is disabled, i.e. RcvCtrl.RcvPortEnable == 0
9655 static void init_rbufs(struct hfi1_devdata *dd)
9661 * Wait for DMA to stop: RxRbufPktPending and RxPktInProgress are
9666 reg = read_csr(dd, RCV_STATUS);
9667 if ((reg & (RCV_STATUS_RX_RBUF_PKT_PENDING_SMASK
9668 | RCV_STATUS_RX_PKT_IN_PROGRESS_SMASK)) == 0)
9671 * Give up after 1ms - maximum wait time.
9673 * RBuf size is 148KiB. Slowest possible is PCIe Gen1 x1 at
9674 * 250MB/s bandwidth. Lower rate to 66% for overhead to get:
9675 * 148 KB / (66% * 250MB/s) = 920us
9677 if (count++ > 500) {
9679 "%s: in-progress DMA not clearing: RcvStatus 0x%llx, continuing\n",
9683 udelay(2); /* do not busy-wait the CSR */
9686 /* start the init - expect RcvCtrl to be 0 */
9687 write_csr(dd, RCV_CTRL, RCV_CTRL_RX_RBUF_INIT_SMASK);
9690 * Read to force the write of RcvCtrl.RxRbufInit. There is a brief
9691 * period after the write before RcvStatus.RxRbufInitDone is valid.
9692 * The delay in the first run through the loop below is sufficient and
9693 * required before the first read of RcvStatus.RxRbufInitDone.
9695 read_csr(dd, RCV_CTRL);
9697 /* wait for the init to finish */
9700 /* delay is required first time through - see above */
9701 udelay(2); /* do not busy-wait the CSR */
9702 reg = read_csr(dd, RCV_STATUS);
9703 if (reg & (RCV_STATUS_RX_RBUF_INIT_DONE_SMASK))
9706 /* give up after 100us - slowest possible at 33MHz is 73us */
9709 "%s: RcvStatus.RxRbufInit not set, continuing\n",
9716 /* set RXE CSRs to chip reset defaults */
9717 static void reset_rxe_csrs(struct hfi1_devdata *dd)
9724 write_csr(dd, RCV_CTRL, 0);
9726 /* RCV_STATUS read-only */
9727 /* RCV_CONTEXTS read-only */
9728 /* RCV_ARRAY_CNT read-only */
9729 /* RCV_BUF_SIZE read-only */
9730 write_csr(dd, RCV_BTH_QP, 0);
9731 write_csr(dd, RCV_MULTICAST, 0);
9732 write_csr(dd, RCV_BYPASS, 0);
9733 write_csr(dd, RCV_VL15, 0);
9734 /* this is a clear-down */
9735 write_csr(dd, RCV_ERR_INFO,
9736 RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
9737 /* RCV_ERR_STATUS read-only */
9738 write_csr(dd, RCV_ERR_MASK, 0);
9739 write_csr(dd, RCV_ERR_CLEAR, ~0ull);
9740 /* RCV_ERR_FORCE leave alone */
9741 for (i = 0; i < 32; i++)
9742 write_csr(dd, RCV_QP_MAP_TABLE + (8 * i), 0);
9743 for (i = 0; i < 4; i++)
9744 write_csr(dd, RCV_PARTITION_KEY + (8 * i), 0);
9745 for (i = 0; i < RXE_NUM_32_BIT_COUNTERS; i++)
9746 write_csr(dd, RCV_COUNTER_ARRAY32 + (8 * i), 0);
9747 for (i = 0; i < RXE_NUM_64_BIT_COUNTERS; i++)
9748 write_csr(dd, RCV_COUNTER_ARRAY64 + (8 * i), 0);
9749 for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++) {
9750 write_csr(dd, RCV_RSM_CFG + (8 * i), 0);
9751 write_csr(dd, RCV_RSM_SELECT + (8 * i), 0);
9752 write_csr(dd, RCV_RSM_MATCH + (8 * i), 0);
9754 for (i = 0; i < 32; i++)
9755 write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), 0);
9758 * RXE Kernel and User Per-Context CSRs
9760 for (i = 0; i < dd->chip_rcv_contexts; i++) {
9762 write_kctxt_csr(dd, i, RCV_CTXT_CTRL, 0);
9763 /* RCV_CTXT_STATUS read-only */
9764 write_kctxt_csr(dd, i, RCV_EGR_CTRL, 0);
9765 write_kctxt_csr(dd, i, RCV_TID_CTRL, 0);
9766 write_kctxt_csr(dd, i, RCV_KEY_CTRL, 0);
9767 write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0);
9768 write_kctxt_csr(dd, i, RCV_HDR_CNT, 0);
9769 write_kctxt_csr(dd, i, RCV_HDR_ENT_SIZE, 0);
9770 write_kctxt_csr(dd, i, RCV_HDR_SIZE, 0);
9771 write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0);
9772 write_kctxt_csr(dd, i, RCV_AVAIL_TIME_OUT, 0);
9773 write_kctxt_csr(dd, i, RCV_HDR_OVFL_CNT, 0);
9776 /* RCV_HDR_TAIL read-only */
9777 write_uctxt_csr(dd, i, RCV_HDR_HEAD, 0);
9778 /* RCV_EGR_INDEX_TAIL read-only */
9779 write_uctxt_csr(dd, i, RCV_EGR_INDEX_HEAD, 0);
9780 /* RCV_EGR_OFFSET_TAIL read-only */
9781 for (j = 0; j < RXE_NUM_TID_FLOWS; j++) {
9782 write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE + (8 * j),
9791 * They power on to zeros, so to avoid send context errors
9792 * they need to be set:
9794 * SC 0-7 -> VL 0-7 (respectively)
9799 static void init_sc2vl_tables(struct hfi1_devdata *dd)
9802 /* init per architecture spec, constrained by hardware capability */
9804 /* HFI maps sent packets */
9805 write_csr(dd, SEND_SC2VLT0, SC2VL_VAL(
9811 write_csr(dd, SEND_SC2VLT1, SC2VL_VAL(
9817 write_csr(dd, SEND_SC2VLT2, SC2VL_VAL(
9823 write_csr(dd, SEND_SC2VLT3, SC2VL_VAL(
9830 /* DC maps received packets */
9831 write_csr(dd, DCC_CFG_SC_VL_TABLE_15_0, DC_SC_VL_VAL(
9833 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
9834 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15, 15));
9835 write_csr(dd, DCC_CFG_SC_VL_TABLE_31_16, DC_SC_VL_VAL(
9837 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23, 0,
9838 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31, 0));
9840 /* initialize the cached sc2vl values consistently with h/w */
9841 for (i = 0; i < 32; i++) {
9842 if (i < 8 || i == 15)
9843 *((u8 *)(dd->sc2vl) + i) = (u8)i;
9845 *((u8 *)(dd->sc2vl) + i) = 0;
9850 * Read chip sizes and then reset parts to sane, disabled, values. We cannot
9851 * depend on the chip going through a power-on reset - a driver may be loaded
9852 * and unloaded many times.
9854 * Do not write any CSR values to the chip in this routine - there may be
9855 * a reset following the (possible) FLR in this routine.
9858 static void init_chip(struct hfi1_devdata *dd)
9863 * Put the HFI CSRs in a known state.
9864 * Combine this with a DC reset.
9866 * Stop the device from doing anything while we do a
9867 * reset. We know there are no other active users of
9868 * the device since we are now in charge. Turn off
9869 * off all outbound and inbound traffic and make sure
9870 * the device does not generate any interrupts.
9873 /* disable send contexts and SDMA engines */
9874 write_csr(dd, SEND_CTRL, 0);
9875 for (i = 0; i < dd->chip_send_contexts; i++)
9876 write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0);
9877 for (i = 0; i < dd->chip_sdma_engines; i++)
9878 write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0);
9879 /* disable port (turn off RXE inbound traffic) and contexts */
9880 write_csr(dd, RCV_CTRL, 0);
9881 for (i = 0; i < dd->chip_rcv_contexts; i++)
9882 write_csr(dd, RCV_CTXT_CTRL, 0);
9883 /* mask all interrupt sources */
9884 for (i = 0; i < CCE_NUM_INT_CSRS; i++)
9885 write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
9888 * DC Reset: do a full DC reset before the register clear.
9889 * A recommended length of time to hold is one CSR read,
9890 * so reread the CceDcCtrl. Then, hold the DC in reset
9893 write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_DC_RESET_SMASK);
9894 (void) read_csr(dd, CCE_DC_CTRL);
9898 * A FLR will reset the SPC core and part of the PCIe.
9899 * The parts that need to be restored have already been
9902 dd_dev_info(dd, "Resetting CSRs with FLR\n");
9904 /* do the FLR, the DC reset will remain */
9907 /* restore command and BARs */
9908 restore_pci_variables(dd);
9911 dd_dev_info(dd, "Resetting CSRs with FLR\n");
9913 restore_pci_variables(dd);
9916 reset_asic_csrs(dd);
9918 dd_dev_info(dd, "Resetting CSRs with writes\n");
9922 reset_asic_csrs(dd);
9923 reset_misc_csrs(dd);
9925 /* clear the DC reset */
9926 write_csr(dd, CCE_DC_CTRL, 0);
9928 /* Set the LED off */
9932 * Clear the QSFP reset.
9933 * A0 leaves the out lines floating on power on, then on an FLR
9934 * enforces a 0 on all out pins. The driver does not touch
9935 * ASIC_QSFPn_OUT otherwise. This leaves RESET_N low and
9936 * anything plugged constantly in reset, if it pays attention
9938 * A prime example of this is SiPh. For now, set all pins high.
9939 * I2CCLK and I2CDAT will change per direction, and INT_N and
9940 * MODPRS_N are input only and their value is ignored.
9943 write_csr(dd, ASIC_QSFP1_OUT, 0x1f);
9944 write_csr(dd, ASIC_QSFP2_OUT, 0x1f);
9948 static void init_early_variables(struct hfi1_devdata *dd)
9952 /* assign link credit variables */
9954 dd->link_credits = CM_GLOBAL_CREDITS;
9957 dd->vcu = cu_to_vcu(hfi1_cu);
9958 /* enough room for 8 MAD packets plus header - 17K */
9959 dd->vl15_init = (8 * (2048 + 128)) / vau_to_au(dd->vau);
9960 if (dd->vl15_init > dd->link_credits)
9961 dd->vl15_init = dd->link_credits;
9963 write_uninitialized_csrs_and_memories(dd);
9965 if (HFI1_CAP_IS_KSET(PKEY_CHECK))
9966 for (i = 0; i < dd->num_pports; i++) {
9967 struct hfi1_pportdata *ppd = &dd->pport[i];
9969 set_partition_keys(ppd);
9971 init_sc2vl_tables(dd);
9974 static void init_kdeth_qp(struct hfi1_devdata *dd)
9976 /* user changed the KDETH_QP */
9977 if (kdeth_qp != 0 && kdeth_qp >= 0xff) {
9978 /* out of range or illegal value */
9979 dd_dev_err(dd, "Invalid KDETH queue pair prefix, ignoring");
9982 if (kdeth_qp == 0) /* not set, or failed range check */
9983 kdeth_qp = DEFAULT_KDETH_QP;
9985 write_csr(dd, SEND_BTH_QP,
9986 (kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK)
9987 << SEND_BTH_QP_KDETH_QP_SHIFT);
9989 write_csr(dd, RCV_BTH_QP,
9990 (kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK)
9991 << RCV_BTH_QP_KDETH_QP_SHIFT);
9997 * @first_ctxt - first context
9998 * @last_ctxt - last context
10000 * This routine sets the qpn mapping table that
10001 * is indexed by qpn[8:1].
10003 * The routine will round robin the 256 settings
10004 * from first_ctxt to last_ctxt.
10006 * The first/last looks ahead to having specialized
10007 * receive contexts for mgmt and bypass. Normal
10008 * verbs traffic will assumed to be on a range
10009 * of receive contexts.
10011 static void init_qpmap_table(struct hfi1_devdata *dd,
10016 u64 regno = RCV_QP_MAP_TABLE;
10018 u64 ctxt = first_ctxt;
10020 for (i = 0; i < 256;) {
10021 if (ctxt == VL15CTXT) {
10023 if (ctxt > last_ctxt)
10027 reg |= ctxt << (8 * (i % 8));
10030 if (ctxt > last_ctxt)
10033 write_csr(dd, regno, reg);
10039 write_csr(dd, regno, reg);
10041 add_rcvctrl(dd, RCV_CTRL_RCV_QP_MAP_ENABLE_SMASK
10042 | RCV_CTRL_RCV_BYPASS_ENABLE_SMASK);
10046 * init_qos - init RX qos
10047 * @dd - device data
10050 * This routine initializes Rule 0 and the
10051 * RSM map table to implement qos.
10053 * If all of the limit tests succeed,
10054 * qos is applied based on the array
10055 * interpretation of krcvqs where
10058 * The number of vl bits (n) and the number of qpn
10059 * bits (m) are computed to feed both the RSM map table
10060 * and the single rule.
10063 static void init_qos(struct hfi1_devdata *dd, u32 first_ctxt)
10066 unsigned qpns_per_vl, ctxt, i, qpn, n = 1, m;
10069 u8 rxcontext = is_a0(dd) ? 0 : 0xff; /* 0 is default if a0 ver. */
10072 if (dd->n_krcv_queues <= MIN_KERNEL_KCTXTS ||
10076 for (i = 0; i < min_t(unsigned, num_vls, krcvqsset); i++)
10077 if (krcvqs[i] > max_by_vl)
10078 max_by_vl = krcvqs[i];
10079 if (max_by_vl > 32)
10081 qpns_per_vl = __roundup_pow_of_two(max_by_vl);
10082 /* determine bits vl */
10083 n = ilog2(num_vls);
10084 /* determine bits for qpn */
10085 m = ilog2(qpns_per_vl);
10088 if (num_vls * qpns_per_vl > dd->chip_rcv_contexts)
10090 rsmmap = kmalloc_array(NUM_MAP_REGS, sizeof(u64), GFP_KERNEL);
10091 memset(rsmmap, rxcontext, NUM_MAP_REGS * sizeof(u64));
10092 /* init the local copy of the table */
10093 for (i = 0, ctxt = first_ctxt; i < num_vls; i++) {
10096 for (qpn = 0, tctxt = ctxt;
10097 krcvqs[i] && qpn < qpns_per_vl; qpn++) {
10098 unsigned idx, regoff, regidx;
10100 /* generate index <= 128 */
10101 idx = (qpn << n) ^ i;
10102 regoff = (idx % 8) * 8;
10104 reg = rsmmap[regidx];
10105 /* replace 0xff with context number */
10106 reg &= ~(RCV_RSM_MAP_TABLE_RCV_CONTEXT_A_MASK
10108 reg |= (u64)(tctxt++) << regoff;
10109 rsmmap[regidx] = reg;
10110 if (tctxt == ctxt + krcvqs[i])
10115 /* flush cached copies to chip */
10116 for (i = 0; i < NUM_MAP_REGS; i++)
10117 write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), rsmmap[i]);
10119 write_csr(dd, RCV_RSM_CFG /* + (8 * 0) */,
10120 RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK
10121 << RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT |
10122 2ull << RCV_RSM_CFG_PACKET_TYPE_SHIFT);
10123 write_csr(dd, RCV_RSM_SELECT /* + (8 * 0) */,
10124 LRH_BTH_MATCH_OFFSET
10125 << RCV_RSM_SELECT_FIELD1_OFFSET_SHIFT |
10126 LRH_SC_MATCH_OFFSET << RCV_RSM_SELECT_FIELD2_OFFSET_SHIFT |
10127 LRH_SC_SELECT_OFFSET << RCV_RSM_SELECT_INDEX1_OFFSET_SHIFT |
10128 ((u64)n) << RCV_RSM_SELECT_INDEX1_WIDTH_SHIFT |
10129 QPN_SELECT_OFFSET << RCV_RSM_SELECT_INDEX2_OFFSET_SHIFT |
10130 ((u64)m + (u64)n) << RCV_RSM_SELECT_INDEX2_WIDTH_SHIFT);
10131 write_csr(dd, RCV_RSM_MATCH /* + (8 * 0) */,
10132 LRH_BTH_MASK << RCV_RSM_MATCH_MASK1_SHIFT |
10133 LRH_BTH_VALUE << RCV_RSM_MATCH_VALUE1_SHIFT |
10134 LRH_SC_MASK << RCV_RSM_MATCH_MASK2_SHIFT |
10135 LRH_SC_VALUE << RCV_RSM_MATCH_VALUE2_SHIFT);
10137 add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
10139 /* map everything else (non-VL15) to context 0 */
10144 dd->qos_shift = n + 1;
10150 dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? MIN_KERNEL_KCTXTS : 0,
10151 dd->n_krcv_queues - 1);
10154 static void init_rxe(struct hfi1_devdata *dd)
10156 /* enable all receive errors */
10157 write_csr(dd, RCV_ERR_MASK, ~0ull);
10158 /* setup QPN map table - start where VL15 context leaves off */
10161 dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? MIN_KERNEL_KCTXTS : 0);
10163 * make sure RcvCtrl.RcvWcb <= PCIe Device Control
10164 * Register Max_Payload_Size (PCI_EXP_DEVCTL in Linux PCIe config
10165 * space, PciCfgCap2.MaxPayloadSize in HFI). There is only one
10166 * invalid configuration: RcvCtrl.RcvWcb set to its max of 256 and
10167 * Max_PayLoad_Size set to its minimum of 128.
10169 * Presently, RcvCtrl.RcvWcb is not modified from its default of 0
10170 * (64 bytes). Max_Payload_Size is possibly modified upward in
10171 * tune_pcie_caps() which is called after this routine.
10175 static void init_other(struct hfi1_devdata *dd)
10177 /* enable all CCE errors */
10178 write_csr(dd, CCE_ERR_MASK, ~0ull);
10179 /* enable *some* Misc errors */
10180 write_csr(dd, MISC_ERR_MASK, DRIVER_MISC_MASK);
10181 /* enable all DC errors, except LCB */
10182 write_csr(dd, DCC_ERR_FLG_EN, ~0ull);
10183 write_csr(dd, DC_DC8051_ERR_EN, ~0ull);
10187 * Fill out the given AU table using the given CU. A CU is defined in terms of
10188 * AUs. The table is an encoding: given the index, how many AUs does that
10191 * NOTE: Assumes that the register layout is the same for the
10192 * local and remote tables.
10194 static void assign_cm_au_table(struct hfi1_devdata *dd, u32 cu,
10195 u32 csr0to3, u32 csr4to7)
10197 write_csr(dd, csr0to3,
10199 SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE0_SHIFT
10201 SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE1_SHIFT
10203 SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE2_SHIFT
10205 SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE3_SHIFT);
10206 write_csr(dd, csr4to7,
10208 SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE4_SHIFT
10210 SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE5_SHIFT
10212 SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE6_SHIFT
10214 SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE7_SHIFT);
10218 static void assign_local_cm_au_table(struct hfi1_devdata *dd, u8 vcu)
10220 assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_LOCAL_AU_TABLE0_TO3,
10221 SEND_CM_LOCAL_AU_TABLE4_TO7);
10224 void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu)
10226 assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_REMOTE_AU_TABLE0_TO3,
10227 SEND_CM_REMOTE_AU_TABLE4_TO7);
10230 static void init_txe(struct hfi1_devdata *dd)
10234 /* enable all PIO, SDMA, general, and Egress errors */
10235 write_csr(dd, SEND_PIO_ERR_MASK, ~0ull);
10236 write_csr(dd, SEND_DMA_ERR_MASK, ~0ull);
10237 write_csr(dd, SEND_ERR_MASK, ~0ull);
10238 write_csr(dd, SEND_EGRESS_ERR_MASK, ~0ull);
10240 /* enable all per-context and per-SDMA engine errors */
10241 for (i = 0; i < dd->chip_send_contexts; i++)
10242 write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, ~0ull);
10243 for (i = 0; i < dd->chip_sdma_engines; i++)
10244 write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, ~0ull);
10246 /* set the local CU to AU mapping */
10247 assign_local_cm_au_table(dd, dd->vcu);
10250 * Set reasonable default for Credit Return Timer
10251 * Don't set on Simulator - causes it to choke.
10253 if (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)
10254 write_csr(dd, SEND_CM_TIMER_CTRL, HFI1_CREDIT_RETURN_RATE);
/*
 * Program the job key @jkey for receive context @ctxt and for the send
 * context attached to it (rcd->sc).
 *
 * The send side gets the key value plus an all-ones mask in
 * SEND_CTXT_CHECK_JOB_KEY and has the job key check bit set in
 * SEND_CTXT_CHECK_ENABLE; the receive side gets the key and its enable bit
 * in RCV_KEY_CTRL.
 *
 * NOTE(review): dd->rcd[ctxt] is indexed without a visible range check on
 * @ctxt, unlike the pkey helpers which compare against
 * dd->num_rcv_contexts first - confirm callers guarantee a valid index.
 * The return statements are not visible in this view.
 */
10257 int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey)
10259 struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
/* a receive context with an attached send context is required */
10264 if (!rcd || !rcd->sc) {
10268 sctxt = rcd->sc->hw_context;
10269 reg = SEND_CTXT_CHECK_JOB_KEY_MASK_SMASK | /* mask is always 1's */
10270 ((jkey & SEND_CTXT_CHECK_JOB_KEY_VALUE_MASK) <<
10271 SEND_CTXT_CHECK_JOB_KEY_VALUE_SHIFT);
10272 /* JOB_KEY_ALLOW_PERMISSIVE is not allowed by default */
10273 if (HFI1_CAP_KGET_MASK(rcd->flags, ALLOW_PERM_JKEY))
10274 reg |= SEND_CTXT_CHECK_JOB_KEY_ALLOW_PERMISSIVE_SMASK;
10275 write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, reg);
10277 * Enable send-side J_KEY integrity check, unless this is A0 h/w
10278 * (due to A0 erratum).
/* read-modify-write so the other check-enable bits are preserved */
10281 reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10282 reg |= SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
10283 write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10286 /* Enable J_KEY check on receive context. */
10287 reg = RCV_KEY_CTRL_JOB_KEY_ENABLE_SMASK |
10288 ((jkey & RCV_KEY_CTRL_JOB_KEY_VALUE_MASK) <<
10289 RCV_KEY_CTRL_JOB_KEY_VALUE_SHIFT);
/* the receive CSR is indexed by @ctxt, the send CSRs by sctxt */
10290 write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, reg);
/*
 * Undo hfi1_set_ctxt_jkey() for receive context @ctxt: zeroes
 * SEND_CTXT_CHECK_JOB_KEY on the attached send context, clears the job key
 * check bit in SEND_CTXT_CHECK_ENABLE, and zeroes RCV_KEY_CTRL on the
 * receive context.
 *
 * NOTE(review): dd->rcd[ctxt] is indexed without a visible range check on
 * @ctxt - confirm callers guarantee a valid index. The return statements
 * are not visible in this view.
 */
10295 int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt)
10297 struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
/* a receive context with an attached send context is required */
10302 if (!rcd || !rcd->sc) {
10306 sctxt = rcd->sc->hw_context;
10307 write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, 0);
10309 * Disable send-side J_KEY integrity check, unless this is A0 h/w.
10310 * This check would not have been enabled for A0 h/w, see
/* read-modify-write so the other check-enable bits are preserved */
10314 reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10315 reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
10316 write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10318 /* Turn off the J_KEY on the receive side */
10319 write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, 0);
/*
 * Program partition key @pkey on the send context attached to receive
 * context @ctxt: writes the masked, shifted key to
 * SEND_CTXT_CHECK_PARTITION_KEY and then sets the partition key check bit
 * in SEND_CTXT_CHECK_ENABLE.
 *
 * The handling of an out-of-range @ctxt and the return statements are not
 * visible in this view.
 */
10324 int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey)
10326 struct hfi1_ctxtdata *rcd;
/* only index dd->rcd[] when @ctxt is below dd->num_rcv_contexts */
10331 if (ctxt < dd->num_rcv_contexts)
10332 rcd = dd->rcd[ctxt];
10337 if (!rcd || !rcd->sc) {
10341 sctxt = rcd->sc->hw_context;
/* widen to u64 before masking and shifting into the CSR field */
10342 reg = ((u64)pkey & SEND_CTXT_CHECK_PARTITION_KEY_VALUE_MASK) <<
10343 SEND_CTXT_CHECK_PARTITION_KEY_VALUE_SHIFT;
10344 write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, reg);
/* key is written first, then the check is switched on */
10345 reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10346 reg |= SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
10347 write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
/*
 * Undo hfi1_set_ctxt_pkey() on the send context attached to receive context
 * @ctxt: clears the partition key check bit in SEND_CTXT_CHECK_ENABLE and
 * then zeroes SEND_CTXT_CHECK_PARTITION_KEY.
 *
 * The handling of an out-of-range @ctxt and the return statements are not
 * visible in this view.
 */
10352 int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt)
10354 struct hfi1_ctxtdata *rcd;
/* only index dd->rcd[] when @ctxt is below dd->num_rcv_contexts */
10359 if (ctxt < dd->num_rcv_contexts)
10360 rcd = dd->rcd[ctxt];
10365 if (!rcd || !rcd->sc) {
10369 sctxt = rcd->sc->hw_context;
/* the check is switched off first, then the key is zeroed */
10370 reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10371 reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
10372 write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10373 write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, 0);
10379 * Start doing the clean up of the chip. Our clean up happens in multiple
10380 * stages and this is just the first.
/*
 * First cleanup stage for device @dd. The only step visible in this view is
 * the clean_up_interrupts() call; any earlier statements in the body are
 * not shown here.
 */
10382 void hfi1_start_cleanup(struct hfi1_devdata *dd)
10386 clean_up_interrupts(dd);
/*
 * Base GUID of @dev with the single bit at position GUID_HFI_INDEX_SHIFT
 * cleared, so two devices whose GUIDs differ only in that bit compare equal.
 */
10389 #define HFI_BASE_GUID(dev) \
10390 ((dev)->base_guid & ~(1ULL << GUID_HFI_INDEX_SHIFT))
10393 * Certain chip functions need to be initialized only once per asic
10394 * instead of per-device. This function finds the peer device and
10395 * checks whether that chip initialization needs to be done by this
/*
 * Decide whether @dd should perform the once-per-ASIC initialization and
 * record the decision by setting HFI1_DO_INIT_ASIC in dd->flags.
 *
 * The device list is walked and the flag updated while holding
 * hfi1_devs_lock with interrupts saved/disabled.
 */
10398 static void asic_should_init(struct hfi1_devdata *dd)
10400 unsigned long flags;
10401 struct hfi1_devdata *tmp, *peer = NULL;
10403 spin_lock_irqsave(&hfi1_devs_lock, flags);
10404 /* Find our peer device */
/* a peer has the same HFI_BASE_GUID() value but a different unit number */
10405 list_for_each_entry(tmp, &hfi1_dev_list, list) {
10406 if ((HFI_BASE_GUID(dd) == HFI_BASE_GUID(tmp)) &&
10407 dd->unit != tmp->unit) {
10414 * "Claim" the ASIC for initialization if it hasn't been
/* claim when there is no peer, or the peer has not set the flag itself */
10417 if (!peer || !(peer->flags & HFI1_DO_INIT_ASIC))
10418 dd->flags |= HFI1_DO_INIT_ASIC;
10419 spin_unlock_irqrestore(&hfi1_devs_lock, flags);
10423 * Allocate and initialize the device structure for the hfi.
10424 * @pdev: the pci_dev for hfi1_ib device
10425 * @ent: pci_device_id struct for this dev
10427 * Also allocates, initializes, and returns the devdata struct for this
10430 * This is global, and is called directly at init to set up the
10431 * chip-specific function pointers for later use.
/*
 * Steps visible in this view, in order: allocate the devdata with room for
 * NUM_IB_PORTS port structures, fill in per-port defaults, finish PCIe setup
 * with hfi1_pcie_ddinit(), read the chip revision and size CSRs, load
 * firmware and do the PCIe Gen3 transition, set up contexts, SDMA and
 * interrupts, then counters, receive-error state and the EPROM. Later
 * failures jump to bail_* labels; the visible unwind calls are
 * clean_up_interrupts(), hfi1_pcie_ddcleanup() and hfi1_free_devdata().
 * Several statements, the labels and the return statements are not visible
 * here.
 */
10433 struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
10434 const struct pci_device_id *ent)
10436 struct hfi1_devdata *dd;
10437 struct hfi1_pportdata *ppd;
/* indexed by dd->icode when printing the implementation name below */
10440 static const char * const inames[] = { /* implementation names */
10442 "RTL VCS simulation",
10443 "RTL FPGA emulation",
10444 "Functional simulator"
10447 dd = hfi1_alloc_devdata(pdev,
10448 NUM_IB_PORTS * sizeof(struct hfi1_pportdata));
10452 for (i = 0; i < dd->num_pports; i++, ppd++) {
10454 /* init common fields */
10455 hfi1_init_pportdata(pdev, ppd, dd, 0, 1);
10456 /* DC supports 4 link widths */
10457 ppd->link_width_supported =
10458 OPA_LINK_WIDTH_1X | OPA_LINK_WIDTH_2X |
10459 OPA_LINK_WIDTH_3X | OPA_LINK_WIDTH_4X;
10460 ppd->link_width_downgrade_supported =
10461 ppd->link_width_supported;
10462 /* start out enabling only 4X */
10463 ppd->link_width_enabled = OPA_LINK_WIDTH_4X;
10464 ppd->link_width_downgrade_enabled =
10465 ppd->link_width_downgrade_supported;
10466 /* link width active is 0 when link is down */
10467 /* link width downgrade active is 0 when link is down */
/* an out-of-range num_vls is reported and replaced by the maximum */
10469 if (num_vls < HFI1_MIN_VLS_SUPPORTED
10470 || num_vls > HFI1_MAX_VLS_SUPPORTED) {
10471 hfi1_early_err(&pdev->dev,
10472 "Invalid num_vls %u, using %u VLs\n",
10473 num_vls, HFI1_MAX_VLS_SUPPORTED);
10474 num_vls = HFI1_MAX_VLS_SUPPORTED;
10476 ppd->vls_supported = num_vls;
10477 ppd->vls_operational = ppd->vls_supported;
10478 /* Set the default MTU. */
10479 for (vl = 0; vl < num_vls; vl++)
10480 dd->vld[vl].mtu = hfi1_max_mtu;
/* entry 15 gets MAX_MAD_PACKET rather than hfi1_max_mtu */
10481 dd->vld[15].mtu = MAX_MAD_PACKET;
10483 * Set the initial values to reasonable default, will be set
10484 * for real when link is up.
10486 ppd->lstate = IB_PORT_DOWN;
10487 ppd->overrun_threshold = 0x4;
10488 ppd->phy_error_threshold = 0xf;
10489 ppd->port_crc_mode_enabled = link_crc_mask;
10490 /* initialize supported LTP CRC mode */
10491 ppd->port_ltp_crc_mode = cap_to_port_ltp(link_crc_mask) << 8;
10492 /* initialize enabled LTP CRC mode */
10493 ppd->port_ltp_crc_mode |= cap_to_port_ltp(link_crc_mask) << 4;
10494 /* start in offline */
10495 ppd->host_link_state = HLS_DN_OFFLINE;
10496 init_vl_arb_caches(ppd);
10499 dd->link_default = HLS_DN_POLL;
10502 * Do remaining PCIe setup and save PCIe values in dd.
10503 * Any error printing is already done by the init code.
10504 * On return, we have the chip mapped.
10506 ret = hfi1_pcie_ddinit(dd, pdev, ent);
10510 /* verify that reads actually work, save revision for reset check */
10511 dd->revision = read_csr(dd, CCE_REVISION);
/* an all-ones read is treated as "CSRs not readable" */
10512 if (dd->revision == ~(u64)0) {
10513 dd_dev_err(dd, "cannot read chip CSRs\n");
10517 dd->majrev = (dd->revision >> CCE_REVISION_CHIP_REV_MAJOR_SHIFT)
10518 & CCE_REVISION_CHIP_REV_MAJOR_MASK;
10519 dd->minrev = (dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
10520 & CCE_REVISION_CHIP_REV_MINOR_MASK;
10522 /* obtain the hardware ID - NOT related to unit, which is a
10523 software enumeration */
10524 reg = read_csr(dd, CCE_REVISION2);
10525 dd->hfi1_id = (reg >> CCE_REVISION2_HFI_ID_SHIFT)
10526 & CCE_REVISION2_HFI_ID_MASK;
10527 /* the variable size will remove unwanted bits */
10528 dd->icode = reg >> CCE_REVISION2_IMPL_CODE_SHIFT;
10529 dd->irev = reg >> CCE_REVISION2_IMPL_REVISION_SHIFT;
/* icode values beyond the inames[] table print as "unknown" */
10530 dd_dev_info(dd, "Implementation: %s, revision 0x%x\n",
10531 dd->icode < ARRAY_SIZE(inames) ? inames[dd->icode] : "unknown",
10534 /* speeds the hardware can support */
10535 dd->pport->link_speed_supported = OPA_LINK_SPEED_25G;
10536 /* speeds allowed to run at */
10537 dd->pport->link_speed_enabled = dd->pport->link_speed_supported;
10538 /* give a reasonable active value, will be set on link up */
10539 dd->pport->link_speed_active = OPA_LINK_SPEED_25G;
/* cache the chip's context, engine and memory sizes from its CSRs */
10541 dd->chip_rcv_contexts = read_csr(dd, RCV_CONTEXTS);
10542 dd->chip_send_contexts = read_csr(dd, SEND_CONTEXTS);
10543 dd->chip_sdma_engines = read_csr(dd, SEND_DMA_ENGINES);
10544 dd->chip_pio_mem_size = read_csr(dd, SEND_PIO_MEM_SIZE);
10545 dd->chip_sdma_mem_size = read_csr(dd, SEND_DMA_MEM_SIZE);
10546 /* fix up link widths for emulation _p */
10548 if (dd->icode == ICODE_FPGA_EMULATION && is_emulator_p(dd)) {
10549 ppd->link_width_supported =
10550 ppd->link_width_enabled =
10551 ppd->link_width_downgrade_supported =
10552 ppd->link_width_downgrade_enabled =
10555 /* ensure num_vls isn't larger than number of sdma engines */
/*
 * NOTE(review): the fallback below is HFI1_MAX_VLS_SUPPORTED, not
 * dd->chip_sdma_engines, so num_vls may still exceed the engine count
 * after this clamp - confirm this is intended.
 */
10556 if (HFI1_CAP_IS_KSET(SDMA) && num_vls > dd->chip_sdma_engines) {
10557 dd_dev_err(dd, "num_vls %u too large, using %u VLs\n",
10558 num_vls, HFI1_MAX_VLS_SUPPORTED);
10559 ppd->vls_supported = num_vls = HFI1_MAX_VLS_SUPPORTED;
10560 ppd->vls_operational = ppd->vls_supported;
10564 * Convert the ns parameter to the 64 * cclocks used in the CSR.
10565 * Limit the max if larger than the field holds. If timeout is
10566 * non-zero, then the calculated field will be at least 1.
10568 * Must be after icode is set up - the cclock rate depends
10569 * on knowing the hardware being used.
10571 dd->rcv_intr_timeout_csr = ns_to_cclock(dd, rcv_intr_timeout) / 64;
10572 if (dd->rcv_intr_timeout_csr >
10573 RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_MASK)
10574 dd->rcv_intr_timeout_csr =
10575 RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_MASK;
10576 else if (dd->rcv_intr_timeout_csr == 0 && rcv_intr_timeout)
10577 dd->rcv_intr_timeout_csr = 1;
10579 /* needs to be done before we look for the peer device */
10582 /* should this device init the ASIC block? */
10583 asic_should_init(dd);
10585 /* obtain chip sizes, reset chip CSRs */
10588 /* read in the PCIe link speed information */
10589 ret = pcie_speeds(dd);
10593 /* read in firmware */
10594 ret = hfi1_firmware_init(dd);
10599 * In general, the PCIe Gen3 transition must occur after the
10600 * chip has been idled (so it won't initiate any PCIe transactions
10601 * e.g. an interrupt) and before the driver changes any registers
10602 * (the transition will reset the registers).
10604 * In particular, place this call after:
10605 * - init_chip() - the chip will not initiate any PCIe transactions
10606 * - pcie_speeds() - reads the current link speed
10607 * - hfi1_firmware_init() - the needed firmware is ready to be
10610 ret = do_pcie_gen3_transition(dd);
10614 /* start setting dd values and adjusting CSRs */
10615 init_early_variables(dd);
10617 parse_platform_config(dd);
10619 /* add board names as they are defined */
/* 64-byte heap buffer; the snprintf() below is bounded to the same size */
10620 dd->boardname = kmalloc(64, GFP_KERNEL);
10621 if (!dd->boardname)
10623 snprintf(dd->boardname, 64, "Board ID 0x%llx",
10624 dd->revision >> CCE_REVISION_BOARD_ID_LOWER_NIBBLE_SHIFT
10625 & CCE_REVISION_BOARD_ID_LOWER_NIBBLE_MASK);
10627 snprintf(dd->boardversion, BOARD_VERS_MAX,
10628 "ChipABI %u.%u, %s, ChipRev %u.%u, SW Compat %llu\n",
10629 HFI1_CHIP_VERS_MAJ, HFI1_CHIP_VERS_MIN,
10633 (dd->revision >> CCE_REVISION_SW_SHIFT)
10634 & CCE_REVISION_SW_MASK);
10636 ret = set_up_context_variables(dd);
10640 /* set initial RXE CSRs */
10642 /* set initial TXE CSRs */
10644 /* set initial non-RXE, non-TXE CSRs */
10646 /* set up KDETH QP prefix in both RX and TX CSRs */
10649 /* send contexts must be set up before receive contexts */
10650 ret = init_send_contexts(dd);
10654 ret = hfi1_create_ctxts(dd);
10658 dd->rcvhdrsize = DEFAULT_RCVHDRSIZE;
10660 * rcd[0] is guaranteed to be valid by this point. Also, all
10661 * context are using the same value, as per the module parameter.
/*
 * Division binds tighter than subtraction, so this subtracts the quotient
 * sizeof(u64) / sizeof(u32) from rcvhdrqentsize.
 */
10663 dd->rhf_offset = dd->rcd[0]->rcvhdrqentsize - sizeof(u64) / sizeof(u32);
10665 ret = init_pervl_scs(dd);
/* sdma_init() is called once per port */
10670 for (i = 0; i < dd->num_pports; ++i) {
10671 ret = sdma_init(dd, i);
10676 /* use contexts created by hfi1_create_ctxts */
10677 ret = set_up_interrupts(dd);
10681 /* set up LCB access - must be after set_up_interrupts() */
10682 init_lcb_access(dd);
/* the serial string is built from the low 24 bits of the base GUID */
10684 snprintf(dd->serial, SERIAL_MAX, "0x%08llx\n",
10685 dd->base_guid & 0xFFFFFF);
/* oui1..oui3 are the top three bytes of the base GUID, highest first */
10687 dd->oui1 = dd->base_guid >> 56 & 0xFF;
10688 dd->oui2 = dd->base_guid >> 48 & 0xFF;
10689 dd->oui3 = dd->base_guid >> 40 & 0xFF;
10691 ret = load_firmware(dd); /* asymmetric with dispose_firmware() */
10693 goto bail_clear_intr;
10694 check_fabric_firmware_versions(dd);
10698 ret = init_cntrs(dd);
10700 goto bail_clear_intr;
10702 ret = init_rcverr(dd);
10704 goto bail_free_cntrs;
10706 ret = eprom_init(dd);
10708 goto bail_free_rcverr;
/* error unwind; the labels between these calls are not visible here */
10717 clean_up_interrupts(dd);
10719 hfi1_pcie_ddcleanup(dd);
10721 hfi1_free_devdata(dd);
/*
 * Compute how many extra egress cycles are needed to send dw_len dwords
 * (dw_len * 4 bytes) at @desired_egress_rate instead of the port's
 * current_egress_rate.
 *
 * Returns 0 when @desired_egress_rate is -1 or is not lower than the
 * current rate; otherwise the cycle difference truncated to u16.
 */
10727 static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate,
10731 u32 current_egress_rate = ppd->current_egress_rate;
10732 /* rates here are in units of 10^6 bits/sec */
10734 if (desired_egress_rate == -1)
10735 return 0; /* shouldn't happen */
10737 if (desired_egress_rate >= current_egress_rate)
10738 return 0; /* we can't help go faster, only slower */
/* the slower (desired) rate takes more cycles, so the difference is >= 0 */
10740 delta_cycles = egress_cycles(dw_len * 4, desired_egress_rate) -
10741 egress_cycles(dw_len * 4, current_egress_rate);
10743 return (u16)delta_cycles;
10748 * create_pbc - build a pbc for transmission
10749 * @flags: special case flags or-ed in built pbc
10750 * @srate_mbs: static rate
10752 * @dw_len: dword length (header words + data words + pbc words)
10754 * Create a PBC with the given flags, rate, VL, and length.
10756 * NOTE: The PBC created will not insert any HCRC - all callers but one are
10757 * for verbs, which does not use this PSM feature. The lone other caller
10758 * is for the diagnostic interface which calls this if the user does not
10759 * supply their own PBC.
/*
 * Assemble a 64-bit PBC from the static rate delay, a "no HCRC" insert
 * code, the masked VL and the masked dword length. The first term of the
 * OR chain and the return statement are not visible in this view.
 */
10761 u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl,
10764 u64 pbc, delay = 0;
/* a zero srate_mbs leaves delay at 0; non-zero is the unlikely case */
10766 if (unlikely(srate_mbs))
10767 delay = delay_cycles(ppd, srate_mbs, dw_len);
10770 | (delay << PBC_STATIC_RATE_CONTROL_COUNT_SHIFT)
10771 | ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
10772 | (vl & PBC_VL_MASK) << PBC_VL_SHIFT
10773 | (dw_len & PBC_LENGTH_DWS_MASK)
10774 << PBC_LENGTH_DWS_SHIFT;
/*
 * SBUS_THERMAL is passed as the receiver argument of every
 * sbus_request_slow() call in thermal_init(); SBUS_THERM_MONITOR_MODE is
 * the value written in its "select temperature mode" step.
 */
10779 #define SBUS_THERMAL 0x4f
10780 #define SBUS_THERM_MONITOR_MODE 0x1
/*
 * THERM_FAILURE formats a "Thermal sensor initialization failed" message
 * with a reason string and an integer code. The call that emits the message
 * and the macro's argument list are not visible in this view.
 */
10782 #define THERM_FAILURE(dev, ret, reason) \
10784 "Thermal sensor initialization failed: %s (%d)\n", \
10788 * Initialize the Avago Thermal sensor.
10790 * After initialization, enable polling of thermal sensor through
10791 * SBus interface. In order for this to work, the SBus Master
10792 * firmware has to be loaded due to the fact that the HW polling
10793 * logic uses SBus interrupts, which are not supported with
10794 * default firmware. Otherwise, no data will be returned through
10795 * the ASIC_STS_THERM CSR.
/*
 * Runs the five-step SBus sequence below against the SBUS_THERMAL receiver
 * and then enables thermal polling via ASIC_CFG_THERM_POLL_EN. The sequence
 * is bracketed by acquire_hw_mutex() / release_hw_mutex().
 *
 * NOTE(review): the statements that follow each THERM_FAILURE() call are
 * not visible in this view - confirm every failure path still reaches
 * release_hw_mutex().
 */
10797 static int thermal_init(struct hfi1_devdata *dd)
/* only real silicon that has claimed HFI1_DO_INIT_ASIC passes this test */
10801 if (dd->icode != ICODE_RTL_SILICON ||
10802 !(dd->flags & HFI1_DO_INIT_ASIC))
10805 acquire_hw_mutex(dd);
10806 dd_dev_info(dd, "Initializing thermal sensor\n");
10808 /* Thermal Sensor Initialization */
10809 /* Step 1: Reset the Thermal SBus Receiver */
10810 ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0,
10811 RESET_SBUS_RECEIVER, 0);
10813 THERM_FAILURE(dd, ret, "Bus Reset");
10816 /* Step 2: Set Reset bit in Thermal block */
10817 ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0,
10818 WRITE_SBUS_RECEIVER, 0x1);
10820 THERM_FAILURE(dd, ret, "Therm Block Reset");
10823 /* Step 3: Write clock divider value (100MHz -> 2MHz) */
10824 ret = sbus_request_slow(dd, SBUS_THERMAL, 0x1,
10825 WRITE_SBUS_RECEIVER, 0x32);
10827 THERM_FAILURE(dd, ret, "Write Clock Div");
10830 /* Step 4: Select temperature mode */
10831 ret = sbus_request_slow(dd, SBUS_THERMAL, 0x3,
10832 WRITE_SBUS_RECEIVER,
10833 SBUS_THERM_MONITOR_MODE);
10835 THERM_FAILURE(dd, ret, "Write Mode Sel");
10838 /* Step 5: De-assert block reset and start conversion */
10839 ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0,
10840 WRITE_SBUS_RECEIVER, 0x2);
10842 THERM_FAILURE(dd, ret, "Write Reset Deassert");
10845 /* Step 5.1: Wait for first conversion (21.5ms per spec) */
10848 /* Enable polling of thermal readings */
10849 write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0x1);
10851 release_hw_mutex(dd);
10855 static void handle_temp_err(struct hfi1_devdata *dd)
10857 struct hfi1_pportdata *ppd = &dd->pport[0];
10859 * Thermal Critical Interrupt
10860 * Put the device into forced freeze mode, take link down to
10861 * offline, and put DC into reset.
10864 "Critical temperature reached! Forcing device into freeze mode!\n");
10865 dd->flags |= HFI1_FORCED_FREEZE;
10866 start_freeze_handling(ppd, FREEZE_SELF|FREEZE_ABORT);
10868 * Shut DC down as much and as quickly as possible.
10870 * Step 1: Take the link down to OFFLINE. This will cause the
10871 * 8051 to put the Serdes in reset. However, we don't want to
10872 * go through the entire link state machine since we want to
10873 * shutdown ASAP. Furthermore, this is not a graceful shutdown
10874 * but rather an attempt to save the chip.
10875 * Code below is almost the same as quiet_serdes() but avoids
10876 * all the extra work and the sleeps.
10878 ppd->driver_link_ready = 0;
10879 ppd->link_enabled = 0;
10880 set_physical_link_state(dd, PLS_OFFLINE |
10881 (OPA_LINKDOWN_REASON_SMA_DISABLED << 8));
10883 * Step 2: Shutdown LCB and 8051
10884 * After shutdown, do not restore DC_CFG_RESET value.