GNU Linux-libre 6.7.9-gnu
[releases.git] / arch / s390 / pci / pci_event.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  Copyright IBM Corp. 2012
4  *
5  *  Author(s):
6  *    Jan Glauber <jang@linux.vnet.ibm.com>
7  */
8
9 #define KMSG_COMPONENT "zpci"
10 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
11
12 #include <linux/kernel.h>
13 #include <linux/pci.h>
14 #include <asm/pci_debug.h>
15 #include <asm/pci_dma.h>
16 #include <asm/sclp.h>
17
18 #include "pci_bus.h"
19
20 /* Content Code Description for PCI Function Error */
21 struct zpci_ccdf_err {
22         u32 reserved1;
23         u32 fh;                         /* function handle */
24         u32 fid;                        /* function id */
25         u32 ett         :  4;           /* expected table type */
26         u32 mvn         : 12;           /* MSI vector number */
27         u32 dmaas       :  8;           /* DMA address space */
28         u32             :  6;
29         u32 q           :  1;           /* event qualifier */
30         u32 rw          :  1;           /* read/write */
31         u64 faddr;                      /* failing address */
32         u32 reserved3;
33         u16 reserved4;
34         u16 pec;                        /* PCI event code */
35 } __packed;
36
37 /* Content Code Description for PCI Function Availability */
38 struct zpci_ccdf_avail {
39         u32 reserved1;
40         u32 fh;                         /* function handle */
41         u32 fid;                        /* function id */
42         u32 reserved2;
43         u32 reserved3;
44         u32 reserved4;
45         u32 reserved5;
46         u16 reserved6;
47         u16 pec;                        /* PCI event code */
48 } __packed;
49
50 static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
51 {
52         switch (ers_res) {
53         case PCI_ERS_RESULT_CAN_RECOVER:
54         case PCI_ERS_RESULT_RECOVERED:
55         case PCI_ERS_RESULT_NEED_RESET:
56                 return false;
57         default:
58                 return true;
59         }
60 }
61
62 static bool is_passed_through(struct pci_dev *pdev)
63 {
64         struct zpci_dev *zdev = to_zpci(pdev);
65         bool ret;
66
67         mutex_lock(&zdev->kzdev_lock);
68         ret = !!zdev->kzdev;
69         mutex_unlock(&zdev->kzdev_lock);
70
71         return ret;
72 }
73
74 static bool is_driver_supported(struct pci_driver *driver)
75 {
76         if (!driver || !driver->err_handler)
77                 return false;
78         if (!driver->err_handler->error_detected)
79                 return false;
80         if (!driver->err_handler->slot_reset)
81                 return false;
82         if (!driver->err_handler->resume)
83                 return false;
84         return true;
85 }
86
87 static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev,
88                                                          struct pci_driver *driver)
89 {
90         pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
91
92         ers_res = driver->err_handler->error_detected(pdev,  pdev->error_state);
93         if (ers_result_indicates_abort(ers_res))
94                 pr_info("%s: Automatic recovery failed after initial reporting\n", pci_name(pdev));
95         else if (ers_res == PCI_ERS_RESULT_NEED_RESET)
96                 pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
97
98         return ers_res;
99 }
100
101 static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
102                                                         struct pci_driver *driver)
103 {
104         pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
105         struct zpci_dev *zdev = to_zpci(pdev);
106         int rc;
107
108         pr_info("%s: Unblocking device access for examination\n", pci_name(pdev));
109         rc = zpci_reset_load_store_blocked(zdev);
110         if (rc) {
111                 pr_err("%s: Unblocking device access failed\n", pci_name(pdev));
112                 /* Let's try a full reset instead */
113                 return PCI_ERS_RESULT_NEED_RESET;
114         }
115
116         if (driver->err_handler->mmio_enabled) {
117                 ers_res = driver->err_handler->mmio_enabled(pdev);
118                 if (ers_result_indicates_abort(ers_res)) {
119                         pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
120                                 pci_name(pdev));
121                         return ers_res;
122                 } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
123                         pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
124                         return ers_res;
125                 }
126         }
127
128         pr_debug("%s: Unblocking DMA\n", pci_name(pdev));
129         rc = zpci_clear_error_state(zdev);
130         if (!rc) {
131                 pdev->error_state = pci_channel_io_normal;
132         } else {
133                 pr_err("%s: Unblocking DMA failed\n", pci_name(pdev));
134                 /* Let's try a full reset instead */
135                 return PCI_ERS_RESULT_NEED_RESET;
136         }
137
138         return ers_res;
139 }
140
141 static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
142                                             struct pci_driver *driver)
143 {
144         pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
145
146         pr_info("%s: Initiating reset\n", pci_name(pdev));
147         if (zpci_hot_reset_device(to_zpci(pdev))) {
148                 pr_err("%s: The reset request failed\n", pci_name(pdev));
149                 return ers_res;
150         }
151         pdev->error_state = pci_channel_io_normal;
152         ers_res = driver->err_handler->slot_reset(pdev);
153         if (ers_result_indicates_abort(ers_res)) {
154                 pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev));
155                 return ers_res;
156         }
157
158         return ers_res;
159 }
160
161 /* zpci_event_attempt_error_recovery - Try to recover the given PCI function
162  * @pdev: PCI function to recover currently in the error state
163  *
164  * We follow the scheme outlined in Documentation/PCI/pci-error-recovery.rst.
165  * With the simplification that recovery always happens per function
166  * and the platform determines which functions are affected for
167  * multi-function devices.
168  */
169 static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
170 {
171         pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
172         struct pci_driver *driver;
173
174         /*
175          * Ensure that the PCI function is not removed concurrently, no driver
176          * is unbound or probed and that userspace can't access its
177          * configuration space while we perform recovery.
178          */
179         pci_dev_lock(pdev);
180         if (pdev->error_state == pci_channel_io_perm_failure) {
181                 ers_res = PCI_ERS_RESULT_DISCONNECT;
182                 goto out_unlock;
183         }
184         pdev->error_state = pci_channel_io_frozen;
185
186         if (is_passed_through(pdev)) {
187                 pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
188                         pci_name(pdev));
189                 goto out_unlock;
190         }
191
192         driver = to_pci_driver(pdev->dev.driver);
193         if (!is_driver_supported(driver)) {
194                 if (!driver)
195                         pr_info("%s: Cannot be recovered because no driver is bound to the device\n",
196                                 pci_name(pdev));
197                 else
198                         pr_info("%s: The %s driver bound to the device does not support error recovery\n",
199                                 pci_name(pdev),
200                                 driver->name);
201                 goto out_unlock;
202         }
203
204         ers_res = zpci_event_notify_error_detected(pdev, driver);
205         if (ers_result_indicates_abort(ers_res))
206                 goto out_unlock;
207
208         if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) {
209                 ers_res = zpci_event_do_error_state_clear(pdev, driver);
210                 if (ers_result_indicates_abort(ers_res))
211                         goto out_unlock;
212         }
213
214         if (ers_res == PCI_ERS_RESULT_NEED_RESET)
215                 ers_res = zpci_event_do_reset(pdev, driver);
216
217         if (ers_res != PCI_ERS_RESULT_RECOVERED) {
218                 pr_err("%s: Automatic recovery failed; operator intervention is required\n",
219                        pci_name(pdev));
220                 goto out_unlock;
221         }
222
223         pr_info("%s: The device is ready to resume operations\n", pci_name(pdev));
224         if (driver->err_handler->resume)
225                 driver->err_handler->resume(pdev);
226 out_unlock:
227         pci_dev_unlock(pdev);
228
229         return ers_res;
230 }
231
232 /* zpci_event_io_failure - Report PCI channel failure state to driver
233  * @pdev: PCI function for which to report
234  * @es: PCI channel failure state to report
235  */
236 static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es)
237 {
238         struct pci_driver *driver;
239
240         pci_dev_lock(pdev);
241         pdev->error_state = es;
242         /**
243          * While vfio-pci's error_detected callback notifies user-space QEMU
244          * reacts to this by freezing the guest. In an s390 environment PCI
245          * errors are rarely fatal so this is overkill. Instead in the future
246          * we will inject the error event and let the guest recover the device
247          * itself.
248          */
249         if (is_passed_through(pdev))
250                 goto out;
251         driver = to_pci_driver(pdev->dev.driver);
252         if (driver && driver->err_handler && driver->err_handler->error_detected)
253                 driver->err_handler->error_detected(pdev, pdev->error_state);
254 out:
255         pci_dev_unlock(pdev);
256 }
257
258 static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
259 {
260         struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
261         struct pci_dev *pdev = NULL;
262         pci_ers_result_t ers_res;
263
264         zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n",
265                  ccdf->fid, ccdf->fh, ccdf->pec);
266         zpci_err("error CCDF:\n");
267         zpci_err_hex(ccdf, sizeof(*ccdf));
268
269         if (zdev) {
270                 zpci_update_fh(zdev, ccdf->fh);
271                 if (zdev->zbus->bus)
272                         pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
273         }
274
275         pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
276                pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
277
278         if (!pdev)
279                 goto no_pdev;
280
281         switch (ccdf->pec) {
282         case 0x003a: /* Service Action or Error Recovery Successful */
283                 ers_res = zpci_event_attempt_error_recovery(pdev);
284                 if (ers_res != PCI_ERS_RESULT_RECOVERED)
285                         zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
286                 break;
287         default:
288                 /*
289                  * Mark as frozen not permanently failed because the device
290                  * could be subsequently recovered by the platform.
291                  */
292                 zpci_event_io_failure(pdev, pci_channel_io_frozen);
293                 break;
294         }
295         pci_dev_put(pdev);
296 no_pdev:
297         zpci_zdev_put(zdev);
298 }
299
/* zpci_event_error - Entry point for PCI function error events
 * @data: event data, handed on as the error CCDF
 *
 * The event is ignored unless zpci_is_enabled() reports true.
 */
void zpci_event_error(void *data)
{
        if (!zpci_is_enabled())
                return;

        __zpci_event_error(data);
}
305
306 static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
307 {
308         zpci_update_fh(zdev, fh);
309         /* Give the driver a hint that the function is
310          * already unusable.
311          */
312         zpci_bus_remove_device(zdev, true);
313         /* Even though the device is already gone we still
314          * need to free zPCI resources as part of the disable.
315          */
316         if (zdev_enabled(zdev))
317                 zpci_disable_device(zdev);
318         zdev->state = ZPCI_FN_STATE_STANDBY;
319 }
320
321 static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
322 {
323         struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
324         bool existing_zdev = !!zdev;
325         enum zpci_state state;
326
327         zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n",
328                  ccdf->fid, ccdf->fh, ccdf->pec);
329         switch (ccdf->pec) {
330         case 0x0301: /* Reserved|Standby -> Configured */
331                 if (!zdev) {
332                         zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_CONFIGURED);
333                         if (IS_ERR(zdev))
334                                 break;
335                 } else {
336                         /* the configuration request may be stale */
337                         if (zdev->state != ZPCI_FN_STATE_STANDBY)
338                                 break;
339                         zdev->state = ZPCI_FN_STATE_CONFIGURED;
340                 }
341                 zpci_scan_configured_device(zdev, ccdf->fh);
342                 break;
343         case 0x0302: /* Reserved -> Standby */
344                 if (!zdev)
345                         zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY);
346                 else
347                         zpci_update_fh(zdev, ccdf->fh);
348                 break;
349         case 0x0303: /* Deconfiguration requested */
350                 if (zdev) {
351                         /* The event may have been queued before we confirgured
352                          * the device.
353                          */
354                         if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
355                                 break;
356                         zpci_update_fh(zdev, ccdf->fh);
357                         zpci_deconfigure_device(zdev);
358                 }
359                 break;
360         case 0x0304: /* Configured -> Standby|Reserved */
361                 if (zdev) {
362                         /* The event may have been queued before we confirgured
363                          * the device.:
364                          */
365                         if (zdev->state == ZPCI_FN_STATE_CONFIGURED)
366                                 zpci_event_hard_deconfigured(zdev, ccdf->fh);
367                         /* The 0x0304 event may immediately reserve the device */
368                         if (!clp_get_state(zdev->fid, &state) &&
369                             state == ZPCI_FN_STATE_RESERVED) {
370                                 zpci_device_reserved(zdev);
371                         }
372                 }
373                 break;
374         case 0x0306: /* 0x308 or 0x302 for multiple devices */
375                 zpci_remove_reserved_devices();
376                 clp_scan_pci_devices();
377                 break;
378         case 0x0308: /* Standby -> Reserved */
379                 if (!zdev)
380                         break;
381                 zpci_device_reserved(zdev);
382                 break;
383         default:
384                 break;
385         }
386         if (existing_zdev)
387                 zpci_zdev_put(zdev);
388 }
389
/* zpci_event_availability - Entry point for PCI function availability events
 * @data: event data, handed on as the availability CCDF
 *
 * The event is ignored unless zpci_is_enabled() reports true.
 */
void zpci_event_availability(void *data)
{
        if (!zpci_is_enabled())
                return;

        __zpci_event_availability(data);
}