GNU Linux-libre 5.19-rc6-gnu
[releases.git] / drivers / block / mtip32xx / mtip32xx.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Driver for the Micron P320 SSD
4  *   Copyright (C) 2011 Micron Technology, Inc.
5  *
6  * Portions of this code were derived from works subjected to the
7  * following copyright:
8  *    Copyright (C) 2009 Integrated Device Technology, Inc.
9  */
10
11 #include <linux/pci.h>
12 #include <linux/interrupt.h>
13 #include <linux/ata.h>
14 #include <linux/delay.h>
15 #include <linux/hdreg.h>
16 #include <linux/uaccess.h>
17 #include <linux/random.h>
18 #include <linux/smp.h>
19 #include <linux/compat.h>
20 #include <linux/fs.h>
21 #include <linux/module.h>
22 #include <linux/blkdev.h>
23 #include <linux/blk-mq.h>
24 #include <linux/bio.h>
25 #include <linux/dma-mapping.h>
26 #include <linux/idr.h>
27 #include <linux/kthread.h>
28 #include <../drivers/ata/ahci.h>
29 #include <linux/export.h>
30 #include <linux/debugfs.h>
31 #include <linux/prefetch.h>
32 #include <linux/numa.h>
33 #include "mtip32xx.h"
34
35 #define HW_CMD_SLOT_SZ          (MTIP_MAX_COMMAND_SLOTS * 32)
36
37 /* DMA region containing RX Fis, Identify, RLE10, and SMART buffers */
38 #define AHCI_RX_FIS_SZ          0x100
39 #define AHCI_RX_FIS_OFFSET      0x0
40 #define AHCI_IDFY_SZ            ATA_SECT_SIZE
41 #define AHCI_IDFY_OFFSET        0x400
42 #define AHCI_SECTBUF_SZ         ATA_SECT_SIZE
43 #define AHCI_SECTBUF_OFFSET     0x800
44 #define AHCI_SMARTBUF_SZ        ATA_SECT_SIZE
45 #define AHCI_SMARTBUF_OFFSET    0xC00
46 /* 0x100 + 0x200 + 0x200 + 0x200 is smaller than 4k but we pad it out */
47 #define BLOCK_DMA_ALLOC_SZ      4096
48
49 /* DMA region containing command table (should be 8192 bytes) */
50 #define AHCI_CMD_SLOT_SZ        sizeof(struct mtip_cmd_hdr)
51 #define AHCI_CMD_TBL_SZ         (MTIP_MAX_COMMAND_SLOTS * AHCI_CMD_SLOT_SZ)
52 #define AHCI_CMD_TBL_OFFSET     0x0
53
54 /* DMA region per command (contains header and SGL) */
55 #define AHCI_CMD_TBL_HDR_SZ     0x80
56 #define AHCI_CMD_TBL_HDR_OFFSET 0x0
57 #define AHCI_CMD_TBL_SGL_SZ     (MTIP_MAX_SG * sizeof(struct mtip_cmd_sg))
58 #define AHCI_CMD_TBL_SGL_OFFSET AHCI_CMD_TBL_HDR_SZ
59 #define CMD_DMA_ALLOC_SZ        (AHCI_CMD_TBL_SGL_SZ + AHCI_CMD_TBL_HDR_SZ)
60
61
62 #define HOST_CAP_NZDMA          (1 << 19)
63 #define HOST_HSORG              0xFC
64 #define HSORG_DISABLE_SLOTGRP_INTR (1<<24)
65 #define HSORG_DISABLE_SLOTGRP_PXIS (1<<16)
66 #define HSORG_HWREV             0xFF00
67 #define HSORG_STYLE             0x8
68 #define HSORG_SLOTGROUPS        0x7
69
70 #define PORT_COMMAND_ISSUE      0x38
71 #define PORT_SDBV               0x7C
72
73 #define PORT_OFFSET             0x100
74 #define PORT_MEM_SIZE           0x80
75
76 #define PORT_IRQ_ERR \
77         (PORT_IRQ_HBUS_ERR | PORT_IRQ_IF_ERR | PORT_IRQ_CONNECT | \
78          PORT_IRQ_PHYRDY | PORT_IRQ_UNK_FIS | PORT_IRQ_BAD_PMP | \
79          PORT_IRQ_TF_ERR | PORT_IRQ_HBUS_DATA_ERR | PORT_IRQ_IF_NONFATAL | \
80          PORT_IRQ_OVERFLOW)
81 #define PORT_IRQ_LEGACY \
82         (PORT_IRQ_PIOS_FIS | PORT_IRQ_D2H_REG_FIS)
83 #define PORT_IRQ_HANDLED \
84         (PORT_IRQ_SDB_FIS | PORT_IRQ_LEGACY | \
85          PORT_IRQ_TF_ERR | PORT_IRQ_IF_ERR | \
86          PORT_IRQ_CONNECT | PORT_IRQ_PHYRDY)
87 #define DEF_PORT_IRQ \
88         (PORT_IRQ_ERR | PORT_IRQ_LEGACY | PORT_IRQ_SDB_FIS)
89
90 /* product numbers */
91 #define MTIP_PRODUCT_UNKNOWN    0x00
92 #define MTIP_PRODUCT_ASICFPGA   0x11
93
94 /* Device instance number, incremented each time a device is probed. */
95 static int instance;
96
97 static LIST_HEAD(online_list);
98 static LIST_HEAD(removing_list);
99 static DEFINE_SPINLOCK(dev_lock);
100
101 /*
102  * Global variable used to hold the major block device number
103  * allocated in mtip_init().
104  */
105 static int mtip_major;
106 static struct dentry *dfs_parent;
107 static struct dentry *dfs_device_status;
108
109 static u32 cpu_use[NR_CPUS];
110
111 static DEFINE_IDA(rssd_index_ida);
112
113 static int mtip_block_initialize(struct driver_data *dd);
114
115 #ifdef CONFIG_COMPAT
116 struct mtip_compat_ide_task_request_s {
117         __u8            io_ports[8];
118         __u8            hob_ports[8];
119         ide_reg_valid_t out_flags;
120         ide_reg_valid_t in_flags;
121         int             data_phase;
122         int             req_cmd;
123         compat_ulong_t  out_size;
124         compat_ulong_t  in_size;
125 };
126 #endif
127
128 /*
129  * This function check_for_surprise_removal is called
130  * while card is removed from the system and it will
131  * read the vendor id from the configuration space
132  *
133  * @pdev Pointer to the pci_dev structure.
134  *
135  * return value
136  *       true if device removed, else false
137  */
138 static bool mtip_check_surprise_removal(struct driver_data *dd)
139 {
140         u16 vendor_id = 0;
141
142         if (dd->sr)
143                 return true;
144
145        /* Read the vendorID from the configuration space */
146         pci_read_config_word(dd->pdev, 0x00, &vendor_id);
147         if (vendor_id == 0xFFFF) {
148                 dd->sr = true;
149                 if (dd->queue)
150                         blk_queue_flag_set(QUEUE_FLAG_DEAD, dd->queue);
151                 else
152                         dev_warn(&dd->pdev->dev,
153                                 "%s: dd->queue is NULL\n", __func__);
154                 return true; /* device removed */
155         }
156
157         return false; /* device present */
158 }
159
160 static struct mtip_cmd *mtip_cmd_from_tag(struct driver_data *dd,
161                                           unsigned int tag)
162 {
163         return blk_mq_rq_to_pdu(blk_mq_tag_to_rq(dd->tags.tags[0], tag));
164 }
165
166 /*
167  * Reset the HBA (without sleeping)
168  *
169  * @dd Pointer to the driver data structure.
170  *
171  * return value
172  *      0       The reset was successful.
173  *      -1      The HBA Reset bit did not clear.
174  */
175 static int mtip_hba_reset(struct driver_data *dd)
176 {
177         unsigned long timeout;
178
179         /* Set the reset bit */
180         writel(HOST_RESET, dd->mmio + HOST_CTL);
181
182         /* Flush */
183         readl(dd->mmio + HOST_CTL);
184
185         /*
186          * Spin for up to 10 seconds waiting for reset acknowledgement. Spec
187          * is 1 sec but in LUN failure conditions, up to 10 secs are required
188          */
189         timeout = jiffies + msecs_to_jiffies(10000);
190         do {
191                 mdelay(10);
192                 if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))
193                         return -1;
194
195         } while ((readl(dd->mmio + HOST_CTL) & HOST_RESET)
196                  && time_before(jiffies, timeout));
197
198         if (readl(dd->mmio + HOST_CTL) & HOST_RESET)
199                 return -1;
200
201         return 0;
202 }
203
204 /*
205  * Issue a command to the hardware.
206  *
207  * Set the appropriate bit in the s_active and Command Issue hardware
208  * registers, causing hardware command processing to begin.
209  *
210  * @port Pointer to the port structure.
211  * @tag  The tag of the command to be issued.
212  *
213  * return value
214  *      None
215  */
216 static inline void mtip_issue_ncq_command(struct mtip_port *port, int tag)
217 {
218         int group = tag >> 5;
219
220         /* guard SACT and CI registers */
221         spin_lock(&port->cmd_issue_lock[group]);
222         writel((1 << MTIP_TAG_BIT(tag)),
223                         port->s_active[MTIP_TAG_INDEX(tag)]);
224         writel((1 << MTIP_TAG_BIT(tag)),
225                         port->cmd_issue[MTIP_TAG_INDEX(tag)]);
226         spin_unlock(&port->cmd_issue_lock[group]);
227 }
228
229 /*
230  * Enable/disable the reception of FIS
231  *
232  * @port   Pointer to the port data structure
233  * @enable 1 to enable, 0 to disable
234  *
235  * return value
236  *      Previous state: 1 enabled, 0 disabled
237  */
238 static int mtip_enable_fis(struct mtip_port *port, int enable)
239 {
240         u32 tmp;
241
242         /* enable FIS reception */
243         tmp = readl(port->mmio + PORT_CMD);
244         if (enable)
245                 writel(tmp | PORT_CMD_FIS_RX, port->mmio + PORT_CMD);
246         else
247                 writel(tmp & ~PORT_CMD_FIS_RX, port->mmio + PORT_CMD);
248
249         /* Flush */
250         readl(port->mmio + PORT_CMD);
251
252         return (((tmp & PORT_CMD_FIS_RX) == PORT_CMD_FIS_RX));
253 }
254
255 /*
256  * Enable/disable the DMA engine
257  *
258  * @port   Pointer to the port data structure
259  * @enable 1 to enable, 0 to disable
260  *
261  * return value
262  *      Previous state: 1 enabled, 0 disabled.
263  */
264 static int mtip_enable_engine(struct mtip_port *port, int enable)
265 {
266         u32 tmp;
267
268         /* enable FIS reception */
269         tmp = readl(port->mmio + PORT_CMD);
270         if (enable)
271                 writel(tmp | PORT_CMD_START, port->mmio + PORT_CMD);
272         else
273                 writel(tmp & ~PORT_CMD_START, port->mmio + PORT_CMD);
274
275         readl(port->mmio + PORT_CMD);
276         return (((tmp & PORT_CMD_START) == PORT_CMD_START));
277 }
278
/*
 * Enables the port DMA engine and FIS reception.
 *
 * return value
 *	None
 */
static inline void mtip_start_port(struct mtip_port *port)
{
	mtip_enable_fis(port, 1);	/* turn on FIS reception */
	mtip_enable_engine(port, 1);	/* turn on the DMA engine */
}
293
294 /*
295  * Deinitialize a port by disabling port interrupts, the DMA engine,
296  * and FIS reception.
297  *
298  * @port Pointer to the port structure
299  *
300  * return value
301  *      None
302  */
303 static inline void mtip_deinit_port(struct mtip_port *port)
304 {
305         /* Disable interrupts on this port */
306         writel(0, port->mmio + PORT_IRQ_MASK);
307
308         /* Disable the DMA engine */
309         mtip_enable_engine(port, 0);
310
311         /* Disable FIS reception */
312         mtip_enable_fis(port, 0);
313 }
314
/*
 * Initialize a port.
 *
 * This function deinitializes the port by calling mtip_deinit_port() and
 * then initializes it by setting the command header and RX FIS addresses,
 * clearing the SError register and any pending port interrupts before
 * re-enabling the default set of port interrupts.
 *
 * @port Pointer to the port structure.
 *
 * return value
 *	None
 */
static void mtip_init_port(struct mtip_port *port)
{
	int i;
	mtip_deinit_port(port);

	/* Program the command list base and FIS base addresses */
	if (readl(port->dd->mmio + HOST_CAP) & HOST_CAP_64) {
		/*
		 * 64-bit capable HBA: also program the high 32 address
		 * bits. The double 16-bit shift avoids a 32-bit shift on
		 * a possibly 32-bit wide dma_addr_t.
		 */
		writel((port->command_list_dma >> 16) >> 16,
			 port->mmio + PORT_LST_ADDR_HI);
		writel((port->rxfis_dma >> 16) >> 16,
			 port->mmio + PORT_FIS_ADDR_HI);
		set_bit(MTIP_PF_HOST_CAP_64, &port->flags);
	}

	/* Low 32 bits of the command list and RX FIS addresses */
	writel(port->command_list_dma & 0xFFFFFFFF,
			port->mmio + PORT_LST_ADDR);
	writel(port->rxfis_dma & 0xFFFFFFFF, port->mmio + PORT_FIS_ADDR);

	/* Clear SError (write-1-to-clear) */
	writel(readl(port->mmio + PORT_SCR_ERR), port->mmio + PORT_SCR_ERR);

	/* reset the completed registers.*/
	for (i = 0; i < port->dd->slot_groups; i++)
		writel(0xFFFFFFFF, port->completed[i]);

	/* Clear any pending interrupts for this port */
	writel(readl(port->mmio + PORT_IRQ_STAT), port->mmio + PORT_IRQ_STAT);

	/* Clear any pending interrupts on the HBA. */
	writel(readl(port->dd->mmio + HOST_IRQ_STAT),
					port->dd->mmio + HOST_IRQ_STAT);

	/* Enable port interrupts */
	writel(DEF_PORT_IRQ, port->mmio + PORT_IRQ_MASK);
}
363
/*
 * Restart a port
 *
 * Stops the DMA engine, waits for the command list to idle, escalates
 * to a full HBA reset if it does not, then performs a COM reset via
 * PxSCTL.DET and re-initializes/restarts the port. Bails out early at
 * several points if a device removal is pending.
 *
 * @port Pointer to the port data structure.
 *
 * return value
 *	None
 */
static void mtip_restart_port(struct mtip_port *port)
{
	unsigned long timeout;

	/* Disable the DMA engine */
	mtip_enable_engine(port, 0);

	/* Chip quirk: wait up to 500ms for PxCMD.CR == 0 */
	timeout = jiffies + msecs_to_jiffies(500);
	while ((readl(port->mmio + PORT_CMD) & PORT_CMD_LIST_ON)
		 && time_before(jiffies, timeout))
		;

	if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
		return;

	/*
	 * Chip quirk: escalate to hba reset if
	 * PxCMD.CR not clear after 500 ms
	 */
	if (readl(port->mmio + PORT_CMD) & PORT_CMD_LIST_ON) {
		dev_warn(&port->dd->pdev->dev,
			"PxCMD.CR not clear, escalating reset\n");

		if (mtip_hba_reset(port->dd))
			dev_err(&port->dd->pdev->dev,
				"HBA reset escalation failed.\n");

		/* 30 ms delay before com reset to quiesce chip */
		mdelay(30);
	}

	dev_warn(&port->dd->pdev->dev, "Issuing COM reset\n");

	/* Set PxSCTL.DET */
	writel(readl(port->mmio + PORT_SCR_CTL) |
			 1, port->mmio + PORT_SCR_CTL);
	readl(port->mmio + PORT_SCR_CTL);

	/* Wait 1 ms to quiesce chip function */
	timeout = jiffies + msecs_to_jiffies(1);
	while (time_before(jiffies, timeout))
		;

	if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
		return;

	/* Clear PxSCTL.DET */
	writel(readl(port->mmio + PORT_SCR_CTL) & ~1,
			 port->mmio + PORT_SCR_CTL);
	readl(port->mmio + PORT_SCR_CTL);

	/* Wait 500 ms for bit 0 of PORT_SCR_STS to be set */
	timeout = jiffies + msecs_to_jiffies(500);
	while (((readl(port->mmio + PORT_SCR_STAT) & 0x01) == 0)
			 && time_before(jiffies, timeout))
		;

	if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
		return;

	/* PxSSTS.DET bit 0 still clear: no device detected after COM reset */
	if ((readl(port->mmio + PORT_SCR_STAT) & 0x01) == 0)
		dev_warn(&port->dd->pdev->dev,
			"COM reset failed\n");

	mtip_init_port(port);
	mtip_start_port(port);

}
441
442 static int mtip_device_reset(struct driver_data *dd)
443 {
444         int rv = 0;
445
446         if (mtip_check_surprise_removal(dd))
447                 return 0;
448
449         if (mtip_hba_reset(dd) < 0)
450                 rv = -EFAULT;
451
452         mdelay(1);
453         mtip_init_port(dd->port);
454         mtip_start_port(dd->port);
455
456         /* Enable interrupts on the HBA. */
457         writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN,
458                                         dd->mmio + HOST_CTL);
459         return rv;
460 }
461
462 /*
463  * Helper function for tag logging
464  */
465 static void print_tags(struct driver_data *dd,
466                         char *msg,
467                         unsigned long *tagbits,
468                         int cnt)
469 {
470         unsigned char tagmap[128];
471         int group, tagmap_len = 0;
472
473         memset(tagmap, 0, sizeof(tagmap));
474         for (group = SLOTBITS_IN_LONGS; group > 0; group--)
475                 tagmap_len += sprintf(tagmap + tagmap_len, "%016lX ",
476                                                 tagbits[group-1]);
477         dev_warn(&dd->pdev->dev,
478                         "%d command(s) %s: tagmap [%s]", cnt, msg, tagmap);
479 }
480
481 static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer,
482                                 dma_addr_t buffer_dma, unsigned int sectors);
483 static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id,
484                                                 struct smart_attr *attrib);
485
486 static void mtip_complete_command(struct mtip_cmd *cmd, blk_status_t status)
487 {
488         struct request *req = blk_mq_rq_from_pdu(cmd);
489
490         cmd->status = status;
491         if (likely(!blk_should_fake_timeout(req->q)))
492                 blk_mq_complete_request(req);
493 }
494
/*
 * Handle an error.
 *
 * Task File Error (TFE) recovery: complete whatever commands the
 * hardware already finished, restart the port, read the NCQ error log
 * page to classify the failure (write protect, thermal shutdown,
 * failed rebuild), then re-issue or retire each outstanding command.
 *
 * @dd Pointer to the DRIVER_DATA structure.
 *
 * return value
 *	None
 */
static void mtip_handle_tfe(struct driver_data *dd)
{
	int group, tag, bit, reissue, rv;
	struct mtip_port *port;
	struct mtip_cmd  *cmd;
	u32 completed;
	struct host_to_dev_fis *fis;
	unsigned long tagaccum[SLOTBITS_IN_LONGS];
	unsigned int cmd_cnt = 0;
	unsigned char *buf;
	char *fail_reason = NULL;
	int fail_all_ncq_write = 0, fail_all_ncq_cmds = 0;

	dev_warn(&dd->pdev->dev, "Taskfile error\n");

	port = dd->port;

	if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
		/* Internal command in flight: fail just that one and bail */
		cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL);
		dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n");
		mtip_complete_command(cmd, BLK_STS_IOERR);
		return;
	}

	/* clear the tag accumulator */
	memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));

	/* Loop through all the groups */
	for (group = 0; group < dd->slot_groups; group++) {
		completed = readl(port->completed[group]);

		dev_warn(&dd->pdev->dev, "g=%u, comp=%x\n", group, completed);

		/* clear completed status register in the hardware.*/
		writel(completed, port->completed[group]);

		/* Process successfully completed commands */
		for (bit = 0; bit < 32 && completed; bit++) {
			if (!(completed & (1<<bit)))
				continue;
			tag = (group << 5) + bit;

			/* Skip the internal command slot */
			if (tag == MTIP_TAG_INTERNAL)
				continue;

			cmd = mtip_cmd_from_tag(dd, tag);
			mtip_complete_command(cmd, 0);
			set_bit(tag, tagaccum);
			cmd_cnt++;
		}
	}

	print_tags(dd, "completed (TFE)", tagaccum, cmd_cnt);

	/* Restart the port */
	mdelay(20);
	mtip_restart_port(port);

	/* Trying to determine the cause of the error */
	rv = mtip_read_log_page(dd->port, ATA_LOG_SATA_NCQ,
				dd->port->log_buf,
				dd->port->log_buf_dma, 1);
	if (rv) {
		dev_warn(&dd->pdev->dev,
			"Error in READ LOG EXT (10h) command\n");
		/* non-critical error, don't fail the load */
	} else {
		/* Inspect vendor-specific bytes of the NCQ error log */
		buf = (unsigned char *)dd->port->log_buf;
		if (buf[259] & 0x1) {
			dev_info(&dd->pdev->dev,
				"Write protect bit is set.\n");
			set_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag);
			fail_all_ncq_write = 1;
			fail_reason = "write protect";
		}
		if (buf[288] == 0xF7) {
			dev_info(&dd->pdev->dev,
				"Exceeded Tmax, drive in thermal shutdown.\n");
			set_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag);
			fail_all_ncq_cmds = 1;
			fail_reason = "thermal shutdown";
		}
		if (buf[288] == 0xBF) {
			set_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag);
			dev_info(&dd->pdev->dev,
				"Drive indicates rebuild has failed. Secure erase required.\n");
			fail_all_ncq_cmds = 1;
			fail_reason = "rebuild failed";
		}
	}

	/* clear the tag accumulator */
	memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));

	/* Loop through all the groups */
	for (group = 0; group < dd->slot_groups; group++) {
		for (bit = 0; bit < 32; bit++) {
			reissue = 1;
			tag = (group << 5) + bit;
			cmd = mtip_cmd_from_tag(dd, tag);

			fis = (struct host_to_dev_fis *)cmd->command;

			/* Should re-issue? */
			if (tag == MTIP_TAG_INTERNAL ||
			    fis->command == ATA_CMD_SET_FEATURES)
				reissue = 0;
			else {
				if (fail_all_ncq_cmds ||
					(fail_all_ncq_write &&
					fis->command == ATA_CMD_FPDMA_WRITE)) {
					dev_warn(&dd->pdev->dev,
					"  Fail: %s w/tag %d [%s].\n",
					fis->command == ATA_CMD_FPDMA_WRITE ?
						"write" : "read",
					tag,
					fail_reason != NULL ?
						fail_reason : "unknown");
					mtip_complete_command(cmd, BLK_STS_MEDIUM);
					continue;
				}
			}

			/*
			 * First check if this command has
			 *  exceeded its retries.
			 */
			if (reissue && (cmd->retries-- > 0)) {

				set_bit(tag, tagaccum);

				/* Re-issue the command. */
				mtip_issue_ncq_command(port, tag);

				continue;
			}

			/* Retire a command that will not be reissued */
			dev_warn(&port->dd->pdev->dev,
				"retiring tag %d\n", tag);

			mtip_complete_command(cmd, BLK_STS_IOERR);
		}
	}
	print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt);
}
650
/*
 * Handle a set device bits interrupt
 *
 * Completes every command whose bit is set in @completed for the given
 * slot group. Runs from the per-group ISR workqueue; the last worker
 * to finish re-enables HBA interrupts.
 */
static inline void mtip_workq_sdbfx(struct mtip_port *port, int group,
							u32 completed)
{
	struct driver_data *dd = port->dd;
	int tag, bit;
	struct mtip_cmd *command;

	/* Should never be queued with nothing to do */
	if (!completed) {
		WARN_ON_ONCE(!completed);
		return;
	}
	/* clear completed status register in the hardware.*/
	writel(completed, port->completed[group]);

	/* Process completed commands. */
	for (bit = 0; (bit < 32) && completed; bit++) {
		if (completed & 0x01) {
			tag = (group << 5) | bit;

			/* skip internal command slot. */
			if (unlikely(tag == MTIP_TAG_INTERNAL))
				continue;

			command = mtip_cmd_from_tag(dd, tag);
			mtip_complete_command(command, 0);
		}
		completed >>= 1;
	}

	/* If last, re-enable interrupts */
	if (atomic_dec_return(&dd->irq_workers_active) == 0)
		writel(0xffffffff, dd->mmio + HOST_IRQ_STAT);
}
687
688 /*
689  * Process legacy pio and d2h interrupts
690  */
691 static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat)
692 {
693         struct mtip_port *port = dd->port;
694         struct mtip_cmd *cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL);
695
696         if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) && cmd) {
697                 int group = MTIP_TAG_INDEX(MTIP_TAG_INTERNAL);
698                 int status = readl(port->cmd_issue[group]);
699
700                 if (!(status & (1 << MTIP_TAG_BIT(MTIP_TAG_INTERNAL))))
701                         mtip_complete_command(cmd, 0);
702         }
703 }
704
705 /*
706  * Demux and handle errors
707  */
708 static inline void mtip_process_errors(struct driver_data *dd, u32 port_stat)
709 {
710         if (unlikely(port_stat & PORT_IRQ_CONNECT)) {
711                 dev_warn(&dd->pdev->dev,
712                         "Clearing PxSERR.DIAG.x\n");
713                 writel((1 << 26), dd->port->mmio + PORT_SCR_ERR);
714         }
715
716         if (unlikely(port_stat & PORT_IRQ_PHYRDY)) {
717                 dev_warn(&dd->pdev->dev,
718                         "Clearing PxSERR.DIAG.n\n");
719                 writel((1 << 16), dd->port->mmio + PORT_SCR_ERR);
720         }
721
722         if (unlikely(port_stat & ~PORT_IRQ_HANDLED)) {
723                 dev_warn(&dd->pdev->dev,
724                         "Port stat errors %x unhandled\n",
725                         (port_stat & ~PORT_IRQ_HANDLED));
726                 if (mtip_check_surprise_removal(dd))
727                         return;
728         }
729         if (likely(port_stat & (PORT_IRQ_TF_ERR | PORT_IRQ_IF_ERR))) {
730                 set_bit(MTIP_PF_EH_ACTIVE_BIT, &dd->port->flags);
731                 wake_up_interruptible(&dd->port->svc_wait);
732         }
733 }
734
/*
 * Main interrupt demux.
 *
 * Acknowledges the port interrupt status, fans SDB-FIS completions out
 * to the per-group ISR workers, and dispatches error and legacy
 * interrupts. HBA interrupt status is only re-armed here when no
 * workers were queued; otherwise the last worker re-arms it.
 */
static inline irqreturn_t mtip_handle_irq(struct driver_data *data)
{
	struct driver_data *dd = (struct driver_data *) data;
	struct mtip_port *port = dd->port;
	u32 hba_stat, port_stat;
	int rv = IRQ_NONE;
	int do_irq_enable = 1, i, workers;
	struct mtip_work *twork;

	hba_stat = readl(dd->mmio + HOST_IRQ_STAT);
	if (hba_stat) {
		rv = IRQ_HANDLED;

		/* Acknowledge the interrupt status on the port.*/
		port_stat = readl(port->mmio + PORT_IRQ_STAT);
		/* All-ones status means the device is gone */
		if (unlikely(port_stat == 0xFFFFFFFF)) {
			mtip_check_surprise_removal(dd);
			return IRQ_HANDLED;
		}
		writel(port_stat, port->mmio + PORT_IRQ_STAT);

		/* Demux port status */
		if (likely(port_stat & PORT_IRQ_SDB_FIS)) {
			do_irq_enable = 0;
			WARN_ON_ONCE(atomic_read(&dd->irq_workers_active) != 0);

			/* Start at 1: group zero is always local? */
			for (i = 0, workers = 0; i < MTIP_MAX_SLOT_GROUPS;
									i++) {
				twork = &dd->work[i];
				twork->completed = readl(port->completed[i]);
				if (twork->completed)
					workers++;
			}

			atomic_set(&dd->irq_workers_active, workers);
			if (workers) {
				/* Groups 1+ go to their bound CPUs ... */
				for (i = 1; i < MTIP_MAX_SLOT_GROUPS; i++) {
					twork = &dd->work[i];
					if (twork->completed)
						queue_work_on(
							twork->cpu_binding,
							dd->isr_workq,
							&twork->work);
				}

				/* ... group 0 is handled inline */
				if (likely(dd->work[0].completed))
					mtip_workq_sdbfx(port, 0,
							dd->work[0].completed);

			} else {
				/*
				 * Chip quirk: SDB interrupt but nothing
				 * to complete
				 */
				do_irq_enable = 1;
			}
		}

		if (unlikely(port_stat & PORT_IRQ_ERR)) {
			if (unlikely(mtip_check_surprise_removal(dd))) {
				/* don't proceed further */
				return IRQ_HANDLED;
			}
			if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
							&dd->dd_flag))
				return rv;

			mtip_process_errors(dd, port_stat & PORT_IRQ_ERR);
		}

		if (unlikely(port_stat & PORT_IRQ_LEGACY))
			mtip_process_legacy(dd, port_stat & PORT_IRQ_LEGACY);
	}

	/* acknowledge interrupt */
	if (unlikely(do_irq_enable))
		writel(hba_stat, dd->mmio + HOST_IRQ_STAT);

	return rv;
}
816
817 /*
818  * HBA interrupt subroutine.
819  *
820  * @irq         IRQ number.
821  * @instance    Pointer to the driver data structure.
822  *
823  * return value
824  *      IRQ_HANDLED     A HBA interrupt was pending and handled.
825  *      IRQ_NONE        This interrupt was not for the HBA.
826  */
827 static irqreturn_t mtip_irq_handler(int irq, void *instance)
828 {
829         struct driver_data *dd = instance;
830
831         return mtip_handle_irq(dd);
832 }
833
/*
 * Issue a non-NCQ command by setting its tag bit in the Command Issue
 * register (no s_active update, unlike mtip_issue_ncq_command()).
 */
static void mtip_issue_non_ncq_command(struct mtip_port *port, int tag)
{
	writel(1 << MTIP_TAG_BIT(tag), port->cmd_issue[MTIP_TAG_INDEX(tag)]);
}
838
/*
 * Decide whether NCQ traffic must be paused around a command.
 *
 * Returns true when the command (secure-erase prep, activating
 * microcode download) requires the queue to stay paused; returns false
 * otherwise. Secure-erase-unit and certain vendor 0xFC commands
 * trigger a port restart here instead.
 */
static bool mtip_pause_ncq(struct mtip_port *port,
				struct host_to_dev_fis *fis)
{
	unsigned long task_file_data;

	/* Taskfile busy/error bit set: do not pause */
	task_file_data = readl(port->mmio+PORT_TFDATA);
	if ((task_file_data & 1))
		return false;

	if (fis->command == ATA_CMD_SEC_ERASE_PREP) {
		port->ic_pause_timer = jiffies;
		return true;
	} else if ((fis->command == ATA_CMD_DOWNLOAD_MICRO) &&
					(fis->features == 0x03)) {
		/* features == 0x03: activate downloaded microcode */
		set_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags);
		port->ic_pause_timer = jiffies;
		return true;
	} else if ((fis->command == ATA_CMD_SEC_ERASE_UNIT) ||
		((fis->command == 0xFC) &&
			(fis->features == 0x27 || fis->features == 0x72 ||
			 fis->features == 0x62 || fis->features == 0x26))) {
		/*
		 * NOTE(review): 0xFC with these feature codes looks like a
		 * vendor-specific low-level format — confirm against the
		 * device documentation.
		 */
		clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
		clear_bit(MTIP_DDF_REBUILD_FAILED_BIT, &port->dd->dd_flag);
		/* Com reset after secure erase or lowlevel format */
		mtip_restart_port(port);
		clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
		return false;
	}

	return false;
}
870
871 static bool mtip_commands_active(struct mtip_port *port)
872 {
873         unsigned int active;
874         unsigned int n;
875
876         /*
877          * Ignore s_active bit 0 of array element 0.
878          * This bit will always be set
879          */
880         active = readl(port->s_active[0]) & 0xFFFFFFFE;
881         for (n = 1; n < port->dd->slot_groups; n++)
882                 active |= readl(port->s_active[n]);
883
884         return active != 0;
885 }
886
887 /*
888  * Wait for port to quiesce
889  *
890  * @port    Pointer to port data structure
891  * @timeout Max duration to wait (ms)
892  *
893  * return value
894  *      0       Success
895  *      -EBUSY  Commands still active
896  */
897 static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
898 {
899         unsigned long to;
900         bool active = true;
901
902         blk_mq_quiesce_queue(port->dd->queue);
903
904         to = jiffies + msecs_to_jiffies(timeout);
905         do {
906                 if (test_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags) &&
907                         test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) {
908                         msleep(20);
909                         continue; /* svc thd is actively issuing commands */
910                 }
911
912                 msleep(100);
913
914                 if (mtip_check_surprise_removal(port->dd))
915                         goto err_fault;
916
917                 active = mtip_commands_active(port);
918                 if (!active)
919                         break;
920         } while (time_before(jiffies, to));
921
922         blk_mq_unquiesce_queue(port->dd->queue);
923         return active ? -EBUSY : 0;
924 err_fault:
925         blk_mq_unquiesce_queue(port->dd->queue);
926         return -EFAULT;
927 }
928
/* Parameters describing an internal (driver-issued) command. */
struct mtip_int_cmd {
	int fis_len;		/* FIS length, in dwords */
	dma_addr_t buffer;	/* DMA address of the data buffer, or 0 */
	int buf_len;		/* data buffer length, in bytes */
	u32 opts;		/* command header options (less FIS len and PRD count) */
};
935
936 /*
937  * Execute an internal command and wait for the completion.
938  *
939  * @port    Pointer to the port data structure.
940  * @fis     Pointer to the FIS that describes the command.
941  * @fis_len  Length in WORDS of the FIS.
942  * @buffer  DMA accessible for command data.
943  * @buf_len  Length, in bytes, of the data buffer.
944  * @opts    Command header options, excluding the FIS length
945  *             and the number of PRD entries.
946  * @timeout Time in ms to wait for the command to complete.
947  *
948  * return value
949  *      0        Command completed successfully.
950  *      -EFAULT  The buffer address is not correctly aligned.
951  *      -EBUSY   Internal command or other IO in progress.
952  *      -EAGAIN  Time out waiting for command to complete.
953  */
static int mtip_exec_internal_command(struct mtip_port *port,
					struct host_to_dev_fis *fis,
					int fis_len,
					dma_addr_t buffer,
					int buf_len,
					u32 opts,
					unsigned long timeout)
{
	struct mtip_cmd *int_cmd;
	struct driver_data *dd = port->dd;
	struct request *rq;
	/* Stack-allocated descriptor; valid only for the life of this call. */
	struct mtip_int_cmd icmd = {
		.fis_len = fis_len,
		.buffer = buffer,
		.buf_len = buf_len,
		.opts = opts
	};
	int rv = 0;

	/* Make sure the buffer is 8 byte aligned. This is asic specific. */
	if (buffer & 0x00000007) {
		dev_err(&dd->pdev->dev, "SG buffer is not 8 byte aligned\n");
		return -EFAULT;
	}

	if (mtip_check_surprise_removal(dd))
		return -EFAULT;

	/* Internal commands use a reserved blk-mq tag. */
	rq = blk_mq_alloc_request(dd->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_RESERVED);
	if (IS_ERR(rq)) {
		dbg_printk(MTIP_DRV_NAME "Unable to allocate tag for PIO cmd\n");
		return -EFAULT;
	}

	/* Mark an internal command active before touching the hardware. */
	set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);

	if (fis->command == ATA_CMD_SEC_ERASE_PREP)
		set_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);

	clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags);

	if (fis->command != ATA_CMD_STANDBYNOW1) {
		/* wait for io to complete if non atomic */
		if (mtip_quiesce_io(port, MTIP_QUIESCE_IO_TIMEOUT_MS) < 0) {
			dev_warn(&dd->pdev->dev, "Failed to quiesce IO\n");
			blk_mq_free_request(rq);
			clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
			wake_up_interruptible(&port->svc_wait);
			return -EBUSY;
		}
	}

	/* Copy the command to the command table */
	int_cmd = blk_mq_rq_to_pdu(rq);
	int_cmd->icmd = &icmd;
	memcpy(int_cmd->command, fis, fis_len*4);

	rq->timeout = timeout;

	/* insert request and run queue */
	blk_execute_rq(rq, true);

	if (int_cmd->status) {
		dev_err(&dd->pdev->dev, "Internal command [%02X] failed %d\n",
				fis->command, int_cmd->status);
		rv = -EIO;

		/* Distinguish surprise removal from a plain command failure. */
		if (mtip_check_surprise_removal(dd) ||
			test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
					&dd->dd_flag)) {
			dev_err(&dd->pdev->dev,
				"Internal command [%02X] wait returned due to SR\n",
				fis->command);
			rv = -ENXIO;
			goto exec_ic_exit;
		}
		mtip_device_reset(dd); /* recover from timeout issue */
		rv = -EAGAIN;
		goto exec_ic_exit;
	}

	/* The internal tag still pending in hardware means a timeout. */
	if (readl(port->cmd_issue[MTIP_TAG_INDEX(MTIP_TAG_INTERNAL)])
			& (1 << MTIP_TAG_BIT(MTIP_TAG_INTERNAL))) {
		rv = -ENXIO;
		if (!test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) {
			mtip_device_reset(dd);
			rv = -EAGAIN;
		}
	}
exec_ic_exit:
	/* Clear the allocated and active bits for the internal command. */
	blk_mq_free_request(rq);
	clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
	if (rv >= 0 && mtip_pause_ncq(port, fis)) {
		/* NCQ paused */
		return rv;
	}
	wake_up_interruptible(&port->svc_wait);

	return rv;
}
1055
1056 /*
1057  * Byte-swap ATA ID strings.
1058  *
1059  * ATA identify data contains strings in byte-swapped 16-bit words.
1060  * They must be swapped (on all architectures) to be usable as C strings.
1061  * This function swaps bytes in-place.
1062  *
1063  * @buf The buffer location of the string
1064  * @len The number of bytes to swap
1065  *
1066  * return value
1067  *      None
1068  */
1069 static inline void ata_swap_string(u16 *buf, unsigned int len)
1070 {
1071         int i;
1072         for (i = 0; i < (len/2); i++)
1073                 be16_to_cpus(&buf[i]);
1074 }
1075
1076 static void mtip_set_timeout(struct driver_data *dd,
1077                                         struct host_to_dev_fis *fis,
1078                                         unsigned int *timeout, u8 erasemode)
1079 {
1080         switch (fis->command) {
1081         case ATA_CMD_DOWNLOAD_MICRO:
1082                 *timeout = 120000; /* 2 minutes */
1083                 break;
1084         case ATA_CMD_SEC_ERASE_UNIT:
1085         case 0xFC:
1086                 if (erasemode)
1087                         *timeout = ((*(dd->port->identify + 90) * 2) * 60000);
1088                 else
1089                         *timeout = ((*(dd->port->identify + 89) * 2) * 60000);
1090                 break;
1091         case ATA_CMD_STANDBYNOW1:
1092                 *timeout = 120000;  /* 2 minutes */
1093                 break;
1094         case 0xF7:
1095         case 0xFA:
1096                 *timeout = 60000;  /* 60 seconds */
1097                 break;
1098         case ATA_CMD_SMART:
1099                 *timeout = 15000;  /* 15 seconds */
1100                 break;
1101         default:
1102                 *timeout = MTIP_IOCTL_CMD_TIMEOUT_MS;
1103                 break;
1104         }
1105 }
1106
1107 /*
1108  * Request the device identity information.
1109  *
1110  * If a user space buffer is not specified, i.e. is NULL, the
1111  * identify information is still read from the drive and placed
1112  * into the identify data buffer (@e port->identify) in the
1113  * port data structure.
1114  * When the identify buffer contains valid identify information @e
1115  * port->identify_valid is non-zero.
1116  *
1117  * @port         Pointer to the port structure.
1118  * @user_buffer  A user space buffer where the identify data should be
1119  *                    copied.
1120  *
1121  * return value
1122  *      0       Command completed successfully.
 *      -EFAULT An error occurred while copying data to the user buffer.
1124  *      -1      Command failed.
1125  */
static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer)
{
	int rv = 0;
	struct host_to_dev_fis fis;

	/* Bail out early if the device is being removed. */
	if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
		return -EFAULT;

	/* Build the FIS. */
	memset(&fis, 0, sizeof(struct host_to_dev_fis));
	fis.type	= 0x27;		/* register host-to-device FIS */
	fis.opts	= 1 << 7;	/* command (not control) update */
	fis.command	= ATA_CMD_ID_ATA;

	/* Set the identify information as invalid. */
	port->identify_valid = 0;

	/* Clear the identify information. */
	memset(port->identify, 0, sizeof(u16) * ATA_ID_WORDS);

	/* Execute the command. */
	if (mtip_exec_internal_command(port,
				&fis,
				5,
				port->identify_dma,
				sizeof(u16) * ATA_ID_WORDS,
				0,
				MTIP_INT_CMD_TIMEOUT_MS)
				< 0) {
		rv = -1;
		goto out;
	}

	/*
	 * Perform any necessary byte-swapping.  Yes, the kernel does in fact
	 * perform field-sensitive swapping on the string fields.
	 * See the kernel use of ata_id_string() for proof of this.
	 */
#ifdef __LITTLE_ENDIAN
	ata_swap_string(port->identify + 27, 40);  /* model string*/
	ata_swap_string(port->identify + 23, 8);   /* firmware string*/
	ata_swap_string(port->identify + 10, 20);  /* serial# string*/
#else
	{
		/* Big endian: swap every identify word to host order. */
		int i;
		for (i = 0; i < ATA_ID_WORDS; i++)
			port->identify[i] = le16_to_cpu(port->identify[i]);
	}
#endif

	/* Check security locked state */
	if (port->identify[128] & 0x4)
		set_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
	else
		clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);

	/* Set the identify buffer as valid. */
	port->identify_valid = 1;

	/* Optionally hand the raw identify data back to user space. */
	if (user_buffer) {
		if (copy_to_user(
			user_buffer,
			port->identify,
			ATA_ID_WORDS * sizeof(u16))) {
			rv = -EFAULT;
			goto out;
		}
	}

out:
	return rv;
}
1198
1199 /*
1200  * Issue a standby immediate command to the device.
1201  *
1202  * @port Pointer to the port structure.
1203  *
1204  * return value
1205  *      0       Command was executed successfully.
1206  *      -1      An error occurred while executing the command.
1207  */
1208 static int mtip_standby_immediate(struct mtip_port *port)
1209 {
1210         int rv;
1211         struct host_to_dev_fis  fis;
1212         unsigned long __maybe_unused start;
1213         unsigned int timeout;
1214
1215         /* Build the FIS. */
1216         memset(&fis, 0, sizeof(struct host_to_dev_fis));
1217         fis.type        = 0x27;
1218         fis.opts        = 1 << 7;
1219         fis.command     = ATA_CMD_STANDBYNOW1;
1220
1221         mtip_set_timeout(port->dd, &fis, &timeout, 0);
1222
1223         start = jiffies;
1224         rv = mtip_exec_internal_command(port,
1225                                         &fis,
1226                                         5,
1227                                         0,
1228                                         0,
1229                                         0,
1230                                         timeout);
1231         dbg_printk(MTIP_DRV_NAME "Time taken to complete standby cmd: %d ms\n",
1232                         jiffies_to_msecs(jiffies - start));
1233         if (rv)
1234                 dev_warn(&port->dd->pdev->dev,
1235                         "STANDBY IMMEDIATE command failed.\n");
1236
1237         return rv;
1238 }
1239
1240 /*
1241  * Issue a READ LOG EXT command to the device.
1242  *
1243  * @port        pointer to the port structure.
1244  * @page        page number to fetch
1245  * @buffer      pointer to buffer
1246  * @buffer_dma  dma address corresponding to @buffer
1247  * @sectors     page length to fetch, in sectors
1248  *
1249  * return value
1250  *      @rv     return value from mtip_exec_internal_command()
1251  */
1252 static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer,
1253                                 dma_addr_t buffer_dma, unsigned int sectors)
1254 {
1255         struct host_to_dev_fis fis;
1256
1257         memset(&fis, 0, sizeof(struct host_to_dev_fis));
1258         fis.type        = 0x27;
1259         fis.opts        = 1 << 7;
1260         fis.command     = ATA_CMD_READ_LOG_EXT;
1261         fis.sect_count  = sectors & 0xFF;
1262         fis.sect_cnt_ex = (sectors >> 8) & 0xFF;
1263         fis.lba_low     = page;
1264         fis.lba_mid     = 0;
1265         fis.device      = ATA_DEVICE_OBS;
1266
1267         memset(buffer, 0, sectors * ATA_SECT_SIZE);
1268
1269         return mtip_exec_internal_command(port,
1270                                         &fis,
1271                                         5,
1272                                         buffer_dma,
1273                                         sectors * ATA_SECT_SIZE,
1274                                         0,
1275                                         MTIP_INT_CMD_TIMEOUT_MS);
1276 }
1277
1278 /*
1279  * Issue a SMART READ DATA command to the device.
1280  *
1281  * @port        pointer to the port structure.
1282  * @buffer      pointer to buffer
1283  * @buffer_dma  dma address corresponding to @buffer
1284  *
1285  * return value
1286  *      @rv     return value from mtip_exec_internal_command()
1287  */
1288 static int mtip_get_smart_data(struct mtip_port *port, u8 *buffer,
1289                                         dma_addr_t buffer_dma)
1290 {
1291         struct host_to_dev_fis fis;
1292
1293         memset(&fis, 0, sizeof(struct host_to_dev_fis));
1294         fis.type        = 0x27;
1295         fis.opts        = 1 << 7;
1296         fis.command     = ATA_CMD_SMART;
1297         fis.features    = 0xD0;
1298         fis.sect_count  = 1;
1299         fis.lba_mid     = 0x4F;
1300         fis.lba_hi      = 0xC2;
1301         fis.device      = ATA_DEVICE_OBS;
1302
1303         return mtip_exec_internal_command(port,
1304                                         &fis,
1305                                         5,
1306                                         buffer_dma,
1307                                         ATA_SECT_SIZE,
1308                                         0,
1309                                         15000);
1310 }
1311
1312 /*
1313  * Get the value of a smart attribute
1314  *
1315  * @port        pointer to the port structure
1316  * @id          attribute number
1317  * @attrib      pointer to return attrib information corresponding to @id
1318  *
1319  * return value
1320  *      -EINVAL NULL buffer passed or unsupported attribute @id.
1321  *      -EPERM  Identify data not valid, SMART not supported or not enabled
1322  */
1323 static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id,
1324                                                 struct smart_attr *attrib)
1325 {
1326         int rv, i;
1327         struct smart_attr *pattr;
1328
1329         if (!attrib)
1330                 return -EINVAL;
1331
1332         if (!port->identify_valid) {
1333                 dev_warn(&port->dd->pdev->dev, "IDENTIFY DATA not valid\n");
1334                 return -EPERM;
1335         }
1336         if (!(port->identify[82] & 0x1)) {
1337                 dev_warn(&port->dd->pdev->dev, "SMART not supported\n");
1338                 return -EPERM;
1339         }
1340         if (!(port->identify[85] & 0x1)) {
1341                 dev_warn(&port->dd->pdev->dev, "SMART not enabled\n");
1342                 return -EPERM;
1343         }
1344
1345         memset(port->smart_buf, 0, ATA_SECT_SIZE);
1346         rv = mtip_get_smart_data(port, port->smart_buf, port->smart_buf_dma);
1347         if (rv) {
1348                 dev_warn(&port->dd->pdev->dev, "Failed to ge SMART data\n");
1349                 return rv;
1350         }
1351
1352         pattr = (struct smart_attr *)(port->smart_buf + 2);
1353         for (i = 0; i < 29; i++, pattr++)
1354                 if (pattr->attr_id == id) {
1355                         memcpy(attrib, pattr, sizeof(struct smart_attr));
1356                         break;
1357                 }
1358
1359         if (i == 29) {
1360                 dev_warn(&port->dd->pdev->dev,
1361                         "Query for invalid SMART attribute ID\n");
1362                 rv = -EINVAL;
1363         }
1364
1365         return rv;
1366 }
1367
1368 /*
1369  * Get the drive capacity.
1370  *
1371  * @dd      Pointer to the device data structure.
1372  * @sectors Pointer to the variable that will receive the sector count.
1373  *
1374  * return value
1375  *      1 Capacity was returned successfully.
1376  *      0 The identify information is invalid.
1377  */
1378 static bool mtip_hw_get_capacity(struct driver_data *dd, sector_t *sectors)
1379 {
1380         struct mtip_port *port = dd->port;
1381         u64 total, raw0, raw1, raw2, raw3;
1382         raw0 = port->identify[100];
1383         raw1 = port->identify[101];
1384         raw2 = port->identify[102];
1385         raw3 = port->identify[103];
1386         total = raw0 | raw1<<16 | raw2<<32 | raw3<<48;
1387         *sectors = total;
1388         return (bool) !!port->identify_valid;
1389 }
1390
1391 /*
1392  * Display the identify command data.
1393  *
1394  * @port Pointer to the port data structure.
1395  *
1396  * return value
1397  *      None
1398  */
1399 static void mtip_dump_identify(struct mtip_port *port)
1400 {
1401         sector_t sectors;
1402         unsigned short revid;
1403         char cbuf[42];
1404
1405         if (!port->identify_valid)
1406                 return;
1407
1408         strlcpy(cbuf, (char *)(port->identify+10), 21);
1409         dev_info(&port->dd->pdev->dev,
1410                 "Serial No.: %s\n", cbuf);
1411
1412         strlcpy(cbuf, (char *)(port->identify+23), 9);
1413         dev_info(&port->dd->pdev->dev,
1414                 "Firmware Ver.: %s\n", cbuf);
1415
1416         strlcpy(cbuf, (char *)(port->identify+27), 41);
1417         dev_info(&port->dd->pdev->dev, "Model: %s\n", cbuf);
1418
1419         dev_info(&port->dd->pdev->dev, "Security: %04x %s\n",
1420                 port->identify[128],
1421                 port->identify[128] & 0x4 ? "(LOCKED)" : "");
1422
1423         if (mtip_hw_get_capacity(port->dd, &sectors))
1424                 dev_info(&port->dd->pdev->dev,
1425                         "Capacity: %llu sectors (%llu MB)\n",
1426                          (u64)sectors,
1427                          ((u64)sectors) * ATA_SECT_SIZE >> 20);
1428
1429         pci_read_config_word(port->dd->pdev, PCI_REVISION_ID, &revid);
1430         switch (revid & 0xFF) {
1431         case 0x1:
1432                 strlcpy(cbuf, "A0", 3);
1433                 break;
1434         case 0x3:
1435                 strlcpy(cbuf, "A2", 3);
1436                 break;
1437         default:
1438                 strlcpy(cbuf, "?", 2);
1439                 break;
1440         }
1441         dev_info(&port->dd->pdev->dev,
1442                 "Card Type: %s\n", cbuf);
1443 }
1444
1445 /*
1446  * Map the commands scatter list into the command table.
1447  *
1448  * @command Pointer to the command.
1449  * @nents Number of scatter list entries.
1450  *
1451  * return value
1452  *      None
1453  */
1454 static inline void fill_command_sg(struct driver_data *dd,
1455                                 struct mtip_cmd *command,
1456                                 int nents)
1457 {
1458         int n;
1459         unsigned int dma_len;
1460         struct mtip_cmd_sg *command_sg;
1461         struct scatterlist *sg;
1462
1463         command_sg = command->command + AHCI_CMD_TBL_HDR_SZ;
1464
1465         for_each_sg(command->sg, sg, nents, n) {
1466                 dma_len = sg_dma_len(sg);
1467                 if (dma_len > 0x400000)
1468                         dev_err(&dd->pdev->dev,
1469                                 "DMA segment length truncated\n");
1470                 command_sg->info = cpu_to_le32((dma_len-1) & 0x3FFFFF);
1471                 command_sg->dba =  cpu_to_le32(sg_dma_address(sg));
1472                 command_sg->dba_upper =
1473                         cpu_to_le32((sg_dma_address(sg) >> 16) >> 16);
1474                 command_sg++;
1475         }
1476 }
1477
1478 /*
1479  * @brief Execute a drive command.
1480  *
1481  * return value 0 The command completed successfully.
1482  * return value -1 An error occurred while executing the command.
1483  */
static int exec_drive_task(struct mtip_port *port, u8 *command)
{
	struct host_to_dev_fis	fis;
	/* Completion status is read back from the received D2H register FIS. */
	struct host_to_dev_fis *reply = (port->rxfis + RX_FIS_D2H_REG);
	unsigned int to;

	/* Build the FIS. */
	memset(&fis, 0, sizeof(struct host_to_dev_fis));
	fis.type	= 0x27;
	fis.opts	= 1 << 7;
	fis.command	= command[0];
	fis.features	= command[1];
	fis.sect_count	= command[2];
	fis.sector	= command[3];
	fis.cyl_low	= command[4];
	fis.cyl_hi	= command[5];
	fis.device	= command[6] & ~0x10; /* Clear the dev bit*/

	mtip_set_timeout(port->dd, &fis, &to, 0);

	dbg_printk(MTIP_DRV_NAME " %s: User Command: cmd %x, feat %x, nsect %x, sect %x, lcyl %x, hcyl %x, sel %x\n",
		__func__,
		command[0],
		command[1],
		command[2],
		command[3],
		command[4],
		command[5],
		command[6]);

	/* Execute the command. */
	if (mtip_exec_internal_command(port,
				 &fis,
				 5,
				 0,
				 0,
				 0,
				 to) < 0) {
		return -1;
	}

	/* Copy the completion registers back into the caller's buffer. */
	command[0] = reply->command; /* Status*/
	command[1] = reply->features; /* Error*/
	command[4] = reply->cyl_low;
	command[5] = reply->cyl_hi;

	dbg_printk(MTIP_DRV_NAME " %s: Completion Status: stat %x, err %x , cyl_lo %x cyl_hi %x\n",
		__func__,
		command[0],
		command[1],
		command[4],
		command[5]);

	return 0;
}
1539
1540 /*
1541  * @brief Execute a drive command.
1542  *
1543  * @param port Pointer to the port data structure.
1544  * @param command Pointer to the user specified command parameters.
1545  * @param user_buffer Pointer to the user space buffer where read sector
1546  *                   data should be copied.
1547  *
1548  * return value 0 The command completed successfully.
1549  * return value -EFAULT An error occurred while copying the completion
1550  *                 data to the user space buffer.
1551  * return value -1 An error occurred while executing the command.
1552  */
static int exec_drive_command(struct mtip_port *port, u8 *command,
				void __user *user_buffer)
{
	struct host_to_dev_fis	fis;
	struct host_to_dev_fis *reply;
	u8 *buf = NULL;
	dma_addr_t dma_addr = 0;
	int rv = 0, xfer_sz = command[3];
	unsigned int to;

	/* A non-zero sector count requires a DMA bounce buffer. */
	if (xfer_sz) {
		if (!user_buffer)
			return -EFAULT;

		buf = dma_alloc_coherent(&port->dd->pdev->dev,
				ATA_SECT_SIZE * xfer_sz,
				&dma_addr,
				GFP_KERNEL);
		if (!buf) {
			dev_err(&port->dd->pdev->dev,
				"Memory allocation failed (%d bytes)\n",
				ATA_SECT_SIZE * xfer_sz);
			return -ENOMEM;
		}
	}

	/* Build the FIS. */
	memset(&fis, 0, sizeof(struct host_to_dev_fis));
	fis.type	= 0x27;
	fis.opts	= 1 << 7;
	fis.command	= command[0];
	fis.features	= command[2];
	fis.sect_count	= command[3];
	if (fis.command == ATA_CMD_SMART) {
		/* SMART commands carry the signature in cyl_low/cyl_hi. */
		fis.sector	= command[1];
		fis.cyl_low	= 0x4F;
		fis.cyl_hi	= 0xC2;
	}

	mtip_set_timeout(port->dd, &fis, &to, 0);

	/* Data transfers complete with a PIO Setup FIS; others with D2H. */
	if (xfer_sz)
		reply = (port->rxfis + RX_FIS_PIO_SETUP);
	else
		reply = (port->rxfis + RX_FIS_D2H_REG);

	dbg_printk(MTIP_DRV_NAME
		" %s: User Command: cmd %x, sect %x, "
		"feat %x, sectcnt %x\n",
		__func__,
		command[0],
		command[1],
		command[2],
		command[3]);

	/* Execute the command. */
	if (mtip_exec_internal_command(port,
				&fis,
				 5,
				 (xfer_sz ? dma_addr : 0),
				 (xfer_sz ? ATA_SECT_SIZE * xfer_sz : 0),
				 0,
				 to)
				 < 0) {
		rv = -EFAULT;
		goto exit_drive_command;
	}

	/* Collect the completion status. */
	command[0] = reply->command; /* Status*/
	command[1] = reply->features; /* Error*/
	command[2] = reply->sect_count;

	dbg_printk(MTIP_DRV_NAME
		" %s: Completion Status: stat %x, "
		"err %x, nsect %x\n",
		__func__,
		command[0],
		command[1],
		command[2]);

	/* Copy any read data back to the user's buffer. */
	if (xfer_sz) {
		if (copy_to_user(user_buffer,
				 buf,
				 ATA_SECT_SIZE * command[3])) {
			rv = -EFAULT;
			goto exit_drive_command;
		}
	}
exit_drive_command:
	if (buf)
		dma_free_coherent(&port->dd->pdev->dev,
				ATA_SECT_SIZE * xfer_sz, buf, dma_addr);
	return rv;
}
1648
1649 /*
1650  *  Indicates whether a command has a single sector payload.
1651  *
1652  *  @command passed to the device to perform the certain event.
1653  *  @features passed to the device to perform the certain event.
1654  *
1655  *  return value
1656  *      1       command is one that always has a single sector payload,
1657  *              regardless of the value in the Sector Count field.
1658  *      0       otherwise
1659  *
1660  */
1661 static unsigned int implicit_sector(unsigned char command,
1662                                     unsigned char features)
1663 {
1664         unsigned int rv = 0;
1665
1666         /* list of commands that have an implicit sector count of 1 */
1667         switch (command) {
1668         case ATA_CMD_SEC_SET_PASS:
1669         case ATA_CMD_SEC_UNLOCK:
1670         case ATA_CMD_SEC_ERASE_PREP:
1671         case ATA_CMD_SEC_ERASE_UNIT:
1672         case ATA_CMD_SEC_FREEZE_LOCK:
1673         case ATA_CMD_SEC_DISABLE_PASS:
1674         case ATA_CMD_PMP_READ:
1675         case ATA_CMD_PMP_WRITE:
1676                 rv = 1;
1677                 break;
1678         case ATA_CMD_SET_MAX:
1679                 if (features == ATA_SET_MAX_UNLOCK)
1680                         rv = 1;
1681                 break;
1682         case ATA_CMD_SMART:
1683                 if ((features == ATA_SMART_READ_VALUES) ||
1684                                 (features == ATA_SMART_READ_THRESHOLDS))
1685                         rv = 1;
1686                 break;
1687         case ATA_CMD_CONF_OVERLAY:
1688                 if ((features == ATA_DCO_IDENTIFY) ||
1689                                 (features == ATA_DCO_SET))
1690                         rv = 1;
1691                 break;
1692         }
1693         return rv;
1694 }
1695
/*
 * Executes a taskfile
 * See ide_taskfile_ioctl() for derivation
 *
 * Copies any out-payload from user space, builds a host-to-device
 * register FIS from the supplied taskfile registers, executes it as an
 * internal command, then returns the completion registers and any
 * in-payload to user space.
 *
 * @dd       Pointer to the driver data structure.
 * @buf      User pointer to the ide_task_request_t and its payload(s).
 * @req_task Kernel copy of the request; registers are updated in place.
 * @outtotal Byte offset of the out-payload within @buf.
 *
 * return value
 *      0       Success.
 *      -EINVAL Payload too large, bad data phase, or zero sector count
 *              on a data transfer.
 *      -ENOMEM A DMA mapping failed.
 *      -EIO    The command failed or timed out.
 *      -EFAULT A user-space copy failed (or the memdup_user() error).
 */
static int exec_drive_taskfile(struct driver_data *dd,
                               void __user *buf,
                               ide_task_request_t *req_task,
                               int outtotal)
{
        struct host_to_dev_fis  fis;
        struct host_to_dev_fis *reply;
        u8 *outbuf = NULL;
        u8 *inbuf = NULL;
        dma_addr_t outbuf_dma = 0;
        dma_addr_t inbuf_dma = 0;
        dma_addr_t dma_buffer = 0;
        int err = 0;
        unsigned int taskin = 0;
        unsigned int taskout = 0;
        u8 nsect = 0;
        unsigned int timeout;
        unsigned int force_single_sector;
        unsigned int transfer_size;
        unsigned long task_file_data;
        int intotal = outtotal + req_task->out_size;
        int erasemode = 0;

        taskout = req_task->out_size;
        taskin = req_task->in_size;
        /* 130560 = 512 * 0xFF*/
        if (taskin > 130560 || taskout > 130560)
                return -EINVAL;

        if (taskout) {
                outbuf = memdup_user(buf + outtotal, taskout);
                if (IS_ERR(outbuf))
                        return PTR_ERR(outbuf);

                outbuf_dma = dma_map_single(&dd->pdev->dev, outbuf,
                                            taskout, DMA_TO_DEVICE);
                if (dma_mapping_error(&dd->pdev->dev, outbuf_dma)) {
                        err = -ENOMEM;
                        goto abort;
                }
                dma_buffer = outbuf_dma;
        }

        if (taskin) {
                inbuf = memdup_user(buf + intotal, taskin);
                if (IS_ERR(inbuf)) {
                        err = PTR_ERR(inbuf);
                        /* NULL it so the kfree() at abort is a no-op. */
                        inbuf = NULL;
                        goto abort;
                }
                inbuf_dma = dma_map_single(&dd->pdev->dev, inbuf,
                                           taskin, DMA_FROM_DEVICE);
                if (dma_mapping_error(&dd->pdev->dev, inbuf_dma)) {
                        err = -ENOMEM;
                        goto abort;
                }
                /* An IN transfer overrides the OUT buffer as DMA target. */
                dma_buffer = inbuf_dma;
        }

        /* only supports PIO and non-data commands from this ioctl. */
        switch (req_task->data_phase) {
        case TASKFILE_OUT:
                nsect = taskout / ATA_SECT_SIZE;
                reply = (dd->port->rxfis + RX_FIS_PIO_SETUP);
                break;
        case TASKFILE_IN:
                reply = (dd->port->rxfis + RX_FIS_PIO_SETUP);
                break;
        case TASKFILE_NO_DATA:
                reply = (dd->port->rxfis + RX_FIS_D2H_REG);
                break;
        default:
                err = -EINVAL;
                goto abort;
        }

        /* Build the FIS. */
        memset(&fis, 0, sizeof(struct host_to_dev_fis));

        fis.type        = 0x27;         /* host-to-device register FIS */
        fis.opts        = 1 << 7;       /* C bit: this FIS carries a command */
        fis.command     = req_task->io_ports[7];
        fis.features    = req_task->io_ports[1];
        fis.sect_count  = req_task->io_ports[2];
        fis.lba_low     = req_task->io_ports[3];
        fis.lba_mid     = req_task->io_ports[4];
        fis.lba_hi      = req_task->io_ports[5];
         /* Clear the dev bit*/
        fis.device      = req_task->io_ports[6] & ~0x10;

        if ((req_task->in_flags.all == 0) && (req_task->out_flags.all & 1)) {
                /* Extended (48-bit) request: also load the HOB registers. */
                req_task->in_flags.all  =
                        IDE_TASKFILE_STD_IN_FLAGS |
                        (IDE_HOB_STD_IN_FLAGS << 8);
                fis.lba_low_ex          = req_task->hob_ports[3];
                fis.lba_mid_ex          = req_task->hob_ports[4];
                fis.lba_hi_ex           = req_task->hob_ports[5];
                fis.features_ex         = req_task->hob_ports[1];
                fis.sect_cnt_ex         = req_task->hob_ports[2];

        } else {
                req_task->in_flags.all = IDE_TASKFILE_STD_IN_FLAGS;
        }

        force_single_sector = implicit_sector(fis.command, fis.features);

        /* Data movement with sect_count 0 is only legal for commands
         * with an implicit one-sector payload.
         */
        if ((taskin || taskout) && (!fis.sect_count)) {
                if (nsect)
                        fis.sect_count = nsect;
                else {
                        if (!force_single_sector) {
                                dev_warn(&dd->pdev->dev,
                                        "data movement but "
                                        "sect_count is 0\n");
                                err = -EINVAL;
                                goto abort;
                        }
                }
        }

        dbg_printk(MTIP_DRV_NAME
                " %s: cmd %x, feat %x, nsect %x,"
                " sect/lbal %x, lcyl/lbam %x, hcyl/lbah %x,"
                " head/dev %x\n",
                __func__,
                fis.command,
                fis.features,
                fis.sect_count,
                fis.lba_low,
                fis.lba_mid,
                fis.lba_hi,
                fis.device);

        /* check for erase mode support during secure erase.*/
        if ((fis.command == ATA_CMD_SEC_ERASE_UNIT) && outbuf &&
                                        (outbuf[0] & MTIP_SEC_ERASE_MODE)) {
                erasemode = 1;
        }

        /* Command-specific timeout (secure erase gets a longer one). */
        mtip_set_timeout(dd, &fis, &timeout, erasemode);

        /* Determine the correct transfer size.*/
        if (force_single_sector)
                transfer_size = ATA_SECT_SIZE;
        else
                transfer_size = ATA_SECT_SIZE * fis.sect_count;

        /* Execute the command.*/
        if (mtip_exec_internal_command(dd->port,
                                 &fis,
                                 5,
                                 dma_buffer,
                                 transfer_size,
                                 0,
                                 timeout) < 0) {
                err = -EIO;
                goto abort;
        }

        task_file_data = readl(dd->port->mmio+PORT_TFDATA);

        /* Report from the PIO Setup FIS on a clean IN transfer
         * (TFDATA bit 0 is the ERR status bit), else the D2H FIS.
         */
        if ((req_task->data_phase == TASKFILE_IN) && !(task_file_data & 1)) {
                reply = dd->port->rxfis + RX_FIS_PIO_SETUP;
                req_task->io_ports[7] = reply->control;
        } else {
                reply = dd->port->rxfis + RX_FIS_D2H_REG;
                req_task->io_ports[7] = reply->command;
        }

        /* reclaim the DMA buffers.*/
        if (inbuf_dma)
                dma_unmap_single(&dd->pdev->dev, inbuf_dma, taskin,
                                 DMA_FROM_DEVICE);
        if (outbuf_dma)
                dma_unmap_single(&dd->pdev->dev, outbuf_dma, taskout,
                                 DMA_TO_DEVICE);
        /* Zeroed so the unmaps at abort: are not repeated. */
        inbuf_dma  = 0;
        outbuf_dma = 0;

        /* return the ATA registers to the caller.*/
        req_task->io_ports[1] = reply->features;
        req_task->io_ports[2] = reply->sect_count;
        req_task->io_ports[3] = reply->lba_low;
        req_task->io_ports[4] = reply->lba_mid;
        req_task->io_ports[5] = reply->lba_hi;
        req_task->io_ports[6] = reply->device;

        if (req_task->out_flags.all & 1)  {

                req_task->hob_ports[3] = reply->lba_low_ex;
                req_task->hob_ports[4] = reply->lba_mid_ex;
                req_task->hob_ports[5] = reply->lba_hi_ex;
                req_task->hob_ports[1] = reply->features_ex;
                req_task->hob_ports[2] = reply->sect_cnt_ex;
        }
        dbg_printk(MTIP_DRV_NAME
                " %s: Completion: stat %x,"
                "err %x, sect_cnt %x, lbalo %x,"
                "lbamid %x, lbahi %x, dev %x\n",
                __func__,
                req_task->io_ports[7],
                req_task->io_ports[1],
                req_task->io_ports[2],
                req_task->io_ports[3],
                req_task->io_ports[4],
                req_task->io_ports[5],
                req_task->io_ports[6]);

        if (taskout) {
                if (copy_to_user(buf + outtotal, outbuf, taskout)) {
                        err = -EFAULT;
                        goto abort;
                }
        }
        if (taskin) {
                if (copy_to_user(buf + intotal, inbuf, taskin)) {
                        err = -EFAULT;
                        goto abort;
                }
        }
abort:
        if (inbuf_dma)
                dma_unmap_single(&dd->pdev->dev, inbuf_dma, taskin,
                                 DMA_FROM_DEVICE);
        if (outbuf_dma)
                dma_unmap_single(&dd->pdev->dev, outbuf_dma, taskout,
                                 DMA_TO_DEVICE);
        kfree(outbuf);
        kfree(inbuf);

        return err;
}
1932
/*
 * Handle IOCTL calls from the Block Layer.
 *
 * This function is called by the Block Layer when it receives an IOCTL
 * command that it does not understand. If the IOCTL command is not supported
 * this function returns -EINVAL.
 *
 * @dd  Pointer to the driver data structure.
 * @cmd IOCTL command passed from the Block Layer.
 * @arg IOCTL argument passed from the Block Layer.
 *
 * return value
 *      0       The IOCTL completed successfully.
 *      -EINVAL The specified command is not supported.
 *      -EFAULT An error occurred copying data to a user space buffer.
 *      -EIO    An error occurred while executing the command.
 */
1950 static int mtip_hw_ioctl(struct driver_data *dd, unsigned int cmd,
1951                          unsigned long arg)
1952 {
1953         switch (cmd) {
1954         case HDIO_GET_IDENTITY:
1955         {
1956                 if (copy_to_user((void __user *)arg, dd->port->identify,
1957                                                 sizeof(u16) * ATA_ID_WORDS))
1958                         return -EFAULT;
1959                 break;
1960         }
1961         case HDIO_DRIVE_CMD:
1962         {
1963                 u8 drive_command[4];
1964
1965                 /* Copy the user command info to our buffer. */
1966                 if (copy_from_user(drive_command,
1967                                          (void __user *) arg,
1968                                          sizeof(drive_command)))
1969                         return -EFAULT;
1970
1971                 /* Execute the drive command. */
1972                 if (exec_drive_command(dd->port,
1973                                          drive_command,
1974                                          (void __user *) (arg+4)))
1975                         return -EIO;
1976
1977                 /* Copy the status back to the users buffer. */
1978                 if (copy_to_user((void __user *) arg,
1979                                          drive_command,
1980                                          sizeof(drive_command)))
1981                         return -EFAULT;
1982
1983                 break;
1984         }
1985         case HDIO_DRIVE_TASK:
1986         {
1987                 u8 drive_command[7];
1988
1989                 /* Copy the user command info to our buffer. */
1990                 if (copy_from_user(drive_command,
1991                                          (void __user *) arg,
1992                                          sizeof(drive_command)))
1993                         return -EFAULT;
1994
1995                 /* Execute the drive command. */
1996                 if (exec_drive_task(dd->port, drive_command))
1997                         return -EIO;
1998
1999                 /* Copy the status back to the users buffer. */
2000                 if (copy_to_user((void __user *) arg,
2001                                          drive_command,
2002                                          sizeof(drive_command)))
2003                         return -EFAULT;
2004
2005                 break;
2006         }
2007         case HDIO_DRIVE_TASKFILE: {
2008                 ide_task_request_t req_task;
2009                 int ret, outtotal;
2010
2011                 if (copy_from_user(&req_task, (void __user *) arg,
2012                                         sizeof(req_task)))
2013                         return -EFAULT;
2014
2015                 outtotal = sizeof(req_task);
2016
2017                 ret = exec_drive_taskfile(dd, (void __user *) arg,
2018                                                 &req_task, outtotal);
2019
2020                 if (copy_to_user((void __user *) arg, &req_task,
2021                                                         sizeof(req_task)))
2022                         return -EFAULT;
2023
2024                 return ret;
2025         }
2026
2027         default:
2028                 return -EINVAL;
2029         }
2030         return 0;
2031 }
2032
/*
 * Submit an IO to the hw
 *
 * This function is called by the block layer to issue an io
 * to the device. It DMA-maps the request's scatter list, builds an
 * NCQ (FPDMA) read/write FIS and command header for the request's
 * tag, and issues the command to the hardware -- unless IO is paused,
 * in which case the tag is queued for later issue.
 *
 * @dd      Pointer to the driver data structure.
 * @rq      Pointer to the request being submitted.
 * @command Pointer to the command's private data (FIS, SGL, state).
 * @hctx    Pointer to the hardware queue context for this request.
 *
 * return value
 *      None
 */
static void mtip_hw_submit_io(struct driver_data *dd, struct request *rq,
                              struct mtip_cmd *command,
                              struct blk_mq_hw_ctx *hctx)
{
        /* Command header slot for this tag within the command list. */
        struct mtip_cmd_hdr *hdr =
                dd->port->command_list + sizeof(struct mtip_cmd_hdr) * rq->tag;
        struct host_to_dev_fis  *fis;
        struct mtip_port *port = dd->port;
        int dma_dir = rq_data_dir(rq) == READ ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
        u64 start = blk_rq_pos(rq);
        unsigned int nsect = blk_rq_sectors(rq);
        unsigned int nents;

        /* Map the scatter list for DMA access */
        /* NOTE(review): a dma_map_sg() return of 0 is not handled here --
         * confirm whether mapping failure is possible on this path.
         */
        nents = blk_rq_map_sg(hctx->queue, rq, command->sg);
        nents = dma_map_sg(&dd->pdev->dev, command->sg, nents, dma_dir);

        prefetch(&port->flags);

        command->scatter_ents = nents;

        /*
         * The number of retries for this command before it is
         * reported as a failure to the upper layers.
         */
        command->retries = MTIP_MAX_RETRIES;

        /* Fill out fis */
        fis = command->command;
        fis->type        = 0x27;        /* host-to-device register FIS */
        fis->opts        = 1 << 7;      /* C bit: this FIS carries a command */
        if (dma_dir == DMA_FROM_DEVICE)
                fis->command = ATA_CMD_FPDMA_READ;
        else
                fis->command = ATA_CMD_FPDMA_WRITE;
        /* 48-bit starting LBA, split across low and extended bytes. */
        fis->lba_low     = start & 0xFF;
        fis->lba_mid     = (start >> 8) & 0xFF;
        fis->lba_hi      = (start >> 16) & 0xFF;
        fis->lba_low_ex  = (start >> 24) & 0xFF;
        fis->lba_mid_ex  = (start >> 32) & 0xFF;
        fis->lba_hi_ex   = (start >> 40) & 0xFF;
        fis->device      = 1 << 6;      /* LBA mode */
        /* For FPDMA the sector count is carried in the Features bytes. */
        fis->features    = nsect & 0xFF;
        fis->features_ex = (nsect >> 8) & 0xFF;
        /* NCQ: the tag is carried in sect_count bits 7:3. */
        fis->sect_count  = ((rq->tag << 3) | (rq->tag >> 5));
        fis->sect_cnt_ex = 0;
        fis->control     = 0;
        fis->res2        = 0;
        fis->res3        = 0;
        fill_command_sg(dd, command, nents);

        /* Unaligned commands set the FUA-style bit in the device field. */
        if (unlikely(command->unaligned))
                fis->device |= 1 << 7;

        /* Populate the command header */
        hdr->ctba = cpu_to_le32(command->command_dma & 0xFFFFFFFF);
        if (test_bit(MTIP_PF_HOST_CAP_64, &dd->port->flags))
                /* Split shift avoids UB when dma_addr_t is 32 bits wide. */
                hdr->ctbau = cpu_to_le32((command->command_dma >> 16) >> 16);
        /* opts: PRDT entry count | FIS length (5 dwords) | prefetch. */
        hdr->opts = cpu_to_le32((nents << 16) | 5 | AHCI_CMD_PREFETCH);
        hdr->byte_count = 0;

        /* Remembered so completion can unmap with the right direction. */
        command->direction = dma_dir;

        /*
         * To prevent this command from being issued
         * if an internal command is in progress or error handling is active.
         */
        if (unlikely(port->flags & MTIP_PF_PAUSE_IO)) {
                set_bit(rq->tag, port->cmds_to_issue);
                set_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags);
                return;
        }

        /* Issue the command to the hardware */
        mtip_issue_ncq_command(port, rq->tag);
}
2129
2130 /*
2131  * Sysfs status dump.
2132  *
2133  * @dev  Pointer to the device structure, passed by the kernrel.
2134  * @attr Pointer to the device_attribute structure passed by the kernel.
2135  * @buf  Pointer to the char buffer that will receive the stats info.
2136  *
2137  * return value
2138  *      The size, in bytes, of the data copied into buf.
2139  */
2140 static ssize_t mtip_hw_show_status(struct device *dev,
2141                                 struct device_attribute *attr,
2142                                 char *buf)
2143 {
2144         struct driver_data *dd = dev_to_disk(dev)->private_data;
2145         int size = 0;
2146
2147         if (test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag))
2148                 size += sprintf(buf, "%s", "thermal_shutdown\n");
2149         else if (test_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag))
2150                 size += sprintf(buf, "%s", "write_protect\n");
2151         else
2152                 size += sprintf(buf, "%s", "online\n");
2153
2154         return size;
2155 }
2156
/* Read-only "status" attribute backed by mtip_hw_show_status(). */
static DEVICE_ATTR(status, 0444, mtip_hw_show_status, NULL);

/* Attributes attached to the gendisk's sysfs directory. */
static struct attribute *mtip_disk_attrs[] = {
        &dev_attr_status.attr,
        NULL,
};

static const struct attribute_group mtip_disk_attr_group = {
        .attrs = mtip_disk_attrs,
};

/* NULL-terminated group list registered with the disk. */
static const struct attribute_group *mtip_disk_attr_groups[] = {
        &mtip_disk_attr_group,
        NULL,
};
2172
2173 /* debugsfs entries */
2174
2175 static ssize_t show_device_status(struct device_driver *drv, char *buf)
2176 {
2177         int size = 0;
2178         struct driver_data *dd, *tmp;
2179         unsigned long flags;
2180         char id_buf[42];
2181         u16 status = 0;
2182
2183         spin_lock_irqsave(&dev_lock, flags);
2184         size += sprintf(&buf[size], "Devices Present:\n");
2185         list_for_each_entry_safe(dd, tmp, &online_list, online_list) {
2186                 if (dd->pdev) {
2187                         if (dd->port &&
2188                             dd->port->identify &&
2189                             dd->port->identify_valid) {
2190                                 strlcpy(id_buf,
2191                                         (char *) (dd->port->identify + 10), 21);
2192                                 status = *(dd->port->identify + 141);
2193                         } else {
2194                                 memset(id_buf, 0, 42);
2195                                 status = 0;
2196                         }
2197
2198                         if (dd->port &&
2199                             test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags)) {
2200                                 size += sprintf(&buf[size],
2201                                         " device %s %s (ftl rebuild %d %%)\n",
2202                                         dev_name(&dd->pdev->dev),
2203                                         id_buf,
2204                                         status);
2205                         } else {
2206                                 size += sprintf(&buf[size],
2207                                         " device %s %s\n",
2208                                         dev_name(&dd->pdev->dev),
2209                                         id_buf);
2210                         }
2211                 }
2212         }
2213
2214         size += sprintf(&buf[size], "Devices Being Removed:\n");
2215         list_for_each_entry_safe(dd, tmp, &removing_list, remove_list) {
2216                 if (dd->pdev) {
2217                         if (dd->port &&
2218                             dd->port->identify &&
2219                             dd->port->identify_valid) {
2220                                 strlcpy(id_buf,
2221                                         (char *) (dd->port->identify+10), 21);
2222                                 status = *(dd->port->identify + 141);
2223                         } else {
2224                                 memset(id_buf, 0, 42);
2225                                 status = 0;
2226                         }
2227
2228                         if (dd->port &&
2229                             test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags)) {
2230                                 size += sprintf(&buf[size],
2231                                         " device %s %s (ftl rebuild %d %%)\n",
2232                                         dev_name(&dd->pdev->dev),
2233                                         id_buf,
2234                                         status);
2235                         } else {
2236                                 size += sprintf(&buf[size],
2237                                         " device %s %s\n",
2238                                         dev_name(&dd->pdev->dev),
2239                                         id_buf);
2240                         }
2241                 }
2242         }
2243         spin_unlock_irqrestore(&dev_lock, flags);
2244
2245         return size;
2246 }
2247
2248 static ssize_t mtip_hw_read_device_status(struct file *f, char __user *ubuf,
2249                                                 size_t len, loff_t *offset)
2250 {
2251         int size = *offset;
2252         char *buf;
2253         int rv = 0;
2254
2255         if (!len || *offset)
2256                 return 0;
2257
2258         buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL);
2259         if (!buf)
2260                 return -ENOMEM;
2261
2262         size += show_device_status(NULL, buf);
2263
2264         *offset = size <= len ? size : len;
2265         size = copy_to_user(ubuf, buf, *offset);
2266         if (size)
2267                 rv = -EFAULT;
2268
2269         kfree(buf);
2270         return rv ? rv : *offset;
2271 }
2272
2273 static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf,
2274                                   size_t len, loff_t *offset)
2275 {
2276         struct driver_data *dd =  (struct driver_data *)f->private_data;
2277         char *buf;
2278         u32 group_allocated;
2279         int size = *offset;
2280         int n, rv = 0;
2281
2282         if (!len || size)
2283                 return 0;
2284
2285         buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL);
2286         if (!buf)
2287                 return -ENOMEM;
2288
2289         size += sprintf(&buf[size], "H/ S ACTive      : [ 0x");
2290
2291         for (n = dd->slot_groups-1; n >= 0; n--)
2292                 size += sprintf(&buf[size], "%08X ",
2293                                          readl(dd->port->s_active[n]));
2294
2295         size += sprintf(&buf[size], "]\n");
2296         size += sprintf(&buf[size], "H/ Command Issue : [ 0x");
2297
2298         for (n = dd->slot_groups-1; n >= 0; n--)
2299                 size += sprintf(&buf[size], "%08X ",
2300                                         readl(dd->port->cmd_issue[n]));
2301
2302         size += sprintf(&buf[size], "]\n");
2303         size += sprintf(&buf[size], "H/ Completed     : [ 0x");
2304
2305         for (n = dd->slot_groups-1; n >= 0; n--)
2306                 size += sprintf(&buf[size], "%08X ",
2307                                 readl(dd->port->completed[n]));
2308
2309         size += sprintf(&buf[size], "]\n");
2310         size += sprintf(&buf[size], "H/ PORT IRQ STAT : [ 0x%08X ]\n",
2311                                 readl(dd->port->mmio + PORT_IRQ_STAT));
2312         size += sprintf(&buf[size], "H/ HOST IRQ STAT : [ 0x%08X ]\n",
2313                                 readl(dd->mmio + HOST_IRQ_STAT));
2314         size += sprintf(&buf[size], "\n");
2315
2316         size += sprintf(&buf[size], "L/ Commands in Q : [ 0x");
2317
2318         for (n = dd->slot_groups-1; n >= 0; n--) {
2319                 if (sizeof(long) > sizeof(u32))
2320                         group_allocated =
2321                                 dd->port->cmds_to_issue[n/2] >> (32*(n&1));
2322                 else
2323                         group_allocated = dd->port->cmds_to_issue[n];
2324                 size += sprintf(&buf[size], "%08X ", group_allocated);
2325         }
2326         size += sprintf(&buf[size], "]\n");
2327
2328         *offset = size <= len ? size : len;
2329         size = copy_to_user(ubuf, buf, *offset);
2330         if (size)
2331                 rv = -EFAULT;
2332
2333         kfree(buf);
2334         return rv ? rv : *offset;
2335 }
2336
2337 static ssize_t mtip_hw_read_flags(struct file *f, char __user *ubuf,
2338                                   size_t len, loff_t *offset)
2339 {
2340         struct driver_data *dd =  (struct driver_data *)f->private_data;
2341         char *buf;
2342         int size = *offset;
2343         int rv = 0;
2344
2345         if (!len || size)
2346                 return 0;
2347
2348         buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL);
2349         if (!buf)
2350                 return -ENOMEM;
2351
2352         size += sprintf(&buf[size], "Flag-port : [ %08lX ]\n",
2353                                                         dd->port->flags);
2354         size += sprintf(&buf[size], "Flag-dd   : [ %08lX ]\n",
2355                                                         dd->dd_flag);
2356
2357         *offset = size <= len ? size : len;
2358         size = copy_to_user(ubuf, buf, *offset);
2359         if (size)
2360                 rv = -EFAULT;
2361
2362         kfree(buf);
2363         return rv ? rv : *offset;
2364 }
2365
/* debugfs ops for the driver-global "device_status" file. */
static const struct file_operations mtip_device_status_fops = {
        .owner  = THIS_MODULE,
        .open   = simple_open,
        .read   = mtip_hw_read_device_status,
        .llseek = no_llseek,
};

/* debugfs ops for the per-device "registers" dump. */
static const struct file_operations mtip_regs_fops = {
        .owner  = THIS_MODULE,
        .open   = simple_open,
        .read   = mtip_hw_read_registers,
        .llseek = no_llseek,
};

/* debugfs ops for the per-device "flags" dump. */
static const struct file_operations mtip_flags_fops = {
        .owner  = THIS_MODULE,
        .open   = simple_open,
        .read   = mtip_hw_read_flags,
        .llseek = no_llseek,
};
2386
/*
 * Create the per-device debugfs entries ("flags" and "registers")
 * under dfs_parent/<disk_name>.
 *
 * @dd Pointer to the driver data structure.
 *
 * return value
 *      0  Entries created.
 *      -1 debugfs parent missing or the directory could not be created.
 */
static int mtip_hw_debugfs_init(struct driver_data *dd)
{
        if (!dfs_parent)
                return -1;

        dd->dfs_node = debugfs_create_dir(dd->disk->disk_name, dfs_parent);
        if (IS_ERR_OR_NULL(dd->dfs_node)) {
                dev_warn(&dd->pdev->dev,
                        "Error creating node %s under debugfs\n",
                                                dd->disk->disk_name);
                /* NULL the handle so the later remove is a safe no-op. */
                dd->dfs_node = NULL;
                return -1;
        }

        debugfs_create_file("flags", 0444, dd->dfs_node, dd, &mtip_flags_fops);
        debugfs_create_file("registers", 0444, dd->dfs_node, dd,
                            &mtip_regs_fops);

        return 0;
}
2407
/* Tear down the per-device debugfs directory created by
 * mtip_hw_debugfs_init(); safe to call with a NULL dfs_node.
 */
static void mtip_hw_debugfs_exit(struct driver_data *dd)
{
        debugfs_remove_recursive(dd->dfs_node);
}
2412
2413 /*
2414  * Perform any init/resume time hardware setup
2415  *
2416  * @dd Pointer to the driver data structure.
2417  *
2418  * return value
2419  *      None
2420  */
2421 static inline void hba_setup(struct driver_data *dd)
2422 {
2423         u32 hwdata;
2424         hwdata = readl(dd->mmio + HOST_HSORG);
2425
2426         /* interrupt bug workaround: use only 1 IS bit.*/
2427         writel(hwdata |
2428                 HSORG_DISABLE_SLOTGRP_INTR |
2429                 HSORG_DISABLE_SLOTGRP_PXIS,
2430                 dd->mmio + HOST_HSORG);
2431 }
2432
2433 static int mtip_device_unaligned_constrained(struct driver_data *dd)
2434 {
2435         return (dd->pdev->device == P420M_DEVICE_ID ? 1 : 0);
2436 }
2437
2438 /*
2439  * Detect the details of the product, and store anything needed
2440  * into the driver data structure.  This includes product type and
2441  * version and number of slot groups.
2442  *
2443  * @dd Pointer to the driver data structure.
2444  *
2445  * return value
2446  *      None
2447  */
2448 static void mtip_detect_product(struct driver_data *dd)
2449 {
2450         u32 hwdata;
2451         unsigned int rev, slotgroups;
2452
2453         /*
2454          * HBA base + 0xFC [15:0] - vendor-specific hardware interface
2455          * info register:
2456          * [15:8] hardware/software interface rev#
2457          * [   3] asic-style interface
2458          * [ 2:0] number of slot groups, minus 1 (only valid for asic-style).
2459          */
2460         hwdata = readl(dd->mmio + HOST_HSORG);
2461
2462         dd->product_type = MTIP_PRODUCT_UNKNOWN;
2463         dd->slot_groups = 1;
2464
2465         if (hwdata & 0x8) {
2466                 dd->product_type = MTIP_PRODUCT_ASICFPGA;
2467                 rev = (hwdata & HSORG_HWREV) >> 8;
2468                 slotgroups = (hwdata & HSORG_SLOTGROUPS) + 1;
2469                 dev_info(&dd->pdev->dev,
2470                         "ASIC-FPGA design, HS rev 0x%x, "
2471                         "%i slot groups [%i slots]\n",
2472                          rev,
2473                          slotgroups,
2474                          slotgroups * 32);
2475
2476                 if (slotgroups > MTIP_MAX_SLOT_GROUPS) {
2477                         dev_warn(&dd->pdev->dev,
2478                                 "Warning: driver only supports "
2479                                 "%i slot groups.\n", MTIP_MAX_SLOT_GROUPS);
2480                         slotgroups = MTIP_MAX_SLOT_GROUPS;
2481                 }
2482                 dd->slot_groups = slotgroups;
2483                 return;
2484         }
2485
2486         dev_warn(&dd->pdev->dev, "Unrecognized product id\n");
2487 }
2488
/*
 * Blocking wait for FTL rebuild to complete
 *
 * Re-issues IDENTIFY roughly once a second until the rebuild-magic
 * word clears, the device disappears, or the overall timeout
 * (MTIP_FTL_REBUILD_TIMEOUT_MS) elapses.
 *
 * @dd Pointer to the DRIVER_DATA structure.
 *
 * return value
 *      0       FTL rebuild completed successfully
 *      -EFAULT FTL rebuild error/timeout/interruption
 */
static int mtip_ftl_rebuild_poll(struct driver_data *dd)
{
        unsigned long timeout, cnt = 0, start;

        dev_warn(&dd->pdev->dev,
                "FTL rebuild in progress. Polling for completion.\n");

        start = jiffies;
        timeout = jiffies + msecs_to_jiffies(MTIP_FTL_REBUILD_TIMEOUT_MS);

        do {
                /* Bail out if the device is going away under us. */
                if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
                                &dd->dd_flag)))
                        return -EFAULT;
                if (mtip_check_surprise_removal(dd))
                        return -EFAULT;

                /* Refresh identify data; the rebuild flag lives there. */
                if (mtip_get_identify(dd->port, NULL) < 0)
                        return -EFAULT;

                if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) ==
                        MTIP_FTL_REBUILD_MAGIC) {
                        ssleep(1);
                        /* Print message every 3 minutes */
                        if (cnt++ >= 180) {
                                dev_warn(&dd->pdev->dev,
                                "FTL rebuild in progress (%d secs).\n",
                                jiffies_to_msecs(jiffies - start) / 1000);
                                cnt = 0;
                        }
                } else {
                        dev_warn(&dd->pdev->dev,
                                "FTL rebuild complete (%d secs).\n",
                        jiffies_to_msecs(jiffies - start) / 1000);
                        /* Rebuild finished: bring the block device up. */
                        mtip_block_initialize(dd);
                        return 0;
                }
        } while (time_before(jiffies, timeout));

        /* Check for timeout */
        dev_err(&dd->pdev->dev,
                "Timed out waiting for FTL rebuild to complete (%d secs).\n",
                jiffies_to_msecs(jiffies - start) / 1000);
        return -EFAULT;
}
2543
2544 static void mtip_softirq_done_fn(struct request *rq)
2545 {
2546         struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
2547         struct driver_data *dd = rq->q->queuedata;
2548
2549         /* Unmap the DMA scatter list entries */
2550         dma_unmap_sg(&dd->pdev->dev, cmd->sg, cmd->scatter_ents,
2551                                                         cmd->direction);
2552
2553         if (unlikely(cmd->unaligned))
2554                 atomic_inc(&dd->port->cmd_slot_unal);
2555
2556         blk_mq_end_request(rq, cmd->status);
2557 }
2558
2559 static bool mtip_abort_cmd(struct request *req, void *data, bool reserved)
2560 {
2561         struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
2562         struct driver_data *dd = data;
2563
2564         dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag);
2565
2566         clear_bit(req->tag, dd->port->cmds_to_issue);
2567         cmd->status = BLK_STS_IOERR;
2568         mtip_softirq_done_fn(req);
2569         return true;
2570 }
2571
2572 static bool mtip_queue_cmd(struct request *req, void *data, bool reserved)
2573 {
2574         struct driver_data *dd = data;
2575
2576         set_bit(req->tag, dd->port->cmds_to_issue);
2577         blk_abort_request(req);
2578         return true;
2579 }
2580
2581 /*
2582  * service thread to issue queued commands
2583  *
2584  * @data Pointer to the driver data structure.
2585  *
2586  * return value
2587  *      0
2588  */
2589
2590 static int mtip_service_thread(void *data)
2591 {
2592         struct driver_data *dd = (struct driver_data *)data;
2593         unsigned long slot, slot_start, slot_wrap, to;
2594         unsigned int num_cmd_slots = dd->slot_groups * 32;
2595         struct mtip_port *port = dd->port;
2596
2597         while (1) {
2598                 if (kthread_should_stop() ||
2599                         test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags))
2600                         goto st_out;
2601                 clear_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags);
2602
2603                 /*
2604                  * the condition is to check neither an internal command is
2605                  * is in progress nor error handling is active
2606                  */
2607                 wait_event_interruptible(port->svc_wait, (port->flags) &&
2608                         (port->flags & MTIP_PF_SVC_THD_WORK));
2609
2610                 if (kthread_should_stop() ||
2611                         test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags))
2612                         goto st_out;
2613
2614                 if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
2615                                 &dd->dd_flag)))
2616                         goto st_out;
2617
2618                 set_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags);
2619
2620 restart_eh:
2621                 /* Demux bits: start with error handling */
2622                 if (test_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags)) {
2623                         mtip_handle_tfe(dd);
2624                         clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
2625                 }
2626
2627                 if (test_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags))
2628                         goto restart_eh;
2629
2630                 if (test_bit(MTIP_PF_TO_ACTIVE_BIT, &port->flags)) {
2631                         to = jiffies + msecs_to_jiffies(5000);
2632
2633                         do {
2634                                 mdelay(100);
2635                         } while (atomic_read(&dd->irq_workers_active) != 0 &&
2636                                 time_before(jiffies, to));
2637
2638                         if (atomic_read(&dd->irq_workers_active) != 0)
2639                                 dev_warn(&dd->pdev->dev,
2640                                         "Completion workers still active!");
2641
2642                         blk_mq_quiesce_queue(dd->queue);
2643
2644                         blk_mq_tagset_busy_iter(&dd->tags, mtip_queue_cmd, dd);
2645
2646                         set_bit(MTIP_PF_ISSUE_CMDS_BIT, &dd->port->flags);
2647
2648                         if (mtip_device_reset(dd))
2649                                 blk_mq_tagset_busy_iter(&dd->tags,
2650                                                         mtip_abort_cmd, dd);
2651
2652                         clear_bit(MTIP_PF_TO_ACTIVE_BIT, &dd->port->flags);
2653
2654                         blk_mq_unquiesce_queue(dd->queue);
2655                 }
2656
2657                 if (test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) {
2658                         slot = 1;
2659                         /* used to restrict the loop to one iteration */
2660                         slot_start = num_cmd_slots;
2661                         slot_wrap = 0;
2662                         while (1) {
2663                                 slot = find_next_bit(port->cmds_to_issue,
2664                                                 num_cmd_slots, slot);
2665                                 if (slot_wrap == 1) {
2666                                         if ((slot_start >= slot) ||
2667                                                 (slot >= num_cmd_slots))
2668                                                 break;
2669                                 }
2670                                 if (unlikely(slot_start == num_cmd_slots))
2671                                         slot_start = slot;
2672
2673                                 if (unlikely(slot == num_cmd_slots)) {
2674                                         slot = 1;
2675                                         slot_wrap = 1;
2676                                         continue;
2677                                 }
2678
2679                                 /* Issue the command to the hardware */
2680                                 mtip_issue_ncq_command(port, slot);
2681
2682                                 clear_bit(slot, port->cmds_to_issue);
2683                         }
2684
2685                         clear_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags);
2686                 }
2687
2688                 if (test_bit(MTIP_PF_REBUILD_BIT, &port->flags)) {
2689                         if (mtip_ftl_rebuild_poll(dd) == 0)
2690                                 clear_bit(MTIP_PF_REBUILD_BIT, &port->flags);
2691                 }
2692         }
2693
2694 st_out:
2695         return 0;
2696 }
2697
2698 /*
2699  * DMA region teardown
2700  *
2701  * @dd Pointer to driver_data structure
2702  *
2703  * return value
2704  *      None
2705  */
2706 static void mtip_dma_free(struct driver_data *dd)
2707 {
2708         struct mtip_port *port = dd->port;
2709
2710         if (port->block1)
2711                 dma_free_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
2712                                         port->block1, port->block1_dma);
2713
2714         if (port->command_list) {
2715                 dma_free_coherent(&dd->pdev->dev, AHCI_CMD_TBL_SZ,
2716                                 port->command_list, port->command_list_dma);
2717         }
2718 }
2719
2720 /*
2721  * DMA region setup
2722  *
2723  * @dd Pointer to driver_data structure
2724  *
2725  * return value
2726  *      -ENOMEM Not enough free DMA region space to initialize driver
2727  */
2728 static int mtip_dma_alloc(struct driver_data *dd)
2729 {
2730         struct mtip_port *port = dd->port;
2731
2732         /* Allocate dma memory for RX Fis, Identify, and Sector Buffer */
2733         port->block1 =
2734                 dma_alloc_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
2735                                         &port->block1_dma, GFP_KERNEL);
2736         if (!port->block1)
2737                 return -ENOMEM;
2738
2739         /* Allocate dma memory for command list */
2740         port->command_list =
2741                 dma_alloc_coherent(&dd->pdev->dev, AHCI_CMD_TBL_SZ,
2742                                         &port->command_list_dma, GFP_KERNEL);
2743         if (!port->command_list) {
2744                 dma_free_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
2745                                         port->block1, port->block1_dma);
2746                 port->block1 = NULL;
2747                 port->block1_dma = 0;
2748                 return -ENOMEM;
2749         }
2750
2751         /* Setup all pointers into first DMA region */
2752         port->rxfis         = port->block1 + AHCI_RX_FIS_OFFSET;
2753         port->rxfis_dma     = port->block1_dma + AHCI_RX_FIS_OFFSET;
2754         port->identify      = port->block1 + AHCI_IDFY_OFFSET;
2755         port->identify_dma  = port->block1_dma + AHCI_IDFY_OFFSET;
2756         port->log_buf       = port->block1 + AHCI_SECTBUF_OFFSET;
2757         port->log_buf_dma   = port->block1_dma + AHCI_SECTBUF_OFFSET;
2758         port->smart_buf     = port->block1 + AHCI_SMARTBUF_OFFSET;
2759         port->smart_buf_dma = port->block1_dma + AHCI_SMARTBUF_OFFSET;
2760
2761         return 0;
2762 }
2763
/*
 * Read the IDENTIFY data and derive drive status flags.
 *
 * Detects an in-progress FTL rebuild, then reads log page 10h
 * (ATA_LOG_SATA_NCQ) to pick up write-protect, over-temperature and
 * rebuild-failed indications, and finally queries SMART attribute 242
 * for write-protect progress.
 *
 * @dd Pointer to the driver data structure.
 *
 * return value
 *      MTIP_FTL_REBUILD_MAGIC if an FTL rebuild is in progress,
 *      -EFAULT if IDENTIFY could not be read, otherwise the result of
 *      the READ LOG EXT command (non-zero is treated as non-fatal).
 */
static int mtip_hw_get_identify(struct driver_data *dd)
{
        struct smart_attr attr242;
        unsigned char *buf;
        int rv;

        if (mtip_get_identify(dd->port, NULL) < 0)
                return -EFAULT;

        /*
         * A magic value in the IDENTIFY data means an FTL rebuild is
         * running; flag it for the service thread and report it.
         */
        if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) ==
                MTIP_FTL_REBUILD_MAGIC) {
                set_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags);
                return MTIP_FTL_REBUILD_MAGIC;
        }
        mtip_dump_identify(dd->port);

        /* check write protect, over temp and rebuild statuses */
        rv = mtip_read_log_page(dd->port, ATA_LOG_SATA_NCQ,
                                dd->port->log_buf,
                                dd->port->log_buf_dma, 1);
        if (rv) {
                dev_warn(&dd->pdev->dev,
                        "Error in READ LOG EXT (10h) command\n");
                /* non-critical error, don't fail the load */
        } else {
                buf = (unsigned char *)dd->port->log_buf;
                /* Byte 259 bit 0: write protect (device-specific layout). */
                if (buf[259] & 0x1) {
                        dev_info(&dd->pdev->dev,
                                "Write protect bit is set.\n");
                        set_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag);
                }
                /* Byte 288 == 0xF7: drive entered thermal shutdown. */
                if (buf[288] == 0xF7) {
                        dev_info(&dd->pdev->dev,
                                "Exceeded Tmax, drive in thermal shutdown.\n");
                        set_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag);
                }
                /* Byte 288 == 0xBF: a previous rebuild has failed. */
                if (buf[288] == 0xBF) {
                        dev_info(&dd->pdev->dev,
                                "Drive indicates rebuild has failed.\n");
                        set_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag);
                }
        }

        /* get write protect progress */
        memset(&attr242, 0, sizeof(struct smart_attr));
        if (mtip_get_smart_attr(dd->port, 242, &attr242))
                dev_warn(&dd->pdev->dev,
                                "Unable to check write protect progress\n");
        else
                dev_info(&dd->pdev->dev,
                                "Write protect progress: %u%% (%u blocks)\n",
                                attr242.cur, le32_to_cpu(attr242.data));

        return rv;
}
2819
2820 /*
2821  * Called once for each card.
2822  *
2823  * @dd Pointer to the driver data structure.
2824  *
2825  * return value
2826  *      0 on success, else an error code.
2827  */
2828 static int mtip_hw_init(struct driver_data *dd)
2829 {
2830         int i;
2831         int rv;
2832         unsigned long timeout, timetaken;
2833
2834         dd->mmio = pcim_iomap_table(dd->pdev)[MTIP_ABAR];
2835
2836         mtip_detect_product(dd);
2837         if (dd->product_type == MTIP_PRODUCT_UNKNOWN) {
2838                 rv = -EIO;
2839                 goto out1;
2840         }
2841
2842         hba_setup(dd);
2843
2844         dd->port = kzalloc_node(sizeof(struct mtip_port), GFP_KERNEL,
2845                                 dd->numa_node);
2846         if (!dd->port)
2847                 return -ENOMEM;
2848
2849         /* Continue workqueue setup */
2850         for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++)
2851                 dd->work[i].port = dd->port;
2852
2853         /* Enable unaligned IO constraints for some devices */
2854         if (mtip_device_unaligned_constrained(dd))
2855                 dd->unal_qdepth = MTIP_MAX_UNALIGNED_SLOTS;
2856         else
2857                 dd->unal_qdepth = 0;
2858
2859         atomic_set(&dd->port->cmd_slot_unal, dd->unal_qdepth);
2860
2861         /* Spinlock to prevent concurrent issue */
2862         for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++)
2863                 spin_lock_init(&dd->port->cmd_issue_lock[i]);
2864
2865         /* Set the port mmio base address. */
2866         dd->port->mmio  = dd->mmio + PORT_OFFSET;
2867         dd->port->dd    = dd;
2868
2869         /* DMA allocations */
2870         rv = mtip_dma_alloc(dd);
2871         if (rv < 0)
2872                 goto out1;
2873
2874         /* Setup the pointers to the extended s_active and CI registers. */
2875         for (i = 0; i < dd->slot_groups; i++) {
2876                 dd->port->s_active[i] =
2877                         dd->port->mmio + i*0x80 + PORT_SCR_ACT;
2878                 dd->port->cmd_issue[i] =
2879                         dd->port->mmio + i*0x80 + PORT_COMMAND_ISSUE;
2880                 dd->port->completed[i] =
2881                         dd->port->mmio + i*0x80 + PORT_SDBV;
2882         }
2883
2884         timetaken = jiffies;
2885         timeout = jiffies + msecs_to_jiffies(30000);
2886         while (((readl(dd->port->mmio + PORT_SCR_STAT) & 0x0F) != 0x03) &&
2887                  time_before(jiffies, timeout)) {
2888                 mdelay(100);
2889         }
2890         if (unlikely(mtip_check_surprise_removal(dd))) {
2891                 timetaken = jiffies - timetaken;
2892                 dev_warn(&dd->pdev->dev,
2893                         "Surprise removal detected at %u ms\n",
2894                         jiffies_to_msecs(timetaken));
2895                 rv = -ENODEV;
2896                 goto out2 ;
2897         }
2898         if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))) {
2899                 timetaken = jiffies - timetaken;
2900                 dev_warn(&dd->pdev->dev,
2901                         "Removal detected at %u ms\n",
2902                         jiffies_to_msecs(timetaken));
2903                 rv = -EFAULT;
2904                 goto out2;
2905         }
2906
2907         /* Conditionally reset the HBA. */
2908         if (!(readl(dd->mmio + HOST_CAP) & HOST_CAP_NZDMA)) {
2909                 if (mtip_hba_reset(dd) < 0) {
2910                         dev_err(&dd->pdev->dev,
2911                                 "Card did not reset within timeout\n");
2912                         rv = -EIO;
2913                         goto out2;
2914                 }
2915         } else {
2916                 /* Clear any pending interrupts on the HBA */
2917                 writel(readl(dd->mmio + HOST_IRQ_STAT),
2918                         dd->mmio + HOST_IRQ_STAT);
2919         }
2920
2921         mtip_init_port(dd->port);
2922         mtip_start_port(dd->port);
2923
2924         /* Setup the ISR and enable interrupts. */
2925         rv = request_irq(dd->pdev->irq, mtip_irq_handler, IRQF_SHARED,
2926                          dev_driver_string(&dd->pdev->dev), dd);
2927         if (rv) {
2928                 dev_err(&dd->pdev->dev,
2929                         "Unable to allocate IRQ %d\n", dd->pdev->irq);
2930                 goto out2;
2931         }
2932         irq_set_affinity_hint(dd->pdev->irq, get_cpu_mask(dd->isr_binding));
2933
2934         /* Enable interrupts on the HBA. */
2935         writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN,
2936                                         dd->mmio + HOST_CTL);
2937
2938         init_waitqueue_head(&dd->port->svc_wait);
2939
2940         if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) {
2941                 rv = -EFAULT;
2942                 goto out3;
2943         }
2944
2945         return rv;
2946
2947 out3:
2948         /* Disable interrupts on the HBA. */
2949         writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN,
2950                         dd->mmio + HOST_CTL);
2951
2952         /* Release the IRQ. */
2953         irq_set_affinity_hint(dd->pdev->irq, NULL);
2954         free_irq(dd->pdev->irq, dd);
2955
2956 out2:
2957         mtip_deinit_port(dd->port);
2958         mtip_dma_free(dd);
2959
2960 out1:
2961         /* Free the memory allocated for the for structure. */
2962         kfree(dd->port);
2963
2964         return rv;
2965 }
2966
2967 static int mtip_standby_drive(struct driver_data *dd)
2968 {
2969         int rv = 0;
2970
2971         if (dd->sr || !dd->port)
2972                 return -ENODEV;
2973         /*
2974          * Send standby immediate (E0h) to the drive so that it
2975          * saves its state.
2976          */
2977         if (!test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags) &&
2978             !test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag) &&
2979             !test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag)) {
2980                 rv = mtip_standby_immediate(dd->port);
2981                 if (rv)
2982                         dev_warn(&dd->pdev->dev,
2983                                 "STANDBY IMMEDIATE failed\n");
2984         }
2985         return rv;
2986 }
2987
2988 /*
2989  * Called to deinitialize an interface.
2990  *
2991  * @dd Pointer to the driver data structure.
2992  *
2993  * return value
2994  *      0
2995  */
2996 static int mtip_hw_exit(struct driver_data *dd)
2997 {
2998         if (!dd->sr) {
2999                 /* de-initialize the port. */
3000                 mtip_deinit_port(dd->port);
3001
3002                 /* Disable interrupts on the HBA. */
3003                 writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN,
3004                                 dd->mmio + HOST_CTL);
3005         }
3006
3007         /* Release the IRQ. */
3008         irq_set_affinity_hint(dd->pdev->irq, NULL);
3009         free_irq(dd->pdev->irq, dd);
3010         msleep(1000);
3011
3012         /* Free dma regions */
3013         mtip_dma_free(dd);
3014
3015         /* Free the memory allocated for the for structure. */
3016         kfree(dd->port);
3017         dd->port = NULL;
3018
3019         return 0;
3020 }
3021
3022 /*
3023  * Issue a Standby Immediate command to the device.
3024  *
3025  * This function is called by the Block Layer just before the
3026  * system powers off during a shutdown.
3027  *
3028  * @dd Pointer to the driver data structure.
3029  *
3030  * return value
3031  *      0
3032  */
static int mtip_hw_shutdown(struct driver_data *dd)
{
        /*
         * Issue STANDBY IMMEDIATE (E0h) so the drive saves its state
         * before power is removed.
         */
        mtip_standby_drive(dd);

        return 0;
}
3043
3044 /*
3045  * Suspend function
3046  *
3047  * This function is called by the Block Layer just before the
3048  * system hibernates.
3049  *
3050  * @dd Pointer to the driver data structure.
3051  *
3052  * return value
3053  *      0       Suspend was successful
3054  *      -EFAULT Suspend was not successful
3055  */
3056 static int mtip_hw_suspend(struct driver_data *dd)
3057 {
3058         /*
3059          * Send standby immediate (E0h) to the drive
3060          * so that it saves its state.
3061          */
3062         if (mtip_standby_drive(dd) != 0) {
3063                 dev_err(&dd->pdev->dev,
3064                         "Failed standby-immediate command\n");
3065                 return -EFAULT;
3066         }
3067
3068         /* Disable interrupts on the HBA.*/
3069         writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN,
3070                         dd->mmio + HOST_CTL);
3071         mtip_deinit_port(dd->port);
3072
3073         return 0;
3074 }
3075
3076 /*
3077  * Resume function
3078  *
3079  * This function is called by the Block Layer as the
3080  * system resumes.
3081  *
3082  * @dd Pointer to the driver data structure.
3083  *
3084  * return value
3085  *      0       Resume was successful
3086  *      -EFAULT Resume was not successful
3087  */
3088 static int mtip_hw_resume(struct driver_data *dd)
3089 {
3090         /* Perform any needed hardware setup steps */
3091         hba_setup(dd);
3092
3093         /* Reset the HBA */
3094         if (mtip_hba_reset(dd) != 0) {
3095                 dev_err(&dd->pdev->dev,
3096                         "Unable to reset the HBA\n");
3097                 return -EFAULT;
3098         }
3099
3100         /*
3101          * Enable the port, DMA engine, and FIS reception specific
3102          * h/w in controller.
3103          */
3104         mtip_init_port(dd->port);
3105         mtip_start_port(dd->port);
3106
3107         /* Enable interrupts on the HBA.*/
3108         writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN,
3109                         dd->mmio + HOST_CTL);
3110
3111         return 0;
3112 }
3113
3114 /*
3115  * Helper function for reusing disk name
3116  * upon hot insertion.
3117  */
static int rssd_disk_name_format(char *prefix,
                                 int index,
                                 char *buf,
                                 int buflen)
{
        const int radix = 'z' - 'a' + 1;
        char *name_start = buf + strlen(prefix);
        char *limit = buf + buflen;
        char *cursor = limit - 1;

        /* Build the alphabetic suffix backwards, least significant first. */
        *cursor = '\0';
        do {
                if (cursor == name_start)
                        return -EINVAL;
                *--cursor = 'a' + (index % radix);
                index = (index / radix) - 1;
        } while (index >= 0);

        /* Slide the suffix down next to the prefix, then write the prefix. */
        memmove(name_start, cursor, limit - cursor);
        memcpy(buf, prefix, strlen(prefix));

        return 0;
}
3144
3145 /*
3146  * Block layer IOCTL handler.
3147  *
3148  * @dev Pointer to the block_device structure.
3149  * @mode ignored
3150  * @cmd IOCTL command passed from the user application.
3151  * @arg Argument passed from the user application.
3152  *
3153  * return value
3154  *      0        IOCTL completed successfully.
3155  *      -ENOTTY  IOCTL not supported or invalid driver data
3156  *                 structure pointer.
3157  */
3158 static int mtip_block_ioctl(struct block_device *dev,
3159                             fmode_t mode,
3160                             unsigned cmd,
3161                             unsigned long arg)
3162 {
3163         struct driver_data *dd = dev->bd_disk->private_data;
3164
3165         if (!capable(CAP_SYS_ADMIN))
3166                 return -EACCES;
3167
3168         if (!dd)
3169                 return -ENOTTY;
3170
3171         if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)))
3172                 return -ENOTTY;
3173
3174         switch (cmd) {
3175         case BLKFLSBUF:
3176                 return -ENOTTY;
3177         default:
3178                 return mtip_hw_ioctl(dd, cmd, arg);
3179         }
3180 }
3181
3182 #ifdef CONFIG_COMPAT
3183 /*
3184  * Block layer compat IOCTL handler.
3185  *
3186  * @dev Pointer to the block_device structure.
3187  * @mode ignored
3188  * @cmd IOCTL command passed from the user application.
3189  * @arg Argument passed from the user application.
3190  *
3191  * return value
3192  *      0        IOCTL completed successfully.
3193  *      -ENOTTY  IOCTL not supported or invalid driver data
3194  *                 structure pointer.
3195  */
static int mtip_block_compat_ioctl(struct block_device *dev,
                            fmode_t mode,
                            unsigned cmd,
                            unsigned long arg)
{
        struct driver_data *dd = dev->bd_disk->private_data;

        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;

        if (!dd)
                return -ENOTTY;

        /* Reject ioctls once device removal has been flagged. */
        if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)))
                return -ENOTTY;

        switch (cmd) {
        case BLKFLSBUF:
                return -ENOTTY;
        case HDIO_DRIVE_TASKFILE: {
                struct mtip_compat_ide_task_request_s __user *compat_req_task;
                ide_task_request_t req_task;
                int compat_tasksize, outtotal, ret;

                compat_tasksize =
                        sizeof(struct mtip_compat_ide_task_request_s);

                compat_req_task =
                        (struct mtip_compat_ide_task_request_s __user *) arg;

                /*
                 * Copy the common part of the request, excluding the two
                 * trailing compat_long_t fields (out_size/in_size), which
                 * are fetched individually below so the kernel-side
                 * (native-width) fields are filled correctly.
                 */
                if (copy_from_user(&req_task, (void __user *) arg,
                        compat_tasksize - (2 * sizeof(compat_long_t))))
                        return -EFAULT;

                if (get_user(req_task.out_size, &compat_req_task->out_size))
                        return -EFAULT;

                if (get_user(req_task.in_size, &compat_req_task->in_size))
                        return -EFAULT;

                /* Offset of the data area following the compat struct. */
                outtotal = sizeof(struct mtip_compat_ide_task_request_s);

                ret = exec_drive_taskfile(dd, (void __user *) arg,
                                                &req_task, outtotal);

                /*
                 * Copy results back the same way: common part first, then
                 * the two size fields individually.
                 */
                if (copy_to_user((void __user *) arg, &req_task,
                                compat_tasksize -
                                (2 * sizeof(compat_long_t))))
                        return -EFAULT;

                if (put_user(req_task.out_size, &compat_req_task->out_size))
                        return -EFAULT;

                if (put_user(req_task.in_size, &compat_req_task->in_size))
                        return -EFAULT;

                return ret;
        }
        default:
                return mtip_hw_ioctl(dd, cmd, arg);
        }
}
3258 #endif
3259
3260 /*
3261  * Obtain the geometry of the device.
3262  *
3263  * You may think that this function is obsolete, but some applications,
3264  * fdisk for example still used CHS values. This function describes the
3265  * device as having 224 heads and 56 sectors per cylinder. These values are
3266  * chosen so that each cylinder is aligned on a 4KB boundary. Since a
3267  * partition is described in terms of a start and end cylinder this means
3268  * that each partition is also 4KB aligned. Non-aligned partitions adversely
3269  * affects performance.
3270  *
3271  * @dev Pointer to the block_device strucutre.
3272  * @geo Pointer to a hd_geometry structure.
3273  *
3274  * return value
3275  *      0       Operation completed successfully.
3276  *      -ENOTTY An error occurred while reading the drive capacity.
3277  */
3278 static int mtip_block_getgeo(struct block_device *dev,
3279                                 struct hd_geometry *geo)
3280 {
3281         struct driver_data *dd = dev->bd_disk->private_data;
3282         sector_t capacity;
3283
3284         if (!dd)
3285                 return -ENOTTY;
3286
3287         if (!(mtip_hw_get_capacity(dd, &capacity))) {
3288                 dev_warn(&dd->pdev->dev,
3289                         "Could not get drive capacity.\n");
3290                 return -ENOTTY;
3291         }
3292
3293         geo->heads = 224;
3294         geo->sectors = 56;
3295         sector_div(capacity, (geo->heads * geo->sectors));
3296         geo->cylinders = capacity;
3297         return 0;
3298 }
3299
3300 static int mtip_block_open(struct block_device *dev, fmode_t mode)
3301 {
3302         struct driver_data *dd;
3303
3304         if (dev && dev->bd_disk) {
3305                 dd = (struct driver_data *) dev->bd_disk->private_data;
3306
3307                 if (dd) {
3308                         if (test_bit(MTIP_DDF_REMOVAL_BIT,
3309                                                         &dd->dd_flag)) {
3310                                 return -ENODEV;
3311                         }
3312                         return 0;
3313                 }
3314         }
3315         return -ENODEV;
3316 }
3317
/* No per-open state to tear down; stub for block_device_operations. */
static void mtip_block_release(struct gendisk *disk, fmode_t mode)
{
}
3321
3322 /*
3323  * Block device operation function.
3324  *
3325  * This structure contains pointers to the functions required by the block
3326  * layer.
3327  */
static const struct block_device_operations mtip_block_ops = {
        .open           = mtip_block_open,
        .release        = mtip_block_release,
        .ioctl          = mtip_block_ioctl,
#ifdef CONFIG_COMPAT
        /* 32-bit ioctl translation, only built on CONFIG_COMPAT kernels. */
        .compat_ioctl   = mtip_block_compat_ioctl,
#endif
        .getgeo         = mtip_block_getgeo,
        .owner          = THIS_MODULE
};
3338
3339 static inline bool is_se_active(struct driver_data *dd)
3340 {
3341         if (unlikely(test_bit(MTIP_PF_SE_ACTIVE_BIT, &dd->port->flags))) {
3342                 if (dd->port->ic_pause_timer) {
3343                         unsigned long to = dd->port->ic_pause_timer +
3344                                                         msecs_to_jiffies(1000);
3345                         if (time_after(jiffies, to)) {
3346                                 clear_bit(MTIP_PF_SE_ACTIVE_BIT,
3347                                                         &dd->port->flags);
3348                                 clear_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag);
3349                                 dd->port->ic_pause_timer = 0;
3350                                 wake_up_interruptible(&dd->port->svc_wait);
3351                                 return false;
3352                         }
3353                 }
3354                 return true;
3355         }
3356         return false;
3357 }
3358
3359 static inline bool is_stopped(struct driver_data *dd, struct request *rq)
3360 {
3361         if (likely(!(dd->dd_flag & MTIP_DDF_STOP_IO)))
3362                 return false;
3363
3364         if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))
3365                 return true;
3366         if (test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag))
3367                 return true;
3368         if (test_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag) &&
3369             rq_data_dir(rq))
3370                 return true;
3371         if (test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag))
3372                 return true;
3373         if (test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag))
3374                 return true;
3375
3376         return false;
3377 }
3378
3379 static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx,
3380                                   struct request *rq)
3381 {
3382         struct driver_data *dd = hctx->queue->queuedata;
3383         struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
3384
3385         if (rq_data_dir(rq) == READ || !dd->unal_qdepth)
3386                 return false;
3387
3388         /*
3389          * If unaligned depth must be limited on this controller, mark it
3390          * as unaligned if the IO isn't on a 4k boundary (start of length).
3391          */
3392         if (blk_rq_sectors(rq) <= 64) {
3393                 if ((blk_rq_pos(rq) & 7) || (blk_rq_sectors(rq) & 7))
3394                         cmd->unaligned = 1;
3395         }
3396
3397         if (cmd->unaligned && atomic_dec_if_positive(&dd->port->cmd_slot_unal) >= 0)
3398                 return true;
3399
3400         return false;
3401 }
3402
3403 static blk_status_t mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
3404                 struct request *rq)
3405 {
3406         struct driver_data *dd = hctx->queue->queuedata;
3407         struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
3408         struct mtip_int_cmd *icmd = cmd->icmd;
3409         struct mtip_cmd_hdr *hdr =
3410                 dd->port->command_list + sizeof(struct mtip_cmd_hdr) * rq->tag;
3411         struct mtip_cmd_sg *command_sg;
3412
3413         if (mtip_commands_active(dd->port))
3414                 return BLK_STS_DEV_RESOURCE;
3415
3416         hdr->ctba = cpu_to_le32(cmd->command_dma & 0xFFFFFFFF);
3417         if (test_bit(MTIP_PF_HOST_CAP_64, &dd->port->flags))
3418                 hdr->ctbau = cpu_to_le32((cmd->command_dma >> 16) >> 16);
3419         /* Populate the SG list */
3420         hdr->opts = cpu_to_le32(icmd->opts | icmd->fis_len);
3421         if (icmd->buf_len) {
3422                 command_sg = cmd->command + AHCI_CMD_TBL_HDR_SZ;
3423
3424                 command_sg->info = cpu_to_le32((icmd->buf_len-1) & 0x3FFFFF);
3425                 command_sg->dba = cpu_to_le32(icmd->buffer & 0xFFFFFFFF);
3426                 command_sg->dba_upper =
3427                         cpu_to_le32((icmd->buffer >> 16) >> 16);
3428
3429                 hdr->opts |= cpu_to_le32((1 << 16));
3430         }
3431
3432         /* Populate the command header */
3433         hdr->byte_count = 0;
3434
3435         blk_mq_start_request(rq);
3436         mtip_issue_non_ncq_command(dd->port, rq->tag);
3437         return 0;
3438 }
3439
3440 static blk_status_t mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
3441                          const struct blk_mq_queue_data *bd)
3442 {
3443         struct driver_data *dd = hctx->queue->queuedata;
3444         struct request *rq = bd->rq;
3445         struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
3446
3447         if (blk_rq_is_passthrough(rq))
3448                 return mtip_issue_reserved_cmd(hctx, rq);
3449
3450         if (unlikely(mtip_check_unal_depth(hctx, rq)))
3451                 return BLK_STS_DEV_RESOURCE;
3452
3453         if (is_se_active(dd) || is_stopped(dd, rq))
3454                 return BLK_STS_IOERR;
3455
3456         blk_mq_start_request(rq);
3457
3458         mtip_hw_submit_io(dd, rq, cmd, hctx);
3459         return BLK_STS_OK;
3460 }
3461
3462 static void mtip_free_cmd(struct blk_mq_tag_set *set, struct request *rq,
3463                           unsigned int hctx_idx)
3464 {
3465         struct driver_data *dd = set->driver_data;
3466         struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
3467
3468         if (!cmd->command)
3469                 return;
3470
3471         dma_free_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ, cmd->command,
3472                           cmd->command_dma);
3473 }
3474
3475 static int mtip_init_cmd(struct blk_mq_tag_set *set, struct request *rq,
3476                          unsigned int hctx_idx, unsigned int numa_node)
3477 {
3478         struct driver_data *dd = set->driver_data;
3479         struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
3480
3481         cmd->command = dma_alloc_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ,
3482                         &cmd->command_dma, GFP_KERNEL);
3483         if (!cmd->command)
3484                 return -ENOMEM;
3485
3486         sg_init_table(cmd->sg, MTIP_MAX_SG);
3487         return 0;
3488 }
3489
3490 static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req,
3491                                                                 bool reserved)
3492 {
3493         struct driver_data *dd = req->q->queuedata;
3494
3495         if (reserved) {
3496                 struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
3497
3498                 cmd->status = BLK_STS_TIMEOUT;
3499                 blk_mq_complete_request(req);
3500                 return BLK_EH_DONE;
3501         }
3502
3503         if (test_bit(req->tag, dd->port->cmds_to_issue))
3504                 goto exit_handler;
3505
3506         if (test_and_set_bit(MTIP_PF_TO_ACTIVE_BIT, &dd->port->flags))
3507                 goto exit_handler;
3508
3509         wake_up_interruptible(&dd->port->svc_wait);
3510 exit_handler:
3511         return BLK_EH_RESET_TIMER;
3512 }
3513
/*
 * blk-mq operations: request issue, per-request DMA buffer
 * setup/teardown, softirq completion and timeout handling.
 */
static const struct blk_mq_ops mtip_mq_ops = {
	.queue_rq	= mtip_queue_rq,
	.init_request	= mtip_init_cmd,
	.exit_request	= mtip_free_cmd,
	.complete	= mtip_softirq_done_fn,
	.timeout	= mtip_cmd_timeout,
};
3521
3522 /*
3523  * Block layer initialization function.
3524  *
3525  * This function is called once by the PCI layer for each P320
3526  * device that is connected to the system.
3527  *
3528  * @dd Pointer to the driver data structure.
3529  *
3530  * return value
3531  *      0 on success else an error code.
3532  */
static int mtip_block_initialize(struct driver_data *dd)
{
	int rv = 0, wait_for_rebuild = 0;
	sector_t capacity;
	unsigned int index = 0;

	/* Re-entry after an FTL rebuild: the disk already exists. */
	if (dd->disk)
		goto skip_create_disk; /* hw init done, before rebuild */

	if (mtip_hw_init(dd)) {
		rv = -EINVAL;
		goto protocol_init_error;
	}

	/* One hardware queue; one tag reserved for internal commands. */
	memset(&dd->tags, 0, sizeof(dd->tags));
	dd->tags.ops = &mtip_mq_ops;
	dd->tags.nr_hw_queues = 1;
	dd->tags.queue_depth = MTIP_MAX_COMMAND_SLOTS;
	dd->tags.reserved_tags = 1;
	dd->tags.cmd_size = sizeof(struct mtip_cmd);
	dd->tags.numa_node = dd->numa_node;
	dd->tags.flags = BLK_MQ_F_SHOULD_MERGE;
	dd->tags.driver_data = dd;
	dd->tags.timeout = MTIP_NCQ_CMD_TIMEOUT_MS;

	rv = blk_mq_alloc_tag_set(&dd->tags);
	if (rv) {
		dev_err(&dd->pdev->dev,
			"Unable to allocate request queue\n");
		goto block_queue_alloc_tag_error;
	}

	dd->disk = blk_mq_alloc_disk(&dd->tags, dd);
	if (IS_ERR(dd->disk)) {
		dev_err(&dd->pdev->dev,
			"Unable to allocate request queue\n");
		rv = -ENOMEM;
		goto block_queue_alloc_init_error;
	}
	dd->queue		= dd->disk->queue;

	/* Claim an rssd index used to build the /dev name. */
	rv = ida_alloc(&rssd_index_ida, GFP_KERNEL);
	if (rv < 0)
		goto ida_get_error;
	index = rv;

	rv = rssd_disk_name_format("rssd",
				index,
				dd->disk->disk_name,
				DISK_NAME_LEN);
	if (rv)
		goto disk_index_error;

	dd->disk->major		= dd->major;
	dd->disk->first_minor	= index * MTIP_MAX_MINORS;
	dd->disk->minors	= MTIP_MAX_MINORS;
	dd->disk->fops		= &mtip_block_ops;
	dd->disk->private_data	= dd;
	dd->index		= index;

	mtip_hw_debugfs_init(dd);

skip_create_disk:
	/* Initialize the protocol layer. */
	wait_for_rebuild = mtip_hw_get_identify(dd);
	if (wait_for_rebuild < 0) {
		dev_err(&dd->pdev->dev,
			"Protocol layer initialization failed\n");
		rv = -EINVAL;
		goto init_hw_cmds_error;
	}

	/*
	 * if rebuild pending, start the service thread, and delay the block
	 * queue creation and device_add_disk()
	 */
	if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC)
		goto start_service_thread;

	/* Set device limits. */
	blk_queue_flag_set(QUEUE_FLAG_NONROT, dd->queue);
	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, dd->queue);
	blk_queue_max_segments(dd->queue, MTIP_MAX_SG);
	blk_queue_physical_block_size(dd->queue, 4096);
	blk_queue_max_hw_sectors(dd->queue, 0xffff);
	blk_queue_max_segment_size(dd->queue, 0x400000);
	dma_set_max_seg_size(&dd->pdev->dev, 0x400000);
	blk_queue_io_min(dd->queue, 4096);

	/* Set the capacity of the device in 512 byte sectors. */
	if (!(mtip_hw_get_capacity(dd, &capacity))) {
		dev_warn(&dd->pdev->dev,
			"Could not read drive capacity\n");
		rv = -EIO;
		goto read_capacity_error;
	}
	set_capacity(dd->disk, capacity);

	/* Enable the block device and add it to /dev */
	rv = device_add_disk(&dd->pdev->dev, dd->disk, mtip_disk_attr_groups);
	if (rv)
		goto read_capacity_error;

	if (dd->mtip_svc_handler) {
		set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag);
		return rv; /* service thread created for handling rebuild */
	}

start_service_thread:
	dd->mtip_svc_handler = kthread_create_on_node(mtip_service_thread,
						dd, dd->numa_node,
						"mtip_svc_thd_%02d", index);

	if (IS_ERR(dd->mtip_svc_handler)) {
		dev_err(&dd->pdev->dev, "service thread failed to start\n");
		dd->mtip_svc_handler = NULL;
		rv = -EFAULT;
		goto kthread_run_error;
	}
	wake_up_process(dd->mtip_svc_handler);
	/* Propagate the rebuild magic so the PCI probe can detect it. */
	if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC)
		rv = wait_for_rebuild;

	return rv;

	/* Error unwind: each label undoes everything acquired above it. */
kthread_run_error:
	/* Delete our gendisk. This also removes the device from /dev */
	del_gendisk(dd->disk);
read_capacity_error:
init_hw_cmds_error:
	mtip_hw_debugfs_exit(dd);
disk_index_error:
	ida_free(&rssd_index_ida, index);
ida_get_error:
	blk_cleanup_disk(dd->disk);
block_queue_alloc_init_error:
	blk_mq_free_tag_set(&dd->tags);
block_queue_alloc_tag_error:
	mtip_hw_exit(dd); /* De-initialize the protocol layer. */
protocol_init_error:
	return rv;
}
3675
3676 static bool mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv)
3677 {
3678         struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
3679
3680         cmd->status = BLK_STS_IOERR;
3681         blk_mq_complete_request(rq);
3682         return true;
3683 }
3684
3685 /*
3686  * Block layer deinitialization function.
3687  *
3688  * Called by the PCI layer as each P320 device is removed.
3689  *
3690  * @dd Pointer to the driver data structure.
3691  *
3692  * return value
3693  *      0
3694  */
static int mtip_block_remove(struct driver_data *dd)
{
	mtip_hw_debugfs_exit(dd);

	/* Stop the service thread before tearing the rest down. */
	if (dd->mtip_svc_handler) {
		set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags);
		wake_up_interruptible(&dd->port->svc_wait);
		kthread_stop(dd->mtip_svc_handler);
	}

	if (!dd->sr) {
		/*
		 * Explicitly wait here for IOs to quiesce,
		 * as mtip_standby_drive usually won't wait for IOs.
		 */
		if (!mtip_quiesce_io(dd->port, MTIP_QUIESCE_IO_TIMEOUT_MS))
			mtip_standby_drive(dd);
	}
	else
		dev_info(&dd->pdev->dev, "device %s surprise removal\n",
						dd->disk->disk_name);

	/* Fail every request the block layer still holds for us. */
	blk_freeze_queue_start(dd->queue);
	blk_mq_quiesce_queue(dd->queue);
	blk_mq_tagset_busy_iter(&dd->tags, mtip_no_dev_cleanup, dd);
	blk_mq_unquiesce_queue(dd->queue);

	if (dd->disk) {
		/* Only disks that completed device_add_disk() are deleted. */
		if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag))
			del_gendisk(dd->disk);
		if (dd->disk->queue) {
			blk_cleanup_queue(dd->queue);
			blk_mq_free_tag_set(&dd->tags);
			dd->queue = NULL;
		}
		put_disk(dd->disk);
	}
	dd->disk  = NULL;

	/* Return the rssd index for reuse by future probes. */
	ida_free(&rssd_index_ida, dd->index);

	/* De-initialize the protocol layer. */
	mtip_hw_exit(dd);

	return 0;
}
3741
3742 /*
3743  * Function called by the PCI layer when just before the
3744  * machine shuts down.
3745  *
3746  * If a protocol layer shutdown function is present it will be called
3747  * by this function.
3748  *
3749  * @dd Pointer to the driver data structure.
3750  *
3751  * return value
3752  *      0
3753  */
static int mtip_block_shutdown(struct driver_data *dd)
{
	/* Quiesce the hardware side first. */
	mtip_hw_shutdown(dd);

	/* Delete our gendisk structure, and cleanup the blk queue. */
	if (dd->disk) {
		dev_info(&dd->pdev->dev,
			"Shutting down %s ...\n", dd->disk->disk_name);

		/* Only disks that completed device_add_disk() are deleted. */
		if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag))
			del_gendisk(dd->disk);
		if (dd->disk->queue) {
			blk_cleanup_queue(dd->queue);
			blk_mq_free_tag_set(&dd->tags);
		}
		put_disk(dd->disk);
		dd->disk  = NULL;
		dd->queue = NULL;
	}

	/* Return the rssd index for reuse. */
	ida_free(&rssd_index_ida, dd->index);
	return 0;
}
3777
3778 static int mtip_block_suspend(struct driver_data *dd)
3779 {
3780         dev_info(&dd->pdev->dev,
3781                 "Suspending %s ...\n", dd->disk->disk_name);
3782         mtip_hw_suspend(dd);
3783         return 0;
3784 }
3785
3786 static int mtip_block_resume(struct driver_data *dd)
3787 {
3788         dev_info(&dd->pdev->dev, "Resuming %s ...\n",
3789                 dd->disk->disk_name);
3790         mtip_hw_resume(dd);
3791         return 0;
3792 }
3793
3794 static void drop_cpu(int cpu)
3795 {
3796         cpu_use[cpu]--;
3797 }
3798
3799 static int get_least_used_cpu_on_node(int node)
3800 {
3801         int cpu, least_used_cpu, least_cnt;
3802         const struct cpumask *node_mask;
3803
3804         node_mask = cpumask_of_node(node);
3805         least_used_cpu = cpumask_first(node_mask);
3806         least_cnt = cpu_use[least_used_cpu];
3807         cpu = least_used_cpu;
3808
3809         for_each_cpu(cpu, node_mask) {
3810                 if (cpu_use[cpu] < least_cnt) {
3811                         least_used_cpu = cpu;
3812                         least_cnt = cpu_use[cpu];
3813                 }
3814         }
3815         cpu_use[least_used_cpu]++;
3816         return least_used_cpu;
3817 }
3818
3819 /* Helper for selecting a node in round robin mode */
3820 static inline int mtip_get_next_rr_node(void)
3821 {
3822         static int next_node = NUMA_NO_NODE;
3823
3824         if (next_node == NUMA_NO_NODE) {
3825                 next_node = first_online_node;
3826                 return next_node;
3827         }
3828
3829         next_node = next_online_node(next_node);
3830         if (next_node == MAX_NUMNODES)
3831                 next_node = first_online_node;
3832         return next_node;
3833 }
3834
/*
 * Instantiate the eight per-slot-group work handlers -- presumably
 * expanding to the mtip_workq_sdbf0..7 functions wired up in
 * mtip_pci_probe() below (macro defined in mtip32xx.h; TODO confirm).
 */
static DEFINE_HANDLER(0);
static DEFINE_HANDLER(1);
static DEFINE_HANDLER(2);
static DEFINE_HANDLER(3);
static DEFINE_HANDLER(4);
static DEFINE_HANDLER(5);
static DEFINE_HANDLER(6);
static DEFINE_HANDLER(7);
3843
3844 static void mtip_disable_link_opts(struct driver_data *dd, struct pci_dev *pdev)
3845 {
3846         unsigned short pcie_dev_ctrl;
3847
3848         if (pci_is_pcie(pdev)) {
3849                 pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &pcie_dev_ctrl);
3850                 if (pcie_dev_ctrl & PCI_EXP_DEVCTL_NOSNOOP_EN ||
3851                     pcie_dev_ctrl & PCI_EXP_DEVCTL_RELAX_EN) {
3852                         dev_info(&dd->pdev->dev,
3853                                 "Disabling ERO/No-Snoop on bridge device %04x:%04x\n",
3854                                         pdev->vendor, pdev->device);
3855                         pcie_dev_ctrl &= ~(PCI_EXP_DEVCTL_NOSNOOP_EN |
3856                                                 PCI_EXP_DEVCTL_RELAX_EN);
3857                         pcie_capability_write_word(pdev, PCI_EXP_DEVCTL,
3858                                 pcie_dev_ctrl);
3859                 }
3860         }
3861 }
3862
3863 static void mtip_fix_ero_nosnoop(struct driver_data *dd, struct pci_dev *pdev)
3864 {
3865         /*
3866          * This workaround is specific to AMD/ATI chipset with a PCI upstream
3867          * device with device id 0x5aXX
3868          */
3869         if (pdev->bus && pdev->bus->self) {
3870                 if (pdev->bus->self->vendor == PCI_VENDOR_ID_ATI &&
3871                     ((pdev->bus->self->device & 0xff00) == 0x5a00)) {
3872                         mtip_disable_link_opts(dd, pdev->bus->self);
3873                 } else {
3874                         /* Check further up the topology */
3875                         struct pci_dev *parent_dev = pdev->bus->self;
3876                         if (parent_dev->bus &&
3877                                 parent_dev->bus->parent &&
3878                                 parent_dev->bus->parent->self &&
3879                                 parent_dev->bus->parent->self->vendor ==
3880                                          PCI_VENDOR_ID_ATI &&
3881                                 (parent_dev->bus->parent->self->device &
3882                                         0xff00) == 0x5a00) {
3883                                 mtip_disable_link_opts(dd,
3884                                         parent_dev->bus->parent->self);
3885                         }
3886                 }
3887         }
3888 }
3889
3890 /*
3891  * Called for each supported PCI device detected.
3892  *
3893  * This function allocates the private data structure, enables the
3894  * PCI device and then calls the block layer initialization function.
3895  *
3896  * return value
3897  *      0 on success else an error code.
3898  */
static int mtip_pci_probe(struct pci_dev *pdev,
			const struct pci_device_id *ent)
{
	int rv = 0;
	struct driver_data *dd = NULL;
	char cpu_list[256];
	const struct cpumask *node_mask;
	int cpu, i = 0, j = 0;
	int my_node = NUMA_NO_NODE;
	unsigned long flags;

	/* Allocate memory for this devices private data. */
	my_node = pcibus_to_node(pdev->bus);
	if (my_node != NUMA_NO_NODE) {
		if (!node_online(my_node))
			my_node = mtip_get_next_rr_node();
	} else {
		dev_info(&pdev->dev, "Kernel not reporting proximity, choosing a node\n");
		my_node = mtip_get_next_rr_node();
	}
	dev_info(&pdev->dev, "NUMA node %d (closest: %d,%d, probe on %d:%d)\n",
		my_node, pcibus_to_node(pdev->bus), dev_to_node(&pdev->dev),
		cpu_to_node(raw_smp_processor_id()), raw_smp_processor_id());

	dd = kzalloc_node(sizeof(struct driver_data), GFP_KERNEL, my_node);
	if (!dd)
		return -ENOMEM;

	/* Attach the private data to this PCI device.  */
	pci_set_drvdata(pdev, dd);

	/* pcim_* resources are device-managed: released automatically on
	 * driver detach, so the unwind below never frees them directly. */
	rv = pcim_enable_device(pdev);
	if (rv < 0) {
		dev_err(&pdev->dev, "Unable to enable device\n");
		goto iomap_err;
	}

	/* Map BAR5 to memory. */
	rv = pcim_iomap_regions(pdev, 1 << MTIP_ABAR, MTIP_DRV_NAME);
	if (rv < 0) {
		dev_err(&pdev->dev, "Unable to map regions\n");
		goto iomap_err;
	}

	rv = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (rv) {
		dev_warn(&pdev->dev, "64-bit DMA enable failed\n");
		goto setmask_err;
	}

	/* Copy the info we may need later into the private data structure. */
	dd->major	= mtip_major;
	dd->instance	= instance;
	dd->pdev	= pdev;
	dd->numa_node	= my_node;

	INIT_LIST_HEAD(&dd->online_list);
	INIT_LIST_HEAD(&dd->remove_list);

	memset(dd->workq_name, 0, 32);
	snprintf(dd->workq_name, 31, "mtipq%d", dd->instance);

	dd->isr_workq = create_workqueue(dd->workq_name);
	if (!dd->isr_workq) {
		dev_warn(&pdev->dev, "Can't create wq %d\n", dd->instance);
		rv = -ENOMEM;
		goto setmask_err;
	}

	memset(cpu_list, 0, sizeof(cpu_list));

	node_mask = cpumask_of_node(dd->numa_node);
	if (!cpumask_empty(node_mask)) {
		for_each_cpu(cpu, node_mask)
		{
			snprintf(&cpu_list[j], 256 - j, "%d ", cpu);
			j = strlen(cpu_list);
		}

		dev_info(&pdev->dev, "Node %d on package %d has %d cpu(s): %s\n",
			dd->numa_node,
			topology_physical_package_id(cpumask_first(node_mask)),
			nr_cpus_node(dd->numa_node),
			cpu_list);
	} else
		dev_dbg(&pdev->dev, "mtip32xx: node_mask empty\n");

	dd->isr_binding = get_least_used_cpu_on_node(dd->numa_node);
	dev_info(&pdev->dev, "Initial IRQ binding node:cpu %d:%d\n",
		cpu_to_node(dd->isr_binding), dd->isr_binding);

	/* first worker context always runs in ISR */
	dd->work[0].cpu_binding = dd->isr_binding;
	dd->work[1].cpu_binding = get_least_used_cpu_on_node(dd->numa_node);
	dd->work[2].cpu_binding = get_least_used_cpu_on_node(dd->numa_node);
	/* Slot groups 3-7 reuse the three CPUs claimed above. */
	dd->work[3].cpu_binding = dd->work[0].cpu_binding;
	dd->work[4].cpu_binding = dd->work[1].cpu_binding;
	dd->work[5].cpu_binding = dd->work[2].cpu_binding;
	dd->work[6].cpu_binding = dd->work[2].cpu_binding;
	dd->work[7].cpu_binding = dd->work[1].cpu_binding;

	/* Log the bindings */
	for_each_present_cpu(cpu) {
		memset(cpu_list, 0, sizeof(cpu_list));
		for (i = 0, j = 0; i < MTIP_MAX_SLOT_GROUPS; i++) {
			if (dd->work[i].cpu_binding == cpu) {
				snprintf(&cpu_list[j], 256 - j, "%d ", i);
				j = strlen(cpu_list);
			}
		}
		if (j)
			dev_info(&pdev->dev, "CPU %d: WQs %s\n", cpu, cpu_list);
	}

	INIT_WORK(&dd->work[0].work, mtip_workq_sdbf0);
	INIT_WORK(&dd->work[1].work, mtip_workq_sdbf1);
	INIT_WORK(&dd->work[2].work, mtip_workq_sdbf2);
	INIT_WORK(&dd->work[3].work, mtip_workq_sdbf3);
	INIT_WORK(&dd->work[4].work, mtip_workq_sdbf4);
	INIT_WORK(&dd->work[5].work, mtip_workq_sdbf5);
	INIT_WORK(&dd->work[6].work, mtip_workq_sdbf6);
	INIT_WORK(&dd->work[7].work, mtip_workq_sdbf7);

	pci_set_master(pdev);
	rv = pci_enable_msi(pdev);
	if (rv) {
		dev_warn(&pdev->dev,
			"Unable to enable MSI interrupt.\n");
		goto msi_initialize_err;
	}

	/* AMD/ATI bridge quirk -- see mtip_fix_ero_nosnoop(). */
	mtip_fix_ero_nosnoop(dd, pdev);

	/* Initialize the block layer. */
	rv = mtip_block_initialize(dd);
	if (rv < 0) {
		dev_err(&pdev->dev,
			"Unable to initialize block layer\n");
		goto block_initialize_err;
	}

	/*
	 * Increment the instance count so that each device has a unique
	 * instance number.
	 */
	instance++;
	if (rv != MTIP_FTL_REBUILD_MAGIC)
		set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag);
	else
		rv = 0; /* device in rebuild state, return 0 from probe */

	/* Add to online list even if in ftl rebuild */
	spin_lock_irqsave(&dev_lock, flags);
	list_add(&dd->online_list, &online_list);
	spin_unlock_irqrestore(&dev_lock, flags);

	goto done;

	/* Error unwind: each label undoes everything acquired above it. */
block_initialize_err:
	pci_disable_msi(pdev);

msi_initialize_err:
	if (dd->isr_workq) {
		destroy_workqueue(dd->isr_workq);
		drop_cpu(dd->work[0].cpu_binding);
		drop_cpu(dd->work[1].cpu_binding);
		drop_cpu(dd->work[2].cpu_binding);
	}
setmask_err:
	pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);

iomap_err:
	kfree(dd);
	pci_set_drvdata(pdev, NULL);
	return rv;
done:
	return rv;
}
4077
4078 /*
4079  * Called for each probed device when the device is removed or the
4080  * driver is unloaded.
4081  *
4082  * return value
4083  *      None
4084  */
static void mtip_pci_remove(struct pci_dev *pdev)
{
	struct driver_data *dd = pci_get_drvdata(pdev);
	unsigned long flags, to;

	set_bit(MTIP_DDF_REMOVAL_BIT, &dd->dd_flag);

	/* Move the device from the online list to the removing list. */
	spin_lock_irqsave(&dev_lock, flags);
	list_del_init(&dd->online_list);
	list_add(&dd->remove_list, &removing_list);
	spin_unlock_irqrestore(&dev_lock, flags);

	mtip_check_surprise_removal(dd);
	synchronize_irq(dd->pdev->irq);

	/* Spin until workers are done */
	to = jiffies + msecs_to_jiffies(4000);
	do {
		msleep(20);
	} while (atomic_read(&dd->irq_workers_active) != 0 &&
		time_before(jiffies, to));

	/* 4s budget exhausted: log and proceed with removal anyway. */
	if (atomic_read(&dd->irq_workers_active) != 0) {
		dev_warn(&dd->pdev->dev,
			"Completion workers still active!\n");
	}

	/* Tell the block layer the device is gone before tearing down. */
	blk_mark_disk_dead(dd->disk);
	set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag);

	/* Clean up the block layer. */
	mtip_block_remove(dd);

	/* Release the workqueue and the CPU references it held. */
	if (dd->isr_workq) {
		destroy_workqueue(dd->isr_workq);
		drop_cpu(dd->work[0].cpu_binding);
		drop_cpu(dd->work[1].cpu_binding);
		drop_cpu(dd->work[2].cpu_binding);
	}

	pci_disable_msi(pdev);

	spin_lock_irqsave(&dev_lock, flags);
	list_del_init(&dd->remove_list);
	spin_unlock_irqrestore(&dev_lock, flags);

	kfree(dd);

	pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
	pci_set_drvdata(pdev, NULL);
}
4136
4137 /*
4138  * Called for each probed device when the device is suspended.
4139  *
4140  * return value
4141  *      0  Success
4142  *      <0 Error
4143  */
4144 static int __maybe_unused mtip_pci_suspend(struct device *dev)
4145 {
4146         int rv = 0;
4147         struct driver_data *dd = dev_get_drvdata(dev);
4148
4149         set_bit(MTIP_DDF_RESUME_BIT, &dd->dd_flag);
4150
4151         /* Disable ports & interrupts then send standby immediate */
4152         rv = mtip_block_suspend(dd);
4153         if (rv < 0)
4154                 dev_err(dev, "Failed to suspend controller\n");
4155
4156         return rv;
4157 }
4158
4159 /*
4160  * Called for each probed device when the device is resumed.
4161  *
4162  * return value
4163  *      0  Success
4164  *      <0 Error
4165  */
4166 static int __maybe_unused mtip_pci_resume(struct device *dev)
4167 {
4168         int rv = 0;
4169         struct driver_data *dd = dev_get_drvdata(dev);
4170
4171         /*
4172          * Calls hbaReset, initPort, & startPort function
4173          * then enables interrupts
4174          */
4175         rv = mtip_block_resume(dd);
4176         if (rv < 0)
4177                 dev_err(dev, "Unable to resume\n");
4178
4179         clear_bit(MTIP_DDF_RESUME_BIT, &dd->dd_flag);
4180
4181         return rv;
4182 }
4183
4184 /*
4185  * Shutdown routine
4186  *
4187  * return value
4188  *      None
4189  */
/*
 * PCI shutdown callback: delegate to the block-layer shutdown path if
 * private data was ever attached to this device.
 */
static void mtip_pci_shutdown(struct pci_dev *pdev)
{
	struct driver_data *dd = pci_get_drvdata(pdev);

	if (!dd)
		return;

	mtip_block_shutdown(dd);
}
4196
4197 /* Table of device ids supported by this driver. */
static const struct pci_device_id mtip_pci_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320H_DEVICE_ID) },
	{ PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320M_DEVICE_ID) },
	{ PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320S_DEVICE_ID) },
	{ PCI_DEVICE(PCI_VENDOR_ID_MICRON, P325M_DEVICE_ID) },
	{ PCI_DEVICE(PCI_VENDOR_ID_MICRON, P420H_DEVICE_ID) },
	{ PCI_DEVICE(PCI_VENDOR_ID_MICRON, P420M_DEVICE_ID) },
	{ PCI_DEVICE(PCI_VENDOR_ID_MICRON, P425M_DEVICE_ID) },
	{ 0 }	/* terminating entry */
};
4208
/* System sleep PM callbacks only (no runtime PM is wired up here). */
static SIMPLE_DEV_PM_OPS(mtip_pci_pm_ops, mtip_pci_suspend, mtip_pci_resume);
4210
4211 /* Structure that describes the PCI driver functions. */
4212 static struct pci_driver mtip_pci_driver = {
4213         .name                   = MTIP_DRV_NAME,
4214         .id_table               = mtip_pci_tbl,
4215         .probe                  = mtip_pci_probe,
4216         .remove                 = mtip_pci_remove,
4217         .driver.pm              = &mtip_pci_pm_ops,
4218         .shutdown               = mtip_pci_shutdown,
4219 };
4220
4221 MODULE_DEVICE_TABLE(pci, mtip_pci_tbl);
4222
4223 /*
4224  * Module initialization function.
4225  *
4226  * Called once when the module is loaded. This function allocates a major
4227  * block device number to the Cyclone devices and registers the PCI layer
4228  * of the driver.
4229  *
4230  * Return value
4231  *      0 on success else error code.
4232  */
4233 static int __init mtip_init(void)
4234 {
4235         int error;
4236
4237         pr_info(MTIP_DRV_NAME " Version " MTIP_DRV_VERSION "\n");
4238
4239         /* Allocate a major block device number to use with this driver. */
4240         error = register_blkdev(0, MTIP_DRV_NAME);
4241         if (error <= 0) {
4242                 pr_err("Unable to register block device (%d)\n",
4243                 error);
4244                 return -EBUSY;
4245         }
4246         mtip_major = error;
4247
4248         dfs_parent = debugfs_create_dir("rssd", NULL);
4249         if (IS_ERR_OR_NULL(dfs_parent)) {
4250                 pr_warn("Error creating debugfs parent\n");
4251                 dfs_parent = NULL;
4252         }
4253         if (dfs_parent) {
4254                 dfs_device_status = debugfs_create_file("device_status",
4255                                         0444, dfs_parent, NULL,
4256                                         &mtip_device_status_fops);
4257                 if (IS_ERR_OR_NULL(dfs_device_status)) {
4258                         pr_err("Error creating device_status node\n");
4259                         dfs_device_status = NULL;
4260                 }
4261         }
4262
4263         /* Register our PCI operations. */
4264         error = pci_register_driver(&mtip_pci_driver);
4265         if (error) {
4266                 debugfs_remove(dfs_parent);
4267                 unregister_blkdev(mtip_major, MTIP_DRV_NAME);
4268         }
4269
4270         return error;
4271 }
4272
4273 /*
4274  * Module de-initialization function.
4275  *
4276  * Called once when the module is unloaded. This function deallocates
4277  * the major block device number allocated by mtip_init() and
4278  * unregisters the PCI layer of the driver.
4279  *
4280  * Return value
4281  *      none
4282  */
4283 static void __exit mtip_exit(void)
4284 {
4285         /* Release the allocated major block device number. */
4286         unregister_blkdev(mtip_major, MTIP_DRV_NAME);
4287
4288         /* Unregister the PCI driver. */
4289         pci_unregister_driver(&mtip_pci_driver);
4290
4291         debugfs_remove_recursive(dfs_parent);
4292 }
4293
/* Module metadata and entry/exit hooks. */
MODULE_AUTHOR("Micron Technology, Inc");
MODULE_DESCRIPTION("Micron RealSSD PCIe Block Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(MTIP_DRV_VERSION);

module_init(mtip_init);
module_exit(mtip_exit);