GNU Linux-libre 4.9.333-gnu1
[releases.git] / drivers / net / wireless / intel / iwlwifi / pcie / tx.c
1 /******************************************************************************
2  *
3  * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved.
4  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
5  * Copyright(c) 2016 Intel Deutschland GmbH
6  *
7  * Portions of this file are derived from the ipw3945 project, as well
8  * as portions of the ieee80211 subsystem header files.
9  *
10  * This program is free software; you can redistribute it and/or modify it
11  * under the terms of version 2 of the GNU General Public License as
12  * published by the Free Software Foundation.
13  *
14  * This program is distributed in the hope that it will be useful, but WITHOUT
15  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
17  * more details.
18  *
19  * You should have received a copy of the GNU General Public License along with
20  * this program; if not, write to the Free Software Foundation, Inc.,
21  * 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA
22  *
23  * The full GNU General Public License is included in this distribution in the
24  * file called LICENSE.
25  *
26  * Contact Information:
27  *  Intel Linux Wireless <linuxwifi@intel.com>
28  * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
29  *
30  *****************************************************************************/
31 #include <linux/etherdevice.h>
32 #include <linux/ieee80211.h>
33 #include <linux/slab.h>
34 #include <linux/sched.h>
35 #include <linux/pm_runtime.h>
36 #include <net/ip6_checksum.h>
37 #include <net/tso.h>
38
39 #include "iwl-debug.h"
40 #include "iwl-csr.h"
41 #include "iwl-prph.h"
42 #include "iwl-io.h"
43 #include "iwl-scd.h"
44 #include "iwl-op-mode.h"
45 #include "internal.h"
46 /* FIXME: need to abstract out TX command (once we know what it looks like) */
47 #include "dvm/commands.h"
48
49 #define IWL_TX_CRC_SIZE 4
50 #define IWL_TX_DELIMITER_SIZE 4
51
52 /*************** DMA-QUEUE-GENERAL-FUNCTIONS  *****
53  * DMA services
54  *
55  * Theory of operation
56  *
57  * A Tx or Rx queue resides in host DRAM, and is comprised of a circular buffer
58  * of buffer descriptors, each of which points to one or more data buffers for
59  * the device to read from or fill.  Driver and device exchange status of each
60  * queue via "read" and "write" pointers.  Driver keeps minimum of 2 empty
61  * entries in each circular buffer, to protect against confusing empty and full
62  * queue states.
63  *
64  * The device reads or writes the data in the queues via the device's several
65  * DMA/FIFO channels.  Each queue is mapped to a single DMA channel.
66  *
67  * For a Tx queue, there are low mark and high mark limits. If, after queuing
68  * a packet for Tx, the free space becomes < low mark, the Tx queue is stopped.
69  * When reclaiming packets (on the 'tx done' IRQ), if the free space becomes
70  * > high mark, the Tx queue is resumed.
71  *
72  ***************************************************/
73
74 static int iwl_queue_space(const struct iwl_txq *q)
75 {
76         unsigned int max;
77         unsigned int used;
78
79         /*
80          * To avoid ambiguity between empty and completely full queues, there
81          * should always be less than TFD_QUEUE_SIZE_MAX elements in the queue.
82          * If q->n_window is smaller than TFD_QUEUE_SIZE_MAX, there is no need
83          * to reserve any queue entries for this purpose.
84          */
85         if (q->n_window < TFD_QUEUE_SIZE_MAX)
86                 max = q->n_window;
87         else
88                 max = TFD_QUEUE_SIZE_MAX - 1;
89
90         /*
91          * TFD_QUEUE_SIZE_MAX is a power of 2, so the following is equivalent to
92          * modulo by TFD_QUEUE_SIZE_MAX and is well defined.
93          */
94         used = (q->write_ptr - q->read_ptr) & (TFD_QUEUE_SIZE_MAX - 1);
95
96         if (WARN_ON(used > max))
97                 return 0;
98
99         return max - used;
100 }
101
102 /*
103  * iwl_queue_init - Initialize queue's high/low-water and read/write indexes
104  */
105 static int iwl_queue_init(struct iwl_txq *q, int slots_num, u32 id)
106 {
107         q->n_window = slots_num;
108         q->id = id;
109
110         /* slots_num must be power-of-two size, otherwise
111          * get_cmd_index is broken. */
112         if (WARN_ON(!is_power_of_2(slots_num)))
113                 return -EINVAL;
114
115         q->low_mark = q->n_window / 4;
116         if (q->low_mark < 4)
117                 q->low_mark = 4;
118
119         q->high_mark = q->n_window / 8;
120         if (q->high_mark < 2)
121                 q->high_mark = 2;
122
123         q->write_ptr = 0;
124         q->read_ptr = 0;
125
126         return 0;
127 }
128
129 static int iwl_pcie_alloc_dma_ptr(struct iwl_trans *trans,
130                                   struct iwl_dma_ptr *ptr, size_t size)
131 {
132         if (WARN_ON(ptr->addr))
133                 return -EINVAL;
134
135         ptr->addr = dma_alloc_coherent(trans->dev, size,
136                                        &ptr->dma, GFP_KERNEL);
137         if (!ptr->addr)
138                 return -ENOMEM;
139         ptr->size = size;
140         return 0;
141 }
142
143 static void iwl_pcie_free_dma_ptr(struct iwl_trans *trans,
144                                   struct iwl_dma_ptr *ptr)
145 {
146         if (unlikely(!ptr->addr))
147                 return;
148
149         dma_free_coherent(trans->dev, ptr->size, ptr->addr, ptr->dma);
150         memset(ptr, 0, sizeof(*ptr));
151 }
152
153 static void iwl_pcie_txq_stuck_timer(unsigned long data)
154 {
155         struct iwl_txq *txq = (void *)data;
156         struct iwl_trans_pcie *trans_pcie = txq->trans_pcie;
157         struct iwl_trans *trans = iwl_trans_pcie_get_trans(trans_pcie);
158
159         spin_lock(&txq->lock);
160         /* check if triggered erroneously */
161         if (txq->read_ptr == txq->write_ptr) {
162                 spin_unlock(&txq->lock);
163                 return;
164         }
165         spin_unlock(&txq->lock);
166
167         IWL_ERR(trans, "Queue %d stuck for %u ms.\n", txq->id,
168                 jiffies_to_msecs(txq->wd_timeout));
169
170         iwl_trans_pcie_log_scd_error(trans, txq);
171
172         iwl_force_nmi(trans);
173 }
174
175 /*
176  * iwl_pcie_txq_update_byte_cnt_tbl - Set up entry in Tx byte-count array
177  */
178 static void iwl_pcie_txq_update_byte_cnt_tbl(struct iwl_trans *trans,
179                                              struct iwl_txq *txq, u16 byte_cnt,
180                                              int num_tbs)
181 {
182         struct iwlagn_scd_bc_tbl *scd_bc_tbl;
183         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
184         int write_ptr = txq->write_ptr;
185         int txq_id = txq->id;
186         u8 sec_ctl = 0;
187         u16 len = byte_cnt + IWL_TX_CRC_SIZE + IWL_TX_DELIMITER_SIZE;
188         __le16 bc_ent;
189         struct iwl_tx_cmd *tx_cmd =
190                 (void *)txq->entries[txq->write_ptr].cmd->payload;
191
192         scd_bc_tbl = trans_pcie->scd_bc_tbls.addr;
193
194         sec_ctl = tx_cmd->sec_ctl;
195
196         switch (sec_ctl & TX_CMD_SEC_MSK) {
197         case TX_CMD_SEC_CCM:
198                 len += IEEE80211_CCMP_MIC_LEN;
199                 break;
200         case TX_CMD_SEC_TKIP:
201                 len += IEEE80211_TKIP_ICV_LEN;
202                 break;
203         case TX_CMD_SEC_WEP:
204                 len += IEEE80211_WEP_IV_LEN + IEEE80211_WEP_ICV_LEN;
205                 break;
206         }
207         if (trans_pcie->bc_table_dword)
208                 len = DIV_ROUND_UP(len, 4);
209
210         if (WARN_ON(len > 0xFFF || write_ptr >= TFD_QUEUE_SIZE_MAX))
211                 return;
212
213         if (trans->cfg->use_tfh) {
214                 u8 filled_tfd_size = offsetof(struct iwl_tfh_tfd, tbs) +
215                                      num_tbs * sizeof(struct iwl_tfh_tb);
216                 /*
217                  * filled_tfd_size contains the number of filled bytes in the
218                  * TFD.
219                  * Dividing it by 64 will give the number of chunks to fetch
220                  * to SRAM- 0 for one chunk, 1 for 2 and so on.
221                  * If, for example, TFD contains only 3 TBs then 32 bytes
222                  * of the TFD are used, and only one chunk of 64 bytes should
223                  * be fetched
224                  */
225                 u8 num_fetch_chunks = DIV_ROUND_UP(filled_tfd_size, 64) - 1;
226
227                 bc_ent = cpu_to_le16(len | (num_fetch_chunks << 12));
228         } else {
229                 u8 sta_id = tx_cmd->sta_id;
230
231                 bc_ent = cpu_to_le16(len | (sta_id << 12));
232         }
233
234         scd_bc_tbl[txq_id].tfd_offset[write_ptr] = bc_ent;
235
236         if (write_ptr < TFD_QUEUE_SIZE_BC_DUP)
237                 scd_bc_tbl[txq_id].
238                         tfd_offset[TFD_QUEUE_SIZE_MAX + write_ptr] = bc_ent;
239 }
240
241 static void iwl_pcie_txq_inval_byte_cnt_tbl(struct iwl_trans *trans,
242                                             struct iwl_txq *txq)
243 {
244         struct iwl_trans_pcie *trans_pcie =
245                 IWL_TRANS_GET_PCIE_TRANS(trans);
246         struct iwlagn_scd_bc_tbl *scd_bc_tbl = trans_pcie->scd_bc_tbls.addr;
247         int txq_id = txq->id;
248         int read_ptr = txq->read_ptr;
249         u8 sta_id = 0;
250         __le16 bc_ent;
251         struct iwl_tx_cmd *tx_cmd =
252                 (void *)txq->entries[read_ptr].cmd->payload;
253
254         WARN_ON(read_ptr >= TFD_QUEUE_SIZE_MAX);
255
256         if (txq_id != trans_pcie->cmd_queue)
257                 sta_id = tx_cmd->sta_id;
258
259         bc_ent = cpu_to_le16(1 | (sta_id << 12));
260
261         scd_bc_tbl[txq_id].tfd_offset[read_ptr] = bc_ent;
262
263         if (read_ptr < TFD_QUEUE_SIZE_BC_DUP)
264                 scd_bc_tbl[txq_id].
265                         tfd_offset[TFD_QUEUE_SIZE_MAX + read_ptr] = bc_ent;
266 }
267
268 /*
269  * iwl_pcie_txq_inc_wr_ptr - Send new write index to hardware
270  */
271 static void iwl_pcie_txq_inc_wr_ptr(struct iwl_trans *trans,
272                                     struct iwl_txq *txq)
273 {
274         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
275         u32 reg = 0;
276         int txq_id = txq->id;
277
278         lockdep_assert_held(&txq->lock);
279
280         /*
281          * explicitly wake up the NIC if:
282          * 1. shadow registers aren't enabled
283          * 2. NIC is woken up for CMD regardless of shadow outside this function
284          * 3. there is a chance that the NIC is asleep
285          */
286         if (!trans->cfg->base_params->shadow_reg_enable &&
287             txq_id != trans_pcie->cmd_queue &&
288             test_bit(STATUS_TPOWER_PMI, &trans->status)) {
289                 /*
290                  * wake up nic if it's powered down ...
291                  * uCode will wake up, and interrupt us again, so next
292                  * time we'll skip this part.
293                  */
294                 reg = iwl_read32(trans, CSR_UCODE_DRV_GP1);
295
296                 if (reg & CSR_UCODE_DRV_GP1_BIT_MAC_SLEEP) {
297                         IWL_DEBUG_INFO(trans, "Tx queue %d requesting wakeup, GP1 = 0x%x\n",
298                                        txq_id, reg);
299                         iwl_set_bit(trans, CSR_GP_CNTRL,
300                                     CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
301                         txq->need_update = true;
302                         return;
303                 }
304         }
305
306         /*
307          * if not in power-save mode, uCode will never sleep when we're
308          * trying to tx (during RFKILL, we're not trying to tx).
309          */
310         IWL_DEBUG_TX(trans, "Q:%d WR: 0x%x\n", txq_id, txq->write_ptr);
311         if (!txq->block)
312                 iwl_write32(trans, HBUS_TARG_WRPTR,
313                             txq->write_ptr | (txq_id << 8));
314 }
315
316 void iwl_pcie_txq_check_wrptrs(struct iwl_trans *trans)
317 {
318         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
319         int i;
320
321         for (i = 0; i < trans->cfg->base_params->num_of_queues; i++) {
322                 struct iwl_txq *txq = &trans_pcie->txq[i];
323
324                 spin_lock_bh(&txq->lock);
325                 if (trans_pcie->txq[i].need_update) {
326                         iwl_pcie_txq_inc_wr_ptr(trans, txq);
327                         trans_pcie->txq[i].need_update = false;
328                 }
329                 spin_unlock_bh(&txq->lock);
330         }
331 }
332
333 static inline void *iwl_pcie_get_tfd(struct iwl_trans_pcie *trans_pcie,
334                                      struct iwl_txq *txq, int idx)
335 {
336         return txq->tfds + trans_pcie->tfd_size * idx;
337 }
338
339 static inline dma_addr_t iwl_pcie_tfd_tb_get_addr(struct iwl_trans *trans,
340                                                   void *_tfd, u8 idx)
341 {
342
343         if (trans->cfg->use_tfh) {
344                 struct iwl_tfh_tfd *tfd = _tfd;
345                 struct iwl_tfh_tb *tb = &tfd->tbs[idx];
346
347                 return (dma_addr_t)(le64_to_cpu(tb->addr));
348         } else {
349                 struct iwl_tfd *tfd = _tfd;
350                 struct iwl_tfd_tb *tb = &tfd->tbs[idx];
351                 dma_addr_t addr = get_unaligned_le32(&tb->lo);
352                 dma_addr_t hi_len;
353
354                 if (sizeof(dma_addr_t) <= sizeof(u32))
355                         return addr;
356
357                 hi_len = le16_to_cpu(tb->hi_n_len) & 0xF;
358
359                 /*
360                  * shift by 16 twice to avoid warnings on 32-bit
361                  * (where this code never runs anyway due to the
362                  * if statement above)
363                  */
364                 return addr | ((hi_len << 16) << 16);
365         }
366 }
367
368 static inline void iwl_pcie_tfd_set_tb(struct iwl_trans *trans, void *tfd,
369                                        u8 idx, dma_addr_t addr, u16 len)
370 {
371         if (trans->cfg->use_tfh) {
372                 struct iwl_tfh_tfd *tfd_fh = (void *)tfd;
373                 struct iwl_tfh_tb *tb = &tfd_fh->tbs[idx];
374
375                 put_unaligned_le64(addr, &tb->addr);
376                 tb->tb_len = cpu_to_le16(len);
377
378                 tfd_fh->num_tbs = cpu_to_le16(idx + 1);
379         } else {
380                 struct iwl_tfd *tfd_fh = (void *)tfd;
381                 struct iwl_tfd_tb *tb = &tfd_fh->tbs[idx];
382
383                 u16 hi_n_len = len << 4;
384
385                 put_unaligned_le32(addr, &tb->lo);
386                 if (sizeof(dma_addr_t) > sizeof(u32))
387                         hi_n_len |= ((addr >> 16) >> 16) & 0xF;
388
389                 tb->hi_n_len = cpu_to_le16(hi_n_len);
390
391                 tfd_fh->num_tbs = idx + 1;
392         }
393 }
394
395 static inline u8 iwl_pcie_tfd_get_num_tbs(struct iwl_trans *trans, void *_tfd)
396 {
397         if (trans->cfg->use_tfh) {
398                 struct iwl_tfh_tfd *tfd = _tfd;
399
400                 return le16_to_cpu(tfd->num_tbs) & 0x1f;
401         } else {
402                 struct iwl_tfd *tfd = _tfd;
403
404                 return tfd->num_tbs & 0x1f;
405         }
406 }
407
408 static void iwl_pcie_tfd_unmap(struct iwl_trans *trans,
409                                struct iwl_cmd_meta *meta,
410                                struct iwl_txq *txq, int index)
411 {
412         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
413         int i, num_tbs;
414         void *tfd = iwl_pcie_get_tfd(trans_pcie, txq, index);
415
416         /* Sanity check on number of chunks */
417         num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd);
418
419         if (num_tbs >= trans_pcie->max_tbs) {
420                 IWL_ERR(trans, "Too many chunks: %i\n", num_tbs);
421                 /* @todo issue fatal error, it is quite serious situation */
422                 return;
423         }
424
425         /* first TB is never freed - it's the bidirectional DMA data */
426
427         for (i = 1; i < num_tbs; i++) {
428                 if (meta->tbs & BIT(i))
429                         dma_unmap_page(trans->dev,
430                                        iwl_pcie_tfd_tb_get_addr(trans, tfd, i),
431                                        iwl_pcie_tfd_tb_get_len(trans, tfd, i),
432                                        DMA_TO_DEVICE);
433                 else
434                         dma_unmap_single(trans->dev,
435                                          iwl_pcie_tfd_tb_get_addr(trans, tfd,
436                                                                   i),
437                                          iwl_pcie_tfd_tb_get_len(trans, tfd,
438                                                                  i),
439                                          DMA_TO_DEVICE);
440         }
441
442         meta->tbs = 0;
443
444         if (trans->cfg->use_tfh) {
445                 struct iwl_tfh_tfd *tfd_fh = (void *)tfd;
446
447                 tfd_fh->num_tbs = 0;
448         } else {
449                 struct iwl_tfd *tfd_fh = (void *)tfd;
450
451                 tfd_fh->num_tbs = 0;
452         }
453
454 }
455
456 /*
457  * iwl_pcie_txq_free_tfd - Free all chunks referenced by TFD [txq->q.read_ptr]
458  * @trans - transport private data
459  * @txq - tx queue
460  * @dma_dir - the direction of the DMA mapping
461  *
462  * Does NOT advance any TFD circular buffer read/write indexes
463  * Does NOT free the TFD itself (which is within circular buffer)
464  */
465 static void iwl_pcie_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
466 {
467         /* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and
468          * idx is bounded by n_window
469          */
470         int rd_ptr = txq->read_ptr;
471         int idx = get_cmd_index(txq, rd_ptr);
472
473         lockdep_assert_held(&txq->lock);
474
475         /* We have only q->n_window txq->entries, but we use
476          * TFD_QUEUE_SIZE_MAX tfds
477          */
478         iwl_pcie_tfd_unmap(trans, &txq->entries[idx].meta, txq, rd_ptr);
479
480         /* free SKB */
481         if (txq->entries) {
482                 struct sk_buff *skb;
483
484                 skb = txq->entries[idx].skb;
485
486                 /* Can be called from irqs-disabled context
487                  * If skb is not NULL, it means that the whole queue is being
488                  * freed and that the queue is not empty - free the skb
489                  */
490                 if (skb) {
491                         iwl_op_mode_free_skb(trans->op_mode, skb);
492                         txq->entries[idx].skb = NULL;
493                 }
494         }
495 }
496
497 static int iwl_pcie_txq_build_tfd(struct iwl_trans *trans, struct iwl_txq *txq,
498                                   dma_addr_t addr, u16 len, bool reset)
499 {
500         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
501         void *tfd;
502         u32 num_tbs;
503
504         tfd = txq->tfds + trans_pcie->tfd_size * txq->write_ptr;
505
506         if (reset)
507                 memset(tfd, 0, trans_pcie->tfd_size);
508
509         num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd);
510
511         /* Each TFD can point to a maximum max_tbs Tx buffers */
512         if (num_tbs >= trans_pcie->max_tbs) {
513                 IWL_ERR(trans, "Error can not send more than %d chunks\n",
514                         trans_pcie->max_tbs);
515                 return -EINVAL;
516         }
517
518         if (WARN(addr & ~IWL_TX_DMA_MASK,
519                  "Unaligned address = %llx\n", (unsigned long long)addr))
520                 return -EINVAL;
521
522         iwl_pcie_tfd_set_tb(trans, tfd, num_tbs, addr, len);
523
524         return num_tbs;
525 }
526
527 static int iwl_pcie_txq_alloc(struct iwl_trans *trans,
528                                struct iwl_txq *txq, int slots_num,
529                                u32 txq_id)
530 {
531         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
532         size_t tfd_sz = trans_pcie->tfd_size * TFD_QUEUE_SIZE_MAX;
533         size_t tb0_buf_sz;
534         int i;
535
536         if (WARN_ON(txq->entries || txq->tfds))
537                 return -EINVAL;
538
539         setup_timer(&txq->stuck_timer, iwl_pcie_txq_stuck_timer,
540                     (unsigned long)txq);
541         txq->trans_pcie = trans_pcie;
542
543         txq->n_window = slots_num;
544
545         txq->entries = kcalloc(slots_num,
546                                sizeof(struct iwl_pcie_txq_entry),
547                                GFP_KERNEL);
548
549         if (!txq->entries)
550                 goto error;
551
552         if (txq_id == trans_pcie->cmd_queue)
553                 for (i = 0; i < slots_num; i++) {
554                         txq->entries[i].cmd =
555                                 kmalloc(sizeof(struct iwl_device_cmd),
556                                         GFP_KERNEL);
557                         if (!txq->entries[i].cmd)
558                                 goto error;
559                 }
560
561         /* Circular buffer of transmit frame descriptors (TFDs),
562          * shared with device */
563         txq->tfds = dma_alloc_coherent(trans->dev, tfd_sz,
564                                        &txq->dma_addr, GFP_KERNEL);
565         if (!txq->tfds)
566                 goto error;
567
568         BUILD_BUG_ON(IWL_FIRST_TB_SIZE_ALIGN != sizeof(*txq->first_tb_bufs));
569
570         tb0_buf_sz = sizeof(*txq->first_tb_bufs) * slots_num;
571
572         txq->first_tb_bufs = dma_alloc_coherent(trans->dev, tb0_buf_sz,
573                                               &txq->first_tb_dma,
574                                               GFP_KERNEL);
575         if (!txq->first_tb_bufs)
576                 goto err_free_tfds;
577
578         txq->id = txq_id;
579
580         return 0;
581 err_free_tfds:
582         dma_free_coherent(trans->dev, tfd_sz, txq->tfds, txq->dma_addr);
583 error:
584         if (txq->entries && txq_id == trans_pcie->cmd_queue)
585                 for (i = 0; i < slots_num; i++)
586                         kfree(txq->entries[i].cmd);
587         kfree(txq->entries);
588         txq->entries = NULL;
589
590         return -ENOMEM;
591
592 }
593
594 static int iwl_pcie_txq_init(struct iwl_trans *trans, struct iwl_txq *txq,
595                               int slots_num, u32 txq_id)
596 {
597         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
598         int ret;
599
600         txq->need_update = false;
601
602         /* TFD_QUEUE_SIZE_MAX must be power-of-two size, otherwise
603          * iwl_queue_inc_wrap and iwl_queue_dec_wrap are broken. */
604         BUILD_BUG_ON(TFD_QUEUE_SIZE_MAX & (TFD_QUEUE_SIZE_MAX - 1));
605
606         /* Initialize queue's high/low-water marks, and head/tail indexes */
607         ret = iwl_queue_init(txq, slots_num, txq_id);
608         if (ret)
609                 return ret;
610
611         spin_lock_init(&txq->lock);
612
613         if (txq_id == trans_pcie->cmd_queue) {
614                 static struct lock_class_key iwl_pcie_cmd_queue_lock_class;
615
616                 lockdep_set_class(&txq->lock, &iwl_pcie_cmd_queue_lock_class);
617         }
618
619         __skb_queue_head_init(&txq->overflow_q);
620
621         /*
622          * Tell nic where to find circular buffer of Tx Frame Descriptors for
623          * given Tx queue, and enable the DMA channel used for that queue.
624          * Circular buffer (TFD queue in DRAM) physical base address */
625         if (trans->cfg->use_tfh)
626                 iwl_write_direct64(trans,
627                                    FH_MEM_CBBC_QUEUE(trans, txq_id),
628                                    txq->dma_addr);
629         else
630                 iwl_write_direct32(trans, FH_MEM_CBBC_QUEUE(trans, txq_id),
631                                    txq->dma_addr >> 8);
632
633         return 0;
634 }
635
636 static void iwl_pcie_free_tso_page(struct iwl_trans_pcie *trans_pcie,
637                                    struct sk_buff *skb)
638 {
639         struct page **page_ptr;
640
641         page_ptr = (void *)((u8 *)skb->cb + trans_pcie->page_offs);
642
643         if (*page_ptr) {
644                 __free_page(*page_ptr);
645                 *page_ptr = NULL;
646         }
647 }
648
649 static void iwl_pcie_clear_cmd_in_flight(struct iwl_trans *trans)
650 {
651         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
652
653         lockdep_assert_held(&trans_pcie->reg_lock);
654
655         if (trans_pcie->ref_cmd_in_flight) {
656                 trans_pcie->ref_cmd_in_flight = false;
657                 IWL_DEBUG_RPM(trans, "clear ref_cmd_in_flight - unref\n");
658                 iwl_trans_unref(trans);
659         }
660
661         if (!trans->cfg->base_params->apmg_wake_up_wa)
662                 return;
663         if (WARN_ON(!trans_pcie->cmd_hold_nic_awake))
664                 return;
665
666         trans_pcie->cmd_hold_nic_awake = false;
667         __iwl_trans_pcie_clear_bit(trans, CSR_GP_CNTRL,
668                                    CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
669 }
670
671 /*
672  * iwl_pcie_txq_unmap -  Unmap any remaining DMA mappings and free skb's
673  */
674 static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id)
675 {
676         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
677         struct iwl_txq *txq = &trans_pcie->txq[txq_id];
678
679         if (!txq) {
680                 IWL_ERR(trans, "Trying to free a queue that wasn't allocated?\n");
681                 return;
682         }
683
684         spin_lock_bh(&txq->lock);
685         while (txq->write_ptr != txq->read_ptr) {
686                 IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n",
687                                    txq_id, txq->read_ptr);
688
689                 if (txq_id != trans_pcie->cmd_queue) {
690                         struct sk_buff *skb = txq->entries[txq->read_ptr].skb;
691
692                         if (WARN_ON_ONCE(!skb))
693                                 continue;
694
695                         iwl_pcie_free_tso_page(trans_pcie, skb);
696                 }
697                 iwl_pcie_txq_free_tfd(trans, txq);
698                 txq->read_ptr = iwl_queue_inc_wrap(txq->read_ptr);
699
700                 if (txq->read_ptr == txq->write_ptr) {
701                         unsigned long flags;
702
703                         spin_lock_irqsave(&trans_pcie->reg_lock, flags);
704                         if (txq_id != trans_pcie->cmd_queue) {
705                                 IWL_DEBUG_RPM(trans, "Q %d - last tx freed\n",
706                                               txq->id);
707                                 iwl_trans_unref(trans);
708                         } else {
709                                 iwl_pcie_clear_cmd_in_flight(trans);
710                         }
711                         spin_unlock_irqrestore(&trans_pcie->reg_lock, flags);
712                 }
713         }
714         txq->active = false;
715
716         while (!skb_queue_empty(&txq->overflow_q)) {
717                 struct sk_buff *skb = __skb_dequeue(&txq->overflow_q);
718
719                 iwl_op_mode_free_skb(trans->op_mode, skb);
720         }
721
722         spin_unlock_bh(&txq->lock);
723
724         /* just in case - this queue may have been stopped */
725         iwl_wake_queue(trans, txq);
726 }
727
728 /*
729  * iwl_pcie_txq_free - Deallocate DMA queue.
730  * @txq: Transmit queue to deallocate.
731  *
732  * Empty queue by removing and destroying all BD's.
733  * Free all buffers.
734  * 0-fill, but do not free "txq" descriptor structure.
735  */
736 static void iwl_pcie_txq_free(struct iwl_trans *trans, int txq_id)
737 {
738         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
739         struct iwl_txq *txq = &trans_pcie->txq[txq_id];
740         struct device *dev = trans->dev;
741         int i;
742
743         if (WARN_ON(!txq))
744                 return;
745
746         iwl_pcie_txq_unmap(trans, txq_id);
747
748         /* De-alloc array of command/tx buffers */
749         if (txq_id == trans_pcie->cmd_queue)
750                 for (i = 0; i < txq->n_window; i++) {
751                         kzfree(txq->entries[i].cmd);
752                         kzfree(txq->entries[i].free_buf);
753                 }
754
755         /* De-alloc circular buffer of TFDs */
756         if (txq->tfds) {
757                 dma_free_coherent(dev,
758                                   trans_pcie->tfd_size * TFD_QUEUE_SIZE_MAX,
759                                   txq->tfds, txq->dma_addr);
760                 txq->dma_addr = 0;
761                 txq->tfds = NULL;
762
763                 dma_free_coherent(dev,
764                                   sizeof(*txq->first_tb_bufs) * txq->n_window,
765                                   txq->first_tb_bufs, txq->first_tb_dma);
766         }
767
768         kfree(txq->entries);
769         txq->entries = NULL;
770
771         del_timer_sync(&txq->stuck_timer);
772
773         /* 0-fill queue descriptor structure */
774         memset(txq, 0, sizeof(*txq));
775 }
776
777 void iwl_pcie_tx_start(struct iwl_trans *trans, u32 scd_base_addr)
778 {
779         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
780         int nq = trans->cfg->base_params->num_of_queues;
781         int chan;
782         u32 reg_val;
783         int clear_dwords = (SCD_TRANS_TBL_OFFSET_QUEUE(nq) -
784                                 SCD_CONTEXT_MEM_LOWER_BOUND) / sizeof(u32);
785
786         /* make sure all queue are not stopped/used */
787         memset(trans_pcie->queue_stopped, 0, sizeof(trans_pcie->queue_stopped));
788         memset(trans_pcie->queue_used, 0, sizeof(trans_pcie->queue_used));
789
790         if (trans->cfg->use_tfh)
791                 return;
792
793         trans_pcie->scd_base_addr =
794                 iwl_read_prph(trans, SCD_SRAM_BASE_ADDR);
795
796         WARN_ON(scd_base_addr != 0 &&
797                 scd_base_addr != trans_pcie->scd_base_addr);
798
799         /* reset context data, TX status and translation data */
800         iwl_trans_write_mem(trans, trans_pcie->scd_base_addr +
801                                    SCD_CONTEXT_MEM_LOWER_BOUND,
802                             NULL, clear_dwords);
803
804         iwl_write_prph(trans, SCD_DRAM_BASE_ADDR,
805                        trans_pcie->scd_bc_tbls.dma >> 10);
806
807         /* The chain extension of the SCD doesn't work well. This feature is
808          * enabled by default by the HW, so we need to disable it manually.
809          */
810         if (trans->cfg->base_params->scd_chain_ext_wa)
811                 iwl_write_prph(trans, SCD_CHAINEXT_EN, 0);
812
813         iwl_trans_ac_txq_enable(trans, trans_pcie->cmd_queue,
814                                 trans_pcie->cmd_fifo,
815                                 trans_pcie->cmd_q_wdg_timeout);
816
817         /* Activate all Tx DMA/FIFO channels */
818         iwl_scd_activate_fifos(trans);
819
820         /* Enable DMA channel */
821         for (chan = 0; chan < FH_TCSR_CHNL_NUM; chan++)
822                 iwl_write_direct32(trans, FH_TCSR_CHNL_TX_CONFIG_REG(chan),
823                                    FH_TCSR_TX_CONFIG_REG_VAL_DMA_CHNL_ENABLE |
824                                    FH_TCSR_TX_CONFIG_REG_VAL_DMA_CREDIT_ENABLE);
825
826         /* Update FH chicken bits */
827         reg_val = iwl_read_direct32(trans, FH_TX_CHICKEN_BITS_REG);
828         iwl_write_direct32(trans, FH_TX_CHICKEN_BITS_REG,
829                            reg_val | FH_TX_CHICKEN_BITS_SCD_AUTO_RETRY_EN);
830
831         /* Enable L1-Active */
832         if (trans->cfg->device_family != IWL_DEVICE_FAMILY_8000)
833                 iwl_clear_bits_prph(trans, APMG_PCIDEV_STT_REG,
834                                     APMG_PCIDEV_STT_VAL_L1_ACT_DIS);
835 }
836
837 void iwl_trans_pcie_tx_reset(struct iwl_trans *trans)
838 {
839         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
840         int txq_id;
841
842         for (txq_id = 0; txq_id < trans->cfg->base_params->num_of_queues;
843              txq_id++) {
844                 struct iwl_txq *txq = &trans_pcie->txq[txq_id];
845                 if (trans->cfg->use_tfh)
846                         iwl_write_direct64(trans,
847                                            FH_MEM_CBBC_QUEUE(trans, txq_id),
848                                            txq->dma_addr);
849                 else
850                         iwl_write_direct32(trans,
851                                            FH_MEM_CBBC_QUEUE(trans, txq_id),
852                                            txq->dma_addr >> 8);
853                 iwl_pcie_txq_unmap(trans, txq_id);
854                 txq->read_ptr = 0;
855                 txq->write_ptr = 0;
856         }
857
858         /* Tell NIC where to find the "keep warm" buffer */
859         iwl_write_direct32(trans, FH_KW_MEM_ADDR_REG,
860                            trans_pcie->kw.dma >> 4);
861
862         /*
863          * Send 0 as the scd_base_addr since the device may have be reset
864          * while we were in WoWLAN in which case SCD_SRAM_BASE_ADDR will
865          * contain garbage.
866          */
867         iwl_pcie_tx_start(trans, 0);
868 }
869
870 static void iwl_pcie_tx_stop_fh(struct iwl_trans *trans)
871 {
872         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
873         unsigned long flags;
874         int ch, ret;
875         u32 mask = 0;
876
877         spin_lock(&trans_pcie->irq_lock);
878
879         if (!iwl_trans_grab_nic_access(trans, &flags))
880                 goto out;
881
882         /* Stop each Tx DMA channel */
883         for (ch = 0; ch < FH_TCSR_CHNL_NUM; ch++) {
884                 iwl_write32(trans, FH_TCSR_CHNL_TX_CONFIG_REG(ch), 0x0);
885                 mask |= FH_TSSR_TX_STATUS_REG_MSK_CHNL_IDLE(ch);
886         }
887
888         /* Wait for DMA channels to be idle */
889         ret = iwl_poll_bit(trans, FH_TSSR_TX_STATUS_REG, mask, mask, 5000);
890         if (ret < 0)
891                 IWL_ERR(trans,
892                         "Failing on timeout while stopping DMA channel %d [0x%08x]\n",
893                         ch, iwl_read32(trans, FH_TSSR_TX_STATUS_REG));
894
895         iwl_trans_release_nic_access(trans, &flags);
896
897 out:
898         spin_unlock(&trans_pcie->irq_lock);
899 }
900
901 /*
902  * iwl_pcie_tx_stop - Stop all Tx DMA channels
903  */
904 int iwl_pcie_tx_stop(struct iwl_trans *trans)
905 {
906         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
907         int txq_id;
908
909         /* Turn off all Tx DMA fifos */
910         iwl_scd_deactivate_fifos(trans);
911
912         /* Turn off all Tx DMA channels */
913         iwl_pcie_tx_stop_fh(trans);
914
915         /*
916          * This function can be called before the op_mode disabled the
917          * queues. This happens when we have an rfkill interrupt.
918          * Since we stop Tx altogether - mark the queues as stopped.
919          */
920         memset(trans_pcie->queue_stopped, 0, sizeof(trans_pcie->queue_stopped));
921         memset(trans_pcie->queue_used, 0, sizeof(trans_pcie->queue_used));
922
923         /* This can happen: start_hw, stop_device */
924         if (!trans_pcie->txq)
925                 return 0;
926
927         /* Unmap DMA from host system and free skb's */
928         for (txq_id = 0; txq_id < trans->cfg->base_params->num_of_queues;
929              txq_id++)
930                 iwl_pcie_txq_unmap(trans, txq_id);
931
932         return 0;
933 }
934
935 /*
936  * iwl_trans_tx_free - Free TXQ Context
937  *
938  * Destroy all TX DMA queues and structures
939  */
940 void iwl_pcie_tx_free(struct iwl_trans *trans)
941 {
942         int txq_id;
943         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
944
945         /* Tx queues */
946         if (trans_pcie->txq) {
947                 for (txq_id = 0;
948                      txq_id < trans->cfg->base_params->num_of_queues; txq_id++)
949                         iwl_pcie_txq_free(trans, txq_id);
950         }
951
952         kfree(trans_pcie->txq);
953         trans_pcie->txq = NULL;
954
955         iwl_pcie_free_dma_ptr(trans, &trans_pcie->kw);
956
957         iwl_pcie_free_dma_ptr(trans, &trans_pcie->scd_bc_tbls);
958 }
959
960 /*
961  * iwl_pcie_tx_alloc - allocate TX context
962  * Allocate all Tx DMA structures and initialize them
963  */
964 static int iwl_pcie_tx_alloc(struct iwl_trans *trans)
965 {
966         int ret;
967         int txq_id, slots_num;
968         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
969
970         u16 scd_bc_tbls_size = trans->cfg->base_params->num_of_queues *
971                         sizeof(struct iwlagn_scd_bc_tbl);
972
973         /*It is not allowed to alloc twice, so warn when this happens.
974          * We cannot rely on the previous allocation, so free and fail */
975         if (WARN_ON(trans_pcie->txq)) {
976                 ret = -EINVAL;
977                 goto error;
978         }
979
980         ret = iwl_pcie_alloc_dma_ptr(trans, &trans_pcie->scd_bc_tbls,
981                                    scd_bc_tbls_size);
982         if (ret) {
983                 IWL_ERR(trans, "Scheduler BC Table allocation failed\n");
984                 goto error;
985         }
986
987         /* Alloc keep-warm buffer */
988         ret = iwl_pcie_alloc_dma_ptr(trans, &trans_pcie->kw, IWL_KW_SIZE);
989         if (ret) {
990                 IWL_ERR(trans, "Keep Warm allocation failed\n");
991                 goto error;
992         }
993
994         trans_pcie->txq = kcalloc(trans->cfg->base_params->num_of_queues,
995                                   sizeof(struct iwl_txq), GFP_KERNEL);
996         if (!trans_pcie->txq) {
997                 IWL_ERR(trans, "Not enough memory for txq\n");
998                 ret = -ENOMEM;
999                 goto error;
1000         }
1001
1002         /* Alloc and init all Tx queues, including the command queue (#4/#9) */
1003         for (txq_id = 0; txq_id < trans->cfg->base_params->num_of_queues;
1004              txq_id++) {
1005                 slots_num = (txq_id == trans_pcie->cmd_queue) ?
1006                                         TFD_CMD_SLOTS : TFD_TX_CMD_SLOTS;
1007                 ret = iwl_pcie_txq_alloc(trans, &trans_pcie->txq[txq_id],
1008                                           slots_num, txq_id);
1009                 if (ret) {
1010                         IWL_ERR(trans, "Tx %d queue alloc failed\n", txq_id);
1011                         goto error;
1012                 }
1013         }
1014
1015         return 0;
1016
1017 error:
1018         iwl_pcie_tx_free(trans);
1019
1020         return ret;
1021 }
1022 int iwl_pcie_tx_init(struct iwl_trans *trans)
1023 {
1024         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1025         int ret;
1026         int txq_id, slots_num;
1027         bool alloc = false;
1028
1029         if (!trans_pcie->txq) {
1030                 ret = iwl_pcie_tx_alloc(trans);
1031                 if (ret)
1032                         goto error;
1033                 alloc = true;
1034         }
1035
1036         spin_lock(&trans_pcie->irq_lock);
1037
1038         /* Turn off all Tx DMA fifos */
1039         iwl_scd_deactivate_fifos(trans);
1040
1041         /* Tell NIC where to find the "keep warm" buffer */
1042         iwl_write_direct32(trans, FH_KW_MEM_ADDR_REG,
1043                            trans_pcie->kw.dma >> 4);
1044
1045         spin_unlock(&trans_pcie->irq_lock);
1046
1047         /* Alloc and init all Tx queues, including the command queue (#4/#9) */
1048         for (txq_id = 0; txq_id < trans->cfg->base_params->num_of_queues;
1049              txq_id++) {
1050                 slots_num = (txq_id == trans_pcie->cmd_queue) ?
1051                                         TFD_CMD_SLOTS : TFD_TX_CMD_SLOTS;
1052                 ret = iwl_pcie_txq_init(trans, &trans_pcie->txq[txq_id],
1053                                          slots_num, txq_id);
1054                 if (ret) {
1055                         IWL_ERR(trans, "Tx %d queue init failed\n", txq_id);
1056                         goto error;
1057                 }
1058         }
1059
1060         if (trans->cfg->use_tfh) {
1061                 iwl_write_direct32(trans, TFH_TRANSFER_MODE,
1062                                    TFH_TRANSFER_MAX_PENDING_REQ |
1063                                    TFH_CHUNK_SIZE_128 |
1064                                    TFH_CHUNK_SPLIT_MODE);
1065                 return 0;
1066         }
1067
1068         iwl_set_bits_prph(trans, SCD_GP_CTRL, SCD_GP_CTRL_AUTO_ACTIVE_MODE);
1069         if (trans->cfg->base_params->num_of_queues > 20)
1070                 iwl_set_bits_prph(trans, SCD_GP_CTRL,
1071                                   SCD_GP_CTRL_ENABLE_31_QUEUES);
1072
1073         return 0;
1074 error:
1075         /*Upon error, free only if we allocated something */
1076         if (alloc)
1077                 iwl_pcie_tx_free(trans);
1078         return ret;
1079 }
1080
1081 static inline void iwl_pcie_txq_progress(struct iwl_txq *txq)
1082 {
1083         lockdep_assert_held(&txq->lock);
1084
1085         if (!txq->wd_timeout)
1086                 return;
1087
1088         /*
1089          * station is asleep and we send data - that must
1090          * be uAPSD or PS-Poll. Don't rearm the timer.
1091          */
1092         if (txq->frozen)
1093                 return;
1094
1095         /*
1096          * if empty delete timer, otherwise move timer forward
1097          * since we're making progress on this queue
1098          */
1099         if (txq->read_ptr == txq->write_ptr)
1100                 del_timer(&txq->stuck_timer);
1101         else
1102                 mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout);
1103 }
1104
1105 /* Frees buffers until index _not_ inclusive */
1106 void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
1107                             struct sk_buff_head *skbs)
1108 {
1109         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1110         struct iwl_txq *txq = &trans_pcie->txq[txq_id];
1111         int tfd_num = ssn & (TFD_QUEUE_SIZE_MAX - 1);
1112         int last_to_free;
1113
1114         /* This function is not meant to release cmd queue*/
1115         if (WARN_ON(txq_id == trans_pcie->cmd_queue))
1116                 return;
1117
1118         spin_lock_bh(&txq->lock);
1119
1120         if (!txq->active) {
1121                 IWL_DEBUG_TX_QUEUES(trans, "Q %d inactive - ignoring idx %d\n",
1122                                     txq_id, ssn);
1123                 goto out;
1124         }
1125
1126         if (txq->read_ptr == tfd_num)
1127                 goto out;
1128
1129         IWL_DEBUG_TX_REPLY(trans, "[Q %d] %d -> %d (%d)\n",
1130                            txq_id, txq->read_ptr, tfd_num, ssn);
1131
1132         /*Since we free until index _not_ inclusive, the one before index is
1133          * the last we will free. This one must be used */
1134         last_to_free = iwl_queue_dec_wrap(tfd_num);
1135
1136         if (!iwl_queue_used(txq, last_to_free)) {
1137                 IWL_ERR(trans,
1138                         "%s: Read index for DMA queue txq id (%d), last_to_free %d is out of range [0-%d] %d %d.\n",
1139                         __func__, txq_id, last_to_free, TFD_QUEUE_SIZE_MAX,
1140                         txq->write_ptr, txq->read_ptr);
1141                 goto out;
1142         }
1143
1144         if (WARN_ON(!skb_queue_empty(skbs)))
1145                 goto out;
1146
1147         for (;
1148              txq->read_ptr != tfd_num;
1149              txq->read_ptr = iwl_queue_inc_wrap(txq->read_ptr)) {
1150                 struct sk_buff *skb = txq->entries[txq->read_ptr].skb;
1151
1152                 if (WARN_ON_ONCE(!skb))
1153                         continue;
1154
1155                 iwl_pcie_free_tso_page(trans_pcie, skb);
1156
1157                 __skb_queue_tail(skbs, skb);
1158
1159                 txq->entries[txq->read_ptr].skb = NULL;
1160
1161                 if (!trans->cfg->use_tfh)
1162                         iwl_pcie_txq_inval_byte_cnt_tbl(trans, txq);
1163
1164                 iwl_pcie_txq_free_tfd(trans, txq);
1165         }
1166
1167         iwl_pcie_txq_progress(txq);
1168
1169         if (iwl_queue_space(txq) > txq->low_mark &&
1170             test_bit(txq_id, trans_pcie->queue_stopped)) {
1171                 struct sk_buff_head overflow_skbs;
1172
1173                 __skb_queue_head_init(&overflow_skbs);
1174                 skb_queue_splice_init(&txq->overflow_q, &overflow_skbs);
1175
1176                 /*
1177                  * This is tricky: we are in reclaim path which is non
1178                  * re-entrant, so noone will try to take the access the
1179                  * txq data from that path. We stopped tx, so we can't
1180                  * have tx as well. Bottom line, we can unlock and re-lock
1181                  * later.
1182                  */
1183                 spin_unlock_bh(&txq->lock);
1184
1185                 while (!skb_queue_empty(&overflow_skbs)) {
1186                         struct sk_buff *skb = __skb_dequeue(&overflow_skbs);
1187                         struct iwl_device_cmd *dev_cmd_ptr;
1188
1189                         dev_cmd_ptr = *(void **)((u8 *)skb->cb +
1190                                                  trans_pcie->dev_cmd_offs);
1191
1192                         /*
1193                          * Note that we can very well be overflowing again.
1194                          * In that case, iwl_queue_space will be small again
1195                          * and we won't wake mac80211's queue.
1196                          */
1197                         iwl_trans_pcie_tx(trans, skb, dev_cmd_ptr, txq_id);
1198                 }
1199                 spin_lock_bh(&txq->lock);
1200
1201                 if (iwl_queue_space(txq) > txq->low_mark)
1202                         iwl_wake_queue(trans, txq);
1203         }
1204
1205         if (txq->read_ptr == txq->write_ptr) {
1206                 IWL_DEBUG_RPM(trans, "Q %d - last tx reclaimed\n", txq->id);
1207                 iwl_trans_unref(trans);
1208         }
1209
1210 out:
1211         spin_unlock_bh(&txq->lock);
1212 }
1213
1214 static int iwl_pcie_set_cmd_in_flight(struct iwl_trans *trans,
1215                                       const struct iwl_host_cmd *cmd)
1216 {
1217         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1218         int ret;
1219
1220         lockdep_assert_held(&trans_pcie->reg_lock);
1221
1222         if (!(cmd->flags & CMD_SEND_IN_IDLE) &&
1223             !trans_pcie->ref_cmd_in_flight) {
1224                 trans_pcie->ref_cmd_in_flight = true;
1225                 IWL_DEBUG_RPM(trans, "set ref_cmd_in_flight - ref\n");
1226                 iwl_trans_ref(trans);
1227         }
1228
1229         /*
1230          * wake up the NIC to make sure that the firmware will see the host
1231          * command - we will let the NIC sleep once all the host commands
1232          * returned. This needs to be done only on NICs that have
1233          * apmg_wake_up_wa set.
1234          */
1235         if (trans->cfg->base_params->apmg_wake_up_wa &&
1236             !trans_pcie->cmd_hold_nic_awake) {
1237                 __iwl_trans_pcie_set_bit(trans, CSR_GP_CNTRL,
1238                                          CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
1239
1240                 ret = iwl_poll_bit(trans, CSR_GP_CNTRL,
1241                                    CSR_GP_CNTRL_REG_VAL_MAC_ACCESS_EN,
1242                                    (CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY |
1243                                     CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP),
1244                                    15000);
1245                 if (ret < 0) {
1246                         __iwl_trans_pcie_clear_bit(trans, CSR_GP_CNTRL,
1247                                         CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
1248                         IWL_ERR(trans, "Failed to wake NIC for hcmd\n");
1249                         return -EIO;
1250                 }
1251                 trans_pcie->cmd_hold_nic_awake = true;
1252         }
1253
1254         return 0;
1255 }
1256
1257 /*
1258  * iwl_pcie_cmdq_reclaim - Reclaim TX command queue entries already Tx'd
1259  *
1260  * When FW advances 'R' index, all entries between old and new 'R' index
1261  * need to be reclaimed. As result, some free space forms.  If there is
1262  * enough free space (> low mark), wake the stack that feeds us.
1263  */
1264 static void iwl_pcie_cmdq_reclaim(struct iwl_trans *trans, int txq_id, int idx)
1265 {
1266         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1267         struct iwl_txq *txq = &trans_pcie->txq[txq_id];
1268         unsigned long flags;
1269         int nfreed = 0;
1270
1271         lockdep_assert_held(&txq->lock);
1272
1273         if ((idx >= TFD_QUEUE_SIZE_MAX) || (!iwl_queue_used(txq, idx))) {
1274                 IWL_ERR(trans,
1275                         "%s: Read index for DMA queue txq id (%d), index %d is out of range [0-%d] %d %d.\n",
1276                         __func__, txq_id, idx, TFD_QUEUE_SIZE_MAX,
1277                         txq->write_ptr, txq->read_ptr);
1278                 return;
1279         }
1280
1281         for (idx = iwl_queue_inc_wrap(idx); txq->read_ptr != idx;
1282              txq->read_ptr = iwl_queue_inc_wrap(txq->read_ptr)) {
1283
1284                 if (nfreed++ > 0) {
1285                         IWL_ERR(trans, "HCMD skipped: index (%d) %d %d\n",
1286                                 idx, txq->write_ptr, txq->read_ptr);
1287                         iwl_force_nmi(trans);
1288                 }
1289         }
1290
1291         if (txq->read_ptr == txq->write_ptr) {
1292                 spin_lock_irqsave(&trans_pcie->reg_lock, flags);
1293                 iwl_pcie_clear_cmd_in_flight(trans);
1294                 spin_unlock_irqrestore(&trans_pcie->reg_lock, flags);
1295         }
1296
1297         iwl_pcie_txq_progress(txq);
1298 }
1299
1300 static int iwl_pcie_txq_set_ratid_map(struct iwl_trans *trans, u16 ra_tid,
1301                                  u16 txq_id)
1302 {
1303         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1304         u32 tbl_dw_addr;
1305         u32 tbl_dw;
1306         u16 scd_q2ratid;
1307
1308         scd_q2ratid = ra_tid & SCD_QUEUE_RA_TID_MAP_RATID_MSK;
1309
1310         tbl_dw_addr = trans_pcie->scd_base_addr +
1311                         SCD_TRANS_TBL_OFFSET_QUEUE(txq_id);
1312
1313         tbl_dw = iwl_trans_read_mem32(trans, tbl_dw_addr);
1314
1315         if (txq_id & 0x1)
1316                 tbl_dw = (scd_q2ratid << 16) | (tbl_dw & 0x0000FFFF);
1317         else
1318                 tbl_dw = scd_q2ratid | (tbl_dw & 0xFFFF0000);
1319
1320         iwl_trans_write_mem32(trans, tbl_dw_addr, tbl_dw);
1321
1322         return 0;
1323 }
1324
/*
 * RA/TID value in the format used by the Tx Scheduler: the receiver address
 * (actually the Rx station's index into the station table) shifted left by
 * four bits, with the Traffic ID (QOS priority) added to it.
 */
#define BUILD_RAxTID(sta_id, tid)	((tid) + ((sta_id) << 4))
1328
1329 void iwl_trans_pcie_txq_enable(struct iwl_trans *trans, int txq_id, u16 ssn,
1330                                const struct iwl_trans_txq_scd_cfg *cfg,
1331                                unsigned int wdg_timeout)
1332 {
1333         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1334         struct iwl_txq *txq = &trans_pcie->txq[txq_id];
1335         int fifo = -1;
1336
1337         if (test_and_set_bit(txq_id, trans_pcie->queue_used))
1338                 WARN_ONCE(1, "queue %d already used - expect issues", txq_id);
1339
1340         if (cfg && trans->cfg->use_tfh)
1341                 WARN_ONCE(1, "Expected no calls to SCD configuration");
1342
1343         txq->wd_timeout = msecs_to_jiffies(wdg_timeout);
1344
1345         if (cfg) {
1346                 fifo = cfg->fifo;
1347
1348                 /* Disable the scheduler prior configuring the cmd queue */
1349                 if (txq_id == trans_pcie->cmd_queue &&
1350                     trans_pcie->scd_set_active)
1351                         iwl_scd_enable_set_active(trans, 0);
1352
1353                 /* Stop this Tx queue before configuring it */
1354                 iwl_scd_txq_set_inactive(trans, txq_id);
1355
1356                 /* Set this queue as a chain-building queue unless it is CMD */
1357                 if (txq_id != trans_pcie->cmd_queue)
1358                         iwl_scd_txq_set_chain(trans, txq_id);
1359
1360                 if (cfg->aggregate) {
1361                         u16 ra_tid = BUILD_RAxTID(cfg->sta_id, cfg->tid);
1362
1363                         /* Map receiver-address / traffic-ID to this queue */
1364                         iwl_pcie_txq_set_ratid_map(trans, ra_tid, txq_id);
1365
1366                         /* enable aggregations for the queue */
1367                         iwl_scd_txq_enable_agg(trans, txq_id);
1368                         txq->ampdu = true;
1369                 } else {
1370                         /*
1371                          * disable aggregations for the queue, this will also
1372                          * make the ra_tid mapping configuration irrelevant
1373                          * since it is now a non-AGG queue.
1374                          */
1375                         iwl_scd_txq_disable_agg(trans, txq_id);
1376
1377                         ssn = txq->read_ptr;
1378                 }
1379         }
1380
1381         /* Place first TFD at index corresponding to start sequence number.
1382          * Assumes that ssn_idx is valid (!= 0xFFF) */
1383         txq->read_ptr = (ssn & 0xff);
1384         txq->write_ptr = (ssn & 0xff);
1385         iwl_write_direct32(trans, HBUS_TARG_WRPTR,
1386                            (ssn & 0xff) | (txq_id << 8));
1387
1388         if (cfg) {
1389                 u8 frame_limit = cfg->frame_limit;
1390
1391                 iwl_write_prph(trans, SCD_QUEUE_RDPTR(txq_id), ssn);
1392
1393                 /* Set up Tx window size and frame limit for this queue */
1394                 iwl_trans_write_mem32(trans, trans_pcie->scd_base_addr +
1395                                 SCD_CONTEXT_QUEUE_OFFSET(txq_id), 0);
1396                 iwl_trans_write_mem32(trans,
1397                         trans_pcie->scd_base_addr +
1398                         SCD_CONTEXT_QUEUE_OFFSET(txq_id) + sizeof(u32),
1399                         ((frame_limit << SCD_QUEUE_CTX_REG2_WIN_SIZE_POS) &
1400                                         SCD_QUEUE_CTX_REG2_WIN_SIZE_MSK) |
1401                         ((frame_limit << SCD_QUEUE_CTX_REG2_FRAME_LIMIT_POS) &
1402                                         SCD_QUEUE_CTX_REG2_FRAME_LIMIT_MSK));
1403
1404                 /* Set up status area in SRAM, map to Tx DMA/FIFO, activate */
1405                 iwl_write_prph(trans, SCD_QUEUE_STATUS_BITS(txq_id),
1406                                (1 << SCD_QUEUE_STTS_REG_POS_ACTIVE) |
1407                                (cfg->fifo << SCD_QUEUE_STTS_REG_POS_TXF) |
1408                                (1 << SCD_QUEUE_STTS_REG_POS_WSL) |
1409                                SCD_QUEUE_STTS_REG_MSK);
1410
1411                 /* enable the scheduler for this queue (only) */
1412                 if (txq_id == trans_pcie->cmd_queue &&
1413                     trans_pcie->scd_set_active)
1414                         iwl_scd_enable_set_active(trans, BIT(txq_id));
1415
1416                 IWL_DEBUG_TX_QUEUES(trans,
1417                                     "Activate queue %d on FIFO %d WrPtr: %d\n",
1418                                     txq_id, fifo, ssn & 0xff);
1419         } else {
1420                 IWL_DEBUG_TX_QUEUES(trans,
1421                                     "Activate queue %d WrPtr: %d\n",
1422                                     txq_id, ssn & 0xff);
1423         }
1424
1425         txq->active = true;
1426 }
1427
1428 void iwl_trans_pcie_txq_set_shared_mode(struct iwl_trans *trans, u32 txq_id,
1429                                         bool shared_mode)
1430 {
1431         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1432         struct iwl_txq *txq = &trans_pcie->txq[txq_id];
1433
1434         txq->ampdu = !shared_mode;
1435 }
1436
1437 dma_addr_t iwl_trans_pcie_get_txq_byte_table(struct iwl_trans *trans, int txq)
1438 {
1439         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1440
1441         return trans_pcie->scd_bc_tbls.dma +
1442                txq * sizeof(struct iwlagn_scd_bc_tbl);
1443 }
1444
1445 void iwl_trans_pcie_txq_disable(struct iwl_trans *trans, int txq_id,
1446                                 bool configure_scd)
1447 {
1448         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1449         u32 stts_addr = trans_pcie->scd_base_addr +
1450                         SCD_TX_STTS_QUEUE_OFFSET(txq_id);
1451         static const u32 zero_val[4] = {};
1452
1453         trans_pcie->txq[txq_id].frozen_expiry_remainder = 0;
1454         trans_pcie->txq[txq_id].frozen = false;
1455
1456         /*
1457          * Upon HW Rfkill - we stop the device, and then stop the queues
1458          * in the op_mode. Just for the sake of the simplicity of the op_mode,
1459          * allow the op_mode to call txq_disable after it already called
1460          * stop_device.
1461          */
1462         if (!test_and_clear_bit(txq_id, trans_pcie->queue_used)) {
1463                 WARN_ONCE(test_bit(STATUS_DEVICE_ENABLED, &trans->status),
1464                           "queue %d not used", txq_id);
1465                 return;
1466         }
1467
1468         if (configure_scd && trans->cfg->use_tfh)
1469                 WARN_ONCE(1, "Expected no calls to SCD configuration");
1470
1471         if (configure_scd) {
1472                 iwl_scd_txq_set_inactive(trans, txq_id);
1473
1474                 iwl_trans_write_mem(trans, stts_addr, (void *)zero_val,
1475                                     ARRAY_SIZE(zero_val));
1476         }
1477
1478         iwl_pcie_txq_unmap(trans, txq_id);
1479         trans_pcie->txq[txq_id].ampdu = false;
1480
1481         IWL_DEBUG_TX_QUEUES(trans, "Deactivate queue %d\n", txq_id);
1482 }
1483
1484 /*************** HOST COMMAND QUEUE FUNCTIONS   *****/
1485
1486 /*
1487  * iwl_pcie_enqueue_hcmd - enqueue a uCode command
1488  * @priv: device private data point
1489  * @cmd: a pointer to the ucode command structure
1490  *
1491  * The function returns < 0 values to indicate the operation
1492  * failed. On success, it returns the index (>= 0) of command in the
1493  * command queue.
1494  */
1495 static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
1496                                  struct iwl_host_cmd *cmd)
1497 {
1498         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1499         struct iwl_txq *txq = &trans_pcie->txq[trans_pcie->cmd_queue];
1500         struct iwl_device_cmd *out_cmd;
1501         struct iwl_cmd_meta *out_meta;
1502         unsigned long flags;
1503         void *dup_buf = NULL;
1504         dma_addr_t phys_addr;
1505         int idx;
1506         u16 copy_size, cmd_size, tb0_size;
1507         bool had_nocopy = false;
1508         u8 group_id = iwl_cmd_groupid(cmd->id);
1509         int i, ret;
1510         u32 cmd_pos;
1511         const u8 *cmddata[IWL_MAX_CMD_TBS_PER_TFD];
1512         u16 cmdlen[IWL_MAX_CMD_TBS_PER_TFD];
1513         unsigned long flags2;
1514
1515         if (WARN(!trans->wide_cmd_header &&
1516                  group_id > IWL_ALWAYS_LONG_GROUP,
1517                  "unsupported wide command %#x\n", cmd->id))
1518                 return -EINVAL;
1519
1520         if (group_id != 0) {
1521                 copy_size = sizeof(struct iwl_cmd_header_wide);
1522                 cmd_size = sizeof(struct iwl_cmd_header_wide);
1523         } else {
1524                 copy_size = sizeof(struct iwl_cmd_header);
1525                 cmd_size = sizeof(struct iwl_cmd_header);
1526         }
1527
1528         /* need one for the header if the first is NOCOPY */
1529         BUILD_BUG_ON(IWL_MAX_CMD_TBS_PER_TFD > IWL_NUM_OF_TBS - 1);
1530
1531         for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
1532                 cmddata[i] = cmd->data[i];
1533                 cmdlen[i] = cmd->len[i];
1534
1535                 if (!cmd->len[i])
1536                         continue;
1537
1538                 /* need at least IWL_FIRST_TB_SIZE copied */
1539                 if (copy_size < IWL_FIRST_TB_SIZE) {
1540                         int copy = IWL_FIRST_TB_SIZE - copy_size;
1541
1542                         if (copy > cmdlen[i])
1543                                 copy = cmdlen[i];
1544                         cmdlen[i] -= copy;
1545                         cmddata[i] += copy;
1546                         copy_size += copy;
1547                 }
1548
1549                 if (cmd->dataflags[i] & IWL_HCMD_DFL_NOCOPY) {
1550                         had_nocopy = true;
1551                         if (WARN_ON(cmd->dataflags[i] & IWL_HCMD_DFL_DUP)) {
1552                                 idx = -EINVAL;
1553                                 goto free_dup_buf;
1554                         }
1555                 } else if (cmd->dataflags[i] & IWL_HCMD_DFL_DUP) {
1556                         /*
1557                          * This is also a chunk that isn't copied
1558                          * to the static buffer so set had_nocopy.
1559                          */
1560                         had_nocopy = true;
1561
1562                         /* only allowed once */
1563                         if (WARN_ON(dup_buf)) {
1564                                 idx = -EINVAL;
1565                                 goto free_dup_buf;
1566                         }
1567
1568                         dup_buf = kmemdup(cmddata[i], cmdlen[i],
1569                                           GFP_ATOMIC);
1570                         if (!dup_buf)
1571                                 return -ENOMEM;
1572                 } else {
1573                         /* NOCOPY must not be followed by normal! */
1574                         if (WARN_ON(had_nocopy)) {
1575                                 idx = -EINVAL;
1576                                 goto free_dup_buf;
1577                         }
1578                         copy_size += cmdlen[i];
1579                 }
1580                 cmd_size += cmd->len[i];
1581         }
1582
1583         /*
1584          * If any of the command structures end up being larger than
1585          * the TFD_MAX_PAYLOAD_SIZE and they aren't dynamically
1586          * allocated into separate TFDs, then we will need to
1587          * increase the size of the buffers.
1588          */
1589         if (WARN(copy_size > TFD_MAX_PAYLOAD_SIZE,
1590                  "Command %s (%#x) is too large (%d bytes)\n",
1591                  iwl_get_cmd_string(trans, cmd->id),
1592                  cmd->id, copy_size)) {
1593                 idx = -EINVAL;
1594                 goto free_dup_buf;
1595         }
1596
1597         spin_lock_irqsave(&txq->lock, flags2);
1598
1599         if (iwl_queue_space(txq) < ((cmd->flags & CMD_ASYNC) ? 2 : 1)) {
1600                 spin_unlock_irqrestore(&txq->lock, flags2);
1601
1602                 IWL_ERR(trans, "No space in command queue\n");
1603                 iwl_op_mode_cmd_queue_full(trans->op_mode);
1604                 idx = -ENOSPC;
1605                 goto free_dup_buf;
1606         }
1607
1608         idx = get_cmd_index(txq, txq->write_ptr);
1609         out_cmd = txq->entries[idx].cmd;
1610         out_meta = &txq->entries[idx].meta;
1611
1612         memset(out_meta, 0, sizeof(*out_meta)); /* re-initialize to NULL */
1613         if (cmd->flags & CMD_WANT_SKB)
1614                 out_meta->source = cmd;
1615
1616         /* set up the header */
1617         if (group_id != 0) {
1618                 out_cmd->hdr_wide.cmd = iwl_cmd_opcode(cmd->id);
1619                 out_cmd->hdr_wide.group_id = group_id;
1620                 out_cmd->hdr_wide.version = iwl_cmd_version(cmd->id);
1621                 out_cmd->hdr_wide.length =
1622                         cpu_to_le16(cmd_size -
1623                                     sizeof(struct iwl_cmd_header_wide));
1624                 out_cmd->hdr_wide.reserved = 0;
1625                 out_cmd->hdr_wide.sequence =
1626                         cpu_to_le16(QUEUE_TO_SEQ(trans_pcie->cmd_queue) |
1627                                                  INDEX_TO_SEQ(txq->write_ptr));
1628
1629                 cmd_pos = sizeof(struct iwl_cmd_header_wide);
1630                 copy_size = sizeof(struct iwl_cmd_header_wide);
1631         } else {
1632                 out_cmd->hdr.cmd = iwl_cmd_opcode(cmd->id);
1633                 out_cmd->hdr.sequence =
1634                         cpu_to_le16(QUEUE_TO_SEQ(trans_pcie->cmd_queue) |
1635                                                  INDEX_TO_SEQ(txq->write_ptr));
1636                 out_cmd->hdr.group_id = 0;
1637
1638                 cmd_pos = sizeof(struct iwl_cmd_header);
1639                 copy_size = sizeof(struct iwl_cmd_header);
1640         }
1641
1642         /* and copy the data that needs to be copied */
1643         for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
1644                 int copy;
1645
1646                 if (!cmd->len[i])
1647                         continue;
1648
1649                 /* copy everything if not nocopy/dup */
1650                 if (!(cmd->dataflags[i] & (IWL_HCMD_DFL_NOCOPY |
1651                                            IWL_HCMD_DFL_DUP))) {
1652                         copy = cmd->len[i];
1653
1654                         memcpy((u8 *)out_cmd + cmd_pos, cmd->data[i], copy);
1655                         cmd_pos += copy;
1656                         copy_size += copy;
1657                         continue;
1658                 }
1659
1660                 /*
1661                  * Otherwise we need at least IWL_FIRST_TB_SIZE copied
1662                  * in total (for bi-directional DMA), but copy up to what
1663                  * we can fit into the payload for debug dump purposes.
1664                  */
1665                 copy = min_t(int, TFD_MAX_PAYLOAD_SIZE - cmd_pos, cmd->len[i]);
1666
1667                 memcpy((u8 *)out_cmd + cmd_pos, cmd->data[i], copy);
1668                 cmd_pos += copy;
1669
1670                 /* However, treat copy_size the proper way, we need it below */
1671                 if (copy_size < IWL_FIRST_TB_SIZE) {
1672                         copy = IWL_FIRST_TB_SIZE - copy_size;
1673
1674                         if (copy > cmd->len[i])
1675                                 copy = cmd->len[i];
1676                         copy_size += copy;
1677                 }
1678         }
1679
1680         IWL_DEBUG_HC(trans,
1681                      "Sending command %s (%.2x.%.2x), seq: 0x%04X, %d bytes at %d[%d]:%d\n",
1682                      iwl_get_cmd_string(trans, cmd->id),
1683                      group_id, out_cmd->hdr.cmd,
1684                      le16_to_cpu(out_cmd->hdr.sequence),
1685                      cmd_size, txq->write_ptr, idx, trans_pcie->cmd_queue);
1686
1687         /* start the TFD with the minimum copy bytes */
1688         tb0_size = min_t(int, copy_size, IWL_FIRST_TB_SIZE);
1689         memcpy(&txq->first_tb_bufs[idx], &out_cmd->hdr, tb0_size);
1690         iwl_pcie_txq_build_tfd(trans, txq,
1691                                iwl_pcie_get_first_tb_dma(txq, idx),
1692                                tb0_size, true);
1693
1694         /* map first command fragment, if any remains */
1695         if (copy_size > tb0_size) {
1696                 phys_addr = dma_map_single(trans->dev,
1697                                            ((u8 *)&out_cmd->hdr) + tb0_size,
1698                                            copy_size - tb0_size,
1699                                            DMA_TO_DEVICE);
1700                 if (dma_mapping_error(trans->dev, phys_addr)) {
1701                         iwl_pcie_tfd_unmap(trans, out_meta, txq,
1702                                            txq->write_ptr);
1703                         idx = -ENOMEM;
1704                         goto out;
1705                 }
1706
1707                 iwl_pcie_txq_build_tfd(trans, txq, phys_addr,
1708                                        copy_size - tb0_size, false);
1709         }
1710
1711         /* map the remaining (adjusted) nocopy/dup fragments */
1712         for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
1713                 const void *data = cmddata[i];
1714
1715                 if (!cmdlen[i])
1716                         continue;
1717                 if (!(cmd->dataflags[i] & (IWL_HCMD_DFL_NOCOPY |
1718                                            IWL_HCMD_DFL_DUP)))
1719                         continue;
1720                 if (cmd->dataflags[i] & IWL_HCMD_DFL_DUP)
1721                         data = dup_buf;
1722                 phys_addr = dma_map_single(trans->dev, (void *)data,
1723                                            cmdlen[i], DMA_TO_DEVICE);
1724                 if (dma_mapping_error(trans->dev, phys_addr)) {
1725                         iwl_pcie_tfd_unmap(trans, out_meta, txq,
1726                                            txq->write_ptr);
1727                         idx = -ENOMEM;
1728                         goto out;
1729                 }
1730
1731                 iwl_pcie_txq_build_tfd(trans, txq, phys_addr, cmdlen[i], false);
1732         }
1733
1734         BUILD_BUG_ON(IWL_TFH_NUM_TBS > sizeof(out_meta->tbs) * BITS_PER_BYTE);
1735         out_meta->flags = cmd->flags;
1736         if (WARN_ON_ONCE(txq->entries[idx].free_buf))
1737                 kzfree(txq->entries[idx].free_buf);
1738         txq->entries[idx].free_buf = dup_buf;
1739
1740         trace_iwlwifi_dev_hcmd(trans->dev, cmd, cmd_size, &out_cmd->hdr_wide);
1741
1742         /* start timer if queue currently empty */
1743         if (txq->read_ptr == txq->write_ptr && txq->wd_timeout)
1744                 mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout);
1745
1746         spin_lock_irqsave(&trans_pcie->reg_lock, flags);
1747         ret = iwl_pcie_set_cmd_in_flight(trans, cmd);
1748         if (ret < 0) {
1749                 idx = ret;
1750                 spin_unlock_irqrestore(&trans_pcie->reg_lock, flags);
1751                 goto out;
1752         }
1753
1754         /* Increment and update queue's write index */
1755         txq->write_ptr = iwl_queue_inc_wrap(txq->write_ptr);
1756         iwl_pcie_txq_inc_wr_ptr(trans, txq);
1757
1758         spin_unlock_irqrestore(&trans_pcie->reg_lock, flags);
1759
1760  out:
1761         spin_unlock_irqrestore(&txq->lock, flags2);
1762  free_dup_buf:
1763         if (idx < 0)
1764                 kfree(dup_buf);
1765         return idx;
1766 }
1767
1768 /*
1769  * iwl_pcie_hcmd_complete - Pull unused buffers off the queue and reclaim them
1770  * @rxb: Rx buffer to reclaim
1771  */
1772 void iwl_pcie_hcmd_complete(struct iwl_trans *trans,
1773                             struct iwl_rx_cmd_buffer *rxb)
1774 {
1775         struct iwl_rx_packet *pkt = rxb_addr(rxb);
1776         u16 sequence = le16_to_cpu(pkt->hdr.sequence);
1777         u8 group_id = iwl_cmd_groupid(pkt->hdr.group_id);
1778         u32 cmd_id;
1779         int txq_id = SEQ_TO_QUEUE(sequence);
1780         int index = SEQ_TO_INDEX(sequence);
1781         int cmd_index;
1782         struct iwl_device_cmd *cmd;
1783         struct iwl_cmd_meta *meta;
1784         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1785         struct iwl_txq *txq = &trans_pcie->txq[trans_pcie->cmd_queue];
1786
1787         /* If a Tx command is being handled and it isn't in the actual
1788          * command queue then there a command routing bug has been introduced
1789          * in the queue management code. */
1790         if (WARN(txq_id != trans_pcie->cmd_queue,
1791                  "wrong command queue %d (should be %d), sequence 0x%X readp=%d writep=%d\n",
1792                  txq_id, trans_pcie->cmd_queue, sequence,
1793                  trans_pcie->txq[trans_pcie->cmd_queue].read_ptr,
1794                  trans_pcie->txq[trans_pcie->cmd_queue].write_ptr)) {
1795                 iwl_print_hex_error(trans, pkt, 32);
1796                 return;
1797         }
1798
1799         spin_lock_bh(&txq->lock);
1800
1801         cmd_index = get_cmd_index(txq, index);
1802         cmd = txq->entries[cmd_index].cmd;
1803         meta = &txq->entries[cmd_index].meta;
1804         cmd_id = iwl_cmd_id(cmd->hdr.cmd, group_id, 0);
1805
1806         iwl_pcie_tfd_unmap(trans, meta, txq, index);
1807
1808         /* Input error checking is done when commands are added to queue. */
1809         if (meta->flags & CMD_WANT_SKB) {
1810                 struct page *p = rxb_steal_page(rxb);
1811
1812                 meta->source->resp_pkt = pkt;
1813                 meta->source->_rx_page_addr = (unsigned long)page_address(p);
1814                 meta->source->_rx_page_order = trans_pcie->rx_page_order;
1815         }
1816
1817         if (meta->flags & CMD_WANT_ASYNC_CALLBACK)
1818                 iwl_op_mode_async_cb(trans->op_mode, cmd);
1819
1820         iwl_pcie_cmdq_reclaim(trans, txq_id, index);
1821
1822         if (!(meta->flags & CMD_ASYNC)) {
1823                 if (!test_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status)) {
1824                         IWL_WARN(trans,
1825                                  "HCMD_ACTIVE already clear for command %s\n",
1826                                  iwl_get_cmd_string(trans, cmd_id));
1827                 }
1828                 clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status);
1829                 IWL_DEBUG_INFO(trans, "Clearing HCMD_ACTIVE for command %s\n",
1830                                iwl_get_cmd_string(trans, cmd_id));
1831                 wake_up(&trans_pcie->wait_command_queue);
1832         }
1833
1834         if (meta->flags & CMD_MAKE_TRANS_IDLE) {
1835                 IWL_DEBUG_INFO(trans, "complete %s - mark trans as idle\n",
1836                                iwl_get_cmd_string(trans, cmd->hdr.cmd));
1837                 set_bit(STATUS_TRANS_IDLE, &trans->status);
1838                 wake_up(&trans_pcie->d0i3_waitq);
1839         }
1840
1841         if (meta->flags & CMD_WAKE_UP_TRANS) {
1842                 IWL_DEBUG_INFO(trans, "complete %s - clear trans idle flag\n",
1843                                iwl_get_cmd_string(trans, cmd->hdr.cmd));
1844                 clear_bit(STATUS_TRANS_IDLE, &trans->status);
1845                 wake_up(&trans_pcie->d0i3_waitq);
1846         }
1847
1848         meta->flags = 0;
1849
1850         spin_unlock_bh(&txq->lock);
1851 }
1852
1853 #define HOST_COMPLETE_TIMEOUT   (2 * HZ)
1854
1855 static int iwl_pcie_send_hcmd_async(struct iwl_trans *trans,
1856                                     struct iwl_host_cmd *cmd)
1857 {
1858         int ret;
1859
1860         /* An asynchronous command can not expect an SKB to be set. */
1861         if (WARN_ON(cmd->flags & CMD_WANT_SKB))
1862                 return -EINVAL;
1863
1864         ret = iwl_pcie_enqueue_hcmd(trans, cmd);
1865         if (ret < 0) {
1866                 IWL_ERR(trans,
1867                         "Error sending %s: enqueue_hcmd failed: %d\n",
1868                         iwl_get_cmd_string(trans, cmd->id), ret);
1869                 return ret;
1870         }
1871         return 0;
1872 }
1873
1874 static int iwl_pcie_send_hcmd_sync(struct iwl_trans *trans,
1875                                    struct iwl_host_cmd *cmd)
1876 {
1877         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1878         int cmd_idx;
1879         int ret;
1880
1881         IWL_DEBUG_INFO(trans, "Attempting to send sync command %s\n",
1882                        iwl_get_cmd_string(trans, cmd->id));
1883
1884         if (WARN(test_and_set_bit(STATUS_SYNC_HCMD_ACTIVE,
1885                                   &trans->status),
1886                  "Command %s: a command is already active!\n",
1887                  iwl_get_cmd_string(trans, cmd->id)))
1888                 return -EIO;
1889
1890         IWL_DEBUG_INFO(trans, "Setting HCMD_ACTIVE for command %s\n",
1891                        iwl_get_cmd_string(trans, cmd->id));
1892
1893         if (pm_runtime_suspended(&trans_pcie->pci_dev->dev)) {
1894                 ret = wait_event_timeout(trans_pcie->d0i3_waitq,
1895                                  pm_runtime_active(&trans_pcie->pci_dev->dev),
1896                                  msecs_to_jiffies(IWL_TRANS_IDLE_TIMEOUT));
1897                 if (!ret) {
1898                         IWL_ERR(trans, "Timeout exiting D0i3 before hcmd\n");
1899                         return -ETIMEDOUT;
1900                 }
1901         }
1902
1903         cmd_idx = iwl_pcie_enqueue_hcmd(trans, cmd);
1904         if (cmd_idx < 0) {
1905                 ret = cmd_idx;
1906                 clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status);
1907                 IWL_ERR(trans,
1908                         "Error sending %s: enqueue_hcmd failed: %d\n",
1909                         iwl_get_cmd_string(trans, cmd->id), ret);
1910                 return ret;
1911         }
1912
1913         ret = wait_event_timeout(trans_pcie->wait_command_queue,
1914                                  !test_bit(STATUS_SYNC_HCMD_ACTIVE,
1915                                            &trans->status),
1916                                  HOST_COMPLETE_TIMEOUT);
1917         if (!ret) {
1918                 struct iwl_txq *txq = &trans_pcie->txq[trans_pcie->cmd_queue];
1919
1920                 IWL_ERR(trans, "Error sending %s: time out after %dms.\n",
1921                         iwl_get_cmd_string(trans, cmd->id),
1922                         jiffies_to_msecs(HOST_COMPLETE_TIMEOUT));
1923
1924                 IWL_ERR(trans, "Current CMD queue read_ptr %d write_ptr %d\n",
1925                         txq->read_ptr, txq->write_ptr);
1926
1927                 clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status);
1928                 IWL_DEBUG_INFO(trans, "Clearing HCMD_ACTIVE for command %s\n",
1929                                iwl_get_cmd_string(trans, cmd->id));
1930                 ret = -ETIMEDOUT;
1931
1932                 iwl_force_nmi(trans);
1933                 iwl_trans_fw_error(trans);
1934
1935                 goto cancel;
1936         }
1937
1938         if (test_bit(STATUS_FW_ERROR, &trans->status)) {
1939                 IWL_ERR(trans, "FW error in SYNC CMD %s\n",
1940                         iwl_get_cmd_string(trans, cmd->id));
1941                 dump_stack();
1942                 ret = -EIO;
1943                 goto cancel;
1944         }
1945
1946         if (!(cmd->flags & CMD_SEND_IN_RFKILL) &&
1947             test_bit(STATUS_RFKILL, &trans->status)) {
1948                 IWL_DEBUG_RF_KILL(trans, "RFKILL in SYNC CMD... no rsp\n");
1949                 ret = -ERFKILL;
1950                 goto cancel;
1951         }
1952
1953         if ((cmd->flags & CMD_WANT_SKB) && !cmd->resp_pkt) {
1954                 IWL_ERR(trans, "Error: Response NULL in '%s'\n",
1955                         iwl_get_cmd_string(trans, cmd->id));
1956                 ret = -EIO;
1957                 goto cancel;
1958         }
1959
1960         return 0;
1961
1962 cancel:
1963         if (cmd->flags & CMD_WANT_SKB) {
1964                 /*
1965                  * Cancel the CMD_WANT_SKB flag for the cmd in the
1966                  * TX cmd queue. Otherwise in case the cmd comes
1967                  * in later, it will possibly set an invalid
1968                  * address (cmd->meta.source).
1969                  */
1970                 trans_pcie->txq[trans_pcie->cmd_queue].
1971                         entries[cmd_idx].meta.flags &= ~CMD_WANT_SKB;
1972         }
1973
1974         if (cmd->resp_pkt) {
1975                 iwl_free_resp(cmd);
1976                 cmd->resp_pkt = NULL;
1977         }
1978
1979         return ret;
1980 }
1981
1982 int iwl_trans_pcie_send_hcmd(struct iwl_trans *trans, struct iwl_host_cmd *cmd)
1983 {
1984         if (!(cmd->flags & CMD_SEND_IN_RFKILL) &&
1985             test_bit(STATUS_RFKILL, &trans->status)) {
1986                 IWL_DEBUG_RF_KILL(trans, "Dropping CMD 0x%x: RF KILL\n",
1987                                   cmd->id);
1988                 return -ERFKILL;
1989         }
1990
1991         if (cmd->flags & CMD_ASYNC)
1992                 return iwl_pcie_send_hcmd_async(trans, cmd);
1993
1994         /* We still can fail on RFKILL that can be asserted while we wait */
1995         return iwl_pcie_send_hcmd_sync(trans, cmd);
1996 }
1997
1998 static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb,
1999                              struct iwl_txq *txq, u8 hdr_len,
2000                              struct iwl_cmd_meta *out_meta,
2001                              struct iwl_device_cmd *dev_cmd, u16 tb1_len)
2002 {
2003         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
2004         u16 tb2_len;
2005         int i;
2006
2007         /*
2008          * Set up TFD's third entry to point directly to remainder
2009          * of skb's head, if any
2010          */
2011         tb2_len = skb_headlen(skb) - hdr_len;
2012
2013         if (tb2_len > 0) {
2014                 dma_addr_t tb2_phys = dma_map_single(trans->dev,
2015                                                      skb->data + hdr_len,
2016                                                      tb2_len, DMA_TO_DEVICE);
2017                 if (unlikely(dma_mapping_error(trans->dev, tb2_phys))) {
2018                         iwl_pcie_tfd_unmap(trans, out_meta, txq,
2019                                            txq->write_ptr);
2020                         return -EINVAL;
2021                 }
2022                 iwl_pcie_txq_build_tfd(trans, txq, tb2_phys, tb2_len, false);
2023         }
2024
2025         /* set up the remaining entries to point to the data */
2026         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2027                 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2028                 dma_addr_t tb_phys;
2029                 int tb_idx;
2030
2031                 if (!skb_frag_size(frag))
2032                         continue;
2033
2034                 tb_phys = skb_frag_dma_map(trans->dev, frag, 0,
2035                                            skb_frag_size(frag), DMA_TO_DEVICE);
2036
2037                 if (unlikely(dma_mapping_error(trans->dev, tb_phys))) {
2038                         iwl_pcie_tfd_unmap(trans, out_meta, txq,
2039                                            txq->write_ptr);
2040                         return -EINVAL;
2041                 }
2042                 tb_idx = iwl_pcie_txq_build_tfd(trans, txq, tb_phys,
2043                                                 skb_frag_size(frag), false);
2044
2045                 out_meta->tbs |= BIT(tb_idx);
2046         }
2047
2048         trace_iwlwifi_dev_tx(trans->dev, skb,
2049                              iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr),
2050                              trans_pcie->tfd_size,
2051                              &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len,
2052                              skb->data + hdr_len, tb2_len);
2053         trace_iwlwifi_dev_tx_data(trans->dev, skb,
2054                                   hdr_len, skb->len - hdr_len);
2055         return 0;
2056 }
2057
2058 #ifdef CONFIG_INET
2059 static struct iwl_tso_hdr_page *
2060 get_page_hdr(struct iwl_trans *trans, size_t len)
2061 {
2062         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
2063         struct iwl_tso_hdr_page *p = this_cpu_ptr(trans_pcie->tso_hdr_page);
2064
2065         if (!p->page)
2066                 goto alloc;
2067
2068         /* enough room on this page */
2069         if (p->pos + len < (u8 *)page_address(p->page) + PAGE_SIZE)
2070                 return p;
2071
2072         /* We don't have enough room on this page, get a new one. */
2073         __free_page(p->page);
2074
2075 alloc:
2076         p->page = alloc_page(GFP_ATOMIC);
2077         if (!p->page)
2078                 return NULL;
2079         p->pos = page_address(p->page);
2080         return p;
2081 }
2082
2083 static void iwl_compute_pseudo_hdr_csum(void *iph, struct tcphdr *tcph,
2084                                         bool ipv6, unsigned int len)
2085 {
2086         if (ipv6) {
2087                 struct ipv6hdr *iphv6 = iph;
2088
2089                 tcph->check = ~csum_ipv6_magic(&iphv6->saddr, &iphv6->daddr,
2090                                                len + tcph->doff * 4,
2091                                                IPPROTO_TCP, 0);
2092         } else {
2093                 struct iphdr *iphv4 = iph;
2094
2095                 ip_send_check(iphv4);
2096                 tcph->check = ~csum_tcpudp_magic(iphv4->saddr, iphv4->daddr,
2097                                                  len + tcph->doff * 4,
2098                                                  IPPROTO_TCP, 0);
2099         }
2100 }
2101
2102 static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
2103                                    struct iwl_txq *txq, u8 hdr_len,
2104                                    struct iwl_cmd_meta *out_meta,
2105                                    struct iwl_device_cmd *dev_cmd, u16 tb1_len)
2106 {
2107         struct iwl_tx_cmd *tx_cmd = (void *)dev_cmd->payload;
2108         struct iwl_trans_pcie *trans_pcie = txq->trans_pcie;
2109         struct ieee80211_hdr *hdr = (void *)skb->data;
2110         unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room;
2111         unsigned int mss = skb_shinfo(skb)->gso_size;
2112         u16 length, iv_len, amsdu_pad;
2113         u8 *start_hdr;
2114         struct iwl_tso_hdr_page *hdr_page;
2115         struct page **page_ptr;
2116         int ret;
2117         struct tso_t tso;
2118
2119         /* if the packet is protected, then it must be CCMP or GCMP */
2120         BUILD_BUG_ON(IEEE80211_CCMP_HDR_LEN != IEEE80211_GCMP_HDR_LEN);
2121         iv_len = ieee80211_has_protected(hdr->frame_control) ?
2122                 IEEE80211_CCMP_HDR_LEN : 0;
2123
2124         trace_iwlwifi_dev_tx(trans->dev, skb,
2125                              iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr),
2126                              trans_pcie->tfd_size,
2127                              &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len,
2128                              NULL, 0);
2129
2130         ip_hdrlen = skb_transport_header(skb) - skb_network_header(skb);
2131         snap_ip_tcp_hdrlen = 8 + ip_hdrlen + tcp_hdrlen(skb);
2132         total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len - iv_len;
2133         amsdu_pad = 0;
2134
2135         /* total amount of header we may need for this A-MSDU */
2136         hdr_room = DIV_ROUND_UP(total_len, mss) *
2137                 (3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr)) + iv_len;
2138
2139         /* Our device supports 9 segments at most, it will fit in 1 page */
2140         hdr_page = get_page_hdr(trans, hdr_room);
2141         if (!hdr_page)
2142                 return -ENOMEM;
2143
2144         get_page(hdr_page->page);
2145         start_hdr = hdr_page->pos;
2146         page_ptr = (void *)((u8 *)skb->cb + trans_pcie->page_offs);
2147         *page_ptr = hdr_page->page;
2148         memcpy(hdr_page->pos, skb->data + hdr_len, iv_len);
2149         hdr_page->pos += iv_len;
2150
2151         /*
2152          * Pull the ieee80211 header + IV to be able to use TSO core,
2153          * we will restore it for the tx_status flow.
2154          */
2155         skb_pull(skb, hdr_len + iv_len);
2156
2157         /*
2158          * Remove the length of all the headers that we don't actually
2159          * have in the MPDU by themselves, but that we duplicate into
2160          * all the different MSDUs inside the A-MSDU.
2161          */
2162         le16_add_cpu(&tx_cmd->len, -snap_ip_tcp_hdrlen);
2163
2164         tso_start(skb, &tso);
2165
2166         while (total_len) {
2167                 /* this is the data left for this subframe */
2168                 unsigned int data_left =
2169                         min_t(unsigned int, mss, total_len);
2170                 struct sk_buff *csum_skb = NULL;
2171                 unsigned int hdr_tb_len;
2172                 dma_addr_t hdr_tb_phys;
2173                 struct tcphdr *tcph;
2174                 u8 *iph, *subf_hdrs_start = hdr_page->pos;
2175
2176                 total_len -= data_left;
2177
2178                 memset(hdr_page->pos, 0, amsdu_pad);
2179                 hdr_page->pos += amsdu_pad;
2180                 amsdu_pad = (4 - (sizeof(struct ethhdr) + snap_ip_tcp_hdrlen +
2181                                   data_left)) & 0x3;
2182                 ether_addr_copy(hdr_page->pos, ieee80211_get_DA(hdr));
2183                 hdr_page->pos += ETH_ALEN;
2184                 ether_addr_copy(hdr_page->pos, ieee80211_get_SA(hdr));
2185                 hdr_page->pos += ETH_ALEN;
2186
2187                 length = snap_ip_tcp_hdrlen + data_left;
2188                 *((__be16 *)hdr_page->pos) = cpu_to_be16(length);
2189                 hdr_page->pos += sizeof(length);
2190
2191                 /*
2192                  * This will copy the SNAP as well which will be considered
2193                  * as MAC header.
2194                  */
2195                 tso_build_hdr(skb, hdr_page->pos, &tso, data_left, !total_len);
2196                 iph = hdr_page->pos + 8;
2197                 tcph = (void *)(iph + ip_hdrlen);
2198
2199                 /* For testing on current hardware only */
2200                 if (trans_pcie->sw_csum_tx) {
2201                         csum_skb = alloc_skb(data_left + tcp_hdrlen(skb),
2202                                              GFP_ATOMIC);
2203                         if (!csum_skb) {
2204                                 ret = -ENOMEM;
2205                                 goto out_unmap;
2206                         }
2207
2208                         iwl_compute_pseudo_hdr_csum(iph, tcph,
2209                                                     skb->protocol ==
2210                                                         htons(ETH_P_IPV6),
2211                                                     data_left);
2212
2213                         memcpy(skb_put(csum_skb, tcp_hdrlen(skb)),
2214                                tcph, tcp_hdrlen(skb));
2215                         skb_set_transport_header(csum_skb, 0);
2216                         csum_skb->csum_start =
2217                                 (unsigned char *)tcp_hdr(csum_skb) -
2218                                                  csum_skb->head;
2219                 }
2220
2221                 hdr_page->pos += snap_ip_tcp_hdrlen;
2222
2223                 hdr_tb_len = hdr_page->pos - start_hdr;
2224                 hdr_tb_phys = dma_map_single(trans->dev, start_hdr,
2225                                              hdr_tb_len, DMA_TO_DEVICE);
2226                 if (unlikely(dma_mapping_error(trans->dev, hdr_tb_phys))) {
2227                         dev_kfree_skb(csum_skb);
2228                         ret = -EINVAL;
2229                         goto out_unmap;
2230                 }
2231                 iwl_pcie_txq_build_tfd(trans, txq, hdr_tb_phys,
2232                                        hdr_tb_len, false);
2233                 trace_iwlwifi_dev_tx_tso_chunk(trans->dev, start_hdr,
2234                                                hdr_tb_len);
2235                 /* add this subframe's headers' length to the tx_cmd */
2236                 le16_add_cpu(&tx_cmd->len, hdr_page->pos - subf_hdrs_start);
2237
2238                 /* prepare the start_hdr for the next subframe */
2239                 start_hdr = hdr_page->pos;
2240
2241                 /* put the payload */
2242                 while (data_left) {
2243                         unsigned int size = min_t(unsigned int, tso.size,
2244                                                   data_left);
2245                         dma_addr_t tb_phys;
2246
2247                         if (trans_pcie->sw_csum_tx)
2248                                 memcpy(skb_put(csum_skb, size), tso.data, size);
2249
2250                         tb_phys = dma_map_single(trans->dev, tso.data,
2251                                                  size, DMA_TO_DEVICE);
2252                         if (unlikely(dma_mapping_error(trans->dev, tb_phys))) {
2253                                 dev_kfree_skb(csum_skb);
2254                                 ret = -EINVAL;
2255                                 goto out_unmap;
2256                         }
2257
2258                         iwl_pcie_txq_build_tfd(trans, txq, tb_phys,
2259                                                size, false);
2260                         trace_iwlwifi_dev_tx_tso_chunk(trans->dev, tso.data,
2261                                                        size);
2262
2263                         data_left -= size;
2264                         tso_build_data(skb, &tso, size);
2265                 }
2266
2267                 /* For testing on early hardware only */
2268                 if (trans_pcie->sw_csum_tx) {
2269                         __wsum csum;
2270
2271                         csum = skb_checksum(csum_skb,
2272                                             skb_checksum_start_offset(csum_skb),
2273                                             csum_skb->len -
2274                                             skb_checksum_start_offset(csum_skb),
2275                                             0);
2276                         dev_kfree_skb(csum_skb);
2277                         dma_sync_single_for_cpu(trans->dev, hdr_tb_phys,
2278                                                 hdr_tb_len, DMA_TO_DEVICE);
2279                         tcph->check = csum_fold(csum);
2280                         dma_sync_single_for_device(trans->dev, hdr_tb_phys,
2281                                                    hdr_tb_len, DMA_TO_DEVICE);
2282                 }
2283         }
2284
2285         /* re -add the WiFi header and IV */
2286         skb_push(skb, hdr_len + iv_len);
2287
2288         return 0;
2289
2290 out_unmap:
2291         iwl_pcie_tfd_unmap(trans, out_meta, txq, txq->write_ptr);
2292         return ret;
2293 }
2294 #else /* CONFIG_INET */
/*
 * iwl_fill_data_tbs_amsdu - stub used when CONFIG_INET is not set
 *
 * A-MSDU frames are not expected in this configuration, so reaching this
 * function triggers a warning and the frame is refused.
 *
 * The caller, iwl_trans_pcie_tx(), DMA-maps TB1 and adds it to the TFD at
 * txq->write_ptr before calling this function, and its own error path only
 * drops txq->lock.  To avoid leaving that mapping behind, call
 * iwl_pcie_tfd_unmap() on the TFD before failing, exactly as the out_unmap
 * path of the CONFIG_INET implementation does.
 *
 * skb, hdr_len, dev_cmd and tb1_len are unused.
 *
 * Returns -1 unconditionally.
 */
2295 static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
2296                                    struct iwl_txq *txq, u8 hdr_len,
2297                                    struct iwl_cmd_meta *out_meta,
2298                                    struct iwl_device_cmd *dev_cmd, u16 tb1_len)
2299 {
2300         /* No A-MSDU without CONFIG_INET: warn, undo the TFD mappings, fail */
2301         WARN_ON(1);
2302         iwl_pcie_tfd_unmap(trans, out_meta, txq, txq->write_ptr);
2303         return -1;
2304 }
2305 #endif /* CONFIG_INET */
2306
/*
 * iwl_trans_pcie_tx - place one frame on a PCIe TX queue
 * @trans: transport the frame is sent through
 * @skb: frame to send; skb->data starts with the 802.11 header
 * @dev_cmd: device command for the frame; its payload is a struct iwl_tx_cmd
 * @txq_id: index into trans_pcie->txq of the queue to use
 *
 * Builds the TFD at txq->write_ptr:
 *  - TB0 covers the first IWL_FIRST_TB_SIZE bytes of the command, which are
 *    copied into txq->first_tb_bufs[] once the rest of the TFD is built;
 *  - TB1 covers the remainder of the TX command plus the 802.11 header and
 *    is DMA-mapped directly from @dev_cmd;
 *  - the remaining TBs are added by iwl_fill_data_tbs() or, when the QoS
 *    control field has the A-MSDU bit set, by iwl_fill_data_tbs_amsdu().
 * The byte-count table entry is then updated, the software write pointer is
 * advanced and, unless the frame has the "more fragments" bit set, the
 * device write pointer is updated via iwl_pcie_txq_inc_wr_ptr().
 *
 * If fewer than 3 entries are free in the queue, the frame is not put on the
 * ring: @dev_cmd is stored in skb->cb (at trans_pcie->dev_cmd_offs), the skb
 * is appended to txq->overflow_q and 0 is returned.
 *
 * txq->lock is taken with spin_lock() for the queue manipulation and released
 * before every return that follows it.
 *
 * Returns 0 if the frame was put on the ring or parked on the overflow queue,
 * -EINVAL if @txq_id is not marked in trans_pcie->queue_used, -ENOMEM if the
 * skb had to be linearized and that failed, and -1 if the software checksum
 * could not be written, the TB1 DMA mapping failed, or the data TBs could
 * not be filled.
 */
2307 int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
2308                       struct iwl_device_cmd *dev_cmd, int txq_id)
2309 {
2310         struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
2311         struct ieee80211_hdr *hdr;
2312         struct iwl_tx_cmd *tx_cmd = (struct iwl_tx_cmd *)dev_cmd->payload;
2313         struct iwl_cmd_meta *out_meta;
2314         struct iwl_txq *txq;
2315         dma_addr_t tb0_phys, tb1_phys, scratch_phys;
2316         void *tb1_addr;
2317         void *tfd;
2318         u16 len, tb1_len;
2319         bool wait_write_ptr;
2320         __le16 fc;
2321         u8 hdr_len;
2322         u16 wifi_seq;
2323         bool amsdu;
2324
2325         txq = &trans_pcie->txq[txq_id];
2326
2327         if (WARN_ONCE(!test_bit(txq_id, trans_pcie->queue_used),
2328                       "TX on unused queue %d\n", txq_id))
2329                 return -EINVAL;
2330
             /*
              * Software checksum path: when sw_csum_tx is set and the skb is
              * marked CHECKSUM_PARTIAL, compute the checksum here over the bytes
              * from the checksum start to the end of the skb, store the folded
              * result at the checksum offset and mark the skb as done.
              */
2331         if (unlikely(trans_pcie->sw_csum_tx &&
2332                      skb->ip_summed == CHECKSUM_PARTIAL)) {
2333                 int offs = skb_checksum_start_offset(skb);
2334                 int csum_offs = offs + skb->csum_offset;
2335                 __wsum csum;
2336
                     /* the checksum field is written in place below */
2337                 if (skb_ensure_writable(skb, csum_offs + sizeof(__sum16)))
2338                         return -1;
2339
2340                 csum = skb_checksum(skb, offs, skb->len - offs, 0);
2341                 *(__sum16 *)(skb->data + csum_offs) = csum_fold(csum);
2342
2343                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2344         }
2345
             /* more page fragments than IWL_PCIE_MAX_FRAGS allows: linearize */
2346         if (skb_is_nonlinear(skb) &&
2347             skb_shinfo(skb)->nr_frags > IWL_PCIE_MAX_FRAGS(trans_pcie) &&
2348             __skb_linearize(skb))
2349                 return -ENOMEM;
2350
2351         /* mac80211 always puts the full header into the SKB's head,
2352          * so there's no need to check if it's readable there
2353          */
2354         hdr = (struct ieee80211_hdr *)skb->data;
2355         fc = hdr->frame_control;
2356         hdr_len = ieee80211_hdrlen(fc);
2357
             /* txq->lock is held from here until each of the returns below */
2358         spin_lock(&txq->lock);
2359
2360         if (iwl_queue_space(txq) < txq->high_mark) {
2361                 iwl_stop_queue(trans, txq);
2362
2363                 /* don't put the packet on the ring if there is no room: park it */
2364                 if (unlikely(iwl_queue_space(txq) < 3)) {
2365                         struct iwl_device_cmd **dev_cmd_ptr;
2366
                             /* keep dev_cmd with the parked skb, inside skb->cb */
2367                         dev_cmd_ptr = (void *)((u8 *)skb->cb +
2368                                                trans_pcie->dev_cmd_offs);
2369
2370                         *dev_cmd_ptr = dev_cmd;
2371                         __skb_queue_tail(&txq->overflow_q, skb);
2372
2373                         spin_unlock(&txq->lock);
2374                         return 0;
2375                 }
2376         }
2377
2378         /* In AGG mode, the index in the ring must correspond to the WiFi
2379          * sequence number. This is a HW requirement to help the SCD to parse
2380          * the BA.
2381          * Check here that the packets are in the right place on the ring.
2382          */
2383         wifi_seq = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl));
2384         WARN_ONCE(txq->ampdu &&
2385                   (wifi_seq & 0xff) != txq->write_ptr,
2386                   "Q: %d WiFi Seq %d tfdNum %d",
2387                   txq_id, wifi_seq, txq->write_ptr);
2388
2389         /* Set up driver data for this TFD */
2390         txq->entries[txq->write_ptr].skb = skb;
2391         txq->entries[txq->write_ptr].cmd = dev_cmd;
2392
             /* the command's sequence field encodes the queue and ring index */
2393         dev_cmd->hdr.sequence =
2394                 cpu_to_le16((u16)(QUEUE_TO_SEQ(txq_id) |
2395                             INDEX_TO_SEQ(txq->write_ptr)));
2396
             /*
              * Point the TX command's dram_{lsb,msb}_ptr at the DMA address of its
              * own scratch field, as it will sit inside the first TB.
              */
2397         tb0_phys = iwl_pcie_get_first_tb_dma(txq, txq->write_ptr);
2398         scratch_phys = tb0_phys + sizeof(struct iwl_cmd_header) +
2399                        offsetof(struct iwl_tx_cmd, scratch);
2400
2401         tx_cmd->dram_lsb_ptr = cpu_to_le32(scratch_phys);
2402         tx_cmd->dram_msb_ptr = iwl_get_dma_hi_addr(scratch_phys);
2403
2404         /* Set up first empty entry in queue's array of Tx/cmd buffers */
2405         out_meta = &txq->entries[txq->write_ptr].meta;
2406         out_meta->flags = 0;
2407
2408         /*
2409          * The second TB (tb1) points to the remainder of the TX command
2410          * and the 802.11 header - dword aligned size
2411          * (This calculation modifies the TX command, so do it before the
2412          * setup of the first TB)
2413          */
2414         len = sizeof(struct iwl_tx_cmd) + sizeof(struct iwl_cmd_header) +
2415               hdr_len - IWL_FIRST_TB_SIZE;
2416         /* do not align A-MSDU to dword as the subframe header aligns it */
2417         amsdu = ieee80211_is_data_qos(fc) &&
2418                 (*ieee80211_get_qos_ctl(hdr) &
2419                  IEEE80211_QOS_CTL_A_MSDU_PRESENT);
2420         if (trans_pcie->sw_csum_tx || !amsdu) {
2421                 tb1_len = ALIGN(len, 4);
2422                 /* Tell NIC about any 2-byte padding after MAC header */
2423                 if (tb1_len != len)
2424                         tx_cmd->tx_flags |= TX_CMD_FLG_MH_PAD_MSK;
2425         } else {
2426                 tb1_len = len;
2427         }
2428
2429         /*
2430          * The first TB points to bi-directional DMA data, we'll
2431          * memcpy the data into it later.
2432          */
2433         iwl_pcie_txq_build_tfd(trans, txq, tb0_phys,
2434                                IWL_FIRST_TB_SIZE, true);
2435
2436         /* there must be data left over for TB1 or this code must be changed */
2437         BUILD_BUG_ON(sizeof(struct iwl_tx_cmd) < IWL_FIRST_TB_SIZE);
2438
2439         /* map the data for TB1: it starts right after the bytes TB0 covers */
2440         tb1_addr = ((u8 *)&dev_cmd->hdr) + IWL_FIRST_TB_SIZE;
2441         tb1_phys = dma_map_single(trans->dev, tb1_addr, tb1_len, DMA_TO_DEVICE);
2442         if (unlikely(dma_mapping_error(trans->dev, tb1_phys)))
2443                 goto out_err;
2444         iwl_pcie_txq_build_tfd(trans, txq, tb1_phys, tb1_len, false);
2445
             /* add the TBs for the rest of the frame */
2446         if (amsdu) {
2447                 if (unlikely(iwl_fill_data_tbs_amsdu(trans, skb, txq, hdr_len,
2448                                                      out_meta, dev_cmd,
2449                                                      tb1_len)))
2450                         goto out_err;
2451         } else if (unlikely(iwl_fill_data_tbs(trans, skb, txq, hdr_len,
2452                                        out_meta, dev_cmd, tb1_len))) {
2453                 goto out_err;
2454         }
2455
2456         /* building the A-MSDU might have changed this data, so memcpy it now */
2457         memcpy(&txq->first_tb_bufs[txq->write_ptr], &dev_cmd->hdr,
2458                IWL_FIRST_TB_SIZE);
2459
2460         tfd = iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr);
2461         /* Set up entry for this TFD in Tx byte-count array */
2462         iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len),
2463                                          iwl_pcie_tfd_get_num_tbs(trans, tfd));
2464
             /*
              * With the "more fragments" bit set, only the software write pointer
              * is advanced below; the device write pointer is left alone -
              * presumably it gets updated with a later frame (TODO confirm).
              */
2465         wait_write_ptr = ieee80211_has_morefrags(fc);
2466
2467         /* start timer if queue currently empty */
2468         if (txq->read_ptr == txq->write_ptr) {
2469                 if (txq->wd_timeout) {
2470                         /*
2471                          * If the TXQ is not frozen, arm the stuck timer now;
2472                          * otherwise store the timeout in
2473                          * frozen_expiry_remainder so that the timer will be
2474                          * armed with the right value when the station wakes up.
2475                          */
2476                         if (!txq->frozen)
2477                                 mod_timer(&txq->stuck_timer,
2478                                           jiffies + txq->wd_timeout);
2479                         else
2480                                 txq->frozen_expiry_remainder = txq->wd_timeout;
2481                 }
2482                 IWL_DEBUG_RPM(trans, "Q: %d first tx - take ref\n", txq->id);
2483                 iwl_trans_ref(trans);
2484         }
2485
2486         /* Tell device the write index *just past* this latest filled TFD */
2487         txq->write_ptr = iwl_queue_inc_wrap(txq->write_ptr);
2488         if (!wait_write_ptr)
2489                 iwl_pcie_txq_inc_wr_ptr(trans, txq);
2490
2491         /*
2492          * At this point the frame is "transmitted" successfully
2493          * and we will get a TX status notification eventually.
2494          */
2495         spin_unlock(&txq->lock);
2496         return 0;
             /*
              * Error exit: only the lock is released and write_ptr is not advanced.
              * NOTE(review): TB1 is not unmapped here; the fill helpers appear to be
              * expected to unmap the TFD themselves before failing (the out_unmap
              * path of the CONFIG_INET A-MSDU helper does) - confirm that this
              * holds for every failure path.
              */
2497 out_err:
2498         spin_unlock(&txq->lock);
2499         return -1;
2500 }