GNU Linux-libre 5.16.19-gnu
drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2020 Marvell. */

#include "otx2_cptvf.h"
#include "otx2_cpt_common.h"

/* SG list header size in bytes */
#define SG_LIST_HDR_SIZE        8

/* Default timeout when waiting for free pending entry in us */
#define CPT_PENTRY_TIMEOUT      1000
#define CPT_PENTRY_STEP         50

/* Default threshold for stopping and resuming sender requests */
#define CPT_IQ_STOP_MARGIN      128
#define CPT_IQ_RESUME_MARGIN    512

/* Default command timeout in seconds */
#define CPT_COMMAND_TIMEOUT     4
#define CPT_TIME_IN_RESET_COUNT 5

static void otx2_cpt_dump_sg_list(struct pci_dev *pdev,
                                  struct otx2_cpt_req_info *req)
{
        int i;

        pr_debug("Gather list size %d\n", req->in_cnt);
        for (i = 0; i < req->in_cnt; i++) {
                pr_debug("Buffer %d size %d, vptr 0x%p, dmaptr 0x%p\n", i,
                         req->in[i].size, req->in[i].vptr,
                         (void *) req->in[i].dma_addr);
                pr_debug("Buffer hexdump (%d bytes)\n",
                         req->in[i].size);
                print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1,
                                     req->in[i].vptr, req->in[i].size, false);
        }
        pr_debug("Scatter list size %d\n", req->out_cnt);
        for (i = 0; i < req->out_cnt; i++) {
                pr_debug("Buffer %d size %d, vptr 0x%p, dmaptr 0x%p\n", i,
                         req->out[i].size, req->out[i].vptr,
                         (void *) req->out[i].dma_addr);
                pr_debug("Buffer hexdump (%d bytes)\n", req->out[i].size);
                print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1,
                                     req->out[i].vptr, req->out[i].size, false);
        }
}

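/*
 * Grab the entry at the rear of the pending queue and advance the rear
 * index, or return NULL if that entry is still busy (queue full).  Must be
 * called with the pending queue lock held.
 */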
static inline struct otx2_cpt_pending_entry *get_free_pending_entry(
                                        struct otx2_cpt_pending_queue *q,
                                        int qlen)
{
        struct otx2_cpt_pending_entry *ent = NULL;

        ent = &q->head[q->rear];
        if (unlikely(ent->busy))
                return NULL;

        q->rear++;
        if (unlikely(q->rear == qlen))
                q->rear = 0;

        return ent;
}

static inline u32 modulo_inc(u32 index, u32 length, u32 inc)
{
        if (WARN_ON(inc > length))
                inc = length;

        index += inc;
        if (unlikely(index >= length))
                index -= length;

        return index;
}

static inline void free_pentry(struct otx2_cpt_pending_entry *pentry)
{
        pentry->completion_addr = NULL;
        pentry->info = NULL;
        pentry->callback = NULL;
        pentry->areq = NULL;
        pentry->resume_sender = false;
        pentry->busy = false;
}

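/*
 * DMA map every buffer in @list and encode the mappings into @buffer as
 * big-endian scatter/gather components, four pointer/length pairs per
 * component.  On a mapping error, all buffers mapped so far are unmapped.
 */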
static inline int setup_sgio_components(struct pci_dev *pdev,
                                        struct otx2_cpt_buf_ptr *list,
                                        int buf_count, u8 *buffer)
{
        struct otx2_cpt_sglist_component *sg_ptr = NULL;
        int ret = 0, i, j;
        int components;

        if (unlikely(!list)) {
                dev_err(&pdev->dev, "Input list pointer is NULL\n");
                return -EFAULT;
        }

        for (i = 0; i < buf_count; i++) {
                if (unlikely(!list[i].vptr))
                        continue;
                list[i].dma_addr = dma_map_single(&pdev->dev, list[i].vptr,
                                                  list[i].size,
                                                  DMA_BIDIRECTIONAL);
                if (unlikely(dma_mapping_error(&pdev->dev, list[i].dma_addr))) {
                        dev_err(&pdev->dev, "Dma mapping failed\n");
                        ret = -EIO;
                        goto sg_cleanup;
                }
        }
        components = buf_count / 4;
        sg_ptr = (struct otx2_cpt_sglist_component *)buffer;
        for (i = 0; i < components; i++) {
                sg_ptr->len0 = cpu_to_be16(list[i * 4 + 0].size);
                sg_ptr->len1 = cpu_to_be16(list[i * 4 + 1].size);
                sg_ptr->len2 = cpu_to_be16(list[i * 4 + 2].size);
                sg_ptr->len3 = cpu_to_be16(list[i * 4 + 3].size);
                sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr);
                sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr);
                sg_ptr->ptr2 = cpu_to_be64(list[i * 4 + 2].dma_addr);
                sg_ptr->ptr3 = cpu_to_be64(list[i * 4 + 3].dma_addr);
                sg_ptr++;
        }
        components = buf_count % 4;

        switch (components) {
        case 3:
                sg_ptr->len2 = cpu_to_be16(list[i * 4 + 2].size);
                sg_ptr->ptr2 = cpu_to_be64(list[i * 4 + 2].dma_addr);
                fallthrough;
        case 2:
                sg_ptr->len1 = cpu_to_be16(list[i * 4 + 1].size);
                sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr);
                fallthrough;
        case 1:
                sg_ptr->len0 = cpu_to_be16(list[i * 4 + 0].size);
                sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr);
                break;
        default:
                break;
        }
        return ret;

sg_cleanup:
        for (j = 0; j < i; j++) {
                if (list[j].dma_addr) {
                        dma_unmap_single(&pdev->dev, list[j].dma_addr,
                                         list[j].size, DMA_BIDIRECTIONAL);
                }

                list[j].dma_addr = 0;
        }
        return ret;
}

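/*
 * Allocate the per-request instruction info together with a DMA buffer that
 * holds the 8-byte SG list header, the gather and scatter component lists
 * and the otx2_cpt_res_s completion structure, then DMA map that buffer.
 */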
static inline struct otx2_cpt_inst_info *info_create(struct pci_dev *pdev,
                                              struct otx2_cpt_req_info *req,
                                              gfp_t gfp)
{
        int align = OTX2_CPT_DMA_MINALIGN;
        struct otx2_cpt_inst_info *info;
        u32 dlen, align_dlen, info_len;
        u16 g_sz_bytes, s_sz_bytes;
        u32 total_mem_len;

        if (unlikely(req->in_cnt > OTX2_CPT_MAX_SG_IN_CNT ||
                     req->out_cnt > OTX2_CPT_MAX_SG_OUT_CNT)) {
                dev_err(&pdev->dev, "Error too many sg components\n");
                return NULL;
        }

        g_sz_bytes = ((req->in_cnt + 3) / 4) *
                      sizeof(struct otx2_cpt_sglist_component);
        s_sz_bytes = ((req->out_cnt + 3) / 4) *
                      sizeof(struct otx2_cpt_sglist_component);

        dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE;
        align_dlen = ALIGN(dlen, align);
        info_len = ALIGN(sizeof(*info), align);
        total_mem_len = align_dlen + info_len + sizeof(union otx2_cpt_res_s);

        info = kzalloc(total_mem_len, gfp);
        if (unlikely(!info))
                return NULL;

        info->dlen = dlen;
        info->in_buffer = (u8 *)info + info_len;

        ((u16 *)info->in_buffer)[0] = req->out_cnt;
        ((u16 *)info->in_buffer)[1] = req->in_cnt;
        ((u16 *)info->in_buffer)[2] = 0;
        ((u16 *)info->in_buffer)[3] = 0;
        cpu_to_be64s((u64 *)info->in_buffer);

        /* Setup gather (input) components */
        if (setup_sgio_components(pdev, req->in, req->in_cnt,
                                  &info->in_buffer[8])) {
                dev_err(&pdev->dev, "Failed to setup gather list\n");
                goto destroy_info;
        }

        if (setup_sgio_components(pdev, req->out, req->out_cnt,
                                  &info->in_buffer[8 + g_sz_bytes])) {
                dev_err(&pdev->dev, "Failed to setup scatter list\n");
                goto destroy_info;
        }

        info->dma_len = total_mem_len - info_len;
        info->dptr_baddr = dma_map_single(&pdev->dev, info->in_buffer,
                                          info->dma_len, DMA_BIDIRECTIONAL);
        if (unlikely(dma_mapping_error(&pdev->dev, info->dptr_baddr))) {
                dev_err(&pdev->dev, "DMA Mapping failed for cpt req\n");
                goto destroy_info;
        }
        /*
         * Get buffer for union otx2_cpt_res_s response
         * structure and its physical address
         */
        info->completion_addr = info->in_buffer + align_dlen;
        info->comp_baddr = info->dptr_baddr + align_dlen;

        return info;

destroy_info:
        otx2_cpt_info_destroy(pdev, info);
        return NULL;
}

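/*
 * Build a CPT instruction for @req, reserve a pending queue entry and push
 * the instruction to the LF instruction queue.  Returns -EINPROGRESS on
 * success, or -EBUSY when the request was queued but the pending queue is
 * nearly full, telling a sleepable sender to back off until it is resumed.
 */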
static int process_request(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
                           struct otx2_cpt_pending_queue *pqueue,
                           struct otx2_cptlf_info *lf)
{
        struct otx2_cptvf_request *cpt_req = &req->req;
        struct otx2_cpt_pending_entry *pentry = NULL;
        union otx2_cpt_ctrl_info *ctrl = &req->ctrl;
        struct otx2_cpt_inst_info *info = NULL;
        union otx2_cpt_res_s *result = NULL;
        struct otx2_cpt_iq_command iq_cmd;
        union otx2_cpt_inst_s cptinst;
        int retry, ret = 0;
        u8 resume_sender;
        gfp_t gfp;

        gfp = (req->areq->flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL :
                                                              GFP_ATOMIC;
        if (unlikely(!otx2_cptlf_started(lf->lfs)))
                return -ENODEV;

        info = info_create(pdev, req, gfp);
        if (unlikely(!info)) {
                dev_err(&pdev->dev, "Setting up cpt inst info failed\n");
                return -ENOMEM;
        }
        cpt_req->dlen = info->dlen;

        result = info->completion_addr;
        result->s.compcode = OTX2_CPT_COMPLETION_CODE_INIT;

        spin_lock_bh(&pqueue->lock);
        pentry = get_free_pending_entry(pqueue, pqueue->qlen);
        retry = CPT_PENTRY_TIMEOUT / CPT_PENTRY_STEP;
        while (unlikely(!pentry) && retry--) {
                spin_unlock_bh(&pqueue->lock);
                udelay(CPT_PENTRY_STEP);
                spin_lock_bh(&pqueue->lock);
                pentry = get_free_pending_entry(pqueue, pqueue->qlen);
        }

        if (unlikely(!pentry)) {
                ret = -ENOSPC;
                goto destroy_info;
        }

        /*
         * Check if we are close to filling the entire pending queue;
         * if so, tell the sender to stop/sleep by returning -EBUSY.
         * We do this only for contexts which can sleep (GFP_KERNEL).
         */
        if (gfp == GFP_KERNEL &&
            pqueue->pending_count > (pqueue->qlen - CPT_IQ_STOP_MARGIN)) {
                pentry->resume_sender = true;
        } else {
                pentry->resume_sender = false;
        }
        resume_sender = pentry->resume_sender;
        pqueue->pending_count++;

        pentry->completion_addr = info->completion_addr;
        pentry->info = info;
        pentry->callback = req->callback;
        pentry->areq = req->areq;
        pentry->busy = true;
        info->pentry = pentry;
        info->time_in = jiffies;
        info->req = req;

        /* Fill in the command */
        iq_cmd.cmd.u = 0;
        iq_cmd.cmd.s.opcode = cpu_to_be16(cpt_req->opcode.flags);
        iq_cmd.cmd.s.param1 = cpu_to_be16(cpt_req->param1);
        iq_cmd.cmd.s.param2 = cpu_to_be16(cpt_req->param2);
        iq_cmd.cmd.s.dlen   = cpu_to_be16(cpt_req->dlen);

        /* 64-bit swap for microcode data reads, not needed for addresses */
        cpu_to_be64s(&iq_cmd.cmd.u);
        iq_cmd.dptr = info->dptr_baddr;
        iq_cmd.rptr = 0;
        iq_cmd.cptr.u = 0;
        iq_cmd.cptr.s.grp = ctrl->s.grp;

        /* Fill in the CPT_INST_S type command for HW interpretation */
        otx2_cpt_fill_inst(&cptinst, &iq_cmd, info->comp_baddr);

        /* Print debug info if enabled */
        otx2_cpt_dump_sg_list(pdev, req);
        pr_debug("Cpt_inst_s hexdump (%d bytes)\n", OTX2_CPT_INST_SIZE);
        print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, &cptinst,
                             OTX2_CPT_INST_SIZE, false);
        pr_debug("Dptr hexdump (%d bytes)\n", cpt_req->dlen);
        print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, info->in_buffer,
                             cpt_req->dlen, false);

        /* Send CPT command */
        lf->lfs->ops->send_cmd(&cptinst, 1, lf);

        /*
         * The pending queue entry is allocated and prepared in the same
         * critical section as the submission of the CPT instruction to the
         * instruction queue, so that the order of requests is identical in
         * both the pending and the instruction queues.
         */
        spin_unlock_bh(&pqueue->lock);

        ret = resume_sender ? -EBUSY : -EINPROGRESS;
        return ret;

destroy_info:
        spin_unlock_bh(&pqueue->lock);
        otx2_cpt_info_destroy(pdev, info);
        return ret;
}

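/*
 * Submit @req on the pending/instruction queue of the CPT LF selected by
 * @cpu_num.
 */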
int otx2_cpt_do_request(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
                        int cpu_num)
{
        struct otx2_cptvf_dev *cptvf = pci_get_drvdata(pdev);
        struct otx2_cptlfs_info *lfs = &cptvf->lfs;

        return process_request(lfs->pdev, req, &lfs->lf[cpu_num].pqueue,
                               &lfs->lf[cpu_num]);
}

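/*
 * Translate the hardware and microcode completion codes of an instruction
 * into a result code for the request callback.  Returns 1 if the instruction
 * has not completed yet (so the caller must stop scanning the pending
 * queue), 0 otherwise.
 */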
static int cpt_process_ccode(struct otx2_cptlfs_info *lfs,
                             union otx2_cpt_res_s *cpt_status,
                             struct otx2_cpt_inst_info *info,
                             u32 *res_code)
{
        u8 uc_ccode = lfs->ops->cpt_get_uc_compcode(cpt_status);
        u8 ccode = lfs->ops->cpt_get_compcode(cpt_status);
        struct pci_dev *pdev = lfs->pdev;

        switch (ccode) {
        case OTX2_CPT_COMP_E_FAULT:
                dev_err(&pdev->dev,
                        "Request failed with DMA fault\n");
                otx2_cpt_dump_sg_list(pdev, info->req);
                break;

        case OTX2_CPT_COMP_E_HWERR:
                dev_err(&pdev->dev,
                        "Request failed with hardware error\n");
                otx2_cpt_dump_sg_list(pdev, info->req);
                break;

        case OTX2_CPT_COMP_E_INSTERR:
                dev_err(&pdev->dev,
                        "Request failed with instruction error\n");
                otx2_cpt_dump_sg_list(pdev, info->req);
                break;

        case OTX2_CPT_COMP_E_NOTDONE:
                /* check for timeout */
                if (time_after_eq(jiffies, info->time_in +
                                  CPT_COMMAND_TIMEOUT * HZ)) {
                        dev_warn(&pdev->dev,
                                 "Request timed out 0x%p\n", info->req);
                } else if (info->extra_time < CPT_TIME_IN_RESET_COUNT) {
                        info->time_in = jiffies;
                        info->extra_time++;
                }
                return 1;

        case OTX2_CPT_COMP_E_GOOD:
        case OTX2_CPT_COMP_E_WARN:
                /*
                 * Check the microcode completion code; it is only valid
                 * when the completion code is CPT_COMP_E::GOOD.
                 */
                if (uc_ccode != OTX2_CPT_UCC_SUCCESS) {
                        /*
                         * If the requested HMAC is truncated and the ucode
                         * returns an s/g write length error, report success:
                         * the ucode writes as many bytes of the calculated
                         * HMAC as fit in the gather buffer and flags an s/g
                         * write length error whenever the gather buffer is
                         * smaller than the full HMAC size.
                         */
                        if (info->req->is_trunc_hmac &&
                            uc_ccode == OTX2_CPT_UCC_SG_WRITE_LENGTH) {
                                *res_code = 0;
                                break;
                        }

                        dev_err(&pdev->dev,
                                "Request failed with software error code 0x%x\n",
                                cpt_status->s.uc_compcode);
                        otx2_cpt_dump_sg_list(pdev, info->req);
                        break;
                }
                /* Request has been processed with success */
                *res_code = 0;
                break;

        default:
                dev_err(&pdev->dev,
                        "Request returned invalid status %d\n", ccode);
                break;
        }
        return 0;
}

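/*
 * Drain completed entries from the front of the pending queue, invoking the
 * request callbacks outside the queue lock.  Senders that were throttled
 * with -EBUSY are resumed CPT_IQ_RESUME_MARGIN entries before the queue
 * becomes empty.
 */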
static inline void process_pending_queue(struct otx2_cptlfs_info *lfs,
                                         struct otx2_cpt_pending_queue *pqueue)
{
        struct otx2_cpt_pending_entry *resume_pentry = NULL;
        void (*callback)(int status, void *arg, void *req);
        struct otx2_cpt_pending_entry *pentry = NULL;
        union otx2_cpt_res_s *cpt_status = NULL;
        struct otx2_cpt_inst_info *info = NULL;
        struct otx2_cpt_req_info *req = NULL;
        struct crypto_async_request *areq;
        struct pci_dev *pdev = lfs->pdev;
        u32 res_code, resume_index;

        while (1) {
                spin_lock_bh(&pqueue->lock);
                pentry = &pqueue->head[pqueue->front];

                if (WARN_ON(!pentry)) {
                        spin_unlock_bh(&pqueue->lock);
                        break;
                }

                res_code = -EINVAL;
                if (unlikely(!pentry->busy)) {
                        spin_unlock_bh(&pqueue->lock);
                        break;
                }

                if (unlikely(!pentry->callback)) {
                        dev_err(&pdev->dev, "Callback NULL\n");
                        goto process_pentry;
                }

                info = pentry->info;
                if (unlikely(!info)) {
                        dev_err(&pdev->dev, "Pending entry post arg NULL\n");
                        goto process_pentry;
                }

                req = info->req;
                if (unlikely(!req)) {
                        dev_err(&pdev->dev, "Request NULL\n");
                        goto process_pentry;
                }

                cpt_status = pentry->completion_addr;
                if (unlikely(!cpt_status)) {
                        dev_err(&pdev->dev, "Completion address NULL\n");
                        goto process_pentry;
                }

                if (cpt_process_ccode(lfs, cpt_status, info, &res_code)) {
                        spin_unlock_bh(&pqueue->lock);
                        return;
                }
                info->pdev = pdev;

process_pentry:
                /*
                 * Check if we should inform the sending side to resume.
                 * We do this CPT_IQ_RESUME_MARGIN entries before the
                 * pending queue becomes empty.
                 */
                resume_index = modulo_inc(pqueue->front, pqueue->qlen,
                                          CPT_IQ_RESUME_MARGIN);
                resume_pentry = &pqueue->head[resume_index];
                if (resume_pentry &&
                    resume_pentry->resume_sender) {
                        resume_pentry->resume_sender = false;
                        callback = resume_pentry->callback;
                        areq = resume_pentry->areq;

                        if (callback) {
                                spin_unlock_bh(&pqueue->lock);

                                /*
                                 * EINPROGRESS is an indication for sending
                                 * side that it can resume sending requests
                                 */
                                callback(-EINPROGRESS, areq, info);
                                spin_lock_bh(&pqueue->lock);
                        }
                }

                callback = pentry->callback;
                areq = pentry->areq;
                free_pentry(pentry);

                pqueue->pending_count--;
                pqueue->front = modulo_inc(pqueue->front, pqueue->qlen, 1);
                spin_unlock_bh(&pqueue->lock);

                /*
                 * Call the callback after the current pending entry has
                 * been processed; skip it if the callback pointer is NULL.
                 */
                if (callback)
                        callback(res_code, areq, info);
        }
}

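/* Post-processing hook: drain the pending queue of the LF named in @wqe. */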
void otx2_cpt_post_process(struct otx2_cptlf_wqe *wqe)
{
        process_pending_queue(wqe->lfs,
                              &wqe->lfs->lf[wqe->lf_num].pqueue);
}

int otx2_cpt_get_kcrypto_eng_grp_num(struct pci_dev *pdev)
{
        struct otx2_cptvf_dev *cptvf = pci_get_drvdata(pdev);

        return cptvf->lfs.kcrypto_eng_grp_num;
}