1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
4 #include <linux/vhost_types.h>
5 #include <linux/vdpa.h>
7 #include <linux/string.h>
8 #include <linux/mlx5/qp.h>
11 /* DIV_ROUND_UP where the divider is a power of 2 given by its log base 2 value */
/* NOTE(review): the expression below shifts the plain int constant 1 by __s;
 * a shift count of 31 or more would not fit in an int. Confirm callers only
 * pass smaller shifts, or switch the constant to a 64-bit one.
 */
12 #define MLX5_DIV_ROUND_UP_POW2(_n, _s) \
16 _res = (((_n) + (1 << (__s)) - 1) >> (__s)); \
/* Round len up to a whole number of pages of size (1 << page_shift) and
 * return half of that page count, rounded up. create_direct_mr() writes the
 * result into the translations_octword_size and
 * translations_octword_actual_size fields.
 */
20 static int get_octo_len(u64 len, int page_shift)
22 u64 page_size = 1ULL << page_shift;
25 npages = ALIGN(len, page_size) >> page_shift;
26 return (npages + 1) / 2;
/* Store an access mode in the mkey context at mkc. The value is split over
 * two fields: the two low bits go to access_mode_1_0 and the remaining upper
 * bits (mode >> 2) go to access_mode_4_2.
 */
29 static void mlx5_set_access_mode(void *mkc, int mode)
31 MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
32 MLX5_SET(mkc, mkc, access_mode_4_2, mode >> 2);
/* Fill the mtt array with big-endian DMA addresses taken from the DMA-mapped
 * scatterlist of mr. The outer loop visits mr->nent scatterlist entries; for
 * each one the inner loop emits addresses in steps of BIT(mr->log_size),
 * reducing the remaining DMA length and the nsg counter by one step per
 * emitted entry.
 */
35 static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt)
37 struct scatterlist *sg;
44 for_each_sg(mr->sg_head.sgl, sg, mr->nent, i) {
45 for (dma_addr = sg_dma_address(sg), dma_len = sg_dma_len(sg);
47 nsg--, dma_addr += BIT(mr->log_size), dma_len -= BIT(mr->log_size))
48 mtt[j++] = cpu_to_be64(dma_addr);
/* Build a create_mkey_in command for a direct memory key using the MTT access
 * mode and submit it with mlx5_vdpa_create_mkey(); the resulting key is
 * stored in mr->mr.
 *
 * The command buffer holds the create_mkey_in structure followed by one MTT
 * entry per scatterlist element (mr->nsg), with the MTT area rounded up to a
 * multiple of 16 bytes. The key covers mr->end - mr->start bytes starting at
 * mr->offset, using pages of size 1 << mr->log_size.
 *
 * The "Failed to create direct MR" warning is presumably emitted only when
 * mlx5_vdpa_create_mkey() fails - confirm against the full function.
 */
52 static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
59 inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + roundup(MLX5_ST_SZ_BYTES(mtt) * mr->nsg, 16);
60 in = kvzalloc(inlen, GFP_KERNEL);
64 MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
65 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
/* Local write/read permission follows the VHOST_MAP_WO/VHOST_MAP_RO bits of mr->perm. */
66 MLX5_SET(mkc, mkc, lw, !!(mr->perm & VHOST_MAP_WO));
67 MLX5_SET(mkc, mkc, lr, !!(mr->perm & VHOST_MAP_RO));
68 mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_MTT);
69 MLX5_SET(mkc, mkc, qpn, 0xffffff);
70 MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
71 MLX5_SET64(mkc, mkc, start_addr, mr->offset);
72 MLX5_SET64(mkc, mkc, len, mr->end - mr->start);
73 MLX5_SET(mkc, mkc, log_page_size, mr->log_size);
/* Both translation size fields are derived from the same region length and page size. */
74 MLX5_SET(mkc, mkc, translations_octword_size,
75 get_octo_len(mr->end - mr->start, mr->log_size));
76 MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
77 get_octo_len(mr->end - mr->start, mr->log_size));
/* The MTT entries are written right after the command header. */
78 populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt));
79 err = mlx5_vdpa_create_mkey(mvdev, &mr->mr, in, inlen);
82 mlx5_vdpa_warn(mvdev, "Failed to create direct MR\n");
/* Destroy the memory key stored in mr->mr. */
89 static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
91 mlx5_vdpa_destroy_mkey(mvdev, mr->mr);
/* Start of the part of map that lies inside mr: the larger of the two start addresses. */
94 static u64 map_start(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
96 return max_t(u64, map->start, mr->start);
/* Exclusive end of the part of map that lies inside mr: the smaller of
 * map->last + 1 and mr->end.
 */
99 static u64 map_end(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
101 return min_t(u64, map->last + 1, mr->end);
/* Length of the part of map that lies inside mr, i.e. map_end() - map_start(). */
104 static u64 maplen(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
106 return map_end(map, mr) - map_start(map, mr);
/* Sentinel values returned by indir_start_addr() and indir_len() and checked
 * by create_indirect_key(). Both are the all-ones u64 value.
 */
109 #define MLX5_VDPA_INVALID_START_ADDR ((u64)-1)
110 #define MLX5_VDPA_INVALID_LEN ((u64)-1)
/* Look up the first direct MR on mkey->head. MLX5_VDPA_INVALID_START_ADDR is
 * returned on one path - presumably when the list is empty and the lookup
 * yields NULL; confirm against the full function.
 */
112 static u64 indir_start_addr(struct mlx5_vdpa_mr *mkey)
114 struct mlx5_vdpa_direct_mr *s;
116 s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
118 return MLX5_VDPA_INVALID_START_ADDR;
/* Length spanned by the direct MRs on mkey->head: the end of the last entry
 * minus the start of the first entry. MLX5_VDPA_INVALID_LEN is returned on
 * one path - presumably when the list is empty; confirm against the full
 * function.
 */
123 static u64 indir_len(struct mlx5_vdpa_mr *mkey)
125 struct mlx5_vdpa_direct_mr *s;
126 struct mlx5_vdpa_direct_mr *e;
128 s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
130 return MLX5_VDPA_INVALID_LEN;
132 e = list_last_entry(&mkey->head, struct mlx5_vdpa_direct_mr, list);
134 return e->end - s->start;
/* Size limit of 2^30 bytes, expressed as a log2 value and as a byte count.
 * add_direct_chain() uses MAX_KLM_SIZE as the upper bound for the size of
 * each direct MR it creates.
 */
137 #define LOG_MAX_KLM_SIZE 30
138 #define MAX_KLM_SIZE BIT(LOG_MAX_KLM_SIZE)
/* Map a 64-bit size to the 32-bit value that fill_indir() stores in klm->bcount. */
140 static u32 klm_bcount(u64 size)
/* Populate the KLM array that follows the create_mkey_in header in the
 * command buffer in, one entry at a time, while walking the direct MRs on
 * mkey->head.
 *
 * preve tracks an address carried between iterations (presumably the end of
 * the previously described range - its updates are not visible here). When a
 * direct MR starts exactly at preve, the entry refers to that MR's key with a
 * byte count equal to the MR length. Otherwise the entry refers to the
 * device's null mkey with a byte count equal to the gap between preve and the
 * MR start.
 */
145 static void fill_indir(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey, void *in)
147 struct mlx5_vdpa_direct_mr *dmr;
148 struct mlx5_klm *klmarr;
149 struct mlx5_klm *klm;
154 klmarr = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
156 list_for_each_entry(dmr, &mkey->head, list) {
164 if (preve == dmr->start) {
165 klm->key = cpu_to_be32(dmr->mr);
166 klm->bcount = cpu_to_be32(klm_bcount(dmr->end - dmr->start));
169 klm->key = cpu_to_be32(mvdev->res.null_mkey);
170 klm->bcount = cpu_to_be32(klm_bcount(dmr->start - preve));
/* Number of bytes reserved for nklms KLM entries: the entry count is rounded
 * up to a multiple of 4 and multiplied by 16.
 */
177 static int klm_byte_size(int nklms)
179 return 16 * ALIGN(nklms, 4);
/* Create the indirect memory key (KLM access mode) that refers to the direct
 * MRs linked on mr->head; the resulting key is stored in mr->mkey.
 *
 * The start address comes from indir_start_addr(); len is assigned on a line
 * not visible here (presumably from indir_len()). The sentinel check below
 * guards against either value being invalid. Local read and write access are
 * both enabled unconditionally. The command buffer holds the create_mkey_in
 * structure followed by klm_byte_size(mr->num_klms) bytes of KLM entries,
 * which fill_indir() populates.
 */
182 static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
191 start = indir_start_addr(mr);
193 if (start == MLX5_VDPA_INVALID_START_ADDR || len == MLX5_VDPA_INVALID_LEN)
196 inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + klm_byte_size(mr->num_klms);
197 in = kzalloc(inlen, GFP_KERNEL);
201 MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
202 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
203 MLX5_SET(mkc, mkc, lw, 1);
204 MLX5_SET(mkc, mkc, lr, 1);
205 mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_KLMS);
206 MLX5_SET(mkc, mkc, qpn, 0xffffff);
207 MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
208 MLX5_SET64(mkc, mkc, start_addr, start);
209 MLX5_SET64(mkc, mkc, len, len);
/* The declared size uses the padded KLM area; the actual size is the real entry count. */
210 MLX5_SET(mkc, mkc, translations_octword_size, klm_byte_size(mr->num_klms) / 16);
211 MLX5_SET(create_mkey_in, in, translations_octword_actual_size, mr->num_klms);
212 fill_indir(mvdev, mr, in);
213 err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
/* Destroy the indirect memory key stored in mkey->mkey. */
218 static void destroy_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey)
220 mlx5_vdpa_destroy_mkey(mvdev, mkey->mkey);
/* Prepare and create one direct MR for the range [mr->start, mr->end):
 *  1. walk the iotlb maps overlapping the range and compute the gcd of their
 *     overlap lengths;
 *  2. use ilog2() of that gcd as the entity (page) size and allocate a
 *     scatterlist table with one entry per entity;
 *  3. walk the maps again and point each scatterlist entry at the page for
 *     the corresponding physical address;
 *  4. DMA-map the scatterlist and create the hardware key with
 *     create_direct_mr().
 * The trailing dma_unmap_sg_attrs()/sg_free_table() calls presumably belong
 * to the error unwinding path (labels are not visible here).
 */
223 static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr,
224 struct vhost_iotlb *iotlb)
226 struct vhost_iotlb_map *map;
227 unsigned long lgcd = 0;
237 struct scatterlist *sg;
238 struct device *dma = mvdev->vdev.dma_dev;
/* NOTE(review): this loop passes `start` to vhost_iotlb_itree_next() while
 * the second loop below passes mr->start for the same range - confirm that
 * the difference is intended.
 */
240 for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
241 map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) {
242 size = maplen(map, mr);
243 lgcd = gcd(lgcd, size);
246 log_entity_size = ilog2(lgcd);
/* NOTE(review): int shift; would overflow for log_entity_size >= 31 - confirm the bound. */
248 sglen = 1 << log_entity_size;
249 nsg = MLX5_DIV_ROUND_UP_POW2(mr->end - mr->start, log_entity_size);
251 err = sg_alloc_table(&mr->sg_head, nsg, GFP_KERNEL);
/* Second pass: one scatterlist entry of sglen bytes per entity of each map. */
255 sg = mr->sg_head.sgl;
256 for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
257 map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) {
258 paend = map->addr + maplen(map, mr);
259 for (pa = map->addr; pa < paend; pa += sglen) {
260 pg = pfn_to_page(__phys_to_pfn(pa));
262 mlx5_vdpa_warn(mvdev, "sg null. start 0x%llx, end 0x%llx\n",
263 map->start, map->last + 1);
267 sg_set_page(sg, pg, sglen, 0);
274 mr->log_size = log_entity_size;
/* mr->nent receives the number of entries produced by the DMA mapping. */
276 mr->nent = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
282 err = create_direct_mr(mvdev, mr);
289 dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
291 sg_free_table(&mr->sg_head);
/* Tear down a direct MR: destroy its memory key, DMA-unmap its scatterlist
 * (using the same entry count, mr->nsg, that was passed when mapping) and
 * free the scatterlist table.
 */
295 static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
297 struct device *dma = mvdev->vdev.dma_dev;
299 destroy_direct_mr(mvdev, mr);
300 dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
301 sg_free_table(&mr->sg_head);
/* Create direct MRs for a range and append them to mr->head. Each direct MR
 * is allocated with kzalloc(), limited in size to MAX_KLM_SIZE, mapped with
 * map_direct_mr() and collected on the local list tmp, which is then spliced
 * onto the tail of mr->head.
 *
 * NOTE(review): the final loop tears down entries found on mr->head, not on
 * tmp. If that loop is the failure cleanup (control flow is not visible
 * here), entries collected on tmp before the failure would not be visited,
 * while entries added to mr->head by earlier calls would be - confirm against
 * the full function.
 */
304 static int add_direct_chain(struct mlx5_vdpa_dev *mvdev,
305 struct mlx5_vdpa_mr *mr,
309 struct vhost_iotlb *iotlb)
311 struct mlx5_vdpa_direct_mr *dmr;
312 struct mlx5_vdpa_direct_mr *n;
320 sz = (u32)min_t(u64, MAX_KLM_SIZE, size);
321 dmr = kzalloc(sizeof(*dmr), GFP_KERNEL);
330 err = map_direct_mr(mvdev, dmr, iotlb);
336 list_add_tail(&dmr->list, &tmp);
342 list_splice_tail(&tmp, &mr->head);
346 list_for_each_entry_safe(dmr, n, &mr->head, list) {
347 list_del_init(&dmr->list);
348 unmap_direct_mr(mvdev, dmr);
354 /* The iotlb pointer contains a list of maps. Go over the maps, possibly
355 * merging mergeable maps, and create direct memory keys that provide the
356 * device access to memory. The direct mkeys are then referred to by the
357 * indirect memory key that provides access to the entire address space given
360 static int create_user_mr(struct mlx5_vdpa_dev *mvdev,
361 struct mlx5_vdpa_mr *mr,
362 struct vhost_iotlb *iotlb)
364 struct mlx5_vdpa_direct_mr *dmr;
365 struct mlx5_vdpa_direct_mr *n;
366 struct vhost_iotlb_map *map;
/* Walk every map in [start, last]. A map that begins where the previous one
 * ended (pe) with the same permissions (pperm) is treated as a continuation.
 * A gap before map->start adds null-key entries to mr->num_klms. Accumulated
 * ranges [ps, pe) are turned into direct MRs by add_direct_chain().
 */
375 INIT_LIST_HEAD(&mr->head);
376 for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
377 map = vhost_iotlb_itree_next(map, start, last)) {
379 if (pe == map->start && pperm == map->perm) {
383 if (pe < map->start) {
384 /* We have a hole in the map. Check how
385 * many null keys are required to fill it.
387 nnuls = MLX5_DIV_ROUND_UP_POW2(map->start - pe,
389 mr->num_klms += nnuls;
391 err = add_direct_chain(mvdev, mr, ps, pe - ps, pperm, iotlb);
400 err = add_direct_chain(mvdev, mr, ps, pe - ps, pperm, iotlb);
404 /* Create the memory key that defines the guest's address space. This
405 * memory key refers to the direct keys that contain the MTT
408 err = create_indirect_key(mvdev, mr);
/* Undo in reverse list order: unlink and unmap every direct MR on mr->head
 * (presumably reached only on failure - labels are not visible here).
 */
416 list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
417 list_del_init(&dmr->list);
418 unmap_direct_mr(mvdev, dmr);
/* Create a memory key using the PA access mode with the length64 flag set
 * and local read/write enabled; the resulting key is stored in mr->mkey. The
 * command consists of the create_mkey_in structure only, with no translation
 * entries appended.
 */
424 static int create_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
426 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
431 in = kzalloc(inlen, GFP_KERNEL);
435 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
/* Only the low access-mode field is written here; mlx5_set_access_mode() is not used. */
437 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
438 MLX5_SET(mkc, mkc, length64, 1);
439 MLX5_SET(mkc, mkc, lw, 1);
440 MLX5_SET(mkc, mkc, lr, 1);
441 MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
442 MLX5_SET(mkc, mkc, qpn, 0xffffff);
444 err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
/* Destroy the memory key stored in mr->mkey. */
452 static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
454 mlx5_vdpa_destroy_mkey(mvdev, mr->mkey);
/* Populate dst from src. Two behaviours are visible: adding a single
 * read/write range covering [0, ULLONG_MAX] that maps every address to
 * itself, and copying each map of src (range, address and permissions) into
 * dst. The first is presumably used when src is NULL - callers in this file
 * do pass NULL - but the selecting condition is not visible here.
 */
457 static int dup_iotlb(struct vhost_iotlb *dst, struct vhost_iotlb *src)
459 struct vhost_iotlb_map *map;
460 u64 start = 0, last = ULLONG_MAX;
467 err = vhost_iotlb_add_range(dst, start, last, start, VHOST_ACCESS_RW);
471 for (map = vhost_iotlb_itree_first(src, start, last); map;
472 map = vhost_iotlb_itree_next(map, start, last)) {
473 err = vhost_iotlb_add_range(dst, map->start, map->last,
474 map->addr, map->perm);
/* Remove every range from iotlb by deleting [0, ULLONG_MAX]. */
481 static void prune_iotlb(struct vhost_iotlb *iotlb)
483 vhost_iotlb_del_range(iotlb, 0, ULLONG_MAX);
/* Destroy a user MR: first the indirect key, then every direct MR on
 * mr->head, unlinked and unmapped in reverse list order.
 */
486 static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
488 struct mlx5_vdpa_direct_mr *dmr;
489 struct mlx5_vdpa_direct_mr *n;
491 destroy_indirect_key(mvdev, mr);
492 list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
493 list_del_init(&dmr->list);
494 unmap_direct_mr(mvdev, dmr);
/* Release an MR: destroy its hardware key(s) via destroy_user_mr() or
 * destroy_dma_mr() (the condition selecting between them is not visible
 * here), free its private iotlb and unlink it from the device's MR list.
 */
499 static void _mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
505 destroy_user_mr(mvdev, mr);
507 destroy_dma_mr(mvdev, mr);
509 vhost_iotlb_free(mr->iotlb);
511 list_del(&mr->mr_list);
/* Drop one reference on mr and destroy it when the count reaches zero. This
 * helper takes no lock itself; its callers in this file hold mvdev->mr_mtx.
 */
516 static void _mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev,
517 struct mlx5_vdpa_mr *mr)
522 if (refcount_dec_and_test(&mr->refcount))
523 _mlx5_vdpa_destroy_mr(mvdev, mr);
/* Drop one reference on mr while holding mvdev->mr_mtx. */
526 void mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev,
527 struct mlx5_vdpa_mr *mr)
529 mutex_lock(&mvdev->mr_mtx);
530 _mlx5_vdpa_put_mr(mvdev, mr);
531 mutex_unlock(&mvdev->mr_mtx);
/* Take one additional reference on mr. This helper takes no lock itself;
 * mlx5_vdpa_get_mr() calls it with mvdev->mr_mtx held.
 */
534 static void _mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev,
535 struct mlx5_vdpa_mr *mr)
540 refcount_inc(&mr->refcount);
/* Take one additional reference on mr while holding mvdev->mr_mtx. */
543 void mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev,
544 struct mlx5_vdpa_mr *mr)
546 mutex_lock(&mvdev->mr_mtx);
547 _mlx5_vdpa_get_mr(mvdev, mr);
548 mutex_unlock(&mvdev->mr_mtx);
/* Install new_mr as the MR of address space asid and drop the reference held
 * on the MR it replaces. new_mr may be NULL (callers in this file pass NULL
 * to clear the slot).
 *
 * NOTE(review): old_mr is read from mvdev->mr[asid] in the initializer,
 * before mr_mtx is taken - confirm that callers serialize updates of the
 * same slot, or move the read under the lock.
 */
551 void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev,
552 struct mlx5_vdpa_mr *new_mr,
555 struct mlx5_vdpa_mr *old_mr = mvdev->mr[asid];
557 mutex_lock(&mvdev->mr_mtx);
559 _mlx5_vdpa_put_mr(mvdev, old_mr);
560 mvdev->mr[asid] = new_mr;
562 mutex_unlock(&mvdev->mr_mtx);
/* Under mvdev->mr_mtx, walk the device's MR list and log a warning with the
 * pointer, key and reference count of each MR still on it.
 */
565 static void mlx5_vdpa_show_mr_leaks(struct mlx5_vdpa_dev *mvdev)
567 struct mlx5_vdpa_mr *mr;
569 mutex_lock(&mvdev->mr_mtx);
571 list_for_each_entry(mr, &mvdev->mr_list_head, mr_list) {
573 mlx5_vdpa_warn(mvdev, "mkey still alive after resource delete: "
574 "mr: %p, mkey: 0x%x, refcount: %u\n",
575 mr, mr->mkey, refcount_read(&mr->refcount));
578 mutex_unlock(&mvdev->mr_mtx);
/* Release MR state for the device: clear the MR slot of every address space,
 * empty the control virtqueue iotlb and report any MR that is still listed
 * afterwards.
 */
582 void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev)
584 for (int i = 0; i < MLX5_VDPA_NUM_AS; i++)
585 mlx5_vdpa_update_mr(mvdev, NULL, i);
587 prune_iotlb(mvdev->cvq.iotlb);
589 mlx5_vdpa_show_mr_leaks(mvdev);
/* Initialise mr: create its hardware key with create_user_mr() or
 * create_dma_mr() (the selecting condition is not visible here; presumably it
 * depends on whether iotlb is provided), allocate a private iotlb, fill it
 * with dup_iotlb() and link mr on the device's MR list. The trailing calls
 * free the private iotlb and destroy the key again, presumably as error
 * unwinding. mlx5_vdpa_create_mr() calls this with mvdev->mr_mtx held.
 */
592 static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
593 struct mlx5_vdpa_mr *mr,
594 struct vhost_iotlb *iotlb)
599 err = create_user_mr(mvdev, mr, iotlb);
601 err = create_dma_mr(mvdev, mr);
606 mr->iotlb = vhost_iotlb_alloc(0, 0);
612 err = dup_iotlb(mr->iotlb, iotlb);
616 list_add_tail(&mr->mr_list, &mvdev->mr_list_head);
621 vhost_iotlb_free(mr->iotlb);
625 destroy_user_mr(mvdev, mr);
627 destroy_dma_mr(mvdev, mr);
/* Allocate a zeroed MR object, initialise it under mvdev->mr_mtx with
 * _mlx5_vdpa_create_mr() and set its reference count to 1. ERR_PTR(-ENOMEM)
 * is returned on one path, presumably when the allocation fails. Note that
 * the reference count is set after the mutex has been released.
 */
632 struct mlx5_vdpa_mr *mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
633 struct vhost_iotlb *iotlb)
635 struct mlx5_vdpa_mr *mr;
638 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
640 return ERR_PTR(-ENOMEM);
642 mutex_lock(&mvdev->mr_mtx);
643 err = _mlx5_vdpa_create_mr(mvdev, mr, iotlb);
644 mutex_unlock(&mvdev->mr_mtx);
649 refcount_set(&mr->refcount, 1);
/* Rebuild the control virtqueue iotlb from iotlb: under cvq.iommu_lock the
 * existing contents are removed and dup_iotlb() fills it again. The asid is
 * first compared with the address space assigned to the CVQ group; what
 * happens on a mismatch is not visible here (presumably an early return).
 */
658 int mlx5_vdpa_update_cvq_iotlb(struct mlx5_vdpa_dev *mvdev,
659 struct vhost_iotlb *iotlb,
664 if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
667 spin_lock(&mvdev->cvq.iommu_lock);
669 prune_iotlb(mvdev->cvq.iotlb);
670 err = dup_iotlb(mvdev->cvq.iotlb, iotlb);
672 spin_unlock(&mvdev->cvq.iommu_lock);
/* Create an MR without an iotlb (NULL is passed to mlx5_vdpa_create_mr()),
 * install it for address space 0 and refresh the control virtqueue iotlb
 * with a NULL source. The return value is that of
 * mlx5_vdpa_update_cvq_iotlb() on the visible path.
 */
677 int mlx5_vdpa_create_dma_mr(struct mlx5_vdpa_dev *mvdev)
679 struct mlx5_vdpa_mr *mr;
681 mr = mlx5_vdpa_create_mr(mvdev, NULL);
685 mlx5_vdpa_update_mr(mvdev, mr, 0);
687 return mlx5_vdpa_update_cvq_iotlb(mvdev, NULL, 0);
/* Reset the MR of address space asid. The asid is range-checked against
 * MLX5_VDPA_NUM_AS, then the slot is cleared. For asid 0 on devices reporting
 * the umem_uid_0 capability a DMA MR is created again, with a warning logged
 * if that fails. The last visible call refreshes the control virtqueue iotlb
 * with a NULL source - presumably on the alternative branch; the branch
 * structure and the end of the function are not visible here.
 */
690 int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
692 if (asid >= MLX5_VDPA_NUM_AS)
695 mlx5_vdpa_update_mr(mvdev, NULL, asid);
697 if (asid == 0 && MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
698 if (mlx5_vdpa_create_dma_mr(mvdev))
699 mlx5_vdpa_warn(mvdev, "create DMA MR failed\n");
701 mlx5_vdpa_update_cvq_iotlb(mvdev, NULL, asid);