GNU Linux-libre 5.10.217-gnu1
[releases.git] / arch / mips / kvm / mmu.c
1 /*
2  * This file is subject to the terms and conditions of the GNU General Public
3  * License.  See the file "COPYING" in the main directory of this archive
4  * for more details.
5  *
6  * KVM/MIPS MMU handling in the KVM module.
7  *
8  * Copyright (C) 2012  MIPS Technologies, Inc.  All rights reserved.
9  * Authors: Sanjay Lal <sanjayl@kymasys.com>
10  */
11
12 #include <linux/highmem.h>
13 #include <linux/kvm_host.h>
14 #include <linux/uaccess.h>
15 #include <asm/mmu_context.h>
16 #include <asm/pgalloc.h>
17
/*
 * KVM_MMU_CACHE_MIN_PAGES is the number of page table levels for which a page
 * must be available in the MMU page cache before walking the tables: one when
 * the PMD level is folded, two otherwise.
 */
#ifndef __PAGETABLE_PMD_FOLDED
#define KVM_MMU_CACHE_MIN_PAGES 2
#else
#define KVM_MMU_CACHE_MIN_PAGES 1
#endif
27
28 void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
29 {
30         kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
31 }
32
33 /**
34  * kvm_pgd_init() - Initialise KVM GPA page directory.
35  * @page:       Pointer to page directory (PGD) for KVM GPA.
36  *
37  * Initialise a KVM GPA page directory with pointers to the invalid table, i.e.
38  * representing no mappings. This is similar to pgd_init(), however it
39  * initialises all the page directory pointers, not just the ones corresponding
40  * to the userland address space (since it is for the guest physical address
41  * space rather than a virtual address space).
42  */
43 static void kvm_pgd_init(void *page)
44 {
45         unsigned long *p, *end;
46         unsigned long entry;
47
48 #ifdef __PAGETABLE_PMD_FOLDED
49         entry = (unsigned long)invalid_pte_table;
50 #else
51         entry = (unsigned long)invalid_pmd_table;
52 #endif
53
54         p = (unsigned long *)page;
55         end = p + PTRS_PER_PGD;
56
57         do {
58                 p[0] = entry;
59                 p[1] = entry;
60                 p[2] = entry;
61                 p[3] = entry;
62                 p[4] = entry;
63                 p += 8;
64                 p[-3] = entry;
65                 p[-2] = entry;
66                 p[-1] = entry;
67         } while (p != end);
68 }
69
70 /**
71  * kvm_pgd_alloc() - Allocate and initialise a KVM GPA page directory.
72  *
73  * Allocate a blank KVM GPA page directory (PGD) for representing guest physical
74  * to host physical page mappings.
75  *
76  * Returns:     Pointer to new KVM GPA page directory.
77  *              NULL on allocation failure.
78  */
79 pgd_t *kvm_pgd_alloc(void)
80 {
81         pgd_t *ret;
82
83         ret = (pgd_t *)__get_free_pages(GFP_KERNEL, PGD_ORDER);
84         if (ret)
85                 kvm_pgd_init(ret);
86
87         return ret;
88 }
89
90 /**
91  * kvm_mips_walk_pgd() - Walk page table with optional allocation.
92  * @pgd:        Page directory pointer.
93  * @addr:       Address to index page table using.
94  * @cache:      MMU page cache to allocate new page tables from, or NULL.
95  *
96  * Walk the page tables pointed to by @pgd to find the PTE corresponding to the
97  * address @addr. If page tables don't exist for @addr, they will be created
98  * from the MMU cache if @cache is not NULL.
99  *
100  * Returns:     Pointer to pte_t corresponding to @addr.
101  *              NULL if a page table doesn't exist for @addr and !@cache.
102  *              NULL if a page table allocation failed.
103  */
104 static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache,
105                                 unsigned long addr)
106 {
107         p4d_t *p4d;
108         pud_t *pud;
109         pmd_t *pmd;
110
111         pgd += pgd_index(addr);
112         if (pgd_none(*pgd)) {
113                 /* Not used on MIPS yet */
114                 BUG();
115                 return NULL;
116         }
117         p4d = p4d_offset(pgd, addr);
118         pud = pud_offset(p4d, addr);
119         if (pud_none(*pud)) {
120                 pmd_t *new_pmd;
121
122                 if (!cache)
123                         return NULL;
124                 new_pmd = kvm_mmu_memory_cache_alloc(cache);
125                 pmd_init((unsigned long)new_pmd,
126                          (unsigned long)invalid_pte_table);
127                 pud_populate(NULL, pud, new_pmd);
128         }
129         pmd = pmd_offset(pud, addr);
130         if (pmd_none(*pmd)) {
131                 pte_t *new_pte;
132
133                 if (!cache)
134                         return NULL;
135                 new_pte = kvm_mmu_memory_cache_alloc(cache);
136                 clear_page(new_pte);
137                 pmd_populate_kernel(NULL, pmd, new_pte);
138         }
139         return pte_offset_kernel(pmd, addr);
140 }
141
142 /* Caller must hold kvm->mm_lock */
143 static pte_t *kvm_mips_pte_for_gpa(struct kvm *kvm,
144                                    struct kvm_mmu_memory_cache *cache,
145                                    unsigned long addr)
146 {
147         return kvm_mips_walk_pgd(kvm->arch.gpa_mm.pgd, cache, addr);
148 }
149
150 /*
151  * kvm_mips_flush_gpa_{pte,pmd,pud,pgd,pt}.
152  * Flush a range of guest physical address space from the VM's GPA page tables.
153  */
154
155 static bool kvm_mips_flush_gpa_pte(pte_t *pte, unsigned long start_gpa,
156                                    unsigned long end_gpa)
157 {
158         int i_min = pte_index(start_gpa);
159         int i_max = pte_index(end_gpa);
160         bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PTE - 1);
161         int i;
162
163         for (i = i_min; i <= i_max; ++i) {
164                 if (!pte_present(pte[i]))
165                         continue;
166
167                 set_pte(pte + i, __pte(0));
168         }
169         return safe_to_remove;
170 }
171
172 static bool kvm_mips_flush_gpa_pmd(pmd_t *pmd, unsigned long start_gpa,
173                                    unsigned long end_gpa)
174 {
175         pte_t *pte;
176         unsigned long end = ~0ul;
177         int i_min = pmd_index(start_gpa);
178         int i_max = pmd_index(end_gpa);
179         bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1);
180         int i;
181
182         for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
183                 if (!pmd_present(pmd[i]))
184                         continue;
185
186                 pte = pte_offset_kernel(pmd + i, 0);
187                 if (i == i_max)
188                         end = end_gpa;
189
190                 if (kvm_mips_flush_gpa_pte(pte, start_gpa, end)) {
191                         pmd_clear(pmd + i);
192                         pte_free_kernel(NULL, pte);
193                 } else {
194                         safe_to_remove = false;
195                 }
196         }
197         return safe_to_remove;
198 }
199
200 static bool kvm_mips_flush_gpa_pud(pud_t *pud, unsigned long start_gpa,
201                                    unsigned long end_gpa)
202 {
203         pmd_t *pmd;
204         unsigned long end = ~0ul;
205         int i_min = pud_index(start_gpa);
206         int i_max = pud_index(end_gpa);
207         bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1);
208         int i;
209
210         for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
211                 if (!pud_present(pud[i]))
212                         continue;
213
214                 pmd = pmd_offset(pud + i, 0);
215                 if (i == i_max)
216                         end = end_gpa;
217
218                 if (kvm_mips_flush_gpa_pmd(pmd, start_gpa, end)) {
219                         pud_clear(pud + i);
220                         pmd_free(NULL, pmd);
221                 } else {
222                         safe_to_remove = false;
223                 }
224         }
225         return safe_to_remove;
226 }
227
228 static bool kvm_mips_flush_gpa_pgd(pgd_t *pgd, unsigned long start_gpa,
229                                    unsigned long end_gpa)
230 {
231         p4d_t *p4d;
232         pud_t *pud;
233         unsigned long end = ~0ul;
234         int i_min = pgd_index(start_gpa);
235         int i_max = pgd_index(end_gpa);
236         bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1);
237         int i;
238
239         for (i = i_min; i <= i_max; ++i, start_gpa = 0) {
240                 if (!pgd_present(pgd[i]))
241                         continue;
242
243                 p4d = p4d_offset(pgd, 0);
244                 pud = pud_offset(p4d + i, 0);
245                 if (i == i_max)
246                         end = end_gpa;
247
248                 if (kvm_mips_flush_gpa_pud(pud, start_gpa, end)) {
249                         pgd_clear(pgd + i);
250                         pud_free(NULL, pud);
251                 } else {
252                         safe_to_remove = false;
253                 }
254         }
255         return safe_to_remove;
256 }
257
258 /**
259  * kvm_mips_flush_gpa_pt() - Flush a range of guest physical addresses.
260  * @kvm:        KVM pointer.
261  * @start_gfn:  Guest frame number of first page in GPA range to flush.
262  * @end_gfn:    Guest frame number of last page in GPA range to flush.
263  *
264  * Flushes a range of GPA mappings from the GPA page tables.
265  *
266  * The caller must hold the @kvm->mmu_lock spinlock.
267  *
268  * Returns:     Whether its safe to remove the top level page directory because
269  *              all lower levels have been removed.
270  */
271 bool kvm_mips_flush_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn)
272 {
273         return kvm_mips_flush_gpa_pgd(kvm->arch.gpa_mm.pgd,
274                                       start_gfn << PAGE_SHIFT,
275                                       end_gfn << PAGE_SHIFT);
276 }
277
/*
 * BUILD_PTE_RANGE_OP() - Generate a family of GPA range walkers.
 * @name:	Infix of the generated function names.
 * @op:		Transformation taking a pte_t and yielding a pte_t.
 *
 * Expands to kvm_mips_<name>_{pte,pmd,pud,pgd}(). They walk the inclusive
 * address range [start, end], apply @op to each present PTE and write back
 * the entries whose value changed. Each generated function returns 1 when at
 * least one PTE was rewritten and 0 otherwise.
 */
#define BUILD_PTE_RANGE_OP(name, op)					\
static int kvm_mips_##name##_pte(pte_t *pte, unsigned long start,	\
				 unsigned long end)			\
{									\
	const int first = pte_index(start);				\
	const int last = pte_index(end);				\
	int changed = 0;						\
	int idx;							\
									\
	for (idx = first; idx <= last; idx++) {				\
		pte_t cur, updated;					\
									\
		if (!pte_present(pte[idx]))				\
			continue;					\
									\
		cur = pte[idx];						\
		updated = op(cur);					\
		if (pte_val(updated) != pte_val(cur)) {			\
			set_pte(pte + idx, updated);			\
			changed = 1;					\
		}							\
	}								\
	return changed;							\
}									\
									\
/* returns true if anything was done */				\
static int kvm_mips_##name##_pmd(pmd_t *pmd, unsigned long start,	\
				 unsigned long end)			\
{									\
	const int first = pmd_index(start);				\
	const int last = pmd_index(end);				\
	int changed = 0;						\
	int idx;							\
									\
	for (idx = first; idx <= last; idx++, start = 0) {		\
		unsigned long sub_end;					\
		pte_t *table;						\
									\
		if (!pmd_present(pmd[idx]))				\
			continue;					\
									\
		table = pte_offset_kernel(pmd + idx, 0);		\
		sub_end = (idx == last) ? end : ~0ul;			\
		changed |= kvm_mips_##name##_pte(table, start, sub_end);\
	}								\
	return changed;							\
}									\
									\
static int kvm_mips_##name##_pud(pud_t *pud, unsigned long start,	\
				 unsigned long end)			\
{									\
	const int first = pud_index(start);				\
	const int last = pud_index(end);				\
	int changed = 0;						\
	int idx;							\
									\
	for (idx = first; idx <= last; idx++, start = 0) {		\
		unsigned long sub_end;					\
		pmd_t *table;						\
									\
		if (!pud_present(pud[idx]))				\
			continue;					\
									\
		table = pmd_offset(pud + idx, 0);			\
		sub_end = (idx == last) ? end : ~0ul;			\
		changed |= kvm_mips_##name##_pmd(table, start, sub_end);\
	}								\
	return changed;							\
}									\
									\
static int kvm_mips_##name##_pgd(pgd_t *pgd, unsigned long start,	\
				 unsigned long end)			\
{									\
	const int first = pgd_index(start);				\
	const int last = pgd_index(end);				\
	int changed = 0;						\
	int idx;							\
									\
	for (idx = first; idx <= last; idx++, start = 0) {		\
		unsigned long sub_end;					\
		p4d_t *p4d_base;					\
		pud_t *table;						\
									\
		if (!pgd_present(pgd[idx]))				\
			continue;					\
									\
		p4d_base = p4d_offset(pgd, 0);				\
		table = pud_offset(p4d_base + idx, 0);			\
		sub_end = (idx == last) ? end : ~0ul;			\
		changed |= kvm_mips_##name##_pud(table, start, sub_end);\
	}								\
	return changed;							\
}
373
374 /*
375  * kvm_mips_mkclean_gpa_pt.
376  * Mark a range of guest physical address space clean (writes fault) in the VM's
377  * GPA page table to allow dirty page tracking.
378  */
379
380 BUILD_PTE_RANGE_OP(mkclean, pte_mkclean)
381
382 /**
383  * kvm_mips_mkclean_gpa_pt() - Make a range of guest physical addresses clean.
384  * @kvm:        KVM pointer.
385  * @start_gfn:  Guest frame number of first page in GPA range to flush.
386  * @end_gfn:    Guest frame number of last page in GPA range to flush.
387  *
388  * Make a range of GPA mappings clean so that guest writes will fault and
389  * trigger dirty page logging.
390  *
391  * The caller must hold the @kvm->mmu_lock spinlock.
392  *
393  * Returns:     Whether any GPA mappings were modified, which would require
394  *              derived mappings (GVA page tables & TLB enties) to be
395  *              invalidated.
396  */
397 int kvm_mips_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn)
398 {
399         return kvm_mips_mkclean_pgd(kvm->arch.gpa_mm.pgd,
400                                     start_gfn << PAGE_SHIFT,
401                                     end_gfn << PAGE_SHIFT);
402 }
403
404 /**
405  * kvm_arch_mmu_enable_log_dirty_pt_masked() - write protect dirty pages
406  * @kvm:        The KVM pointer
407  * @slot:       The memory slot associated with mask
408  * @gfn_offset: The gfn offset in memory slot
409  * @mask:       The mask of dirty pages at offset 'gfn_offset' in this memory
410  *              slot to be write protected
411  *
412  * Walks bits set in mask write protects the associated pte's. Caller must
413  * acquire @kvm->mmu_lock.
414  */
415 void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
416                 struct kvm_memory_slot *slot,
417                 gfn_t gfn_offset, unsigned long mask)
418 {
419         gfn_t base_gfn = slot->base_gfn + gfn_offset;
420         gfn_t start = base_gfn +  __ffs(mask);
421         gfn_t end = base_gfn + __fls(mask);
422
423         kvm_mips_mkclean_gpa_pt(kvm, start, end);
424 }
425
426 /*
427  * kvm_mips_mkold_gpa_pt.
428  * Mark a range of guest physical address space old (all accesses fault) in the
429  * VM's GPA page table to allow detection of commonly used pages.
430  */
431
432 BUILD_PTE_RANGE_OP(mkold, pte_mkold)
433
434 static int kvm_mips_mkold_gpa_pt(struct kvm *kvm, gfn_t start_gfn,
435                                  gfn_t end_gfn)
436 {
437         return kvm_mips_mkold_pgd(kvm->arch.gpa_mm.pgd,
438                                   start_gfn << PAGE_SHIFT,
439                                   end_gfn << PAGE_SHIFT);
440 }
441
442 static int handle_hva_to_gpa(struct kvm *kvm,
443                              unsigned long start,
444                              unsigned long end,
445                              int (*handler)(struct kvm *kvm, gfn_t gfn,
446                                             gpa_t gfn_end,
447                                             struct kvm_memory_slot *memslot,
448                                             void *data),
449                              void *data)
450 {
451         struct kvm_memslots *slots;
452         struct kvm_memory_slot *memslot;
453         int ret = 0;
454
455         slots = kvm_memslots(kvm);
456
457         /* we only care about the pages that the guest sees */
458         kvm_for_each_memslot(memslot, slots) {
459                 unsigned long hva_start, hva_end;
460                 gfn_t gfn, gfn_end;
461
462                 hva_start = max(start, memslot->userspace_addr);
463                 hva_end = min(end, memslot->userspace_addr +
464                                         (memslot->npages << PAGE_SHIFT));
465                 if (hva_start >= hva_end)
466                         continue;
467
468                 /*
469                  * {gfn(page) | page intersects with [hva_start, hva_end)} =
470                  * {gfn_start, gfn_start+1, ..., gfn_end-1}.
471                  */
472                 gfn = hva_to_gfn_memslot(hva_start, memslot);
473                 gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
474
475                 ret |= handler(kvm, gfn, gfn_end, memslot, data);
476         }
477
478         return ret;
479 }
480
481
482 static int kvm_unmap_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
483                                  struct kvm_memory_slot *memslot, void *data)
484 {
485         kvm_mips_flush_gpa_pt(kvm, gfn, gfn_end);
486         return 1;
487 }
488
489 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
490                         unsigned flags)
491 {
492         handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
493
494         kvm_mips_callbacks->flush_shadow_all(kvm);
495         return 0;
496 }
497
498 static int kvm_set_spte_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
499                                 struct kvm_memory_slot *memslot, void *data)
500 {
501         gpa_t gpa = gfn << PAGE_SHIFT;
502         pte_t hva_pte = *(pte_t *)data;
503         pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
504         pte_t old_pte;
505
506         if (!gpa_pte)
507                 return 0;
508
509         /* Mapping may need adjusting depending on memslot flags */
510         old_pte = *gpa_pte;
511         if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES && !pte_dirty(old_pte))
512                 hva_pte = pte_mkclean(hva_pte);
513         else if (memslot->flags & KVM_MEM_READONLY)
514                 hva_pte = pte_wrprotect(hva_pte);
515
516         set_pte(gpa_pte, hva_pte);
517
518         /* Replacing an absent or old page doesn't need flushes */
519         if (!pte_present(old_pte) || !pte_young(old_pte))
520                 return 0;
521
522         /* Pages swapped, aged, moved, or cleaned require flushes */
523         return !pte_present(hva_pte) ||
524                !pte_young(hva_pte) ||
525                pte_pfn(old_pte) != pte_pfn(hva_pte) ||
526                (pte_dirty(old_pte) && !pte_dirty(hva_pte));
527 }
528
529 int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
530 {
531         unsigned long end = hva + PAGE_SIZE;
532         int ret;
533
534         ret = handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &pte);
535         if (ret)
536                 kvm_mips_callbacks->flush_shadow_all(kvm);
537         return 0;
538 }
539
540 static int kvm_age_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
541                                struct kvm_memory_slot *memslot, void *data)
542 {
543         return kvm_mips_mkold_gpa_pt(kvm, gfn, gfn_end);
544 }
545
546 static int kvm_test_age_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
547                                     struct kvm_memory_slot *memslot, void *data)
548 {
549         gpa_t gpa = gfn << PAGE_SHIFT;
550         pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
551
552         if (!gpa_pte)
553                 return 0;
554         return pte_young(*gpa_pte);
555 }
556
557 int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
558 {
559         return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
560 }
561
562 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
563 {
564         return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL);
565 }
566
567 /**
568  * _kvm_mips_map_page_fast() - Fast path GPA fault handler.
569  * @vcpu:               VCPU pointer.
570  * @gpa:                Guest physical address of fault.
571  * @write_fault:        Whether the fault was due to a write.
572  * @out_entry:          New PTE for @gpa (written on success unless NULL).
573  * @out_buddy:          New PTE for @gpa's buddy (written on success unless
574  *                      NULL).
575  *
576  * Perform fast path GPA fault handling, doing all that can be done without
577  * calling into KVM. This handles marking old pages young (for idle page
578  * tracking), and dirtying of clean pages (for dirty page logging).
579  *
580  * Returns:     0 on success, in which case we can update derived mappings and
581  *              resume guest execution.
582  *              -EFAULT on failure due to absent GPA mapping or write to
583  *              read-only page, in which case KVM must be consulted.
584  */
585 static int _kvm_mips_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa,
586                                    bool write_fault,
587                                    pte_t *out_entry, pte_t *out_buddy)
588 {
589         struct kvm *kvm = vcpu->kvm;
590         gfn_t gfn = gpa >> PAGE_SHIFT;
591         pte_t *ptep;
592         kvm_pfn_t pfn = 0;      /* silence bogus GCC warning */
593         bool pfn_valid = false;
594         int ret = 0;
595
596         spin_lock(&kvm->mmu_lock);
597
598         /* Fast path - just check GPA page table for an existing entry */
599         ptep = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
600         if (!ptep || !pte_present(*ptep)) {
601                 ret = -EFAULT;
602                 goto out;
603         }
604
605         /* Track access to pages marked old */
606         if (!pte_young(*ptep)) {
607                 set_pte(ptep, pte_mkyoung(*ptep));
608                 pfn = pte_pfn(*ptep);
609                 pfn_valid = true;
610                 /* call kvm_set_pfn_accessed() after unlock */
611         }
612         if (write_fault && !pte_dirty(*ptep)) {
613                 if (!pte_write(*ptep)) {
614                         ret = -EFAULT;
615                         goto out;
616                 }
617
618                 /* Track dirtying of writeable pages */
619                 set_pte(ptep, pte_mkdirty(*ptep));
620                 pfn = pte_pfn(*ptep);
621                 mark_page_dirty(kvm, gfn);
622                 kvm_set_pfn_dirty(pfn);
623         }
624
625         if (out_entry)
626                 *out_entry = *ptep;
627         if (out_buddy)
628                 *out_buddy = *ptep_buddy(ptep);
629
630 out:
631         spin_unlock(&kvm->mmu_lock);
632         if (pfn_valid)
633                 kvm_set_pfn_accessed(pfn);
634         return ret;
635 }
636
637 /**
638  * kvm_mips_map_page() - Map a guest physical page.
639  * @vcpu:               VCPU pointer.
640  * @gpa:                Guest physical address of fault.
641  * @write_fault:        Whether the fault was due to a write.
642  * @out_entry:          New PTE for @gpa (written on success unless NULL).
643  * @out_buddy:          New PTE for @gpa's buddy (written on success unless
644  *                      NULL).
645  *
646  * Handle GPA faults by creating a new GPA mapping (or updating an existing
647  * one).
648  *
649  * This takes care of marking pages young or dirty (idle/dirty page tracking),
650  * asking KVM for the corresponding PFN, and creating a mapping in the GPA page
651  * tables. Derived mappings (GVA page tables and TLBs) must be handled by the
652  * caller.
653  *
654  * Returns:     0 on success, in which case the caller may use the @out_entry
655  *              and @out_buddy PTEs to update derived mappings and resume guest
656  *              execution.
657  *              -EFAULT if there is no memory region at @gpa or a write was
658  *              attempted to a read-only memory region. This is usually handled
659  *              as an MMIO access.
660  */
661 static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa,
662                              bool write_fault,
663                              pte_t *out_entry, pte_t *out_buddy)
664 {
665         struct kvm *kvm = vcpu->kvm;
666         struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
667         gfn_t gfn = gpa >> PAGE_SHIFT;
668         int srcu_idx, err;
669         kvm_pfn_t pfn;
670         pte_t *ptep, entry;
671         bool writeable;
672         unsigned long prot_bits;
673         unsigned long mmu_seq;
674
675         /* Try the fast path to handle old / clean pages */
676         srcu_idx = srcu_read_lock(&kvm->srcu);
677         err = _kvm_mips_map_page_fast(vcpu, gpa, write_fault, out_entry,
678                                       out_buddy);
679         if (!err)
680                 goto out;
681
682         /* We need a minimum of cached pages ready for page table creation */
683         err = kvm_mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES);
684         if (err)
685                 goto out;
686
687 retry:
688         /*
689          * Used to check for invalidations in progress, of the pfn that is
690          * returned by pfn_to_pfn_prot below.
691          */
692         mmu_seq = kvm->mmu_notifier_seq;
693         /*
694          * Ensure the read of mmu_notifier_seq isn't reordered with PTE reads in
695          * gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't
696          * risk the page we get a reference to getting unmapped before we have a
697          * chance to grab the mmu_lock without mmu_notifier_retry() noticing.
698          *
699          * This smp_rmb() pairs with the effective smp_wmb() of the combination
700          * of the pte_unmap_unlock() after the PTE is zapped, and the
701          * spin_lock() in kvm_mmu_notifier_invalidate_<page|range_end>() before
702          * mmu_notifier_seq is incremented.
703          */
704         smp_rmb();
705
706         /* Slow path - ask KVM core whether we can access this GPA */
707         pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writeable);
708         if (is_error_noslot_pfn(pfn)) {
709                 err = -EFAULT;
710                 goto out;
711         }
712
713         spin_lock(&kvm->mmu_lock);
714         /* Check if an invalidation has taken place since we got pfn */
715         if (mmu_notifier_retry(kvm, mmu_seq)) {
716                 /*
717                  * This can happen when mappings are changed asynchronously, but
718                  * also synchronously if a COW is triggered by
719                  * gfn_to_pfn_prot().
720                  */
721                 spin_unlock(&kvm->mmu_lock);
722                 kvm_release_pfn_clean(pfn);
723                 goto retry;
724         }
725
726         /* Ensure page tables are allocated */
727         ptep = kvm_mips_pte_for_gpa(kvm, memcache, gpa);
728
729         /* Set up the PTE */
730         prot_bits = _PAGE_PRESENT | __READABLE | _page_cachable_default;
731         if (writeable) {
732                 prot_bits |= _PAGE_WRITE;
733                 if (write_fault) {
734                         prot_bits |= __WRITEABLE;
735                         mark_page_dirty(kvm, gfn);
736                         kvm_set_pfn_dirty(pfn);
737                 }
738         }
739         entry = pfn_pte(pfn, __pgprot(prot_bits));
740
741         /* Write the PTE */
742         set_pte(ptep, entry);
743
744         err = 0;
745         if (out_entry)
746                 *out_entry = *ptep;
747         if (out_buddy)
748                 *out_buddy = *ptep_buddy(ptep);
749
750         spin_unlock(&kvm->mmu_lock);
751         kvm_release_pfn_clean(pfn);
752         kvm_set_pfn_accessed(pfn);
753 out:
754         srcu_read_unlock(&kvm->srcu, srcu_idx);
755         return err;
756 }
757
758 static pte_t *kvm_trap_emul_pte_for_gva(struct kvm_vcpu *vcpu,
759                                         unsigned long addr)
760 {
761         struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
762         pgd_t *pgdp;
763         int ret;
764
765         /* We need a minimum of cached pages ready for page table creation */
766         ret = kvm_mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES);
767         if (ret)
768                 return NULL;
769
770         if (KVM_GUEST_KERNEL_MODE(vcpu))
771                 pgdp = vcpu->arch.guest_kernel_mm.pgd;
772         else
773                 pgdp = vcpu->arch.guest_user_mm.pgd;
774
775         return kvm_mips_walk_pgd(pgdp, memcache, addr);
776 }
777
778 void kvm_trap_emul_invalidate_gva(struct kvm_vcpu *vcpu, unsigned long addr,
779                                   bool user)
780 {
781         pgd_t *pgdp;
782         pte_t *ptep;
783
784         addr &= PAGE_MASK << 1;
785
786         pgdp = vcpu->arch.guest_kernel_mm.pgd;
787         ptep = kvm_mips_walk_pgd(pgdp, NULL, addr);
788         if (ptep) {
789                 ptep[0] = pfn_pte(0, __pgprot(0));
790                 ptep[1] = pfn_pte(0, __pgprot(0));
791         }
792
793         if (user) {
794                 pgdp = vcpu->arch.guest_user_mm.pgd;
795                 ptep = kvm_mips_walk_pgd(pgdp, NULL, addr);
796                 if (ptep) {
797                         ptep[0] = pfn_pte(0, __pgprot(0));
798                         ptep[1] = pfn_pte(0, __pgprot(0));
799                 }
800         }
801 }
802
803 /*
804  * kvm_mips_flush_gva_{pte,pmd,pud,pgd,pt}.
805  * Flush a range of guest virtual address space from the GVA page tables.
806  */
807
808 static bool kvm_mips_flush_gva_pte(pte_t *pte, unsigned long start_gva,
809                                    unsigned long end_gva)
810 {
811         int i_min = pte_index(start_gva);
812         int i_max = pte_index(end_gva);
813         bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PTE - 1);
814         int i;
815
816         /*
817          * There's no freeing to do, so there's no point clearing individual
818          * entries unless only part of the last level page table needs flushing.
819          */
820         if (safe_to_remove)
821                 return true;
822
823         for (i = i_min; i <= i_max; ++i) {
824                 if (!pte_present(pte[i]))
825                         continue;
826
827                 set_pte(pte + i, __pte(0));
828         }
829         return false;
830 }
831
832 static bool kvm_mips_flush_gva_pmd(pmd_t *pmd, unsigned long start_gva,
833                                    unsigned long end_gva)
834 {
835         pte_t *pte;
836         unsigned long end = ~0ul;
837         int i_min = pmd_index(start_gva);
838         int i_max = pmd_index(end_gva);
839         bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1);
840         int i;
841
842         for (i = i_min; i <= i_max; ++i, start_gva = 0) {
843                 if (!pmd_present(pmd[i]))
844                         continue;
845
846                 pte = pte_offset_kernel(pmd + i, 0);
847                 if (i == i_max)
848                         end = end_gva;
849
850                 if (kvm_mips_flush_gva_pte(pte, start_gva, end)) {
851                         pmd_clear(pmd + i);
852                         pte_free_kernel(NULL, pte);
853                 } else {
854                         safe_to_remove = false;
855                 }
856         }
857         return safe_to_remove;
858 }
859
860 static bool kvm_mips_flush_gva_pud(pud_t *pud, unsigned long start_gva,
861                                    unsigned long end_gva)
862 {
863         pmd_t *pmd;
864         unsigned long end = ~0ul;
865         int i_min = pud_index(start_gva);
866         int i_max = pud_index(end_gva);
867         bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1);
868         int i;
869
870         for (i = i_min; i <= i_max; ++i, start_gva = 0) {
871                 if (!pud_present(pud[i]))
872                         continue;
873
874                 pmd = pmd_offset(pud + i, 0);
875                 if (i == i_max)
876                         end = end_gva;
877
878                 if (kvm_mips_flush_gva_pmd(pmd, start_gva, end)) {
879                         pud_clear(pud + i);
880                         pmd_free(NULL, pmd);
881                 } else {
882                         safe_to_remove = false;
883                 }
884         }
885         return safe_to_remove;
886 }
887
888 static bool kvm_mips_flush_gva_pgd(pgd_t *pgd, unsigned long start_gva,
889                                    unsigned long end_gva)
890 {
891         p4d_t *p4d;
892         pud_t *pud;
893         unsigned long end = ~0ul;
894         int i_min = pgd_index(start_gva);
895         int i_max = pgd_index(end_gva);
896         bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1);
897         int i;
898
899         for (i = i_min; i <= i_max; ++i, start_gva = 0) {
900                 if (!pgd_present(pgd[i]))
901                         continue;
902
903                 p4d = p4d_offset(pgd, 0);
904                 pud = pud_offset(p4d + i, 0);
905                 if (i == i_max)
906                         end = end_gva;
907
908                 if (kvm_mips_flush_gva_pud(pud, start_gva, end)) {
909                         pgd_clear(pgd + i);
910                         pud_free(NULL, pud);
911                 } else {
912                         safe_to_remove = false;
913                 }
914         }
915         return safe_to_remove;
916 }
917
918 void kvm_mips_flush_gva_pt(pgd_t *pgd, enum kvm_mips_flush flags)
919 {
920         if (flags & KMF_GPA) {
921                 /* all of guest virtual address space could be affected */
922                 if (flags & KMF_KERN)
923                         /* useg, kseg0, seg2/3 */
924                         kvm_mips_flush_gva_pgd(pgd, 0, 0x7fffffff);
925                 else
926                         /* useg */
927                         kvm_mips_flush_gva_pgd(pgd, 0, 0x3fffffff);
928         } else {
929                 /* useg */
930                 kvm_mips_flush_gva_pgd(pgd, 0, 0x3fffffff);
931
932                 /* kseg2/3 */
933                 if (flags & KMF_KERN)
934                         kvm_mips_flush_gva_pgd(pgd, 0x60000000, 0x7fffffff);
935         }
936 }
937
938 static pte_t kvm_mips_gpa_pte_to_gva_unmapped(pte_t pte)
939 {
940         /*
941          * Don't leak writeable but clean entries from GPA page tables. We don't
942          * want the normal Linux tlbmod handler to handle dirtying when KVM
943          * accesses guest memory.
944          */
945         if (!pte_dirty(pte))
946                 pte = pte_wrprotect(pte);
947
948         return pte;
949 }
950
951 static pte_t kvm_mips_gpa_pte_to_gva_mapped(pte_t pte, long entrylo)
952 {
953         /* Guest EntryLo overrides host EntryLo */
954         if (!(entrylo & ENTRYLO_D))
955                 pte = pte_mkclean(pte);
956
957         return kvm_mips_gpa_pte_to_gva_unmapped(pte);
958 }
959
960 #ifdef CONFIG_KVM_MIPS_VZ
961 int kvm_mips_handle_vz_root_tlb_fault(unsigned long badvaddr,
962                                       struct kvm_vcpu *vcpu,
963                                       bool write_fault)
964 {
965         int ret;
966
967         ret = kvm_mips_map_page(vcpu, badvaddr, write_fault, NULL, NULL);
968         if (ret)
969                 return ret;
970
971         /* Invalidate this entry in the TLB */
972         return kvm_vz_host_tlb_inv(vcpu, badvaddr);
973 }
974 #endif
975
976 /* XXXKYMA: Must be called with interrupts disabled */
977 int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr,
978                                     struct kvm_vcpu *vcpu,
979                                     bool write_fault)
980 {
981         unsigned long gpa;
982         pte_t pte_gpa[2], *ptep_gva;
983         int idx;
984
985         if (KVM_GUEST_KSEGX(badvaddr) != KVM_GUEST_KSEG0) {
986                 kvm_err("%s: Invalid BadVaddr: %#lx\n", __func__, badvaddr);
987                 kvm_mips_dump_host_tlbs();
988                 return -1;
989         }
990
991         /* Get the GPA page table entry */
992         gpa = KVM_GUEST_CPHYSADDR(badvaddr);
993         idx = (badvaddr >> PAGE_SHIFT) & 1;
994         if (kvm_mips_map_page(vcpu, gpa, write_fault, &pte_gpa[idx],
995                               &pte_gpa[!idx]) < 0)
996                 return -1;
997
998         /* Get the GVA page table entry */
999         ptep_gva = kvm_trap_emul_pte_for_gva(vcpu, badvaddr & ~PAGE_SIZE);
1000         if (!ptep_gva) {
1001                 kvm_err("No ptep for gva %lx\n", badvaddr);
1002                 return -1;
1003         }
1004
1005         /* Copy a pair of entries from GPA page table to GVA page table */
1006         ptep_gva[0] = kvm_mips_gpa_pte_to_gva_unmapped(pte_gpa[0]);
1007         ptep_gva[1] = kvm_mips_gpa_pte_to_gva_unmapped(pte_gpa[1]);
1008
1009         /* Invalidate this entry in the TLB, guest kernel ASID only */
1010         kvm_mips_host_tlb_inv(vcpu, badvaddr, false, true);
1011         return 0;
1012 }
1013
1014 int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
1015                                          struct kvm_mips_tlb *tlb,
1016                                          unsigned long gva,
1017                                          bool write_fault)
1018 {
1019         struct kvm *kvm = vcpu->kvm;
1020         long tlb_lo[2];
1021         pte_t pte_gpa[2], *ptep_buddy, *ptep_gva;
1022         unsigned int idx = TLB_LO_IDX(*tlb, gva);
1023         bool kernel = KVM_GUEST_KERNEL_MODE(vcpu);
1024
1025         tlb_lo[0] = tlb->tlb_lo[0];
1026         tlb_lo[1] = tlb->tlb_lo[1];
1027
1028         /*
1029          * The commpage address must not be mapped to anything else if the guest
1030          * TLB contains entries nearby, or commpage accesses will break.
1031          */
1032         if (!((gva ^ KVM_GUEST_COMMPAGE_ADDR) & VPN2_MASK & (PAGE_MASK << 1)))
1033                 tlb_lo[TLB_LO_IDX(*tlb, KVM_GUEST_COMMPAGE_ADDR)] = 0;
1034
1035         /* Get the GPA page table entry */
1036         if (kvm_mips_map_page(vcpu, mips3_tlbpfn_to_paddr(tlb_lo[idx]),
1037                               write_fault, &pte_gpa[idx], NULL) < 0)
1038                 return -1;
1039
1040         /* And its GVA buddy's GPA page table entry if it also exists */
1041         pte_gpa[!idx] = pfn_pte(0, __pgprot(0));
1042         if (tlb_lo[!idx] & ENTRYLO_V) {
1043                 spin_lock(&kvm->mmu_lock);
1044                 ptep_buddy = kvm_mips_pte_for_gpa(kvm, NULL,
1045                                         mips3_tlbpfn_to_paddr(tlb_lo[!idx]));
1046                 if (ptep_buddy)
1047                         pte_gpa[!idx] = *ptep_buddy;
1048                 spin_unlock(&kvm->mmu_lock);
1049         }
1050
1051         /* Get the GVA page table entry pair */
1052         ptep_gva = kvm_trap_emul_pte_for_gva(vcpu, gva & ~PAGE_SIZE);
1053         if (!ptep_gva) {
1054                 kvm_err("No ptep for gva %lx\n", gva);
1055                 return -1;
1056         }
1057
1058         /* Copy a pair of entries from GPA page table to GVA page table */
1059         ptep_gva[0] = kvm_mips_gpa_pte_to_gva_mapped(pte_gpa[0], tlb_lo[0]);
1060         ptep_gva[1] = kvm_mips_gpa_pte_to_gva_mapped(pte_gpa[1], tlb_lo[1]);
1061
1062         /* Invalidate this entry in the TLB, current guest mode ASID only */
1063         kvm_mips_host_tlb_inv(vcpu, gva, !kernel, kernel);
1064
1065         kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc,
1066                   tlb->tlb_lo[0], tlb->tlb_lo[1]);
1067
1068         return 0;
1069 }
1070
1071 int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr,
1072                                        struct kvm_vcpu *vcpu)
1073 {
1074         kvm_pfn_t pfn;
1075         pte_t *ptep;
1076
1077         ptep = kvm_trap_emul_pte_for_gva(vcpu, badvaddr);
1078         if (!ptep) {
1079                 kvm_err("No ptep for commpage %lx\n", badvaddr);
1080                 return -1;
1081         }
1082
1083         pfn = PFN_DOWN(virt_to_phys(vcpu->arch.kseg0_commpage));
1084         /* Also set valid and dirty, so refill handler doesn't have to */
1085         *ptep = pte_mkyoung(pte_mkdirty(pfn_pte(pfn, PAGE_SHARED)));
1086
1087         /* Invalidate this entry in the TLB, guest kernel ASID only */
1088         kvm_mips_host_tlb_inv(vcpu, badvaddr, false, true);
1089         return 0;
1090 }
1091
1092 /**
1093  * kvm_mips_migrate_count() - Migrate timer.
1094  * @vcpu:       Virtual CPU.
1095  *
1096  * Migrate CP0_Count hrtimer to the current CPU by cancelling and restarting it
1097  * if it was running prior to being cancelled.
1098  *
1099  * Must be called when the VCPU is migrated to a different CPU to ensure that
1100  * timer expiry during guest execution interrupts the guest and causes the
1101  * interrupt to be delivered in a timely manner.
1102  */
1103 static void kvm_mips_migrate_count(struct kvm_vcpu *vcpu)
1104 {
1105         if (hrtimer_cancel(&vcpu->arch.comparecount_timer))
1106                 hrtimer_restart(&vcpu->arch.comparecount_timer);
1107 }
1108
1109 /* Restore ASID once we are scheduled back after preemption */
1110 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1111 {
1112         unsigned long flags;
1113
1114         kvm_debug("%s: vcpu %p, cpu: %d\n", __func__, vcpu, cpu);
1115
1116         local_irq_save(flags);
1117
1118         vcpu->cpu = cpu;
1119         if (vcpu->arch.last_sched_cpu != cpu) {
1120                 kvm_debug("[%d->%d]KVM VCPU[%d] switch\n",
1121                           vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id);
1122                 /*
1123                  * Migrate the timer interrupt to the current CPU so that it
1124                  * always interrupts the guest and synchronously triggers a
1125                  * guest timer interrupt.
1126                  */
1127                 kvm_mips_migrate_count(vcpu);
1128         }
1129
1130         /* restore guest state to registers */
1131         kvm_mips_callbacks->vcpu_load(vcpu, cpu);
1132
1133         local_irq_restore(flags);
1134 }
1135
1136 /* ASID can change if another task is scheduled during preemption */
1137 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1138 {
1139         unsigned long flags;
1140         int cpu;
1141
1142         local_irq_save(flags);
1143
1144         cpu = smp_processor_id();
1145         vcpu->arch.last_sched_cpu = cpu;
1146         vcpu->cpu = -1;
1147
1148         /* save guest state in registers */
1149         kvm_mips_callbacks->vcpu_put(vcpu, cpu);
1150
1151         local_irq_restore(flags);
1152 }
1153
1154 /**
1155  * kvm_trap_emul_gva_fault() - Safely attempt to handle a GVA access fault.
1156  * @vcpu:       Virtual CPU.
1157  * @gva:        Guest virtual address to be accessed.
1158  * @write:      True if write attempted (must be dirtied and made writable).
1159  *
1160  * Safely attempt to handle a GVA fault, mapping GVA pages if necessary, and
1161  * dirtying the page if @write so that guest instructions can be modified.
1162  *
1163  * Returns:     KVM_MIPS_MAPPED on success.
1164  *              KVM_MIPS_GVA if bad guest virtual address.
1165  *              KVM_MIPS_GPA if bad guest physical address.
1166  *              KVM_MIPS_TLB if guest TLB not present.
1167  *              KVM_MIPS_TLBINV if guest TLB present but not valid.
1168  *              KVM_MIPS_TLBMOD if guest TLB read only.
1169  */
1170 enum kvm_mips_fault_result kvm_trap_emul_gva_fault(struct kvm_vcpu *vcpu,
1171                                                    unsigned long gva,
1172                                                    bool write)
1173 {
1174         struct mips_coproc *cop0 = vcpu->arch.cop0;
1175         struct kvm_mips_tlb *tlb;
1176         int index;
1177
1178         if (KVM_GUEST_KSEGX(gva) == KVM_GUEST_KSEG0) {
1179                 if (kvm_mips_handle_kseg0_tlb_fault(gva, vcpu, write) < 0)
1180                         return KVM_MIPS_GPA;
1181         } else if ((KVM_GUEST_KSEGX(gva) < KVM_GUEST_KSEG0) ||
1182                    KVM_GUEST_KSEGX(gva) == KVM_GUEST_KSEG23) {
1183                 /* Address should be in the guest TLB */
1184                 index = kvm_mips_guest_tlb_lookup(vcpu, (gva & VPN2_MASK) |
1185                           (kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID));
1186                 if (index < 0)
1187                         return KVM_MIPS_TLB;
1188                 tlb = &vcpu->arch.guest_tlb[index];
1189
1190                 /* Entry should be valid, and dirty for writes */
1191                 if (!TLB_IS_VALID(*tlb, gva))
1192                         return KVM_MIPS_TLBINV;
1193                 if (write && !TLB_IS_DIRTY(*tlb, gva))
1194                         return KVM_MIPS_TLBMOD;
1195
1196                 if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, gva, write))
1197                         return KVM_MIPS_GPA;
1198         } else {
1199                 return KVM_MIPS_GVA;
1200         }
1201
1202         return KVM_MIPS_MAPPED;
1203 }
1204
1205 int kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu, u32 *out)
1206 {
1207         int err;
1208
1209         if (WARN(IS_ENABLED(CONFIG_KVM_MIPS_VZ),
1210                  "Expect BadInstr/BadInstrP registers to be used with VZ\n"))
1211                 return -EINVAL;
1212
1213 retry:
1214         kvm_trap_emul_gva_lockless_begin(vcpu);
1215         err = get_user(*out, opc);
1216         kvm_trap_emul_gva_lockless_end(vcpu);
1217
1218         if (unlikely(err)) {
1219                 /*
1220                  * Try to handle the fault, maybe we just raced with a GVA
1221                  * invalidation.
1222                  */
1223                 err = kvm_trap_emul_gva_fault(vcpu, (unsigned long)opc,
1224                                               false);
1225                 if (unlikely(err)) {
1226                         kvm_err("%s: illegal address: %p\n",
1227                                 __func__, opc);
1228                         return -EFAULT;
1229                 }
1230
1231                 /* Hopefully it'll work now */
1232                 goto retry;
1233         }
1234         return 0;
1235 }