arch/x86/lib/memmove_64.S

   1 /* SPDX-License-Identifier: GPL-2.0 */
   2 /*
   3  * Normally compiler builtins are used, but sometimes the compiler calls out
   4  * of line code. Based on asm-i386/string.h.
   5  *
   6  * This assembly file is re-written from memmove_64.c file.
   7  *      - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
   8  */
   9 #include <linux/export.h>
  10 #include <linux/linkage.h>
  11 #include <asm/cpufeatures.h>
  12 #include <asm/alternative.h>
  13
  14 #undef memmove                  /* drop any macro named memmove so the plain name can be used for the alias/export at the end of the file */
  15
  16 .section .noinstr.text, "ax"    /* code below goes into .noinstr.text; "ax" = allocatable + executable */
  17
  18 /*
  19  * Implement memmove(). This can handle overlap between src and dst.
  20  * The copy runs forward or backward depending on how src and dest overlap.
  21  * Input:
  22  * rdi: dest
  23  * rsi: src
  24  * rdx: count
  25  * Overwritten on at least one path: rcx, rdx, rsi, rdi, r8-r11, flags.
  26  * Output:
  27  * rax: dest
  28  */
  29 SYM_FUNC_START(__memmove)
  30
  31         mov %rdi, %rax                  /* return value: the original dest */
  32
  33         /* Decide forward/backward copy mode: backward only if src < dest < src + count */
  34         cmp %rdi, %rsi
  35         jge .Lmemmove_begin_forward     /* src >= dest (signed compare): copy forward */
  36         mov %rsi, %r8
  37         add %rdx, %r8                   /* r8 = src + count */
  38         cmp %rdi, %r8
  39         jg 2f                           /* src + count > dest (signed compare): overlap, copy backward */
  40         /* CHECK_LEN sends counts below 32 to the tail code at 1; MEMMOVE_BYTES copies all count bytes with rep movsb and returns. */
  41 #define CHECK_LEN       cmp $0x20, %rdx; jb 1f
  42 #define MEMMOVE_BYTES   movq %rdx, %rcx; rep movsb; RET
  43 .Lmemmove_begin_forward:
  44         ALTERNATIVE_2 __stringify(CHECK_LEN), \
  45                       __stringify(CHECK_LEN; MEMMOVE_BYTES), X86_FEATURE_ERMS, \
  46                       __stringify(MEMMOVE_BYTES), X86_FEATURE_FSRM
  47         /* Of the three sequences above, only the CHECK_LEN-only one falls through to here (count >= 32); the MEMMOVE_BYTES ones end in RET. */
  48         /*
  49          * The movsq instruction has a high startup latency,
  50          * so copies below 680 bytes are done with general registers.
  51          */
  52         cmp  $680, %rdx
  53         jb      3f                      /* count < 680: 32-byte register loop */
  54         /*
  55          * movsq is only used when src and dest have the same low address byte.
  56          */
  57
  58         cmpb %dil, %sil
  59         je 4f                           /* low bytes equal: rep movsq path */
  60 3:
  61         sub $0x20, %rdx                 /* bias count by -32 so the loop's sub borrows once fewer than 32 bytes would remain */
  62         /*
  63          * We gobble 32 bytes forward in each loop.
  64          */
  65 5:
  66         sub $0x20, %rdx                 /* CF set => this iteration is the last full 32-byte chunk */
  67         movq 0*8(%rsi), %r11
  68         movq 1*8(%rsi), %r10
  69         movq 2*8(%rsi), %r9
  70         movq 3*8(%rsi), %r8
  71         leaq 4*8(%rsi), %rsi
  72
  73         movq %r11, 0*8(%rdi)
  74         movq %r10, 1*8(%rdi)
  75         movq %r9, 2*8(%rdi)
  76         movq %r8, 3*8(%rdi)
  77         leaq 4*8(%rdi), %rdi
  78         jae 5b                          /* tests the flags of the sub at 5: mov and lea leave them untouched */
  79         addq $0x20, %rdx                /* undo the bias: rdx = bytes still to copy (< 32) */
  80         jmp 1f
  81         /*
  82          * Forward copy by rep movsq. The last 8 source bytes are read first and stored last, covering any count % 8 remainder.
  83          */
  84         .p2align 4
  85 4:
  86         movq %rdx, %rcx
  87         movq -8(%rsi, %rdx), %r11       /* r11 = last 8 bytes of src, read before any store */
  88         lea -8(%rdi, %rdx), %r10        /* r10 = address of the last 8 bytes of dest */
  89         shrq $3, %rcx                   /* rcx = count / 8 quadwords */
  90         rep movsq
  91         movq %r11, (%r10)               /* store the saved tail */
  92         jmp 13f
  93 .Lmemmove_end_forward:
  94
  95         /*
  96          * Backward copy by rep movsq with DF set. The first 8 source bytes are read first and stored last.
  97          */
  98         .p2align 4
  99 7:
 100         movq %rdx, %rcx
 101         movq (%rsi), %r11               /* r11 = first 8 bytes of src, read before any store */
 102         movq %rdi, %r10                 /* r10 = start of dest */
 103         leaq -8(%rsi, %rdx), %rsi       /* rsi = last quadword of src */
 104         leaq -8(%rdi, %rdx), %rdi       /* rdi = last quadword of dest */
 105         shrq $3, %rcx                   /* rcx = count / 8 quadwords */
 106         std                             /* DF=1: movsq steps rsi/rdi downwards */
 107         rep movsq
 108         cld                             /* back to DF=0 before leaving */
 109         movq %r11, (%r10)               /* store the saved head */
 110         jmp 13f
 111
 112         /*
 113          * Backward copy entry: choose between tail code, register loop and movsq.
 114          */
 115         .p2align 4
 116 2:
 117         cmp $0x20, %rdx
 118         jb 1f                           /* count < 32: the tail code at 1 does all its loads before its stores */
 119         cmp $680, %rdx
 120         jb 6f                           /* count < 680: 32-byte register loop */
 121         cmp %dil, %sil                  /* byte-register operands: compares the low address bytes */
 122         je 7b                           /* low bytes equal: backward rep movsq path */
 123 6:
 124         /*
 125          * Advance src and dest to one past the end of each buffer.
 126          */
 127         addq %rdx, %rsi
 128         addq %rdx, %rdi
 129         subq $0x20, %rdx                /* bias count by -32 so the loop's sub borrows once fewer than 32 bytes would remain */
 130         /*
 131          * We gobble 32 bytes backward in each loop.
 132          */
 133 8:
 134         subq $0x20, %rdx                /* CF set => this iteration is the last full 32-byte chunk */
 135         movq -1*8(%rsi), %r11
 136         movq -2*8(%rsi), %r10
 137         movq -3*8(%rsi), %r9
 138         movq -4*8(%rsi), %r8
 139         leaq -4*8(%rsi), %rsi
 140
 141         movq %r11, -1*8(%rdi)
 142         movq %r10, -2*8(%rdi)
 143         movq %r9, -3*8(%rdi)
 144         movq %r8, -4*8(%rdi)
 145         leaq -4*8(%rdi), %rdi
 146         jae 8b                          /* tests the flags of the subq at 8: mov and lea leave them untouched */
 147         /*
 148          * Undo the bias, then move src and dest back to the start of the bytes not yet copied.
 149          */
 150         addq $0x20, %rdx
 151         subq %rdx, %rsi
 152         subq %rdx, %rdi
 153 1:                                      /* tail: rdx (< 32) bytes left, rsi/rdi point at their start */
 154         cmpq $16, %rdx
 155         jb 9f
 156         /*
 157          * Move data from 16 bytes to 31 bytes: 16 from the head and 16 from the tail, all loaded before any store.
 158          */
 159         movq 0*8(%rsi), %r11
 160         movq 1*8(%rsi), %r10
 161         movq -2*8(%rsi, %rdx), %r9
 162         movq -1*8(%rsi, %rdx), %r8
 163         movq %r11, 0*8(%rdi)
 164         movq %r10, 1*8(%rdi)
 165         movq %r9, -2*8(%rdi, %rdx)
 166         movq %r8, -1*8(%rdi, %rdx)
 167         jmp 13f
 168         .p2align 4
 169 9:
 170         cmpq $8, %rdx
 171         jb 10f
 172         /*
 173          * Move data from 8 bytes to 15 bytes: first and last quadword, both loaded before any store.
 174          */
 175         movq 0*8(%rsi), %r11
 176         movq -1*8(%rsi, %rdx), %r10
 177         movq %r11, 0*8(%rdi)
 178         movq %r10, -1*8(%rdi, %rdx)
 179         jmp 13f
 180 10:
 181         cmpq $4, %rdx
 182         jb 11f
 183         /*
 184          * Move data from 4 bytes to 7 bytes: first and last dword, both loaded before any store.
 185          */
 186         movl (%rsi), %r11d
 187         movl -4(%rsi, %rdx), %r10d
 188         movl %r11d, (%rdi)
 189         movl %r10d, -4(%rdi, %rdx)
 190         jmp 13f
 191 11:
 192         cmp $2, %rdx
 193         jb 12f
 194         /*
 195          * Move data from 2 bytes to 3 bytes: first and last word, both loaded before any store.
 196          */
 197         movw (%rsi), %r11w
 198         movw -2(%rsi, %rdx), %r10w
 199         movw %r11w, (%rdi)
 200         movw %r10w, -2(%rdi, %rdx)
 201         jmp 13f
 202 12:
 203         cmp $1, %rdx
 204         jb 13f                          /* count == 0: nothing left to copy */
 205         /*
 206          * Move data for 1 byte.
 207          */
 208         movb (%rsi), %r11b
 209         movb %r11b, (%rdi)
 210 13:
 211         RET                             /* common exit; rax still holds the original dest */
 212 SYM_FUNC_END(__memmove)
 213 EXPORT_SYMBOL(__memmove)
 214
 215 SYM_FUNC_ALIAS_MEMFUNC(memmove, __memmove)      /* presumably makes memmove an alias of __memmove; the macro is defined outside this file - confirm its linkage */
 216 EXPORT_SYMBOL(memmove)