Linux 6.7-rc7
[linux-modified.git] / arch / loongarch / lib / xor_template.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
4  *
5  * Template for XOR operations, instantiated in xor_simd.c.
6  *
7  * Expected preprocessor definitions:
8  *
9  * - LINE_WIDTH
10  * - XOR_FUNC_NAME(nr)
11  * - LD_INOUT_LINE(buf)
12  * - LD_AND_XOR_LINE(buf)
13  * - ST_LINE(buf)
14  */
15
16 void XOR_FUNC_NAME(2)(unsigned long bytes,
17                       unsigned long * __restrict v1,
18                       const unsigned long * __restrict v2)
19 {
20         unsigned long lines = bytes / LINE_WIDTH;
21
22         do {
23                 __asm__ __volatile__ (
24                         LD_INOUT_LINE(v1)
25                         LD_AND_XOR_LINE(v2)
26                         ST_LINE(v1)
27                 : : [v1] "r"(v1), [v2] "r"(v2) : "memory"
28                 );
29
30                 v1 += LINE_WIDTH / sizeof(unsigned long);
31                 v2 += LINE_WIDTH / sizeof(unsigned long);
32         } while (--lines > 0);
33 }
34
35 void XOR_FUNC_NAME(3)(unsigned long bytes,
36                       unsigned long * __restrict v1,
37                       const unsigned long * __restrict v2,
38                       const unsigned long * __restrict v3)
39 {
40         unsigned long lines = bytes / LINE_WIDTH;
41
42         do {
43                 __asm__ __volatile__ (
44                         LD_INOUT_LINE(v1)
45                         LD_AND_XOR_LINE(v2)
46                         LD_AND_XOR_LINE(v3)
47                         ST_LINE(v1)
48                 : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3) : "memory"
49                 );
50
51                 v1 += LINE_WIDTH / sizeof(unsigned long);
52                 v2 += LINE_WIDTH / sizeof(unsigned long);
53                 v3 += LINE_WIDTH / sizeof(unsigned long);
54         } while (--lines > 0);
55 }
56
57 void XOR_FUNC_NAME(4)(unsigned long bytes,
58                       unsigned long * __restrict v1,
59                       const unsigned long * __restrict v2,
60                       const unsigned long * __restrict v3,
61                       const unsigned long * __restrict v4)
62 {
63         unsigned long lines = bytes / LINE_WIDTH;
64
65         do {
66                 __asm__ __volatile__ (
67                         LD_INOUT_LINE(v1)
68                         LD_AND_XOR_LINE(v2)
69                         LD_AND_XOR_LINE(v3)
70                         LD_AND_XOR_LINE(v4)
71                         ST_LINE(v1)
72                 : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4)
73                 : "memory"
74                 );
75
76                 v1 += LINE_WIDTH / sizeof(unsigned long);
77                 v2 += LINE_WIDTH / sizeof(unsigned long);
78                 v3 += LINE_WIDTH / sizeof(unsigned long);
79                 v4 += LINE_WIDTH / sizeof(unsigned long);
80         } while (--lines > 0);
81 }
82
83 void XOR_FUNC_NAME(5)(unsigned long bytes,
84                       unsigned long * __restrict v1,
85                       const unsigned long * __restrict v2,
86                       const unsigned long * __restrict v3,
87                       const unsigned long * __restrict v4,
88                       const unsigned long * __restrict v5)
89 {
90         unsigned long lines = bytes / LINE_WIDTH;
91
92         do {
93                 __asm__ __volatile__ (
94                         LD_INOUT_LINE(v1)
95                         LD_AND_XOR_LINE(v2)
96                         LD_AND_XOR_LINE(v3)
97                         LD_AND_XOR_LINE(v4)
98                         LD_AND_XOR_LINE(v5)
99                         ST_LINE(v1)
100                 : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4),
101                     [v5] "r"(v5) : "memory"
102                 );
103
104                 v1 += LINE_WIDTH / sizeof(unsigned long);
105                 v2 += LINE_WIDTH / sizeof(unsigned long);
106                 v3 += LINE_WIDTH / sizeof(unsigned long);
107                 v4 += LINE_WIDTH / sizeof(unsigned long);
108                 v5 += LINE_WIDTH / sizeof(unsigned long);
109         } while (--lines > 0);
110 }