GNU Linux-libre 4.14.332-gnu1
arch/tile/kernel/unaligned.c
/*
 * Copyright 2013 Tilera Corporation. All Rights Reserved.
 *
 *   This program is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU General Public License
 *   as published by the Free Software Foundation, version 2.
 *
 *   This program is distributed in the hope that it will be useful, but
 *   WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 *   NON INFRINGEMENT.  See the GNU General Public License for
 *   more details.
 *
 * A code-rewriter that handles unaligned exceptions.
 */

#include <linux/smp.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/sched/debug.h>
#include <linux/sched/task.h>
#include <linux/thread_info.h>
#include <linux/uaccess.h>
#include <linux/mman.h>
#include <linux/types.h>
#include <linux/err.h>
#include <linux/extable.h>
#include <linux/compat.h>
#include <linux/prctl.h>
#include <asm/cacheflush.h>
#include <asm/traps.h>
#include <asm/unaligned.h>
#include <arch/abi.h>
#include <arch/spr_def.h>
#include <arch/opcode.h>


/*
 * This file handles unaligned exceptions for tile-Gx. Tilepro's
 * unaligned exceptions are handled in single_step.c instead.
 */

int unaligned_printk;

static int __init setup_unaligned_printk(char *str)
{
        long val;
        if (kstrtol(str, 0, &val) != 0)
                return 0;
        unaligned_printk = val;
        pr_info("Printk for each unaligned data access is %s\n",
                unaligned_printk ? "enabled" : "disabled");
        return 1;
}
__setup("unaligned_printk=", setup_unaligned_printk);
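
/*
 * Usage example: booting with "unaligned_printk=1" on the kernel
 * command line enables a pr_info() line for every fixed-up access;
 * "unaligned_printk=0" (the default) keeps the fixups silent.
 */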

unsigned int unaligned_fixup_count;

#ifdef __tilegx__

/*
 * Unaligned data JIT fixup code fragment. The reserved space is 128
 * bytes: the first 64-bit word saves the fault PC address, the second
 * word is the fault instruction bundle, followed by 14 JIT bundles.
 */

struct unaligned_jit_fragment {
        unsigned long       pc;
        tilegx_bundle_bits  bundle;
        tilegx_bundle_bits  insn[14];
};
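
/*
 * Layout sketch of one 128-byte fragment in the per-thread JIT area:
 *
 *   bytes   0..7    pc      - the faulting PC
 *   bytes   8..15   bundle  - the original faulting instruction bundle
 *   bytes  16..127  insn[]  - up to 14 rewritten bundles, typically
 *                             ending in an iret back to user code
 *
 * 8 + 8 + 14 * 8 = 128 bytes, matching the reserved space noted above.
 */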

/*
 * Check for a nop or fnop at the bundle's X0 pipeline.
 */

static bool is_bundle_x0_nop(tilegx_bundle_bits bundle)
{
        return (((get_UnaryOpcodeExtension_X0(bundle) ==
                  NOP_UNARY_OPCODE_X0) &&
                 (get_RRROpcodeExtension_X0(bundle) ==
                  UNARY_RRR_0_OPCODE_X0) &&
                 (get_Opcode_X0(bundle) ==
                  RRR_0_OPCODE_X0)) ||
                ((get_UnaryOpcodeExtension_X0(bundle) ==
                  FNOP_UNARY_OPCODE_X0) &&
                 (get_RRROpcodeExtension_X0(bundle) ==
                  UNARY_RRR_0_OPCODE_X0) &&
                 (get_Opcode_X0(bundle) ==
                  RRR_0_OPCODE_X0)));
}

/*
 * Check for a nop or fnop at the bundle's X1 pipeline.
 */

static bool is_bundle_x1_nop(tilegx_bundle_bits bundle)
{
        return (((get_UnaryOpcodeExtension_X1(bundle) ==
                  NOP_UNARY_OPCODE_X1) &&
                 (get_RRROpcodeExtension_X1(bundle) ==
                  UNARY_RRR_0_OPCODE_X1) &&
                 (get_Opcode_X1(bundle) ==
                  RRR_0_OPCODE_X1)) ||
                ((get_UnaryOpcodeExtension_X1(bundle) ==
                  FNOP_UNARY_OPCODE_X1) &&
                 (get_RRROpcodeExtension_X1(bundle) ==
                  UNARY_RRR_0_OPCODE_X1) &&
                 (get_Opcode_X1(bundle) ==
                  RRR_0_OPCODE_X1)));
}

/*
 * Check for a nop or fnop at the bundle's Y0 pipeline.
 */

static bool is_bundle_y0_nop(tilegx_bundle_bits bundle)
{
        return (((get_UnaryOpcodeExtension_Y0(bundle) ==
                  NOP_UNARY_OPCODE_Y0) &&
                 (get_RRROpcodeExtension_Y0(bundle) ==
                  UNARY_RRR_1_OPCODE_Y0) &&
                 (get_Opcode_Y0(bundle) ==
                  RRR_1_OPCODE_Y0)) ||
                ((get_UnaryOpcodeExtension_Y0(bundle) ==
                  FNOP_UNARY_OPCODE_Y0) &&
                 (get_RRROpcodeExtension_Y0(bundle) ==
                  UNARY_RRR_1_OPCODE_Y0) &&
                 (get_Opcode_Y0(bundle) ==
                  RRR_1_OPCODE_Y0)));
}

/*
 * Check for a nop or fnop at the bundle's Y1 pipeline.
 */

static bool is_bundle_y1_nop(tilegx_bundle_bits bundle)
{
        return (((get_UnaryOpcodeExtension_Y1(bundle) ==
                  NOP_UNARY_OPCODE_Y1) &&
                 (get_RRROpcodeExtension_Y1(bundle) ==
                  UNARY_RRR_1_OPCODE_Y1) &&
                 (get_Opcode_Y1(bundle) ==
                  RRR_1_OPCODE_Y1)) ||
                ((get_UnaryOpcodeExtension_Y1(bundle) ==
                  FNOP_UNARY_OPCODE_Y1) &&
                 (get_RRROpcodeExtension_Y1(bundle) ==
                  UNARY_RRR_1_OPCODE_Y1) &&
                 (get_Opcode_Y1(bundle) ==
                  RRR_1_OPCODE_Y1)));
}

/*
 * Test if a bundle's y0 and y1 pipelines are both nop or fnop.
 */

static bool is_y0_y1_nop(tilegx_bundle_bits bundle)
{
        return is_bundle_y0_nop(bundle) && is_bundle_y1_nop(bundle);
}

/*
 * Test if a bundle's x0 and x1 pipelines are both nop or fnop.
 */

static bool is_x0_x1_nop(tilegx_bundle_bits bundle)
{
        return is_bundle_x0_nop(bundle) && is_bundle_x1_nop(bundle);
}
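
/*
 * These predicates let the rewriter decide whether the non-faulting
 * pipelines of a bundle still carry real work once the memory op is
 * replaced; see how bundle_2_enable is cleared in jit_bundle_gen()
 * below when the remainder of the bundle is all nop/fnop.
 */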

/*
 * Find the destination and source registers of the faulting unaligned
 * access instruction at X1 or Y2. Also allocate up to 3 scratch
 * registers clob1, clob2 and clob3, which are guaranteed different from
 * any register used in the fault bundle. *r_alias reports whether any
 * instruction other than the unaligned load/store shares a register
 * with ra, rb or rd.
 */

static void find_regs(tilegx_bundle_bits bundle, uint64_t *rd, uint64_t *ra,
                      uint64_t *rb, uint64_t *clob1, uint64_t *clob2,
                      uint64_t *clob3, bool *r_alias)
{
        int i;
        uint64_t reg;
        uint64_t reg_map = 0, alias_reg_map = 0, map;
        bool alias = false;

        /*
         * Parse the fault bundle, find the potentially used registers and
         * mark the corresponding bits in reg_map and alias_reg_map. These
         * two bit maps are used to find the scratch registers and to
         * determine if there is a register alias.
         */
        if (bundle & TILEGX_BUNDLE_MODE_MASK) {  /* Y Mode Bundle. */

                reg = get_SrcA_Y2(bundle);
                reg_map |= 1ULL << reg;
                *ra = reg;
                reg = get_SrcBDest_Y2(bundle);
                reg_map |= 1ULL << reg;

                if (rd) {
                        /* Load. */
                        *rd = reg;
                        alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
                } else {
                        /* Store. */
                        *rb = reg;
                        alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
                }

                if (!is_bundle_y1_nop(bundle)) {
                        reg = get_SrcA_Y1(bundle);
                        reg_map |= (1ULL << reg);
                        map = (1ULL << reg);

                        reg = get_SrcB_Y1(bundle);
                        reg_map |= (1ULL << reg);
                        map |= (1ULL << reg);

                        reg = get_Dest_Y1(bundle);
                        reg_map |= (1ULL << reg);
                        map |= (1ULL << reg);

                        if (map & alias_reg_map)
                                alias = true;
                }

                if (!is_bundle_y0_nop(bundle)) {
                        reg = get_SrcA_Y0(bundle);
                        reg_map |= (1ULL << reg);
                        map = (1ULL << reg);

                        reg = get_SrcB_Y0(bundle);
                        reg_map |= (1ULL << reg);
                        map |= (1ULL << reg);

                        reg = get_Dest_Y0(bundle);
                        reg_map |= (1ULL << reg);
                        map |= (1ULL << reg);

                        if (map & alias_reg_map)
                                alias = true;
                }
        } else { /* X Mode Bundle. */

                reg = get_SrcA_X1(bundle);
                reg_map |= (1ULL << reg);
                *ra = reg;
                if (rd) {
                        /* Load. */
                        reg = get_Dest_X1(bundle);
                        reg_map |= (1ULL << reg);
                        *rd = reg;
                        alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
                } else {
                        /* Store. */
                        reg = get_SrcB_X1(bundle);
                        reg_map |= (1ULL << reg);
                        *rb = reg;
                        alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
                }

                if (!is_bundle_x0_nop(bundle)) {
                        reg = get_SrcA_X0(bundle);
                        reg_map |= (1ULL << reg);
                        map = (1ULL << reg);

                        reg = get_SrcB_X0(bundle);
                        reg_map |= (1ULL << reg);
                        map |= (1ULL << reg);

                        reg = get_Dest_X0(bundle);
                        reg_map |= (1ULL << reg);
                        map |= (1ULL << reg);

                        if (map & alias_reg_map)
                                alias = true;
                }
        }

        /*
         * "alias" indicates whether the unaligned access registers collide
         * with others in the same bundle. We simply test the all-register-
         * operand case (RRR) and ignore the immediate-operand cases. If a
         * bundle has no register alias, we may do the fixup in a simpler,
         * faster manner; so if an immediate field happens to match a
         * register number, we may end up falling back to the generic
         * handling.
         */

        *r_alias = alias;

        /* Flip bits on reg_map. */
        reg_map ^= -1ULL;

        /* Scan reg_map's lower TREG_SP (54) bits to find 3 set bits. */
        for (i = 0; i < TREG_SP; i++) {
                if (reg_map & (0x1ULL << i)) {
                        if (*clob1 == -1) {
                                *clob1 = i;
                        } else if (*clob2 == -1) {
                                *clob2 = i;
                        } else if (*clob3 == -1) {
                                *clob3 = i;
                                return;
                        }
                }
        }
}
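
/*
 * Worked example: for an X-mode bundle that uses only r0, r1 and r2,
 * reg_map has bits 0..2 set; after the flip every other bit is set, so
 * the scan above yields clob1 = r3, clob2 = r4, clob3 = r5 - the three
 * lowest-numbered registers (below TREG_SP) untouched by the bundle.
 */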

/*
 * Sanity check for registers ra, rb, rd and clob1/2/3. Return true if
 * any of them is unexpected.
 */

static bool check_regs(uint64_t rd, uint64_t ra, uint64_t rb,
                       uint64_t clob1, uint64_t clob2, uint64_t clob3)
{
        bool unexpected = false;
        if ((ra >= 56) && (ra != TREG_ZERO))
                unexpected = true;

        if ((clob1 >= 56) || (clob2 >= 56) || (clob3 >= 56))
                unexpected = true;

        if (rd != -1) {
                if ((rd >= 56) && (rd != TREG_ZERO))
                        unexpected = true;
        } else {
                if ((rb >= 56) && (rb != TREG_ZERO))
                        unexpected = true;
        }
        return unexpected;
}
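
/*
 * Rationale (for reference): only registers below 56 are usable as
 * plain operands here; 56 and up are architecturally special on
 * tilegx, with TREG_ZERO, the hard-wired zero register, being the one
 * benign exception tolerated for ra/rb/rd.
 */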


#define  GX_INSN_X0_MASK   ((1ULL << 31) - 1)
#define  GX_INSN_X1_MASK   (((1ULL << 31) - 1) << 31)
#define  GX_INSN_Y0_MASK   ((0xFULL << 27) | (0xFFFFFULL))
#define  GX_INSN_Y1_MASK   (GX_INSN_Y0_MASK << 31)
#define  GX_INSN_Y2_MASK   ((0x7FULL << 51) | (0x7FULL << 20))
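
/*
 * Field layout implied by the masks above: X0 occupies bundle bits
 * 0..30 and X1 bits 31..61, so an X bundle splits cleanly in two.
 * The Y2 slot is scattered - opcode/mode bits at 51..57 plus operand
 * bits at 20..26 - which is why GX_INSN_Y2_MASK has two components.
 */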

#ifdef __LITTLE_ENDIAN
#define  GX_INSN_BSWAP(_bundle_)    (_bundle_)
#else
#define  GX_INSN_BSWAP(_bundle_)    swab64(_bundle_)
#endif /* __LITTLE_ENDIAN */

/*
 * __JIT_CODE(.) creates template bundles in the .rodata.unalign_data
 * section. The corresponding static function jit_x#_###(.) generates a
 * partial or whole bundle based on the template and the given arguments.
 */

#define __JIT_CODE(_X_)                                         \
        asm (".pushsection .rodata.unalign_data, \"a\"\n"       \
             _X_"\n"                                            \
             ".popsection\n")
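
/*
 * For illustration: each template below is a real bundle assembled
 * into .rodata; the C wrapper masks it down to one pipeline slot and
 * ORs in freshly encoded operand fields. E.g.
 *
 *   frag.insn[n] = jit_x0_fnop() | jit_x1_ldna(rd, ra);
 *
 * builds one bundle with an fnop in X0 and "ldna rd, ra" in X1.
 */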

__JIT_CODE("__unalign_jit_x1_mtspr:   {mtspr 0,  r0}");
static tilegx_bundle_bits jit_x1_mtspr(int spr, int reg)
{
        extern  tilegx_bundle_bits __unalign_jit_x1_mtspr;
        return (GX_INSN_BSWAP(__unalign_jit_x1_mtspr) & GX_INSN_X1_MASK) |
                create_MT_Imm14_X1(spr) | create_SrcA_X1(reg);
}

__JIT_CODE("__unalign_jit_x1_mfspr:   {mfspr r0, 0}");
static tilegx_bundle_bits  jit_x1_mfspr(int reg, int spr)
{
        extern  tilegx_bundle_bits __unalign_jit_x1_mfspr;
        return (GX_INSN_BSWAP(__unalign_jit_x1_mfspr) & GX_INSN_X1_MASK) |
                create_MF_Imm14_X1(spr) | create_Dest_X1(reg);
}

__JIT_CODE("__unalign_jit_x0_addi:   {addi  r0, r0, 0; iret}");
static tilegx_bundle_bits  jit_x0_addi(int rd, int ra, int imm8)
{
        extern  tilegx_bundle_bits __unalign_jit_x0_addi;
        return (GX_INSN_BSWAP(__unalign_jit_x0_addi) & GX_INSN_X0_MASK) |
                create_Dest_X0(rd) | create_SrcA_X0(ra) |
                create_Imm8_X0(imm8);
}

__JIT_CODE("__unalign_jit_x1_ldna:   {ldna  r0, r0}");
static tilegx_bundle_bits  jit_x1_ldna(int rd, int ra)
{
        extern  tilegx_bundle_bits __unalign_jit_x1_ldna;
        return (GX_INSN_BSWAP(__unalign_jit_x1_ldna) & GX_INSN_X1_MASK) |
                create_Dest_X1(rd) | create_SrcA_X1(ra);
}

__JIT_CODE("__unalign_jit_x0_dblalign:   {dblalign r0, r0 ,r0}");
static tilegx_bundle_bits  jit_x0_dblalign(int rd, int ra, int rb)
{
        extern  tilegx_bundle_bits __unalign_jit_x0_dblalign;
        return (GX_INSN_BSWAP(__unalign_jit_x0_dblalign) & GX_INSN_X0_MASK) |
                create_Dest_X0(rd) | create_SrcA_X0(ra) |
                create_SrcB_X0(rb);
}

__JIT_CODE("__unalign_jit_x1_iret:   {iret}");
static tilegx_bundle_bits  jit_x1_iret(void)
{
        extern  tilegx_bundle_bits __unalign_jit_x1_iret;
        return GX_INSN_BSWAP(__unalign_jit_x1_iret) & GX_INSN_X1_MASK;
}

__JIT_CODE("__unalign_jit_x01_fnop:   {fnop;fnop}");
static tilegx_bundle_bits  jit_x0_fnop(void)
{
        extern  tilegx_bundle_bits __unalign_jit_x01_fnop;
        return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X0_MASK;
}

static tilegx_bundle_bits  jit_x1_fnop(void)
{
        extern  tilegx_bundle_bits __unalign_jit_x01_fnop;
        return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X1_MASK;
}

__JIT_CODE("__unalign_jit_y2_dummy:   {fnop; fnop; ld zero, sp}");
static tilegx_bundle_bits  jit_y2_dummy(void)
{
        extern  tilegx_bundle_bits __unalign_jit_y2_dummy;
        return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y2_MASK;
}

static tilegx_bundle_bits  jit_y1_fnop(void)
{
        extern  tilegx_bundle_bits __unalign_jit_y2_dummy;
        return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y1_MASK;
}

__JIT_CODE("__unalign_jit_x1_st1_add:  {st1_add r1, r0, 0}");
static tilegx_bundle_bits  jit_x1_st1_add(int ra, int rb, int imm8)
{
        extern  tilegx_bundle_bits __unalign_jit_x1_st1_add;
        return (GX_INSN_BSWAP(__unalign_jit_x1_st1_add) &
                (~create_SrcA_X1(-1)) &
                GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
                create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
}
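
/*
 * Note the extra ~create_SrcA_X1(-1) step above: this template encodes
 * "r1" in the SrcA field, so that field must be zeroed before the
 * caller's register is ORed in. Templates whose operands are all "r0"
 * can skip this because their operand fields are already zero; the
 * same trick reappears in jit_x1_st_add() and jit_x1_ld_add() below.
 */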

__JIT_CODE("__unalign_jit_x1_st:  {crc32_8 r1, r0, r0; st  r0, r0}");
static tilegx_bundle_bits  jit_x1_st(int ra, int rb)
{
        extern  tilegx_bundle_bits __unalign_jit_x1_st;
        return (GX_INSN_BSWAP(__unalign_jit_x1_st) & GX_INSN_X1_MASK) |
                create_SrcA_X1(ra) | create_SrcB_X1(rb);
}
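
/*
 * The crc32_8 in the template's X0 slot is just filler so the bundle
 * assembles; GX_INSN_X1_MASK (bits 31..61) strips it, leaving only the
 * st encoding. The same applies to the jit_x1_ld() template below.
 */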

__JIT_CODE("__unalign_jit_x1_st_add:  {st_add  r1, r0, 0}");
static tilegx_bundle_bits  jit_x1_st_add(int ra, int rb, int imm8)
{
        extern  tilegx_bundle_bits __unalign_jit_x1_st_add;
        return (GX_INSN_BSWAP(__unalign_jit_x1_st_add) &
                (~create_SrcA_X1(-1)) &
                GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
                create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
}

__JIT_CODE("__unalign_jit_x1_ld:  {crc32_8 r1, r0, r0; ld  r0, r0}");
static tilegx_bundle_bits  jit_x1_ld(int rd, int ra)
{
        extern  tilegx_bundle_bits __unalign_jit_x1_ld;
        return (GX_INSN_BSWAP(__unalign_jit_x1_ld) & GX_INSN_X1_MASK) |
                create_Dest_X1(rd) | create_SrcA_X1(ra);
}

__JIT_CODE("__unalign_jit_x1_ld_add:  {ld_add  r1, r0, 0}");
static tilegx_bundle_bits  jit_x1_ld_add(int rd, int ra, int imm8)
{
        extern  tilegx_bundle_bits __unalign_jit_x1_ld_add;
        return (GX_INSN_BSWAP(__unalign_jit_x1_ld_add) &
                (~create_Dest_X1(-1)) &
                GX_INSN_X1_MASK) | create_Dest_X1(rd) |
                create_SrcA_X1(ra) | create_Imm8_X1(imm8);
}

__JIT_CODE("__unalign_jit_x0_bfexts:  {bfexts r0, r0, 0, 0}");
static tilegx_bundle_bits  jit_x0_bfexts(int rd, int ra, int bfs, int bfe)
{
        extern  tilegx_bundle_bits __unalign_jit_x0_bfexts;
        return (GX_INSN_BSWAP(__unalign_jit_x0_bfexts) &
                GX_INSN_X0_MASK) |
                create_Dest_X0(rd) | create_SrcA_X0(ra) |
                create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
}

__JIT_CODE("__unalign_jit_x0_bfextu:  {bfextu r0, r0, 0, 0}");
static tilegx_bundle_bits  jit_x0_bfextu(int rd, int ra, int bfs, int bfe)
{
        extern  tilegx_bundle_bits __unalign_jit_x0_bfextu;
        return (GX_INSN_BSWAP(__unalign_jit_x0_bfextu) &
                GX_INSN_X0_MASK) |
                create_Dest_X0(rd) | create_SrcA_X0(ra) |
                create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
}

__JIT_CODE("__unalign_jit_x1_addi:  {bfextu r1, r1, 0, 0; addi r0, r0, 0}");
static tilegx_bundle_bits  jit_x1_addi(int rd, int ra, int imm8)
{
        extern  tilegx_bundle_bits __unalign_jit_x1_addi;
        return (GX_INSN_BSWAP(__unalign_jit_x1_addi) & GX_INSN_X1_MASK) |
                create_Dest_X1(rd) | create_SrcA_X1(ra) |
                create_Imm8_X1(imm8);
}

__JIT_CODE("__unalign_jit_x0_shrui:  {shrui r0, r0, 0; iret}");
static tilegx_bundle_bits  jit_x0_shrui(int rd, int ra, int imm6)
{
        extern  tilegx_bundle_bits __unalign_jit_x0_shrui;
        return (GX_INSN_BSWAP(__unalign_jit_x0_shrui) &
                GX_INSN_X0_MASK) |
                create_Dest_X0(rd) | create_SrcA_X0(ra) |
                create_ShAmt_X0(imm6);
}

__JIT_CODE("__unalign_jit_x0_rotli:  {rotli r0, r0, 0; iret}");
static tilegx_bundle_bits  jit_x0_rotli(int rd, int ra, int imm6)
{
        extern  tilegx_bundle_bits __unalign_jit_x0_rotli;
        return (GX_INSN_BSWAP(__unalign_jit_x0_rotli) &
                GX_INSN_X0_MASK) |
                create_Dest_X0(rd) | create_SrcA_X0(ra) |
                create_ShAmt_X0(imm6);
}

__JIT_CODE("__unalign_jit_x1_bnezt:  {bnezt r0, __unalign_jit_x1_bnezt}");
static tilegx_bundle_bits  jit_x1_bnezt(int ra, int broff)
{
        extern  tilegx_bundle_bits __unalign_jit_x1_bnezt;
        return (GX_INSN_BSWAP(__unalign_jit_x1_bnezt) &
                GX_INSN_X1_MASK) |
                create_SrcA_X1(ra) | create_BrOff_X1(broff);
}

#undef __JIT_CODE

/*
 * This function generates the unaligned fixup JIT.
 *
 * We first find the unaligned load/store instruction's destination and
 * source registers ra, rb and rd, plus 3 scratch registers, by calling
 * find_regs(). The 3 scratch clobbers must not alias any register used
 * in the fault bundle. We then analyze the fault bundle to determine
 * whether it is a load or store, the operand width, and whether there
 * is a branch or address increment, etc. Finally, the generated JIT is
 * copied into the JIT code area in user space.
 */

static
void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle,
                    int align_ctl)
{
        struct thread_info *info = current_thread_info();
        struct unaligned_jit_fragment frag;
        struct unaligned_jit_fragment *jit_code_area;
        tilegx_bundle_bits bundle_2 = 0;
        /* If bundle_2_enable = false, bundle_2 is a fnop/nop operation. */
        bool     bundle_2_enable = true;
        uint64_t ra = -1, rb = -1, rd = -1, clob1 = -1, clob2 = -1, clob3 = -1;
        /*
         * Indicates whether the unaligned access instruction's registers
         * collide with others in the same bundle.
         */
        bool     alias = false;
        bool     load_n_store = true;
        bool     load_store_signed = false;
        unsigned int  load_store_size = 8;
        bool     y1_br = false;  /* True for a branch in the same bundle at Y1. */
        int      y1_br_reg = 0;
        /* True for a link operation, i.e. jalr or lnk at Y1. */
        bool     y1_lr = false;
        int      y1_lr_reg = 0;
        bool     x1_add = false; /* True for a load/store ADD instruction at X1. */
        int      x1_add_imm8 = 0;
        bool     unexpected = false;
        int      n = 0, k;

        jit_code_area =
                (struct unaligned_jit_fragment *)(info->unalign_jit_base);

        memset((void *)&frag, 0, sizeof(frag));

        /* 0: X mode, otherwise: Y mode. */
        if (bundle & TILEGX_BUNDLE_MODE_MASK) {
                unsigned int mod, opcode;

                if (get_Opcode_Y1(bundle) == RRR_1_OPCODE_Y1 &&
                    get_RRROpcodeExtension_Y1(bundle) ==
                    UNARY_RRR_1_OPCODE_Y1) {

                        opcode = get_UnaryOpcodeExtension_Y1(bundle);

                        /*
                         * Test for a "jalr", "jalrp", "jr" or "jrp"
                         * instruction at the Y1 pipeline.
                         */
                        switch (opcode) {
                        case JALR_UNARY_OPCODE_Y1:
                        case JALRP_UNARY_OPCODE_Y1:
                                y1_lr = true;
                                y1_lr_reg = 55; /* Link register. */
                                /* FALLTHROUGH */
                        case JR_UNARY_OPCODE_Y1:
                        case JRP_UNARY_OPCODE_Y1:
                                y1_br = true;
                                y1_br_reg = get_SrcA_Y1(bundle);
                                break;
                        case LNK_UNARY_OPCODE_Y1:
                                /* "lnk" at the Y1 pipeline. */
                                y1_lr = true;
                                y1_lr_reg = get_Dest_Y1(bundle);
                                break;
                        }
                }

                opcode = get_Opcode_Y2(bundle);
                mod = get_Mode(bundle);

                /*
                 * bundle_2 is the bundle after making Y2 a dummy
                 * operation: "ld zero, sp".
                 */
                bundle_2 = (bundle & (~GX_INSN_Y2_MASK)) | jit_y2_dummy();

                /* Make Y1 an fnop if Y1 is a branch or lnk operation. */
                if (y1_br || y1_lr) {
                        bundle_2 &= ~(GX_INSN_Y1_MASK);
                        bundle_2 |= jit_y1_fnop();
                }

                if (is_y0_y1_nop(bundle_2))
                        bundle_2_enable = false;

                if (mod == MODE_OPCODE_YC2) {
                        /* Store. */
                        load_n_store = false;
                        load_store_size = 1 << opcode;
                        load_store_signed = false;
                        find_regs(bundle, 0, &ra, &rb, &clob1, &clob2,
                                  &clob3, &alias);
                        if (load_store_size > 8)
                                unexpected = true;
                } else {
                        /* Load. */
                        load_n_store = true;
                        if (mod == MODE_OPCODE_YB2) {
                                switch (opcode) {
                                case LD_OPCODE_Y2:
                                        load_store_signed = false;
                                        load_store_size = 8;
                                        break;
                                case LD4S_OPCODE_Y2:
                                        load_store_signed = true;
                                        load_store_size = 4;
                                        break;
                                case LD4U_OPCODE_Y2:
                                        load_store_signed = false;
                                        load_store_size = 4;
                                        break;
                                default:
                                        unexpected = true;
                                }
                        } else if (mod == MODE_OPCODE_YA2) {
                                if (opcode == LD2S_OPCODE_Y2) {
                                        load_store_signed = true;
                                        load_store_size = 2;
                                } else if (opcode == LD2U_OPCODE_Y2) {
                                        load_store_signed = false;
                                        load_store_size = 2;
                                } else
                                        unexpected = true;
                        } else
                                unexpected = true;
                        find_regs(bundle, &rd, &ra, &rb, &clob1, &clob2,
                                  &clob3, &alias);
                }
        } else {
                unsigned int opcode;

                /* bundle_2 is the bundle after making X1 an "fnop". */
                bundle_2 = (bundle & (~GX_INSN_X1_MASK)) | jit_x1_fnop();

                if (is_x0_x1_nop(bundle_2))
                        bundle_2_enable = false;

                if (get_Opcode_X1(bundle) == RRR_0_OPCODE_X1) {
                        opcode = get_UnaryOpcodeExtension_X1(bundle);

                        if (get_RRROpcodeExtension_X1(bundle) ==
                            UNARY_RRR_0_OPCODE_X1) {
                                load_n_store = true;
                                find_regs(bundle, &rd, &ra, &rb, &clob1,
                                          &clob2, &clob3, &alias);

                                switch (opcode) {
                                case LD_UNARY_OPCODE_X1:
                                        load_store_signed = false;
                                        load_store_size = 8;
                                        break;
                                case LD4S_UNARY_OPCODE_X1:
                                        load_store_signed = true;
                                        /* FALLTHROUGH */
                                case LD4U_UNARY_OPCODE_X1:
                                        load_store_size = 4;
                                        break;

                                case LD2S_UNARY_OPCODE_X1:
                                        load_store_signed = true;
                                        /* FALLTHROUGH */
                                case LD2U_UNARY_OPCODE_X1:
                                        load_store_size = 2;
                                        break;
                                default:
                                        unexpected = true;
                                }
                        } else {
                                load_n_store = false;
                                load_store_signed = false;
                                find_regs(bundle, 0, &ra, &rb,
                                          &clob1, &clob2, &clob3,
                                          &alias);

                                opcode = get_RRROpcodeExtension_X1(bundle);
                                switch (opcode) {
                                case ST_RRR_0_OPCODE_X1:
                                        load_store_size = 8;
                                        break;
                                case ST4_RRR_0_OPCODE_X1:
                                        load_store_size = 4;
                                        break;
                                case ST2_RRR_0_OPCODE_X1:
                                        load_store_size = 2;
                                        break;
                                default:
                                        unexpected = true;
                                }
                        }
                } else if (get_Opcode_X1(bundle) == IMM8_OPCODE_X1) {
                        load_n_store = true;
                        opcode = get_Imm8OpcodeExtension_X1(bundle);
                        switch (opcode) {
                        case LD_ADD_IMM8_OPCODE_X1:
                                load_store_size = 8;
                                break;

                        case LD4S_ADD_IMM8_OPCODE_X1:
                                load_store_signed = true;
                                /* FALLTHROUGH */
                        case LD4U_ADD_IMM8_OPCODE_X1:
                                load_store_size = 4;
                                break;

                        case LD2S_ADD_IMM8_OPCODE_X1:
                                load_store_signed = true;
                                /* FALLTHROUGH */
                        case LD2U_ADD_IMM8_OPCODE_X1:
                                load_store_size = 2;
                                break;

                        case ST_ADD_IMM8_OPCODE_X1:
                                load_n_store = false;
                                load_store_size = 8;
                                break;
                        case ST4_ADD_IMM8_OPCODE_X1:
                                load_n_store = false;
                                load_store_size = 4;
                                break;
                        case ST2_ADD_IMM8_OPCODE_X1:
                                load_n_store = false;
                                load_store_size = 2;
                                break;
                        default:
                                unexpected = true;
                        }

                        if (!unexpected) {
                                x1_add = true;
                                if (load_n_store)
                                        x1_add_imm8 = get_Imm8_X1(bundle);
                                else
                                        x1_add_imm8 = get_Dest_Imm8_X1(bundle);
                        }

                        find_regs(bundle, load_n_store ? (&rd) : NULL,
                                  &ra, &rb, &clob1, &clob2, &clob3, &alias);
                } else
                        unexpected = true;
        }

        /*
         * Some sanity checks for the register numbers extracted from the
         * fault bundle.
         */
        if (check_regs(rd, ra, rb, clob1, clob2, clob3))
                unexpected = true;

        /* Give a warning if register ra holds an aligned address. */
        if (!unexpected)
                WARN_ON(!((load_store_size - 1) & (regs->regs[ra])));

        /*
         * If the fault came from kernel space, we only need to take care
         * of unaligned accesses from the "get_user/put_user" macros
         * defined in "uaccess.h". Basically, we will handle bundles like:
         * {ld/2u/4s rd, ra; movei rx, 0} or {st/2/4 ra, rb; movei rx, 0}
         * (refer to "arch/tile/include/asm/uaccess.h" for details).
         * For either a load or a store, the byte-wise operation is
         * performed by calling get_user() or put_user(). If the macro
         * returns a non-zero value, that value is placed in rx; otherwise
         * rx is set to zero. Finally, make pc point to the next bundle
         * and return.
         */

        if (EX1_PL(regs->ex1) != USER_PL) {

                unsigned long rx = 0;
                unsigned long x = 0, ret = 0;

                if (y1_br || y1_lr || x1_add ||
                    (load_store_signed !=
                     (load_n_store && load_store_size == 4))) {
                        /*
                         * Can't handle a branch, a link, a sign-extension
                         * mismatch or a load/store add here.
                         */
                        unexpected = true;
                } else if (!unexpected) {
                        if (bundle & TILEGX_BUNDLE_MODE_MASK) {
                                /*
                                 * The fault bundle is Y mode.
                                 * Check if Y1 and Y0 have the form
                                 * { movei rx, 0; nop/fnop }; if so,
                                 * find rx.
                                 */

                                if ((get_Opcode_Y1(bundle) == ADDI_OPCODE_Y1)
                                    && (get_SrcA_Y1(bundle) == TREG_ZERO) &&
                                    (get_Imm8_Y1(bundle) == 0) &&
                                    is_bundle_y0_nop(bundle)) {
                                        rx = get_Dest_Y1(bundle);
                                } else if ((get_Opcode_Y0(bundle) ==
                                            ADDI_OPCODE_Y0) &&
                                           (get_SrcA_Y0(bundle) == TREG_ZERO) &&
                                           (get_Imm8_Y0(bundle) == 0) &&
                                           is_bundle_y1_nop(bundle)) {
                                        rx = get_Dest_Y0(bundle);
                                } else {
                                        unexpected = true;
                                }
                        } else {
                                /*
                                 * The fault bundle is X mode.
                                 * Check if X0 is 'movei rx, 0';
                                 * if so, find rx.
                                 */

                                if ((get_Opcode_X0(bundle) == IMM8_OPCODE_X0)
                                    && (get_Imm8OpcodeExtension_X0(bundle) ==
                                        ADDI_IMM8_OPCODE_X0) &&
                                    (get_SrcA_X0(bundle) == TREG_ZERO) &&
                                    (get_Imm8_X0(bundle) == 0)) {
                                        rx = get_Dest_X0(bundle);
                                } else {
                                        unexpected = true;
                                }
                        }

                        /* rx should be less than 56. */
                        if (!unexpected && (rx >= 56))
                                unexpected = true;
                }

                if (!search_exception_tables(regs->pc)) {
                        /* No fixup in the exception tables for the pc. */
                        unexpected = true;
                }

                if (unexpected) {
                        /* Unexpected unalign kernel fault. */
                        struct task_struct *tsk = validate_current();

                        bust_spinlocks(1);

                        show_regs(regs);

                        if (unlikely(tsk->pid < 2)) {
                                panic("Kernel unalign fault running %s!",
                                      tsk->pid ? "init" : "the idle task");
                        }
#ifdef SUPPORT_DIE
                        die("Oops", regs);
#endif
                        bust_spinlocks(0);

                        do_group_exit(SIGKILL);

                } else {
                        unsigned long i, b = 0;
                        unsigned char *ptr =
                                (unsigned char *)regs->regs[ra];
                        if (load_n_store) {
                                /* Handle get_user(x, ptr). */
                                for (i = 0; i < load_store_size; i++) {
                                        ret = get_user(b, ptr++);
                                        if (!ret) {
                                                /* Success! Update x. */
#ifdef __LITTLE_ENDIAN
                                                x |= (b << (8 * i));
#else
                                                x <<= 8;
                                                x |= b;
#endif /* __LITTLE_ENDIAN */
                                        } else {
                                                x = 0;
                                                break;
                                        }
                                }

                                /* Sign-extend 4-byte loads. */
                                if (load_store_size == 4)
                                        x = (long)(int)x;

                                /* Set register rd. */
                                regs->regs[rd] = x;

                                /* Set register rx. */
                                regs->regs[rx] = ret;

                                /* Bump pc. */
                                regs->pc += 8;

                        } else {
                                /* Handle put_user(x, ptr). */
                                x = regs->regs[rb];
#ifdef __LITTLE_ENDIAN
                                b = x;
#else
                                /*
                                 * Swap x so that it is stored from low
                                 * to high memory, the same as in the
                                 * little-endian case.
                                 */
                                switch (load_store_size) {
                                case 8:
                                        b = swab64(x);
                                        break;
                                case 4:
                                        b = swab32(x);
                                        break;
                                case 2:
                                        b = swab16(x);
                                        break;
                                }
#endif /* __LITTLE_ENDIAN */
                                for (i = 0; i < load_store_size; i++) {
                                        ret = put_user(b, ptr++);
                                        if (ret)
                                                break;
                                        /* Success! Shift 1 byte. */
                                        b >>= 8;
                                }
                                /* Set register rx. */
                                regs->regs[rx] = ret;

                                /* Bump pc. */
                                regs->pc += 8;
                        }
                }

                unaligned_fixup_count++;

                if (unaligned_printk) {
                        pr_info("%s/%d - Unalign fixup for kernel access to userspace %lx\n",
                                current->comm, current->pid, regs->regs[ra]);
                }

                /* Done! Return to the exception handler. */
                return;
        }

        if ((align_ctl == 0) || unexpected) {
                siginfo_t info = {
                        .si_signo = SIGBUS,
                        .si_code = BUS_ADRALN,
                        .si_addr = (unsigned char __user *)0
                };
                if (unaligned_printk)
                        pr_info("Unalign bundle: unexp @%llx, %llx\n",
                                (unsigned long long)regs->pc,
                                (unsigned long long)bundle);

                if (ra < 56) {
                        unsigned long uaa = (unsigned long)regs->regs[ra];
                        /* Set the bus address. */
                        info.si_addr = (unsigned char __user *)uaa;
                }

                unaligned_fixup_count++;

                trace_unhandled_signal("unaligned fixup trap", regs,
                                       (unsigned long)info.si_addr, SIGBUS);
                force_sig_info(info.si_signo, &info, current);
                return;
        }

#ifdef __LITTLE_ENDIAN
#define UA_FIXUP_ADDR_DELTA          1
#define UA_FIXUP_BFEXT_START(_B_)    0
#define UA_FIXUP_BFEXT_END(_B_)     (8 * (_B_) - 1)
#else /* __BIG_ENDIAN */
#define UA_FIXUP_ADDR_DELTA          -1
#define UA_FIXUP_BFEXT_START(_B_)   (64 - 8 * (_B_))
#define UA_FIXUP_BFEXT_END(_B_)      63
#endif /* __LITTLE_ENDIAN */
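
/*
 * Example: a 4-byte little-endian load is widened to 8 bytes and then
 * narrowed back with bfextu/bfexts over bits 0..31 (start 0, end
 * 8 * 4 - 1 = 31); big-endian instead keeps bits 32..63. The byte-copy
 * loops walk forward (delta +1) or backward (delta -1) accordingly.
 */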

        if ((ra != rb) && (rd != TREG_SP) && !alias &&
            !y1_br && !y1_lr && !x1_add) {
                /*
                 * Simple case: ra != rb, no register alias found, and
                 * no branch or link. This will be the majority. We can
                 * do a little better for the simple case than the
                 * generic scheme below.
                 */
                if (!load_n_store) {
                        /*
                         * Simple store: ra != rb, no need for a scratch
                         * register. Just store and rotate right bytewise.
                         */
#ifdef __BIG_ENDIAN
                        frag.insn[n++] =
                                jit_x0_addi(ra, ra, load_store_size - 1) |
                                jit_x1_fnop();
#endif /* __BIG_ENDIAN */
                        for (k = 0; k < load_store_size; k++) {
                                /* Store a byte. */
                                frag.insn[n++] =
                                        jit_x0_rotli(rb, rb, 56) |
                                        jit_x1_st1_add(ra, rb,
                                                       UA_FIXUP_ADDR_DELTA);
                        }
#ifdef __BIG_ENDIAN
                        frag.insn[n] = jit_x1_addi(ra, ra, 1);
#else
                        frag.insn[n] = jit_x1_addi(ra, ra,
                                                   -1 * load_store_size);
#endif /* __LITTLE_ENDIAN */

                        if (load_store_size == 8) {
                                frag.insn[n] |= jit_x0_fnop();
                        } else if (load_store_size == 4) {
                                frag.insn[n] |= jit_x0_rotli(rb, rb, 32);
                        } else { /* = 2 */
                                frag.insn[n] |= jit_x0_rotli(rb, rb, 16);
                        }
                        n++;
                        if (bundle_2_enable)
                                frag.insn[n++] = bundle_2;
                        frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
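                        /*
                         * For instance (little-endian, 2-byte store,
                         * ra != rb), the fragment is roughly:
                         *
                         *   { rotli rb, rb, 56 ; st1_add ra, rb, 1 }  x2
                         *   { rotli rb, rb, 16 ; addi ra, ra, -2 }
                         *   [ bundle_2, if the rest of the bundle did
                         *     real work ]
                         *   { fnop ; iret }
                         *
                         * leaving both ra and rb holding their original
                         * values on return.
                         */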
                } else {
                        if (rd == ra) {
                                /* Use two clobber registers: clob1/2. */
                                frag.insn[n++] =
                                        jit_x0_addi(TREG_SP, TREG_SP, -16) |
                                        jit_x1_fnop();
                                frag.insn[n++] =
                                        jit_x0_addi(clob1, ra, 7) |
                                        jit_x1_st_add(TREG_SP, clob1, -8);
                                frag.insn[n++] =
                                        jit_x0_addi(clob2, ra, 0) |
                                        jit_x1_st(TREG_SP, clob2);
                                frag.insn[n++] =
                                        jit_x0_fnop() |
                                        jit_x1_ldna(rd, ra);
                                frag.insn[n++] =
                                        jit_x0_fnop() |
                                        jit_x1_ldna(clob1, clob1);
                                /*
                                 * Note: we must make sure that rd is not
                                 * sp. Recover clob1/2 from the stack.
                                 */
                                frag.insn[n++] =
                                        jit_x0_dblalign(rd, clob1, clob2) |
                                        jit_x1_ld_add(clob2, TREG_SP, 8);
                                frag.insn[n++] =
                                        jit_x0_fnop() |
                                        jit_x1_ld_add(clob1, TREG_SP, 16);
                        } else {
                                /* Use one clobber register: clob1 only. */
                                frag.insn[n++] =
                                        jit_x0_addi(TREG_SP, TREG_SP, -16) |
                                        jit_x1_fnop();
                                frag.insn[n++] =
                                        jit_x0_addi(clob1, ra, 7) |
                                        jit_x1_st(TREG_SP, clob1);
                                frag.insn[n++] =
                                        jit_x0_fnop() |
                                        jit_x1_ldna(rd, ra);
                                frag.insn[n++] =
                                        jit_x0_fnop() |
                                        jit_x1_ldna(clob1, clob1);
                                /*
                                 * Note: we must make sure that rd is not
                                 * sp. Recover clob1 from the stack.
                                 */
                                frag.insn[n++] =
                                        jit_x0_dblalign(rd, clob1, ra) |
                                        jit_x1_ld_add(clob1, TREG_SP, 16);
                        }

                        if (bundle_2_enable)
                                frag.insn[n++] = bundle_2;
                        /*
                         * For a non-8-byte load, extract the
                         * corresponding bytes and sign-extend (or
                         * zero-extend).
                         */
                        if (load_store_size == 4) {
                                if (load_store_signed)
                                        frag.insn[n++] =
                                                jit_x0_bfexts(
                                                        rd, rd,
                                                        UA_FIXUP_BFEXT_START(4),
                                                        UA_FIXUP_BFEXT_END(4)) |
                                                jit_x1_fnop();
                                else
                                        frag.insn[n++] =
                                                jit_x0_bfextu(
                                                        rd, rd,
                                                        UA_FIXUP_BFEXT_START(4),
                                                        UA_FIXUP_BFEXT_END(4)) |
                                                jit_x1_fnop();
                        } else if (load_store_size == 2) {
                                if (load_store_signed)
                                        frag.insn[n++] =
                                                jit_x0_bfexts(
                                                        rd, rd,
                                                        UA_FIXUP_BFEXT_START(2),
                                                        UA_FIXUP_BFEXT_END(2)) |
                                                jit_x1_fnop();
                                else
                                        frag.insn[n++] =
                                                jit_x0_bfextu(
                                                        rd, rd,
                                                        UA_FIXUP_BFEXT_START(2),
                                                        UA_FIXUP_BFEXT_END(2)) |
                                                jit_x1_fnop();
                        }

                        frag.insn[n++] =
                                jit_x0_fnop()  |
                                jit_x1_iret();
                }
        } else if (!load_n_store) {

                /*
                 * Generic memory store cases: use 3 clobber registers.
                 *
                 * Allocate space for saving clob2, clob1 and clob3 on the
                 * user's stack. Register clob3 points to where clob2 is
                 * saved, followed by clob1 and clob3 from high to low
                 * memory.
                 */
                frag.insn[n++] =
                        jit_x0_addi(TREG_SP, TREG_SP, -32)    |
                        jit_x1_fnop();
                frag.insn[n++] =
                        jit_x0_addi(clob3, TREG_SP, 16)  |
                        jit_x1_st_add(TREG_SP, clob3, 8);
#ifdef __LITTLE_ENDIAN
                frag.insn[n++] =
                        jit_x0_addi(clob1, ra, 0)   |
                        jit_x1_st_add(TREG_SP, clob1, 8);
#else
                frag.insn[n++] =
                        jit_x0_addi(clob1, ra, load_store_size - 1)   |
                        jit_x1_st_add(TREG_SP, clob1, 8);
#endif
                if (load_store_size == 8) {
                        /*
                         * We save one byte at a time - not for speed,
                         * but for compact code. After each store, the
                         * data source register is rotated right by one
                         * byte, so it is unchanged after 8 stores.
                         */
                        frag.insn[n++] =
                                jit_x0_addi(clob2, TREG_ZERO, 7)     |
                                jit_x1_st_add(TREG_SP, clob2, 16);
                        frag.insn[n++] =
                                jit_x0_rotli(rb, rb, 56)      |
                                jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
                        frag.insn[n++] =
                                jit_x0_addi(clob2, clob2, -1) |
                                jit_x1_bnezt(clob2, -1);
                        frag.insn[n++] =
                                jit_x0_fnop()                 |
                                jit_x1_addi(clob2, y1_br_reg, 0);
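                        /*
                         * The three bundles above form a counted loop:
                         * clob2 starts at 7 and the bnezt with branch
                         * offset -1 jumps back one bundle to the
                         * st1_add, so the store/decrement pair repeats
                         * until all 8 bytes are out; clob2 is then
                         * reloaded with y1_br_reg for the branch fix-up
                         * further down.
                         */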
                } else if (load_store_size == 4) {
                        frag.insn[n++] =
                                jit_x0_addi(clob2, TREG_ZERO, 3)     |
                                jit_x1_st_add(TREG_SP, clob2, 16);
                        frag.insn[n++] =
                                jit_x0_rotli(rb, rb, 56)      |
                                jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
                        frag.insn[n++] =
                                jit_x0_addi(clob2, clob2, -1) |
                                jit_x1_bnezt(clob2, -1);
                        /*
                         * Same as the 8-byte case, but rb needs another
                         * 4-byte rotate to be recovered after the
                         * 4-byte store.
                         */
                        frag.insn[n++] = jit_x0_rotli(rb, rb, 32)      |
                                jit_x1_addi(clob2, y1_br_reg, 0);
                } else { /* = 2 */
                        frag.insn[n++] =
                                jit_x0_addi(clob2, rb, 0)     |
                                jit_x1_st_add(TREG_SP, clob2, 16);
                        for (k = 0; k < 2; k++) {
                                frag.insn[n++] =
                                        jit_x0_shrui(rb, rb, 8)  |
                                        jit_x1_st1_add(clob1, rb,
                                                       UA_FIXUP_ADDR_DELTA);
                        }
                        frag.insn[n++] =
                                jit_x0_addi(rb, clob2, 0)       |
                                jit_x1_addi(clob2, y1_br_reg, 0);
                }

                if (bundle_2_enable)
                        frag.insn[n++] = bundle_2;

                if (y1_lr) {
                        frag.insn[n++] =
                                jit_x0_fnop()                    |
                                jit_x1_mfspr(y1_lr_reg,
                                             SPR_EX_CONTEXT_0_0);
                }
                if (y1_br) {
                        frag.insn[n++] =
                                jit_x0_fnop()                    |
                                jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
                                             clob2);
                }
                if (x1_add) {
                        frag.insn[n++] =
                                jit_x0_addi(ra, ra, x1_add_imm8) |
                                jit_x1_ld_add(clob2, clob3, -8);
                } else {
                        frag.insn[n++] =
                                jit_x0_fnop()                    |
                                jit_x1_ld_add(clob2, clob3, -8);
                }
                frag.insn[n++] =
                        jit_x0_fnop()   |
                        jit_x1_ld_add(clob1, clob3, -8);
                frag.insn[n++] = jit_x0_fnop()   | jit_x1_ld(clob3, clob3);
                frag.insn[n++] = jit_x0_fnop()   | jit_x1_iret();
1257
	} else {
		/*
		 * Generic memory load cases.
		 *
		 * Allocate space for saving clob1/2/3 on the user's
		 * stack. Register clob3 points to where clob1 is saved,
		 * followed by clob2 and clob3 from high to low memory.
		 */

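		/*
		 * Save-area layout produced below (a sketch; sp0 is the
		 * user's original stack pointer, which the three st_add
		 * post-increments restore by the end of the prologue):
		 *
		 *   sp0 - 16:  saved clob1   <-- clob3 points here
		 *   sp0 - 24:  saved clob2
		 *   sp0 - 32:  saved clob3
		 */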
		frag.insn[n++] =
			jit_x0_addi(TREG_SP, TREG_SP, -32) |
			jit_x1_fnop();
		frag.insn[n++] =
			jit_x0_addi(clob3, TREG_SP, 16) |
			jit_x1_st_add(TREG_SP, clob3, 8);
		frag.insn[n++] =
			jit_x0_addi(clob2, ra, 0) |
			jit_x1_st_add(TREG_SP, clob2, 8);

		if (y1_br) {
			frag.insn[n++] =
				jit_x0_addi(clob1, y1_br_reg, 0) |
				jit_x1_st_add(TREG_SP, clob1, 16);
		} else {
			frag.insn[n++] =
				jit_x0_fnop() |
				jit_x1_st_add(TREG_SP, clob1, 16);
		}

		if (bundle_2_enable)
			frag.insn[n++] = bundle_2;

		if (y1_lr) {
			frag.insn[n++] =
				jit_x0_fnop()  |
				jit_x1_mfspr(y1_lr_reg,
					     SPR_EX_CONTEXT_0_0);
		}

		if (y1_br) {
			frag.insn[n++] =
				jit_x0_fnop() |
				jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
					     clob1);
		}

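		/*
		 * The unaligned load itself uses the usual tile-Gx idiom
		 * (roughly): "ldna" fetches the 8-byte-aligned word
		 * containing the address in clob2, a second ldna at
		 * clob2 + 7 fetches the aligned word covering the final
		 * byte, and "dblalign" funnel-shifts the pair by the low
		 * bits of clob2 so the unaligned value lands in rd.
		 */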
		frag.insn[n++] =
			jit_x0_addi(clob1, clob2, 7)      |
			jit_x1_ldna(rd, clob2);
		frag.insn[n++] =
			jit_x0_fnop()                     |
			jit_x1_ldna(clob1, clob1);
		frag.insn[n++] =
			jit_x0_dblalign(rd, clob1, clob2) |
			jit_x1_ld_add(clob1, clob3, -8);
		if (x1_add) {
			frag.insn[n++] =
				jit_x0_addi(ra, ra, x1_add_imm8) |
				jit_x1_ld_add(clob2, clob3, -8);
		} else {
			frag.insn[n++] =
				jit_x0_fnop()  |
				jit_x1_ld_add(clob2, clob3, -8);
		}

		frag.insn[n++] =
			jit_x0_fnop() |
			jit_x1_ld(clob3, clob3);

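		/*
		 * For sub-word loads the value is now in the low bytes of
		 * rd; the bfexts/bfextu bundles below sign- or zero-extend
		 * the low 4 or 2 bytes to the full 64-bit register.
		 */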
		if (load_store_size == 4) {
			if (load_store_signed)
				frag.insn[n++] =
					jit_x0_bfexts(
						rd, rd,
						UA_FIXUP_BFEXT_START(4),
						UA_FIXUP_BFEXT_END(4)) |
					jit_x1_fnop();
			else
				frag.insn[n++] =
					jit_x0_bfextu(
						rd, rd,
						UA_FIXUP_BFEXT_START(4),
						UA_FIXUP_BFEXT_END(4)) |
					jit_x1_fnop();
		} else if (load_store_size == 2) {
			if (load_store_signed)
				frag.insn[n++] =
					jit_x0_bfexts(
						rd, rd,
						UA_FIXUP_BFEXT_START(2),
						UA_FIXUP_BFEXT_END(2)) |
					jit_x1_fnop();
			else
				frag.insn[n++] =
					jit_x0_bfextu(
						rd, rd,
						UA_FIXUP_BFEXT_START(2),
						UA_FIXUP_BFEXT_END(2)) |
					jit_x1_fnop();
		}

		frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
	}

	/* Max JIT bundle count is 14. */
	WARN_ON(n > 14);

	if (!unexpected) {
		int status = 0;
		int idx = (regs->pc >> 3) &
			((1ULL << (PAGE_SHIFT - UNALIGN_JIT_SHIFT)) - 1);
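		/*
		 * Map the faulting pc to a fragment slot: shift by 3
		 * since bundles are 8 bytes, then mask down to the number
		 * of fragments in the one-page JIT area, so distant pcs
		 * may share (and overwrite) a slot.
		 */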

		frag.pc = regs->pc;
		frag.bundle = bundle;

		if (unaligned_printk) {
			pr_info("%s/%d, Unalign fixup: pc=%lx bundle=%lx %d %d %d %d %d %d %d %d\n",
				current->comm, current->pid,
				(unsigned long)frag.pc,
				(unsigned long)frag.bundle,
				(int)alias, (int)rd, (int)ra,
				(int)rb, (int)bundle_2_enable,
				(int)y1_lr, (int)y1_br, (int)x1_add);

			for (k = 0; k < n; k += 2)
				pr_info("[%d] %016llx %016llx\n",
					k, (unsigned long long)frag.insn[k],
					(unsigned long long)frag.insn[k+1]);
		}

		/* Swap the bundle byte order on big-endian systems. */
#ifdef __BIG_ENDIAN
		frag.bundle = GX_INSN_BSWAP(frag.bundle);
		for (k = 0; k < n; k++)
			frag.insn[k] = GX_INSN_BSWAP(frag.insn[k]);
#endif /* __BIG_ENDIAN */

		status = copy_to_user((void __user *)&jit_code_area[idx],
				      &frag, sizeof(frag));
		if (status) {
			/* Failed to copy the JIT into userland; send SIGSEGV. */
			siginfo_t info = {
				.si_signo = SIGSEGV,
				.si_code = SEGV_MAPERR,
				.si_addr = (void __user *)&jit_code_area[idx]
			};

			pr_warn("Unalign fixup: pid=%d %s jit_code_area=%llx\n",
				current->pid, current->comm,
				(unsigned long long)&jit_code_area[idx]);

			trace_unhandled_signal("segfault in unalign fixup",
					       regs,
					       (unsigned long)info.si_addr,
					       SIGSEGV);
			force_sig_info(info.si_signo, &info, current);
			return;
		}

		/* Do a cheap, racy increment; the count need not be exact. */
		unaligned_fixup_count++;
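		/*
		 * Flush so instruction fetch sees the fragment we just
		 * wrote through the data path.
		 */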
		__flush_icache_range((unsigned long)&jit_code_area[idx],
				     (unsigned long)&jit_code_area[idx] +
				     sizeof(frag));

		/* Set up SPR_EX_CONTEXT_0_0/1 for returning to user program. */
		__insn_mtspr(SPR_EX_CONTEXT_0_0, regs->pc + 8);
		__insn_mtspr(SPR_EX_CONTEXT_0_1, PL_ICS_EX1(USER_PL, 0));

		/* Point pc at the start of the new JIT fragment. */
		regs->pc = (unsigned long)&jit_code_area[idx].insn[0];
		/* Set ICS in SPR_EX_CONTEXT_K_1. */
		regs->ex1 = PL_ICS_EX1(USER_PL, 1);
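		/*
		 * Two-stage return (a sketch): the handler "iret"s into
		 * the JIT fragment with ICS set, so the fragment runs
		 * without interruption; the fragment's own final "iret"
		 * then consumes SPR_EX_CONTEXT_0_0/1 as set above and
		 * resumes the user program (normally at pc + 8) with ICS
		 * clear.
		 */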
	}
}


/*
 * C function that generates the unaligned-data JIT fragment; called from
 * the unaligned-data interrupt handler.
 *
 * First check whether the unaligned fixup is disabled, the exception did
 * not come from user space, or the sp register points to an unaligned
 * address; if so, generate a SIGBUS. Then map a page into user space as
 * the JIT area if one is not mapped yet, and generate the JIT code by
 * calling jit_bundle_gen(). After that, return to the exception handler.
 *
 * The exception handler will "iret" to the newly generated JIT code after
 * restoring caller-saved registers. The JIT code in turn performs another
 * "iret" to resume the user's program.
 */

void do_unaligned(struct pt_regs *regs, int vecnum)
{
	tilegx_bundle_bits __user *pc;
	tilegx_bundle_bits bundle;
	struct thread_info *info = current_thread_info();
	int align_ctl;

	/* Check the per-process unaligned fixup control. */
	align_ctl = unaligned_fixup;
	switch (task_thread_info(current)->align_ctl) {
	case PR_UNALIGN_NOPRINT:
		align_ctl = 1;
		break;
	case PR_UNALIGN_SIGBUS:
		align_ctl = 0;
		break;
	}
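	/*
	 * These per-task values come from prctl(PR_SET_UNALIGN, ...):
	 * PR_UNALIGN_NOPRINT requests silent fixup and PR_UNALIGN_SIGBUS
	 * requests a SIGBUS instead; either one overrides the global
	 * "unaligned_fixup" setting.
	 */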

	/* Enable interrupts in order to access userland. */
	local_irq_enable();

	/*
	 * If the fault came from kernel space, there are two choices:
	 * (a) unaligned_fixup < 1: first apply the get_user/put_user
	 *     exception-table fixup so the access returns -EFAULT; if
	 *     there is no fixup entry, simply panic the kernel.
	 * (b) unaligned_fixup >= 1: try to fix the unaligned access if
	 *     it was triggered by the get_user/put_user() macros; panic
	 *     the kernel if it is not fixable.
	 */

	if (EX1_PL(regs->ex1) != USER_PL) {

		if (align_ctl < 1) {
			unaligned_fixup_count++;
			/* Exception from kernel space; try to fix it up. */
			if (fixup_exception(regs)) {
				if (unaligned_printk)
					pr_info("Unalign fixup: %d %llx @%llx\n",
						(int)unaligned_fixup,
						(unsigned long long)regs->ex1,
						(unsigned long long)regs->pc);
			} else {
				/* Not fixable; panic. */
				panic("Unalign exception in Kernel. pc=%lx",
				      regs->pc);
			}
		} else {
			/*
			 * Try to fix the exception. If we can't, panic the
			 * kernel.
			 */
			bundle = GX_INSN_BSWAP(
				*((tilegx_bundle_bits *)(regs->pc)));
			jit_bundle_gen(regs, bundle, align_ctl);
		}
		return;
	}

	/*
	 * The fault came from user space with ICS set, the stack pointer
	 * is misaligned, or fixups are disabled (align_ctl < 0): send a
	 * SIGBUS.
	 */
	if ((regs->sp & 0x7) || (regs->ex1) || (align_ctl < 0)) {
		siginfo_t info = {
			.si_signo = SIGBUS,
			.si_code = BUS_ADRALN,
			.si_addr = (unsigned char __user *)0
		};

		if (unaligned_printk)
			pr_info("Unalign fixup: %d %llx @%llx\n",
				(int)unaligned_fixup,
				(unsigned long long)regs->ex1,
				(unsigned long long)regs->pc);

		unaligned_fixup_count++;

		trace_unhandled_signal("unaligned fixup trap", regs, 0, SIGBUS);
		force_sig_info(info.si_signo, &info, current);
		return;
	}

	/* Read the bundle that caused the exception. */
	pc = (tilegx_bundle_bits __user *)(regs->pc);
	if (get_user(bundle, pc) != 0) {
		/* Should rarely happen, since pc is a valid user address. */
		siginfo_t info = {
			.si_signo = SIGSEGV,
			.si_code = SEGV_MAPERR,
			.si_addr = (void __user *)pc
		};
		pr_err("Couldn't read instruction at %p trying to step\n", pc);
		trace_unhandled_signal("segfault in unalign fixup", regs,
				       (unsigned long)info.si_addr, SIGSEGV);
		force_sig_info(info.si_signo, &info, current);
		return;
	}

	if (!info->unalign_jit_base) {
		void __user *user_page;

		/*
		 * Allocate a page in userland.
		 * For 64-bit processes we try to place the mapping far
		 * from anything else that might be going on (specifically
		 * 64 GB below the top of the user address space).  If it
		 * happens not to be possible to put it there, it's OK;
		 * the kernel will choose another location and we'll
		 * remember it for later.
		 */
		if (is_compat_task())
			user_page = NULL;
		else
			user_page = (void __user *)(TASK_SIZE - (1UL << 36)) +
				(current->pid << PAGE_SHIFT);
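		/*
		 * The pid-scaled term only staggers the address hints of
		 * different processes; vm_mmap() below is called without
		 * MAP_FIXED, so the kernel may place the page elsewhere
		 * if the hint is unavailable.
		 */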

		user_page = (void __user *) vm_mmap(NULL,
						    (unsigned long)user_page,
						    PAGE_SIZE,
						    PROT_EXEC | PROT_READ |
						    PROT_WRITE,
#ifdef CONFIG_HOMECACHE
						    MAP_CACHE_HOME_TASK |
#endif
						    MAP_PRIVATE |
						    MAP_ANONYMOUS,
						    0);

		if (IS_ERR((void __force *)user_page)) {
			pr_err("Out of kernel pages trying do_mmap\n");
			return;
		}

		/* Save the address in the thread_info struct */
		info->unalign_jit_base = user_page;
		if (unaligned_printk)
			pr_info("Unalign bundle: %d:%d, allocate page @%llx\n",
				raw_smp_processor_id(), current->pid,
				(unsigned long long)user_page);
	}

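	/*
	 * GX_INSN_BSWAP presumably puts the bundle read from user memory
	 * into the byte order the decode/JIT machinery expects (a no-op
	 * on little-endian kernels), mirroring the swap applied when the
	 * fragment was written out in jit_bundle_gen().
	 */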
	/* Generate the unaligned-access JIT. */
	jit_bundle_gen(regs, GX_INSN_BSWAP(bundle), align_ctl);
}

#endif /* __tilegx__ */