#! /usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from CRYPTOGAMs[1] and is included here using the option
# in the license to distribute the code under the GPL. Therefore this program
# is free software; you can redistribute it and/or modify it under the terms of
# the GNU General Public License version 2 as published by the Free Software
# Foundation.
#
# [1] https://www.openssl.org/~appro/cryptogams/

# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#       * Redistributions of source code must retain copyright notices,
#         this list of conditions and the following disclaimer.
#
#       * Redistributions in binary form must reproduce the above
#         copyright notice, this list of conditions and the following
#         disclaimer in the documentation and/or other materials
#         provided with the distribution.
#
#       * Neither the name of the CRYPTOGAMS nor the names of its
#         copyright holder and contributors may be used to endorse or
#         promote products derived from this software without specific
#         prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by the POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies the MSR.VSX flag being
# set. It should also be noted that the ISA specification doesn't prohibit
# alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in a pure AltiVec/VMX way [with data
# aligned programmatically, which in turn guarantees exception-free
# execution], but that turned out to hamper performance when vcipher
# instructions are interleaved. It's reckoned that eventual
# misalignment penalties at page boundaries are on average lower
# than the additional overhead of the pure AltiVec approach.
#
# May 2016
#
# Added XTS subroutine; a 9x improvement on little- and 12x on big-endian
# systems was measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#               CBC en-/decrypt CTR     XTS
# POWER8[le]    3.96/0.72       0.74    1.1
# POWER8[be]    3.75/0.65       0.66    1.0

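# For reference, a sketch of the C-side prototypes the generated entry
# points are expected to match (cf. aesp8-ppc.h in this directory; the
# kernel header is authoritative, this sketch is illustrative only):
#
#       int  aes_p8_set_encrypt_key(const u8 *userKey, const int bits,
#                                   struct aes_key *key);
#       int  aes_p8_set_decrypt_key(const u8 *userKey, const int bits,
#                                   struct aes_key *key);
#       void aes_p8_encrypt(const u8 *in, u8 *out, const struct aes_key *key);
#       void aes_p8_decrypt(const u8 *in, u8 *out, const struct aes_key *key);
#       void aes_p8_cbc_encrypt(const u8 *in, u8 *out, size_t len,
#                               const struct aes_key *key, u8 *iv,
#                               const int enc);
#       void aes_p8_ctr32_encrypt_blocks(const u8 *in, u8 *out, size_t len,
#                                        const struct aes_key *key,
#                                        const u8 *iv);
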
$flavour = shift;

if ($flavour =~ /64/) {
        $SIZE_T =8;
        $LRSAVE =2*$SIZE_T;
        $STU    ="stdu";
        $POP    ="ld";
        $PUSH   ="std";
        $UCMP   ="cmpld";
        $SHL    ="sldi";
} elsif ($flavour =~ /32/) {
        $SIZE_T =4;
        $LRSAVE =$SIZE_T;
        $STU    ="stwu";
        $POP    ="lwz";
        $PUSH   ="stw";
        $UCMP   ="cmplw";
        $SHL    ="slwi";
} else { die "nonsense $flavour"; }
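
# The build typically drives this script with a single "flavour" argument
# plus an output file name, e.g. (illustrative invocation, the kernel
# Makefile is authoritative):
#
#       perl aesp8-ppc.pl linux-ppc64le aesp8-ppc.S
#
# The flavour selects the 32- vs 64-bit ABI macros above, and a trailing
# "le" selects little-endian handling in ppc-xlate.pl below.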

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";

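# Notation reminder: instructions prefixed with "?" are endian-sensitive
# and are rewritten by ppc-xlate.pl (e.g. ?lvsl/?vperm are flipped for
# little-endian), while "le?"/"be?" prefixes emit an instruction only on
# little- resp. big-endian builds. The "?rev"/"?asis" tags on the data
# below are handled the same way.
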
#########################################################################
{{{     # Key setup procedures                                          #
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine        "any"

.text

.align  7
rcon:
.long   0x01000000, 0x01000000, 0x01000000, 0x01000000  ?rev
.long   0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000  ?rev
.long   0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c  ?rev
.long   0,0,0,0                                         ?asis
.long   0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe
Lconsts:
        mflr    r0
        bcl     20,31,\$+4
        mflr    $ptr     #vvvvv distance between . and rcon
        addi    $ptr,$ptr,-0x58
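        # The bcl above is the usual position-independent trick: it merely
        # sets LR to the address of the next instruction, so after the
        # mflr, $ptr-0x58 points back at rcon (0x50 bytes of table data
        # plus the two instructions ahead of the mflr).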
        mtlr    r0
        blr
        .long   0
        .byte   0,12,0x14,0,0,0,0,0
.asciz  "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl  .${prefix}_set_encrypt_key
Lset_encrypt_key:
        mflr            r11
        $PUSH           r11,$LRSAVE($sp)

        li              $ptr,-1
        ${UCMP}i        $inp,0
        beq-            Lenc_key_abort          # if ($inp==0) return -1;
        ${UCMP}i        $out,0
        beq-            Lenc_key_abort          # if ($out==0) return -1;
        li              $ptr,-2
        cmpwi           $bits,128
        blt-            Lenc_key_abort
        cmpwi           $bits,256
        bgt-            Lenc_key_abort
        andi.           r0,$bits,0x3f
        bne-            Lenc_key_abort

        lis             r0,0xfff0
        mfspr           $vrsave,256
        mtspr           256,r0

        bl              Lconsts
        mtlr            r11

        neg             r9,$inp
        lvx             $in0,0,$inp
        addi            $inp,$inp,15            # 15 is not typo
        lvsr            $key,0,r9               # borrow $key
        li              r8,0x20
        cmpwi           $bits,192
        lvx             $in1,0,$inp
        le?vspltisb     $mask,0x0f              # borrow $mask
        lvx             $rcon,0,$ptr
        le?vxor         $key,$key,$mask         # adjust for byte swap
        lvx             $mask,r8,$ptr
        addi            $ptr,$ptr,0x10
        vperm           $in0,$in0,$in1,$key     # align [and byte swap in LE]
        li              $cnt,8
        vxor            $zero,$zero,$zero
        mtctr           $cnt

        ?lvsr           $outperm,0,$out
        vspltisb        $outmask,-1
        lvx             $outhead,0,$out
        ?vperm          $outmask,$zero,$outmask,$outperm
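        # $outhead/$outtail plus the vsel against $outmask implement
        # unaligned 16-byte stores on top of aligned stvx: every store
        # merges the tail of the previous vector with the head of the
        # next one, and Ldone flushes the final partial vector.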

        blt             Loop128
        addi            $inp,$inp,8
        beq             L192
        addi            $inp,$inp,8
        b               L256

.align  4
Loop128:
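        # One iteration per round key: the vperm splats RotWord(w3) into
        # all four lanes, and vcipherlast with $rcon as "round key" reduces
        # to SubBytes plus the xor (ShiftRows is a no-op on a splatted
        # state), i.e. $key = SubWord(RotWord(w3)) ^ rcon.  The vsldoi/vxor
        # chain below then folds each word into the ones after it.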
        vperm           $key,$in0,$in0,$mask    # rotate-n-splat
        vsldoi          $tmp,$zero,$in0,12      # >>32
         vperm          $outtail,$in0,$in0,$outperm     # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
        vcipherlast     $key,$key,$rcon
         stvx           $stage,0,$out
         addi           $out,$out,16

        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
         vadduwm        $rcon,$rcon,$rcon
        vxor            $in0,$in0,$key
        bdnz            Loop128

        lvx             $rcon,0,$ptr            # last two round keys

        vperm           $key,$in0,$in0,$mask    # rotate-n-splat
        vsldoi          $tmp,$zero,$in0,12      # >>32
         vperm          $outtail,$in0,$in0,$outperm     # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
        vcipherlast     $key,$key,$rcon
         stvx           $stage,0,$out
         addi           $out,$out,16

        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
         vadduwm        $rcon,$rcon,$rcon
        vxor            $in0,$in0,$key

        vperm           $key,$in0,$in0,$mask    # rotate-n-splat
        vsldoi          $tmp,$zero,$in0,12      # >>32
         vperm          $outtail,$in0,$in0,$outperm     # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
        vcipherlast     $key,$key,$rcon
         stvx           $stage,0,$out
         addi           $out,$out,16

        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
        vxor            $in0,$in0,$key
         vperm          $outtail,$in0,$in0,$outperm     # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
         stvx           $stage,0,$out

        addi            $inp,$out,15            # 15 is not typo
        addi            $out,$out,0x50

        li              $rounds,10
        b               Ldone

.align  4
L192:
        lvx             $tmp,0,$inp
        li              $cnt,4
         vperm          $outtail,$in0,$in0,$outperm     # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
         stvx           $stage,0,$out
         addi           $out,$out,16
        vperm           $in1,$in1,$tmp,$key     # align [and byte swap in LE]
        vspltisb        $key,8                  # borrow $key
        mtctr           $cnt
        vsububm         $mask,$mask,$key        # adjust the mask

Loop192:
        vperm           $key,$in1,$in1,$mask    # rotate-n-splat
        vsldoi          $tmp,$zero,$in0,12      # >>32
        vcipherlast     $key,$key,$rcon

        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp

         vsldoi         $stage,$zero,$in1,8
        vspltw          $tmp,$in0,3
        vxor            $tmp,$tmp,$in1
        vsldoi          $in1,$zero,$in1,12      # >>32
         vadduwm        $rcon,$rcon,$rcon
        vxor            $in1,$in1,$tmp
        vxor            $in0,$in0,$key
        vxor            $in1,$in1,$key
         vsldoi         $stage,$stage,$in0,8

        vperm           $key,$in1,$in1,$mask    # rotate-n-splat
        vsldoi          $tmp,$zero,$in0,12      # >>32
         vperm          $outtail,$stage,$stage,$outperm # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
        vcipherlast     $key,$key,$rcon
         stvx           $stage,0,$out
         addi           $out,$out,16

         vsldoi         $stage,$in0,$in1,8
        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
         vperm          $outtail,$stage,$stage,$outperm # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
         stvx           $stage,0,$out
         addi           $out,$out,16

        vspltw          $tmp,$in0,3
        vxor            $tmp,$tmp,$in1
        vsldoi          $in1,$zero,$in1,12      # >>32
         vadduwm        $rcon,$rcon,$rcon
        vxor            $in1,$in1,$tmp
        vxor            $in0,$in0,$key
        vxor            $in1,$in1,$key
         vperm          $outtail,$in0,$in0,$outperm     # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
         stvx           $stage,0,$out
         addi           $inp,$out,15            # 15 is not typo
         addi           $out,$out,16
        bdnz            Loop192

        li              $rounds,12
        addi            $out,$out,0x20
        b               Ldone

.align  4
L256:
        lvx             $tmp,0,$inp
        li              $cnt,7
        li              $rounds,14
         vperm          $outtail,$in0,$in0,$outperm     # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
         stvx           $stage,0,$out
         addi           $out,$out,16
        vperm           $in1,$in1,$tmp,$key     # align [and byte swap in LE]
        mtctr           $cnt

Loop256:
        vperm           $key,$in1,$in1,$mask    # rotate-n-splat
        vsldoi          $tmp,$zero,$in0,12      # >>32
         vperm          $outtail,$in1,$in1,$outperm     # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
        vcipherlast     $key,$key,$rcon
         stvx           $stage,0,$out
         addi           $out,$out,16

        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in0,$in0,$tmp
         vadduwm        $rcon,$rcon,$rcon
        vxor            $in0,$in0,$key
         vperm          $outtail,$in0,$in0,$outperm     # rotate
         vsel           $stage,$outhead,$outtail,$outmask
         vmr            $outhead,$outtail
         stvx           $stage,0,$out
         addi           $inp,$out,15            # 15 is not typo
         addi           $out,$out,16
        bdz             Ldone

        vspltw          $key,$in0,3             # just splat
        vsldoi          $tmp,$zero,$in1,12      # >>32
        vsbox           $key,$key

        vxor            $in1,$in1,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in1,$in1,$tmp
        vsldoi          $tmp,$zero,$tmp,12      # >>32
        vxor            $in1,$in1,$tmp

        vxor            $in1,$in1,$key
        b               Loop256

.align  4
Ldone:
        lvx             $in1,0,$inp             # redundant in aligned case
        vsel            $in1,$outhead,$in1,$outmask
        stvx            $in1,0,$inp
        li              $ptr,0
        mtspr           256,$vrsave
        stw             $rounds,0($out)

Lenc_key_abort:
        mr              r3,$ptr
        blr
        .long           0
        .byte           0,12,0x14,1,0,0,3,0
        .long           0
.size   .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl  .${prefix}_set_decrypt_key
        $STU            $sp,-$FRAME($sp)
        mflr            r10
        $PUSH           r10,$FRAME+$LRSAVE($sp)
        bl              Lset_encrypt_key
        mtlr            r10

        cmpwi           r3,0
        bne-            Ldec_key_abort

        slwi            $cnt,$rounds,4
        subi            $inp,$out,240           # first round key
        srwi            $rounds,$rounds,1
        add             $out,$inp,$cnt          # last round key
        mtctr           $rounds

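        # Decryption uses the encryption schedule in reverse order, so the
        # loop below swaps 16-byte round keys from both ends of the
        # schedule, one 32-bit word at a time.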
Ldeckey:
        lwz             r0, 0($inp)
        lwz             r6, 4($inp)
        lwz             r7, 8($inp)
        lwz             r8, 12($inp)
        addi            $inp,$inp,16
        lwz             r9, 0($out)
        lwz             r10,4($out)
        lwz             r11,8($out)
        lwz             r12,12($out)
        stw             r0, 0($out)
        stw             r6, 4($out)
        stw             r7, 8($out)
        stw             r8, 12($out)
        subi            $out,$out,16
        stw             r9, -16($inp)
        stw             r10,-12($inp)
        stw             r11,-8($inp)
        stw             r12,-4($inp)
        bdnz            Ldeckey

        xor             r3,r3,r3                # return value
Ldec_key_abort:
        addi            $sp,$sp,$FRAME
        blr
        .long           0
        .byte           0,12,4,1,0x80,0,3,0
        .long           0
.size   .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{     # Single block en- and decrypt procedures                       #
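# gen_block() emits one routine per direction; the only difference is the
# "n" infix, which turns vcipher/vcipherlast into their inverse-cipher
# counterparts vncipher/vncipherlast for decryption.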
sub gen_block () {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl  .${prefix}_${dir}crypt
        lwz             $rounds,240($key)
        lis             r0,0xfc00
        mfspr           $vrsave,256
        li              $idx,15                 # 15 is not typo
        mtspr           256,r0

        lvx             v0,0,$inp
        neg             r11,$out
        lvx             v1,$idx,$inp
        lvsl            v2,0,$inp               # inpperm
        le?vspltisb     v4,0x0f
        ?lvsl           v3,0,r11                # outperm
        le?vxor         v2,v2,v4
        li              $idx,16
        vperm           v0,v0,v1,v2             # align [and byte swap in LE]
        lvx             v1,0,$key
        ?lvsl           v5,0,$key               # keyperm
        srwi            $rounds,$rounds,1
        lvx             v2,$idx,$key
        addi            $idx,$idx,16
        subi            $rounds,$rounds,1
        ?vperm          v1,v1,v2,v5             # align round key

        vxor            v0,v0,v1
        lvx             v1,$idx,$key
        addi            $idx,$idx,16
        mtctr           $rounds

Loop_${dir}c:
        ?vperm          v2,v2,v1,v5
        v${n}cipher     v0,v0,v2
        lvx             v2,$idx,$key
        addi            $idx,$idx,16
        ?vperm          v1,v1,v2,v5
        v${n}cipher     v0,v0,v1
        lvx             v1,$idx,$key
        addi            $idx,$idx,16
        bdnz            Loop_${dir}c

        ?vperm          v2,v2,v1,v5
        v${n}cipher     v0,v0,v2
        lvx             v2,$idx,$key
        ?vperm          v1,v1,v2,v5
        v${n}cipherlast v0,v0,v1

        vspltisb        v2,-1
        vxor            v1,v1,v1
        li              $idx,15                 # 15 is not typo
        ?vperm          v2,v1,v2,v3             # outmask
        le?vxor         v3,v3,v4
        lvx             v1,0,$out               # outhead
        vperm           v0,v0,v0,v3             # rotate [and byte swap in LE]
        vsel            v1,v1,v0,v2
        lvx             v4,$idx,$out
        stvx            v1,0,$out
        vsel            v0,v0,v4,v2
        stvx            v0,$idx,$out

        mtspr           256,$vrsave
        blr
        .long           0
        .byte           0,12,0x14,0,0,0,3,0
        .long           0
.size   .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}
#########################################################################
{{{     # CBC en- and decrypt procedures                                #
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=             map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
                                                map("v$_",(4..10));
$code.=<<___;
.globl  .${prefix}_cbc_encrypt
        ${UCMP}i        $len,16
        bltlr-

        cmpwi           $enc,0                  # test direction
        lis             r0,0xffe0
        mfspr           $vrsave,256
        mtspr           256,r0

        li              $idx,15
        vxor            $rndkey0,$rndkey0,$rndkey0
        le?vspltisb     $tmp,0x0f

        lvx             $ivec,0,$ivp            # load [unaligned] iv
        lvsl            $inpperm,0,$ivp
        lvx             $inptail,$idx,$ivp
        le?vxor         $inpperm,$inpperm,$tmp
        vperm           $ivec,$ivec,$inptail,$inpperm

        neg             r11,$inp
        ?lvsl           $keyperm,0,$key         # prepare for unaligned key
        lwz             $rounds,240($key)

        lvsr            $inpperm,0,r11          # prepare for unaligned load
        lvx             $inptail,0,$inp
        addi            $inp,$inp,15            # 15 is not typo
        le?vxor         $inpperm,$inpperm,$tmp

        ?lvsr           $outperm,0,$out         # prepare for unaligned store
        vspltisb        $outmask,-1
        lvx             $outhead,0,$out
        ?vperm          $outmask,$rndkey0,$outmask,$outperm
        le?vxor         $outperm,$outperm,$tmp

        srwi            $rounds,$rounds,1
        li              $idx,16
        subi            $rounds,$rounds,1
        beq             Lcbc_dec

Lcbc_enc:
        vmr             $inout,$inptail
        lvx             $inptail,0,$inp
        addi            $inp,$inp,16
        mtctr           $rounds
        subi            $len,$len,16            # len-=16

        lvx             $rndkey0,0,$key
         vperm          $inout,$inout,$inptail,$inpperm
        lvx             $rndkey1,$idx,$key
        addi            $idx,$idx,16
        ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
        vxor            $inout,$inout,$rndkey0
        lvx             $rndkey0,$idx,$key
        addi            $idx,$idx,16
        vxor            $inout,$inout,$ivec

Loop_cbc_enc:
        ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
        vcipher         $inout,$inout,$rndkey1
        lvx             $rndkey1,$idx,$key
        addi            $idx,$idx,16
        ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
        vcipher         $inout,$inout,$rndkey0
        lvx             $rndkey0,$idx,$key
        addi            $idx,$idx,16
        bdnz            Loop_cbc_enc

        ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
        vcipher         $inout,$inout,$rndkey1
        lvx             $rndkey1,$idx,$key
        li              $idx,16
        ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
        vcipherlast     $ivec,$inout,$rndkey0
        ${UCMP}i        $len,16

        vperm           $tmp,$ivec,$ivec,$outperm
        vsel            $inout,$outhead,$tmp,$outmask
        vmr             $outhead,$tmp
        stvx            $inout,0,$out
        addi            $out,$out,16
        bge             Lcbc_enc

        b               Lcbc_done

.align  4
Lcbc_dec:
        ${UCMP}i        $len,128
        bge             _aesp8_cbc_decrypt8x
        vmr             $tmp,$inptail
        lvx             $inptail,0,$inp
        addi            $inp,$inp,16
        mtctr           $rounds
        subi            $len,$len,16            # len-=16

        lvx             $rndkey0,0,$key
         vperm          $tmp,$tmp,$inptail,$inpperm
        lvx             $rndkey1,$idx,$key
        addi            $idx,$idx,16
        ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
        vxor            $inout,$tmp,$rndkey0
        lvx             $rndkey0,$idx,$key
        addi            $idx,$idx,16

Loop_cbc_dec:
        ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
        vncipher        $inout,$inout,$rndkey1
        lvx             $rndkey1,$idx,$key
        addi            $idx,$idx,16
        ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
        vncipher        $inout,$inout,$rndkey0
        lvx             $rndkey0,$idx,$key
        addi            $idx,$idx,16
        bdnz            Loop_cbc_dec

        ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
        vncipher        $inout,$inout,$rndkey1
        lvx             $rndkey1,$idx,$key
        li              $idx,16
        ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
        vncipherlast    $inout,$inout,$rndkey0
        ${UCMP}i        $len,16

        vxor            $inout,$inout,$ivec
        vmr             $ivec,$tmp
        vperm           $tmp,$inout,$inout,$outperm
        vsel            $inout,$outhead,$tmp,$outmask
        vmr             $outhead,$tmp
        stvx            $inout,0,$out
        addi            $out,$out,16
        bge             Lcbc_dec

Lcbc_done:
        addi            $out,$out,-1
        lvx             $inout,0,$out           # redundant in aligned case
        vsel            $inout,$outhead,$inout,$outmask
        stvx            $inout,0,$out

        neg             $enc,$ivp               # write [unaligned] iv
        li              $idx,15                 # 15 is not typo
        vxor            $rndkey0,$rndkey0,$rndkey0
        vspltisb        $outmask,-1
        le?vspltisb     $tmp,0x0f
        ?lvsl           $outperm,0,$enc
        ?vperm          $outmask,$rndkey0,$outmask,$outperm
        le?vxor         $outperm,$outperm,$tmp
        lvx             $outhead,0,$ivp
        vperm           $ivec,$ivec,$ivec,$outperm
        vsel            $inout,$outhead,$ivec,$outmask
        lvx             $inptail,$idx,$ivp
        stvx            $inout,0,$ivp
        vsel            $inout,$ivec,$inptail,$outmask
        stvx            $inout,$idx,$ivp

        mtspr           256,$vrsave
        blr
        .long           0
        .byte           0,12,0x14,0,0,0,6,0
        .long           0
___
#########################################################################
{{      # Optimized CBC decrypt procedure                               #
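# CBC decryption is parallelizable (each plaintext block depends only on
# two ciphertext blocks), so this path keeps eight blocks in flight per
# round key to hide vncipher latency; that is the source of the large
# en-/decrypt asymmetry in the performance table at the top of the file.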
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";      # v24-v25 rotating buffer for first round keys
                        # v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment

$code.=<<___;
.align  5
_aesp8_cbc_decrypt8x:
        $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
        li              r10,`$FRAME+8*16+15`
        li              r11,`$FRAME+8*16+31`
        stvx            v20,r10,$sp             # ABI says so
        addi            r10,r10,32
        stvx            v21,r11,$sp
        addi            r11,r11,32
        stvx            v22,r10,$sp
        addi            r10,r10,32
        stvx            v23,r11,$sp
        addi            r11,r11,32
        stvx            v24,r10,$sp
        addi            r10,r10,32
        stvx            v25,r11,$sp
        addi            r11,r11,32
        stvx            v26,r10,$sp
        addi            r10,r10,32
        stvx            v27,r11,$sp
        addi            r11,r11,32
        stvx            v28,r10,$sp
        addi            r10,r10,32
        stvx            v29,r11,$sp
        addi            r11,r11,32
        stvx            v30,r10,$sp
        stvx            v31,r11,$sp
        li              r0,-1
        stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
        li              $x10,0x10
        $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
        li              $x20,0x20
        $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
        li              $x30,0x30
        $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
        li              $x40,0x40
        $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
        li              $x50,0x50
        $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
        li              $x60,0x60
        $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
        li              $x70,0x70
        mtspr           256,r0

        subi            $rounds,$rounds,3       # -4 in total
        subi            $len,$len,128           # bias

        lvx             $rndkey0,$x00,$key      # load key schedule
        lvx             v30,$x10,$key
        addi            $key,$key,0x20
        lvx             v31,$x00,$key
        ?vperm          $rndkey0,$rndkey0,v30,$keyperm
        addi            $key_,$sp,$FRAME+15
        mtctr           $rounds

Load_cbc_dec_key:
        ?vperm          v24,v30,v31,$keyperm
        lvx             v30,$x10,$key
        addi            $key,$key,0x20
        stvx            v24,$x00,$key_          # off-load round[1]
        ?vperm          v25,v31,v30,$keyperm
        lvx             v31,$x00,$key
        stvx            v25,$x10,$key_          # off-load round[2]
        addi            $key_,$key_,0x20
        bdnz            Load_cbc_dec_key
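        # The pre-permuted round keys are parked in the aligned stack area
        # at $key_ so the main loop can reload them with plain lvx, with
        # no per-iteration key alignment fix-ups.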

        lvx             v26,$x10,$key
        ?vperm          v24,v30,v31,$keyperm
        lvx             v27,$x20,$key
        stvx            v24,$x00,$key_          # off-load round[3]
        ?vperm          v25,v31,v26,$keyperm
        lvx             v28,$x30,$key
        stvx            v25,$x10,$key_          # off-load round[4]
        addi            $key_,$sp,$FRAME+15     # rewind $key_
        ?vperm          v26,v26,v27,$keyperm
        lvx             v29,$x40,$key
        ?vperm          v27,v27,v28,$keyperm
        lvx             v30,$x50,$key
        ?vperm          v28,v28,v29,$keyperm
        lvx             v31,$x60,$key
        ?vperm          v29,v29,v30,$keyperm
        lvx             $out0,$x70,$key         # borrow $out0
        ?vperm          v30,v30,v31,$keyperm
        lvx             v24,$x00,$key_          # pre-load round[1]
        ?vperm          v31,v31,$out0,$keyperm
        lvx             v25,$x10,$key_          # pre-load round[2]

        #lvx            $inptail,0,$inp         # "caller" already did this
        #addi           $inp,$inp,15            # 15 is not typo
        subi            $inp,$inp,15            # undo "caller"

         le?li          $idx,8
        lvx_u           $in0,$x00,$inp          # load first 8 "words"
         le?lvsl        $inpperm,0,$idx
         le?vspltisb    $tmp,0x0f
        lvx_u           $in1,$x10,$inp
         le?vxor        $inpperm,$inpperm,$tmp  # transform for lvx_u/stvx_u
        lvx_u           $in2,$x20,$inp
         le?vperm       $in0,$in0,$in0,$inpperm
        lvx_u           $in3,$x30,$inp
         le?vperm       $in1,$in1,$in1,$inpperm
        lvx_u           $in4,$x40,$inp
         le?vperm       $in2,$in2,$in2,$inpperm
        vxor            $out0,$in0,$rndkey0
        lvx_u           $in5,$x50,$inp
         le?vperm       $in3,$in3,$in3,$inpperm
        vxor            $out1,$in1,$rndkey0
        lvx_u           $in6,$x60,$inp
         le?vperm       $in4,$in4,$in4,$inpperm
        vxor            $out2,$in2,$rndkey0
        lvx_u           $in7,$x70,$inp
        addi            $inp,$inp,0x80
         le?vperm       $in5,$in5,$in5,$inpperm
        vxor            $out3,$in3,$rndkey0
         le?vperm       $in6,$in6,$in6,$inpperm
        vxor            $out4,$in4,$rndkey0
         le?vperm       $in7,$in7,$in7,$inpperm
        vxor            $out5,$in5,$rndkey0
        vxor            $out6,$in6,$rndkey0
        vxor            $out7,$in7,$rndkey0

        mtctr           $rounds
        b               Loop_cbc_dec8x
.align  5
Loop_cbc_dec8x:
        vncipher        $out0,$out0,v24
        vncipher        $out1,$out1,v24
        vncipher        $out2,$out2,v24
        vncipher        $out3,$out3,v24
        vncipher        $out4,$out4,v24
        vncipher        $out5,$out5,v24
        vncipher        $out6,$out6,v24
        vncipher        $out7,$out7,v24
        lvx             v24,$x20,$key_          # round[3]
        addi            $key_,$key_,0x20

        vncipher        $out0,$out0,v25
        vncipher        $out1,$out1,v25
        vncipher        $out2,$out2,v25
        vncipher        $out3,$out3,v25
        vncipher        $out4,$out4,v25
        vncipher        $out5,$out5,v25
        vncipher        $out6,$out6,v25
        vncipher        $out7,$out7,v25
        lvx             v25,$x10,$key_          # round[4]
        bdnz            Loop_cbc_dec8x

        subic           $len,$len,128           # $len-=128
        vncipher        $out0,$out0,v24
        vncipher        $out1,$out1,v24
        vncipher        $out2,$out2,v24
        vncipher        $out3,$out3,v24
        vncipher        $out4,$out4,v24
        vncipher        $out5,$out5,v24
        vncipher        $out6,$out6,v24
        vncipher        $out7,$out7,v24

        subfe.          r0,r0,r0                # borrow?-1:0
        vncipher        $out0,$out0,v25
        vncipher        $out1,$out1,v25
        vncipher        $out2,$out2,v25
        vncipher        $out3,$out3,v25
        vncipher        $out4,$out4,v25
        vncipher        $out5,$out5,v25
        vncipher        $out6,$out6,v25
        vncipher        $out7,$out7,v25

        and             r0,r0,$len
        vncipher        $out0,$out0,v26
        vncipher        $out1,$out1,v26
        vncipher        $out2,$out2,v26
        vncipher        $out3,$out3,v26
        vncipher        $out4,$out4,v26
        vncipher        $out5,$out5,v26
        vncipher        $out6,$out6,v26
        vncipher        $out7,$out7,v26

        add             $inp,$inp,r0            # $inp is adjusted in such a
                                                # way that at exit from the
                                                # loop inX-in7 are loaded
                                                # with last "words"
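        # (r0 is 0 while a full 8-block pass remains; on borrow it holds
        # $len's negative remainder, rewinding $inp so the speculative
        # reload below picks up the buffer's final blocks without a branch)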
        vncipher        $out0,$out0,v27
        vncipher        $out1,$out1,v27
        vncipher        $out2,$out2,v27
        vncipher        $out3,$out3,v27
        vncipher        $out4,$out4,v27
        vncipher        $out5,$out5,v27
        vncipher        $out6,$out6,v27
        vncipher        $out7,$out7,v27

        addi            $key_,$sp,$FRAME+15     # rewind $key_
        vncipher        $out0,$out0,v28
        vncipher        $out1,$out1,v28
        vncipher        $out2,$out2,v28
        vncipher        $out3,$out3,v28
        vncipher        $out4,$out4,v28
        vncipher        $out5,$out5,v28
        vncipher        $out6,$out6,v28
        vncipher        $out7,$out7,v28
        lvx             v24,$x00,$key_          # re-pre-load round[1]

        vncipher        $out0,$out0,v29
        vncipher        $out1,$out1,v29
        vncipher        $out2,$out2,v29
        vncipher        $out3,$out3,v29
        vncipher        $out4,$out4,v29
        vncipher        $out5,$out5,v29
        vncipher        $out6,$out6,v29
        vncipher        $out7,$out7,v29
        lvx             v25,$x10,$key_          # re-pre-load round[2]

        vncipher        $out0,$out0,v30
         vxor           $ivec,$ivec,v31         # xor with last round key
        vncipher        $out1,$out1,v30
         vxor           $in0,$in0,v31
        vncipher        $out2,$out2,v30
         vxor           $in1,$in1,v31
        vncipher        $out3,$out3,v30
         vxor           $in2,$in2,v31
        vncipher        $out4,$out4,v30
         vxor           $in3,$in3,v31
        vncipher        $out5,$out5,v30
         vxor           $in4,$in4,v31
        vncipher        $out6,$out6,v30
         vxor           $in5,$in5,v31
        vncipher        $out7,$out7,v30
         vxor           $in6,$in6,v31

        vncipherlast    $out0,$out0,$ivec
        vncipherlast    $out1,$out1,$in0
         lvx_u          $in0,$x00,$inp          # load next input block
        vncipherlast    $out2,$out2,$in1
         lvx_u          $in1,$x10,$inp
        vncipherlast    $out3,$out3,$in2
         le?vperm       $in0,$in0,$in0,$inpperm
         lvx_u          $in2,$x20,$inp
        vncipherlast    $out4,$out4,$in3
         le?vperm       $in1,$in1,$in1,$inpperm
         lvx_u          $in3,$x30,$inp
        vncipherlast    $out5,$out5,$in4
         le?vperm       $in2,$in2,$in2,$inpperm
         lvx_u          $in4,$x40,$inp
        vncipherlast    $out6,$out6,$in5
         le?vperm       $in3,$in3,$in3,$inpperm
         lvx_u          $in5,$x50,$inp
        vncipherlast    $out7,$out7,$in6
         le?vperm       $in4,$in4,$in4,$inpperm
         lvx_u          $in6,$x60,$inp
        vmr             $ivec,$in7
         le?vperm       $in5,$in5,$in5,$inpperm
         lvx_u          $in7,$x70,$inp
         addi           $inp,$inp,0x80

        le?vperm        $out0,$out0,$out0,$inpperm
        le?vperm        $out1,$out1,$out1,$inpperm
        stvx_u          $out0,$x00,$out
         le?vperm       $in6,$in6,$in6,$inpperm
         vxor           $out0,$in0,$rndkey0
        le?vperm        $out2,$out2,$out2,$inpperm
        stvx_u          $out1,$x10,$out
         le?vperm       $in7,$in7,$in7,$inpperm
         vxor           $out1,$in1,$rndkey0
        le?vperm        $out3,$out3,$out3,$inpperm
        stvx_u          $out2,$x20,$out
         vxor           $out2,$in2,$rndkey0
        le?vperm        $out4,$out4,$out4,$inpperm
        stvx_u          $out3,$x30,$out
         vxor           $out3,$in3,$rndkey0
        le?vperm        $out5,$out5,$out5,$inpperm
        stvx_u          $out4,$x40,$out
         vxor           $out4,$in4,$rndkey0
        le?vperm        $out6,$out6,$out6,$inpperm
        stvx_u          $out5,$x50,$out
         vxor           $out5,$in5,$rndkey0
        le?vperm        $out7,$out7,$out7,$inpperm
        stvx_u          $out6,$x60,$out
         vxor           $out6,$in6,$rndkey0
        stvx_u          $out7,$x70,$out
        addi            $out,$out,0x80
         vxor           $out7,$in7,$rndkey0

        mtctr           $rounds
        beq             Loop_cbc_dec8x          # did $len-=128 borrow?

        addic.          $len,$len,128
        beq             Lcbc_dec8x_done
        nop
        nop

Loop_cbc_dec8x_tail:                            # up to 7 "words" tail...
        vncipher        $out1,$out1,v24
        vncipher        $out2,$out2,v24
        vncipher        $out3,$out3,v24
        vncipher        $out4,$out4,v24
        vncipher        $out5,$out5,v24
        vncipher        $out6,$out6,v24
        vncipher        $out7,$out7,v24
        lvx             v24,$x20,$key_          # round[3]
        addi            $key_,$key_,0x20

        vncipher        $out1,$out1,v25
        vncipher        $out2,$out2,v25
        vncipher        $out3,$out3,v25
        vncipher        $out4,$out4,v25
        vncipher        $out5,$out5,v25
        vncipher        $out6,$out6,v25
        vncipher        $out7,$out7,v25
        lvx             v25,$x10,$key_          # round[4]
        bdnz            Loop_cbc_dec8x_tail

        vncipher        $out1,$out1,v24
        vncipher        $out2,$out2,v24
        vncipher        $out3,$out3,v24
        vncipher        $out4,$out4,v24
        vncipher        $out5,$out5,v24
        vncipher        $out6,$out6,v24
        vncipher        $out7,$out7,v24

        vncipher        $out1,$out1,v25
        vncipher        $out2,$out2,v25
        vncipher        $out3,$out3,v25
        vncipher        $out4,$out4,v25
        vncipher        $out5,$out5,v25
        vncipher        $out6,$out6,v25
        vncipher        $out7,$out7,v25

        vncipher        $out1,$out1,v26
        vncipher        $out2,$out2,v26
        vncipher        $out3,$out3,v26
        vncipher        $out4,$out4,v26
        vncipher        $out5,$out5,v26
        vncipher        $out6,$out6,v26
        vncipher        $out7,$out7,v26

        vncipher        $out1,$out1,v27
        vncipher        $out2,$out2,v27
        vncipher        $out3,$out3,v27
        vncipher        $out4,$out4,v27
        vncipher        $out5,$out5,v27
        vncipher        $out6,$out6,v27
        vncipher        $out7,$out7,v27

        vncipher        $out1,$out1,v28
        vncipher        $out2,$out2,v28
        vncipher        $out3,$out3,v28
        vncipher        $out4,$out4,v28
        vncipher        $out5,$out5,v28
        vncipher        $out6,$out6,v28
        vncipher        $out7,$out7,v28

        vncipher        $out1,$out1,v29
        vncipher        $out2,$out2,v29
        vncipher        $out3,$out3,v29
        vncipher        $out4,$out4,v29
        vncipher        $out5,$out5,v29
        vncipher        $out6,$out6,v29
        vncipher        $out7,$out7,v29

        vncipher        $out1,$out1,v30
         vxor           $ivec,$ivec,v31         # last round key
        vncipher        $out2,$out2,v30
         vxor           $in1,$in1,v31
        vncipher        $out3,$out3,v30
         vxor           $in2,$in2,v31
        vncipher        $out4,$out4,v30
         vxor           $in3,$in3,v31
        vncipher        $out5,$out5,v30
         vxor           $in4,$in4,v31
        vncipher        $out6,$out6,v30
         vxor           $in5,$in5,v31
        vncipher        $out7,$out7,v30
         vxor           $in6,$in6,v31

        cmplwi          $len,32                 # switch($len)
        blt             Lcbc_dec8x_one
        nop
        beq             Lcbc_dec8x_two
        cmplwi          $len,64
        blt             Lcbc_dec8x_three
        nop
        beq             Lcbc_dec8x_four
        cmplwi          $len,96
        blt             Lcbc_dec8x_five
        nop
        beq             Lcbc_dec8x_six

Lcbc_dec8x_seven:
        vncipherlast    $out1,$out1,$ivec
        vncipherlast    $out2,$out2,$in1
        vncipherlast    $out3,$out3,$in2
        vncipherlast    $out4,$out4,$in3
        vncipherlast    $out5,$out5,$in4
        vncipherlast    $out6,$out6,$in5
        vncipherlast    $out7,$out7,$in6
        vmr             $ivec,$in7

        le?vperm        $out1,$out1,$out1,$inpperm
        le?vperm        $out2,$out2,$out2,$inpperm
        stvx_u          $out1,$x00,$out
        le?vperm        $out3,$out3,$out3,$inpperm
        stvx_u          $out2,$x10,$out
        le?vperm        $out4,$out4,$out4,$inpperm
        stvx_u          $out3,$x20,$out
        le?vperm        $out5,$out5,$out5,$inpperm
        stvx_u          $out4,$x30,$out
        le?vperm        $out6,$out6,$out6,$inpperm
        stvx_u          $out5,$x40,$out
        le?vperm        $out7,$out7,$out7,$inpperm
        stvx_u          $out6,$x50,$out
        stvx_u          $out7,$x60,$out
        addi            $out,$out,0x70
        b               Lcbc_dec8x_done

.align  5
Lcbc_dec8x_six:
        vncipherlast    $out2,$out2,$ivec
        vncipherlast    $out3,$out3,$in2
        vncipherlast    $out4,$out4,$in3
        vncipherlast    $out5,$out5,$in4
        vncipherlast    $out6,$out6,$in5
        vncipherlast    $out7,$out7,$in6
        vmr             $ivec,$in7

        le?vperm        $out2,$out2,$out2,$inpperm
        le?vperm        $out3,$out3,$out3,$inpperm
        stvx_u          $out2,$x00,$out
        le?vperm        $out4,$out4,$out4,$inpperm
        stvx_u          $out3,$x10,$out
        le?vperm        $out5,$out5,$out5,$inpperm
        stvx_u          $out4,$x20,$out
        le?vperm        $out6,$out6,$out6,$inpperm
        stvx_u          $out5,$x30,$out
        le?vperm        $out7,$out7,$out7,$inpperm
        stvx_u          $out6,$x40,$out
        stvx_u          $out7,$x50,$out
        addi            $out,$out,0x60
        b               Lcbc_dec8x_done

.align  5
Lcbc_dec8x_five:
        vncipherlast    $out3,$out3,$ivec
        vncipherlast    $out4,$out4,$in3
        vncipherlast    $out5,$out5,$in4
        vncipherlast    $out6,$out6,$in5
        vncipherlast    $out7,$out7,$in6
        vmr             $ivec,$in7

        le?vperm        $out3,$out3,$out3,$inpperm
        le?vperm        $out4,$out4,$out4,$inpperm
        stvx_u          $out3,$x00,$out
        le?vperm        $out5,$out5,$out5,$inpperm
        stvx_u          $out4,$x10,$out
        le?vperm        $out6,$out6,$out6,$inpperm
        stvx_u          $out5,$x20,$out
        le?vperm        $out7,$out7,$out7,$inpperm
        stvx_u          $out6,$x30,$out
        stvx_u          $out7,$x40,$out
        addi            $out,$out,0x50
        b               Lcbc_dec8x_done

.align  5
Lcbc_dec8x_four:
        vncipherlast    $out4,$out4,$ivec
        vncipherlast    $out5,$out5,$in4
        vncipherlast    $out6,$out6,$in5
        vncipherlast    $out7,$out7,$in6
        vmr             $ivec,$in7

        le?vperm        $out4,$out4,$out4,$inpperm
        le?vperm        $out5,$out5,$out5,$inpperm
        stvx_u          $out4,$x00,$out
        le?vperm        $out6,$out6,$out6,$inpperm
        stvx_u          $out5,$x10,$out
        le?vperm        $out7,$out7,$out7,$inpperm
        stvx_u          $out6,$x20,$out
        stvx_u          $out7,$x30,$out
        addi            $out,$out,0x40
        b               Lcbc_dec8x_done

.align  5
Lcbc_dec8x_three:
        vncipherlast    $out5,$out5,$ivec
        vncipherlast    $out6,$out6,$in5
        vncipherlast    $out7,$out7,$in6
        vmr             $ivec,$in7

        le?vperm        $out5,$out5,$out5,$inpperm
        le?vperm        $out6,$out6,$out6,$inpperm
        stvx_u          $out5,$x00,$out
        le?vperm        $out7,$out7,$out7,$inpperm
        stvx_u          $out6,$x10,$out
        stvx_u          $out7,$x20,$out
        addi            $out,$out,0x30
        b               Lcbc_dec8x_done

.align  5
Lcbc_dec8x_two:
        vncipherlast    $out6,$out6,$ivec
        vncipherlast    $out7,$out7,$in6
        vmr             $ivec,$in7

        le?vperm        $out6,$out6,$out6,$inpperm
        le?vperm        $out7,$out7,$out7,$inpperm
        stvx_u          $out6,$x00,$out
        stvx_u          $out7,$x10,$out
        addi            $out,$out,0x20
        b               Lcbc_dec8x_done

.align  5
Lcbc_dec8x_one:
        vncipherlast    $out7,$out7,$ivec
        vmr             $ivec,$in7

        le?vperm        $out7,$out7,$out7,$inpperm
        stvx_u          $out7,0,$out
        addi            $out,$out,0x10

Lcbc_dec8x_done:
        le?vperm        $ivec,$ivec,$ivec,$inpperm
        stvx_u          $ivec,0,$ivp            # write [unaligned] iv

        li              r10,`$FRAME+15`
        li              r11,`$FRAME+31`
        stvx            $inpperm,r10,$sp        # wipe copies of round keys
        addi            r10,r10,32
        stvx            $inpperm,r11,$sp
        addi            r11,r11,32
        stvx            $inpperm,r10,$sp
        addi            r10,r10,32
        stvx            $inpperm,r11,$sp
        addi            r11,r11,32
        stvx            $inpperm,r10,$sp
        addi            r10,r10,32
        stvx            $inpperm,r11,$sp
        addi            r11,r11,32
        stvx            $inpperm,r10,$sp
        addi            r10,r10,32
        stvx            $inpperm,r11,$sp
        addi            r11,r11,32

        mtspr           256,$vrsave
        lvx             v20,r10,$sp             # ABI says so
        addi            r10,r10,32
        lvx             v21,r11,$sp
        addi            r11,r11,32
        lvx             v22,r10,$sp
        addi            r10,r10,32
        lvx             v23,r11,$sp
        addi            r11,r11,32
        lvx             v24,r10,$sp
        addi            r10,r10,32
        lvx             v25,r11,$sp
        addi            r11,r11,32
        lvx             v26,r10,$sp
        addi            r10,r10,32
        lvx             v27,r11,$sp
        addi            r11,r11,32
        lvx             v28,r10,$sp
        addi            r10,r10,32
        lvx             v29,r11,$sp
        addi            r11,r11,32
        lvx             v30,r10,$sp
        lvx             v31,r11,$sp
        $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
        $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
        $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
        $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
        $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
        $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
        addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
        blr
        .long           0
        .byte           0,12,0x14,0,0x80,6,6,0
        .long           0
.size   .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}      }}}

#########################################################################
{{{     # CTR procedure[s]                                              #

####################### WARNING: Here be dragons! #######################
#
# This code is written as 'ctr32', based on a 32-bit counter used
# upstream. The kernel does *not* use a 32-bit counter. The kernel uses
# a 128-bit counter.
#
# This leads to subtle changes from the upstream code: the counter
# is incremented with vadduqm rather than vadduwm. This occurs in
# both the bulk (8 blocks at a time) path, and in the individual block
# path. Be aware of this when doing updates.
#
# See:
# 1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug")
# 009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword")
# https://github.com/openssl/openssl/pull/8942
#
#########################################################################
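# Concretely (illustrative; compare with the upstream OpenSSL file):
#
#       vadduwm         $ivec,$ivec,$one        # upstream: per-32-bit-lane add
#       vadduqm         $ivec,$ivec,$one        # here: single 128-bit add
#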
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=             map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
                                                map("v$_",(4..11));
my $dat=$tmp;

$code.=<<___;
.globl  .${prefix}_ctr32_encrypt_blocks
        ${UCMP}i        $len,1
        bltlr-

        lis             r0,0xfff0
        mfspr           $vrsave,256
        mtspr           256,r0

        li              $idx,15
        vxor            $rndkey0,$rndkey0,$rndkey0
        le?vspltisb     $tmp,0x0f

        lvx             $ivec,0,$ivp            # load [unaligned] iv
        lvsl            $inpperm,0,$ivp
        lvx             $inptail,$idx,$ivp
         vspltisb       $one,1
        le?vxor         $inpperm,$inpperm,$tmp
        vperm           $ivec,$ivec,$inptail,$inpperm
         vsldoi         $one,$rndkey0,$one,1
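        # $one now holds the 128-bit integer 1: vspltisb filled every byte
        # with 1, and the vsldoi shifted a single 1 into the least-
        # significant byte, ready for the quadword increment further down.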
1334
1335         neg             r11,$inp
1336         ?lvsl           $keyperm,0,$key         # prepare for unaligned key
1337         lwz             $rounds,240($key)
1338
1339         lvsr            $inpperm,0,r11          # prepare for unaligned load
1340         lvx             $inptail,0,$inp
1341         addi            $inp,$inp,15            # 15 is not typo
1342         le?vxor         $inpperm,$inpperm,$tmp
1343
1344         srwi            $rounds,$rounds,1
1345         li              $idx,16
1346         subi            $rounds,$rounds,1
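        # two rounds are processed per Loop_ctr32_enc iteration; the
        # final vcipher+vcipherlast pair is peeled off after the loop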
1347
1348         ${UCMP}i        $len,8
1349         bge             _aesp8_ctr32_encrypt8x
1350
1351         ?lvsr           $outperm,0,$out         # prepare for unaligned store
1352         vspltisb        $outmask,-1
1353         lvx             $outhead,0,$out
1354         ?vperm          $outmask,$rndkey0,$outmask,$outperm
1355         le?vxor         $outperm,$outperm,$tmp
1356
1357         lvx             $rndkey0,0,$key
1358         mtctr           $rounds
1359         lvx             $rndkey1,$idx,$key
1360         addi            $idx,$idx,16
1361         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
1362         vxor            $inout,$ivec,$rndkey0
1363         lvx             $rndkey0,$idx,$key
1364         addi            $idx,$idx,16
1365         b               Loop_ctr32_enc
1366
1367 .align  5
1368 Loop_ctr32_enc:
1369         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
1370         vcipher         $inout,$inout,$rndkey1
1371         lvx             $rndkey1,$idx,$key
1372         addi            $idx,$idx,16
1373         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
1374         vcipher         $inout,$inout,$rndkey0
1375         lvx             $rndkey0,$idx,$key
1376         addi            $idx,$idx,16
1377         bdnz            Loop_ctr32_enc
1378
1379         vadduqm         $ivec,$ivec,$one        # Kernel change for 128-bit
1380          vmr            $dat,$inptail
1381          lvx            $inptail,0,$inp
1382          addi           $inp,$inp,16
1383          subic.         $len,$len,1             # blocks--
1384
1385         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
1386         vcipher         $inout,$inout,$rndkey1
1387         lvx             $rndkey1,$idx,$key
1388          vperm          $dat,$dat,$inptail,$inpperm
1389          li             $idx,16
1390         ?vperm          $rndkey1,$rndkey0,$rndkey1,$keyperm
1391          lvx            $rndkey0,0,$key
1392         vxor            $dat,$dat,$rndkey1      # last round key
1393         vcipherlast     $inout,$inout,$dat
1394
1395          lvx            $rndkey1,$idx,$key
1396          addi           $idx,$idx,16
1397         vperm           $inout,$inout,$inout,$outperm
1398         vsel            $dat,$outhead,$inout,$outmask
1399          mtctr          $rounds
1400          ?vperm         $rndkey0,$rndkey0,$rndkey1,$keyperm
1401         vmr             $outhead,$inout
1402          vxor           $inout,$ivec,$rndkey0
1403          lvx            $rndkey0,$idx,$key
1404          addi           $idx,$idx,16
1405         stvx            $dat,0,$out
1406         addi            $out,$out,16
1407         bne             Loop_ctr32_enc
1408
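        # flush the tail bytes still held in $outhead: load the quadword
        # already at the end of the buffer, merge, and store it back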
1409         addi            $out,$out,-1
1410         lvx             $inout,0,$out           # redundant in aligned case
1411         vsel            $inout,$outhead,$inout,$outmask
1412         stvx            $inout,0,$out
1413
1414         mtspr           256,$vrsave
1415         blr
1416         .long           0
1417         .byte           0,12,0x14,0,0,0,6,0
1418         .long           0
1419 ___
1420 #########################################################################
1421 {{      # Optimized CTR procedure                                       #
1422 my $key_="r11";
1423 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
1424 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
1425 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
1426 my $rndkey0="v23";      # v24-v25 rotating buffer for the first round keys
1427                         # v26-v31 last 6 round keys
1428 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
1429 my ($two,$three,$four)=($outhead,$outperm,$outmask);
1430
1431 $code.=<<___;
1432 .align  5
1433 _aesp8_ctr32_encrypt8x:
1434         $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1435         li              r10,`$FRAME+8*16+15`
1436         li              r11,`$FRAME+8*16+31`
1437         stvx            v20,r10,$sp             # ABI says so
1438         addi            r10,r10,32
1439         stvx            v21,r11,$sp
1440         addi            r11,r11,32
1441         stvx            v22,r10,$sp
1442         addi            r10,r10,32
1443         stvx            v23,r11,$sp
1444         addi            r11,r11,32
1445         stvx            v24,r10,$sp
1446         addi            r10,r10,32
1447         stvx            v25,r11,$sp
1448         addi            r11,r11,32
1449         stvx            v26,r10,$sp
1450         addi            r10,r10,32
1451         stvx            v27,r11,$sp
1452         addi            r11,r11,32
1453         stvx            v28,r10,$sp
1454         addi            r10,r10,32
1455         stvx            v29,r11,$sp
1456         addi            r11,r11,32
1457         stvx            v30,r10,$sp
1458         stvx            v31,r11,$sp
1459         li              r0,-1
1460         stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
1461         li              $x10,0x10
1462         $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1463         li              $x20,0x20
1464         $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1465         li              $x30,0x30
1466         $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1467         li              $x40,0x40
1468         $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1469         li              $x50,0x50
1470         $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1471         li              $x60,0x60
1472         $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1473         li              $x70,0x70
1474         mtspr           256,r0
1475
1476         subi            $rounds,$rounds,3       # -4 in total
1477
1478         lvx             $rndkey0,$x00,$key      # load key schedule
1479         lvx             v30,$x10,$key
1480         addi            $key,$key,0x20
1481         lvx             v31,$x00,$key
1482         ?vperm          $rndkey0,$rndkey0,v30,$keyperm
1483         addi            $key_,$sp,$FRAME+15
1484         mtctr           $rounds
1485
1486 Load_ctr32_enc_key:
1487         ?vperm          v24,v30,v31,$keyperm
1488         lvx             v30,$x10,$key
1489         addi            $key,$key,0x20
1490         stvx            v24,$x00,$key_          # off-load round[1]
1491         ?vperm          v25,v31,v30,$keyperm
1492         lvx             v31,$x00,$key
1493         stvx            v25,$x10,$key_          # off-load round[2]
1494         addi            $key_,$key_,0x20
1495         bdnz            Load_ctr32_enc_key
1496
1497         lvx             v26,$x10,$key
1498         ?vperm          v24,v30,v31,$keyperm
1499         lvx             v27,$x20,$key
1500         stvx            v24,$x00,$key_          # off-load round[3]
1501         ?vperm          v25,v31,v26,$keyperm
1502         lvx             v28,$x30,$key
1503         stvx            v25,$x10,$key_          # off-load round[4]
1504         addi            $key_,$sp,$FRAME+15     # rewind $key_
1505         ?vperm          v26,v26,v27,$keyperm
1506         lvx             v29,$x40,$key
1507         ?vperm          v27,v27,v28,$keyperm
1508         lvx             v30,$x50,$key
1509         ?vperm          v28,v28,v29,$keyperm
1510         lvx             v31,$x60,$key
1511         ?vperm          v29,v29,v30,$keyperm
1512         lvx             $out0,$x70,$key         # borrow $out0
1513         ?vperm          v30,v30,v31,$keyperm
1514         lvx             v24,$x00,$key_          # pre-load round[1]
1515         ?vperm          v31,v31,$out0,$keyperm
1516         lvx             v25,$x10,$key_          # pre-load round[2]
1517
1518         vadduqm         $two,$one,$one
1519         subi            $inp,$inp,15            # undo "caller"
1520         $SHL            $len,$len,4
1521
1522         vadduqm         $out1,$ivec,$one        # counter values ...
1523         vadduqm         $out2,$ivec,$two        # (do all ctr adds as 128-bit)
1524         vxor            $out0,$ivec,$rndkey0    # ... xored with rndkey[0]
1525          le?li          $idx,8
1526         vadduqm         $out3,$out1,$two
1527         vxor            $out1,$out1,$rndkey0
1528          le?lvsl        $inpperm,0,$idx
1529         vadduqm         $out4,$out2,$two
1530         vxor            $out2,$out2,$rndkey0
1531          le?vspltisb    $tmp,0x0f
1532         vadduqm         $out5,$out3,$two
1533         vxor            $out3,$out3,$rndkey0
1534          le?vxor        $inpperm,$inpperm,$tmp  # transform for lvx_u/stvx_u
1535         vadduqm         $out6,$out4,$two
1536         vxor            $out4,$out4,$rndkey0
1537         vadduqm         $out7,$out5,$two
1538         vxor            $out5,$out5,$rndkey0
1539         vadduqm         $ivec,$out6,$two        # next counter value
1540         vxor            $out6,$out6,$rndkey0
1541         vxor            $out7,$out7,$rndkey0
1542
1543         mtctr           $rounds
1544         b               Loop_ctr32_enc8x
1545 .align  5
1546 Loop_ctr32_enc8x:
1547         vcipher         $out0,$out0,v24
1548         vcipher         $out1,$out1,v24
1549         vcipher         $out2,$out2,v24
1550         vcipher         $out3,$out3,v24
1551         vcipher         $out4,$out4,v24
1552         vcipher         $out5,$out5,v24
1553         vcipher         $out6,$out6,v24
1554         vcipher         $out7,$out7,v24
1555 Loop_ctr32_enc8x_middle:
1556         lvx             v24,$x20,$key_          # round[3]
1557         addi            $key_,$key_,0x20
1558
1559         vcipher         $out0,$out0,v25
1560         vcipher         $out1,$out1,v25
1561         vcipher         $out2,$out2,v25
1562         vcipher         $out3,$out3,v25
1563         vcipher         $out4,$out4,v25
1564         vcipher         $out5,$out5,v25
1565         vcipher         $out6,$out6,v25
1566         vcipher         $out7,$out7,v25
1567         lvx             v25,$x10,$key_          # round[4]
1568         bdnz            Loop_ctr32_enc8x
1569
1570         subic           r11,$len,256            # $len-256, borrow $key_
1571         vcipher         $out0,$out0,v24
1572         vcipher         $out1,$out1,v24
1573         vcipher         $out2,$out2,v24
1574         vcipher         $out3,$out3,v24
1575         vcipher         $out4,$out4,v24
1576         vcipher         $out5,$out5,v24
1577         vcipher         $out6,$out6,v24
1578         vcipher         $out7,$out7,v24
1579
1580         subfe           r0,r0,r0                # borrow?-1:0
1581         vcipher         $out0,$out0,v25
1582         vcipher         $out1,$out1,v25
1583         vcipher         $out2,$out2,v25
1584         vcipher         $out3,$out3,v25
1585         vcipher         $out4,$out4,v25
1586         vcipher         $out5,$out5,v25
1587         vcipher         $out6,$out6,v25
1588         vcipher         $out7,$out7,v25
1589
1590         and             r0,r0,r11
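        # r0 = ($len<256) ? $len-256 : 0, computed without a branch; it is
        # added to $inp below so the final short batch re-reads the last
        # input quadwords instead of running past the end of the buffer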
1591         addi            $key_,$sp,$FRAME+15     # rewind $key_
1592         vcipher         $out0,$out0,v26
1593         vcipher         $out1,$out1,v26
1594         vcipher         $out2,$out2,v26
1595         vcipher         $out3,$out3,v26
1596         vcipher         $out4,$out4,v26
1597         vcipher         $out5,$out5,v26
1598         vcipher         $out6,$out6,v26
1599         vcipher         $out7,$out7,v26
1600         lvx             v24,$x00,$key_          # re-pre-load round[1]
1601
1602         subic           $len,$len,129           # $len-=129
1603         vcipher         $out0,$out0,v27
1604         addi            $len,$len,1             # $len-=128 really
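        # the subic/addi split leaves CA holding the borrow from $len-129,
        # so the subfe. below detects the final 8-block batch without a
        # separate compare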
1605         vcipher         $out1,$out1,v27
1606         vcipher         $out2,$out2,v27
1607         vcipher         $out3,$out3,v27
1608         vcipher         $out4,$out4,v27
1609         vcipher         $out5,$out5,v27
1610         vcipher         $out6,$out6,v27
1611         vcipher         $out7,$out7,v27
1612         lvx             v25,$x10,$key_          # re-pre-load round[2]
1613
1614         vcipher         $out0,$out0,v28
1615          lvx_u          $in0,$x00,$inp          # load input
1616         vcipher         $out1,$out1,v28
1617          lvx_u          $in1,$x10,$inp
1618         vcipher         $out2,$out2,v28
1619          lvx_u          $in2,$x20,$inp
1620         vcipher         $out3,$out3,v28
1621          lvx_u          $in3,$x30,$inp
1622         vcipher         $out4,$out4,v28
1623          lvx_u          $in4,$x40,$inp
1624         vcipher         $out5,$out5,v28
1625          lvx_u          $in5,$x50,$inp
1626         vcipher         $out6,$out6,v28
1627          lvx_u          $in6,$x60,$inp
1628         vcipher         $out7,$out7,v28
1629          lvx_u          $in7,$x70,$inp
1630          addi           $inp,$inp,0x80
1631
1632         vcipher         $out0,$out0,v29
1633          le?vperm       $in0,$in0,$in0,$inpperm
1634         vcipher         $out1,$out1,v29
1635          le?vperm       $in1,$in1,$in1,$inpperm
1636         vcipher         $out2,$out2,v29
1637          le?vperm       $in2,$in2,$in2,$inpperm
1638         vcipher         $out3,$out3,v29
1639          le?vperm       $in3,$in3,$in3,$inpperm
1640         vcipher         $out4,$out4,v29
1641          le?vperm       $in4,$in4,$in4,$inpperm
1642         vcipher         $out5,$out5,v29
1643          le?vperm       $in5,$in5,$in5,$inpperm
1644         vcipher         $out6,$out6,v29
1645          le?vperm       $in6,$in6,$in6,$inpperm
1646         vcipher         $out7,$out7,v29
1647          le?vperm       $in7,$in7,$in7,$inpperm
1648
1649         add             $inp,$inp,r0            # $inp is adjusted in such
1650                                                 # a way that at exit from the
1651                                                 # loop inX-in7 are loaded
1652                                                 # with last "words"
1653         subfe.          r0,r0,r0                # borrow?-1:0
1654         vcipher         $out0,$out0,v30
1655          vxor           $in0,$in0,v31           # xor with last round key
1656         vcipher         $out1,$out1,v30
1657          vxor           $in1,$in1,v31
1658         vcipher         $out2,$out2,v30
1659          vxor           $in2,$in2,v31
1660         vcipher         $out3,$out3,v30
1661          vxor           $in3,$in3,v31
1662         vcipher         $out4,$out4,v30
1663          vxor           $in4,$in4,v31
1664         vcipher         $out5,$out5,v30
1665          vxor           $in5,$in5,v31
1666         vcipher         $out6,$out6,v30
1667          vxor           $in6,$in6,v31
1668         vcipher         $out7,$out7,v30
1669          vxor           $in7,$in7,v31
1670
1671         bne             Lctr32_enc8x_break      # did $len-129 borrow?
1672
1673         vcipherlast     $in0,$out0,$in0
1674         vcipherlast     $in1,$out1,$in1
1675          vadduqm        $out1,$ivec,$one        # counter values ...
1676         vcipherlast     $in2,$out2,$in2
1677          vadduqm        $out2,$ivec,$two
1678          vxor           $out0,$ivec,$rndkey0    # ... xored with rndkey[0]
1679         vcipherlast     $in3,$out3,$in3
1680          vadduqm        $out3,$out1,$two
1681          vxor           $out1,$out1,$rndkey0
1682         vcipherlast     $in4,$out4,$in4
1683          vadduqm        $out4,$out2,$two
1684          vxor           $out2,$out2,$rndkey0
1685         vcipherlast     $in5,$out5,$in5
1686          vadduqm        $out5,$out3,$two
1687          vxor           $out3,$out3,$rndkey0
1688         vcipherlast     $in6,$out6,$in6
1689          vadduqm        $out6,$out4,$two
1690          vxor           $out4,$out4,$rndkey0
1691         vcipherlast     $in7,$out7,$in7
1692          vadduqm        $out7,$out5,$two
1693          vxor           $out5,$out5,$rndkey0
1694         le?vperm        $in0,$in0,$in0,$inpperm
1695          vadduqm        $ivec,$out6,$two        # next counter value
1696          vxor           $out6,$out6,$rndkey0
1697         le?vperm        $in1,$in1,$in1,$inpperm
1698          vxor           $out7,$out7,$rndkey0
1699         mtctr           $rounds
1700
1701          vcipher        $out0,$out0,v24
1702         stvx_u          $in0,$x00,$out
1703         le?vperm        $in2,$in2,$in2,$inpperm
1704          vcipher        $out1,$out1,v24
1705         stvx_u          $in1,$x10,$out
1706         le?vperm        $in3,$in3,$in3,$inpperm
1707          vcipher        $out2,$out2,v24
1708         stvx_u          $in2,$x20,$out
1709         le?vperm        $in4,$in4,$in4,$inpperm
1710          vcipher        $out3,$out3,v24
1711         stvx_u          $in3,$x30,$out
1712         le?vperm        $in5,$in5,$in5,$inpperm
1713          vcipher        $out4,$out4,v24
1714         stvx_u          $in4,$x40,$out
1715         le?vperm        $in6,$in6,$in6,$inpperm
1716          vcipher        $out5,$out5,v24
1717         stvx_u          $in5,$x50,$out
1718         le?vperm        $in7,$in7,$in7,$inpperm
1719          vcipher        $out6,$out6,v24
1720         stvx_u          $in6,$x60,$out
1721          vcipher        $out7,$out7,v24
1722         stvx_u          $in7,$x70,$out
1723         addi            $out,$out,0x80
1724
1725         b               Loop_ctr32_enc8x_middle
1726
1727 .align  5
1728 Lctr32_enc8x_break:
1729         cmpwi           $len,-0x60
1730         blt             Lctr32_enc8x_one
1731         nop
1732         beq             Lctr32_enc8x_two
1733         cmpwi           $len,-0x40
1734         blt             Lctr32_enc8x_three
1735         nop
1736         beq             Lctr32_enc8x_four
1737         cmpwi           $len,-0x20
1738         blt             Lctr32_enc8x_five
1739         nop
1740         beq             Lctr32_enc8x_six
1741         cmpwi           $len,0x00
1742         blt             Lctr32_enc8x_seven
1743
1744 Lctr32_enc8x_eight:
1745         vcipherlast     $out0,$out0,$in0
1746         vcipherlast     $out1,$out1,$in1
1747         vcipherlast     $out2,$out2,$in2
1748         vcipherlast     $out3,$out3,$in3
1749         vcipherlast     $out4,$out4,$in4
1750         vcipherlast     $out5,$out5,$in5
1751         vcipherlast     $out6,$out6,$in6
1752         vcipherlast     $out7,$out7,$in7
1753
1754         le?vperm        $out0,$out0,$out0,$inpperm
1755         le?vperm        $out1,$out1,$out1,$inpperm
1756         stvx_u          $out0,$x00,$out
1757         le?vperm        $out2,$out2,$out2,$inpperm
1758         stvx_u          $out1,$x10,$out
1759         le?vperm        $out3,$out3,$out3,$inpperm
1760         stvx_u          $out2,$x20,$out
1761         le?vperm        $out4,$out4,$out4,$inpperm
1762         stvx_u          $out3,$x30,$out
1763         le?vperm        $out5,$out5,$out5,$inpperm
1764         stvx_u          $out4,$x40,$out
1765         le?vperm        $out6,$out6,$out6,$inpperm
1766         stvx_u          $out5,$x50,$out
1767         le?vperm        $out7,$out7,$out7,$inpperm
1768         stvx_u          $out6,$x60,$out
1769         stvx_u          $out7,$x70,$out
1770         addi            $out,$out,0x80
1771         b               Lctr32_enc8x_done
1772
1773 .align  5
1774 Lctr32_enc8x_seven:
1775         vcipherlast     $out0,$out0,$in1
1776         vcipherlast     $out1,$out1,$in2
1777         vcipherlast     $out2,$out2,$in3
1778         vcipherlast     $out3,$out3,$in4
1779         vcipherlast     $out4,$out4,$in5
1780         vcipherlast     $out5,$out5,$in6
1781         vcipherlast     $out6,$out6,$in7
1782
1783         le?vperm        $out0,$out0,$out0,$inpperm
1784         le?vperm        $out1,$out1,$out1,$inpperm
1785         stvx_u          $out0,$x00,$out
1786         le?vperm        $out2,$out2,$out2,$inpperm
1787         stvx_u          $out1,$x10,$out
1788         le?vperm        $out3,$out3,$out3,$inpperm
1789         stvx_u          $out2,$x20,$out
1790         le?vperm        $out4,$out4,$out4,$inpperm
1791         stvx_u          $out3,$x30,$out
1792         le?vperm        $out5,$out5,$out5,$inpperm
1793         stvx_u          $out4,$x40,$out
1794         le?vperm        $out6,$out6,$out6,$inpperm
1795         stvx_u          $out5,$x50,$out
1796         stvx_u          $out6,$x60,$out
1797         addi            $out,$out,0x70
1798         b               Lctr32_enc8x_done
1799
1800 .align  5
1801 Lctr32_enc8x_six:
1802         vcipherlast     $out0,$out0,$in2
1803         vcipherlast     $out1,$out1,$in3
1804         vcipherlast     $out2,$out2,$in4
1805         vcipherlast     $out3,$out3,$in5
1806         vcipherlast     $out4,$out4,$in6
1807         vcipherlast     $out5,$out5,$in7
1808
1809         le?vperm        $out0,$out0,$out0,$inpperm
1810         le?vperm        $out1,$out1,$out1,$inpperm
1811         stvx_u          $out0,$x00,$out
1812         le?vperm        $out2,$out2,$out2,$inpperm
1813         stvx_u          $out1,$x10,$out
1814         le?vperm        $out3,$out3,$out3,$inpperm
1815         stvx_u          $out2,$x20,$out
1816         le?vperm        $out4,$out4,$out4,$inpperm
1817         stvx_u          $out3,$x30,$out
1818         le?vperm        $out5,$out5,$out5,$inpperm
1819         stvx_u          $out4,$x40,$out
1820         stvx_u          $out5,$x50,$out
1821         addi            $out,$out,0x60
1822         b               Lctr32_enc8x_done
1823
1824 .align  5
1825 Lctr32_enc8x_five:
1826         vcipherlast     $out0,$out0,$in3
1827         vcipherlast     $out1,$out1,$in4
1828         vcipherlast     $out2,$out2,$in5
1829         vcipherlast     $out3,$out3,$in6
1830         vcipherlast     $out4,$out4,$in7
1831
1832         le?vperm        $out0,$out0,$out0,$inpperm
1833         le?vperm        $out1,$out1,$out1,$inpperm
1834         stvx_u          $out0,$x00,$out
1835         le?vperm        $out2,$out2,$out2,$inpperm
1836         stvx_u          $out1,$x10,$out
1837         le?vperm        $out3,$out3,$out3,$inpperm
1838         stvx_u          $out2,$x20,$out
1839         le?vperm        $out4,$out4,$out4,$inpperm
1840         stvx_u          $out3,$x30,$out
1841         stvx_u          $out4,$x40,$out
1842         addi            $out,$out,0x50
1843         b               Lctr32_enc8x_done
1844
1845 .align  5
1846 Lctr32_enc8x_four:
1847         vcipherlast     $out0,$out0,$in4
1848         vcipherlast     $out1,$out1,$in5
1849         vcipherlast     $out2,$out2,$in6
1850         vcipherlast     $out3,$out3,$in7
1851
1852         le?vperm        $out0,$out0,$out0,$inpperm
1853         le?vperm        $out1,$out1,$out1,$inpperm
1854         stvx_u          $out0,$x00,$out
1855         le?vperm        $out2,$out2,$out2,$inpperm
1856         stvx_u          $out1,$x10,$out
1857         le?vperm        $out3,$out3,$out3,$inpperm
1858         stvx_u          $out2,$x20,$out
1859         stvx_u          $out3,$x30,$out
1860         addi            $out,$out,0x40
1861         b               Lctr32_enc8x_done
1862
1863 .align  5
1864 Lctr32_enc8x_three:
1865         vcipherlast     $out0,$out0,$in5
1866         vcipherlast     $out1,$out1,$in6
1867         vcipherlast     $out2,$out2,$in7
1868
1869         le?vperm        $out0,$out0,$out0,$inpperm
1870         le?vperm        $out1,$out1,$out1,$inpperm
1871         stvx_u          $out0,$x00,$out
1872         le?vperm        $out2,$out2,$out2,$inpperm
1873         stvx_u          $out1,$x10,$out
1874         stvx_u          $out2,$x20,$out
1875         addi            $out,$out,0x30
1876         b               Lctr32_enc8x_done
1877
1878 .align  5
1879 Lctr32_enc8x_two:
1880         vcipherlast     $out0,$out0,$in6
1881         vcipherlast     $out1,$out1,$in7
1882
1883         le?vperm        $out0,$out0,$out0,$inpperm
1884         le?vperm        $out1,$out1,$out1,$inpperm
1885         stvx_u          $out0,$x00,$out
1886         stvx_u          $out1,$x10,$out
1887         addi            $out,$out,0x20
1888         b               Lctr32_enc8x_done
1889
1890 .align  5
1891 Lctr32_enc8x_one:
1892         vcipherlast     $out0,$out0,$in7
1893
1894         le?vperm        $out0,$out0,$out0,$inpperm
1895         stvx_u          $out0,0,$out
1896         addi            $out,$out,0x10
1897
1898 Lctr32_enc8x_done:
1899         li              r10,`$FRAME+15`
1900         li              r11,`$FRAME+31`
1901         stvx            $inpperm,r10,$sp        # wipe copies of round keys
1902         addi            r10,r10,32
1903         stvx            $inpperm,r11,$sp
1904         addi            r11,r11,32
1905         stvx            $inpperm,r10,$sp
1906         addi            r10,r10,32
1907         stvx            $inpperm,r11,$sp
1908         addi            r11,r11,32
1909         stvx            $inpperm,r10,$sp
1910         addi            r10,r10,32
1911         stvx            $inpperm,r11,$sp
1912         addi            r11,r11,32
1913         stvx            $inpperm,r10,$sp
1914         addi            r10,r10,32
1915         stvx            $inpperm,r11,$sp
1916         addi            r11,r11,32
1917
1918         mtspr           256,$vrsave
1919         lvx             v20,r10,$sp             # ABI says so
1920         addi            r10,r10,32
1921         lvx             v21,r11,$sp
1922         addi            r11,r11,32
1923         lvx             v22,r10,$sp
1924         addi            r10,r10,32
1925         lvx             v23,r11,$sp
1926         addi            r11,r11,32
1927         lvx             v24,r10,$sp
1928         addi            r10,r10,32
1929         lvx             v25,r11,$sp
1930         addi            r11,r11,32
1931         lvx             v26,r10,$sp
1932         addi            r10,r10,32
1933         lvx             v27,r11,$sp
1934         addi            r11,r11,32
1935         lvx             v28,r10,$sp
1936         addi            r10,r10,32
1937         lvx             v29,r11,$sp
1938         addi            r11,r11,32
1939         lvx             v30,r10,$sp
1940         lvx             v31,r11,$sp
1941         $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1942         $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1943         $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1944         $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1945         $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1946         $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1947         addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1948         blr
1949         .long           0
1950         .byte           0,12,0x14,0,0x80,6,6,0
1951         .long           0
1952 .size   .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
1953 ___
1954 }}      }}}
1955
1956 #########################################################################
1957 {{{     # XTS procedures                                                #
1958 # int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,   #
1959 #                             const AES_KEY *key1, const AES_KEY *key2, #
1960 #                             [const] unsigned char iv[16]);            #
1961 # If $key2 is NULL, then a "tweak chaining" mode is engaged, in which   #
1962 # the input tweak value is assumed to be encrypted already, and the     #
1963 # last tweak value, suitable for a consecutive call on the same chunk   #
1964 # of data, is written back to the original buffer. In addition, in      #
1965 # "tweak chaining" mode only complete input blocks are processed.       #
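#
# A minimal usage sketch in C (ours, for illustration only; the key
# schedules are assumed to have been set up beforehand):
#
#       /* standard XTS: tweak is derived by encrypting iv[] under key2 */
#       aes_p8_xts_encrypt(in, out, len, &key1, &key2, iv);
#
#       /* "tweak chaining": iv[] already holds an encrypted tweak and is
#        * updated in place, ready for the next call on the same stream */
#       aes_p8_xts_encrypt(in, out, len, &key1, NULL, iv);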
1966
1967 my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) =     map("r$_",(3..10));
1968 my ($rndkey0,$rndkey1,$inout) =                         map("v$_",(0..2));
1969 my ($output,$inptail,$inpperm,$leperm,$keyperm) =       map("v$_",(3..7));
1970 my ($tweak,$seven,$eighty7,$tmp,$tweak1) =              map("v$_",(8..12));
1971 my $taillen = $key2;
1972
1973    ($inp,$idx) = ($idx,$inp);                           # reassign
1974
1975 $code.=<<___;
1976 .globl  .${prefix}_xts_encrypt
1977         mr              $inp,r3                         # reassign
1978         li              r3,-1
1979         ${UCMP}i        $len,16
1980         bltlr-
1981
1982         lis             r0,0xfff0
1983         mfspr           r12,256                         # save vrsave
1984         li              r11,0
1985         mtspr           256,r0
1986
1987         vspltisb        $seven,0x07                     # 0x070707..07
1988         le?lvsl         $leperm,r11,r11
1989         le?vspltisb     $tmp,0x0f
1990         le?vxor         $leperm,$leperm,$seven
1991
1992         li              $idx,15
1993         lvx             $tweak,0,$ivp                   # load [unaligned] iv
1994         lvsl            $inpperm,0,$ivp
1995         lvx             $inptail,$idx,$ivp
1996         le?vxor         $inpperm,$inpperm,$tmp
1997         vperm           $tweak,$tweak,$inptail,$inpperm
1998
1999         neg             r11,$inp
2000         lvsr            $inpperm,0,r11                  # prepare for unaligned load
2001         lvx             $inout,0,$inp
2002         addi            $inp,$inp,15                    # 15 is not a typo
2003         le?vxor         $inpperm,$inpperm,$tmp
2004
2005         ${UCMP}i        $key2,0                         # key2==NULL?
2006         beq             Lxts_enc_no_key2
2007
2008         ?lvsl           $keyperm,0,$key2                # prepare for unaligned key
2009         lwz             $rounds,240($key2)
2010         srwi            $rounds,$rounds,1
2011         subi            $rounds,$rounds,1
2012         li              $idx,16
2013
2014         lvx             $rndkey0,0,$key2
2015         lvx             $rndkey1,$idx,$key2
2016         addi            $idx,$idx,16
2017         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2018         vxor            $tweak,$tweak,$rndkey0
2019         lvx             $rndkey0,$idx,$key2
2020         addi            $idx,$idx,16
2021         mtctr           $rounds
2022
2023 Ltweak_xts_enc:
2024         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2025         vcipher         $tweak,$tweak,$rndkey1
2026         lvx             $rndkey1,$idx,$key2
2027         addi            $idx,$idx,16
2028         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2029         vcipher         $tweak,$tweak,$rndkey0
2030         lvx             $rndkey0,$idx,$key2
2031         addi            $idx,$idx,16
2032         bdnz            Ltweak_xts_enc
2033
2034         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2035         vcipher         $tweak,$tweak,$rndkey1
2036         lvx             $rndkey1,$idx,$key2
2037         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2038         vcipherlast     $tweak,$tweak,$rndkey0
2039
2040         li              $ivp,0                          # don't chain the tweak
2041         b               Lxts_enc
2042
2043 Lxts_enc_no_key2:
2044         li              $idx,-16
2045         and             $len,$len,$idx                  # in "tweak chaining"
2046                                                         # mode only complete
2047                                                         # blocks are processed
2048 Lxts_enc:
2049         lvx             $inptail,0,$inp
2050         addi            $inp,$inp,16
2051
2052         ?lvsl           $keyperm,0,$key1                # prepare for unaligned key
2053         lwz             $rounds,240($key1)
2054         srwi            $rounds,$rounds,1
2055         subi            $rounds,$rounds,1
2056         li              $idx,16
2057
2058         vslb            $eighty7,$seven,$seven          # 0x808080..80
2059         vor             $eighty7,$eighty7,$seven        # 0x878787..87
2060         vspltisb        $tmp,1                          # 0x010101..01
2061         vsldoi          $eighty7,$eighty7,$tmp,15       # 0x870101..01
2062
2063         ${UCMP}i        $len,96
2064         bge             _aesp8_xts_encrypt6x
2065
2066         andi.           $taillen,$len,15
2067         subic           r0,$len,32
2068         subi            $taillen,$taillen,16
2069         subfe           r0,r0,r0
2070         and             r0,r0,$taillen
2071         add             $inp,$inp,r0
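        # branchless set-up for ciphertext stealing: when fewer than two
        # full blocks remain and the length has a partial tail, r0 backs
        # $inp up by 16-(len mod 16) bytes; otherwise r0 = 0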
2072
2073         lvx             $rndkey0,0,$key1
2074         lvx             $rndkey1,$idx,$key1
2075         addi            $idx,$idx,16
2076         vperm           $inout,$inout,$inptail,$inpperm
2077         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2078         vxor            $inout,$inout,$tweak
2079         vxor            $inout,$inout,$rndkey0
2080         lvx             $rndkey0,$idx,$key1
2081         addi            $idx,$idx,16
2082         mtctr           $rounds
2083         b               Loop_xts_enc
2084
2085 .align  5
2086 Loop_xts_enc:
2087         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2088         vcipher         $inout,$inout,$rndkey1
2089         lvx             $rndkey1,$idx,$key1
2090         addi            $idx,$idx,16
2091         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2092         vcipher         $inout,$inout,$rndkey0
2093         lvx             $rndkey0,$idx,$key1
2094         addi            $idx,$idx,16
2095         bdnz            Loop_xts_enc
2096
2097         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2098         vcipher         $inout,$inout,$rndkey1
2099         lvx             $rndkey1,$idx,$key1
2100         li              $idx,16
2101         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2102         vxor            $rndkey0,$rndkey0,$tweak
2103         vcipherlast     $output,$inout,$rndkey0
2104
2105         le?vperm        $tmp,$output,$output,$leperm
2106         be?nop
2107         le?stvx_u       $tmp,0,$out
2108         be?stvx_u       $output,0,$out
2109         addi            $out,$out,16
2110
2111         subic.          $len,$len,16
2112         beq             Lxts_enc_done
2113
2114         vmr             $inout,$inptail
2115         lvx             $inptail,0,$inp
2116         addi            $inp,$inp,16
2117         lvx             $rndkey0,0,$key1
2118         lvx             $rndkey1,$idx,$key1
2119         addi            $idx,$idx,16
2120
2121         subic           r0,$len,32
2122         subfe           r0,r0,r0
2123         and             r0,r0,$taillen
2124         add             $inp,$inp,r0
2125
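        # the sequence below multiplies $tweak by x in GF(2^128) with
        # reduction polynomial x^128+x^7+x^2+x+1: vaddubm doubles each
        # byte, vsrab extracts the per-byte carries, and vsldoi/vand
        # against 0x870101..01 feeds each carry into its neighbour, with
        # 0x87 folding the top bit back in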
2126         vsrab           $tmp,$tweak,$seven              # next tweak value
2127         vaddubm         $tweak,$tweak,$tweak
2128         vsldoi          $tmp,$tmp,$tmp,15
2129         vand            $tmp,$tmp,$eighty7
2130         vxor            $tweak,$tweak,$tmp
2131
2132         vperm           $inout,$inout,$inptail,$inpperm
2133         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2134         vxor            $inout,$inout,$tweak
2135         vxor            $output,$output,$rndkey0        # just in case $len<16
2136         vxor            $inout,$inout,$rndkey0
2137         lvx             $rndkey0,$idx,$key1
2138         addi            $idx,$idx,16
2139
2140         mtctr           $rounds
2141         ${UCMP}i        $len,16
2142         bge             Loop_xts_enc
2143
2144         vxor            $output,$output,$tweak
2145         lvsr            $inpperm,0,$len                 # $inpperm is no longer needed
2146         vxor            $inptail,$inptail,$inptail      # $inptail is no longer needed
2147         vspltisb        $tmp,-1
2148         vperm           $inptail,$inptail,$tmp,$inpperm
2149         vsel            $inout,$inout,$output,$inptail
2150
2151         subi            r11,$out,17
2152         subi            $out,$out,16
2153         mtctr           $len
2154         li              $len,16
2155 Loop_xts_enc_steal:
2156         lbzu            r0,1(r11)
2157         stb             r0,16(r11)
2158         bdnz            Loop_xts_enc_steal
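        # the loop above copies the leading bytes of the last full
        # ciphertext block out to the partial tail; Loop_xts_enc then
        # runs once more to encrypt the stolen block in its place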
2159
2160         mtctr           $rounds
2161         b               Loop_xts_enc                    # one more time...
2162
2163 Lxts_enc_done:
2164         ${UCMP}i        $ivp,0
2165         beq             Lxts_enc_ret
2166
2167         vsrab           $tmp,$tweak,$seven              # next tweak value
2168         vaddubm         $tweak,$tweak,$tweak
2169         vsldoi          $tmp,$tmp,$tmp,15
2170         vand            $tmp,$tmp,$eighty7
2171         vxor            $tweak,$tweak,$tmp
2172
2173         le?vperm        $tweak,$tweak,$tweak,$leperm
2174         stvx_u          $tweak,0,$ivp
2175
2176 Lxts_enc_ret:
2177         mtspr           256,r12                         # restore vrsave
2178         li              r3,0
2179         blr
2180         .long           0
2181         .byte           0,12,0x04,0,0x80,6,6,0
2182         .long           0
2183 .size   .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
2184
2185 .globl  .${prefix}_xts_decrypt
2186         mr              $inp,r3                         # reassign
2187         li              r3,-1
2188         ${UCMP}i        $len,16
2189         bltlr-
2190
2191         lis             r0,0xfff8
2192         mfspr           r12,256                         # save vrsave
2193         li              r11,0
2194         mtspr           256,r0
2195
2196         andi.           r0,$len,15
2197         neg             r0,r0
2198         andi.           r0,r0,16
2199         sub             $len,$len,r0
2200
2201         vspltisb        $seven,0x07                     # 0x070707..07
2202         le?lvsl         $leperm,r11,r11
2203         le?vspltisb     $tmp,0x0f
2204         le?vxor         $leperm,$leperm,$seven
2205
2206         li              $idx,15
2207         lvx             $tweak,0,$ivp                   # load [unaligned] iv
2208         lvsl            $inpperm,0,$ivp
2209         lvx             $inptail,$idx,$ivp
2210         le?vxor         $inpperm,$inpperm,$tmp
2211         vperm           $tweak,$tweak,$inptail,$inpperm
2212
2213         neg             r11,$inp
2214         lvsr            $inpperm,0,r11                  # prepare for unaligned load
2215         lvx             $inout,0,$inp
2216         addi            $inp,$inp,15                    # 15 is not a typo
2217         le?vxor         $inpperm,$inpperm,$tmp
2218
2219         ${UCMP}i        $key2,0                         # key2==NULL?
2220         beq             Lxts_dec_no_key2
2221
2222         ?lvsl           $keyperm,0,$key2                # prepare for unaligned key
2223         lwz             $rounds,240($key2)
2224         srwi            $rounds,$rounds,1
2225         subi            $rounds,$rounds,1
2226         li              $idx,16
2227
2228         lvx             $rndkey0,0,$key2
2229         lvx             $rndkey1,$idx,$key2
2230         addi            $idx,$idx,16
2231         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2232         vxor            $tweak,$tweak,$rndkey0
2233         lvx             $rndkey0,$idx,$key2
2234         addi            $idx,$idx,16
2235         mtctr           $rounds
2236
2237 Ltweak_xts_dec:
2238         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2239         vcipher         $tweak,$tweak,$rndkey1
2240         lvx             $rndkey1,$idx,$key2
2241         addi            $idx,$idx,16
2242         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2243         vcipher         $tweak,$tweak,$rndkey0
2244         lvx             $rndkey0,$idx,$key2
2245         addi            $idx,$idx,16
2246         bdnz            Ltweak_xts_dec
2247
2248         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2249         vcipher         $tweak,$tweak,$rndkey1
2250         lvx             $rndkey1,$idx,$key2
2251         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2252         vcipherlast     $tweak,$tweak,$rndkey0
2253
2254         li              $ivp,0                          # don't chain the tweak
2255         b               Lxts_dec
2256
2257 Lxts_dec_no_key2:
2258         neg             $idx,$len
2259         andi.           $idx,$idx,15
2260         add             $len,$len,$idx                  # in "tweak chaining"
2261                                                         # mode only complete
2262                                                         # blocks are processed
2263 Lxts_dec:
2264         lvx             $inptail,0,$inp
2265         addi            $inp,$inp,16
2266
2267         ?lvsl           $keyperm,0,$key1                # prepare for unaligned key
2268         lwz             $rounds,240($key1)
2269         srwi            $rounds,$rounds,1
2270         subi            $rounds,$rounds,1
2271         li              $idx,16
2272
2273         vslb            $eighty7,$seven,$seven          # 0x808080..80
2274         vor             $eighty7,$eighty7,$seven        # 0x878787..87
2275         vspltisb        $tmp,1                          # 0x010101..01
2276         vsldoi          $eighty7,$eighty7,$tmp,15       # 0x870101..01
2277
2278         ${UCMP}i        $len,96
2279         bge             _aesp8_xts_decrypt6x
2280
2281         lvx             $rndkey0,0,$key1
2282         lvx             $rndkey1,$idx,$key1
2283         addi            $idx,$idx,16
2284         vperm           $inout,$inout,$inptail,$inpperm
2285         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2286         vxor            $inout,$inout,$tweak
2287         vxor            $inout,$inout,$rndkey0
2288         lvx             $rndkey0,$idx,$key1
2289         addi            $idx,$idx,16
2290         mtctr           $rounds
2291
2292         ${UCMP}i        $len,16
2293         blt             Ltail_xts_dec
2294         be?b            Loop_xts_dec
2295
2296 .align  5
2297 Loop_xts_dec:
2298         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2299         vncipher        $inout,$inout,$rndkey1
2300         lvx             $rndkey1,$idx,$key1
2301         addi            $idx,$idx,16
2302         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2303         vncipher        $inout,$inout,$rndkey0
2304         lvx             $rndkey0,$idx,$key1
2305         addi            $idx,$idx,16
2306         bdnz            Loop_xts_dec
2307
2308         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2309         vncipher        $inout,$inout,$rndkey1
2310         lvx             $rndkey1,$idx,$key1
2311         li              $idx,16
2312         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2313         vxor            $rndkey0,$rndkey0,$tweak
2314         vncipherlast    $output,$inout,$rndkey0
2315
2316         le?vperm        $tmp,$output,$output,$leperm
2317         be?nop
2318         le?stvx_u       $tmp,0,$out
2319         be?stvx_u       $output,0,$out
2320         addi            $out,$out,16
2321
2322         subic.          $len,$len,16
2323         beq             Lxts_dec_done
2324
2325         vmr             $inout,$inptail
2326         lvx             $inptail,0,$inp
2327         addi            $inp,$inp,16
2328         lvx             $rndkey0,0,$key1
2329         lvx             $rndkey1,$idx,$key1
2330         addi            $idx,$idx,16
2331
2332         vsrab           $tmp,$tweak,$seven              # next tweak value
2333         vaddubm         $tweak,$tweak,$tweak
2334         vsldoi          $tmp,$tmp,$tmp,15
2335         vand            $tmp,$tmp,$eighty7
2336         vxor            $tweak,$tweak,$tmp
2337
2338         vperm           $inout,$inout,$inptail,$inpperm
2339         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2340         vxor            $inout,$inout,$tweak
2341         vxor            $inout,$inout,$rndkey0
2342         lvx             $rndkey0,$idx,$key1
2343         addi            $idx,$idx,16
2344
2345         mtctr           $rounds
2346         ${UCMP}i        $len,16
2347         bge             Loop_xts_dec
2348
2349 Ltail_xts_dec:
2350         vsrab           $tmp,$tweak,$seven              # next tweak value
2351         vaddubm         $tweak1,$tweak,$tweak
2352         vsldoi          $tmp,$tmp,$tmp,15
2353         vand            $tmp,$tmp,$eighty7
2354         vxor            $tweak1,$tweak1,$tmp
2355
2356         subi            $inp,$inp,16
2357         add             $inp,$inp,$len
2358
2359         vxor            $inout,$inout,$tweak            # :-(
2360         vxor            $inout,$inout,$tweak1           # :-)
2361
2362 Loop_xts_dec_short:
2363         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2364         vncipher        $inout,$inout,$rndkey1
2365         lvx             $rndkey1,$idx,$key1
2366         addi            $idx,$idx,16
2367         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2368         vncipher        $inout,$inout,$rndkey0
2369         lvx             $rndkey0,$idx,$key1
2370         addi            $idx,$idx,16
2371         bdnz            Loop_xts_dec_short
2372
2373         ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
2374         vncipher        $inout,$inout,$rndkey1
2375         lvx             $rndkey1,$idx,$key1
2376         li              $idx,16
2377         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2378         vxor            $rndkey0,$rndkey0,$tweak1
2379         vncipherlast    $output,$inout,$rndkey0
2380
2381         le?vperm        $tmp,$output,$output,$leperm
2382         be?nop
2383         le?stvx_u       $tmp,0,$out
2384         be?stvx_u       $output,0,$out
2385
2386         vmr             $inout,$inptail
2387         lvx             $inptail,0,$inp
2388         #addi           $inp,$inp,16
2389         lvx             $rndkey0,0,$key1
2390         lvx             $rndkey1,$idx,$key1
2391         addi            $idx,$idx,16
2392         vperm           $inout,$inout,$inptail,$inpperm
2393         ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
2394
2395         lvsr            $inpperm,0,$len                 # $inpperm is no longer needed
2396         vxor            $inptail,$inptail,$inptail      # $inptail is no longer needed
2397         vspltisb        $tmp,-1
2398         vperm           $inptail,$inptail,$tmp,$inpperm
2399         vsel            $inout,$inout,$output,$inptail
2400
2401         vxor            $rndkey0,$rndkey0,$tweak
2402         vxor            $inout,$inout,$rndkey0
2403         lvx             $rndkey0,$idx,$key1
2404         addi            $idx,$idx,16
2405
2406         subi            r11,$out,1
2407         mtctr           $len
2408         li              $len,16
2409 Loop_xts_dec_steal:
2410         lbzu            r0,1(r11)
2411         stb             r0,16(r11)
2412         bdnz            Loop_xts_dec_steal
2413
2414         mtctr           $rounds
2415         b               Loop_xts_dec                    # one more time...
2416
2417 Lxts_dec_done:
2418         ${UCMP}i        $ivp,0
2419         beq             Lxts_dec_ret
2420
2421         vsrab           $tmp,$tweak,$seven              # next tweak value
2422         vaddubm         $tweak,$tweak,$tweak
2423         vsldoi          $tmp,$tmp,$tmp,15
2424         vand            $tmp,$tmp,$eighty7
2425         vxor            $tweak,$tweak,$tmp
2426
2427         le?vperm        $tweak,$tweak,$tweak,$leperm
2428         stvx_u          $tweak,0,$ivp
2429
2430 Lxts_dec_ret:
2431         mtspr           256,r12                         # restore vrsave
2432         li              r3,0
2433         blr
2434         .long           0
2435         .byte           0,12,0x04,0,0x80,6,6,0
2436         .long           0
2437 .size   .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
2438 ___
2439 #########################################################################
2440 {{      # Optimized XTS procedures                                      #
2441 my $key_=$key2;
2442 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
2443     $x00=0 if ($flavour =~ /osx/);
2444 my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5 )=map("v$_",(0..5));
2445 my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
2446 my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
2447 my $rndkey0="v23";      # v24-v25 rotating buffer for the first round keys
2448                         # v26-v31 last 6 round keys
2449 my ($keyperm)=($out0);  # aliases with "caller", redundant assignment
2450 my $taillen=$x70;
2451
2452 $code.=<<___;
2453 .align  5
2454 _aesp8_xts_encrypt6x:
2455         $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2456         mflr            r11
2457         li              r7,`$FRAME+8*16+15`
2458         li              r3,`$FRAME+8*16+31`
2459         $PUSH           r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2460         stvx            v20,r7,$sp              # ABI says so
2461         addi            r7,r7,32
2462         stvx            v21,r3,$sp
2463         addi            r3,r3,32
2464         stvx            v22,r7,$sp
2465         addi            r7,r7,32
2466         stvx            v23,r3,$sp
2467         addi            r3,r3,32
2468         stvx            v24,r7,$sp
2469         addi            r7,r7,32
2470         stvx            v25,r3,$sp
2471         addi            r3,r3,32
2472         stvx            v26,r7,$sp
2473         addi            r7,r7,32
2474         stvx            v27,r3,$sp
2475         addi            r3,r3,32
2476         stvx            v28,r7,$sp
2477         addi            r7,r7,32
2478         stvx            v29,r3,$sp
2479         addi            r3,r3,32
2480         stvx            v30,r7,$sp
2481         stvx            v31,r3,$sp
2482         li              r0,-1
2483         stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
2484         li              $x10,0x10
2485         $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2486         li              $x20,0x20
2487         $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2488         li              $x30,0x30
2489         $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2490         li              $x40,0x40
2491         $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2492         li              $x50,0x50
2493         $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2494         li              $x60,0x60
2495         $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2496         li              $x70,0x70
2497         mtspr           256,r0
2498
2499         xxlor           2, 32+$eighty7, 32+$eighty7
2500         vsldoi          $eighty7,$tmp,$eighty7,1        # 0x010101..87
2501         xxlor           1, 32+$eighty7, 32+$eighty7
2502
2503         # Load XOR Lconsts.
2504         mr              $x70, r6
2505         bl              Lconsts
2506         lxvw4x          0, $x40, r6             # load XOR contents
2507         mr              r6, $x70
2508         li              $x70,0x70
2509
2510         subi            $rounds,$rounds,3       # -4 in total
2511
2512         lvx             $rndkey0,$x00,$key1     # load key schedule
2513         lvx             v30,$x10,$key1
2514         addi            $key1,$key1,0x20
2515         lvx             v31,$x00,$key1
2516         ?vperm          $rndkey0,$rndkey0,v30,$keyperm
2517         addi            $key_,$sp,$FRAME+15
2518         mtctr           $rounds
2519
2520 Load_xts_enc_key:
2521         ?vperm          v24,v30,v31,$keyperm
2522         lvx             v30,$x10,$key1
2523         addi            $key1,$key1,0x20
2524         stvx            v24,$x00,$key_          # off-load round[1]
2525         ?vperm          v25,v31,v30,$keyperm
2526         lvx             v31,$x00,$key1
2527         stvx            v25,$x10,$key_          # off-load round[2]
2528         addi            $key_,$key_,0x20
2529         bdnz            Load_xts_enc_key
2530
2531         lvx             v26,$x10,$key1
2532         ?vperm          v24,v30,v31,$keyperm
2533         lvx             v27,$x20,$key1
2534         stvx            v24,$x00,$key_          # off-load round[3]
2535         ?vperm          v25,v31,v26,$keyperm
2536         lvx             v28,$x30,$key1
2537         stvx            v25,$x10,$key_          # off-load round[4]
2538         addi            $key_,$sp,$FRAME+15     # rewind $key_
2539         ?vperm          v26,v26,v27,$keyperm
2540         lvx             v29,$x40,$key1
2541         ?vperm          v27,v27,v28,$keyperm
2542         lvx             v30,$x50,$key1
2543         ?vperm          v28,v28,v29,$keyperm
2544         lvx             v31,$x60,$key1
2545         ?vperm          v29,v29,v30,$keyperm
2546         lvx             $twk5,$x70,$key1        # borrow $twk5
2547         ?vperm          v30,v30,v31,$keyperm
2548         lvx             v24,$x00,$key_          # pre-load round[1]
2549         ?vperm          v31,v31,$twk5,$keyperm
2550         lvx             v25,$x10,$key_          # pre-load round[2]
2551
2552         # Switch to the following sequence, which uses 0x010101..87 to generate the tweak.
2553         #     eighty7 = 0x010101..87
2554         # vsrab         tmp, tweak, seven       # next tweak value, right shift 7 bits
2555         # vand          tmp, tmp, eighty7       # last byte with carry
2556         # vaddubm       tweak, tweak, tweak     # left shift 1 bit (x2)
2557         # xxlor         vsx, 0, 0
2558         # vpermxor      tweak, tweak, tmp, vsx
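        # i.e. the vsldoi/vxor carry fold of the original sequence is
        # collapsed into a single vpermxor driven by the XOR constants
        # loaded from Lconsts above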
2559
2560          vperm          $in0,$inout,$inptail,$inpperm
2561          subi           $inp,$inp,31            # undo "caller"
2562         vxor            $twk0,$tweak,$rndkey0
2563         vsrab           $tmp,$tweak,$seven      # next tweak value
2564         vaddubm         $tweak,$tweak,$tweak
2565         vand            $tmp,$tmp,$eighty7
2566          vxor           $out0,$in0,$twk0
2567         xxlor           32+$in1, 0, 0
2568         vpermxor        $tweak, $tweak, $tmp, $in1
2569
2570          lvx_u          $in1,$x10,$inp
2571         vxor            $twk1,$tweak,$rndkey0
2572         vsrab           $tmp,$tweak,$seven      # next tweak value
2573         vaddubm         $tweak,$tweak,$tweak
2574          le?vperm       $in1,$in1,$in1,$leperm
2575         vand            $tmp,$tmp,$eighty7
2576          vxor           $out1,$in1,$twk1
2577         xxlor           32+$in2, 0, 0
2578         vpermxor        $tweak, $tweak, $tmp, $in2
2579
2580          lvx_u          $in2,$x20,$inp
2581          andi.          $taillen,$len,15
2582         vxor            $twk2,$tweak,$rndkey0
2583         vsrab           $tmp,$tweak,$seven      # next tweak value
2584         vaddubm         $tweak,$tweak,$tweak
2585          le?vperm       $in2,$in2,$in2,$leperm
2586         vand            $tmp,$tmp,$eighty7
2587          vxor           $out2,$in2,$twk2
2588         xxlor           32+$in3, 0, 0
2589         vpermxor        $tweak, $tweak, $tmp, $in3
2590
2591          lvx_u          $in3,$x30,$inp
2592          sub            $len,$len,$taillen
2593         vxor            $twk3,$tweak,$rndkey0
2594         vsrab           $tmp,$tweak,$seven      # next tweak value
2595         vaddubm         $tweak,$tweak,$tweak
2596          le?vperm       $in3,$in3,$in3,$leperm
2597         vand            $tmp,$tmp,$eighty7
2598          vxor           $out3,$in3,$twk3
2599         xxlor           32+$in4, 0, 0
2600         vpermxor        $tweak, $tweak, $tmp, $in4
2601
2602          lvx_u          $in4,$x40,$inp
2603          subi           $len,$len,0x60
2604         vxor            $twk4,$tweak,$rndkey0
2605         vsrab           $tmp,$tweak,$seven      # next tweak value
2606         vaddubm         $tweak,$tweak,$tweak
2607          le?vperm       $in4,$in4,$in4,$leperm
2608         vand            $tmp,$tmp,$eighty7
2609          vxor           $out4,$in4,$twk4
2610         xxlor           32+$in5, 0, 0
2611         vpermxor        $tweak, $tweak, $tmp, $in5
2612
2613          lvx_u          $in5,$x50,$inp
2614          addi           $inp,$inp,0x60
2615         vxor            $twk5,$tweak,$rndkey0
2616         vsrab           $tmp,$tweak,$seven      # next tweak value
2617         vaddubm         $tweak,$tweak,$tweak
2618          le?vperm       $in5,$in5,$in5,$leperm
2619         vand            $tmp,$tmp,$eighty7
2620          vxor           $out5,$in5,$twk5
2621         xxlor           32+$in0, 0, 0
2622         vpermxor        $tweak, $tweak, $tmp, $in0
2623
2624         vxor            v31,v31,$rndkey0
2625         mtctr           $rounds
2626         b               Loop_xts_enc6x
2627
2628 .align  5
2629 Loop_xts_enc6x:
2630         vcipher         $out0,$out0,v24
2631         vcipher         $out1,$out1,v24
2632         vcipher         $out2,$out2,v24
2633         vcipher         $out3,$out3,v24
2634         vcipher         $out4,$out4,v24
2635         vcipher         $out5,$out5,v24
2636         lvx             v24,$x20,$key_          # round[3]
2637         addi            $key_,$key_,0x20
2638
2639         vcipher         $out0,$out0,v25
2640         vcipher         $out1,$out1,v25
2641         vcipher         $out2,$out2,v25
2642         vcipher         $out3,$out3,v25
2643         vcipher         $out4,$out4,v25
2644         vcipher         $out5,$out5,v25
2645         lvx             v25,$x10,$key_          # round[4]
2646         bdnz            Loop_xts_enc6x
2647
2648         xxlor           32+$eighty7, 1, 1       # 0x010101..87
2649
2650         subic           $len,$len,96            # $len-=96
2651          vxor           $in0,$twk0,v31          # xor with last round key
2652         vcipher         $out0,$out0,v24
2653         vcipher         $out1,$out1,v24
2654          vsrab          $tmp,$tweak,$seven      # next tweak value
2655          vxor           $twk0,$tweak,$rndkey0
2656          vaddubm        $tweak,$tweak,$tweak
2657         vcipher         $out2,$out2,v24
2658         vcipher         $out3,$out3,v24
2659         vcipher         $out4,$out4,v24
2660         vcipher         $out5,$out5,v24
2661
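        # Branchless tail handling: subic clears CA when $len drops
        # below 96, and subfe. r0,r0,r0 expands that into r0 = CA-1,
        # i.e. all-ones on borrow and zero otherwise (it also sets CR0
        # for the "beq Loop_xts_enc6x" below). ANDed with the then
        # negative $len and added to $inp, it rewinds the pointer so
        # that the loads at the bottom of this iteration fetch the
        # last six blocks for the tail code to pick apart.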
2662         subfe.          r0,r0,r0                # borrow?-1:0
2663          vand           $tmp,$tmp,$eighty7
2664         vcipher         $out0,$out0,v25
2665         vcipher         $out1,$out1,v25
2666          xxlor          32+$in1, 0, 0
2667          vpermxor       $tweak, $tweak, $tmp, $in1
2668         vcipher         $out2,$out2,v25
2669         vcipher         $out3,$out3,v25
2670          vxor           $in1,$twk1,v31
2671          vsrab          $tmp,$tweak,$seven      # next tweak value
2672          vxor           $twk1,$tweak,$rndkey0
2673         vcipher         $out4,$out4,v25
2674         vcipher         $out5,$out5,v25
2675
2676         and             r0,r0,$len
2677          vaddubm        $tweak,$tweak,$tweak
2678         vcipher         $out0,$out0,v26
2679         vcipher         $out1,$out1,v26
2680          vand           $tmp,$tmp,$eighty7
2681         vcipher         $out2,$out2,v26
2682         vcipher         $out3,$out3,v26
2683          xxlor          32+$in2, 0, 0
2684          vpermxor       $tweak, $tweak, $tmp, $in2
2685         vcipher         $out4,$out4,v26
2686         vcipher         $out5,$out5,v26
2687
2688         add             $inp,$inp,r0            # $inp is adjusted in such
2689                                                 # a way that at exit from
2690                                                 # the loop $in0-$in5 hold
2691                                                 # the last input blocks
2692          vxor           $in2,$twk2,v31
2693          vsrab          $tmp,$tweak,$seven      # next tweak value
2694          vxor           $twk2,$tweak,$rndkey0
2695          vaddubm        $tweak,$tweak,$tweak
2696         vcipher         $out0,$out0,v27
2697         vcipher         $out1,$out1,v27
2698         vcipher         $out2,$out2,v27
2699         vcipher         $out3,$out3,v27
2700          vand           $tmp,$tmp,$eighty7
2701         vcipher         $out4,$out4,v27
2702         vcipher         $out5,$out5,v27
2703
2704         addi            $key_,$sp,$FRAME+15     # rewind $key_
2705          xxlor          32+$in3, 0, 0
2706          vpermxor       $tweak, $tweak, $tmp, $in3
2707         vcipher         $out0,$out0,v28
2708         vcipher         $out1,$out1,v28
2709          vxor           $in3,$twk3,v31
2710          vsrab          $tmp,$tweak,$seven      # next tweak value
2711          vxor           $twk3,$tweak,$rndkey0
2712         vcipher         $out2,$out2,v28
2713         vcipher         $out3,$out3,v28
2714          vaddubm        $tweak,$tweak,$tweak
2715         vcipher         $out4,$out4,v28
2716         vcipher         $out5,$out5,v28
2717         lvx             v24,$x00,$key_          # re-pre-load round[1]
2718          vand           $tmp,$tmp,$eighty7
2719
2720         vcipher         $out0,$out0,v29
2721         vcipher         $out1,$out1,v29
2722          xxlor          32+$in4, 0, 0
2723          vpermxor       $tweak, $tweak, $tmp, $in4
2724         vcipher         $out2,$out2,v29
2725         vcipher         $out3,$out3,v29
2726          vxor           $in4,$twk4,v31
2727          vsrab          $tmp,$tweak,$seven      # next tweak value
2728          vxor           $twk4,$tweak,$rndkey0
2729         vcipher         $out4,$out4,v29
2730         vcipher         $out5,$out5,v29
2731         lvx             v25,$x10,$key_          # re-pre-load round[2]
2732          vaddubm        $tweak,$tweak,$tweak
2733
2734         vcipher         $out0,$out0,v30
2735         vcipher         $out1,$out1,v30
2736          vand           $tmp,$tmp,$eighty7
2737         vcipher         $out2,$out2,v30
2738         vcipher         $out3,$out3,v30
2739          xxlor          32+$in5, 0, 0
2740          vpermxor       $tweak, $tweak, $tmp, $in5
2741         vcipher         $out4,$out4,v30
2742         vcipher         $out5,$out5,v30
2743          vxor           $in5,$twk5,v31
2744          vsrab          $tmp,$tweak,$seven      # next tweak value
2745          vxor           $twk5,$tweak,$rndkey0
2746
2747         vcipherlast     $out0,$out0,$in0
2748          lvx_u          $in0,$x00,$inp          # load next input block
2749          vaddubm        $tweak,$tweak,$tweak
2750         vcipherlast     $out1,$out1,$in1
2751          lvx_u          $in1,$x10,$inp
2752         vcipherlast     $out2,$out2,$in2
2753          le?vperm       $in0,$in0,$in0,$leperm
2754          lvx_u          $in2,$x20,$inp
2755          vand           $tmp,$tmp,$eighty7
2756         vcipherlast     $out3,$out3,$in3
2757          le?vperm       $in1,$in1,$in1,$leperm
2758          lvx_u          $in3,$x30,$inp
2759         vcipherlast     $out4,$out4,$in4
2760          le?vperm       $in2,$in2,$in2,$leperm
2761          lvx_u          $in4,$x40,$inp
2762          xxlor          10, 32+$in0, 32+$in0
2763          xxlor          32+$in0, 0, 0
2764          vpermxor       $tweak, $tweak, $tmp, $in0
2765          xxlor          32+$in0, 10, 10
2766         vcipherlast     $tmp,$out5,$in5         # last block might be needed
2767                                                 # in stealing mode
2768          le?vperm       $in3,$in3,$in3,$leperm
2769          lvx_u          $in5,$x50,$inp
2770          addi           $inp,$inp,0x60
2771          le?vperm       $in4,$in4,$in4,$leperm
2772          le?vperm       $in5,$in5,$in5,$leperm
2773
2774         le?vperm        $out0,$out0,$out0,$leperm
2775         le?vperm        $out1,$out1,$out1,$leperm
2776         stvx_u          $out0,$x00,$out         # store output
2777          vxor           $out0,$in0,$twk0
2778         le?vperm        $out2,$out2,$out2,$leperm
2779         stvx_u          $out1,$x10,$out
2780          vxor           $out1,$in1,$twk1
2781         le?vperm        $out3,$out3,$out3,$leperm
2782         stvx_u          $out2,$x20,$out
2783          vxor           $out2,$in2,$twk2
2784         le?vperm        $out4,$out4,$out4,$leperm
2785         stvx_u          $out3,$x30,$out
2786          vxor           $out3,$in3,$twk3
2787         le?vperm        $out5,$tmp,$tmp,$leperm
2788         stvx_u          $out4,$x40,$out
2789          vxor           $out4,$in4,$twk4
2790         le?stvx_u       $out5,$x50,$out
2791         be?stvx_u       $tmp, $x50,$out
2792          vxor           $out5,$in5,$twk5
2793         addi            $out,$out,0x60
2794
2795         mtctr           $rounds
2796         beq             Loop_xts_enc6x          # did $len-=96 borrow?
2797
2798         xxlor           32+$eighty7, 2, 2       # 0x010101..87
2799
2800         addic.          $len,$len,0x60
2801         beq             Lxts_enc6x_zero
2802         cmpwi           $len,0x20
2803         blt             Lxts_enc6x_one
2804         nop
2805         beq             Lxts_enc6x_two
2806         cmpwi           $len,0x40
2807         blt             Lxts_enc6x_three
2808         nop
2809         beq             Lxts_enc6x_four
2810
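        # 0 to 5 whole blocks (plus a possible partial tail) remain at
        # this point. The multi-block cases below xor the surviving
        # inputs with their tweaks, zero the unused slots and share
        # _aesp8_xts_enc5x; the single-block case runs its own round
        # loop. Each case parks the first unused tweak in $twk0 for
        # the ciphertext-stealing code.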
2811 Lxts_enc6x_five:
2812         vxor            $out0,$in1,$twk0
2813         vxor            $out1,$in2,$twk1
2814         vxor            $out2,$in3,$twk2
2815         vxor            $out3,$in4,$twk3
2816         vxor            $out4,$in5,$twk4
2817
2818         bl              _aesp8_xts_enc5x
2819
2820         le?vperm        $out0,$out0,$out0,$leperm
2821         vmr             $twk0,$twk5             # unused tweak
2822         le?vperm        $out1,$out1,$out1,$leperm
2823         stvx_u          $out0,$x00,$out         # store output
2824         le?vperm        $out2,$out2,$out2,$leperm
2825         stvx_u          $out1,$x10,$out
2826         le?vperm        $out3,$out3,$out3,$leperm
2827         stvx_u          $out2,$x20,$out
2828         vxor            $tmp,$out4,$twk5        # last block prep for stealing
2829         le?vperm        $out4,$out4,$out4,$leperm
2830         stvx_u          $out3,$x30,$out
2831         stvx_u          $out4,$x40,$out
2832         addi            $out,$out,0x50
2833         bne             Lxts_enc6x_steal
2834         b               Lxts_enc6x_done
2835
2836 .align  4
2837 Lxts_enc6x_four:
2838         vxor            $out0,$in2,$twk0
2839         vxor            $out1,$in3,$twk1
2840         vxor            $out2,$in4,$twk2
2841         vxor            $out3,$in5,$twk3
2842         vxor            $out4,$out4,$out4
2843
2844         bl              _aesp8_xts_enc5x
2845
2846         le?vperm        $out0,$out0,$out0,$leperm
2847         vmr             $twk0,$twk4             # unused tweak
2848         le?vperm        $out1,$out1,$out1,$leperm
2849         stvx_u          $out0,$x00,$out         # store output
2850         le?vperm        $out2,$out2,$out2,$leperm
2851         stvx_u          $out1,$x10,$out
2852         vxor            $tmp,$out3,$twk4        # last block prep for stealing
2853         le?vperm        $out3,$out3,$out3,$leperm
2854         stvx_u          $out2,$x20,$out
2855         stvx_u          $out3,$x30,$out
2856         addi            $out,$out,0x40
2857         bne             Lxts_enc6x_steal
2858         b               Lxts_enc6x_done
2859
2860 .align  4
2861 Lxts_enc6x_three:
2862         vxor            $out0,$in3,$twk0
2863         vxor            $out1,$in4,$twk1
2864         vxor            $out2,$in5,$twk2
2865         vxor            $out3,$out3,$out3
2866         vxor            $out4,$out4,$out4
2867
2868         bl              _aesp8_xts_enc5x
2869
2870         le?vperm        $out0,$out0,$out0,$leperm
2871         vmr             $twk0,$twk3             # unused tweak
2872         le?vperm        $out1,$out1,$out1,$leperm
2873         stvx_u          $out0,$x00,$out         # store output
2874         vxor            $tmp,$out2,$twk3        # last block prep for stealing
2875         le?vperm        $out2,$out2,$out2,$leperm
2876         stvx_u          $out1,$x10,$out
2877         stvx_u          $out2,$x20,$out
2878         addi            $out,$out,0x30
2879         bne             Lxts_enc6x_steal
2880         b               Lxts_enc6x_done
2881
2882 .align  4
2883 Lxts_enc6x_two:
2884         vxor            $out0,$in4,$twk0
2885         vxor            $out1,$in5,$twk1
2886         vxor            $out2,$out2,$out2
2887         vxor            $out3,$out3,$out3
2888         vxor            $out4,$out4,$out4
2889
2890         bl              _aesp8_xts_enc5x
2891
2892         le?vperm        $out0,$out0,$out0,$leperm
2893         vmr             $twk0,$twk2             # unused tweak
2894         vxor            $tmp,$out1,$twk2        # last block prep for stealing
2895         le?vperm        $out1,$out1,$out1,$leperm
2896         stvx_u          $out0,$x00,$out         # store output
2897         stvx_u          $out1,$x10,$out
2898         addi            $out,$out,0x20
2899         bne             Lxts_enc6x_steal
2900         b               Lxts_enc6x_done
2901
2902 .align  4
2903 Lxts_enc6x_one:
2904         vxor            $out0,$in5,$twk0
2905         nop
2906 Loop_xts_enc1x:
2907         vcipher         $out0,$out0,v24
2908         lvx             v24,$x20,$key_          # round[3]
2909         addi            $key_,$key_,0x20
2910
2911         vcipher         $out0,$out0,v25
2912         lvx             v25,$x10,$key_          # round[4]
2913         bdnz            Loop_xts_enc1x
2914
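        # Load the last 16 input bytes (ending at the tail, if any)
        # and an lvsr pattern keyed to $taillen during the remaining
        # rounds; both are only consumed if ciphertext stealing
        # follows.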
2915         add             $inp,$inp,$taillen
2916         cmpwi           $taillen,0
2917         vcipher         $out0,$out0,v24
2918
2919         subi            $inp,$inp,16
2920         vcipher         $out0,$out0,v25
2921
2922         lvsr            $inpperm,0,$taillen
2923         vcipher         $out0,$out0,v26
2924
2925         lvx_u           $in0,0,$inp
2926         vcipher         $out0,$out0,v27
2927
2928         addi            $key_,$sp,$FRAME+15     # rewind $key_
2929         vcipher         $out0,$out0,v28
2930         lvx             v24,$x00,$key_          # re-pre-load round[1]
2931
2932         vcipher         $out0,$out0,v29
2933         lvx             v25,$x10,$key_          # re-pre-load round[2]
2934          vxor           $twk0,$twk0,v31
2935
2936         le?vperm        $in0,$in0,$in0,$leperm
2937         vcipher         $out0,$out0,v30
2938
2939         vperm           $in0,$in0,$in0,$inpperm
2940         vcipherlast     $out0,$out0,$twk0
2941
2942         vmr             $twk0,$twk1             # unused tweak
2943         vxor            $tmp,$out0,$twk1        # last block prep for stealing
2944         le?vperm        $out0,$out0,$out0,$leperm
2945         stvx_u          $out0,$x00,$out         # store output
2946         addi            $out,$out,0x10
2947         bne             Lxts_enc6x_steal
2948         b               Lxts_enc6x_done
2949
2950 .align  4
2951 Lxts_enc6x_zero:
2952         cmpwi           $taillen,0
2953         beq             Lxts_enc6x_done
2954
2955         add             $inp,$inp,$taillen
2956         subi            $inp,$inp,16
2957         lvx_u           $in0,0,$inp
2958         lvsr            $inpperm,0,$taillen     # $in5 is no more
2959         le?vperm        $in0,$in0,$in0,$leperm
2960         vperm           $in0,$in0,$in0,$inpperm
2961         vxor            $tmp,$tmp,$twk0
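        # Ciphertext stealing: the $taillen plaintext bytes are merged
        # with the trailing bytes of the last full ciphertext block
        # (still in $tmp) via vsel, the byte-copy loop below moves the
        # leading bytes of that ciphertext block up to become the
        # short final output, and the merged block is sent back
        # through Loop_xts_enc1x to be encrypted in its place.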
2962 Lxts_enc6x_steal:
2963         vxor            $in0,$in0,$twk0
2964         vxor            $out0,$out0,$out0
2965         vspltisb        $out1,-1
2966         vperm           $out0,$out0,$out1,$inpperm
2967         vsel            $out0,$in0,$tmp,$out0   # $tmp is last block, remember?
2968
2969         subi            r30,$out,17
2970         subi            $out,$out,16
2971         mtctr           $taillen
2972 Loop_xts_enc6x_steal:
2973         lbzu            r0,1(r30)
2974         stb             r0,16(r30)
2975         bdnz            Loop_xts_enc6x_steal
2976
2977         li              $taillen,0
2978         mtctr           $rounds
2979         b               Loop_xts_enc1x          # one more time...
2980
2981 .align  4
2982 Lxts_enc6x_done:
2983         ${UCMP}i        $ivp,0
2984         beq             Lxts_enc6x_ret
2985
2986         vxor            $tweak,$twk0,$rndkey0
2987         le?vperm        $tweak,$tweak,$tweak,$leperm
2988         stvx_u          $tweak,0,$ivp
2989
2990 Lxts_enc6x_ret:
2991         mtlr            r11
2992         li              r10,`$FRAME+15`
2993         li              r11,`$FRAME+31`
2994         stvx            $seven,r10,$sp          # wipe copies of round keys
2995         addi            r10,r10,32
2996         stvx            $seven,r11,$sp
2997         addi            r11,r11,32
2998         stvx            $seven,r10,$sp
2999         addi            r10,r10,32
3000         stvx            $seven,r11,$sp
3001         addi            r11,r11,32
3002         stvx            $seven,r10,$sp
3003         addi            r10,r10,32
3004         stvx            $seven,r11,$sp
3005         addi            r11,r11,32
3006         stvx            $seven,r10,$sp
3007         addi            r10,r10,32
3008         stvx            $seven,r11,$sp
3009         addi            r11,r11,32
3010
3011         mtspr           256,$vrsave
3012         lvx             v20,r10,$sp             # ABI says so
3013         addi            r10,r10,32
3014         lvx             v21,r11,$sp
3015         addi            r11,r11,32
3016         lvx             v22,r10,$sp
3017         addi            r10,r10,32
3018         lvx             v23,r11,$sp
3019         addi            r11,r11,32
3020         lvx             v24,r10,$sp
3021         addi            r10,r10,32
3022         lvx             v25,r11,$sp
3023         addi            r11,r11,32
3024         lvx             v26,r10,$sp
3025         addi            r10,r10,32
3026         lvx             v27,r11,$sp
3027         addi            r11,r11,32
3028         lvx             v28,r10,$sp
3029         addi            r10,r10,32
3030         lvx             v29,r11,$sp
3031         addi            r11,r11,32
3032         lvx             v30,r10,$sp
3033         lvx             v31,r11,$sp
3034         $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3035         $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3036         $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3037         $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3038         $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3039         $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3040         addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3041         blr
3042         .long           0
3043         .byte           0,12,0x04,1,0x80,6,6,0
3044         .long           0
3045
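        # Tail subroutine: runs the remaining rounds over the five
        # blocks prepared by the callers above (unused slots arrive
        # zeroed and their results are simply not stored). It also
        # interleaves the final rounds with loading the potentially
        # unaligned last input block and the lvsr pattern that the
        # stealing code needs.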
3046 .align  5
3047 _aesp8_xts_enc5x:
3048         vcipher         $out0,$out0,v24
3049         vcipher         $out1,$out1,v24
3050         vcipher         $out2,$out2,v24
3051         vcipher         $out3,$out3,v24
3052         vcipher         $out4,$out4,v24
3053         lvx             v24,$x20,$key_          # round[3]
3054         addi            $key_,$key_,0x20
3055
3056         vcipher         $out0,$out0,v25
3057         vcipher         $out1,$out1,v25
3058         vcipher         $out2,$out2,v25
3059         vcipher         $out3,$out3,v25
3060         vcipher         $out4,$out4,v25
3061         lvx             v25,$x10,$key_          # round[4]
3062         bdnz            _aesp8_xts_enc5x
3063
3064         add             $inp,$inp,$taillen
3065         cmpwi           $taillen,0
3066         vcipher         $out0,$out0,v24
3067         vcipher         $out1,$out1,v24
3068         vcipher         $out2,$out2,v24
3069         vcipher         $out3,$out3,v24
3070         vcipher         $out4,$out4,v24
3071
3072         subi            $inp,$inp,16
3073         vcipher         $out0,$out0,v25
3074         vcipher         $out1,$out1,v25
3075         vcipher         $out2,$out2,v25
3076         vcipher         $out3,$out3,v25
3077         vcipher         $out4,$out4,v25
3078          vxor           $twk0,$twk0,v31
3079
3080         vcipher         $out0,$out0,v26
3081         lvsr            $inpperm,r0,$taillen    # $in5 is no more
3082         vcipher         $out1,$out1,v26
3083         vcipher         $out2,$out2,v26
3084         vcipher         $out3,$out3,v26
3085         vcipher         $out4,$out4,v26
3086          vxor           $in1,$twk1,v31
3087
3088         vcipher         $out0,$out0,v27
3089         lvx_u           $in0,0,$inp
3090         vcipher         $out1,$out1,v27
3091         vcipher         $out2,$out2,v27
3092         vcipher         $out3,$out3,v27
3093         vcipher         $out4,$out4,v27
3094          vxor           $in2,$twk2,v31
3095
3096         addi            $key_,$sp,$FRAME+15     # rewind $key_
3097         vcipher         $out0,$out0,v28
3098         vcipher         $out1,$out1,v28
3099         vcipher         $out2,$out2,v28
3100         vcipher         $out3,$out3,v28
3101         vcipher         $out4,$out4,v28
3102         lvx             v24,$x00,$key_          # re-pre-load round[1]
3103          vxor           $in3,$twk3,v31
3104
3105         vcipher         $out0,$out0,v29
3106         le?vperm        $in0,$in0,$in0,$leperm
3107         vcipher         $out1,$out1,v29
3108         vcipher         $out2,$out2,v29
3109         vcipher         $out3,$out3,v29
3110         vcipher         $out4,$out4,v29
3111         lvx             v25,$x10,$key_          # re-pre-load round[2]
3112          vxor           $in4,$twk4,v31
3113
3114         vcipher         $out0,$out0,v30
3115         vperm           $in0,$in0,$in0,$inpperm
3116         vcipher         $out1,$out1,v30
3117         vcipher         $out2,$out2,v30
3118         vcipher         $out3,$out3,v30
3119         vcipher         $out4,$out4,v30
3120
3121         vcipherlast     $out0,$out0,$twk0
3122         vcipherlast     $out1,$out1,$in1
3123         vcipherlast     $out2,$out2,$in2
3124         vcipherlast     $out3,$out3,$in3
3125         vcipherlast     $out4,$out4,$in4
3126         blr
3127         .long           0
3128         .byte           0,12,0x14,0,0,0,0,0
3129
3130 .align  5
3131 _aesp8_xts_decrypt6x:
3132         $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
3133         mflr            r11
3134         li              r7,`$FRAME+8*16+15`
3135         li              r3,`$FRAME+8*16+31`
3136         $PUSH           r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
3137         stvx            v20,r7,$sp              # ABI says so
3138         addi            r7,r7,32
3139         stvx            v21,r3,$sp
3140         addi            r3,r3,32
3141         stvx            v22,r7,$sp
3142         addi            r7,r7,32
3143         stvx            v23,r3,$sp
3144         addi            r3,r3,32
3145         stvx            v24,r7,$sp
3146         addi            r7,r7,32
3147         stvx            v25,r3,$sp
3148         addi            r3,r3,32
3149         stvx            v26,r7,$sp
3150         addi            r7,r7,32
3151         stvx            v27,r3,$sp
3152         addi            r3,r3,32
3153         stvx            v28,r7,$sp
3154         addi            r7,r7,32
3155         stvx            v29,r3,$sp
3156         addi            r3,r3,32
3157         stvx            v30,r7,$sp
3158         stvx            v31,r3,$sp
3159         li              r0,-1
3160         stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
3161         li              $x10,0x10
3162         $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3163         li              $x20,0x20
3164         $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3165         li              $x30,0x30
3166         $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3167         li              $x40,0x40
3168         $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3169         li              $x50,0x50
3170         $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3171         li              $x60,0x60
3172         $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3173         li              $x70,0x70
3174         mtspr           256,r0
3175
3176         xxlor           2, 32+$eighty7, 32+$eighty7
3177         vsldoi          $eighty7,$tmp,$eighty7,1        # 0x010101..87
3178         xxlor           1, 32+$eighty7, 32+$eighty7
3179
3180         # Load the vpermxor constant from the Lconsts table.
3181         mr              $x70, r6
3182         bl              Lconsts
3183         lxvw4x          0, $x40, r6             # vpermxor control vector
3184         mr              r6, $x70
3185         li              $x70,0x70
3186
3187         subi            $rounds,$rounds,3       # -4 in total
3188
3189         lvx             $rndkey0,$x00,$key1     # load key schedule
3190         lvx             v30,$x10,$key1
3191         addi            $key1,$key1,0x20
3192         lvx             v31,$x00,$key1
3193         ?vperm          $rndkey0,$rndkey0,v30,$keyperm
3194         addi            $key_,$sp,$FRAME+15
3195         mtctr           $rounds
3196
3197 Load_xts_dec_key:
3198         ?vperm          v24,v30,v31,$keyperm
3199         lvx             v30,$x10,$key1
3200         addi            $key1,$key1,0x20
3201         stvx            v24,$x00,$key_          # off-load round[1]
3202         ?vperm          v25,v31,v30,$keyperm
3203         lvx             v31,$x00,$key1
3204         stvx            v25,$x10,$key_          # off-load round[2]
3205         addi            $key_,$key_,0x20
3206         bdnz            Load_xts_dec_key
3207
3208         lvx             v26,$x10,$key1
3209         ?vperm          v24,v30,v31,$keyperm
3210         lvx             v27,$x20,$key1
3211         stvx            v24,$x00,$key_          # off-load round[3]
3212         ?vperm          v25,v31,v26,$keyperm
3213         lvx             v28,$x30,$key1
3214         stvx            v25,$x10,$key_          # off-load round[4]
3215         addi            $key_,$sp,$FRAME+15     # rewind $key_
3216         ?vperm          v26,v26,v27,$keyperm
3217         lvx             v29,$x40,$key1
3218         ?vperm          v27,v27,v28,$keyperm
3219         lvx             v30,$x50,$key1
3220         ?vperm          v28,v28,v29,$keyperm
3221         lvx             v31,$x60,$key1
3222         ?vperm          v29,v29,v30,$keyperm
3223         lvx             $twk5,$x70,$key1        # borrow $twk5
3224         ?vperm          v30,v30,v31,$keyperm
3225         lvx             v24,$x00,$key_          # pre-load round[1]
3226         ?vperm          v31,v31,$twk5,$keyperm
3227         lvx             v25,$x10,$key_          # pre-load round[2]
3228
3229          vperm          $in0,$inout,$inptail,$inpperm
3230          subi           $inp,$inp,31            # undo "caller"
3231         vxor            $twk0,$tweak,$rndkey0
3232         vsrab           $tmp,$tweak,$seven      # next tweak value
3233         vaddubm         $tweak,$tweak,$tweak
3234         vand            $tmp,$tmp,$eighty7
3235          vxor           $out0,$in0,$twk0
3236         xxlor           32+$in1, 0, 0
3237         vpermxor        $tweak, $tweak, $tmp, $in1
3238
3239          lvx_u          $in1,$x10,$inp
3240         vxor            $twk1,$tweak,$rndkey0
3241         vsrab           $tmp,$tweak,$seven      # next tweak value
3242         vaddubm         $tweak,$tweak,$tweak
3243          le?vperm       $in1,$in1,$in1,$leperm
3244         vand            $tmp,$tmp,$eighty7
3245          vxor           $out1,$in1,$twk1
3246         xxlor           32+$in2, 0, 0
3247         vpermxor        $tweak, $tweak, $tmp, $in2
3248
3249          lvx_u          $in2,$x20,$inp
3250          andi.          $taillen,$len,15
3251         vxor            $twk2,$tweak,$rndkey0
3252         vsrab           $tmp,$tweak,$seven      # next tweak value
3253         vaddubm         $tweak,$tweak,$tweak
3254          le?vperm       $in2,$in2,$in2,$leperm
3255         vand            $tmp,$tmp,$eighty7
3256          vxor           $out2,$in2,$twk2
3257         xxlor           32+$in3, 0, 0
3258         vpermxor        $tweak, $tweak, $tmp, $in3
3259
3260          lvx_u          $in3,$x30,$inp
3261          sub            $len,$len,$taillen
3262         vxor            $twk3,$tweak,$rndkey0
3263         vsrab           $tmp,$tweak,$seven      # next tweak value
3264         vaddubm         $tweak,$tweak,$tweak
3265          le?vperm       $in3,$in3,$in3,$leperm
3266         vand            $tmp,$tmp,$eighty7
3267          vxor           $out3,$in3,$twk3
3268         xxlor           32+$in4, 0, 0
3269         vpermxor        $tweak, $tweak, $tmp, $in4
3270
3271          lvx_u          $in4,$x40,$inp
3272          subi           $len,$len,0x60
3273         vxor            $twk4,$tweak,$rndkey0
3274         vsrab           $tmp,$tweak,$seven      # next tweak value
3275         vaddubm         $tweak,$tweak,$tweak
3276          le?vperm       $in4,$in4,$in4,$leperm
3277         vand            $tmp,$tmp,$eighty7
3278          vxor           $out4,$in4,$twk4
3279         xxlor           32+$in5, 0, 0
3280         vpermxor        $tweak, $tweak, $tmp, $in5
3281
3282          lvx_u          $in5,$x50,$inp
3283          addi           $inp,$inp,0x60
3284         vxor            $twk5,$tweak,$rndkey0
3285         vsrab           $tmp,$tweak,$seven      # next tweak value
3286         vaddubm         $tweak,$tweak,$tweak
3287          le?vperm       $in5,$in5,$in5,$leperm
3288         vand            $tmp,$tmp,$eighty7
3289          vxor           $out5,$in5,$twk5
3290         xxlor           32+$in0, 0, 0
3291         vpermxor        $tweak, $tweak, $tmp, $in0
3292
3293         vxor            v31,v31,$rndkey0
3294         mtctr           $rounds
3295         b               Loop_xts_dec6x
3296
3297 .align  5
3298 Loop_xts_dec6x:
3299         vncipher        $out0,$out0,v24
3300         vncipher        $out1,$out1,v24
3301         vncipher        $out2,$out2,v24
3302         vncipher        $out3,$out3,v24
3303         vncipher        $out4,$out4,v24
3304         vncipher        $out5,$out5,v24
3305         lvx             v24,$x20,$key_          # round[3]
3306         addi            $key_,$key_,0x20
3307
3308         vncipher        $out0,$out0,v25
3309         vncipher        $out1,$out1,v25
3310         vncipher        $out2,$out2,v25
3311         vncipher        $out3,$out3,v25
3312         vncipher        $out4,$out4,v25
3313         vncipher        $out5,$out5,v25
3314         lvx             v25,$x10,$key_          # round[4]
3315         bdnz            Loop_xts_dec6x
3316
3317         xxlor           32+$eighty7, 1, 1       # 0x010101..87
3318
3319         subic           $len,$len,96            # $len-=96
3320          vxor           $in0,$twk0,v31          # xor with last round key
3321         vncipher        $out0,$out0,v24
3322         vncipher        $out1,$out1,v24
3323          vsrab          $tmp,$tweak,$seven      # next tweak value
3324          vxor           $twk0,$tweak,$rndkey0
3325          vaddubm        $tweak,$tweak,$tweak
3326         vncipher        $out2,$out2,v24
3327         vncipher        $out3,$out3,v24
3328         vncipher        $out4,$out4,v24
3329         vncipher        $out5,$out5,v24
3330
3331         subfe.          r0,r0,r0                # borrow?-1:0
3332          vand           $tmp,$tmp,$eighty7
3333         vncipher        $out0,$out0,v25
3334         vncipher        $out1,$out1,v25
3335          xxlor          32+$in1, 0, 0
3336          vpermxor       $tweak, $tweak, $tmp, $in1
3337         vncipher        $out2,$out2,v25
3338         vncipher        $out3,$out3,v25
3339          vxor           $in1,$twk1,v31
3340          vsrab          $tmp,$tweak,$seven      # next tweak value
3341          vxor           $twk1,$tweak,$rndkey0
3342         vncipher        $out4,$out4,v25
3343         vncipher        $out5,$out5,v25
3344
3345         and             r0,r0,$len
3346          vaddubm        $tweak,$tweak,$tweak
3347         vncipher        $out0,$out0,v26
3348         vncipher        $out1,$out1,v26
3349          vand           $tmp,$tmp,$eighty7
3350         vncipher        $out2,$out2,v26
3351         vncipher        $out3,$out3,v26
3352          xxlor          32+$in2, 0, 0
3353          vpermxor       $tweak, $tweak, $tmp, $in2
3354         vncipher        $out4,$out4,v26
3355         vncipher        $out5,$out5,v26
3356
3357         add             $inp,$inp,r0            # $inp is adjusted in such
3358                                                 # a way that at exit from
3359                                                 # the loop $in0-$in5 hold
3360                                                 # the last input blocks
3361          vxor           $in2,$twk2,v31
3362          vsrab          $tmp,$tweak,$seven      # next tweak value
3363          vxor           $twk2,$tweak,$rndkey0
3364          vaddubm        $tweak,$tweak,$tweak
3365         vncipher        $out0,$out0,v27
3366         vncipher        $out1,$out1,v27
3367         vncipher        $out2,$out2,v27
3368         vncipher        $out3,$out3,v27
3369          vand           $tmp,$tmp,$eighty7
3370         vncipher        $out4,$out4,v27
3371         vncipher        $out5,$out5,v27
3372
3373         addi            $key_,$sp,$FRAME+15     # rewind $key_
3374          xxlor          32+$in3, 0, 0
3375          vpermxor       $tweak, $tweak, $tmp, $in3
3376         vncipher        $out0,$out0,v28
3377         vncipher        $out1,$out1,v28
3378          vxor           $in3,$twk3,v31
3379          vsrab          $tmp,$tweak,$seven      # next tweak value
3380          vxor           $twk3,$tweak,$rndkey0
3381         vncipher        $out2,$out2,v28
3382         vncipher        $out3,$out3,v28
3383          vaddubm        $tweak,$tweak,$tweak
3384         vncipher        $out4,$out4,v28
3385         vncipher        $out5,$out5,v28
3386         lvx             v24,$x00,$key_          # re-pre-load round[1]
3387          vand           $tmp,$tmp,$eighty7
3388
3389         vncipher        $out0,$out0,v29
3390         vncipher        $out1,$out1,v29
3391          xxlor          32+$in4, 0, 0
3392          vpermxor       $tweak, $tweak, $tmp, $in4
3393         vncipher        $out2,$out2,v29
3394         vncipher        $out3,$out3,v29
3395          vxor           $in4,$twk4,v31
3396          vsrab          $tmp,$tweak,$seven      # next tweak value
3397          vxor           $twk4,$tweak,$rndkey0
3398         vncipher        $out4,$out4,v29
3399         vncipher        $out5,$out5,v29
3400         lvx             v25,$x10,$key_          # re-pre-load round[2]
3401          vaddubm        $tweak,$tweak,$tweak
3402
3403         vncipher        $out0,$out0,v30
3404         vncipher        $out1,$out1,v30
3405          vand           $tmp,$tmp,$eighty7
3406         vncipher        $out2,$out2,v30
3407         vncipher        $out3,$out3,v30
3408          xxlor          32+$in5, 0, 0
3409          vpermxor       $tweak, $tweak, $tmp, $in5
3410         vncipher        $out4,$out4,v30
3411         vncipher        $out5,$out5,v30
3412          vxor           $in5,$twk5,v31
3413          vsrab          $tmp,$tweak,$seven      # next tweak value
3414          vxor           $twk5,$tweak,$rndkey0
3415
3416         vncipherlast    $out0,$out0,$in0
3417          lvx_u          $in0,$x00,$inp          # load next input block
3418          vaddubm        $tweak,$tweak,$tweak
3419         vncipherlast    $out1,$out1,$in1
3420          lvx_u          $in1,$x10,$inp
3421         vncipherlast    $out2,$out2,$in2
3422          le?vperm       $in0,$in0,$in0,$leperm
3423          lvx_u          $in2,$x20,$inp
3424          vand           $tmp,$tmp,$eighty7
3425         vncipherlast    $out3,$out3,$in3
3426          le?vperm       $in1,$in1,$in1,$leperm
3427          lvx_u          $in3,$x30,$inp
3428         vncipherlast    $out4,$out4,$in4
3429          le?vperm       $in2,$in2,$in2,$leperm
3430          lvx_u          $in4,$x40,$inp
3431          xxlor          10, 32+$in0, 32+$in0
3432          xxlor          32+$in0, 0, 0
3433          vpermxor       $tweak, $tweak, $tmp, $in0
3434          xxlor          32+$in0, 10, 10
3435         vncipherlast    $out5,$out5,$in5
3436          le?vperm       $in3,$in3,$in3,$leperm
3437          lvx_u          $in5,$x50,$inp
3438          addi           $inp,$inp,0x60
3439          le?vperm       $in4,$in4,$in4,$leperm
3440          le?vperm       $in5,$in5,$in5,$leperm
3441
3442         le?vperm        $out0,$out0,$out0,$leperm
3443         le?vperm        $out1,$out1,$out1,$leperm
3444         stvx_u          $out0,$x00,$out         # store output
3445          vxor           $out0,$in0,$twk0
3446         le?vperm        $out2,$out2,$out2,$leperm
3447         stvx_u          $out1,$x10,$out
3448          vxor           $out1,$in1,$twk1
3449         le?vperm        $out3,$out3,$out3,$leperm
3450         stvx_u          $out2,$x20,$out
3451          vxor           $out2,$in2,$twk2
3452         le?vperm        $out4,$out4,$out4,$leperm
3453         stvx_u          $out3,$x30,$out
3454          vxor           $out3,$in3,$twk3
3455         le?vperm        $out5,$out5,$out5,$leperm
3456         stvx_u          $out4,$x40,$out
3457          vxor           $out4,$in4,$twk4
3458         stvx_u          $out5,$x50,$out
3459          vxor           $out5,$in5,$twk5
3460         addi            $out,$out,0x60
3461
3462         mtctr           $rounds
3463         beq             Loop_xts_dec6x          # did $len-=96 borrow?
3464
3465         xxlor           32+$eighty7, 2, 2       # 0x010101..87
3466
3467         addic.          $len,$len,0x60
3468         beq             Lxts_dec6x_zero
3469         cmpwi           $len,0x20
3470         blt             Lxts_dec6x_one
3471         nop
3472         beq             Lxts_dec6x_two
3473         cmpwi           $len,0x40
3474         blt             Lxts_dec6x_three
3475         nop
3476         beq             Lxts_dec6x_four
3477
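        # Decrypt-side remainder handling mirrors the encrypt side,
        # with one twist: with a partial tail the last full ciphertext
        # block must be decrypted under the next tweak and the
        # reassembled block under the previous one, so each case below
        # keeps two spare tweaks ($twk0 and $twk1) instead of one.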
3478 Lxts_dec6x_five:
3479         vxor            $out0,$in1,$twk0
3480         vxor            $out1,$in2,$twk1
3481         vxor            $out2,$in3,$twk2
3482         vxor            $out3,$in4,$twk3
3483         vxor            $out4,$in5,$twk4
3484
3485         bl              _aesp8_xts_dec5x
3486
3487         le?vperm        $out0,$out0,$out0,$leperm
3488         vmr             $twk0,$twk5             # unused tweak
3489         vxor            $twk1,$tweak,$rndkey0
3490         le?vperm        $out1,$out1,$out1,$leperm
3491         stvx_u          $out0,$x00,$out         # store output
3492         vxor            $out0,$in0,$twk1
3493         le?vperm        $out2,$out2,$out2,$leperm
3494         stvx_u          $out1,$x10,$out
3495         le?vperm        $out3,$out3,$out3,$leperm
3496         stvx_u          $out2,$x20,$out
3497         le?vperm        $out4,$out4,$out4,$leperm
3498         stvx_u          $out3,$x30,$out
3499         stvx_u          $out4,$x40,$out
3500         addi            $out,$out,0x50
3501         bne             Lxts_dec6x_steal
3502         b               Lxts_dec6x_done
3503
3504 .align  4
3505 Lxts_dec6x_four:
3506         vxor            $out0,$in2,$twk0
3507         vxor            $out1,$in3,$twk1
3508         vxor            $out2,$in4,$twk2
3509         vxor            $out3,$in5,$twk3
3510         vxor            $out4,$out4,$out4
3511
3512         bl              _aesp8_xts_dec5x
3513
3514         le?vperm        $out0,$out0,$out0,$leperm
3515         vmr             $twk0,$twk4             # unused tweak
3516         vmr             $twk1,$twk5
3517         le?vperm        $out1,$out1,$out1,$leperm
3518         stvx_u          $out0,$x00,$out         # store output
3519         vxor            $out0,$in0,$twk5
3520         le?vperm        $out2,$out2,$out2,$leperm
3521         stvx_u          $out1,$x10,$out
3522         le?vperm        $out3,$out3,$out3,$leperm
3523         stvx_u          $out2,$x20,$out
3524         stvx_u          $out3,$x30,$out
3525         addi            $out,$out,0x40
3526         bne             Lxts_dec6x_steal
3527         b               Lxts_dec6x_done
3528
3529 .align  4
3530 Lxts_dec6x_three:
3531         vxor            $out0,$in3,$twk0
3532         vxor            $out1,$in4,$twk1
3533         vxor            $out2,$in5,$twk2
3534         vxor            $out3,$out3,$out3
3535         vxor            $out4,$out4,$out4
3536
3537         bl              _aesp8_xts_dec5x
3538
3539         le?vperm        $out0,$out0,$out0,$leperm
3540         vmr             $twk0,$twk3             # unused tweak
3541         vmr             $twk1,$twk4
3542         le?vperm        $out1,$out1,$out1,$leperm
3543         stvx_u          $out0,$x00,$out         # store output
3544         vxor            $out0,$in0,$twk4
3545         le?vperm        $out2,$out2,$out2,$leperm
3546         stvx_u          $out1,$x10,$out
3547         stvx_u          $out2,$x20,$out
3548         addi            $out,$out,0x30
3549         bne             Lxts_dec6x_steal
3550         b               Lxts_dec6x_done
3551
3552 .align  4
3553 Lxts_dec6x_two:
3554         vxor            $out0,$in4,$twk0
3555         vxor            $out1,$in5,$twk1
3556         vxor            $out2,$out2,$out2
3557         vxor            $out3,$out3,$out3
3558         vxor            $out4,$out4,$out4
3559
3560         bl              _aesp8_xts_dec5x
3561
3562         le?vperm        $out0,$out0,$out0,$leperm
3563         vmr             $twk0,$twk2             # unused tweak
3564         vmr             $twk1,$twk3
3565         le?vperm        $out1,$out1,$out1,$leperm
3566         stvx_u          $out0,$x00,$out         # store output
3567         vxor            $out0,$in0,$twk3
3568         stvx_u          $out1,$x10,$out
3569         addi            $out,$out,0x20
3570         bne             Lxts_dec6x_steal
3571         b               Lxts_dec6x_done
3572
3573 .align  4
3574 Lxts_dec6x_one:
3575         vxor            $out0,$in5,$twk0
3576         nop
3577 Loop_xts_dec1x:
3578         vncipher        $out0,$out0,v24
3579         lvx             v24,$x20,$key_          # round[3]
3580         addi            $key_,$key_,0x20
3581
3582         vncipher        $out0,$out0,v25
3583         lvx             v25,$x10,$key_          # round[4]
3584         bdnz            Loop_xts_dec1x
3585
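        # r0 = ($taillen-1) & 16, i.e. 16 when $taillen is zero and 0
        # otherwise: without a partial tail the pointer must step back
        # a whole block to reload the final input block.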
3586         subi            r0,$taillen,1
3587         vncipher        $out0,$out0,v24
3588
3589         andi.           r0,r0,16
3590         cmpwi           $taillen,0
3591         vncipher        $out0,$out0,v25
3592
3593         sub             $inp,$inp,r0
3594         vncipher        $out0,$out0,v26
3595
3596         lvx_u           $in0,0,$inp
3597         vncipher        $out0,$out0,v27
3598
3599         addi            $key_,$sp,$FRAME+15     # rewind $key_
3600         vncipher        $out0,$out0,v28
3601         lvx             v24,$x00,$key_          # re-pre-load round[1]
3602
3603         vncipher        $out0,$out0,v29
3604         lvx             v25,$x10,$key_          # re-pre-load round[2]
3605          vxor           $twk0,$twk0,v31
3606
3607         le?vperm        $in0,$in0,$in0,$leperm
3608         vncipher        $out0,$out0,v30
3609
3610         mtctr           $rounds
3611         vncipherlast    $out0,$out0,$twk0
3612
3613         vmr             $twk0,$twk1             # unused tweak
3614         vmr             $twk1,$twk2
3615         le?vperm        $out0,$out0,$out0,$leperm
3616         stvx_u          $out0,$x00,$out         # store output
3617         addi            $out,$out,0x10
3618         vxor            $out0,$in0,$twk2
3619         bne             Lxts_dec6x_steal
3620         b               Lxts_dec6x_done
3621
3622 .align  4
3623 Lxts_dec6x_zero:
3624         cmpwi           $taillen,0
3625         beq             Lxts_dec6x_done
3626
3627         lvx_u           $in0,0,$inp
3628         le?vperm        $in0,$in0,$in0,$leperm
3629         vxor            $out0,$in0,$twk1
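        # Unlike the encrypt path, Lxts_dec6x_steal heads a full round
        # loop: the block covering the tail is first decrypted under
        # $twk1, its leading bytes become the short final output via
        # the byte-copy loop, and the block reassembled from the tail
        # ciphertext is then sent back through Loop_xts_dec1x under
        # $twk0.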
3630 Lxts_dec6x_steal:
3631         vncipher        $out0,$out0,v24
3632         lvx             v24,$x20,$key_          # round[3]
3633         addi            $key_,$key_,0x20
3634
3635         vncipher        $out0,$out0,v25
3636         lvx             v25,$x10,$key_          # round[4]
3637         bdnz            Lxts_dec6x_steal
3638
3639         add             $inp,$inp,$taillen
3640         vncipher        $out0,$out0,v24
3641
3642         cmpwi           $taillen,0
3643         vncipher        $out0,$out0,v25
3644
3645         lvx_u           $in0,0,$inp
3646         vncipher        $out0,$out0,v26
3647
3648         lvsr            $inpperm,0,$taillen     # $in5 is no more
3649         vncipher        $out0,$out0,v27
3650
3651         addi            $key_,$sp,$FRAME+15     # rewind $key_
3652         vncipher        $out0,$out0,v28
3653         lvx             v24,$x00,$key_          # re-pre-load round[1]
3654
3655         vncipher        $out0,$out0,v29
3656         lvx             v25,$x10,$key_          # re-pre-load round[2]
3657          vxor           $twk1,$twk1,v31
3658
3659         le?vperm        $in0,$in0,$in0,$leperm
3660         vncipher        $out0,$out0,v30
3661
3662         vperm           $in0,$in0,$in0,$inpperm
3663         vncipherlast    $tmp,$out0,$twk1
3664
3665         le?vperm        $out0,$tmp,$tmp,$leperm
3666         le?stvx_u       $out0,0,$out
3667         be?stvx_u       $tmp,0,$out
3668
3669         vxor            $out0,$out0,$out0
3670         vspltisb        $out1,-1
3671         vperm           $out0,$out0,$out1,$inpperm
3672         vsel            $out0,$in0,$tmp,$out0
3673         vxor            $out0,$out0,$twk0
3674
3675         subi            r30,$out,1
3676         mtctr           $taillen
3677 Loop_xts_dec6x_steal:
3678         lbzu            r0,1(r30)
3679         stb             r0,16(r30)
3680         bdnz            Loop_xts_dec6x_steal
3681
3682         li              $taillen,0
3683         mtctr           $rounds
3684         b               Loop_xts_dec1x          # one more time...
3685
3686 .align  4
3687 Lxts_dec6x_done:
3688         ${UCMP}i        $ivp,0
3689         beq             Lxts_dec6x_ret
3690
3691         vxor            $tweak,$twk0,$rndkey0
3692         le?vperm        $tweak,$tweak,$tweak,$leperm
3693         stvx_u          $tweak,0,$ivp
3694
3695 Lxts_dec6x_ret:
3696         mtlr            r11
3697         li              r10,`$FRAME+15`
3698         li              r11,`$FRAME+31`
3699         stvx            $seven,r10,$sp          # wipe copies of round keys
3700         addi            r10,r10,32
3701         stvx            $seven,r11,$sp
3702         addi            r11,r11,32
3703         stvx            $seven,r10,$sp
3704         addi            r10,r10,32
3705         stvx            $seven,r11,$sp
3706         addi            r11,r11,32
3707         stvx            $seven,r10,$sp
3708         addi            r10,r10,32
3709         stvx            $seven,r11,$sp
3710         addi            r11,r11,32
3711         stvx            $seven,r10,$sp
3712         addi            r10,r10,32
3713         stvx            $seven,r11,$sp
3714         addi            r11,r11,32
3715
3716         mtspr           256,$vrsave
3717         lvx             v20,r10,$sp             # ABI says so
3718         addi            r10,r10,32
3719         lvx             v21,r11,$sp
3720         addi            r11,r11,32
3721         lvx             v22,r10,$sp
3722         addi            r10,r10,32
3723         lvx             v23,r11,$sp
3724         addi            r11,r11,32
3725         lvx             v24,r10,$sp
3726         addi            r10,r10,32
3727         lvx             v25,r11,$sp
3728         addi            r11,r11,32
3729         lvx             v26,r10,$sp
3730         addi            r10,r10,32
3731         lvx             v27,r11,$sp
3732         addi            r11,r11,32
3733         lvx             v28,r10,$sp
3734         addi            r10,r10,32
3735         lvx             v29,r11,$sp
3736         addi            r11,r11,32
3737         lvx             v30,r10,$sp
3738         lvx             v31,r11,$sp
3739         $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3740         $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3741         $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3742         $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3743         $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3744         $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3745         addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3746         blr
3747         .long           0
3748         .byte           0,12,0x04,1,0x80,6,6,0
3749         .long           0
3750
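        # Decrypt counterpart of _aesp8_xts_enc5x: identical structure
        # built on vncipher, except that it also reloads CTR with
        # $rounds before returning, since its callers may fall through
        # into round loops that expect CTR to be primed.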
3751 .align  5
3752 _aesp8_xts_dec5x:
3753         vncipher        $out0,$out0,v24
3754         vncipher        $out1,$out1,v24
3755         vncipher        $out2,$out2,v24
3756         vncipher        $out3,$out3,v24
3757         vncipher        $out4,$out4,v24
3758         lvx             v24,$x20,$key_          # round[3]
3759         addi            $key_,$key_,0x20
3760
3761         vncipher        $out0,$out0,v25
3762         vncipher        $out1,$out1,v25
3763         vncipher        $out2,$out2,v25
3764         vncipher        $out3,$out3,v25
3765         vncipher        $out4,$out4,v25
3766         lvx             v25,$x10,$key_          # round[4]
3767         bdnz            _aesp8_xts_dec5x
3768
3769         subi            r0,$taillen,1
3770         vncipher        $out0,$out0,v24
3771         vncipher        $out1,$out1,v24
3772         vncipher        $out2,$out2,v24
3773         vncipher        $out3,$out3,v24
3774         vncipher        $out4,$out4,v24
3775
3776         andi.           r0,r0,16
3777         cmpwi           $taillen,0
3778         vncipher        $out0,$out0,v25
3779         vncipher        $out1,$out1,v25
3780         vncipher        $out2,$out2,v25
3781         vncipher        $out3,$out3,v25
3782         vncipher        $out4,$out4,v25
3783          vxor           $twk0,$twk0,v31
3784
3785         sub             $inp,$inp,r0
3786         vncipher        $out0,$out0,v26
3787         vncipher        $out1,$out1,v26
3788         vncipher        $out2,$out2,v26
3789         vncipher        $out3,$out3,v26
3790         vncipher        $out4,$out4,v26
3791          vxor           $in1,$twk1,v31
3792
3793         vncipher        $out0,$out0,v27
3794         lvx_u           $in0,0,$inp
3795         vncipher        $out1,$out1,v27
3796         vncipher        $out2,$out2,v27
3797         vncipher        $out3,$out3,v27
3798         vncipher        $out4,$out4,v27
3799          vxor           $in2,$twk2,v31
3800
3801         addi            $key_,$sp,$FRAME+15     # rewind $key_
3802         vncipher        $out0,$out0,v28
3803         vncipher        $out1,$out1,v28
3804         vncipher        $out2,$out2,v28
3805         vncipher        $out3,$out3,v28
3806         vncipher        $out4,$out4,v28
3807         lvx             v24,$x00,$key_          # re-pre-load round[1]
3808          vxor           $in3,$twk3,v31
3809
3810         vncipher        $out0,$out0,v29
3811         le?vperm        $in0,$in0,$in0,$leperm
3812         vncipher        $out1,$out1,v29
3813         vncipher        $out2,$out2,v29
3814         vncipher        $out3,$out3,v29
3815         vncipher        $out4,$out4,v29
3816         lvx             v25,$x10,$key_          # re-pre-load round[2]
3817          vxor           $in4,$twk4,v31
3818
3819         vncipher        $out0,$out0,v30
3820         vncipher        $out1,$out1,v30
3821         vncipher        $out2,$out2,v30
3822         vncipher        $out3,$out3,v30
3823         vncipher        $out4,$out4,v30
3824
3825         vncipherlast    $out0,$out0,$twk0
3826         vncipherlast    $out1,$out1,$in1
3827         vncipherlast    $out2,$out2,$in2
3828         vncipherlast    $out3,$out3,$in3
3829         vncipherlast    $out4,$out4,$in4
3830         mtctr           $rounds
3831         blr
3832         .long           0
3833         .byte           0,12,0x14,0,0,0,0,0
3834 ___
3835 }}      }}}
3836
3837 my $consts=1;
3838 foreach(split("\n",$code)) {
3839         s/\`([^\`]*)\`/eval($1)/geo;
3840
3841         # constants table endian-specific conversion
3842         if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
3843             my $conv=$3;
3844             my @bytes=();
3845
3846             # convert to endian-agnostic format
3847             if ($1 eq "long") {
3848               foreach (split(/,\s*/,$2)) {
3849                 my $l = /^0/?oct:int;
3850                 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
3851               }
3852             } else {
3853                 @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
3854             }
3855
3856             # little-endian conversion
3857             if ($flavour =~ /le$/o) {
3858                 SWITCH: for($conv)  {
3859                     /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
3860                     /\?rev/ && do   { @bytes=reverse(@bytes);    last; };
3861                 }
3862             }
3863
3864             #emit
3865             print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
3866             next;
3867         }
3868         $consts=0 if (m/Lconsts:/o);    # end of table
3869
3870         # instructions prefixed with '?' are endian-specific and need
3871         # to be adjusted accordingly...
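        # (e.g. on little-endian "?vperm vD,vA,vB,vC" is emitted as
        # "vperm vD,vB,vA,vC", and "?lvsr"/"?lvsl" swap roles)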
3872         if ($flavour =~ /le$/o) {       # little-endian
3873             s/le\?//o           or
3874             s/be\?/#be#/o       or
3875             s/\?lvsr/lvsl/o     or
3876             s/\?lvsl/lvsr/o     or
3877             s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
3878             s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
3879             s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
3880         } else {                        # big-endian
3881             s/le\?/#le#/o       or
3882             s/be\?//o           or
3883             s/\?([a-z]+)/$1/o;
3884         }
3885
3886         print $_,"\n";
3887 }
3888
3889 close STDOUT;
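
# For reference only: a plain-Perl model of the GF(2^128) tweak update
# that the vpermxor sequences above implement, assuming the IEEE P1619
# convention of treating the 16-byte tweak as a little-endian integer.
# This helper is illustrative and is never called by the generator.
sub xts_tweak_double_ref {
        my @t = unpack("C16", shift);           # byte 0 holds the least
                                                # significant bits
        my $carry = 0;
        for my $i (0 .. 15) {
                my $b = ($t[$i] << 1) | $carry; # multiply by x, bytewise
                $carry = ($b >> 8) & 1;
                $t[$i] = $b & 0xff;
        }
        $t[0] ^= 0x87 if $carry;                # fold x^128 back in as
                                                # x^7 + x^2 + x + 1
        return pack("C16", @t);
}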