2 # SPDX-License-Identifier: GPL-2.0
4 # This code is taken from CRYPTOGAMS[1] and is included here using the option
5 # in the license to distribute the code under the GPL. Therefore this program
6 # is free software; you can redistribute it and/or modify it under the terms of
7 # the GNU General Public License version 2 as published by the Free Software
10 # [1] https://www.openssl.org/~appro/cryptogams/
12 # Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
13 # All rights reserved.
15 # Redistribution and use in source and binary forms, with or without
16 # modification, are permitted provided that the following conditions
19 # * Redistributions of source code must retain copyright notices,
20 # this list of conditions and the following disclaimer.
22 # * Redistributions in binary form must reproduce the above
23 # copyright notice, this list of conditions and the following
24 # disclaimer in the documentation and/or other materials
25 # provided with the distribution.
27 # * Neither the name of the CRYPTOGAMS nor the names of its
28 # copyright holder and contributors may be used to endorse or
29 # promote products derived from this software without specific
30 # prior written permission.
32 # ALTERNATIVELY, provided that this notice is retained in full, this
33 # product may be distributed under the terms of the GNU General Public
34 # License (GPL), in which case the provisions of the GPL apply INSTEAD OF
37 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
38 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
49 # ====================================================================
50 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
51 # project. The module is, however, dual licensed under OpenSSL and
52 # CRYPTOGAMS licenses depending on where you obtain it. For further
53 # details see https://www.openssl.org/~appro/cryptogams/.
54 # ====================================================================
56 # This module implements support for AES instructions as per PowerISA
57 # specification version 2.07, first implemented by POWER8 processor.
58 # The module is endian-agnostic in sense that it supports both big-
59 # and little-endian cases. Data alignment in parallelizable modes is
60 # handled with VSX loads and stores, which implies MSR.VSX flag being
61 # set. It should also be noted that ISA specification doesn't prohibit
62 # alignment exceptions for these instructions on page boundaries.
63 # Initially alignment was handled in pure AltiVec/VMX way [when data
64 # is aligned programmatically, which in turn guarantees exception-
65 # free execution], but it turned out to hamper performance when vcipher
66 # instructions are interleaved. It's reckoned that eventual
67 # misalignment penalties at page boundaries are on average lower
68 # than additional overhead in pure AltiVec approach.
72 # Add XTS subroutine; a 9x improvement on little- and a 12x improvement
73 # on big-endian systems were measured.
75 ######################################################################
76 # Current large-block performance in cycles per byte processed with
77 # 128-bit key (less is better).
79 # CBC en-/decrypt CTR XTS
80 # POWER8[le] 3.96/0.72 0.74 1.1
81 # POWER8[be] 3.75/0.65 0.66 1.0
# Select ABI parameters from the requested flavour ("linux64", "linux32le",
# etc.); 64- and 32-bit branch bodies set $SIZE_T/$STU/$PUSH and friends
# (elided from this view).
85 if ($flavour =~ /64/) {
93 } elsif ($flavour =~ /32/) {
101 } else { die "nonsense $flavour"; }
# LE flavours end in "le"; non-zero value doubles as the load/store size.
103 $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
# Locate the ppc-xlate.pl translator next to this script or in ../../perlasm.
105 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
106 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
107 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
108 die "can't locate ppc-xlate.pl";
# Pipe all generated code through the translator.  Use low-precedence "or"
# here: with "||" the test would bind to the concatenated command string
# (always true) instead of open()'s return value, so a failed open would
# never die.
110 open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
118 #########################################################################
119 {{{ # Key setup procedures #
# GPR/VR aliases for the key-schedule code: r3..r8 carry the C arguments
# (inp, bits, out) plus scratch; v0..v11 hold working state and the
# store-alignment machinery ($outperm/$outmask/$outhead/$outtail).
# NOTE(review): heredoc boundaries and many interior lines are outside this
# view; comments below annotate only the visible lines.
120 my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
121 my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
122 my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
# Round-constant table; "?rev" presumably tells ppc-xlate to byte-reverse
# the words for LE builds — TODO confirm against ppc-xlate.pl.
131 .long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
132 .long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
133 .long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
135 .long 0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe
139 mflr $ptr #vvvvv "distance between . and rcon
144 .byte 0,12,0x14,0,0,0,0,0
145 .asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
# int ${prefix}_set_encrypt_key(const u8 *inp, int bits, key_t *out);
147 .globl .${prefix}_set_encrypt_key
150 $PUSH r11,$LRSAVE($sp)
# NULL-pointer argument checks (compares elided from this view).
154 beq- Lenc_key_abort # if ($inp==0) return -1;
156 beq- Lenc_key_abort # if ($out==0) return -1;
# Unaligned-load setup: lvsr on inp+15 builds the permute that merges two
# aligned lvx results (and byte-swaps on LE after the xor with 0x0f splat).
174 addi $inp,$inp,15 # 15 is not typo
175 lvsr $key,0,r9 # borrow $key
179 le?vspltisb $mask,0x0f # borrow $mask
181 le?vxor $key,$key,$mask # adjust for byte swap
184 vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
186 vxor $zero,$zero,$zero
# Unaligned-store setup for the output key schedule.
189 ?lvsr $outperm,0,$out
192 ?vperm $outmask,$zero,$outmask,$outperm
# 128-bit schedule: each iteration derives the next round key with the
# rotate-n-splat/vcipherlast/vsldoi word-mixing pattern while streaming
# previous keys out through the vsel/vmr store pipeline.
202 vperm $key,$in0,$in0,$mask # rotate-n-splat
203 vsldoi $tmp,$zero,$in0,12 # >>32
204 vperm $outtail,$in0,$in0,$outperm # rotate
205 vsel $stage,$outhead,$outtail,$outmask
206 vmr $outhead,$outtail
207 vcipherlast $key,$key,$rcon
212 vsldoi $tmp,$zero,$tmp,12 # >>32
214 vsldoi $tmp,$zero,$tmp,12 # >>32
216 vadduwm $rcon,$rcon,$rcon
220 lvx $rcon,0,$ptr # last two round keys
222 vperm $key,$in0,$in0,$mask # rotate-n-splat
223 vsldoi $tmp,$zero,$in0,12 # >>32
224 vperm $outtail,$in0,$in0,$outperm # rotate
225 vsel $stage,$outhead,$outtail,$outmask
226 vmr $outhead,$outtail
227 vcipherlast $key,$key,$rcon
232 vsldoi $tmp,$zero,$tmp,12 # >>32
234 vsldoi $tmp,$zero,$tmp,12 # >>32
236 vadduwm $rcon,$rcon,$rcon
239 vperm $key,$in0,$in0,$mask # rotate-n-splat
240 vsldoi $tmp,$zero,$in0,12 # >>32
241 vperm $outtail,$in0,$in0,$outperm # rotate
242 vsel $stage,$outhead,$outtail,$outmask
243 vmr $outhead,$outtail
244 vcipherlast $key,$key,$rcon
249 vsldoi $tmp,$zero,$tmp,12 # >>32
251 vsldoi $tmp,$zero,$tmp,12 # >>32
254 vperm $outtail,$in0,$in0,$outperm # rotate
255 vsel $stage,$outhead,$outtail,$outmask
256 vmr $outhead,$outtail
259 addi $inp,$out,15 # 15 is not typo
# 192-bit schedule: input is 24 bytes, so a narrower mask and an 8-byte
# staging shuffle ($stage) interleave the key material.
269 vperm $outtail,$in0,$in0,$outperm # rotate
270 vsel $stage,$outhead,$outtail,$outmask
271 vmr $outhead,$outtail
274 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
275 vspltisb $key,8 # borrow $key
277 vsububm $mask,$mask,$key # adjust the mask
280 vperm $key,$in1,$in1,$mask # rotate-n-splat
281 vsldoi $tmp,$zero,$in0,12 # >>32
282 vcipherlast $key,$key,$rcon
285 vsldoi $tmp,$zero,$tmp,12 # >>32
287 vsldoi $tmp,$zero,$tmp,12 # >>32
290 vsldoi $stage,$zero,$in1,8
293 vsldoi $in1,$zero,$in1,12 # >>32
294 vadduwm $rcon,$rcon,$rcon
298 vsldoi $stage,$stage,$in0,8
300 vperm $key,$in1,$in1,$mask # rotate-n-splat
301 vsldoi $tmp,$zero,$in0,12 # >>32
302 vperm $outtail,$stage,$stage,$outperm # rotate
303 vsel $stage,$outhead,$outtail,$outmask
304 vmr $outhead,$outtail
305 vcipherlast $key,$key,$rcon
309 vsldoi $stage,$in0,$in1,8
311 vsldoi $tmp,$zero,$tmp,12 # >>32
312 vperm $outtail,$stage,$stage,$outperm # rotate
313 vsel $stage,$outhead,$outtail,$outmask
314 vmr $outhead,$outtail
316 vsldoi $tmp,$zero,$tmp,12 # >>32
323 vsldoi $in1,$zero,$in1,12 # >>32
324 vadduwm $rcon,$rcon,$rcon
328 vperm $outtail,$in0,$in0,$outperm # rotate
329 vsel $stage,$outhead,$outtail,$outmask
330 vmr $outhead,$outtail
332 addi $inp,$out,15 # 15 is not typo
# 256-bit schedule: two 16-byte halves; every other round key is derived
# with "just splat" (vspltw) instead of rotate-n-splat.
345 vperm $outtail,$in0,$in0,$outperm # rotate
346 vsel $stage,$outhead,$outtail,$outmask
347 vmr $outhead,$outtail
350 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
354 vperm $key,$in1,$in1,$mask # rotate-n-splat
355 vsldoi $tmp,$zero,$in0,12 # >>32
356 vperm $outtail,$in1,$in1,$outperm # rotate
357 vsel $stage,$outhead,$outtail,$outmask
358 vmr $outhead,$outtail
359 vcipherlast $key,$key,$rcon
364 vsldoi $tmp,$zero,$tmp,12 # >>32
366 vsldoi $tmp,$zero,$tmp,12 # >>32
368 vadduwm $rcon,$rcon,$rcon
370 vperm $outtail,$in0,$in0,$outperm # rotate
371 vsel $stage,$outhead,$outtail,$outmask
372 vmr $outhead,$outtail
374 addi $inp,$out,15 # 15 is not typo
378 vspltw $key,$in0,3 # just splat
379 vsldoi $tmp,$zero,$in1,12 # >>32
383 vsldoi $tmp,$zero,$tmp,12 # >>32
385 vsldoi $tmp,$zero,$tmp,12 # >>32
# Flush the final partially-assembled output vector (read-modify-write of
# the last quadword so unaligned stores don't clobber adjacent memory).
393 lvx $in1,0,$inp # redundant in aligned case
394 vsel $in1,$outhead,$in1,$outmask
404 .byte 0,12,0x14,1,0,0,3,0
406 .size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
# int ${prefix}_set_decrypt_key(...): builds the encrypt schedule (call
# elided from this view) and then reverses the round-key order in place by
# walking $inp up from the first and $out down from the last round key.
408 .globl .${prefix}_set_decrypt_key
409 $STU $sp,-$FRAME($sp)
411 $PUSH r10,$FRAME+$LRSAVE($sp)
# $out points just past the schedule on entry; 240 = 15 round keys * 16.
# NOTE(review): assumes the largest (256-bit) schedule layout — the
# byte offset math for smaller keys is handled by elided code; confirm.
419 subi $inp,$out,240 # first round key
420 srwi $rounds,$rounds,1
421 add $out,$inp,$cnt # last round key
# r3 = 0 signals success to the C caller.
445 xor r3,r3,r3 # return value
450 .byte 0,12,4,1,0x80,0,3,0
452 .size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
455 #########################################################################
456 {{{ # Single block en- and decrypt procedures #
# This template is instantiated twice (for $dir = "en" and "de"); $n
# expands v${n}cipher to vcipher or vncipher accordingly.
459 my $n = $dir eq "de" ? "n" : "";
460 my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
# void ${prefix}_${dir}crypt(const u8 *inp, u8 *out, const key_t *key);
463 .globl .${prefix}_${dir}crypt
# Rounds count lives at byte offset 240 of the key structure.
464 lwz $rounds,240($key)
467 li $idx,15 # 15 is not typo
# Build permutes for unaligned input load, output store and key access.
473 lvsl v2,0,$inp # inpperm
475 ?lvsl v3,0,r11 # outperm
478 vperm v0,v0,v1,v2 # align [and byte swap in LE]
480 ?lvsl v5,0,$key # keyperm
# Rounds are processed two at a time, hence the halving.
481 srwi $rounds,$rounds,1
484 subi $rounds,$rounds,1
485 ?vperm v1,v1,v2,v5 # align round key
507 v${n}cipherlast v0,v0,v1
# Read-modify-write store of the single output block so an unaligned
# store cannot touch bytes outside [out, out+16).
511 li $idx,15 # 15 is not typo
512 ?vperm v2,v1,v2,v3 # outmask
514 lvx v1,0,$out # outhead
515 vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
525 .byte 0,12,0x14,0,0,0,3,0
527 .size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
533 #########################################################################
534 {{{ # CBC en- and decrypt procedures #
# Args in r3..r10: inp, out, len, key, ivp, enc (direction flag).
535 my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
536 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
537 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
540 .globl .${prefix}_cbc_encrypt
544 cmpwi $enc,0 # test direction
# Load the (possibly unaligned) IV via two lvx + vperm merge.
550 vxor $rndkey0,$rndkey0,$rndkey0
551 le?vspltisb $tmp,0x0f
553 lvx $ivec,0,$ivp # load [unaligned] iv
555 lvx $inptail,$idx,$ivp
556 le?vxor $inpperm,$inpperm,$tmp
557 vperm $ivec,$ivec,$inptail,$inpperm
560 ?lvsl $keyperm,0,$key # prepare for unaligned key
561 lwz $rounds,240($key)
563 lvsr $inpperm,0,r11 # prepare for unaligned load
565 addi $inp,$inp,15 # 15 is not typo
566 le?vxor $inpperm,$inpperm,$tmp
568 ?lvsr $outperm,0,$out # prepare for unaligned store
571 ?vperm $outmask,$rndkey0,$outmask,$outperm
572 le?vxor $outperm,$outperm,$tmp
574 srwi $rounds,$rounds,1
576 subi $rounds,$rounds,1
# --- CBC encrypt path: serial, one block at a time (each block chains
# off the previous ciphertext held in $ivec).
584 subi $len,$len,16 # len-=16
587 vperm $inout,$inout,$inptail,$inpperm
588 lvx $rndkey1,$idx,$key
590 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
591 vxor $inout,$inout,$rndkey0
592 lvx $rndkey0,$idx,$key
594 vxor $inout,$inout,$ivec
# Two rounds per loop iteration, alternating rndkey1/rndkey0.
597 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
598 vcipher $inout,$inout,$rndkey1
599 lvx $rndkey1,$idx,$key
601 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
602 vcipher $inout,$inout,$rndkey0
603 lvx $rndkey0,$idx,$key
607 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
608 vcipher $inout,$inout,$rndkey1
609 lvx $rndkey1,$idx,$key
611 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
# New ciphertext doubles as the next chaining value.
612 vcipherlast $ivec,$inout,$rndkey0
615 vperm $tmp,$ivec,$ivec,$outperm
616 vsel $inout,$outhead,$tmp,$outmask
# --- CBC decrypt path: bulk lengths branch to the 8x pipelined routine.
627 bge _aesp8_cbc_decrypt8x
632 subi $len,$len,16 # len-=16
635 vperm $tmp,$tmp,$inptail,$inpperm
636 lvx $rndkey1,$idx,$key
638 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
639 vxor $inout,$tmp,$rndkey0
640 lvx $rndkey0,$idx,$key
644 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
645 vncipher $inout,$inout,$rndkey1
646 lvx $rndkey1,$idx,$key
648 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
649 vncipher $inout,$inout,$rndkey0
650 lvx $rndkey0,$idx,$key
654 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
655 vncipher $inout,$inout,$rndkey1
656 lvx $rndkey1,$idx,$key
658 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
659 vncipherlast $inout,$inout,$rndkey0
# Un-chain: xor decrypted block with previous ciphertext ($ivec).
662 vxor $inout,$inout,$ivec
664 vperm $tmp,$inout,$inout,$outperm
665 vsel $inout,$outhead,$tmp,$outmask
# Flush last partial output vector (read-modify-write on unaligned out).
673 lvx $inout,0,$out # redundant in aligned case
674 vsel $inout,$outhead,$inout,$outmask
# Write the final chaining value back to *ivp, honoring its alignment.
677 neg $enc,$ivp # write [unaligned] iv
678 li $idx,15 # 15 is not typo
679 vxor $rndkey0,$rndkey0,$rndkey0
681 le?vspltisb $tmp,0x0f
682 ?lvsl $outperm,0,$enc
683 ?vperm $outmask,$rndkey0,$outmask,$outperm
684 le?vxor $outperm,$outperm,$tmp
686 vperm $ivec,$ivec,$ivec,$outperm
687 vsel $inout,$outhead,$ivec,$outmask
688 lvx $inptail,$idx,$ivp
690 vsel $inout,$ivec,$inptail,$outmask
691 stvx $inout,$idx,$ivp
696 .byte 0,12,0x14,0,0,0,6,0
699 #########################################################################
700 {{ # Optimized CBC decrypt procedure #
# 8-blocks-at-a-time CBC decrypt.  r0,r8,r26..r31 hold the eight 16-byte
# offsets ($x00..$x70); v0..v3,v10..v13 hold input ciphertext, v14..v21
# the in-flight output blocks.
702 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
703 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
704 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
705 my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
706 # v26-v31 last 6 round keys
707 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
711 _aesp8_cbc_decrypt8x:
# Prologue: allocate frame, save the non-volatile VRs/GPRs per ABI.
712 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
713 li r10,`$FRAME+8*16+15`
714 li r11,`$FRAME+8*16+31`
715 stvx v20,r10,$sp # ABI says so
738 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
740 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
742 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
744 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
746 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
748 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
750 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
753 subi $rounds,$rounds,3 # -4 in total
755 subi $len,$len,128 # bias
# Align the whole key schedule once, keeping middle rounds in an on-stack
# copy ($key_) and the first/last rounds resident in v23..v31.
757 lvx $rndkey0,$x00,$key # load key schedule
761 ?vperm $rndkey0,$rndkey0,v30,$keyperm
762 addi $key_,$sp,$FRAME+15
766 ?vperm v24,v30,v31,$keyperm
769 stvx v24,$x00,$key_ # off-load round[1]
770 ?vperm v25,v31,v30,$keyperm
772 stvx v25,$x10,$key_ # off-load round[2]
773 addi $key_,$key_,0x20
774 bdnz Load_cbc_dec_key
777 ?vperm v24,v30,v31,$keyperm
779 stvx v24,$x00,$key_ # off-load round[3]
780 ?vperm v25,v31,v26,$keyperm
782 stvx v25,$x10,$key_ # off-load round[4]
783 addi $key_,$sp,$FRAME+15 # rewind $key_
784 ?vperm v26,v26,v27,$keyperm
786 ?vperm v27,v27,v28,$keyperm
788 ?vperm v28,v28,v29,$keyperm
790 ?vperm v29,v29,v30,$keyperm
791 lvx $out0,$x70,$key # borrow $out0
792 ?vperm v30,v30,v31,$keyperm
793 lvx v24,$x00,$key_ # pre-load round[1]
794 ?vperm v31,v31,$out0,$keyperm
795 lvx v25,$x10,$key_ # pre-load round[2]
797 #lvx $inptail,0,$inp # "caller" already did this
798 #addi $inp,$inp,15 # 15 is not typo
799 subi $inp,$inp,15 # undo "caller"
# First batch: load 8 ciphertext blocks (lvx_u = VSX unaligned load) and
# apply round[0] immediately; on LE $inpperm fixes lvx_u byte order.
802 lvx_u $in0,$x00,$inp # load first 8 "words"
803 le?lvsl $inpperm,0,$idx
804 le?vspltisb $tmp,0x0f
806 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
808 le?vperm $in0,$in0,$in0,$inpperm
810 le?vperm $in1,$in1,$in1,$inpperm
812 le?vperm $in2,$in2,$in2,$inpperm
813 vxor $out0,$in0,$rndkey0
815 le?vperm $in3,$in3,$in3,$inpperm
816 vxor $out1,$in1,$rndkey0
818 le?vperm $in4,$in4,$in4,$inpperm
819 vxor $out2,$in2,$rndkey0
822 le?vperm $in5,$in5,$in5,$inpperm
823 vxor $out3,$in3,$rndkey0
824 le?vperm $in6,$in6,$in6,$inpperm
825 vxor $out4,$in4,$rndkey0
826 le?vperm $in7,$in7,$in7,$inpperm
827 vxor $out5,$in5,$rndkey0
828 vxor $out6,$in6,$rndkey0
829 vxor $out7,$in7,$rndkey0
# Main 8x loop: all eight blocks advance one round per instruction group,
# hiding vncipher latency; v24/v25 rotate through the stacked middle-round
# keys while v26..v31 stay resident.
835 vncipher $out0,$out0,v24
836 vncipher $out1,$out1,v24
837 vncipher $out2,$out2,v24
838 vncipher $out3,$out3,v24
839 vncipher $out4,$out4,v24
840 vncipher $out5,$out5,v24
841 vncipher $out6,$out6,v24
842 vncipher $out7,$out7,v24
843 lvx v24,$x20,$key_ # round[3]
844 addi $key_,$key_,0x20
846 vncipher $out0,$out0,v25
847 vncipher $out1,$out1,v25
848 vncipher $out2,$out2,v25
849 vncipher $out3,$out3,v25
850 vncipher $out4,$out4,v25
851 vncipher $out5,$out5,v25
852 vncipher $out6,$out6,v25
853 vncipher $out7,$out7,v25
854 lvx v25,$x10,$key_ # round[4]
# Loop-control bookkeeping is interleaved with the rounds below: subic
# sets carry iff $len >= 128; subfe turns that into 0 / -1.
857 subic $len,$len,128 # $len-=128
858 vncipher $out0,$out0,v24
859 vncipher $out1,$out1,v24
860 vncipher $out2,$out2,v24
861 vncipher $out3,$out3,v24
862 vncipher $out4,$out4,v24
863 vncipher $out5,$out5,v24
864 vncipher $out6,$out6,v24
865 vncipher $out7,$out7,v24
867 subfe. r0,r0,r0 # borrow?-1:0
868 vncipher $out0,$out0,v25
869 vncipher $out1,$out1,v25
870 vncipher $out2,$out2,v25
871 vncipher $out3,$out3,v25
872 vncipher $out4,$out4,v25
873 vncipher $out5,$out5,v25
874 vncipher $out6,$out6,v25
875 vncipher $out7,$out7,v25
878 vncipher $out0,$out0,v26
879 vncipher $out1,$out1,v26
880 vncipher $out2,$out2,v26
881 vncipher $out3,$out3,v26
882 vncipher $out4,$out4,v26
883 vncipher $out5,$out5,v26
884 vncipher $out6,$out6,v26
885 vncipher $out7,$out7,v26
887 add $inp,$inp,r0 # $inp is adjusted in such
888 # way that at exit from the
889 # loop inX-in7 are loaded
891 vncipher $out0,$out0,v27
892 vncipher $out1,$out1,v27
893 vncipher $out2,$out2,v27
894 vncipher $out3,$out3,v27
895 vncipher $out4,$out4,v27
896 vncipher $out5,$out5,v27
897 vncipher $out6,$out6,v27
898 vncipher $out7,$out7,v27
900 addi $key_,$sp,$FRAME+15 # rewind $key_
901 vncipher $out0,$out0,v28
902 vncipher $out1,$out1,v28
903 vncipher $out2,$out2,v28
904 vncipher $out3,$out3,v28
905 vncipher $out4,$out4,v28
906 vncipher $out5,$out5,v28
907 vncipher $out6,$out6,v28
908 vncipher $out7,$out7,v28
909 lvx v24,$x00,$key_ # re-pre-load round[1]
911 vncipher $out0,$out0,v29
912 vncipher $out1,$out1,v29
913 vncipher $out2,$out2,v29
914 vncipher $out3,$out3,v29
915 vncipher $out4,$out4,v29
916 vncipher $out5,$out5,v29
917 vncipher $out6,$out6,v29
918 vncipher $out7,$out7,v29
919 lvx v25,$x10,$key_ # re-pre-load round[2]
# Fold the CBC chaining value and round[10] key together, so the final
# vncipherlast can take "previous ciphertext xor last-round-key" directly.
921 vncipher $out0,$out0,v30
922 vxor $ivec,$ivec,v31 # xor with last round key
923 vncipher $out1,$out1,v30
925 vncipher $out2,$out2,v30
927 vncipher $out3,$out3,v30
929 vncipher $out4,$out4,v30
931 vncipher $out5,$out5,v30
933 vncipher $out6,$out6,v30
935 vncipher $out7,$out7,v30
# Finish: block i is un-chained with ciphertext block i-1 ($ivec/$in0..);
# next batch's loads and round[0] xors are interleaved with the stores.
938 vncipherlast $out0,$out0,$ivec
939 vncipherlast $out1,$out1,$in0
940 lvx_u $in0,$x00,$inp # load next input block
941 vncipherlast $out2,$out2,$in1
943 vncipherlast $out3,$out3,$in2
944 le?vperm $in0,$in0,$in0,$inpperm
946 vncipherlast $out4,$out4,$in3
947 le?vperm $in1,$in1,$in1,$inpperm
949 vncipherlast $out5,$out5,$in4
950 le?vperm $in2,$in2,$in2,$inpperm
952 vncipherlast $out6,$out6,$in5
953 le?vperm $in3,$in3,$in3,$inpperm
955 vncipherlast $out7,$out7,$in6
956 le?vperm $in4,$in4,$in4,$inpperm
959 le?vperm $in5,$in5,$in5,$inpperm
963 le?vperm $out0,$out0,$out0,$inpperm
964 le?vperm $out1,$out1,$out1,$inpperm
965 stvx_u $out0,$x00,$out
966 le?vperm $in6,$in6,$in6,$inpperm
967 vxor $out0,$in0,$rndkey0
968 le?vperm $out2,$out2,$out2,$inpperm
969 stvx_u $out1,$x10,$out
970 le?vperm $in7,$in7,$in7,$inpperm
971 vxor $out1,$in1,$rndkey0
972 le?vperm $out3,$out3,$out3,$inpperm
973 stvx_u $out2,$x20,$out
974 vxor $out2,$in2,$rndkey0
975 le?vperm $out4,$out4,$out4,$inpperm
976 stvx_u $out3,$x30,$out
977 vxor $out3,$in3,$rndkey0
978 le?vperm $out5,$out5,$out5,$inpperm
979 stvx_u $out4,$x40,$out
980 vxor $out4,$in4,$rndkey0
981 le?vperm $out6,$out6,$out6,$inpperm
982 stvx_u $out5,$x50,$out
983 vxor $out5,$in5,$rndkey0
984 le?vperm $out7,$out7,$out7,$inpperm
985 stvx_u $out6,$x60,$out
986 vxor $out6,$in6,$rndkey0
987 stvx_u $out7,$x70,$out
989 vxor $out7,$in7,$rndkey0
992 beq Loop_cbc_dec8x # did $len-=128 borrow?
# Tail path: fewer than 8 blocks remain.  $out1..$out7 carry the tail
# (lanes shifted up so $out7 is always the last block); $out0 is unused.
999 Loop_cbc_dec8x_tail: # up to 7 "words" tail...
1000 vncipher $out1,$out1,v24
1001 vncipher $out2,$out2,v24
1002 vncipher $out3,$out3,v24
1003 vncipher $out4,$out4,v24
1004 vncipher $out5,$out5,v24
1005 vncipher $out6,$out6,v24
1006 vncipher $out7,$out7,v24
1007 lvx v24,$x20,$key_ # round[3]
1008 addi $key_,$key_,0x20
1010 vncipher $out1,$out1,v25
1011 vncipher $out2,$out2,v25
1012 vncipher $out3,$out3,v25
1013 vncipher $out4,$out4,v25
1014 vncipher $out5,$out5,v25
1015 vncipher $out6,$out6,v25
1016 vncipher $out7,$out7,v25
1017 lvx v25,$x10,$key_ # round[4]
1018 bdnz Loop_cbc_dec8x_tail
1020 vncipher $out1,$out1,v24
1021 vncipher $out2,$out2,v24
1022 vncipher $out3,$out3,v24
1023 vncipher $out4,$out4,v24
1024 vncipher $out5,$out5,v24
1025 vncipher $out6,$out6,v24
1026 vncipher $out7,$out7,v24
1028 vncipher $out1,$out1,v25
1029 vncipher $out2,$out2,v25
1030 vncipher $out3,$out3,v25
1031 vncipher $out4,$out4,v25
1032 vncipher $out5,$out5,v25
1033 vncipher $out6,$out6,v25
1034 vncipher $out7,$out7,v25
1036 vncipher $out1,$out1,v26
1037 vncipher $out2,$out2,v26
1038 vncipher $out3,$out3,v26
1039 vncipher $out4,$out4,v26
1040 vncipher $out5,$out5,v26
1041 vncipher $out6,$out6,v26
1042 vncipher $out7,$out7,v26
1044 vncipher $out1,$out1,v27
1045 vncipher $out2,$out2,v27
1046 vncipher $out3,$out3,v27
1047 vncipher $out4,$out4,v27
1048 vncipher $out5,$out5,v27
1049 vncipher $out6,$out6,v27
1050 vncipher $out7,$out7,v27
1052 vncipher $out1,$out1,v28
1053 vncipher $out2,$out2,v28
1054 vncipher $out3,$out3,v28
1055 vncipher $out4,$out4,v28
1056 vncipher $out5,$out5,v28
1057 vncipher $out6,$out6,v28
1058 vncipher $out7,$out7,v28
1060 vncipher $out1,$out1,v29
1061 vncipher $out2,$out2,v29
1062 vncipher $out3,$out3,v29
1063 vncipher $out4,$out4,v29
1064 vncipher $out5,$out5,v29
1065 vncipher $out6,$out6,v29
1066 vncipher $out7,$out7,v29
1068 vncipher $out1,$out1,v30
1069 vxor $ivec,$ivec,v31 # last round key
1070 vncipher $out2,$out2,v30
1072 vncipher $out3,$out3,v30
1074 vncipher $out4,$out4,v30
1076 vncipher $out5,$out5,v30
1078 vncipher $out6,$out6,v30
1080 vncipher $out7,$out7,v30
# Dispatch on remaining length; each case below finishes N blocks:
# first block un-chains with $ivec (pre-xored with v31 above), the rest
# with the corresponding previous ciphertext ($in1..$in6).
1083 cmplwi $len,32 # switch($len)
1088 blt Lcbc_dec8x_three
# --- seven blocks
1097 vncipherlast $out1,$out1,$ivec
1098 vncipherlast $out2,$out2,$in1
1099 vncipherlast $out3,$out3,$in2
1100 vncipherlast $out4,$out4,$in3
1101 vncipherlast $out5,$out5,$in4
1102 vncipherlast $out6,$out6,$in5
1103 vncipherlast $out7,$out7,$in6
1106 le?vperm $out1,$out1,$out1,$inpperm
1107 le?vperm $out2,$out2,$out2,$inpperm
1108 stvx_u $out1,$x00,$out
1109 le?vperm $out3,$out3,$out3,$inpperm
1110 stvx_u $out2,$x10,$out
1111 le?vperm $out4,$out4,$out4,$inpperm
1112 stvx_u $out3,$x20,$out
1113 le?vperm $out5,$out5,$out5,$inpperm
1114 stvx_u $out4,$x30,$out
1115 le?vperm $out6,$out6,$out6,$inpperm
1116 stvx_u $out5,$x40,$out
1117 le?vperm $out7,$out7,$out7,$inpperm
1118 stvx_u $out6,$x50,$out
1119 stvx_u $out7,$x60,$out
# --- six blocks
1125 vncipherlast $out2,$out2,$ivec
1126 vncipherlast $out3,$out3,$in2
1127 vncipherlast $out4,$out4,$in3
1128 vncipherlast $out5,$out5,$in4
1129 vncipherlast $out6,$out6,$in5
1130 vncipherlast $out7,$out7,$in6
1133 le?vperm $out2,$out2,$out2,$inpperm
1134 le?vperm $out3,$out3,$out3,$inpperm
1135 stvx_u $out2,$x00,$out
1136 le?vperm $out4,$out4,$out4,$inpperm
1137 stvx_u $out3,$x10,$out
1138 le?vperm $out5,$out5,$out5,$inpperm
1139 stvx_u $out4,$x20,$out
1140 le?vperm $out6,$out6,$out6,$inpperm
1141 stvx_u $out5,$x30,$out
1142 le?vperm $out7,$out7,$out7,$inpperm
1143 stvx_u $out6,$x40,$out
1144 stvx_u $out7,$x50,$out
# --- five blocks
1150 vncipherlast $out3,$out3,$ivec
1151 vncipherlast $out4,$out4,$in3
1152 vncipherlast $out5,$out5,$in4
1153 vncipherlast $out6,$out6,$in5
1154 vncipherlast $out7,$out7,$in6
1157 le?vperm $out3,$out3,$out3,$inpperm
1158 le?vperm $out4,$out4,$out4,$inpperm
1159 stvx_u $out3,$x00,$out
1160 le?vperm $out5,$out5,$out5,$inpperm
1161 stvx_u $out4,$x10,$out
1162 le?vperm $out6,$out6,$out6,$inpperm
1163 stvx_u $out5,$x20,$out
1164 le?vperm $out7,$out7,$out7,$inpperm
1165 stvx_u $out6,$x30,$out
1166 stvx_u $out7,$x40,$out
# --- four blocks
1172 vncipherlast $out4,$out4,$ivec
1173 vncipherlast $out5,$out5,$in4
1174 vncipherlast $out6,$out6,$in5
1175 vncipherlast $out7,$out7,$in6
1178 le?vperm $out4,$out4,$out4,$inpperm
1179 le?vperm $out5,$out5,$out5,$inpperm
1180 stvx_u $out4,$x00,$out
1181 le?vperm $out6,$out6,$out6,$inpperm
1182 stvx_u $out5,$x10,$out
1183 le?vperm $out7,$out7,$out7,$inpperm
1184 stvx_u $out6,$x20,$out
1185 stvx_u $out7,$x30,$out
# --- three blocks
1191 vncipherlast $out5,$out5,$ivec
1192 vncipherlast $out6,$out6,$in5
1193 vncipherlast $out7,$out7,$in6
1196 le?vperm $out5,$out5,$out5,$inpperm
1197 le?vperm $out6,$out6,$out6,$inpperm
1198 stvx_u $out5,$x00,$out
1199 le?vperm $out7,$out7,$out7,$inpperm
1200 stvx_u $out6,$x10,$out
1201 stvx_u $out7,$x20,$out
# --- two blocks
1207 vncipherlast $out6,$out6,$ivec
1208 vncipherlast $out7,$out7,$in6
1211 le?vperm $out6,$out6,$out6,$inpperm
1212 le?vperm $out7,$out7,$out7,$inpperm
1213 stvx_u $out6,$x00,$out
1214 stvx_u $out7,$x10,$out
# --- one block
1220 vncipherlast $out7,$out7,$ivec
1223 le?vperm $out7,$out7,$out7,$inpperm
# Epilogue: write back the chaining value, scrub the on-stack round-key
# copies, then restore non-volatile VRs/GPRs and the stack frame.
1228 le?vperm $ivec,$ivec,$ivec,$inpperm
1229 stvx_u $ivec,0,$ivp # write [unaligned] iv
1233 stvx $inpperm,r10,$sp # wipe copies of round keys
1235 stvx $inpperm,r11,$sp
1237 stvx $inpperm,r10,$sp
1239 stvx $inpperm,r11,$sp
1241 stvx $inpperm,r10,$sp
1243 stvx $inpperm,r11,$sp
1245 stvx $inpperm,r10,$sp
1247 stvx $inpperm,r11,$sp
1251 lvx v20,r10,$sp # ABI says so
1273 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1274 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1275 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1276 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1277 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1278 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1279 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1282 .byte 0,12,0x14,0,0x80,6,6,0
1284 .size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
1288 #########################################################################
1289 {{{ # CTR procedure[s] #
1291 ####################### WARNING: Here be dragons! #######################
1293 # This code is written as 'ctr32', based on a 32-bit counter used
1294 # upstream. The kernel does *not* use a 32-bit counter. The kernel uses
1295 # a 128-bit counter.
1297 # This leads to subtle changes from the upstream code: the counter
1298 # is incremented with vaddu_q_m rather than vaddu_w_m. This occurs in
1299 # both the bulk (8 blocks at a time) path, and in the individual block
1300 # path. Be aware of this when doing updates.
1303 # 1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug")
1304 # 009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword")
1305 # https://github.com/openssl/openssl/pull/8942
1307 #########################################################################
# Args in r3..r10: inp, out, len (in blocks), key, ivp; $one holds the
# 128-bit increment constant.
1308 my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
1309 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
1310 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
1315 .globl .${prefix}_ctr32_encrypt_blocks
# Load the (possibly unaligned) counter block into $ivec.
1324 vxor $rndkey0,$rndkey0,$rndkey0
1325 le?vspltisb $tmp,0x0f
1327 lvx $ivec,0,$ivp # load [unaligned] iv
1328 lvsl $inpperm,0,$ivp
1329 lvx $inptail,$idx,$ivp
1331 le?vxor $inpperm,$inpperm,$tmp
1332 vperm $ivec,$ivec,$inptail,$inpperm
# $one = 0...01: shift a 1 into the least-significant byte position.
1333 vsldoi $one,$rndkey0,$one,1
1336 ?lvsl $keyperm,0,$key # prepare for unaligned key
1337 lwz $rounds,240($key)
1339 lvsr $inpperm,0,r11 # prepare for unaligned load
1341 addi $inp,$inp,15 # 15 is not typo
1342 le?vxor $inpperm,$inpperm,$tmp
1344 srwi $rounds,$rounds,1
1346 subi $rounds,$rounds,1
# Bulk lengths go to the 8x pipelined routine.
1349 bge _aesp8_ctr32_encrypt8x
1351 ?lvsr $outperm,0,$out # prepare for unaligned store
1352 vspltisb $outmask,-1
1354 ?vperm $outmask,$rndkey0,$outmask,$outperm
1355 le?vxor $outperm,$outperm,$tmp
# Single-block loop: encrypt the counter, two rounds per iteration.
1359 lvx $rndkey1,$idx,$key
1361 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1362 vxor $inout,$ivec,$rndkey0
1363 lvx $rndkey0,$idx,$key
1369 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1370 vcipher $inout,$inout,$rndkey1
1371 lvx $rndkey1,$idx,$key
1373 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1374 vcipher $inout,$inout,$rndkey0
1375 lvx $rndkey0,$idx,$key
# Quadword (128-bit) increment — deliberate divergence from upstream
# ctr32; see the WARNING block above.
1379 vadduqm $ivec,$ivec,$one # Kernel change for 128-bit
1383 subic. $len,$len,1 # blocks--
1385 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1386 vcipher $inout,$inout,$rndkey1
1387 lvx $rndkey1,$idx,$key
1388 vperm $dat,$dat,$inptail,$inpperm
# Fold plaintext into the last round key so vcipherlast produces the
# final ciphertext (counter-mode xor) in one instruction.
1390 ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
1392 vxor $dat,$dat,$rndkey1 # last round key
1393 vcipherlast $inout,$inout,$dat
1395 lvx $rndkey1,$idx,$key
1397 vperm $inout,$inout,$inout,$outperm
1398 vsel $dat,$outhead,$inout,$outmask
1400 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1402 vxor $inout,$ivec,$rndkey0
1403 lvx $rndkey0,$idx,$key
# Flush last partial output vector (read-modify-write on unaligned out).
1410 lvx $inout,0,$out # redundant in aligned case
1411 vsel $inout,$outhead,$inout,$outmask
1417 .byte 0,12,0x14,0,0,0,6,0
1420 #########################################################################
1421 {{ # Optimized CTR procedure #
1423 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
1424 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
1425 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
1426 my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
1427 # v26-v31 last 6 round keys
1428 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
1429 my ($two,$three,$four)=($outhead,$outperm,$outmask);
1433 _aesp8_ctr32_encrypt8x:
1434 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1435 li r10,`$FRAME+8*16+15`
1436 li r11,`$FRAME+8*16+31`
1437 stvx v20,r10,$sp # ABI says so
1460 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
1462 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1464 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1466 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1468 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1470 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1472 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1476 subi $rounds,$rounds,3 # -4 in total
1478 lvx $rndkey0,$x00,$key # load key schedule
1482 ?vperm $rndkey0,$rndkey0,v30,$keyperm
1483 addi $key_,$sp,$FRAME+15
1487 ?vperm v24,v30,v31,$keyperm
1490 stvx v24,$x00,$key_ # off-load round[1]
1491 ?vperm v25,v31,v30,$keyperm
1493 stvx v25,$x10,$key_ # off-load round[2]
1494 addi $key_,$key_,0x20
1495 bdnz Load_ctr32_enc_key
1498 ?vperm v24,v30,v31,$keyperm
1500 stvx v24,$x00,$key_ # off-load round[3]
1501 ?vperm v25,v31,v26,$keyperm
1503 stvx v25,$x10,$key_ # off-load round[4]
1504 addi $key_,$sp,$FRAME+15 # rewind $key_
1505 ?vperm v26,v26,v27,$keyperm
1507 ?vperm v27,v27,v28,$keyperm
1509 ?vperm v28,v28,v29,$keyperm
1511 ?vperm v29,v29,v30,$keyperm
1512 lvx $out0,$x70,$key # borrow $out0
1513 ?vperm v30,v30,v31,$keyperm
1514 lvx v24,$x00,$key_ # pre-load round[1]
1515 ?vperm v31,v31,$out0,$keyperm
1516 lvx v25,$x10,$key_ # pre-load round[2]
1518 vadduqm $two,$one,$one
1519 subi $inp,$inp,15 # undo "caller"
1522 vadduqm $out1,$ivec,$one # counter values ...
1523 vadduqm $out2,$ivec,$two # (do all ctr adds as 128-bit)
1524 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1526 vadduqm $out3,$out1,$two
1527 vxor $out1,$out1,$rndkey0
1528 le?lvsl $inpperm,0,$idx
1529 vadduqm $out4,$out2,$two
1530 vxor $out2,$out2,$rndkey0
1531 le?vspltisb $tmp,0x0f
1532 vadduqm $out5,$out3,$two
1533 vxor $out3,$out3,$rndkey0
1534 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
1535 vadduqm $out6,$out4,$two
1536 vxor $out4,$out4,$rndkey0
1537 vadduqm $out7,$out5,$two
1538 vxor $out5,$out5,$rndkey0
1539 vadduqm $ivec,$out6,$two # next counter value
1540 vxor $out6,$out6,$rndkey0
1541 vxor $out7,$out7,$rndkey0
# --- CTR32 8x round loop: two AES rounds per iteration across 8 lanes ---
# Eight independent blocks are pipelined so the vcipher latency of one
# lane is hidden by the other seven; v24/v25 rotate through the stacked
# round keys. NOTE(review): the Loop_ctr32_enc8x label itself is elided
# from this view (bdnz below targets it).
1547	vcipher	$out0,$out0,v24
1548	vcipher	$out1,$out1,v24
1549	vcipher	$out2,$out2,v24
1550	vcipher	$out3,$out3,v24
1551	vcipher	$out4,$out4,v24
1552	vcipher	$out5,$out5,v24
1553	vcipher	$out6,$out6,v24
1554	vcipher	$out7,$out7,v24
Loop_ctr32_enc8x_middle:
1556	lvx	v24,$x20,$key_		# round[3]
1557	addi	$key_,$key_,0x20
1559	vcipher	$out0,$out0,v25
1560	vcipher	$out1,$out1,v25
1561	vcipher	$out2,$out2,v25
1562	vcipher	$out3,$out3,v25
1563	vcipher	$out4,$out4,v25
1564	vcipher	$out5,$out5,v25
1565	vcipher	$out6,$out6,v25
1566	vcipher	$out7,$out7,v25
1567	lvx	v25,$x10,$key_		# round[4]
1568	bdnz	Loop_ctr32_enc8x
# Fallen out of the counted loop: last four scheduled rounds follow,
# interleaved with the length bookkeeping for the next iteration.
1570	subic	r11,$len,256		# $len-256, borrow $key_
1571	vcipher	$out0,$out0,v24
1572	vcipher	$out1,$out1,v24
1573	vcipher	$out2,$out2,v24
1574	vcipher	$out3,$out3,v24
1575	vcipher	$out4,$out4,v24
1576	vcipher	$out5,$out5,v24
1577	vcipher	$out6,$out6,v24
1578	vcipher	$out7,$out7,v24
1580	subfe	r0,r0,r0		# borrow?-1:0
1581	vcipher	$out0,$out0,v25
1582	vcipher	$out1,$out1,v25
1583	vcipher	$out2,$out2,v25
1584	vcipher	$out3,$out3,v25
1585	vcipher	$out4,$out4,v25
1586	vcipher	$out5,$out5,v25
1587	vcipher	$out6,$out6,v25
1588	vcipher	$out7,$out7,v25
# --- CTR32 8x: rounds with resident keys v26-v29, overlapped with the
# loads of the next 8 input blocks and LE byte-swaps. $len is probed for
# "at least 128 bytes left" via the subic/subfe borrow trick so $inp can
# be rewound without a branch when fewer blocks remain.
1591	addi	$key_,$sp,$FRAME+15	# rewind $key_
1592	vcipher	$out0,$out0,v26
1593	vcipher	$out1,$out1,v26
1594	vcipher	$out2,$out2,v26
1595	vcipher	$out3,$out3,v26
1596	vcipher	$out4,$out4,v26
1597	vcipher	$out5,$out5,v26
1598	vcipher	$out6,$out6,v26
1599	vcipher	$out7,$out7,v26
1600	lvx	v24,$x00,$key_		# re-pre-load round[1]
1602	subic	$len,$len,129		# $len-=129
1603	vcipher	$out0,$out0,v27
1604	addi	$len,$len,1		# $len-=128 really
1605	vcipher	$out1,$out1,v27
1606	vcipher	$out2,$out2,v27
1607	vcipher	$out3,$out3,v27
1608	vcipher	$out4,$out4,v27
1609	vcipher	$out5,$out5,v27
1610	vcipher	$out6,$out6,v27
1611	vcipher	$out7,$out7,v27
1612	lvx	v25,$x10,$key_		# re-pre-load round[2]
1614	vcipher	$out0,$out0,v28
1615	lvx_u	$in0,$x00,$inp		# load input
1616	vcipher	$out1,$out1,v28
1617	lvx_u	$in1,$x10,$inp
1618	vcipher	$out2,$out2,v28
1619	lvx_u	$in2,$x20,$inp
1620	vcipher	$out3,$out3,v28
1621	lvx_u	$in3,$x30,$inp
1622	vcipher	$out4,$out4,v28
1623	lvx_u	$in4,$x40,$inp
1624	vcipher	$out5,$out5,v28
1625	lvx_u	$in5,$x50,$inp
1626	vcipher	$out6,$out6,v28
1627	lvx_u	$in6,$x60,$inp
1628	vcipher	$out7,$out7,v28
1629	lvx_u	$in7,$x70,$inp
1632	vcipher	$out0,$out0,v29
1633	le?vperm	$in0,$in0,$in0,$inpperm
1634	vcipher	$out1,$out1,v29
1635	le?vperm	$in1,$in1,$in1,$inpperm
1636	vcipher	$out2,$out2,v29
1637	le?vperm	$in2,$in2,$in2,$inpperm
1638	vcipher	$out3,$out3,v29
1639	le?vperm	$in3,$in3,$in3,$inpperm
1640	vcipher	$out4,$out4,v29
1641	le?vperm	$in4,$in4,$in4,$inpperm
1642	vcipher	$out5,$out5,v29
1643	le?vperm	$in5,$in5,$in5,$inpperm
1644	vcipher	$out6,$out6,v29
1645	le?vperm	$in6,$in6,$in6,$inpperm
1646	vcipher	$out7,$out7,v29
1647	le?vperm	$in7,$in7,$in7,$inpperm
# r0 is 0 or -1 from the earlier subfe; adding it backs $inp off so a
# short final batch re-reads the same data rather than running past it.
1649	add	$inp,$inp,r0		# $inp is adjusted in such
1650					# way that at exit from the
1651					# loop inX-in7 are loaded
1653	subfe.	r0,r0,r0		# borrow?-1:0
1654	vcipher	$out0,$out0,v30
# Inputs are xored with the last round key here so the final vcipherlast
# performs encryption and the CTR xor in a single instruction per lane.
1655	vxor	$in0,$in0,v31		# xor with last round key
1656	vcipher	$out1,$out1,v30
1658	vcipher	$out2,$out2,v30
1660	vcipher	$out3,$out3,v30
1662	vcipher	$out4,$out4,v30
1664	vcipher	$out5,$out5,v30
1666	vcipher	$out6,$out6,v30
1668	vcipher	$out7,$out7,v30
1673 vcipherlast $in0,$out0,$in0
1674 vcipherlast $in1,$out1,$in1
1675 vadduqm $out1,$ivec,$one # counter values ...
1676 vcipherlast $in2,$out2,$in2
1677 vadduqm $out2,$ivec,$two
1678 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1679 vcipherlast $in3,$out3,$in3
1680 vadduqm $out3,$out1,$two
1681 vxor $out1,$out1,$rndkey0
1682 vcipherlast $in4,$out4,$in4
1683 vadduqm $out4,$out2,$two
1684 vxor $out2,$out2,$rndkey0
1685 vcipherlast $in5,$out5,$in5
1686 vadduqm $out5,$out3,$two
1687 vxor $out3,$out3,$rndkey0
1688 vcipherlast $in6,$out6,$in6
1689 vadduqm $out6,$out4,$two
1690 vxor $out4,$out4,$rndkey0
1691 vcipherlast $in7,$out7,$in7
1692 vadduqm $out7,$out5,$two
1693 vxor $out5,$out5,$rndkey0
1694 le?vperm $in0,$in0,$in0,$inpperm
1695 vadduqm $ivec,$out6,$two # next counter value
1696 vxor $out6,$out6,$rndkey0
1697 le?vperm $in1,$in1,$in1,$inpperm
1698 vxor $out7,$out7,$rndkey0
1701 vcipher $out0,$out0,v24
1702 stvx_u $in0,$x00,$out
1703 le?vperm $in2,$in2,$in2,$inpperm
1704 vcipher $out1,$out1,v24
1705 stvx_u $in1,$x10,$out
1706 le?vperm $in3,$in3,$in3,$inpperm
1707 vcipher $out2,$out2,v24
1708 stvx_u $in2,$x20,$out
1709 le?vperm $in4,$in4,$in4,$inpperm
1710 vcipher $out3,$out3,v24
1711 stvx_u $in3,$x30,$out
1712 le?vperm $in5,$in5,$in5,$inpperm
1713 vcipher $out4,$out4,v24
1714 stvx_u $in4,$x40,$out
1715 le?vperm $in6,$in6,$in6,$inpperm
1716 vcipher $out5,$out5,v24
1717 stvx_u $in5,$x50,$out
1718 le?vperm $in7,$in7,$in7,$inpperm
1719 vcipher $out6,$out6,v24
1720 stvx_u $in6,$x60,$out
1721 vcipher $out7,$out7,v24
1722 stvx_u $in7,$x70,$out
1725 b Loop_ctr32_enc8x_middle
# --- CTR32 8x tail: dispatch on remaining block count (1..8) ---
# The compare setup for this blt/beq ladder is elided from this view;
# each Lctr32_enc8x_N arm finishes N blocks. Note the register rotation:
# the seven-block arm pairs $out0 with $in1, six with $in2, etc., because
# the short final batch was loaded shifted by the $inp adjustment above.
1730	blt	Lctr32_enc8x_one
1732	beq	Lctr32_enc8x_two
1734	blt	Lctr32_enc8x_three
1736	beq	Lctr32_enc8x_four
1738	blt	Lctr32_enc8x_five
1740	beq	Lctr32_enc8x_six
1742	blt	Lctr32_enc8x_seven
# Eight remaining blocks.
1745	vcipherlast	$out0,$out0,$in0
1746	vcipherlast	$out1,$out1,$in1
1747	vcipherlast	$out2,$out2,$in2
1748	vcipherlast	$out3,$out3,$in3
1749	vcipherlast	$out4,$out4,$in4
1750	vcipherlast	$out5,$out5,$in5
1751	vcipherlast	$out6,$out6,$in6
1752	vcipherlast	$out7,$out7,$in7
1754	le?vperm	$out0,$out0,$out0,$inpperm
1755	le?vperm	$out1,$out1,$out1,$inpperm
1756	stvx_u		$out0,$x00,$out
1757	le?vperm	$out2,$out2,$out2,$inpperm
1758	stvx_u		$out1,$x10,$out
1759	le?vperm	$out3,$out3,$out3,$inpperm
1760	stvx_u		$out2,$x20,$out
1761	le?vperm	$out4,$out4,$out4,$inpperm
1762	stvx_u		$out3,$x30,$out
1763	le?vperm	$out5,$out5,$out5,$inpperm
1764	stvx_u		$out4,$x40,$out
1765	le?vperm	$out6,$out6,$out6,$inpperm
1766	stvx_u		$out5,$x50,$out
1767	le?vperm	$out7,$out7,$out7,$inpperm
1768	stvx_u		$out6,$x60,$out
1769	stvx_u		$out7,$x70,$out
# Seven remaining blocks (inputs rotated by one lane).
1775	vcipherlast	$out0,$out0,$in1
1776	vcipherlast	$out1,$out1,$in2
1777	vcipherlast	$out2,$out2,$in3
1778	vcipherlast	$out3,$out3,$in4
1779	vcipherlast	$out4,$out4,$in5
1780	vcipherlast	$out5,$out5,$in6
1781	vcipherlast	$out6,$out6,$in7
1783	le?vperm	$out0,$out0,$out0,$inpperm
1784	le?vperm	$out1,$out1,$out1,$inpperm
1785	stvx_u		$out0,$x00,$out
1786	le?vperm	$out2,$out2,$out2,$inpperm
1787	stvx_u		$out1,$x10,$out
1788	le?vperm	$out3,$out3,$out3,$inpperm
1789	stvx_u		$out2,$x20,$out
1790	le?vperm	$out4,$out4,$out4,$inpperm
1791	stvx_u		$out3,$x30,$out
1792	le?vperm	$out5,$out5,$out5,$inpperm
1793	stvx_u		$out4,$x40,$out
1794	le?vperm	$out6,$out6,$out6,$inpperm
1795	stvx_u		$out5,$x50,$out
1796	stvx_u		$out6,$x60,$out
# Six remaining blocks.
1802	vcipherlast	$out0,$out0,$in2
1803	vcipherlast	$out1,$out1,$in3
1804	vcipherlast	$out2,$out2,$in4
1805	vcipherlast	$out3,$out3,$in5
1806	vcipherlast	$out4,$out4,$in6
1807	vcipherlast	$out5,$out5,$in7
1809	le?vperm	$out0,$out0,$out0,$inpperm
1810	le?vperm	$out1,$out1,$out1,$inpperm
1811	stvx_u		$out0,$x00,$out
1812	le?vperm	$out2,$out2,$out2,$inpperm
1813	stvx_u		$out1,$x10,$out
1814	le?vperm	$out3,$out3,$out3,$inpperm
1815	stvx_u		$out2,$x20,$out
1816	le?vperm	$out4,$out4,$out4,$inpperm
1817	stvx_u		$out3,$x30,$out
1818	le?vperm	$out5,$out5,$out5,$inpperm
1819	stvx_u		$out4,$x40,$out
1820	stvx_u		$out5,$x50,$out
# Five remaining blocks.
1826	vcipherlast	$out0,$out0,$in3
1827	vcipherlast	$out1,$out1,$in4
1828	vcipherlast	$out2,$out2,$in5
1829	vcipherlast	$out3,$out3,$in6
1830	vcipherlast	$out4,$out4,$in7
1832	le?vperm	$out0,$out0,$out0,$inpperm
1833	le?vperm	$out1,$out1,$out1,$inpperm
1834	stvx_u		$out0,$x00,$out
1835	le?vperm	$out2,$out2,$out2,$inpperm
1836	stvx_u		$out1,$x10,$out
1837	le?vperm	$out3,$out3,$out3,$inpperm
1838	stvx_u		$out2,$x20,$out
1839	le?vperm	$out4,$out4,$out4,$inpperm
1840	stvx_u		$out3,$x30,$out
1841	stvx_u		$out4,$x40,$out
# Four remaining blocks.
1847	vcipherlast	$out0,$out0,$in4
1848	vcipherlast	$out1,$out1,$in5
1849	vcipherlast	$out2,$out2,$in6
1850	vcipherlast	$out3,$out3,$in7
1852	le?vperm	$out0,$out0,$out0,$inpperm
1853	le?vperm	$out1,$out1,$out1,$inpperm
1854	stvx_u		$out0,$x00,$out
1855	le?vperm	$out2,$out2,$out2,$inpperm
1856	stvx_u		$out1,$x10,$out
1857	le?vperm	$out3,$out3,$out3,$inpperm
1858	stvx_u		$out2,$x20,$out
1859	stvx_u		$out3,$x30,$out
# Three remaining blocks.
1865	vcipherlast	$out0,$out0,$in5
1866	vcipherlast	$out1,$out1,$in6
1867	vcipherlast	$out2,$out2,$in7
1869	le?vperm	$out0,$out0,$out0,$inpperm
1870	le?vperm	$out1,$out1,$out1,$inpperm
1871	stvx_u		$out0,$x00,$out
1872	le?vperm	$out2,$out2,$out2,$inpperm
1873	stvx_u		$out1,$x10,$out
1874	stvx_u		$out2,$x20,$out
# Two remaining blocks.
1880	vcipherlast	$out0,$out0,$in6
1881	vcipherlast	$out1,$out1,$in7
1883	le?vperm	$out0,$out0,$out0,$inpperm
1884	le?vperm	$out1,$out1,$out1,$inpperm
1885	stvx_u		$out0,$x00,$out
1886	stvx_u		$out1,$x10,$out
# One remaining block.
1892	vcipherlast	$out0,$out0,$in7
1894	le?vperm	$out0,$out0,$out0,$inpperm
# --- CTR32 epilogue: scrub stacked round keys, restore ABI state ---
# $inpperm (no longer secret) is stored repeatedly over the on-stack key
# copies so key material does not linger in the frame; r10/r11 stepping
# between stores is elided from this view.
1901	stvx	$inpperm,r10,$sp	# wipe copies of round keys
1903	stvx	$inpperm,r11,$sp
1905	stvx	$inpperm,r10,$sp
1907	stvx	$inpperm,r11,$sp
1909	stvx	$inpperm,r10,$sp
1911	stvx	$inpperm,r11,$sp
1913	stvx	$inpperm,r10,$sp
1915	stvx	$inpperm,r11,$sp
# Restore callee-saved vector and GPR registers per the ABI, then pop
# the frame. (v21-v31 reloads are elided from this view.)
1919	lvx	v20,r10,$sp		# ABI says so
1941	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1942	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1943	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1944	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1945	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1946	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1947	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1950	.byte	0,12,0x14,0,0x80,6,6,0
1952	.size	.${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
1956	#########################################################################
1957	{{{	# XTS procedures						#
1958	# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,	#
1959	#                             const AES_KEY *key1, const AES_KEY *key2,	#
1960	#                             [const] unsigned char iv[16]);		#
1961	# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which	#
1962	# input tweak value is assumed to be encrypted already, and last tweak	#
1963	# value, one suitable for consecutive call on same chunk of data, is	#
1964	# written back to original buffer. In addition, in "tweak chaining"	#
1965	# mode only complete input blocks are processed.			#
# Perl-level register aliases for the XTS procedures: r3-r10 carry the
# C arguments per the PPC calling convention; v0-v12 hold working state.
1967	my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) =	map("r$_",(3..10));
1968	my ($rndkey0,$rndkey1,$inout) =				map("v$_",(0..2));
1969	my ($output,$inptail,$inpperm,$leperm,$keyperm) =	map("v$_",(3..7));
1970	my ($tweak,$seven,$eighty7,$tmp,$tweak1) =		map("v$_",(8..12));
# $key2 (r7) is re-used to hold the tail length once key2 is consumed.
1971	my $taillen =						$key2;
1973	   ($inp,$idx) = ($idx,$inp);				# reassign
# --- .${prefix}_xts_encrypt: scalar (one block at a time) XTS encrypt ---
# Loads the IV, optionally encrypts it with key2 to form the initial
# tweak (skipped when key2==NULL, i.e. "tweak chaining" mode), then
# dispatches to the 6x path for large inputs. Many connective lines
# (labels, loop heads, branches) are elided from this view.
1976	.globl	.${prefix}_xts_encrypt
1977	 mr		$inp,r3			# reassign
1983	 mfspr		r12,256			# save vrsave
# Build $leperm: a little-endian byte-reversal permute (lvsl ^ 0x07).
1987	 vspltisb	$seven,0x07		# 0x070707..07
1988	 le?lvsl	$leperm,r11,r11
1989	 le?vspltisb	$tmp,0x0f
1990	 le?vxor	$leperm,$leperm,$seven
# Load the possibly-unaligned 16-byte IV via two lvx + vperm.
1993	 lvx		$tweak,0,$ivp		# load [unaligned] iv
1994	 lvsl		$inpperm,0,$ivp
1995	 lvx		$inptail,$idx,$ivp
1996	 le?vxor	$inpperm,$inpperm,$tmp
1997	 vperm		$tweak,$tweak,$inptail,$inpperm
2000	 lvsr		$inpperm,0,r11		# prepare for unaligned load
2002	 addi		$inp,$inp,15		# 15 is not typo
2003	 le?vxor	$inpperm,$inpperm,$tmp
2005	 ${UCMP}i	$key2,0			# key2==NULL?
2006	 beq		Lxts_enc_no_key2
# Encrypt the IV with key2 (full AES pass, 2 rounds per iteration) to
# produce the starting tweak. The loop label is elided from this view.
2008	 ?lvsl		$keyperm,0,$key2	# prepare for unaligned key
2009	 lwz		$rounds,240($key2)
2010	 srwi		$rounds,$rounds,1
2011	 subi		$rounds,$rounds,1
2014	 lvx		$rndkey0,0,$key2
2015	 lvx		$rndkey1,$idx,$key2
2017	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2018	 vxor		$tweak,$tweak,$rndkey0
2019	 lvx		$rndkey0,$idx,$key2
2024	 ?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
2025	 vcipher	$tweak,$tweak,$rndkey1
2026	 lvx		$rndkey1,$idx,$key2
2028	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2029	 vcipher	$tweak,$tweak,$rndkey0
2030	 lvx		$rndkey0,$idx,$key2
2034	 ?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
2035	 vcipher	$tweak,$tweak,$rndkey1
2036	 lvx		$rndkey1,$idx,$key2
2037	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2038	 vcipherlast	$tweak,$tweak,$rndkey0
2040	 li		$ivp,0			# don't chain the tweak
2045	 and		$len,$len,$idx		# in "tweak chaining"
2046						# mode only complete
2047						# blocks are processed
2052	 ?lvsl		$keyperm,0,$key1	# prepare for unaligned key
2053	 lwz		$rounds,240($key1)
2054	 srwi		$rounds,$rounds,1
2055	 subi		$rounds,$rounds,1
# Build the GF(2^128) reduction constant 0x870101..01 used to advance
# the tweak (x*alpha).
2058	 vslb		$eighty7,$seven,$seven	# 0x808080..80
2059	 vor		$eighty7,$eighty7,$seven	# 0x878787..87
2060	 vspltisb	$tmp,1			# 0x010101..01
2061	 vsldoi		$eighty7,$eighty7,$tmp,15	# 0x870101..01
2064	 bge		_aesp8_xts_encrypt6x
2066	 andi.		$taillen,$len,15
2068	 subi		$taillen,$taillen,16
# Scalar per-block encrypt: whiten with tweak and rndkey[0], run the
# key schedule two rounds per iteration (loop label elided), then
# finish with vcipherlast against rndkey ^ tweak.
2073	 lvx		$rndkey0,0,$key1
2074	 lvx		$rndkey1,$idx,$key1
2076	 vperm		$inout,$inout,$inptail,$inpperm
2077	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2078	 vxor		$inout,$inout,$tweak
2079	 vxor		$inout,$inout,$rndkey0
2080	 lvx		$rndkey0,$idx,$key1
2087	 ?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
2088	 vcipher	$inout,$inout,$rndkey1
2089	 lvx		$rndkey1,$idx,$key1
2091	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2092	 vcipher	$inout,$inout,$rndkey0
2093	 lvx		$rndkey0,$idx,$key1
2097	 ?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
2098	 vcipher	$inout,$inout,$rndkey1
2099	 lvx		$rndkey1,$idx,$key1
2101	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2102	 vxor		$rndkey0,$rndkey0,$tweak
2103	 vcipherlast	$output,$inout,$rndkey0
2105	 le?vperm	$tmp,$output,$output,$leperm
2107	 le?stvx_u	$tmp,0,$out
2108	 be?stvx_u	$output,0,$out
2117	 lvx		$rndkey0,0,$key1
2118	 lvx		$rndkey1,$idx,$key1
# Advance the tweak: T = (T << 1) ^ (0x87 reduction when the top bit
# carried out), implemented with vsrab/vaddubm/vsldoi/vand/vxor.
2126	 vsrab		$tmp,$tweak,$seven	# next tweak value
2127	 vaddubm	$tweak,$tweak,$tweak
2128	 vsldoi		$tmp,$tmp,$tmp,15
2129	 vand		$tmp,$tmp,$eighty7
2130	 vxor		$tweak,$tweak,$tmp
2132	 vperm		$inout,$inout,$inptail,$inpperm
2133	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2134	 vxor		$inout,$inout,$tweak
2135	 vxor		$output,$output,$rndkey0	# just in case $len<16
2136	 vxor		$inout,$inout,$rndkey0
2137	 lvx		$rndkey0,$idx,$key1
# Ciphertext stealing for a partial final block: select bytes from the
# previous ciphertext block via vsel (steal loop body elided).
2144	 vxor		$output,$output,$tweak
2145	 lvsr		$inpperm,0,$len		# $inpperm is no longer needed
2146	 vxor		$inptail,$inptail,$inptail	# $inptail is no longer needed
2148	 vperm		$inptail,$inptail,$tmp,$inpperm
2149	 vsel		$inout,$inout,$output,$inptail
2158	 bdnz		Loop_xts_enc_steal
2161	 b		Loop_xts_enc		# one more time...
# Tweak-chaining exit: advance and write back the final tweak to *ivp.
2167	 vsrab		$tmp,$tweak,$seven	# next tweak value
2168	 vaddubm	$tweak,$tweak,$tweak
2169	 vsldoi		$tmp,$tmp,$tmp,15
2170	 vand		$tmp,$tmp,$eighty7
2171	 vxor		$tweak,$tweak,$tmp
2173	 le?vperm	$tweak,$tweak,$tweak,$leperm
2174	 stvx_u		$tweak,0,$ivp
2177	 mtspr		256,r12			# restore vrsave
2181	 .byte		0,12,0x04,0,0x80,6,6,0
2183	.size	.${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
# --- .${prefix}_xts_decrypt: scalar XTS decrypt ---
# Mirrors xts_encrypt: tweak setup with key2 still uses vcipher (the
# tweak is always ENcrypted, even for decryption), while the data path
# uses vncipher/vncipherlast. For ciphertext stealing, the SECOND-to-
# last tweak ($tweak1) decrypts the last full block — note the paired
# "un-xor/re-xor" lines marked :-( / :-) below. Connective lines are
# elided from this view.
2185	.globl	.${prefix}_xts_decrypt
2186	 mr		$inp,r3			# reassign
2192	 mfspr		r12,256			# save vrsave
2201	 vspltisb	$seven,0x07		# 0x070707..07
2202	 le?lvsl	$leperm,r11,r11
2203	 le?vspltisb	$tmp,0x0f
2204	 le?vxor	$leperm,$leperm,$seven
2207	 lvx		$tweak,0,$ivp		# load [unaligned] iv
2208	 lvsl		$inpperm,0,$ivp
2209	 lvx		$inptail,$idx,$ivp
2210	 le?vxor	$inpperm,$inpperm,$tmp
2211	 vperm		$tweak,$tweak,$inptail,$inpperm
2214	 lvsr		$inpperm,0,r11		# prepare for unaligned load
2216	 addi		$inp,$inp,15		# 15 is not typo
2217	 le?vxor	$inpperm,$inpperm,$tmp
2219	 ${UCMP}i	$key2,0			# key2==NULL?
2220	 beq		Lxts_dec_no_key2
# Encrypt the IV with key2 to form the initial tweak (loop head elided).
2222	 ?lvsl		$keyperm,0,$key2	# prepare for unaligned key
2223	 lwz		$rounds,240($key2)
2224	 srwi		$rounds,$rounds,1
2225	 subi		$rounds,$rounds,1
2228	 lvx		$rndkey0,0,$key2
2229	 lvx		$rndkey1,$idx,$key2
2231	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2232	 vxor		$tweak,$tweak,$rndkey0
2233	 lvx		$rndkey0,$idx,$key2
2238	 ?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
2239	 vcipher	$tweak,$tweak,$rndkey1
2240	 lvx		$rndkey1,$idx,$key2
2242	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2243	 vcipher	$tweak,$tweak,$rndkey0
2244	 lvx		$rndkey0,$idx,$key2
2248	 ?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
2249	 vcipher	$tweak,$tweak,$rndkey1
2250	 lvx		$rndkey1,$idx,$key2
2251	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2252	 vcipherlast	$tweak,$tweak,$rndkey0
2254	 li		$ivp,0			# don't chain the tweak
2260	 add		$len,$len,$idx		# in "tweak chaining"
2261						# mode only complete
2262						# blocks are processed
2267	 ?lvsl		$keyperm,0,$key1	# prepare for unaligned key
2268	 lwz		$rounds,240($key1)
2269	 srwi		$rounds,$rounds,1
2270	 subi		$rounds,$rounds,1
# GF(2^128) reduction constant 0x870101..01 for tweak advancement.
2273	 vslb		$eighty7,$seven,$seven	# 0x808080..80
2274	 vor		$eighty7,$eighty7,$seven	# 0x878787..87
2275	 vspltisb	$tmp,1			# 0x010101..01
2276	 vsldoi	$eighty7,$eighty7,$tmp,15	# 0x870101..01
2279	 bge		_aesp8_xts_decrypt6x
# Scalar per-block decrypt (loop label elided): whiten, vncipher rounds,
# finish with vncipherlast against rndkey ^ tweak.
2281	 lvx		$rndkey0,0,$key1
2282	 lvx		$rndkey1,$idx,$key1
2284	 vperm		$inout,$inout,$inptail,$inpperm
2285	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2286	 vxor		$inout,$inout,$tweak
2287	 vxor		$inout,$inout,$rndkey0
2288	 lvx		$rndkey0,$idx,$key1
2298	 ?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
2299	 vncipher	$inout,$inout,$rndkey1
2300	 lvx		$rndkey1,$idx,$key1
2302	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2303	 vncipher	$inout,$inout,$rndkey0
2304	 lvx		$rndkey0,$idx,$key1
2308	 ?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
2309	 vncipher	$inout,$inout,$rndkey1
2310	 lvx		$rndkey1,$idx,$key1
2312	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2313	 vxor		$rndkey0,$rndkey0,$tweak
2314	 vncipherlast	$output,$inout,$rndkey0
2316	 le?vperm	$tmp,$output,$output,$leperm
2318	 le?stvx_u	$tmp,0,$out
2319	 be?stvx_u	$output,0,$out
2328	 lvx		$rndkey0,0,$key1
2329	 lvx		$rndkey1,$idx,$key1
# Advance tweak for the next full block.
2332	 vsrab		$tmp,$tweak,$seven	# next tweak value
2333	 vaddubm	$tweak,$tweak,$tweak
2334	 vsldoi		$tmp,$tmp,$tmp,15
2335	 vand		$tmp,$tmp,$eighty7
2336	 vxor		$tweak,$tweak,$tmp
2338	 vperm		$inout,$inout,$inptail,$inpperm
2339	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2340	 vxor		$inout,$inout,$tweak
2341	 vxor		$inout,$inout,$rndkey0
2342	 lvx		$rndkey0,$idx,$key1
# Stealing path: compute the NEXT tweak into $tweak1 and swap which
# tweak whitens the last full block (XTS decrypt processes tweaks in
# reverse order around the stolen block).
2350	 vsrab		$tmp,$tweak,$seven	# next tweak value
2351	 vaddubm	$tweak1,$tweak,$tweak
2352	 vsldoi		$tmp,$tmp,$tmp,15
2353	 vand		$tmp,$tmp,$eighty7
2354	 vxor		$tweak1,$tweak1,$tmp
2359	 vxor		$inout,$inout,$tweak	# :-(
2360	 vxor		$inout,$inout,$tweak1	# :-)
2363	 ?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
2364	 vncipher	$inout,$inout,$rndkey1
2365	 lvx		$rndkey1,$idx,$key1
2367	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2368	 vncipher	$inout,$inout,$rndkey0
2369	 lvx		$rndkey0,$idx,$key1
2371	 bdnz		Loop_xts_dec_short
2373	 ?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
2374	 vncipher	$inout,$inout,$rndkey1
2375	 lvx		$rndkey1,$idx,$key1
2377	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2378	 vxor		$rndkey0,$rndkey0,$tweak1
2379	 vncipherlast	$output,$inout,$rndkey0
2381	 le?vperm	$tmp,$output,$output,$leperm
2383	 le?stvx_u	$tmp,0,$out
2384	 be?stvx_u	$output,0,$out
# Assemble the stolen final block with vsel, then decrypt once more
# with the un-advanced tweak (steal loop body elided).
2389	 lvx		$rndkey0,0,$key1
2390	 lvx		$rndkey1,$idx,$key1
2392	 vperm		$inout,$inout,$inptail,$inpperm
2393	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
2395	 lvsr		$inpperm,0,$len		# $inpperm is no longer needed
2396	 vxor		$inptail,$inptail,$inptail	# $inptail is no longer needed
2398	 vperm		$inptail,$inptail,$tmp,$inpperm
2399	 vsel		$inout,$inout,$output,$inptail
2401	 vxor		$rndkey0,$rndkey0,$tweak
2402	 vxor		$inout,$inout,$rndkey0
2403	 lvx		$rndkey0,$idx,$key1
2412	 bdnz		Loop_xts_dec_steal
2415	 b		Loop_xts_dec		# one more time...
# Tweak-chaining exit: advance and store the final tweak.
2421	 vsrab		$tmp,$tweak,$seven	# next tweak value
2422	 vaddubm	$tweak,$tweak,$tweak
2423	 vsldoi		$tmp,$tmp,$tmp,15
2424	 vand		$tmp,$tmp,$eighty7
2425	 vxor		$tweak,$tweak,$tmp
2427	 le?vperm	$tweak,$tweak,$tweak,$leperm
2428	 stvx_u		$tweak,0,$ivp
2431	 mtspr		256,r12			# restore vrsave
2435	 .byte		0,12,0x04,0,0x80,6,6,0
2437	.size	.${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
2439	#########################################################################
2440	{{	# Optimized XTS procedures				#
# Register aliases for the 6x-wide XTS path: $x00..$x70 are GPR offsets
# for 16-byte strides; six input/output/tweak vector lanes each.
2442	my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
# On OSX-style flavours register r0 cannot be used as an index, so the
# zero offset becomes the literal 0.
2443	    $x00=0 if ($flavour =~ /osx/);
2444	my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5 )=map("v$_",(0..5));
2445	my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
2446	my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
2447	my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
2448				# v26-v31 last 6 round keys
2449	my ($keyperm)=($out0);	# aliases with "caller", redundant assignment
# --- _aesp8_xts_encrypt6x: 6-blocks-at-a-time XTS encrypt ---
# Prologue: allocate the frame, save callee-saved vectors/GPRs per the
# ABI (intermediate saves elided from this view), then stage the key
# schedule on the stack exactly as the CTR 8x path does.
2454	_aesp8_xts_encrypt6x:
2455	 $STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2457	 li		r7,`$FRAME+8*16+15`
2458	 li		r3,`$FRAME+8*16+31`
2459	 $PUSH		r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2460	 stvx		v20,r7,$sp		# ABI says so
2483	 stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
2485	 $PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2487	 $PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2489	 $PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2491	 $PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2493	 $PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2495	 $PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
# Park both byte-orderings of the reduction constant in VSX regs 1/2 so
# the loop can flip $eighty7 between them with xxlor (no reload needed).
2499	 xxlor		2, 32+$eighty7, 32+$eighty7
2500	 vsldoi		$eighty7,$tmp,$eighty7,1	# 0x010101..87
2501	 xxlor		1, 32+$eighty7, 32+$eighty7
2506	 lxvw4x		0, $x40, r6		# load XOR contents
2510	 subi		$rounds,$rounds,3	# -4 in total
# Off-load aligned round keys to the stack (loop head elided), keeping
# the last six rounds resident in v26-v31.
2512	 lvx		$rndkey0,$x00,$key1	# load key schedule
2514	 addi		$key1,$key1,0x20
2516	 ?vperm		$rndkey0,$rndkey0,v30,$keyperm
2517	 addi		$key_,$sp,$FRAME+15
2521	 ?vperm		v24,v30,v31,$keyperm
2523	 addi		$key1,$key1,0x20
2524	 stvx		v24,$x00,$key_		# off-load round[1]
2525	 ?vperm		v25,v31,v30,$keyperm
2527	 stvx		v25,$x10,$key_		# off-load round[2]
2528	 addi		$key_,$key_,0x20
2529	 bdnz		Load_xts_enc_key
2532	 ?vperm		v24,v30,v31,$keyperm
2534	 stvx		v24,$x00,$key_		# off-load round[3]
2535	 ?vperm		v25,v31,v26,$keyperm
2537	 stvx		v25,$x10,$key_		# off-load round[4]
2538	 addi		$key_,$sp,$FRAME+15	# rewind $key_
2539	 ?vperm		v26,v26,v27,$keyperm
2541	 ?vperm		v27,v27,v28,$keyperm
2543	 ?vperm		v28,v28,v29,$keyperm
2545	 ?vperm		v29,v29,v30,$keyperm
2546	 lvx		$twk5,$x70,$key1	# borrow $twk5
2547	 ?vperm		v30,v30,v31,$keyperm
2548	 lvx		v24,$x00,$key_		# pre-load round[1]
2549	 ?vperm		v31,v31,$twk5,$keyperm
2550	 lvx		v25,$x10,$key_		# pre-load round[2]
2552	# Switch to use the following codes with 0x010101..87 to generate tweak.
2553	#     eighty7 = 0x010101..87
2554	#     vsrab		tmp, tweak, seven	# next tweak value, right shift 7 bits
2555	#     vand		tmp, tmp, eighty7	# last byte with carry
2556	#     vaddubm	tweak, tweak, tweak	# left shift 1 bit (x2)
2558	#     vpermxor	tweak, tweak, tmp, vsx
# Generate the first six tweaks ($twk0..$twk5), each pre-xored with
# rndkey[0], and whiten the six loaded input blocks. vpermxor folds the
# permute+xor of the GF reduction into one instruction (input loads
# between lanes partly elided from this view).
2560	 vperm		$in0,$inout,$inptail,$inpperm
2561	 subi		$inp,$inp,31		# undo "caller"
2562	 vxor		$twk0,$tweak,$rndkey0
2563	 vsrab		$tmp,$tweak,$seven	# next tweak value
2564	 vaddubm	$tweak,$tweak,$tweak
2565	 vand		$tmp,$tmp,$eighty7
2566	 vxor		$out0,$in0,$twk0
2568	 vpermxor	$tweak, $tweak, $tmp, $in1
2570	 lvx_u		$in1,$x10,$inp
2571	 vxor		$twk1,$tweak,$rndkey0
2572	 vsrab		$tmp,$tweak,$seven	# next tweak value
2573	 vaddubm	$tweak,$tweak,$tweak
2574	 le?vperm	$in1,$in1,$in1,$leperm
2575	 vand		$tmp,$tmp,$eighty7
2576	 vxor		$out1,$in1,$twk1
2578	 vpermxor	$tweak, $tweak, $tmp, $in2
2580	 lvx_u		$in2,$x20,$inp
2581	 andi.		$taillen,$len,15
2582	 vxor		$twk2,$tweak,$rndkey0
2583	 vsrab		$tmp,$tweak,$seven	# next tweak value
2584	 vaddubm	$tweak,$tweak,$tweak
2585	 le?vperm	$in2,$in2,$in2,$leperm
2586	 vand		$tmp,$tmp,$eighty7
2587	 vxor		$out2,$in2,$twk2
2589	 vpermxor	$tweak, $tweak, $tmp, $in3
2591	 lvx_u		$in3,$x30,$inp
2592	 sub		$len,$len,$taillen
2593	 vxor		$twk3,$tweak,$rndkey0
2594	 vsrab		$tmp,$tweak,$seven	# next tweak value
2595	 vaddubm	$tweak,$tweak,$tweak
2596	 le?vperm	$in3,$in3,$in3,$leperm
2597	 vand		$tmp,$tmp,$eighty7
2598	 vxor		$out3,$in3,$twk3
2600	 vpermxor	$tweak, $tweak, $tmp, $in4
2602	 lvx_u		$in4,$x40,$inp
2604	 vxor		$twk4,$tweak,$rndkey0
2605	 vsrab		$tmp,$tweak,$seven	# next tweak value
2606	 vaddubm	$tweak,$tweak,$tweak
2607	 le?vperm	$in4,$in4,$in4,$leperm
2608	 vand		$tmp,$tmp,$eighty7
2609	 vxor		$out4,$in4,$twk4
2611	 vpermxor	$tweak, $tweak, $tmp, $in5
2613	 lvx_u		$in5,$x50,$inp
2615	 vxor		$twk5,$tweak,$rndkey0
2616	 vsrab		$tmp,$tweak,$seven	# next tweak value
2617	 vaddubm	$tweak,$tweak,$tweak
2618	 le?vperm	$in5,$in5,$in5,$leperm
2619	 vand		$tmp,$tmp,$eighty7
2620	 vxor		$out5,$in5,$twk5
2622	 vpermxor	$tweak, $tweak, $tmp, $in0
# Fold rndkey[0] into the last round key once, so per-block last-round
# xors need only v31.
2624	 vxor		v31,v31,$rndkey0
# --- Loop_xts_enc6x: main 6-lane pipeline (loop label elided) ---
# Each iteration runs all AES rounds on six blocks while computing the
# six NEXT tweaks, loading the next six inputs, and storing the six
# previous outputs — tweak math and memory ops are threaded between the
# vcipher groups to hide latency.
2630	 vcipher	$out0,$out0,v24
2631	 vcipher	$out1,$out1,v24
2632	 vcipher	$out2,$out2,v24
2633	 vcipher	$out3,$out3,v24
2634	 vcipher	$out4,$out4,v24
2635	 vcipher	$out5,$out5,v24
2636	 lvx		v24,$x20,$key_		# round[3]
2637	 addi		$key_,$key_,0x20
2639	 vcipher	$out0,$out0,v25
2640	 vcipher	$out1,$out1,v25
2641	 vcipher	$out2,$out2,v25
2642	 vcipher	$out3,$out3,v25
2643	 vcipher	$out4,$out4,v25
2644	 vcipher	$out5,$out5,v25
2645	 lvx		v25,$x10,$key_		# round[4]
# Restore the vpermxor-flavoured reduction constant saved in vs1.
2648	 xxlor		32+$eighty7, 1, 1	# 0x010101..87
2650	 subic		$len,$len,96		# $len-=96
2651	 vxor		$in0,$twk0,v31		# xor with last round key
2652	 vcipher	$out0,$out0,v24
2653	 vcipher	$out1,$out1,v24
2654	 vsrab		$tmp,$tweak,$seven	# next tweak value
2655	 vxor		$twk0,$tweak,$rndkey0
2656	 vaddubm	$tweak,$tweak,$tweak
2657	 vcipher	$out2,$out2,v24
2658	 vcipher	$out3,$out3,v24
2659	 vcipher	$out4,$out4,v24
2660	 vcipher	$out5,$out5,v24
2662	 subfe.	r0,r0,r0		# borrow?-1:0
2663	 vand		$tmp,$tmp,$eighty7
2664	 vcipher	$out0,$out0,v25
2665	 vcipher	$out1,$out1,v25
2667	 vpermxor	$tweak, $tweak, $tmp, $in1
2668	 vcipher	$out2,$out2,v25
2669	 vcipher	$out3,$out3,v25
2671	 vsrab		$tmp,$tweak,$seven	# next tweak value
2672	 vxor		$twk1,$tweak,$rndkey0
2673	 vcipher	$out4,$out4,v25
2674	 vcipher	$out5,$out5,v25
2677	 vaddubm	$tweak,$tweak,$tweak
2678	 vcipher	$out0,$out0,v26
2679	 vcipher	$out1,$out1,v26
2680	 vand		$tmp,$tmp,$eighty7
2681	 vcipher	$out2,$out2,v26
2682	 vcipher	$out3,$out3,v26
2684	 vpermxor	$tweak, $tweak, $tmp, $in2
2685	 vcipher	$out4,$out4,v26
2686	 vcipher	$out5,$out5,v26
# r0 is 0 or -1 from the subfe above; a short remainder backs $inp off
# so the final partial batch re-reads rather than overruns.
2688	 add		$inp,$inp,r0		# $inp is adjusted in such
2689						# way that at exit from the
2690						# loop inX-in5 are loaded
2693	 vsrab		$tmp,$tweak,$seven	# next tweak value
2694	 vxor		$twk2,$tweak,$rndkey0
2695	 vaddubm	$tweak,$tweak,$tweak
2696	 vcipher	$out0,$out0,v27
2697	 vcipher	$out1,$out1,v27
2698	 vcipher	$out2,$out2,v27
2699	 vcipher	$out3,$out3,v27
2700	 vand		$tmp,$tmp,$eighty7
2701	 vcipher	$out4,$out4,v27
2702	 vcipher	$out5,$out5,v27
2704	 addi		$key_,$sp,$FRAME+15	# rewind $key_
2706	 vpermxor	$tweak, $tweak, $tmp, $in3
2707	 vcipher	$out0,$out0,v28
2708	 vcipher	$out1,$out1,v28
2710	 vsrab		$tmp,$tweak,$seven	# next tweak value
2711	 vxor		$twk3,$tweak,$rndkey0
2712	 vcipher	$out2,$out2,v28
2713	 vcipher	$out3,$out3,v28
2714	 vaddubm	$tweak,$tweak,$tweak
2715	 vcipher	$out4,$out4,v28
2716	 vcipher	$out5,$out5,v28
2717	 lvx		v24,$x00,$key_		# re-pre-load round[1]
2718	 vand		$tmp,$tmp,$eighty7
2720	 vcipher	$out0,$out0,v29
2721	 vcipher	$out1,$out1,v29
2723	 vpermxor	$tweak, $tweak, $tmp, $in4
2724	 vcipher	$out2,$out2,v29
2725	 vcipher	$out3,$out3,v29
2727	 vsrab		$tmp,$tweak,$seven	# next tweak value
2728	 vxor		$twk4,$tweak,$rndkey0
2729	 vcipher	$out4,$out4,v29
2730	 vcipher	$out5,$out5,v29
2731	 lvx		v25,$x10,$key_		# re-pre-load round[2]
2732	 vaddubm	$tweak,$tweak,$tweak
2734	 vcipher	$out0,$out0,v30
2735	 vcipher	$out1,$out1,v30
2736	 vand		$tmp,$tmp,$eighty7
2737	 vcipher	$out2,$out2,v30
2738	 vcipher	$out3,$out3,v30
2740	 vpermxor	$tweak, $tweak, $tmp, $in5
2741	 vcipher	$out4,$out4,v30
2742	 vcipher	$out5,$out5,v30
2744	 vsrab		$tmp,$tweak,$seven	# next tweak value
2745	 vxor		$twk5,$tweak,$rndkey0
# Final round: vcipherlast xors with ($inN ^ last-round-key ^ tweak),
# overlapped with loading the next six input blocks.
2747	 vcipherlast	$out0,$out0,$in0
2748	 lvx_u		$in0,$x00,$inp		# load next input block
2749	 vaddubm	$tweak,$tweak,$tweak
2750	 vcipherlast	$out1,$out1,$in1
2751	 lvx_u		$in1,$x10,$inp
2752	 vcipherlast	$out2,$out2,$in2
2753	 le?vperm	$in0,$in0,$in0,$leperm
2754	 lvx_u		$in2,$x20,$inp
2755	 vand		$tmp,$tmp,$eighty7
2756	 vcipherlast	$out3,$out3,$in3
2757	 le?vperm	$in1,$in1,$in1,$leperm
2758	 lvx_u		$in3,$x30,$inp
2759	 vcipherlast	$out4,$out4,$in4
2760	 le?vperm	$in2,$in2,$in2,$leperm
2761	 lvx_u		$in4,$x40,$inp
# $in0 must survive the vpermxor that consumes it as a permute control;
# stash it in vs10 and restore it immediately after.
2762	 xxlor		10, 32+$in0, 32+$in0
2764	 vpermxor	$tweak, $tweak, $tmp, $in0
2765	 xxlor		32+$in0, 10, 10
2766	 vcipherlast	$tmp,$out5,$in5	# last block might be needed
2768	 le?vperm	$in3,$in3,$in3,$leperm
2769	 lvx_u		$in5,$x50,$inp
2771	 le?vperm	$in4,$in4,$in4,$leperm
2772	 le?vperm	$in5,$in5,$in5,$leperm
# Store the six outputs while whitening the next batch with the fresh
# tweaks. $out5 is kept in $tmp in case it feeds ciphertext stealing.
2774	 le?vperm	$out0,$out0,$out0,$leperm
2775	 le?vperm	$out1,$out1,$out1,$leperm
2776	 stvx_u		$out0,$x00,$out		# store output
2777	 vxor		$out0,$in0,$twk0
2778	 le?vperm	$out2,$out2,$out2,$leperm
2779	 stvx_u		$out1,$x10,$out
2780	 vxor		$out1,$in1,$twk1
2781	 le?vperm	$out3,$out3,$out3,$leperm
2782	 stvx_u		$out2,$x20,$out
2783	 vxor		$out2,$in2,$twk2
2784	 le?vperm	$out4,$out4,$out4,$leperm
2785	 stvx_u		$out3,$x30,$out
2786	 vxor		$out3,$in3,$twk3
2787	 le?vperm	$out5,$tmp,$tmp,$leperm
2788	 stvx_u		$out4,$x40,$out
2789	 vxor		$out4,$in4,$twk4
2790	 le?stvx_u	$out5,$x50,$out
2791	 be?stvx_u	$tmp, $x50,$out
2792	 vxor		$out5,$in5,$twk5
2796	 beq		Loop_xts_enc6x		# did $len-=96 borrow?
# Leaving the loop: restore the plain (non-vpermxor) reduction constant
# saved in vs2 for the scalar tail code.
2798	 xxlor		32+$eighty7, 2, 2	# 0x010101..87
2800	 addic.		$len,$len,0x60
2798 xxlor 32+$eighty7, 2, 2 # 0x010101..87
2800 addic. $len,$len,0x60
2807 blt Lxts_enc6x_three
2812 vxor $out0,$in1,$twk0
2813 vxor $out1,$in2,$twk1
2814 vxor $out2,$in3,$twk2
2815 vxor $out3,$in4,$twk3
2816 vxor $out4,$in5,$twk4
2820 le?vperm $out0,$out0,$out0,$leperm
2821 vmr $twk0,$twk5 # unused tweak
2822 le?vperm $out1,$out1,$out1,$leperm
2823 stvx_u $out0,$x00,$out # store output
2824 le?vperm $out2,$out2,$out2,$leperm
2825 stvx_u $out1,$x10,$out
2826 le?vperm $out3,$out3,$out3,$leperm
2827 stvx_u $out2,$x20,$out
2828 vxor $tmp,$out4,$twk5 # last block prep for stealing
2829 le?vperm $out4,$out4,$out4,$leperm
2830 stvx_u $out3,$x30,$out
2831 stvx_u $out4,$x40,$out
2833 bne Lxts_enc6x_steal
2838 vxor $out0,$in2,$twk0
2839 vxor $out1,$in3,$twk1
2840 vxor $out2,$in4,$twk2
2841 vxor $out3,$in5,$twk3
2842 vxor $out4,$out4,$out4
2846 le?vperm $out0,$out0,$out0,$leperm
2847 vmr $twk0,$twk4 # unused tweak
2848 le?vperm $out1,$out1,$out1,$leperm
2849 stvx_u $out0,$x00,$out # store output
2850 le?vperm $out2,$out2,$out2,$leperm
2851 stvx_u $out1,$x10,$out
2852 vxor $tmp,$out3,$twk4 # last block prep for stealing
2853 le?vperm $out3,$out3,$out3,$leperm
2854 stvx_u $out2,$x20,$out
2855 stvx_u $out3,$x30,$out
2857 bne Lxts_enc6x_steal
2862 vxor $out0,$in3,$twk0
2863 vxor $out1,$in4,$twk1
2864 vxor $out2,$in5,$twk2
2865 vxor $out3,$out3,$out3
2866 vxor $out4,$out4,$out4
2870 le?vperm $out0,$out0,$out0,$leperm
2871 vmr $twk0,$twk3 # unused tweak
2872 le?vperm $out1,$out1,$out1,$leperm
2873 stvx_u $out0,$x00,$out # store output
2874 vxor $tmp,$out2,$twk3 # last block prep for stealing
2875 le?vperm $out2,$out2,$out2,$leperm
2876 stvx_u $out1,$x10,$out
2877 stvx_u $out2,$x20,$out
2879 bne Lxts_enc6x_steal
2884 vxor $out0,$in4,$twk0
2885 vxor $out1,$in5,$twk1
2886 vxor $out2,$out2,$out2
2887 vxor $out3,$out3,$out3
2888 vxor $out4,$out4,$out4
2892 le?vperm $out0,$out0,$out0,$leperm
2893 vmr $twk0,$twk2 # unused tweak
2894 vxor $tmp,$out1,$twk2 # last block prep for stealing
2895 le?vperm $out1,$out1,$out1,$leperm
2896 stvx_u $out0,$x00,$out # store output
2897 stvx_u $out1,$x10,$out
2899 bne Lxts_enc6x_steal
2904 vxor $out0,$in5,$twk0
2907 vcipher $out0,$out0,v24
2908 lvx v24,$x20,$key_ # round[3]
2909 addi $key_,$key_,0x20
2911 vcipher $out0,$out0,v25
2912 lvx v25,$x10,$key_ # round[4]
2915 add $inp,$inp,$taillen
2917 vcipher $out0,$out0,v24
2920 vcipher $out0,$out0,v25
2922 lvsr $inpperm,0,$taillen
2923 vcipher $out0,$out0,v26
2926 vcipher $out0,$out0,v27
2928 addi $key_,$sp,$FRAME+15 # rewind $key_
2929 vcipher $out0,$out0,v28
2930 lvx v24,$x00,$key_ # re-pre-load round[1]
2932 vcipher $out0,$out0,v29
2933 lvx v25,$x10,$key_ # re-pre-load round[2]
2934 vxor $twk0,$twk0,v31
2936 le?vperm $in0,$in0,$in0,$leperm
2937 vcipher $out0,$out0,v30
2939 vperm $in0,$in0,$in0,$inpperm
2940 vcipherlast $out0,$out0,$twk0
2942 vmr $twk0,$twk1 # unused tweak
2943 vxor $tmp,$out0,$twk1 # last block prep for stealing
2944 le?vperm $out0,$out0,$out0,$leperm
2945 stvx_u $out0,$x00,$out # store output
2947 bne Lxts_enc6x_steal
# --- Ciphertext stealing + epilogue for the 6x encrypt path ---
# Splice the $taillen input bytes over the head of the last ciphertext
# block ($tmp) with vperm/vsel, then loop back through the 1x path to
# encrypt the combined block (steal-loop body elided).
2955	 add		$inp,$inp,$taillen
2958	 lvsr		$inpperm,0,$taillen	# $in5 is no more
2959	 le?vperm	$in0,$in0,$in0,$leperm
2960	 vperm		$in0,$in0,$in0,$inpperm
2961	 vxor		$tmp,$tmp,$twk0
2963	 vxor		$in0,$in0,$twk0
2964	 vxor		$out0,$out0,$out0
2966	 vperm		$out0,$out0,$out1,$inpperm
2967	 vsel		$out0,$in0,$tmp,$out0	# $tmp is last block, remember?
Loop_xts_enc6x_steal:
2975	 bdnz		Loop_xts_enc6x_steal
2979	 b		Loop_xts_enc1x		# one more time...
# Tweak-chaining exit: undo the rndkey[0] pre-xor on $twk0 and write
# the resulting tweak back to *ivp.
2986	 vxor		$tweak,$twk0,$rndkey0
2987	 le?vperm	$tweak,$tweak,$tweak,$leperm
2988	 stvx_u		$tweak,0,$ivp
# Scrub stacked round keys (stores between elided) and restore ABI
# registers, mirroring the CTR path's epilogue.
2994	 stvx		$seven,r10,$sp		# wipe copies of round keys
3012	 lvx		v20,r10,$sp		# ABI says so
3034	 $POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3035	 $POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3036	 $POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3037	 $POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3038	 $POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3039	 $POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3040	 addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3043	 .byte		0,12,0x04,1,0x80,6,6,0
# --- _aesp8_xts_enc5x: shared 5-lane AES round helper for the enc6x
# tail arms (the label itself is elided; the bdnz below targets it).
# Runs the bulk rounds two at a time on $out0..$out4, then the last
# four scheduled rounds, finishing block 0 against $twk0 and blocks
# 1..4 against the pre-xored $in1..$in4 values supplied by the caller.
3048	 vcipher	$out0,$out0,v24
3049	 vcipher	$out1,$out1,v24
3050	 vcipher	$out2,$out2,v24
3051	 vcipher	$out3,$out3,v24
3052	 vcipher	$out4,$out4,v24
3053	 lvx		v24,$x20,$key_		# round[3]
3054	 addi		$key_,$key_,0x20
3056	 vcipher	$out0,$out0,v25
3057	 vcipher	$out1,$out1,v25
3058	 vcipher	$out2,$out2,v25
3059	 vcipher	$out3,$out3,v25
3060	 vcipher	$out4,$out4,v25
3061	 lvx		v25,$x10,$key_		# round[4]
3062	 bdnz		_aesp8_xts_enc5x
3064	 add		$inp,$inp,$taillen
3066	 vcipher	$out0,$out0,v24
3067	 vcipher	$out1,$out1,v24
3068	 vcipher	$out2,$out2,v24
3069	 vcipher	$out3,$out3,v24
3070	 vcipher	$out4,$out4,v24
3073	 vcipher	$out0,$out0,v25
3074	 vcipher	$out1,$out1,v25
3075	 vcipher	$out2,$out2,v25
3076	 vcipher	$out3,$out3,v25
3077	 vcipher	$out4,$out4,v25
3078	 vxor		$twk0,$twk0,v31
3080	 vcipher	$out0,$out0,v26
3081	 lvsr		$inpperm,r0,$taillen	# $in5 is no more
3082	 vcipher	$out1,$out1,v26
3083	 vcipher	$out2,$out2,v26
3084	 vcipher	$out3,$out3,v26
3085	 vcipher	$out4,$out4,v26
3088	 vcipher	$out0,$out0,v27
3090	 vcipher	$out1,$out1,v27
3091	 vcipher	$out2,$out2,v27
3092	 vcipher	$out3,$out3,v27
3093	 vcipher	$out4,$out4,v27
3096	 addi		$key_,$sp,$FRAME+15	# rewind $key_
3097	 vcipher	$out0,$out0,v28
3098	 vcipher	$out1,$out1,v28
3099	 vcipher	$out2,$out2,v28
3100	 vcipher	$out3,$out3,v28
3101	 vcipher	$out4,$out4,v28
3102	 lvx		v24,$x00,$key_		# re-pre-load round[1]
3105	 vcipher	$out0,$out0,v29
3106	 le?vperm	$in0,$in0,$in0,$leperm
3107	 vcipher	$out1,$out1,v29
3108	 vcipher	$out2,$out2,v29
3109	 vcipher	$out3,$out3,v29
3110	 vcipher	$out4,$out4,v29
3111	 lvx		v25,$x10,$key_		# re-pre-load round[2]
3114	 vcipher	$out0,$out0,v30
3115	 vperm		$in0,$in0,$in0,$inpperm
3116	 vcipher	$out1,$out1,v30
3117	 vcipher	$out2,$out2,v30
3118	 vcipher	$out3,$out3,v30
3119	 vcipher	$out4,$out4,v30
3121	 vcipherlast	$out0,$out0,$twk0
3122	 vcipherlast	$out1,$out1,$in1
3123	 vcipherlast	$out2,$out2,$in2
3124	 vcipherlast	$out3,$out3,$in3
3125	 vcipherlast	$out4,$out4,$in4
3128	 .byte	0,12,0x14,0,0,0,0,0
# --- 6x-interleaved AES-XTS decrypt: prologue ---
# Allocates a frame big enough for 21 vector saves plus six GPR saves,
# then spills the ABI-mandated non-volatile registers.
# NOTE(review): several prologue lines are elided in this extract
# (r11 presumably holds LR from an elided mflr - confirm in full source).
3131 _aesp8_xts_decrypt6x:
3132 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
# r7/r3 become offsets into the vector-save area (15/31-byte biased
# for the stvx addressing pattern used by the elided save sequence).
3134 li r7,`$FRAME+8*16+15`
3135 li r3,`$FRAME+8*16+31`
3136 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
# Save non-volatile vector registers (v20.. - remainder elided here).
3137 stvx v20,r7,$sp # ABI says so
3160 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
# Save non-volatile GPRs r26-r31 used by this routine.
3162 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3164 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3166 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3168 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3170 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3172 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
# --- Constant setup and key-schedule staging for the decrypt path ---
# Stash two variants of the GF(2^128) reduction constant (0x...87 pattern)
# in VSX scratch registers 1 and 2 for later xxlor reloads inside the loop.
3176 xxlor 2, 32+$eighty7, 32+$eighty7
3177 vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87
3178 xxlor 1, 32+$eighty7, 32+$eighty7
3183 lxvw4x 0, $x40, r6 # load XOR contents
# Loop count: total rounds minus the ones handled outside the main loop.
3187 subi $rounds,$rounds,3 # -4 in total
# Walk the caller's key schedule, fix up lane order with the
# endian-specific ?vperm, and copy round keys to an on-stack area
# (addressed via key_) so the main loop can reload them cheaply.
3189 lvx $rndkey0,$x00,$key1 # load key schedule
3191 addi $key1,$key1,0x20
3193 ?vperm $rndkey0,$rndkey0,v30,$keyperm
3194 addi $key_,$sp,$FRAME+15
3198 ?vperm v24,v30,v31,$keyperm
3200 addi $key1,$key1,0x20
3201 stvx v24,$x00,$key_ # off-load round[1]
3202 ?vperm v25,v31,v30,$keyperm
3204 stvx v25,$x10,$key_ # off-load round[2]
3205 addi $key_,$key_,0x20
3206 bdnz Load_xts_dec_key
# Remaining rounds are kept live in v24..v31 rather than re-stored.
3209 ?vperm v24,v30,v31,$keyperm
3211 stvx v24,$x00,$key_ # off-load round[3]
3212 ?vperm v25,v31,v26,$keyperm
3214 stvx v25,$x10,$key_ # off-load round[4]
3215 addi $key_,$sp,$FRAME+15 # rewind $key_
3216 ?vperm v26,v26,v27,$keyperm
3218 ?vperm v27,v27,v28,$keyperm
3220 ?vperm v28,v28,v29,$keyperm
3222 ?vperm v29,v29,v30,$keyperm
3223 lvx $twk5,$x70,$key1 # borrow $twk5
3224 ?vperm v30,v30,v31,$keyperm
3225 lvx v24,$x00,$key_ # pre-load round[1]
3226 ?vperm v31,v31,$twk5,$keyperm
3227 lvx v25,$x10,$key_ # pre-load round[2]
# --- Derive the first six XTS tweaks and whiten the first six blocks ---
# Each step doubles the tweak in GF(2^128): vsrab extracts the carry bits,
# vaddubm doubles each byte, vand masks with the 0x...87 reduction constant,
# and vpermxor folds the reduction back in. Each block's tweak is also
# XORed with round key 0 so input whitening and round-0 merge into one vxor.
3229 vperm $in0,$inout,$inptail,$inpperm
3230 subi $inp,$inp,31 # undo "caller"
3231 vxor $twk0,$tweak,$rndkey0
3232 vsrab $tmp,$tweak,$seven # next tweak value
3233 vaddubm $tweak,$tweak,$tweak
3234 vand $tmp,$tmp,$eighty7
3235 vxor $out0,$in0,$twk0
3237 vpermxor $tweak, $tweak, $tmp, $in1
# Block 1: load, byte-swap on LE, whiten with tweak 1.
3239 lvx_u $in1,$x10,$inp
3240 vxor $twk1,$tweak,$rndkey0
3241 vsrab $tmp,$tweak,$seven # next tweak value
3242 vaddubm $tweak,$tweak,$tweak
3243 le?vperm $in1,$in1,$in1,$leperm
3244 vand $tmp,$tmp,$eighty7
3245 vxor $out1,$in1,$twk1
3247 vpermxor $tweak, $tweak, $tmp, $in2
# Block 2; also compute the residual tail length (len mod 16) for
# the ciphertext-stealing path.
3249 lvx_u $in2,$x20,$inp
3250 andi. $taillen,$len,15
3251 vxor $twk2,$tweak,$rndkey0
3252 vsrab $tmp,$tweak,$seven # next tweak value
3253 vaddubm $tweak,$tweak,$tweak
3254 le?vperm $in2,$in2,$in2,$leperm
3255 vand $tmp,$tmp,$eighty7
3256 vxor $out2,$in2,$twk2
3258 vpermxor $tweak, $tweak, $tmp, $in3
# Block 3; trim the tail bytes from the main-loop length.
3260 lvx_u $in3,$x30,$inp
3261 sub $len,$len,$taillen
3262 vxor $twk3,$tweak,$rndkey0
3263 vsrab $tmp,$tweak,$seven # next tweak value
3264 vaddubm $tweak,$tweak,$tweak
3265 le?vperm $in3,$in3,$in3,$leperm
3266 vand $tmp,$tmp,$eighty7
3267 vxor $out3,$in3,$twk3
3269 vpermxor $tweak, $tweak, $tmp, $in4
# Block 4.
3271 lvx_u $in4,$x40,$inp
3273 vxor $twk4,$tweak,$rndkey0
3274 vsrab $tmp,$tweak,$seven # next tweak value
3275 vaddubm $tweak,$tweak,$tweak
3276 le?vperm $in4,$in4,$in4,$leperm
3277 vand $tmp,$tmp,$eighty7
3278 vxor $out4,$in4,$twk4
3280 vpermxor $tweak, $tweak, $tmp, $in5
# Block 5.
3282 lvx_u $in5,$x50,$inp
3284 vxor $twk5,$tweak,$rndkey0
3285 vsrab $tmp,$tweak,$seven # next tweak value
3286 vaddubm $tweak,$tweak,$tweak
3287 le?vperm $in5,$in5,$in5,$leperm
3288 vand $tmp,$tmp,$eighty7
3289 vxor $out5,$in5,$twk5
3291 vpermxor $tweak, $tweak, $tmp, $in0
# Pre-fold round key 0 into the last round key: the loop then uses
# (lastkey XOR rndkey0 XOR tweak) as the vncipherlast operand.
3293 vxor v31,v31,$rndkey0
3299 vncipher $out0,$out0,v24
3300 vncipher $out1,$out1,v24
3301 vncipher $out2,$out2,v24
3302 vncipher $out3,$out3,v24
3303 vncipher $out4,$out4,v24
3304 vncipher $out5,$out5,v24
3305 lvx v24,$x20,$key_ # round[3]
3306 addi $key_,$key_,0x20
3308 vncipher $out0,$out0,v25
3309 vncipher $out1,$out1,v25
3310 vncipher $out2,$out2,v25
3311 vncipher $out3,$out3,v25
3312 vncipher $out4,$out4,v25
3313 vncipher $out5,$out5,v25
3314 lvx v25,$x10,$key_ # round[4]
3317 xxlor 32+$eighty7, 1, 1 # 0x010101..87
3319 subic $len,$len,96 # $len-=96
3320 vxor $in0,$twk0,v31 # xor with last round key
3321 vncipher $out0,$out0,v24
3322 vncipher $out1,$out1,v24
3323 vsrab $tmp,$tweak,$seven # next tweak value
3324 vxor $twk0,$tweak,$rndkey0
3325 vaddubm $tweak,$tweak,$tweak
3326 vncipher $out2,$out2,v24
3327 vncipher $out3,$out3,v24
3328 vncipher $out4,$out4,v24
3329 vncipher $out5,$out5,v24
3331 subfe. r0,r0,r0 # borrow?-1:0
3332 vand $tmp,$tmp,$eighty7
3333 vncipher $out0,$out0,v25
3334 vncipher $out1,$out1,v25
3336 vpermxor $tweak, $tweak, $tmp, $in1
3337 vncipher $out2,$out2,v25
3338 vncipher $out3,$out3,v25
3340 vsrab $tmp,$tweak,$seven # next tweak value
3341 vxor $twk1,$tweak,$rndkey0
3342 vncipher $out4,$out4,v25
3343 vncipher $out5,$out5,v25
3346 vaddubm $tweak,$tweak,$tweak
3347 vncipher $out0,$out0,v26
3348 vncipher $out1,$out1,v26
3349 vand $tmp,$tmp,$eighty7
3350 vncipher $out2,$out2,v26
3351 vncipher $out3,$out3,v26
3353 vpermxor $tweak, $tweak, $tmp, $in2
3354 vncipher $out4,$out4,v26
3355 vncipher $out5,$out5,v26
3357 add $inp,$inp,r0 # $inp is adjusted in such
3358 # way that at exit from the
3359 # loop inX-in5 are loaded
3362 vsrab $tmp,$tweak,$seven # next tweak value
3363 vxor $twk2,$tweak,$rndkey0
3364 vaddubm $tweak,$tweak,$tweak
3365 vncipher $out0,$out0,v27
3366 vncipher $out1,$out1,v27
3367 vncipher $out2,$out2,v27
3368 vncipher $out3,$out3,v27
3369 vand $tmp,$tmp,$eighty7
3370 vncipher $out4,$out4,v27
3371 vncipher $out5,$out5,v27
3373 addi $key_,$sp,$FRAME+15 # rewind $key_
3375 vpermxor $tweak, $tweak, $tmp, $in3
3376 vncipher $out0,$out0,v28
3377 vncipher $out1,$out1,v28
3379 vsrab $tmp,$tweak,$seven # next tweak value
3380 vxor $twk3,$tweak,$rndkey0
3381 vncipher $out2,$out2,v28
3382 vncipher $out3,$out3,v28
3383 vaddubm $tweak,$tweak,$tweak
3384 vncipher $out4,$out4,v28
3385 vncipher $out5,$out5,v28
3386 lvx v24,$x00,$key_ # re-pre-load round[1]
3387 vand $tmp,$tmp,$eighty7
3389 vncipher $out0,$out0,v29
3390 vncipher $out1,$out1,v29
3392 vpermxor $tweak, $tweak, $tmp, $in4
3393 vncipher $out2,$out2,v29
3394 vncipher $out3,$out3,v29
3396 vsrab $tmp,$tweak,$seven # next tweak value
3397 vxor $twk4,$tweak,$rndkey0
3398 vncipher $out4,$out4,v29
3399 vncipher $out5,$out5,v29
3400 lvx v25,$x10,$key_ # re-pre-load round[2]
3401 vaddubm $tweak,$tweak,$tweak
3403 vncipher $out0,$out0,v30
3404 vncipher $out1,$out1,v30
3405 vand $tmp,$tmp,$eighty7
3406 vncipher $out2,$out2,v30
3407 vncipher $out3,$out3,v30
3409 vpermxor $tweak, $tweak, $tmp, $in5
3410 vncipher $out4,$out4,v30
3411 vncipher $out5,$out5,v30
3413 vsrab $tmp,$tweak,$seven # next tweak value
3414 vxor $twk5,$tweak,$rndkey0
3416 vncipherlast $out0,$out0,$in0
3417 lvx_u $in0,$x00,$inp # load next input block
3418 vaddubm $tweak,$tweak,$tweak
3419 vncipherlast $out1,$out1,$in1
3420 lvx_u $in1,$x10,$inp
3421 vncipherlast $out2,$out2,$in2
3422 le?vperm $in0,$in0,$in0,$leperm
3423 lvx_u $in2,$x20,$inp
3424 vand $tmp,$tmp,$eighty7
3425 vncipherlast $out3,$out3,$in3
3426 le?vperm $in1,$in1,$in1,$leperm
3427 lvx_u $in3,$x30,$inp
3428 vncipherlast $out4,$out4,$in4
3429 le?vperm $in2,$in2,$in2,$leperm
3430 lvx_u $in4,$x40,$inp
3431 xxlor 10, 32+$in0, 32+$in0
3433 vpermxor $tweak, $tweak, $tmp, $in0
3434 xxlor 32+$in0, 10, 10
3435 vncipherlast $out5,$out5,$in5
3436 le?vperm $in3,$in3,$in3,$leperm
3437 lvx_u $in5,$x50,$inp
3439 le?vperm $in4,$in4,$in4,$leperm
3440 le?vperm $in5,$in5,$in5,$leperm
3442 le?vperm $out0,$out0,$out0,$leperm
3443 le?vperm $out1,$out1,$out1,$leperm
3444 stvx_u $out0,$x00,$out # store output
3445 vxor $out0,$in0,$twk0
3446 le?vperm $out2,$out2,$out2,$leperm
3447 stvx_u $out1,$x10,$out
3448 vxor $out1,$in1,$twk1
3449 le?vperm $out3,$out3,$out3,$leperm
3450 stvx_u $out2,$x20,$out
3451 vxor $out2,$in2,$twk2
3452 le?vperm $out4,$out4,$out4,$leperm
3453 stvx_u $out3,$x30,$out
3454 vxor $out3,$in3,$twk3
3455 le?vperm $out5,$out5,$out5,$leperm
3456 stvx_u $out4,$x40,$out
3457 vxor $out4,$in4,$twk4
3458 stvx_u $out5,$x50,$out
3459 vxor $out5,$in5,$twk5
3463 beq Loop_xts_dec6x # did $len-=96 borrow?
3465 xxlor 32+$eighty7, 2, 2 # 0x010101..87
3467 addic. $len,$len,0x60
3474 blt Lxts_dec6x_three
3479 vxor $out0,$in1,$twk0
3480 vxor $out1,$in2,$twk1
3481 vxor $out2,$in3,$twk2
3482 vxor $out3,$in4,$twk3
3483 vxor $out4,$in5,$twk4
3487 le?vperm $out0,$out0,$out0,$leperm
3488 vmr $twk0,$twk5 # unused tweak
3489 vxor $twk1,$tweak,$rndkey0
3490 le?vperm $out1,$out1,$out1,$leperm
3491 stvx_u $out0,$x00,$out # store output
3492 vxor $out0,$in0,$twk1
3493 le?vperm $out2,$out2,$out2,$leperm
3494 stvx_u $out1,$x10,$out
3495 le?vperm $out3,$out3,$out3,$leperm
3496 stvx_u $out2,$x20,$out
3497 le?vperm $out4,$out4,$out4,$leperm
3498 stvx_u $out3,$x30,$out
3499 stvx_u $out4,$x40,$out
3501 bne Lxts_dec6x_steal
3506 vxor $out0,$in2,$twk0
3507 vxor $out1,$in3,$twk1
3508 vxor $out2,$in4,$twk2
3509 vxor $out3,$in5,$twk3
3510 vxor $out4,$out4,$out4
3514 le?vperm $out0,$out0,$out0,$leperm
3515 vmr $twk0,$twk4 # unused tweak
3517 le?vperm $out1,$out1,$out1,$leperm
3518 stvx_u $out0,$x00,$out # store output
3519 vxor $out0,$in0,$twk5
3520 le?vperm $out2,$out2,$out2,$leperm
3521 stvx_u $out1,$x10,$out
3522 le?vperm $out3,$out3,$out3,$leperm
3523 stvx_u $out2,$x20,$out
3524 stvx_u $out3,$x30,$out
3526 bne Lxts_dec6x_steal
3531 vxor $out0,$in3,$twk0
3532 vxor $out1,$in4,$twk1
3533 vxor $out2,$in5,$twk2
3534 vxor $out3,$out3,$out3
3535 vxor $out4,$out4,$out4
3539 le?vperm $out0,$out0,$out0,$leperm
3540 vmr $twk0,$twk3 # unused tweak
3542 le?vperm $out1,$out1,$out1,$leperm
3543 stvx_u $out0,$x00,$out # store output
3544 vxor $out0,$in0,$twk4
3545 le?vperm $out2,$out2,$out2,$leperm
3546 stvx_u $out1,$x10,$out
3547 stvx_u $out2,$x20,$out
3549 bne Lxts_dec6x_steal
3554 vxor $out0,$in4,$twk0
3555 vxor $out1,$in5,$twk1
3556 vxor $out2,$out2,$out2
3557 vxor $out3,$out3,$out3
3558 vxor $out4,$out4,$out4
3562 le?vperm $out0,$out0,$out0,$leperm
3563 vmr $twk0,$twk2 # unused tweak
3565 le?vperm $out1,$out1,$out1,$leperm
3566 stvx_u $out0,$x00,$out # store output
3567 vxor $out0,$in0,$twk3
3568 stvx_u $out1,$x10,$out
3570 bne Lxts_dec6x_steal
3575 vxor $out0,$in5,$twk0
3578 vncipher $out0,$out0,v24
3579 lvx v24,$x20,$key_ # round[3]
3580 addi $key_,$key_,0x20
3582 vncipher $out0,$out0,v25
3583 lvx v25,$x10,$key_ # round[4]
3587 vncipher $out0,$out0,v24
3591 vncipher $out0,$out0,v25
3594 vncipher $out0,$out0,v26
3597 vncipher $out0,$out0,v27
3599 addi $key_,$sp,$FRAME+15 # rewind $key_
3600 vncipher $out0,$out0,v28
3601 lvx v24,$x00,$key_ # re-pre-load round[1]
3603 vncipher $out0,$out0,v29
3604 lvx v25,$x10,$key_ # re-pre-load round[2]
3605 vxor $twk0,$twk0,v31
3607 le?vperm $in0,$in0,$in0,$leperm
3608 vncipher $out0,$out0,v30
3611 vncipherlast $out0,$out0,$twk0
3613 vmr $twk0,$twk1 # unused tweak
3615 le?vperm $out0,$out0,$out0,$leperm
3616 stvx_u $out0,$x00,$out # store output
3618 vxor $out0,$in0,$twk2
3619 bne Lxts_dec6x_steal
3628 le?vperm $in0,$in0,$in0,$leperm
3629 vxor $out0,$in0,$twk1
3631 vncipher $out0,$out0,v24
3632 lvx v24,$x20,$key_ # round[3]
3633 addi $key_,$key_,0x20
3635 vncipher $out0,$out0,v25
3636 lvx v25,$x10,$key_ # round[4]
3637 bdnz Lxts_dec6x_steal
3639 add $inp,$inp,$taillen
3640 vncipher $out0,$out0,v24
3643 vncipher $out0,$out0,v25
3646 vncipher $out0,$out0,v26
3648 lvsr $inpperm,0,$taillen # $in5 is no more
3649 vncipher $out0,$out0,v27
3651 addi $key_,$sp,$FRAME+15 # rewind $key_
3652 vncipher $out0,$out0,v28
3653 lvx v24,$x00,$key_ # re-pre-load round[1]
3655 vncipher $out0,$out0,v29
3656 lvx v25,$x10,$key_ # re-pre-load round[2]
3657 vxor $twk1,$twk1,v31
3659 le?vperm $in0,$in0,$in0,$leperm
3660 vncipher $out0,$out0,v30
3662 vperm $in0,$in0,$in0,$inpperm
3663 vncipherlast $tmp,$out0,$twk1
3665 le?vperm $out0,$tmp,$tmp,$leperm
3666 le?stvx_u $out0,0,$out
3667 be?stvx_u $tmp,0,$out
3669 vxor $out0,$out0,$out0
3671 vperm $out0,$out0,$out1,$inpperm
3672 vsel $out0,$in0,$tmp,$out0
3673 vxor $out0,$out0,$twk0
3677 Loop_xts_dec6x_steal:
3680 bdnz Loop_xts_dec6x_steal
3684 b Loop_xts_dec1x # one more time...
# --- XTS decrypt exit path ---
# Recompute the output tweak (undo round-key whitening), byte-swap on
# little-endian, and store it through the IV pointer for the caller.
# NOTE(review): this extract is elided; the leading numbers on each line are
# original-file line numbers left by extraction, not part of the assembly.
3691 vxor $tweak,$twk0,$rndkey0
3692 le?vperm $tweak,$tweak,$tweak,$leperm
3693 stvx_u $tweak,0,$ivp
# Scrub the on-stack round-key copies before returning.
3699 stvx $seven,r10,$sp # wipe copies of round keys
# Restore non-volatile vector registers saved in the prologue.
3717 lvx v20,r10,$sp # ABI says so
# Restore non-volatile GPRs r26-r31 and pop the stack frame.
3739 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3740 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3741 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3742 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3743 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3744 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3745 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
# Traceback/ABI annotation bytes for the function end.
3748 .byte 0,12,0x04,1,0x80,6,6,0
# --- 5-way interleaved AES decrypt rounds (tail helper) ---
# Mirror of the encrypt-side 5x helper, using vncipher (inverse cipher
# round). Round keys alternate between v24/v25, reloaded from the on-stack
# schedule as the counter loop (bdnz) advances.
# NOTE(review): the loop entry label is in an elided line above this extract.
3753 vncipher $out0,$out0,v24
3754 vncipher $out1,$out1,v24
3755 vncipher $out2,$out2,v24
3756 vncipher $out3,$out3,v24
3757 vncipher $out4,$out4,v24
3758 lvx v24,$x20,$key_ # round[3]
3759 addi $key_,$key_,0x20
3761 vncipher $out0,$out0,v25
3762 vncipher $out1,$out1,v25
3763 vncipher $out2,$out2,v25
3764 vncipher $out3,$out3,v25
3765 vncipher $out4,$out4,v25
3766 lvx v25,$x10,$key_ # round[4]
3767 bdnz _aesp8_xts_dec5x
# Loop done: fixed final rounds with keys held live in v24..v30.
3770 vncipher $out0,$out0,v24
3771 vncipher $out1,$out1,v24
3772 vncipher $out2,$out2,v24
3773 vncipher $out3,$out3,v24
3774 vncipher $out4,$out4,v24
3778 vncipher $out0,$out0,v25
3779 vncipher $out1,$out1,v25
3780 vncipher $out2,$out2,v25
3781 vncipher $out3,$out3,v25
3782 vncipher $out4,$out4,v25
# Fold the last round key into the tweak so vncipherlast applies
# "last round + XTS post-whitening" in one instruction.
3783 vxor $twk0,$twk0,v31
3786 vncipher $out0,$out0,v26
3787 vncipher $out1,$out1,v26
3788 vncipher $out2,$out2,v26
3789 vncipher $out3,$out3,v26
3790 vncipher $out4,$out4,v26
3793 vncipher $out0,$out0,v27
3795 vncipher $out1,$out1,v27
3796 vncipher $out2,$out2,v27
3797 vncipher $out3,$out3,v27
3798 vncipher $out4,$out4,v27
# Reset the key pointer to the start of the on-stack schedule copy.
3801 addi $key_,$sp,$FRAME+15 # rewind $key_
3802 vncipher $out0,$out0,v28
3803 vncipher $out1,$out1,v28
3804 vncipher $out2,$out2,v28
3805 vncipher $out3,$out3,v28
3806 vncipher $out4,$out4,v28
3807 lvx v24,$x00,$key_ # re-pre-load round[1]
3810 vncipher $out0,$out0,v29
3811 le?vperm $in0,$in0,$in0,$leperm
3812 vncipher $out1,$out1,v29
3813 vncipher $out2,$out2,v29
3814 vncipher $out3,$out3,v29
3815 vncipher $out4,$out4,v29
3816 lvx v25,$x10,$key_ # re-pre-load round[2]
3819 vncipher $out0,$out0,v30
3820 vncipher $out1,$out1,v30
3821 vncipher $out2,$out2,v30
3822 vncipher $out3,$out3,v30
3823 vncipher $out4,$out4,v30
# Final round: second operand already carries (last key XOR tweak),
# so each vncipherlast also performs the XTS output whitening.
3825 vncipherlast $out0,$out0,$twk0
3826 vncipherlast $out1,$out1,$in1
3827 vncipherlast $out2,$out2,$in2
3828 vncipherlast $out3,$out3,$in3
3829 vncipherlast $out4,$out4,$in4
# Traceback/ABI annotation bytes for this helper.
3833 .byte 0,12,0x14,0,0,0,0,0
3838 foreach(split("\n",$code)) {
3839 s/\`([^\`]*)\`/eval($1)/geo;
3841 # constants table endian-specific conversion
3842 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
3846 # convert to endian-agnostic format
3848 foreach (split(/,\s*/,$2)) {
3849 my $l = /^0/?oct:int;
3850 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
3853 @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
3856 # little-endian conversion
3857 if ($flavour =~ /le$/o) {
3858 SWITCH: for($conv) {
3859 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
3860 /\?rev/ && do { @bytes=reverse(@bytes); last; };
3865 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
3868 $consts=0 if (m/Lconsts:/o); # end of table
3870 # instructions prefixed with '?' are endian-specific and need
3871 # to be adjusted accordingly...
3872 if ($flavour =~ /le$/o) { # little-endian
3877 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
3878 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
3879 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
3880 } else { # big-endian