2 # SPDX-License-Identifier: GPL-2.0
4 # This code is taken from CRYPTOGAMs[1] and is included here using the option
5 # in the license to distribute the code under the GPL. Therefore this program
6 # is free software; you can redistribute it and/or modify it under the terms of
7 # the GNU General Public License version 2 as published by the Free Software
10 # [1] https://www.openssl.org/~appro/cryptogams/
12 # Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
13 # All rights reserved.
15 # Redistribution and use in source and binary forms, with or without
16 # modification, are permitted provided that the following conditions
19 # * Redistributions of source code must retain copyright notices,
20 # this list of conditions and the following disclaimer.
22 # * Redistributions in binary form must reproduce the above
23 # copyright notice, this list of conditions and the following
24 # disclaimer in the documentation and/or other materials
25 # provided with the distribution.
27 # * Neither the name of the CRYPTOGAMS nor the names of its
28 # copyright holder and contributors may be used to endorse or
29 # promote products derived from this software without specific
30 # prior written permission.
32 # ALTERNATIVELY, provided that this notice is retained in full, this
33 # product may be distributed under the terms of the GNU General Public
34 # License (GPL), in which case the provisions of the GPL apply INSTEAD OF
37 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
38 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
49 # ====================================================================
50 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
51 # project. The module is, however, dual licensed under OpenSSL and
52 # CRYPTOGAMS licenses depending on where you obtain it. For further
53 # details see https://www.openssl.org/~appro/cryptogams/.
54 # ====================================================================
56 # This module implements support for AES instructions as per PowerISA
57 # specification version 2.07, first implemented by POWER8 processor.
58 # The module is endian-agnostic in the sense that it supports both big-
59 # and little-endian cases. Data alignment in parallelizable modes is
60 # handled with VSX loads and stores, which implies MSR.VSX flag being
61 # set. It should also be noted that the ISA specification doesn't prohibit
62 # alignment exceptions for these instructions on page boundaries.
63 # Initially alignment was handled in pure AltiVec/VMX way [when data
64 # is aligned programmatically, which in turn guarantees exception-
65 # free execution], but it turned out to hamper performance when vcipher
66 # instructions are interleaved. It's reckoned that possible
67 # misalignment penalties at page boundaries are on average lower
68 # than the additional overhead of the pure AltiVec approach.
72 # Add XTS subroutine; improvements of 9x on little-endian and 12x on
73 # big-endian systems were measured.
75 ######################################################################
76 # Current large-block performance in cycles per byte processed with
77 # 128-bit key (less is better).
79 # CBC en-/decrypt CTR XTS
80 # POWER8[le] 3.96/0.72 0.74 1.1
81 # POWER8[be] 3.75/0.65 0.66 1.0
85 if ($flavour =~ /64/) {
93 } elsif ($flavour =~ /32/) {
101 } else { die "nonsense $flavour"; }
103 $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
# Locate the ppc-xlate.pl translator relative to this script.  $dir receives
# the directory part of $0 (everything up to and including the last "/" or
# "\").  The script's own directory is tried first, then ../../perlasm/
# relative to it; generation aborts if neither candidate is a plain file.
105 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
106 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
107 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
108 die "can't locate ppc-xlate.pl";
# Redirect STDOUT into a pipe feeding ppc-xlate.pl, run with the current perl
# ($^X) and given the flavour plus the next command-line argument.
# Low-precedence "or" is required here: "||" binds tighter than the open()
# list operator and would attach to the (always true) command string, so a
# failed open() would never reach die.
110 open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
118 #########################################################################
119 {{{ # Key setup procedures #
# Symbolic names for the registers used by the key-setup assembly below.
# General-purpose registers r3..r8:
120 my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
# Vector registers v0..v6: working values for the key expansion.
121 my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
# Vector registers v7..v11: used by the vperm/vsel/vmr sequences that
# assemble each round key before it is written out
# (NOTE(review): presumably to cope with an unaligned $out -- confirm).
122 my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
131 .long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
132 .long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
133 .long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
138 mflr $ptr #vvvvv "distance" between . and rcon
143 .byte 0,12,0x14,0,0,0,0,0
144 .asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
146 .globl .${prefix}_set_encrypt_key
149 $PUSH r11,$LRSAVE($sp)
153 beq- Lenc_key_abort # if ($inp==0) return -1;
155 beq- Lenc_key_abort # if ($out==0) return -1;
173 addi $inp,$inp,15 # 15 is not typo
174 lvsr $key,0,r9 # borrow $key
178 le?vspltisb $mask,0x0f # borrow $mask
180 le?vxor $key,$key,$mask # adjust for byte swap
183 vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
185 vxor $zero,$zero,$zero
188 ?lvsr $outperm,0,$out
191 ?vperm $outmask,$zero,$outmask,$outperm
201 vperm $key,$in0,$in0,$mask # rotate-n-splat
202 vsldoi $tmp,$zero,$in0,12 # >>32
203 vperm $outtail,$in0,$in0,$outperm # rotate
204 vsel $stage,$outhead,$outtail,$outmask
205 vmr $outhead,$outtail
206 vcipherlast $key,$key,$rcon
211 vsldoi $tmp,$zero,$tmp,12 # >>32
213 vsldoi $tmp,$zero,$tmp,12 # >>32
215 vadduwm $rcon,$rcon,$rcon
219 lvx $rcon,0,$ptr # last two round keys
221 vperm $key,$in0,$in0,$mask # rotate-n-splat
222 vsldoi $tmp,$zero,$in0,12 # >>32
223 vperm $outtail,$in0,$in0,$outperm # rotate
224 vsel $stage,$outhead,$outtail,$outmask
225 vmr $outhead,$outtail
226 vcipherlast $key,$key,$rcon
231 vsldoi $tmp,$zero,$tmp,12 # >>32
233 vsldoi $tmp,$zero,$tmp,12 # >>32
235 vadduwm $rcon,$rcon,$rcon
238 vperm $key,$in0,$in0,$mask # rotate-n-splat
239 vsldoi $tmp,$zero,$in0,12 # >>32
240 vperm $outtail,$in0,$in0,$outperm # rotate
241 vsel $stage,$outhead,$outtail,$outmask
242 vmr $outhead,$outtail
243 vcipherlast $key,$key,$rcon
248 vsldoi $tmp,$zero,$tmp,12 # >>32
250 vsldoi $tmp,$zero,$tmp,12 # >>32
253 vperm $outtail,$in0,$in0,$outperm # rotate
254 vsel $stage,$outhead,$outtail,$outmask
255 vmr $outhead,$outtail
258 addi $inp,$out,15 # 15 is not typo
268 vperm $outtail,$in0,$in0,$outperm # rotate
269 vsel $stage,$outhead,$outtail,$outmask
270 vmr $outhead,$outtail
273 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
274 vspltisb $key,8 # borrow $key
276 vsububm $mask,$mask,$key # adjust the mask
279 vperm $key,$in1,$in1,$mask # rotate-n-splat
280 vsldoi $tmp,$zero,$in0,12 # >>32
281 vcipherlast $key,$key,$rcon
284 vsldoi $tmp,$zero,$tmp,12 # >>32
286 vsldoi $tmp,$zero,$tmp,12 # >>32
289 vsldoi $stage,$zero,$in1,8
292 vsldoi $in1,$zero,$in1,12 # >>32
293 vadduwm $rcon,$rcon,$rcon
297 vsldoi $stage,$stage,$in0,8
299 vperm $key,$in1,$in1,$mask # rotate-n-splat
300 vsldoi $tmp,$zero,$in0,12 # >>32
301 vperm $outtail,$stage,$stage,$outperm # rotate
302 vsel $stage,$outhead,$outtail,$outmask
303 vmr $outhead,$outtail
304 vcipherlast $key,$key,$rcon
308 vsldoi $stage,$in0,$in1,8
310 vsldoi $tmp,$zero,$tmp,12 # >>32
311 vperm $outtail,$stage,$stage,$outperm # rotate
312 vsel $stage,$outhead,$outtail,$outmask
313 vmr $outhead,$outtail
315 vsldoi $tmp,$zero,$tmp,12 # >>32
322 vsldoi $in1,$zero,$in1,12 # >>32
323 vadduwm $rcon,$rcon,$rcon
327 vperm $outtail,$in0,$in0,$outperm # rotate
328 vsel $stage,$outhead,$outtail,$outmask
329 vmr $outhead,$outtail
331 addi $inp,$out,15 # 15 is not typo
344 vperm $outtail,$in0,$in0,$outperm # rotate
345 vsel $stage,$outhead,$outtail,$outmask
346 vmr $outhead,$outtail
349 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
353 vperm $key,$in1,$in1,$mask # rotate-n-splat
354 vsldoi $tmp,$zero,$in0,12 # >>32
355 vperm $outtail,$in1,$in1,$outperm # rotate
356 vsel $stage,$outhead,$outtail,$outmask
357 vmr $outhead,$outtail
358 vcipherlast $key,$key,$rcon
363 vsldoi $tmp,$zero,$tmp,12 # >>32
365 vsldoi $tmp,$zero,$tmp,12 # >>32
367 vadduwm $rcon,$rcon,$rcon
369 vperm $outtail,$in0,$in0,$outperm # rotate
370 vsel $stage,$outhead,$outtail,$outmask
371 vmr $outhead,$outtail
373 addi $inp,$out,15 # 15 is not typo
377 vspltw $key,$in0,3 # just splat
378 vsldoi $tmp,$zero,$in1,12 # >>32
382 vsldoi $tmp,$zero,$tmp,12 # >>32
384 vsldoi $tmp,$zero,$tmp,12 # >>32
392 lvx $in1,0,$inp # redundant in aligned case
393 vsel $in1,$outhead,$in1,$outmask
403 .byte 0,12,0x14,1,0,0,3,0
405 .size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
407 .globl .${prefix}_set_decrypt_key
408 $STU $sp,-$FRAME($sp)
410 $PUSH r10,$FRAME+$LRSAVE($sp)
418 subi $inp,$out,240 # first round key
419 srwi $rounds,$rounds,1
420 add $out,$inp,$cnt # last round key
444 xor r3,r3,r3 # return value
449 .byte 0,12,4,1,0x80,0,3,0
451 .size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
454 #########################################################################
455 {{{ # Single block en- and decrypt procedures #
# $n is spliced into the cipher mnemonic (v${n}cipherlast below): "n" when
# $dir is "de", which yields the vncipher* form; empty otherwise.
458 my $n = $dir eq "de" ? "n" : "";
# Symbolic names for general-purpose registers r3..r7 used by this routine.
459 my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
462 .globl .${prefix}_${dir}crypt
463 lwz $rounds,240($key)
466 li $idx,15 # 15 is not typo
472 lvsl v2,0,$inp # inpperm
474 ?lvsl v3,0,r11 # outperm
477 vperm v0,v0,v1,v2 # align [and byte swap in LE]
479 ?lvsl v5,0,$key # keyperm
480 srwi $rounds,$rounds,1
483 subi $rounds,$rounds,1
484 ?vperm v1,v1,v2,v5 # align round key
506 v${n}cipherlast v0,v0,v1
510 li $idx,15 # 15 is not typo
511 ?vperm v2,v1,v2,v3 # outmask
513 lvx v1,0,$out # outhead
514 vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
524 .byte 0,12,0x14,0,0,0,3,0
526 .size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
534 foreach(split("\n",$code)) {
535 s/\`([^\`]*)\`/eval($1)/geo;
537 # constants table endian-specific conversion
538 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
542 # convert to endian-agnostic format
544 foreach (split(/,\s*/,$2)) {
545 my $l = /^0/?oct:int;
546 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
549 @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
552 # little-endian conversion
553 if ($flavour =~ /le$/o) {
555 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
556 /\?rev/ && do { @bytes=reverse(@bytes); last; };
561 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
564 $consts=0 if (m/Lconsts:/o); # end of table
566 # instructions prefixed with '?' are endian-specific and need
567 # to be adjusted accordingly...
568 if ($flavour =~ /le$/o) { # little-endian
573 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
574 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
575 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
576 } else { # big-endian