You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
5502 lines
111 KiB
5502 lines
111 KiB
diff -up openssl-1.1.1e/crypto/chacha/asm/chacha-s390x.pl.s390x-update openssl-1.1.1e/crypto/chacha/asm/chacha-s390x.pl |
|
--- openssl-1.1.1e/crypto/chacha/asm/chacha-s390x.pl.s390x-update 2020-03-17 15:31:17.000000000 +0100 |
|
+++ openssl-1.1.1e/crypto/chacha/asm/chacha-s390x.pl 2020-03-19 16:45:05.483440129 +0100 |
|
@@ -20,41 +20,53 @@ |
|
# |
|
# 3 times faster than compiler-generated code. |
|
|
|
-$flavour = shift; |
|
+# |
|
+# August 2018 |
|
+# |
|
+# Add vx code path: 4x"vertical". |
|
+# |
|
+# Copyright IBM Corp. 2018 |
|
+# Author: Patrick Steuer <patrick.steuer@de.ibm.com> |
|
+ |
|
+# |
|
+# February 2019 |
|
+# |
|
+# Add 6x"horizontal" VX implementation. It's ~25% faster than IBM's |
|
+# 4x"vertical" submission [on z13] and >3 faster than scalar code. |
|
+# But to harness overheads revert to transliteration of VSX code path |
|
+# from chacha-ppc module, which is also 4x"vertical", to handle inputs |
|
+# not longer than 256 bytes. |
|
+ |
|
+use strict; |
|
+use FindBin qw($Bin); |
|
+use lib "$Bin/../.."; |
|
+use perlasm::s390x qw(:DEFAULT :VX AUTOLOAD LABEL INCLUDE); |
|
|
|
+my $flavour = shift; |
|
+ |
|
+my ($z,$SIZE_T); |
|
if ($flavour =~ /3[12]/) { |
|
+ $z=0; # S/390 ABI |
|
$SIZE_T=4; |
|
- $g=""; |
|
} else { |
|
+ $z=1; # zSeries ABI |
|
$SIZE_T=8; |
|
- $g="g"; |
|
} |
|
|
|
+my $output; |
|
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} |
|
-open STDOUT,">$output"; |
|
- |
|
-sub AUTOLOAD() # thunk [simplified] x86-style perlasm |
|
-{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; |
|
- $code .= "\t$opcode\t".join(',',@_)."\n"; |
|
-} |
|
|
|
my $sp="%r15"; |
|
- |
|
my $stdframe=16*$SIZE_T+4*8; |
|
-my $frame=$stdframe+4*20; |
|
- |
|
-my ($out,$inp,$len,$key,$counter)=map("%r$_",(2..6)); |
|
|
|
+sub ROUND { |
|
my @x=map("%r$_",(0..7,"x","x","x","x",(10..13))); |
|
my @t=map("%r$_",(8,9)); |
|
- |
|
-sub ROUND { |
|
my ($a0,$b0,$c0,$d0)=@_; |
|
my ($a1,$b1,$c1,$d1)=map(($_&~3)+(($_+1)&3),($a0,$b0,$c0,$d0)); |
|
my ($a2,$b2,$c2,$d2)=map(($_&~3)+(($_+1)&3),($a1,$b1,$c1,$d1)); |
|
my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1)&3),($a2,$b2,$c2,$d2)); |
|
-my ($xc,$xc_)=map("\"$_\"",@t); |
|
-my @x=map("\"$_\"",@x); |
|
+my ($xc,$xc_)=map("$_",@t); |
|
|
|
# Consider order in which variables are addressed by their |
|
# index: |
|
@@ -78,249 +90,967 @@ my @x=map("\"$_\"",@x); |
|
# 'c' stores and loads in the middle, but none in the beginning |
|
# or end. |
|
|
|
- ( |
|
- "&alr (@x[$a0],@x[$b0])", # Q1 |
|
- "&alr (@x[$a1],@x[$b1])", # Q2 |
|
- "&xr (@x[$d0],@x[$a0])", |
|
- "&xr (@x[$d1],@x[$a1])", |
|
- "&rll (@x[$d0],@x[$d0],16)", |
|
- "&rll (@x[$d1],@x[$d1],16)", |
|
- |
|
- "&alr ($xc,@x[$d0])", |
|
- "&alr ($xc_,@x[$d1])", |
|
- "&xr (@x[$b0],$xc)", |
|
- "&xr (@x[$b1],$xc_)", |
|
- "&rll (@x[$b0],@x[$b0],12)", |
|
- "&rll (@x[$b1],@x[$b1],12)", |
|
- |
|
- "&alr (@x[$a0],@x[$b0])", |
|
- "&alr (@x[$a1],@x[$b1])", |
|
- "&xr (@x[$d0],@x[$a0])", |
|
- "&xr (@x[$d1],@x[$a1])", |
|
- "&rll (@x[$d0],@x[$d0],8)", |
|
- "&rll (@x[$d1],@x[$d1],8)", |
|
- |
|
- "&alr ($xc,@x[$d0])", |
|
- "&alr ($xc_,@x[$d1])", |
|
- "&xr (@x[$b0],$xc)", |
|
- "&xr (@x[$b1],$xc_)", |
|
- "&rll (@x[$b0],@x[$b0],7)", |
|
- "&rll (@x[$b1],@x[$b1],7)", |
|
- |
|
- "&stm ($xc,$xc_,'$stdframe+4*8+4*$c0($sp)')", # reload pair of 'c's |
|
- "&lm ($xc,$xc_,'$stdframe+4*8+4*$c2($sp)')", |
|
- |
|
- "&alr (@x[$a2],@x[$b2])", # Q3 |
|
- "&alr (@x[$a3],@x[$b3])", # Q4 |
|
- "&xr (@x[$d2],@x[$a2])", |
|
- "&xr (@x[$d3],@x[$a3])", |
|
- "&rll (@x[$d2],@x[$d2],16)", |
|
- "&rll (@x[$d3],@x[$d3],16)", |
|
- |
|
- "&alr ($xc,@x[$d2])", |
|
- "&alr ($xc_,@x[$d3])", |
|
- "&xr (@x[$b2],$xc)", |
|
- "&xr (@x[$b3],$xc_)", |
|
- "&rll (@x[$b2],@x[$b2],12)", |
|
- "&rll (@x[$b3],@x[$b3],12)", |
|
- |
|
- "&alr (@x[$a2],@x[$b2])", |
|
- "&alr (@x[$a3],@x[$b3])", |
|
- "&xr (@x[$d2],@x[$a2])", |
|
- "&xr (@x[$d3],@x[$a3])", |
|
- "&rll (@x[$d2],@x[$d2],8)", |
|
- "&rll (@x[$d3],@x[$d3],8)", |
|
- |
|
- "&alr ($xc,@x[$d2])", |
|
- "&alr ($xc_,@x[$d3])", |
|
- "&xr (@x[$b2],$xc)", |
|
- "&xr (@x[$b3],$xc_)", |
|
- "&rll (@x[$b2],@x[$b2],7)", |
|
- "&rll (@x[$b3],@x[$b3],7)" |
|
- ); |
|
-} |
|
- |
|
-$code.=<<___; |
|
-.text |
|
- |
|
-.globl ChaCha20_ctr32 |
|
-.type ChaCha20_ctr32,\@function |
|
-.align 32 |
|
-ChaCha20_ctr32: |
|
- lt${g}r $len,$len # $len==0? |
|
- bzr %r14 |
|
- a${g}hi $len,-64 |
|
- l${g}hi %r1,-$frame |
|
- stm${g} %r6,%r15,`6*$SIZE_T`($sp) |
|
- sl${g}r $out,$inp # difference |
|
- la $len,0($inp,$len) # end of input minus 64 |
|
- larl %r7,.Lsigma |
|
- lgr %r0,$sp |
|
- la $sp,0(%r1,$sp) |
|
- st${g} %r0,0($sp) |
|
- |
|
- lmg %r8,%r11,0($key) # load key |
|
- lmg %r12,%r13,0($counter) # load counter |
|
- lmg %r6,%r7,0(%r7) # load sigma constant |
|
- |
|
- la %r14,0($inp) |
|
- st${g} $out,$frame+3*$SIZE_T($sp) |
|
- st${g} $len,$frame+4*$SIZE_T($sp) |
|
- stmg %r6,%r13,$stdframe($sp) # copy key schedule to stack |
|
- srlg @x[12],%r12,32 # 32-bit counter value |
|
- j .Loop_outer |
|
- |
|
-.align 16 |
|
-.Loop_outer: |
|
- lm @x[0],@x[7],$stdframe+4*0($sp) # load x[0]-x[7] |
|
- lm @t[0],@t[1],$stdframe+4*10($sp) # load x[10]-x[11] |
|
- lm @x[13],@x[15],$stdframe+4*13($sp) # load x[13]-x[15] |
|
- stm @t[0],@t[1],$stdframe+4*8+4*10($sp) # offload x[10]-x[11] |
|
- lm @t[0],@t[1],$stdframe+4*8($sp) # load x[8]-x[9] |
|
- st @x[12],$stdframe+4*12($sp) # save counter |
|
- st${g} %r14,$frame+2*$SIZE_T($sp) # save input pointer |
|
- lhi %r14,10 |
|
- j .Loop |
|
- |
|
-.align 4 |
|
-.Loop: |
|
-___ |
|
- foreach (&ROUND(0, 4, 8,12)) { eval; } |
|
- foreach (&ROUND(0, 5,10,15)) { eval; } |
|
-$code.=<<___; |
|
- brct %r14,.Loop |
|
- |
|
- l${g} %r14,$frame+2*$SIZE_T($sp) # pull input pointer |
|
- stm @t[0],@t[1],$stdframe+4*8+4*8($sp) # offload x[8]-x[9] |
|
- lm${g} @t[0],@t[1],$frame+3*$SIZE_T($sp) |
|
- |
|
- al @x[0],$stdframe+4*0($sp) # accumulate key schedule |
|
- al @x[1],$stdframe+4*1($sp) |
|
- al @x[2],$stdframe+4*2($sp) |
|
- al @x[3],$stdframe+4*3($sp) |
|
- al @x[4],$stdframe+4*4($sp) |
|
- al @x[5],$stdframe+4*5($sp) |
|
- al @x[6],$stdframe+4*6($sp) |
|
- al @x[7],$stdframe+4*7($sp) |
|
- lrvr @x[0],@x[0] |
|
- lrvr @x[1],@x[1] |
|
- lrvr @x[2],@x[2] |
|
- lrvr @x[3],@x[3] |
|
- lrvr @x[4],@x[4] |
|
- lrvr @x[5],@x[5] |
|
- lrvr @x[6],@x[6] |
|
- lrvr @x[7],@x[7] |
|
- al @x[12],$stdframe+4*12($sp) |
|
- al @x[13],$stdframe+4*13($sp) |
|
- al @x[14],$stdframe+4*14($sp) |
|
- al @x[15],$stdframe+4*15($sp) |
|
- lrvr @x[12],@x[12] |
|
- lrvr @x[13],@x[13] |
|
- lrvr @x[14],@x[14] |
|
- lrvr @x[15],@x[15] |
|
- |
|
- la @t[0],0(@t[0],%r14) # reconstruct output pointer |
|
- cl${g}r %r14,@t[1] |
|
- jh .Ltail |
|
- |
|
- x @x[0],4*0(%r14) # xor with input |
|
- x @x[1],4*1(%r14) |
|
- st @x[0],4*0(@t[0]) # store output |
|
- x @x[2],4*2(%r14) |
|
- st @x[1],4*1(@t[0]) |
|
- x @x[3],4*3(%r14) |
|
- st @x[2],4*2(@t[0]) |
|
- x @x[4],4*4(%r14) |
|
- st @x[3],4*3(@t[0]) |
|
- lm @x[0],@x[3],$stdframe+4*8+4*8($sp) # load x[8]-x[11] |
|
- x @x[5],4*5(%r14) |
|
- st @x[4],4*4(@t[0]) |
|
- x @x[6],4*6(%r14) |
|
- al @x[0],$stdframe+4*8($sp) |
|
- st @x[5],4*5(@t[0]) |
|
- x @x[7],4*7(%r14) |
|
- al @x[1],$stdframe+4*9($sp) |
|
- st @x[6],4*6(@t[0]) |
|
- x @x[12],4*12(%r14) |
|
- al @x[2],$stdframe+4*10($sp) |
|
- st @x[7],4*7(@t[0]) |
|
- x @x[13],4*13(%r14) |
|
- al @x[3],$stdframe+4*11($sp) |
|
- st @x[12],4*12(@t[0]) |
|
- x @x[14],4*14(%r14) |
|
- st @x[13],4*13(@t[0]) |
|
- x @x[15],4*15(%r14) |
|
- st @x[14],4*14(@t[0]) |
|
- lrvr @x[0],@x[0] |
|
- st @x[15],4*15(@t[0]) |
|
- lrvr @x[1],@x[1] |
|
- lrvr @x[2],@x[2] |
|
- lrvr @x[3],@x[3] |
|
- lhi @x[12],1 |
|
- x @x[0],4*8(%r14) |
|
- al @x[12],$stdframe+4*12($sp) # increment counter |
|
- x @x[1],4*9(%r14) |
|
- st @x[0],4*8(@t[0]) |
|
- x @x[2],4*10(%r14) |
|
- st @x[1],4*9(@t[0]) |
|
- x @x[3],4*11(%r14) |
|
- st @x[2],4*10(@t[0]) |
|
- st @x[3],4*11(@t[0]) |
|
- |
|
- cl${g}r %r14,@t[1] # done yet? |
|
- la %r14,64(%r14) |
|
- jl .Loop_outer |
|
- |
|
-.Ldone: |
|
- xgr %r0,%r0 |
|
- xgr %r1,%r1 |
|
- xgr %r2,%r2 |
|
- xgr %r3,%r3 |
|
- stmg %r0,%r3,$stdframe+4*4($sp) # wipe key copy |
|
- stmg %r0,%r3,$stdframe+4*12($sp) |
|
- |
|
- lm${g} %r6,%r15,`$frame+6*$SIZE_T`($sp) |
|
- br %r14 |
|
- |
|
-.align 16 |
|
-.Ltail: |
|
- la @t[1],64($t[1]) |
|
- stm @x[0],@x[7],$stdframe+4*0($sp) |
|
- sl${g}r @t[1],%r14 |
|
- lm @x[0],@x[3],$stdframe+4*8+4*8($sp) |
|
- l${g}hi @x[6],0 |
|
- stm @x[12],@x[15],$stdframe+4*12($sp) |
|
- al @x[0],$stdframe+4*8($sp) |
|
- al @x[1],$stdframe+4*9($sp) |
|
- al @x[2],$stdframe+4*10($sp) |
|
- al @x[3],$stdframe+4*11($sp) |
|
- lrvr @x[0],@x[0] |
|
- lrvr @x[1],@x[1] |
|
- lrvr @x[2],@x[2] |
|
- lrvr @x[3],@x[3] |
|
- stm @x[0],@x[3],$stdframe+4*8($sp) |
|
- |
|
-.Loop_tail: |
|
- llgc @x[4],0(@x[6],%r14) |
|
- llgc @x[5],$stdframe(@x[6],$sp) |
|
- xr @x[5],@x[4] |
|
- stc @x[5],0(@x[6],@t[0]) |
|
- la @x[6],1(@x[6]) |
|
- brct @t[1],.Loop_tail |
|
- |
|
- j .Ldone |
|
-.size ChaCha20_ctr32,.-ChaCha20_ctr32 |
|
- |
|
-.align 32 |
|
-.Lsigma: |
|
-.long 0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral |
|
-.asciz "ChaCha20 for s390x, CRYPTOGAMS by <appro\@openssl.org>" |
|
-.align 4 |
|
-___ |
|
+ alr (@x[$a0],@x[$b0]); # Q1 |
|
+ alr (@x[$a1],@x[$b1]); # Q2 |
|
+ xr (@x[$d0],@x[$a0]); |
|
+ xr (@x[$d1],@x[$a1]); |
|
+ rll (@x[$d0],@x[$d0],16); |
|
+ rll (@x[$d1],@x[$d1],16); |
|
+ |
|
+ alr ($xc,@x[$d0]); |
|
+ alr ($xc_,@x[$d1]); |
|
+ xr (@x[$b0],$xc); |
|
+ xr (@x[$b1],$xc_); |
|
+ rll (@x[$b0],@x[$b0],12); |
|
+ rll (@x[$b1],@x[$b1],12); |
|
+ |
|
+ alr (@x[$a0],@x[$b0]); |
|
+ alr (@x[$a1],@x[$b1]); |
|
+ xr (@x[$d0],@x[$a0]); |
|
+ xr (@x[$d1],@x[$a1]); |
|
+ rll (@x[$d0],@x[$d0],8); |
|
+ rll (@x[$d1],@x[$d1],8); |
|
+ |
|
+ alr ($xc,@x[$d0]); |
|
+ alr ($xc_,@x[$d1]); |
|
+ xr (@x[$b0],$xc); |
|
+ xr (@x[$b1],$xc_); |
|
+ rll (@x[$b0],@x[$b0],7); |
|
+ rll (@x[$b1],@x[$b1],7); |
|
+ |
|
+ stm ($xc,$xc_,"$stdframe+4*8+4*$c0($sp)"); # reload pair of 'c's |
|
+ lm ($xc,$xc_,"$stdframe+4*8+4*$c2($sp)"); |
|
+ |
|
+ alr (@x[$a2],@x[$b2]); # Q3 |
|
+ alr (@x[$a3],@x[$b3]); # Q4 |
|
+ xr (@x[$d2],@x[$a2]); |
|
+ xr (@x[$d3],@x[$a3]); |
|
+ rll (@x[$d2],@x[$d2],16); |
|
+ rll (@x[$d3],@x[$d3],16); |
|
+ |
|
+ alr ($xc,@x[$d2]); |
|
+ alr ($xc_,@x[$d3]); |
|
+ xr (@x[$b2],$xc); |
|
+ xr (@x[$b3],$xc_); |
|
+ rll (@x[$b2],@x[$b2],12); |
|
+ rll (@x[$b3],@x[$b3],12); |
|
+ |
|
+ alr (@x[$a2],@x[$b2]); |
|
+ alr (@x[$a3],@x[$b3]); |
|
+ xr (@x[$d2],@x[$a2]); |
|
+ xr (@x[$d3],@x[$a3]); |
|
+ rll (@x[$d2],@x[$d2],8); |
|
+ rll (@x[$d3],@x[$d3],8); |
|
+ |
|
+ alr ($xc,@x[$d2]); |
|
+ alr ($xc_,@x[$d3]); |
|
+ xr (@x[$b2],$xc); |
|
+ xr (@x[$b3],$xc_); |
|
+ rll (@x[$b2],@x[$b2],7); |
|
+ rll (@x[$b3],@x[$b3],7); |
|
+} |
|
+ |
|
+sub VX_lane_ROUND { |
|
+my ($a0,$b0,$c0,$d0)=@_; |
|
+my ($a1,$b1,$c1,$d1)=map(($_&~3)+(($_+1)&3),($a0,$b0,$c0,$d0)); |
|
+my ($a2,$b2,$c2,$d2)=map(($_&~3)+(($_+1)&3),($a1,$b1,$c1,$d1)); |
|
+my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1)&3),($a2,$b2,$c2,$d2)); |
|
+my @x=map("%v$_",(0..15)); |
|
|
|
-foreach (split("\n",$code)) { |
|
- s/\`([^\`]*)\`/eval $1/ge; |
|
+ vaf (@x[$a0],@x[$a0],@x[$b0]); # Q1 |
|
+ vx (@x[$d0],@x[$d0],@x[$a0]); |
|
+ verllf (@x[$d0],@x[$d0],16); |
|
+ vaf (@x[$a1],@x[$a1],@x[$b1]); # Q2 |
|
+ vx (@x[$d1],@x[$d1],@x[$a1]); |
|
+ verllf (@x[$d1],@x[$d1],16); |
|
+ vaf (@x[$a2],@x[$a2],@x[$b2]); # Q3 |
|
+ vx (@x[$d2],@x[$d2],@x[$a2]); |
|
+ verllf (@x[$d2],@x[$d2],16); |
|
+ vaf (@x[$a3],@x[$a3],@x[$b3]); # Q4 |
|
+ vx (@x[$d3],@x[$d3],@x[$a3]); |
|
+ verllf (@x[$d3],@x[$d3],16); |
|
+ |
|
+ vaf (@x[$c0],@x[$c0],@x[$d0]); |
|
+ vx (@x[$b0],@x[$b0],@x[$c0]); |
|
+ verllf (@x[$b0],@x[$b0],12); |
|
+ vaf (@x[$c1],@x[$c1],@x[$d1]); |
|
+ vx (@x[$b1],@x[$b1],@x[$c1]); |
|
+ verllf (@x[$b1],@x[$b1],12); |
|
+ vaf (@x[$c2],@x[$c2],@x[$d2]); |
|
+ vx (@x[$b2],@x[$b2],@x[$c2]); |
|
+ verllf (@x[$b2],@x[$b2],12); |
|
+ vaf (@x[$c3],@x[$c3],@x[$d3]); |
|
+ vx (@x[$b3],@x[$b3],@x[$c3]); |
|
+ verllf (@x[$b3],@x[$b3],12); |
|
+ |
|
+ vaf (@x[$a0],@x[$a0],@x[$b0]); |
|
+ vx (@x[$d0],@x[$d0],@x[$a0]); |
|
+ verllf (@x[$d0],@x[$d0],8); |
|
+ vaf (@x[$a1],@x[$a1],@x[$b1]); |
|
+ vx (@x[$d1],@x[$d1],@x[$a1]); |
|
+ verllf (@x[$d1],@x[$d1],8); |
|
+ vaf (@x[$a2],@x[$a2],@x[$b2]); |
|
+ vx (@x[$d2],@x[$d2],@x[$a2]); |
|
+ verllf (@x[$d2],@x[$d2],8); |
|
+ vaf (@x[$a3],@x[$a3],@x[$b3]); |
|
+ vx (@x[$d3],@x[$d3],@x[$a3]); |
|
+ verllf (@x[$d3],@x[$d3],8); |
|
+ |
|
+ vaf (@x[$c0],@x[$c0],@x[$d0]); |
|
+ vx (@x[$b0],@x[$b0],@x[$c0]); |
|
+ verllf (@x[$b0],@x[$b0],7); |
|
+ vaf (@x[$c1],@x[$c1],@x[$d1]); |
|
+ vx (@x[$b1],@x[$b1],@x[$c1]); |
|
+ verllf (@x[$b1],@x[$b1],7); |
|
+ vaf (@x[$c2],@x[$c2],@x[$d2]); |
|
+ vx (@x[$b2],@x[$b2],@x[$c2]); |
|
+ verllf (@x[$b2],@x[$b2],7); |
|
+ vaf (@x[$c3],@x[$c3],@x[$d3]); |
|
+ vx (@x[$b3],@x[$b3],@x[$c3]); |
|
+ verllf (@x[$b3],@x[$b3],7); |
|
+} |
|
|
|
- print $_,"\n"; |
|
+sub VX_ROUND { |
|
+my @a=@_[0..5]; |
|
+my @b=@_[6..11]; |
|
+my @c=@_[12..17]; |
|
+my @d=@_[18..23]; |
|
+my $odd=@_[24]; |
|
+ |
|
+ vaf (@a[$_],@a[$_],@b[$_]) for (0..5); |
|
+ vx (@d[$_],@d[$_],@a[$_]) for (0..5); |
|
+ verllf (@d[$_],@d[$_],16) for (0..5); |
|
+ |
|
+ vaf (@c[$_],@c[$_],@d[$_]) for (0..5); |
|
+ vx (@b[$_],@b[$_],@c[$_]) for (0..5); |
|
+ verllf (@b[$_],@b[$_],12) for (0..5); |
|
+ |
|
+ vaf (@a[$_],@a[$_],@b[$_]) for (0..5); |
|
+ vx (@d[$_],@d[$_],@a[$_]) for (0..5); |
|
+ verllf (@d[$_],@d[$_],8) for (0..5); |
|
+ |
|
+ vaf (@c[$_],@c[$_],@d[$_]) for (0..5); |
|
+ vx (@b[$_],@b[$_],@c[$_]) for (0..5); |
|
+ verllf (@b[$_],@b[$_],7) for (0..5); |
|
+ |
|
+ vsldb (@c[$_],@c[$_],@c[$_],8) for (0..5); |
|
+ vsldb (@b[$_],@b[$_],@b[$_],$odd?12:4) for (0..5); |
|
+ vsldb (@d[$_],@d[$_],@d[$_],$odd?4:12) for (0..5); |
|
} |
|
-close STDOUT or die "error closing STDOUT: $!"; |
|
+ |
|
+PERLASM_BEGIN($output); |
|
+ |
|
+INCLUDE ("s390x_arch.h"); |
|
+TEXT (); |
|
+ |
|
+################ |
|
+# void ChaCha20_ctr32(unsigned char *out, const unsigned char *inp, size_t len, |
|
+# const unsigned int key[8], const unsigned int counter[4]) |
|
+my ($out,$inp,$len,$key,$counter)=map("%r$_",(2..6)); |
|
+{ |
|
+my $frame=$stdframe+4*20; |
|
+my @x=map("%r$_",(0..7,"x","x","x","x",(10..13))); |
|
+my @t=map("%r$_",(8,9)); |
|
+ |
|
+GLOBL ("ChaCha20_ctr32"); |
|
+TYPE ("ChaCha20_ctr32","\@function"); |
|
+ALIGN (32); |
|
+LABEL ("ChaCha20_ctr32"); |
|
+ larl ("%r1","OPENSSL_s390xcap_P"); |
|
+ |
|
+ lghi ("%r0",64); |
|
+&{$z? \<gr:\<r} ($len,$len); # len==0? |
|
+ bzr ("%r14"); |
|
+ lg ("%r1","S390X_STFLE+16(%r1)"); |
|
+&{$z? \&clgr:\&clr} ($len,"%r0"); |
|
+ jle (".Lshort"); |
|
+ |
|
+ tmhh ("%r1",0x4000); # check for vx bit |
|
+ jnz (".LChaCha20_ctr32_vx"); |
|
+ |
|
+LABEL (".Lshort"); |
|
+&{$z? \&aghi:\&ahi} ($len,-64); |
|
+&{$z? \&lghi:\&lhi} ("%r1",-$frame); |
|
+&{$z? \&stmg:\&stm} ("%r6","%r15","6*$SIZE_T($sp)"); |
|
+&{$z? \&slgr:\&slr} ($out,$inp); # difference |
|
+ la ($len,"0($inp,$len)"); # end of input minus 64 |
|
+ larl ("%r7",".Lsigma"); |
|
+ lgr ("%r0",$sp); |
|
+ la ($sp,"0(%r1,$sp)"); |
|
+&{$z? \&stg:\&st} ("%r0","0($sp)"); |
|
+ |
|
+ lmg ("%r8","%r11","0($key)"); # load key |
|
+ lmg ("%r12","%r13","0($counter)"); # load counter |
|
+ lmg ("%r6","%r7","0(%r7)"); # load sigma constant |
|
+ |
|
+ la ("%r14","0($inp)"); |
|
+&{$z? \&stg:\&st} ($out,"$frame+3*$SIZE_T($sp)"); |
|
+&{$z? \&stg:\&st} ($len,"$frame+4*$SIZE_T($sp)"); |
|
+ stmg ("%r6","%r13","$stdframe($sp)");# copy key schedule to stack |
|
+ srlg (@x[12],"%r12",32); # 32-bit counter value |
|
+ j (".Loop_outer"); |
|
+ |
|
+ALIGN (16); |
|
+LABEL (".Loop_outer"); |
|
+ lm (@x[0],@x[7],"$stdframe+4*0($sp)"); # load x[0]-x[7] |
|
+ lm (@t[0],@t[1],"$stdframe+4*10($sp)"); # load x[10]-x[11] |
|
+ lm (@x[13],@x[15],"$stdframe+4*13($sp)"); # load x[13]-x[15] |
|
+ stm (@t[0],@t[1],"$stdframe+4*8+4*10($sp)");# offload x[10]-x[11] |
|
+ lm (@t[0],@t[1],"$stdframe+4*8($sp)"); # load x[8]-x[9] |
|
+ st (@x[12],"$stdframe+4*12($sp)"); # save counter |
|
+&{$z? \&stg:\&st} ("%r14","$frame+2*$SIZE_T($sp)");# save input pointer |
|
+ lhi ("%r14",10); |
|
+ j (".Loop"); |
|
+ |
|
+ALIGN (4); |
|
+LABEL (".Loop"); |
|
+ ROUND (0, 4, 8,12); |
|
+ ROUND (0, 5,10,15); |
|
+ brct ("%r14",".Loop"); |
|
+ |
|
+&{$z? \&lg:\&l} ("%r14","$frame+2*$SIZE_T($sp)");# pull input pointer |
|
+ stm (@t[0],@t[1],"$stdframe+4*8+4*8($sp)"); # offload x[8]-x[9] |
|
+&{$z? \&lmg:\&lm} (@t[0],@t[1],"$frame+3*$SIZE_T($sp)"); |
|
+ |
|
+ al (@x[0],"$stdframe+4*0($sp)"); # accumulate key schedule |
|
+ al (@x[1],"$stdframe+4*1($sp)"); |
|
+ al (@x[2],"$stdframe+4*2($sp)"); |
|
+ al (@x[3],"$stdframe+4*3($sp)"); |
|
+ al (@x[4],"$stdframe+4*4($sp)"); |
|
+ al (@x[5],"$stdframe+4*5($sp)"); |
|
+ al (@x[6],"$stdframe+4*6($sp)"); |
|
+ al (@x[7],"$stdframe+4*7($sp)"); |
|
+ lrvr (@x[0],@x[0]); |
|
+ lrvr (@x[1],@x[1]); |
|
+ lrvr (@x[2],@x[2]); |
|
+ lrvr (@x[3],@x[3]); |
|
+ lrvr (@x[4],@x[4]); |
|
+ lrvr (@x[5],@x[5]); |
|
+ lrvr (@x[6],@x[6]); |
|
+ lrvr (@x[7],@x[7]); |
|
+ al (@x[12],"$stdframe+4*12($sp)"); |
|
+ al (@x[13],"$stdframe+4*13($sp)"); |
|
+ al (@x[14],"$stdframe+4*14($sp)"); |
|
+ al (@x[15],"$stdframe+4*15($sp)"); |
|
+ lrvr (@x[12],@x[12]); |
|
+ lrvr (@x[13],@x[13]); |
|
+ lrvr (@x[14],@x[14]); |
|
+ lrvr (@x[15],@x[15]); |
|
+ |
|
+ la (@t[0],"0(@t[0],%r14)"); # reconstruct output pointer |
|
+&{$z? \&clgr:\&clr} ("%r14",@t[1]); |
|
+ jh (".Ltail"); |
|
+ |
|
+ x (@x[0],"4*0(%r14)"); # xor with input |
|
+ x (@x[1],"4*1(%r14)"); |
|
+ st (@x[0],"4*0(@t[0])"); # store output |
|
+ x (@x[2],"4*2(%r14)"); |
|
+ st (@x[1],"4*1(@t[0])"); |
|
+ x (@x[3],"4*3(%r14)"); |
|
+ st (@x[2],"4*2(@t[0])"); |
|
+ x (@x[4],"4*4(%r14)"); |
|
+ st (@x[3],"4*3(@t[0])"); |
|
+ lm (@x[0],@x[3],"$stdframe+4*8+4*8($sp)"); # load x[8]-x[11] |
|
+ x (@x[5],"4*5(%r14)"); |
|
+ st (@x[4],"4*4(@t[0])"); |
|
+ x (@x[6],"4*6(%r14)"); |
|
+ al (@x[0],"$stdframe+4*8($sp)"); |
|
+ st (@x[5],"4*5(@t[0])"); |
|
+ x (@x[7],"4*7(%r14)"); |
|
+ al (@x[1],"$stdframe+4*9($sp)"); |
|
+ st (@x[6],"4*6(@t[0])"); |
|
+ x (@x[12],"4*12(%r14)"); |
|
+ al (@x[2],"$stdframe+4*10($sp)"); |
|
+ st (@x[7],"4*7(@t[0])"); |
|
+ x (@x[13],"4*13(%r14)"); |
|
+ al (@x[3],"$stdframe+4*11($sp)"); |
|
+ st (@x[12],"4*12(@t[0])"); |
|
+ x (@x[14],"4*14(%r14)"); |
|
+ st (@x[13],"4*13(@t[0])"); |
|
+ x (@x[15],"4*15(%r14)"); |
|
+ st (@x[14],"4*14(@t[0])"); |
|
+ lrvr (@x[0],@x[0]); |
|
+ st (@x[15],"4*15(@t[0])"); |
|
+ lrvr (@x[1],@x[1]); |
|
+ lrvr (@x[2],@x[2]); |
|
+ lrvr (@x[3],@x[3]); |
|
+ lhi (@x[12],1); |
|
+ x (@x[0],"4*8(%r14)"); |
|
+ al (@x[12],"$stdframe+4*12($sp)"); # increment counter |
|
+ x (@x[1],"4*9(%r14)"); |
|
+ st (@x[0],"4*8(@t[0])"); |
|
+ x (@x[2],"4*10(%r14)"); |
|
+ st (@x[1],"4*9(@t[0])"); |
|
+ x (@x[3],"4*11(%r14)"); |
|
+ st (@x[2],"4*10(@t[0])"); |
|
+ st (@x[3],"4*11(@t[0])"); |
|
+ |
|
+&{$z? \&clgr:\&clr} ("%r14",@t[1]); # done yet? |
|
+ la ("%r14","64(%r14)"); |
|
+ jl (".Loop_outer"); |
|
+ |
|
+LABEL (".Ldone"); |
|
+ xgr ("%r0","%r0"); |
|
+ xgr ("%r1","%r1"); |
|
+ xgr ("%r2","%r2"); |
|
+ xgr ("%r3","%r3"); |
|
+ stmg ("%r0","%r3","$stdframe+4*4($sp)"); # wipe key copy |
|
+ stmg ("%r0","%r3","$stdframe+4*12($sp)"); |
|
+ |
|
+&{$z? \&lmg:\&lm} ("%r6","%r15","$frame+6*$SIZE_T($sp)"); |
|
+ br ("%r14"); |
|
+ |
|
+ALIGN (16); |
|
+LABEL (".Ltail"); |
|
+ la (@t[1],"64($t[1])"); |
|
+ stm (@x[0],@x[7],"$stdframe+4*0($sp)"); |
|
+&{$z? \&slgr:\&slr} (@t[1],"%r14"); |
|
+ lm (@x[0],@x[3],"$stdframe+4*8+4*8($sp)"); |
|
+&{$z? \&lghi:\&lhi} (@x[6],0); |
|
+ stm (@x[12],@x[15],"$stdframe+4*12($sp)"); |
|
+ al (@x[0],"$stdframe+4*8($sp)"); |
|
+ al (@x[1],"$stdframe+4*9($sp)"); |
|
+ al (@x[2],"$stdframe+4*10($sp)"); |
|
+ al (@x[3],"$stdframe+4*11($sp)"); |
|
+ lrvr (@x[0],@x[0]); |
|
+ lrvr (@x[1],@x[1]); |
|
+ lrvr (@x[2],@x[2]); |
|
+ lrvr (@x[3],@x[3]); |
|
+ stm (@x[0],@x[3],"$stdframe+4*8($sp)"); |
|
+ |
|
+LABEL (".Loop_tail"); |
|
+ llgc (@x[4],"0(@x[6],%r14)"); |
|
+ llgc (@x[5],"$stdframe(@x[6],$sp)"); |
|
+ xr (@x[5],@x[4]); |
|
+ stc (@x[5],"0(@x[6],@t[0])"); |
|
+ la (@x[6],"1(@x[6])"); |
|
+ brct (@t[1],".Loop_tail"); |
|
+ |
|
+ j (".Ldone"); |
|
+SIZE ("ChaCha20_ctr32",".-ChaCha20_ctr32"); |
|
+} |
|
+ |
|
+######################################################################## |
|
+# 4x"vertical" layout minimizes amount of instructions, but pipeline |
|
+# runs underutilized [because of vector instructions' high latency]. |
|
+# On the other hand minimum amount of data it takes to fully utilize |
|
+# the pipeline is higher, so that effectively, short inputs would be |
|
+# processed slower. Hence this code path targeting <=256 bytes lengths. |
|
+# |
|
+{ |
|
+my ($xa0,$xa1,$xa2,$xa3, $xb0,$xb1,$xb2,$xb3, |
|
+ $xc0,$xc1,$xc2,$xc3, $xd0,$xd1,$xd2,$xd3)=map("%v$_",(0..15)); |
|
+my @K=map("%v$_",(16..19)); |
|
+my $CTR="%v26"; |
|
+my ($xt0,$xt1,$xt2,$xt3)=map("%v$_",(27..30)); |
|
+my $beperm="%v31"; |
|
+my ($x00,$x10,$x20,$x30)=(0,map("r$_",(8..10))); |
|
+my $FRAME=$stdframe+4*16; |
|
+ |
|
+ALIGN (32); |
|
+LABEL ("ChaCha20_ctr32_4x"); |
|
+LABEL (".LChaCha20_ctr32_4x"); |
|
+&{$z? \&stmg:\&stm} ("%r6","%r7","6*$SIZE_T($sp)"); |
|
+if (!$z) { |
|
+ std ("%f4","16*$SIZE_T+2*8($sp)"); |
|
+ std ("%f6","16*$SIZE_T+3*8($sp)"); |
|
+} |
|
+&{$z? \&lghi:\&lhi} ("%r1",-$FRAME); |
|
+ lgr ("%r0",$sp); |
|
+ la ($sp,"0(%r1,$sp)"); |
|
+&{$z? \&stg:\&st} ("%r0","0($sp)"); # back-chain |
|
+if ($z) { |
|
+ std ("%f8","$stdframe+8*0($sp)"); |
|
+ std ("%f9","$stdframe+8*1($sp)"); |
|
+ std ("%f10","$stdframe+8*2($sp)"); |
|
+ std ("%f11","$stdframe+8*3($sp)"); |
|
+ std ("%f12","$stdframe+8*4($sp)"); |
|
+ std ("%f13","$stdframe+8*5($sp)"); |
|
+ std ("%f14","$stdframe+8*6($sp)"); |
|
+ std ("%f15","$stdframe+8*7($sp)"); |
|
+} |
|
+ larl ("%r7",".Lsigma"); |
|
+ lhi ("%r0",10); |
|
+ lhi ("%r1",0); |
|
+ |
|
+ vl (@K[0],"0(%r7)"); # load sigma |
|
+ vl (@K[1],"0($key)"); # load key |
|
+ vl (@K[2],"16($key)"); |
|
+ vl (@K[3],"0($counter)"); # load counter |
|
+ |
|
+ vl ($beperm,"0x40(%r7)"); |
|
+ vl ($xt1,"0x50(%r7)"); |
|
+ vrepf ($CTR,@K[3],0); |
|
+ vlvgf (@K[3],"%r1",0); # clear @K[3].word[0] |
|
+ vaf ($CTR,$CTR,$xt1); |
|
+ |
|
+#LABEL (".Loop_outer_4x"); |
|
+ vlm ($xa0,$xa3,"0x60(%r7)"); # load [smashed] sigma |
|
+ |
|
+ vrepf ($xb0,@K[1],0); # smash the key |
|
+ vrepf ($xb1,@K[1],1); |
|
+ vrepf ($xb2,@K[1],2); |
|
+ vrepf ($xb3,@K[1],3); |
|
+ |
|
+ vrepf ($xc0,@K[2],0); |
|
+ vrepf ($xc1,@K[2],1); |
|
+ vrepf ($xc2,@K[2],2); |
|
+ vrepf ($xc3,@K[2],3); |
|
+ |
|
+ vlr ($xd0,$CTR); |
|
+ vrepf ($xd1,@K[3],1); |
|
+ vrepf ($xd2,@K[3],2); |
|
+ vrepf ($xd3,@K[3],3); |
|
+ |
|
+LABEL (".Loop_4x"); |
|
+ VX_lane_ROUND(0, 4, 8,12); |
|
+ VX_lane_ROUND(0, 5,10,15); |
|
+ brct ("%r0",".Loop_4x"); |
|
+ |
|
+ vaf ($xd0,$xd0,$CTR); |
|
+ |
|
+ vmrhf ($xt0,$xa0,$xa1); # transpose data |
|
+ vmrhf ($xt1,$xa2,$xa3); |
|
+ vmrlf ($xt2,$xa0,$xa1); |
|
+ vmrlf ($xt3,$xa2,$xa3); |
|
+ vpdi ($xa0,$xt0,$xt1,0b0000); |
|
+ vpdi ($xa1,$xt0,$xt1,0b0101); |
|
+ vpdi ($xa2,$xt2,$xt3,0b0000); |
|
+ vpdi ($xa3,$xt2,$xt3,0b0101); |
|
+ |
|
+ vmrhf ($xt0,$xb0,$xb1); |
|
+ vmrhf ($xt1,$xb2,$xb3); |
|
+ vmrlf ($xt2,$xb0,$xb1); |
|
+ vmrlf ($xt3,$xb2,$xb3); |
|
+ vpdi ($xb0,$xt0,$xt1,0b0000); |
|
+ vpdi ($xb1,$xt0,$xt1,0b0101); |
|
+ vpdi ($xb2,$xt2,$xt3,0b0000); |
|
+ vpdi ($xb3,$xt2,$xt3,0b0101); |
|
+ |
|
+ vmrhf ($xt0,$xc0,$xc1); |
|
+ vmrhf ($xt1,$xc2,$xc3); |
|
+ vmrlf ($xt2,$xc0,$xc1); |
|
+ vmrlf ($xt3,$xc2,$xc3); |
|
+ vpdi ($xc0,$xt0,$xt1,0b0000); |
|
+ vpdi ($xc1,$xt0,$xt1,0b0101); |
|
+ vpdi ($xc2,$xt2,$xt3,0b0000); |
|
+ vpdi ($xc3,$xt2,$xt3,0b0101); |
|
+ |
|
+ vmrhf ($xt0,$xd0,$xd1); |
|
+ vmrhf ($xt1,$xd2,$xd3); |
|
+ vmrlf ($xt2,$xd0,$xd1); |
|
+ vmrlf ($xt3,$xd2,$xd3); |
|
+ vpdi ($xd0,$xt0,$xt1,0b0000); |
|
+ vpdi ($xd1,$xt0,$xt1,0b0101); |
|
+ vpdi ($xd2,$xt2,$xt3,0b0000); |
|
+ vpdi ($xd3,$xt2,$xt3,0b0101); |
|
+ |
|
+ #vrepif ($xt0,4); |
|
+ #vaf ($CTR,$CTR,$xt0); # next counter value |
|
+ |
|
+ vaf ($xa0,$xa0,@K[0]); |
|
+ vaf ($xb0,$xb0,@K[1]); |
|
+ vaf ($xc0,$xc0,@K[2]); |
|
+ vaf ($xd0,$xd0,@K[3]); |
|
+ |
|
+ vperm ($xa0,$xa0,$xa0,$beperm); |
|
+ vperm ($xb0,$xb0,$xb0,$beperm); |
|
+ vperm ($xc0,$xc0,$xc0,$beperm); |
|
+ vperm ($xd0,$xd0,$xd0,$beperm); |
|
+ |
|
+ #&{$z? \&clgfi:\&clfi} ($len,0x40); |
|
+ #jl (".Ltail_4x"); |
|
+ |
|
+ vlm ($xt0,$xt3,"0($inp)"); |
|
+ |
|
+ vx ($xt0,$xt0,$xa0); |
|
+ vx ($xt1,$xt1,$xb0); |
|
+ vx ($xt2,$xt2,$xc0); |
|
+ vx ($xt3,$xt3,$xd0); |
|
+ |
|
+ vstm ($xt0,$xt3,"0($out)"); |
|
+ |
|
+ la ($inp,"0x40($inp)"); |
|
+ la ($out,"0x40($out)"); |
|
+&{$z? \&aghi:\&ahi} ($len,-0x40); |
|
+ #je (".Ldone_4x"); |
|
+ |
|
+ vaf ($xa0,$xa1,@K[0]); |
|
+ vaf ($xb0,$xb1,@K[1]); |
|
+ vaf ($xc0,$xc1,@K[2]); |
|
+ vaf ($xd0,$xd1,@K[3]); |
|
+ |
|
+ vperm ($xa0,$xa0,$xa0,$beperm); |
|
+ vperm ($xb0,$xb0,$xb0,$beperm); |
|
+ vperm ($xc0,$xc0,$xc0,$beperm); |
|
+ vperm ($xd0,$xd0,$xd0,$beperm); |
|
+ |
|
+&{$z? \&clgfi:\&clfi} ($len,0x40); |
|
+ jl (".Ltail_4x"); |
|
+ |
|
+ vlm ($xt0,$xt3,"0($inp)"); |
|
+ |
|
+ vx ($xt0,$xt0,$xa0); |
|
+ vx ($xt1,$xt1,$xb0); |
|
+ vx ($xt2,$xt2,$xc0); |
|
+ vx ($xt3,$xt3,$xd0); |
|
+ |
|
+ vstm ($xt0,$xt3,"0($out)"); |
|
+ |
|
+ la ($inp,"0x40($inp)"); |
|
+ la ($out,"0x40($out)"); |
|
+&{$z? \&aghi:\&ahi} ($len,-0x40); |
|
+ je (".Ldone_4x"); |
|
+ |
|
+ vaf ($xa0,$xa2,@K[0]); |
|
+ vaf ($xb0,$xb2,@K[1]); |
|
+ vaf ($xc0,$xc2,@K[2]); |
|
+ vaf ($xd0,$xd2,@K[3]); |
|
+ |
|
+ vperm ($xa0,$xa0,$xa0,$beperm); |
|
+ vperm ($xb0,$xb0,$xb0,$beperm); |
|
+ vperm ($xc0,$xc0,$xc0,$beperm); |
|
+ vperm ($xd0,$xd0,$xd0,$beperm); |
|
+ |
|
+&{$z? \&clgfi:\&clfi} ($len,0x40); |
|
+ jl (".Ltail_4x"); |
|
+ |
|
+ vlm ($xt0,$xt3,"0($inp)"); |
|
+ |
|
+ vx ($xt0,$xt0,$xa0); |
|
+ vx ($xt1,$xt1,$xb0); |
|
+ vx ($xt2,$xt2,$xc0); |
|
+ vx ($xt3,$xt3,$xd0); |
|
+ |
|
+ vstm ($xt0,$xt3,"0($out)"); |
|
+ |
|
+ la ($inp,"0x40($inp)"); |
|
+ la ($out,"0x40($out)"); |
|
+&{$z? \&aghi:\&ahi} ($len,-0x40); |
|
+ je (".Ldone_4x"); |
|
+ |
|
+ vaf ($xa0,$xa3,@K[0]); |
|
+ vaf ($xb0,$xb3,@K[1]); |
|
+ vaf ($xc0,$xc3,@K[2]); |
|
+ vaf ($xd0,$xd3,@K[3]); |
|
+ |
|
+ vperm ($xa0,$xa0,$xa0,$beperm); |
|
+ vperm ($xb0,$xb0,$xb0,$beperm); |
|
+ vperm ($xc0,$xc0,$xc0,$beperm); |
|
+ vperm ($xd0,$xd0,$xd0,$beperm); |
|
+ |
|
+&{$z? \&clgfi:\&clfi} ($len,0x40); |
|
+ jl (".Ltail_4x"); |
|
+ |
|
+ vlm ($xt0,$xt3,"0($inp)"); |
|
+ |
|
+ vx ($xt0,$xt0,$xa0); |
|
+ vx ($xt1,$xt1,$xb0); |
|
+ vx ($xt2,$xt2,$xc0); |
|
+ vx ($xt3,$xt3,$xd0); |
|
+ |
|
+ vstm ($xt0,$xt3,"0($out)"); |
|
+ |
|
+ #la $inp,0x40($inp)); |
|
+ #la $out,0x40($out)); |
|
+ #lhi %r0,10); |
|
+ #&{$z? \&aghi:\&ahi} $len,-0x40); |
|
+ #jne .Loop_outer_4x); |
|
+ |
|
+LABEL (".Ldone_4x"); |
|
+if (!$z) { |
|
+ ld ("%f4","$FRAME+16*$SIZE_T+2*8($sp)"); |
|
+ ld ("%f6","$FRAME+16*$SIZE_T+3*8($sp)"); |
|
+} else { |
|
+ ld ("%f8","$stdframe+8*0($sp)"); |
|
+ ld ("%f9","$stdframe+8*1($sp)"); |
|
+ ld ("%f10","$stdframe+8*2($sp)"); |
|
+ ld ("%f11","$stdframe+8*3($sp)"); |
|
+ ld ("%f12","$stdframe+8*4($sp)"); |
|
+ ld ("%f13","$stdframe+8*5($sp)"); |
|
+ ld ("%f14","$stdframe+8*6($sp)"); |
|
+ ld ("%f15","$stdframe+8*7($sp)"); |
|
+} |
|
+&{$z? \&lmg:\&lm} ("%r6","%r7","$FRAME+6*$SIZE_T($sp)"); |
|
+ la ($sp,"$FRAME($sp)"); |
|
+ br ("%r14"); |
|
+ |
|
+ALIGN (16); |
|
+LABEL (".Ltail_4x"); |
|
+if (!$z) { |
|
+ vlr ($xt0,$xb0); |
|
+ ld ("%f4","$FRAME+16*$SIZE_T+2*8($sp)"); |
|
+ ld ("%f6","$FRAME+16*$SIZE_T+3*8($sp)"); |
|
+ |
|
+ vst ($xa0,"$stdframe+0x00($sp)"); |
|
+ vst ($xt0,"$stdframe+0x10($sp)"); |
|
+ vst ($xc0,"$stdframe+0x20($sp)"); |
|
+ vst ($xd0,"$stdframe+0x30($sp)"); |
|
+} else { |
|
+ vlr ($xt0,$xc0); |
|
+ ld ("%f8","$stdframe+8*0($sp)"); |
|
+ ld ("%f9","$stdframe+8*1($sp)"); |
|
+ ld ("%f10","$stdframe+8*2($sp)"); |
|
+ ld ("%f11","$stdframe+8*3($sp)"); |
|
+ vlr ($xt1,$xd0); |
|
+ ld ("%f12","$stdframe+8*4($sp)"); |
|
+ ld ("%f13","$stdframe+8*5($sp)"); |
|
+ ld ("%f14","$stdframe+8*6($sp)"); |
|
+ ld ("%f15","$stdframe+8*7($sp)"); |
|
+ |
|
+ vst ($xa0,"$stdframe+0x00($sp)"); |
|
+ vst ($xb0,"$stdframe+0x10($sp)"); |
|
+ vst ($xt0,"$stdframe+0x20($sp)"); |
|
+ vst ($xt1,"$stdframe+0x30($sp)"); |
|
+} |
|
+ lghi ("%r1",0); |
|
+ |
|
+LABEL (".Loop_tail_4x"); |
|
+ llgc ("%r5","0(%r1,$inp)"); |
|
+ llgc ("%r6","$stdframe(%r1,$sp)"); |
|
+ xr ("%r6","%r5"); |
|
+ stc ("%r6","0(%r1,$out)"); |
|
+ la ("%r1","1(%r1)"); |
|
+ brct ($len,".Loop_tail_4x"); |
|
+ |
|
+&{$z? \&lmg:\&lm} ("%r6","%r7","$FRAME+6*$SIZE_T($sp)"); |
|
+ la ($sp,"$FRAME($sp)"); |
|
+ br ("%r14"); |
|
+SIZE ("ChaCha20_ctr32_4x",".-ChaCha20_ctr32_4x"); |
|
+} |
|
+ |
|
+######################################################################## |
|
+# 6x"horizontal" layout is optimal fit for the platform in its current |
|
+# shape, more specifically for given vector instructions' latency. Well, |
|
+# computational part of 8x"vertical" would be faster, but it consumes |
|
+# all registers and dealing with that will diminish the return... |
|
+# |
|
+{ |
|
+my ($a0,$b0,$c0,$d0, $a1,$b1,$c1,$d1, |
|
+ $a2,$b2,$c2,$d2, $a3,$b3,$c3,$d3, |
|
+ $a4,$b4,$c4,$d4, $a5,$b5,$c5,$d5)=map("%v$_",(0..23)); |
|
+my @K=map("%v$_",(27,24..26)); |
|
+my ($t0,$t1,$t2,$t3)=map("%v$_",27..30); |
|
+my $beperm="%v31"; |
|
+my $FRAME=$stdframe + 4*16; |
|
+ |
|
+GLOBL ("ChaCha20_ctr32_vx"); |
|
+ALIGN (32); |
|
+LABEL ("ChaCha20_ctr32_vx"); |
|
+LABEL (".LChaCha20_ctr32_vx"); |
|
+&{$z? \&clgfi:\&clfi} ($len,256); |
|
+ jle (".LChaCha20_ctr32_4x"); |
|
+&{$z? \&stmg:\&stm} ("%r6","%r7","6*$SIZE_T($sp)"); |
|
+if (!$z) { |
|
+ std ("%f4","16*$SIZE_T+2*8($sp)"); |
|
+ std ("%f6","16*$SIZE_T+3*8($sp)"); |
|
+} |
|
+&{$z? \&lghi:\&lhi} ("%r1",-$FRAME); |
|
+ lgr ("%r0",$sp); |
|
+ la ($sp,"0(%r1,$sp)"); |
|
+&{$z? \&stg:\&st} ("%r0","0($sp)"); # back-chain |
|
+if ($z) { |
|
+ std ("%f8","$FRAME-8*8($sp)"); |
|
+ std ("%f9","$FRAME-8*7($sp)"); |
|
+ std ("%f10","$FRAME-8*6($sp)"); |
|
+ std ("%f11","$FRAME-8*5($sp)"); |
|
+ std ("%f12","$FRAME-8*4($sp)"); |
|
+ std ("%f13","$FRAME-8*3($sp)"); |
|
+ std ("%f14","$FRAME-8*2($sp)"); |
|
+ std ("%f15","$FRAME-8*1($sp)"); |
|
+} |
|
+ larl ("%r7",".Lsigma"); |
|
+ lhi ("%r0",10); |
|
+ |
|
+ vlm (@K[1],@K[2],"0($key)"); # load key |
|
+ vl (@K[3],"0($counter)"); # load counter |
|
+ |
|
+ vlm (@K[0],"$beperm","0(%r7)"); # load sigma, increments, ... |
|
+ |
|
+LABEL (".Loop_outer_vx"); |
|
+ vlr ($a0,@K[0]); |
|
+ vlr ($b0,@K[1]); |
|
+ vlr ($a1,@K[0]); |
|
+ vlr ($b1,@K[1]); |
|
+ vlr ($a2,@K[0]); |
|
+ vlr ($b2,@K[1]); |
|
+ vlr ($a3,@K[0]); |
|
+ vlr ($b3,@K[1]); |
|
+ vlr ($a4,@K[0]); |
|
+ vlr ($b4,@K[1]); |
|
+ vlr ($a5,@K[0]); |
|
+ vlr ($b5,@K[1]); |
|
+ |
|
+ vlr ($d0,@K[3]); |
|
+ vaf ($d1,@K[3],$t1); # K[3]+1 |
|
+ vaf ($d2,@K[3],$t2); # K[3]+2 |
|
+ vaf ($d3,@K[3],$t3); # K[3]+3 |
|
+ vaf ($d4,$d2,$t2); # K[3]+4 |
|
+ vaf ($d5,$d2,$t3); # K[3]+5 |
|
+ |
|
+ vlr ($c0,@K[2]); |
|
+ vlr ($c1,@K[2]); |
|
+ vlr ($c2,@K[2]); |
|
+ vlr ($c3,@K[2]); |
|
+ vlr ($c4,@K[2]); |
|
+ vlr ($c5,@K[2]); |
|
+ |
|
+ vlr ($t1,$d1); |
|
+ vlr ($t2,$d2); |
|
+ vlr ($t3,$d3); |
|
+ |
|
+ALIGN (4); |
|
+LABEL (".Loop_vx"); |
|
+ |
|
+ VX_ROUND($a0,$a1,$a2,$a3,$a4,$a5, |
|
+ $b0,$b1,$b2,$b3,$b4,$b5, |
|
+ $c0,$c1,$c2,$c3,$c4,$c5, |
|
+ $d0,$d1,$d2,$d3,$d4,$d5, |
|
+ 0); |
|
+ |
|
+ VX_ROUND($a0,$a1,$a2,$a3,$a4,$a5, |
|
+ $b0,$b1,$b2,$b3,$b4,$b5, |
|
+ $c0,$c1,$c2,$c3,$c4,$c5, |
|
+ $d0,$d1,$d2,$d3,$d4,$d5, |
|
+ 1); |
|
+ |
|
+ brct ("%r0",".Loop_vx"); |
|
+ |
|
+ vaf ($a0,$a0,@K[0]); |
|
+ vaf ($b0,$b0,@K[1]); |
|
+ vaf ($c0,$c0,@K[2]); |
|
+ vaf ($d0,$d0,@K[3]); |
|
+ vaf ($a1,$a1,@K[0]); |
|
+ vaf ($d1,$d1,$t1); # +K[3]+1 |
|
+ |
|
+ vperm ($a0,$a0,$a0,$beperm); |
|
+ vperm ($b0,$b0,$b0,$beperm); |
|
+ vperm ($c0,$c0,$c0,$beperm); |
|
+ vperm ($d0,$d0,$d0,$beperm); |
|
+ |
|
+&{$z? \&clgfi:\&clfi} ($len,0x40); |
|
+ jl (".Ltail_vx"); |
|
+ |
|
+ vaf ($d2,$d2,$t2); # +K[3]+2 |
|
+ vaf ($d3,$d3,$t3); # +K[3]+3 |
|
+ vlm ($t0,$t3,"0($inp)"); |
|
+ |
|
+ vx ($a0,$a0,$t0); |
|
+ vx ($b0,$b0,$t1); |
|
+ vx ($c0,$c0,$t2); |
|
+ vx ($d0,$d0,$t3); |
|
+ |
|
+ vlm (@K[0],$t3,"0(%r7)"); # re-load sigma and increments |
|
+ |
|
+ vstm ($a0,$d0,"0($out)"); |
|
+ |
|
+ la ($inp,"0x40($inp)"); |
|
+ la ($out,"0x40($out)"); |
|
+&{$z? \&aghi:\&ahi} ($len,-0x40); |
|
+ je (".Ldone_vx"); |
|
+ |
|
+ vaf ($b1,$b1,@K[1]); |
|
+ vaf ($c1,$c1,@K[2]); |
|
+ |
|
+ vperm ($a0,$a1,$a1,$beperm); |
|
+ vperm ($b0,$b1,$b1,$beperm); |
|
+ vperm ($c0,$c1,$c1,$beperm); |
|
+ vperm ($d0,$d1,$d1,$beperm); |
|
+ |
|
+&{$z? \&clgfi:\&clfi} ($len,0x40); |
|
+ jl (".Ltail_vx"); |
|
+ |
|
+ vlm ($a1,$d1,"0($inp)"); |
|
+ |
|
+ vx ($a0,$a0,$a1); |
|
+ vx ($b0,$b0,$b1); |
|
+ vx ($c0,$c0,$c1); |
|
+ vx ($d0,$d0,$d1); |
|
+ |
|
+ vstm ($a0,$d0,"0($out)"); |
|
+ |
|
+ la ($inp,"0x40($inp)"); |
|
+ la ($out,"0x40($out)"); |
|
+&{$z? \&aghi:\&ahi} ($len,-0x40); |
|
+ je (".Ldone_vx"); |
|
+ |
|
+ vaf ($a2,$a2,@K[0]); |
|
+ vaf ($b2,$b2,@K[1]); |
|
+ vaf ($c2,$c2,@K[2]); |
|
+ |
|
+ vperm ($a0,$a2,$a2,$beperm); |
|
+ vperm ($b0,$b2,$b2,$beperm); |
|
+ vperm ($c0,$c2,$c2,$beperm); |
|
+ vperm ($d0,$d2,$d2,$beperm); |
|
+ |
|
+&{$z? \&clgfi:\&clfi} ($len,0x40); |
|
+ jl (".Ltail_vx"); |
|
+ |
|
+ vlm ($a1,$d1,"0($inp)"); |
|
+ |
|
+ vx ($a0,$a0,$a1); |
|
+ vx ($b0,$b0,$b1); |
|
+ vx ($c0,$c0,$c1); |
|
+ vx ($d0,$d0,$d1); |
|
+ |
|
+ vstm ($a0,$d0,"0($out)"); |
|
+ |
|
+ la ($inp,"0x40($inp)"); |
|
+ la ($out,"0x40($out)"); |
|
+&{$z? \&aghi:\&ahi} ($len,-0x40); |
|
+ je (".Ldone_vx"); |
|
+ |
|
+ vaf ($a3,$a3,@K[0]); |
|
+ vaf ($b3,$b3,@K[1]); |
|
+ vaf ($c3,$c3,@K[2]); |
|
+ vaf ($d2,@K[3],$t3); # K[3]+3 |
|
+ |
|
+ vperm ($a0,$a3,$a3,$beperm); |
|
+ vperm ($b0,$b3,$b3,$beperm); |
|
+ vperm ($c0,$c3,$c3,$beperm); |
|
+ vperm ($d0,$d3,$d3,$beperm); |
|
+ |
|
+&{$z? \&clgfi:\&clfi} ($len,0x40); |
|
+ jl (".Ltail_vx"); |
|
+ |
|
+ vaf ($d3,$d2,$t1); # K[3]+4 |
|
+ vlm ($a1,$d1,"0($inp)"); |
|
+ |
|
+ vx ($a0,$a0,$a1); |
|
+ vx ($b0,$b0,$b1); |
|
+ vx ($c0,$c0,$c1); |
|
+ vx ($d0,$d0,$d1); |
|
+ |
|
+ vstm ($a0,$d0,"0($out)"); |
|
+ |
|
+ la ($inp,"0x40($inp)"); |
|
+ la ($out,"0x40($out)"); |
|
+&{$z? \&aghi:\&ahi} ($len,-0x40); |
|
+ je (".Ldone_vx"); |
|
+ |
|
+ vaf ($a4,$a4,@K[0]); |
|
+ vaf ($b4,$b4,@K[1]); |
|
+ vaf ($c4,$c4,@K[2]); |
|
+ vaf ($d4,$d4,$d3); # +K[3]+4 |
|
+ vaf ($d3,$d3,$t1); # K[3]+5 |
|
+ vaf (@K[3],$d2,$t3); # K[3]+=6 |
|
+ |
|
+ vperm ($a0,$a4,$a4,$beperm); |
|
+ vperm ($b0,$b4,$b4,$beperm); |
|
+ vperm ($c0,$c4,$c4,$beperm); |
|
+ vperm ($d0,$d4,$d4,$beperm); |
|
+ |
|
+&{$z? \&clgfi:\&clfi} ($len,0x40); |
|
+ jl (".Ltail_vx"); |
|
+ |
|
+ vlm ($a1,$d1,"0($inp)"); |
|
+ |
|
+ vx ($a0,$a0,$a1); |
|
+ vx ($b0,$b0,$b1); |
|
+ vx ($c0,$c0,$c1); |
|
+ vx ($d0,$d0,$d1); |
|
+ |
|
+ vstm ($a0,$d0,"0($out)"); |
|
+ |
|
+ la ($inp,"0x40($inp)"); |
|
+ la ($out,"0x40($out)"); |
|
+&{$z? \&aghi:\&ahi} ($len,-0x40); |
|
+ je (".Ldone_vx"); |
|
+ |
|
+ vaf ($a5,$a5,@K[0]); |
|
+ vaf ($b5,$b5,@K[1]); |
|
+ vaf ($c5,$c5,@K[2]); |
|
+ vaf ($d5,$d5,$d3); # +K[3]+5 |
|
+ |
|
+ vperm ($a0,$a5,$a5,$beperm); |
|
+ vperm ($b0,$b5,$b5,$beperm); |
|
+ vperm ($c0,$c5,$c5,$beperm); |
|
+ vperm ($d0,$d5,$d5,$beperm); |
|
+ |
|
+&{$z? \&clgfi:\&clfi} ($len,0x40); |
|
+ jl (".Ltail_vx"); |
|
+ |
|
+ vlm ($a1,$d1,"0($inp)"); |
|
+ |
|
+ vx ($a0,$a0,$a1); |
|
+ vx ($b0,$b0,$b1); |
|
+ vx ($c0,$c0,$c1); |
|
+ vx ($d0,$d0,$d1); |
|
+ |
|
+ vstm ($a0,$d0,"0($out)"); |
|
+ |
|
+ la ($inp,"0x40($inp)"); |
|
+ la ($out,"0x40($out)"); |
|
+ lhi ("%r0",10); |
|
+&{$z? \&aghi:\&ahi} ($len,-0x40); |
|
+ jne (".Loop_outer_vx"); |
|
+ |
|
+LABEL (".Ldone_vx"); |
|
+if (!$z) { |
|
+ ld ("%f4","$FRAME+16*$SIZE_T+2*8($sp)"); |
|
+ ld ("%f6","$FRAME+16*$SIZE_T+3*8($sp)"); |
|
+} else { |
|
+ ld ("%f8","$FRAME-8*8($sp)"); |
|
+ ld ("%f9","$FRAME-8*7($sp)"); |
|
+ ld ("%f10","$FRAME-8*6($sp)"); |
|
+ ld ("%f11","$FRAME-8*5($sp)"); |
|
+ ld ("%f12","$FRAME-8*4($sp)"); |
|
+ ld ("%f13","$FRAME-8*3($sp)"); |
|
+ ld ("%f14","$FRAME-8*2($sp)"); |
|
+ ld ("%f15","$FRAME-8*1($sp)"); |
|
+} |
|
+&{$z? \&lmg:\&lm} ("%r6","%r7","$FRAME+6*$SIZE_T($sp)"); |
|
+ la ($sp,"$FRAME($sp)"); |
|
+ br ("%r14"); |
|
+ |
|
+ALIGN (16); |
|
+LABEL (".Ltail_vx"); |
|
+if (!$z) { |
|
+ ld ("%f4","$FRAME+16*$SIZE_T+2*8($sp)"); |
|
+ ld ("%f6","$FRAME+16*$SIZE_T+3*8($sp)"); |
|
+} else { |
|
+ ld ("%f8","$FRAME-8*8($sp)"); |
|
+ ld ("%f9","$FRAME-8*7($sp)"); |
|
+ ld ("%f10","$FRAME-8*6($sp)"); |
|
+ ld ("%f11","$FRAME-8*5($sp)"); |
|
+ ld ("%f12","$FRAME-8*4($sp)"); |
|
+ ld ("%f13","$FRAME-8*3($sp)"); |
|
+ ld ("%f14","$FRAME-8*2($sp)"); |
|
+ ld ("%f15","$FRAME-8*1($sp)"); |
|
+} |
|
+ vstm ($a0,$d0,"$stdframe($sp)"); |
|
+ lghi ("%r1",0); |
|
+ |
|
+LABEL (".Loop_tail_vx"); |
|
+ llgc ("%r5","0(%r1,$inp)"); |
|
+ llgc ("%r6","$stdframe(%r1,$sp)"); |
|
+ xr ("%r6","%r5"); |
|
+ stc ("%r6","0(%r1,$out)"); |
|
+ la ("%r1","1(%r1)"); |
|
+ brct ($len,".Loop_tail_vx"); |
|
+ |
|
+&{$z? \&lmg:\&lm} ("%r6","%r7","$FRAME+6*$SIZE_T($sp)"); |
|
+ la ($sp,"$FRAME($sp)"); |
|
+ br ("%r14"); |
|
+SIZE ("ChaCha20_ctr32_vx",".-ChaCha20_ctr32_vx"); |
|
+} |
|
+################ |
|
+ |
|
+ALIGN (32); |
|
+LABEL (".Lsigma"); |
|
+LONG (0x61707865,0x3320646e,0x79622d32,0x6b206574); # endian-neutral sigma |
|
+LONG (1,0,0,0); |
|
+LONG (2,0,0,0); |
|
+LONG (3,0,0,0); |
|
+LONG (0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c); # byte swap |
|
+ |
|
+LONG (0,1,2,3); |
|
+LONG (0x61707865,0x61707865,0x61707865,0x61707865); # smashed sigma |
|
+LONG (0x3320646e,0x3320646e,0x3320646e,0x3320646e); |
|
+LONG (0x79622d32,0x79622d32,0x79622d32,0x79622d32); |
|
+LONG (0x6b206574,0x6b206574,0x6b206574,0x6b206574); |
|
+ |
|
+ASCIZ ("\"ChaCha20 for s390x, CRYPTOGAMS by <appro\@openssl.org>\""); |
|
+ALIGN (4); |
|
+ |
|
+PERLASM_END(); |
|
diff -up openssl-1.1.1e/crypto/perlasm/s390x.pm.s390x-update openssl-1.1.1e/crypto/perlasm/s390x.pm |
|
--- openssl-1.1.1e/crypto/perlasm/s390x.pm.s390x-update 2020-03-19 16:20:22.039227394 +0100 |
|
+++ openssl-1.1.1e/crypto/perlasm/s390x.pm 2020-03-19 16:20:22.039227394 +0100 |
|
@@ -0,0 +1,3060 @@ |
|
+#!/usr/bin/env perl |
|
+# Copyright 2018 The OpenSSL Project Authors. All Rights Reserved. |
|
+# |
|
+# Licensed under the OpenSSL license (the "License"). You may not use |
|
+# this file except in compliance with the License. You can obtain a copy |
|
+# in the file LICENSE in the source distribution or at |
|
+# https://www.openssl.org/source/license.html |
|
+ |
|
+# Copyright IBM Corp. 2018 |
|
+# Author: Patrick Steuer <patrick.steuer@de.ibm.com> |
|
+ |
|
+package perlasm::s390x; |
|
+ |
|
+use strict; |
|
+use warnings; |
|
+use Carp qw(confess); |
|
+use Exporter qw(import); |
|
+ |
|
+our @EXPORT=qw(PERLASM_BEGIN PERLASM_END); |
|
+our @EXPORT_OK=qw(AUTOLOAD LABEL INCLUDE stfle); |
|
+our %EXPORT_TAGS=( |
|
+ MSA => [qw(kmac km kmc kimd klmd)], |
|
+ MSA4 => [qw(kmf kmo pcc kmctr)], |
|
+ MSA5 => [qw(ppno prno)], |
|
+ MSA8 => [qw(kma)], |
|
+ VX => [qw(vgef vgeg vgbm vzero vone vgm vgmb vgmh vgmf vgmg |
|
+ vl vlr vlrep vlrepb vlreph vlrepf vlrepg vleb vleh vlef vleg vleib |
|
+ vleih vleif vleig vlgv vlgvb vlgvh vlgvf vlgvg vllez vllezb vllezh |
|
+ vllezf vllezg vlm vlbb vlvg vlvgb vlvgh vlvgf vlvgg vlvgp |
|
+ vll vmrh vmrhb vmrhh vmrhf vmrhg vmrl vmrlb vmrlh vmrlf vmrlg vpk |
|
+ vpkh vpkf vpkg vpks vpksh vpksf vpksg vpkshs vpksfs vpksgs vpkls |
|
+ vpklsh vpklsf vpklsg vpklshs vpklsfs vpklsgs vperm vpdi vrep vrepb |
|
+ vreph vrepf vrepg vrepi vrepib vrepih vrepif vrepig vscef vsceg |
|
+ vsel vseg vsegb vsegh vsegf vst vsteb vsteh vstef vsteg vstm vstl |
|
+ vuph vuphb vuphh vuphf vuplh vuplhb vuplhh vuplhf vupl vuplb vuplhw |
|
+ vuplf vupll vupllb vupllh vupllf va vab vah vaf vag vaq vacc vaccb |
|
+ vacch vaccf vaccg vaccq vac vacq vaccc vacccq vn vnc vavg vavgb |
|
+ vavgh vavgf vavgg vavgl vavglb vavglh vavglf vavglg vcksm vec_ vecb |
|
+ vech vecf vecg vecl veclb veclh veclf veclg vceq vceqb vceqh vceqf |
|
+ vceqg vceqbs vceqhs vceqfs vceqgs vch vchb vchh vchf vchg vchbs |
|
+ vchhs vchfs vchgs vchl vchlb vchlh vchlf vchlg vchlbs vchlhs vchlfs |
|
+ vchlgs vclz vclzb vclzh vclzf vclzg vctz vctzb vctzh vctzf vctzg |
|
+ vx vgfm vgfmb vgfmh vgfmf vgfmg vgfma vgfmab vgfmah vgfmaf vgfmag |
|
+ vlc vlcb vlch vlcf vlcg vlp vlpb vlph vlpf vlpg vmx vmxb vmxh vmxf |
|
+ vmxg vmxl vmxlb vmxlh vmxlf vmxlg vmn vmnb vmnh vmnf vmng vmnl |
|
+ vmnlb vmnlh vmnlf vmnlg vmal vmalb vmalhw vmalf vmah vmahb vmahh |
|
+ vmahf vmalh vmalhb vmalhh vmalhf vmae vmaeb vmaeh vmaef vmale |
|
+ vmaleb vmaleh vmalef vmao vmaob vmaoh vmaof vmalo vmalob vmaloh |
|
+ vmalof vmh vmhb vmhh vmhf vmlh vmlhb vmlhh vmlhf vml vmlb vmlhw |
|
+ vmlf vme vmeb vmeh vmef vmle vmleb vmleh vmlef vmo vmob vmoh vmof |
|
+ vmlo vmlob vmloh vmlof vno vnot vo vpopct verllv verllvb verllvh |
|
+ verllvf verllvg verll verllb verllh verllf verllg verim verimb |
|
+ verimh verimf verimg veslv veslvb veslvh veslvf veslvg vesl veslb |
|
+ veslh veslf veslg vesrav vesravb vesravh vesravf vesravg vesra |
|
+ vesrab vesrah vesraf vesrag vesrlv vesrlvb vesrlvh vesrlvf vesrlvg |
|
+ vesrl vesrlb vesrlh vesrlf vesrlg vsl vslb vsldb vsra vsrab vsrl |
|
+ vsrlb vs vsb vsh vsf vsg vsq vscbi vscbib vscbih vscbif vscbig |
|
+ vscbiq vsbi vsbiq vsbcbi vsbcbiq vsumg vsumgh vsumgf vsumq vsumqf |
|
+ vsumqg vsum vsumb vsumh vtm vfae vfaeb vfaeh vfaef vfaebs vfaehs |
|
+ vfaefs vfaezb vfaezh vfaezf vfaezbs vfaezhs vfaezfs vfee vfeeb |
|
+ vfeeh vfeef vfeebs vfeehs vfeefs vfeezb vfeezh vfeezf vfeezbs |
|
+ vfeezhs vfeezfs vfene vfeneb vfeneh vfenef vfenebs vfenehs vfenefs |
|
+ vfenezb vfenezh vfenezf vfenezbs vfenezhs vfenezfs vistr vistrb |
|
+ vistrh vistrf vistrbs vistrhs vistrfs vstrc vstrcb vstrch vstrcf |
|
+ vstrcbs vstrchs vstrcfs vstrczb vstrczh vstrczf vstrczbs vstrczhs |
|
+ vstrczfs vfa vfadb wfadb wfc wfcdb wfk wfkdb vfce vfcedb wfcedb |
|
+ vfcedbs wfcedbs vfch vfchdb wfchdb vfchdbs wfchdbs vfche vfchedb |
|
+ wfchedb vfchedbs wfchedbs vcdg vcdgb wcdgb vcdlg vcdlgb wcdlgb vcgd |
|
+ vcgdb wcgdb vclgd vclgdb wclgdb vfd vfddb wfddb vfi vfidb wfidb |
|
+ vlde vldeb wldeb vled vledb wledb vfm vfmdb wfmdb vfma vfmadb |
|
+ wfmadb vfms vfmsdb wfmsdb vfpso vfpsodb wfpsodb vflcdb wflcdb |
|
+ vflndb wflndb vflpdb wflpdb vfsq vfsqdb wfsqdb vfs vfsdb wfsdb |
|
+ vftci vftcidb wftcidb)], |
|
+ VXE => [qw(vbperm vllezlf vmsl vmslg vnx vnn voc vpopctb vpopcth |
|
+ vpopctf vpopctg vfasb wfasb wfaxb wfcsb wfcxb wfksb wfkxb vfcesb |
|
+ vfcesbs wfcesb wfcesbs wfcexb wfcexbs vfchsb vfchsbs wfchsb wfchsbs |
|
+ wfchxb wfchxbs vfchesb vfchesbs wfchesb wfchesbs wfchexb wfchexbs |
|
+ vfdsb wfdsb wfdxb vfisb wfisb wfixb vfll vflls wflls wflld vflr |
|
+ vflrd wflrd wflrx vfmax vfmaxsb vfmaxdb wfmaxsb wfmaxdb wfmaxxb |
|
+ vfmin vfminsb vfmindb wfminsb wfmindb wfminxb vfmsb wfmsb wfmxb |
|
+ vfnma vfnms vfmasb wfmasb wfmaxb vfmssb wfmssb wfmsxb vfnmasb |
|
+ vfnmadb wfnmasb wfnmadb wfnmaxb vfnmssb vfnmsdb wfnmssb wfnmsdb |
|
+ wfnmsxb vfpsosb wfpsosb vflcsb wflcsb vflnsb wflnsb vflpsb wflpsb |
|
+ vfpsoxb wfpsoxb vflcxb wflcxb vflnxb wflnxb vflpxb wflpxb vfsqsb |
|
+ wfsqsb wfsqxb vfssb wfssb wfsxb vftcisb wftcisb wftcixb)], |
|
+ VXD => [qw(vlrlr vlrl vstrlr vstrl vap vcp vcvb vcvbg vcvd vcvdg vdp |
|
+ vlip vmp vmsp vpkz vpsop vrp vsdp vsrp vsp vtp vupkz)], |
|
+); |
|
+Exporter::export_ok_tags(qw(MSA MSA4 MSA5 MSA8 VX VXE VXD)); |
|
+ |
|
+our $AUTOLOAD; |
|
+ |
|
+my $GR='(?:%r)?([0-9]|1[0-5])'; |
|
+my $VR='(?:%v)?([0-9]|1[0-9]|2[0-9]|3[0-1])'; |
|
+ |
|
+my ($file,$out); |
|
+ |
|
+sub PERLASM_BEGIN |
|
+{ |
|
+ ($file,$out)=(shift,""); |
|
+} |
|
+sub PERLASM_END |
|
+{ |
|
+ if (defined($file)) { |
|
+ open(my $fd,'>',$file)||die("can't open $file: $!"); |
|
+ print({$fd}$out); |
|
+ close($fd); |
|
+ } else { |
|
+ print($out); |
|
+ } |
|
+} |
|
+ |
|
+sub AUTOLOAD { |
|
+ confess(err("PARSE")) if (grep(!defined($_),@_)); |
|
+ my $token; |
|
+ for ($AUTOLOAD) { |
|
+ $token=".$1" if (/^.*::([A-Z_]+)$/); # uppercase: directive |
|
+ $token="\t$1" if (/^.*::([a-z]+)$/); # lowercase: mnemonic |
|
+ confess(err("PARSE")) if (!defined($token)); |
|
+ } |
|
+ $token.="\t" if ($#_>=0); |
|
+ $out.=$token.join(',',@_)."\n"; |
|
+} |
|
+ |
|
+sub LABEL { # label directive |
|
+ confess(err("ARGNUM")) if ($#_!=0); |
|
+ my ($label)=@_; |
|
+ $out.="$label:\n"; |
|
+} |
|
+ |
|
+sub INCLUDE { |
|
+ confess(err("ARGNUM")) if ($#_!=0); |
|
+ my ($file)=@_; |
|
+ $out.="#include \"$file\"\n"; |
|
+} |
|
+ |
|
+# |
|
+# Mnemonics |
|
+# |
|
+ |
|
+sub stfle { |
|
+ confess(err("ARGNUM")) if ($#_!=0); |
|
+ S(0xb2b0,@_); |
|
+} |
|
+ |
|
+# MSA |
|
+ |
|
+sub kmac { |
|
+ confess(err("ARGNUM")) if ($#_!=1); |
|
+ RRE(0xb91e,@_); |
|
+} |
|
+ |
|
+sub km { |
|
+ confess(err("ARGNUM")) if ($#_!=1); |
|
+ RRE(0xb92e,@_); |
|
+} |
|
+ |
|
+sub kmc { |
|
+ confess(err("ARGNUM")) if ($#_!=1); |
|
+ RRE(0xb92f,@_); |
|
+} |
|
+ |
|
+sub kimd { |
|
+ confess(err("ARGNUM")) if ($#_!=1); |
|
+ RRE(0xb93e,@_); |
|
+} |
|
+ |
|
+sub klmd { |
|
+ confess(err("ARGNUM")) if ($#_!=1); |
|
+ RRE(0xb93f,@_); |
|
+} |
|
+ |
|
+# MSA4 |
|
+ |
|
+sub kmf { |
|
+ confess(err("ARGNUM")) if ($#_!=1); |
|
+ RRE(0xb92a,@_); |
|
+} |
|
+ |
|
+sub kmo { |
|
+ confess(err("ARGNUM")) if ($#_!=1); |
|
+ RRE(0xb92b,@_); |
|
+} |
|
+ |
|
+sub pcc { |
|
+ confess(err("ARGNUM")) if ($#_!=-1); |
|
+ RRE(0xb92c,@_); |
|
+} |
|
+ |
|
+sub kmctr { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ RRFb(0xb92d,@_); |
|
+} |
|
+ |
|
+# MSA5 |
|
+ |
|
+sub prno { |
|
+ ppno(@_); |
|
+} |
|
+ |
|
+sub ppno { # deprecated, use prno |
|
+ confess(err("ARGNUM")) if ($#_!=1); |
|
+ RRE(0xb93c,@_); |
|
+} |
|
+ |
|
+# MSA8 |
|
+ |
|
+sub kma { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ RRFb(0xb929,@_); |
|
+} |
|
+ |
|
+# VX - Support Instructions |
|
+ |
|
+sub vgef { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRV(0xe713,@_); |
|
+} |
|
+sub vgeg { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRV(0xe712,@_); |
|
+} |
|
+ |
|
+sub vgbm { |
|
+ confess(err("ARGNUM")) if ($#_!=1); |
|
+ VRIa(0xe744,@_); |
|
+} |
|
+sub vzero { |
|
+ vgbm(@_,0); |
|
+} |
|
+sub vone { |
|
+ vgbm(@_,0xffff); |
|
+} |
|
+ |
|
+sub vgm { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRIb(0xe746,@_); |
|
+} |
|
+sub vgmb { |
|
+ vgm(@_,0); |
|
+} |
|
+sub vgmh { |
|
+ vgm(@_,1); |
|
+} |
|
+sub vgmf { |
|
+ vgm(@_,2); |
|
+} |
|
+sub vgmg { |
|
+ vgm(@_,3); |
|
+} |
|
+ |
|
+sub vl { |
|
+ confess(err("ARGNUM")) if ($#_<1||$#_>2); |
|
+ VRX(0xe706,@_); |
|
+} |
|
+ |
|
+sub vlr { |
|
+ confess(err("ARGNUM")) if ($#_!=1); |
|
+ VRRa(0xe756,@_); |
|
+} |
|
+ |
|
+sub vlrep { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRX(0xe705,@_); |
|
+} |
|
+sub vlrepb { |
|
+ vlrep(@_,0); |
|
+} |
|
+sub vlreph { |
|
+ vlrep(@_,1); |
|
+} |
|
+sub vlrepf { |
|
+ vlrep(@_,2); |
|
+} |
|
+sub vlrepg { |
|
+ vlrep(@_,3); |
|
+} |
|
+ |
|
+sub vleb { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRX(0xe700,@_); |
|
+} |
|
+sub vleh { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRX(0xe701,@_); |
|
+} |
|
+sub vlef { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRX(0xe703,@_); |
|
+} |
|
+sub vleg { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRX(0xe702,@_); |
|
+} |
|
+ |
|
+sub vleib { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRIa(0xe740,@_); |
|
+} |
|
+sub vleih { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRIa(0xe741,@_); |
|
+} |
|
+sub vleif { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRIa(0xe743,@_); |
|
+} |
|
+sub vleig { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRIa(0xe742,@_); |
|
+} |
|
+ |
|
+sub vlgv { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRSc(0xe721,@_); |
|
+} |
|
+sub vlgvb { |
|
+ vlgv(@_,0); |
|
+} |
|
+sub vlgvh { |
|
+ vlgv(@_,1); |
|
+} |
|
+sub vlgvf { |
|
+ vlgv(@_,2); |
|
+} |
|
+sub vlgvg { |
|
+ vlgv(@_,3); |
|
+} |
|
+ |
|
+sub vllez { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRX(0xe704,@_); |
|
+} |
|
+sub vllezb { |
|
+ vllez(@_,0); |
|
+} |
|
+sub vllezh { |
|
+ vllez(@_,1); |
|
+} |
|
+sub vllezf { |
|
+ vllez(@_,2); |
|
+} |
|
+sub vllezg { |
|
+ vllez(@_,3); |
|
+} |
|
+ |
|
+sub vlm { |
|
+ confess(err("ARGNUM")) if ($#_<2||$#_>3); |
|
+ VRSa(0xe736,@_); |
|
+} |
|
+ |
|
+sub vlbb { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRX(0xe707,@_); |
|
+} |
|
+ |
|
+sub vlvg { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRSb(0xe722,@_); |
|
+} |
|
+sub vlvgb { |
|
+ vlvg(@_,0); |
|
+} |
|
+sub vlvgh { |
|
+ vlvg(@_,1); |
|
+} |
|
+sub vlvgf { |
|
+ vlvg(@_,2); |
|
+} |
|
+sub vlvgg { |
|
+ vlvg(@_,3); |
|
+} |
|
+ |
|
+sub vlvgp { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRf(0xe762,@_); |
|
+} |
|
+ |
|
+sub vll { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRSb(0xe737,@_); |
|
+} |
|
+ |
|
+sub vmrh { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe761,@_); |
|
+} |
|
+sub vmrhb { |
|
+ vmrh(@_,0); |
|
+} |
|
+sub vmrhh { |
|
+ vmrh(@_,1); |
|
+} |
|
+sub vmrhf { |
|
+ vmrh(@_,2); |
|
+} |
|
+sub vmrhg { |
|
+ vmrh(@_,3); |
|
+} |
|
+ |
|
+sub vmrl { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe760,@_); |
|
+} |
|
+sub vmrlb { |
|
+ vmrl(@_,0); |
|
+} |
|
+sub vmrlh { |
|
+ vmrl(@_,1); |
|
+} |
|
+sub vmrlf { |
|
+ vmrl(@_,2); |
|
+} |
|
+sub vmrlg { |
|
+ vmrl(@_,3); |
|
+} |
|
+ |
|
+sub vpk { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe794,@_); |
|
+} |
|
+sub vpkh { |
|
+ vpk(@_,1); |
|
+} |
|
+sub vpkf { |
|
+ vpk(@_,2); |
|
+} |
|
+sub vpkg { |
|
+ vpk(@_,3); |
|
+} |
|
+ |
|
+sub vpks { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRRb(0xe797,@_); |
|
+} |
|
+sub vpksh { |
|
+ vpks(@_,1,0); |
|
+} |
|
+sub vpksf { |
|
+ vpks(@_,2,0); |
|
+} |
|
+sub vpksg { |
|
+ vpks(@_,3,0); |
|
+} |
|
+sub vpkshs { |
|
+ vpks(@_,1,1); |
|
+} |
|
+sub vpksfs { |
|
+ vpks(@_,2,1); |
|
+} |
|
+sub vpksgs { |
|
+ vpks(@_,3,1); |
|
+} |
|
+ |
|
+sub vpkls { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRRb(0xe795,@_); |
|
+} |
|
+sub vpklsh { |
|
+ vpkls(@_,1,0); |
|
+} |
|
+sub vpklsf { |
|
+ vpkls(@_,2,0); |
|
+} |
|
+sub vpklsg { |
|
+ vpkls(@_,3,0); |
|
+} |
|
+sub vpklshs { |
|
+ vpkls(@_,1,1); |
|
+} |
|
+sub vpklsfs { |
|
+ vpkls(@_,2,1); |
|
+} |
|
+sub vpklsgs { |
|
+ vpkls(@_,3,1); |
|
+} |
|
+ |
|
+sub vperm { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRe(0xe78c,@_); |
|
+} |
|
+ |
|
+sub vpdi { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe784,@_); |
|
+} |
|
+ |
|
+sub vrep { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRIc(0xe74d,@_); |
|
+} |
|
+sub vrepb { |
|
+ vrep(@_,0); |
|
+} |
|
+sub vreph { |
|
+ vrep(@_,1); |
|
+} |
|
+sub vrepf { |
|
+ vrep(@_,2); |
|
+} |
|
+sub vrepg { |
|
+ vrep(@_,3); |
|
+} |
|
+ |
|
+sub vrepi { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRIa(0xe745,@_); |
|
+} |
|
+sub vrepib { |
|
+ vrepi(@_,0); |
|
+} |
|
+sub vrepih { |
|
+ vrepi(@_,1); |
|
+} |
|
+sub vrepif { |
|
+ vrepi(@_,2); |
|
+} |
|
+sub vrepig { |
|
+ vrepi(@_,3); |
|
+} |
|
+ |
|
+sub vscef { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRV(0xe71b,@_); |
|
+} |
|
+sub vsceg { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRV(0xe71a,@_); |
|
+} |
|
+ |
|
+sub vsel { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRe(0xe78d,@_); |
|
+} |
|
+ |
|
+sub vseg { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRa(0xe75f,@_); |
|
+} |
|
+sub vsegb { |
|
+ vseg(@_,0); |
|
+} |
|
+sub vsegh { |
|
+ vseg(@_,1); |
|
+} |
|
+sub vsegf { |
|
+ vseg(@_,2); |
|
+} |
|
+ |
|
+sub vst { |
|
+ confess(err("ARGNUM")) if ($#_<1||$#_>2); |
|
+ VRX(0xe70e,@_); |
|
+} |
|
+ |
|
+sub vsteb { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRX(0xe708,@_); |
|
+} |
|
+sub vsteh { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRX(0xe709,@_); |
|
+} |
|
+sub vstef { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRX(0xe70b,@_); |
|
+} |
|
+sub vsteg { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRX(0xe70a,@_); |
|
+} |
|
+ |
|
+sub vstm { |
|
+ confess(err("ARGNUM")) if ($#_<2||$#_>3); |
|
+ VRSa(0xe73e,@_); |
|
+} |
|
+ |
|
+sub vstl { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRSb(0xe73f,@_); |
|
+} |
|
+ |
|
+sub vuph { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRa(0xe7d7,@_); |
|
+} |
|
+sub vuphb { |
|
+ vuph(@_,0); |
|
+} |
|
+sub vuphh { |
|
+ vuph(@_,1); |
|
+} |
|
+sub vuphf { |
|
+ vuph(@_,2); |
|
+} |
|
+ |
|
+sub vuplh { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRa(0xe7d5,@_); |
|
+} |
|
+sub vuplhb { |
|
+ vuplh(@_,0); |
|
+} |
|
+sub vuplhh { |
|
+ vuplh(@_,1); |
|
+} |
|
+sub vuplhf { |
|
+ vuplh(@_,2); |
|
+} |
|
+ |
|
+sub vupl { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRa(0xe7d6,@_); |
|
+} |
|
+sub vuplb { |
|
+ vupl(@_,0); |
|
+} |
|
+sub vuplhw { |
|
+ vupl(@_,1); |
|
+} |
|
+sub vuplf { |
|
+ vupl(@_,2); |
|
+} |
|
+ |
|
+sub vupll { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRa(0xe7d4,@_); |
|
+} |
|
+sub vupllb { |
|
+ vupll(@_,0); |
|
+} |
|
+sub vupllh { |
|
+ vupll(@_,1); |
|
+} |
|
+sub vupllf { |
|
+ vupll(@_,2); |
|
+} |
|
+ |
|
+# VX - Integer Instructions |
|
+ |
|
+sub va { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe7f3,@_); |
|
+} |
|
+sub vab { |
|
+ va(@_,0); |
|
+} |
|
+sub vah { |
|
+ va(@_,1); |
|
+} |
|
+sub vaf { |
|
+ va(@_,2); |
|
+} |
|
+sub vag { |
|
+ va(@_,3); |
|
+} |
|
+sub vaq { |
|
+ va(@_,4); |
|
+} |
|
+ |
|
+sub vacc { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe7f1,@_); |
|
+} |
|
+sub vaccb { |
|
+ vacc(@_,0); |
|
+} |
|
+sub vacch { |
|
+ vacc(@_,1); |
|
+} |
|
+sub vaccf { |
|
+ vacc(@_,2); |
|
+} |
|
+sub vaccg { |
|
+ vacc(@_,3); |
|
+} |
|
+sub vaccq { |
|
+ vacc(@_,4); |
|
+} |
|
+ |
|
+sub vac { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRRd(0xe7bb,@_); |
|
+} |
|
+sub vacq { |
|
+ vac(@_,4); |
|
+} |
|
+ |
|
+sub vaccc { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRRd(0xe7b9,@_); |
|
+} |
|
+sub vacccq { |
|
+ vaccc(@_,4); |
|
+} |
|
+ |
|
+sub vn { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRc(0xe768,@_); |
|
+} |
|
+ |
|
+sub vnc { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRc(0xe769,@_); |
|
+} |
|
+ |
|
+sub vavg { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe7f2,@_); |
|
+} |
|
+sub vavgb { |
|
+ vavg(@_,0); |
|
+} |
|
+sub vavgh { |
|
+ vavg(@_,1); |
|
+} |
|
+sub vavgf { |
|
+ vavg(@_,2); |
|
+} |
|
+sub vavgg { |
|
+ vavg(@_,3); |
|
+} |
|
+ |
|
+sub vavgl { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe7f0,@_); |
|
+} |
|
+sub vavglb { |
|
+ vavgl(@_,0); |
|
+} |
|
+sub vavglh { |
|
+ vavgl(@_,1); |
|
+} |
|
+sub vavglf { |
|
+ vavgl(@_,2); |
|
+} |
|
+sub vavglg { |
|
+ vavgl(@_,3); |
|
+} |
|
+ |
|
+sub vcksm { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRc(0xe766,@_); |
|
+} |
|
+ |
|
+sub vec_ { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRa(0xe7db,@_); |
|
+} |
|
+sub vecb { |
|
+ vec_(@_,0); |
|
+} |
|
+sub vech { |
|
+ vec_(@_,1); |
|
+} |
|
+sub vecf { |
|
+ vec_(@_,2); |
|
+} |
|
+sub vecg { |
|
+ vec_(@_,3); |
|
+} |
|
+ |
|
+sub vecl { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRa(0xe7d9,@_); |
|
+} |
|
+sub veclb { |
|
+ vecl(@_,0); |
|
+} |
|
+sub veclh { |
|
+ vecl(@_,1); |
|
+} |
|
+sub veclf { |
|
+ vecl(@_,2); |
|
+} |
|
+sub veclg { |
|
+ vecl(@_,3); |
|
+} |
|
+ |
|
+sub vceq { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRRb(0xe7f8,@_); |
|
+} |
|
+sub vceqb { |
|
+ vceq(@_,0,0); |
|
+} |
|
+sub vceqh { |
|
+ vceq(@_,1,0); |
|
+} |
|
+sub vceqf { |
|
+ vceq(@_,2,0); |
|
+} |
|
+sub vceqg { |
|
+ vceq(@_,3,0); |
|
+} |
|
+sub vceqbs { |
|
+ vceq(@_,0,1); |
|
+} |
|
+sub vceqhs { |
|
+ vceq(@_,1,1); |
|
+} |
|
+sub vceqfs { |
|
+ vceq(@_,2,1); |
|
+} |
|
+sub vceqgs { |
|
+ vceq(@_,3,1); |
|
+} |
|
+ |
|
+sub vch { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRRb(0xe7fb,@_); |
|
+} |
|
+sub vchb { |
|
+ vch(@_,0,0); |
|
+} |
|
+sub vchh { |
|
+ vch(@_,1,0); |
|
+} |
|
+sub vchf { |
|
+ vch(@_,2,0); |
|
+} |
|
+sub vchg { |
|
+ vch(@_,3,0); |
|
+} |
|
+sub vchbs { |
|
+ vch(@_,0,1); |
|
+} |
|
+sub vchhs { |
|
+ vch(@_,1,1); |
|
+} |
|
+sub vchfs { |
|
+ vch(@_,2,1); |
|
+} |
|
+sub vchgs { |
|
+ vch(@_,3,1); |
|
+} |
|
+ |
|
+sub vchl { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRRb(0xe7f9,@_); |
|
+} |
|
+sub vchlb { |
|
+ vchl(@_,0,0); |
|
+} |
|
+sub vchlh { |
|
+ vchl(@_,1,0); |
|
+} |
|
+sub vchlf { |
|
+ vchl(@_,2,0); |
|
+} |
|
+sub vchlg { |
|
+ vchl(@_,3,0); |
|
+} |
|
+sub vchlbs { |
|
+ vchl(@_,0,1); |
|
+} |
|
+sub vchlhs { |
|
+ vchl(@_,1,1); |
|
+} |
|
+sub vchlfs { |
|
+ vchl(@_,2,1); |
|
+} |
|
+sub vchlgs { |
|
+ vchl(@_,3,1); |
|
+} |
|
+ |
|
+sub vclz { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRa(0xe753,@_); |
|
+} |
|
+sub vclzb { |
|
+ vclz(@_,0); |
|
+} |
|
+sub vclzh { |
|
+ vclz(@_,1); |
|
+} |
|
+sub vclzf { |
|
+ vclz(@_,2); |
|
+} |
|
+sub vclzg { |
|
+ vclz(@_,3); |
|
+} |
|
+ |
|
+sub vctz { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRa(0xe752,@_); |
|
+} |
|
+sub vctzb { |
|
+ vctz(@_,0); |
|
+} |
|
+sub vctzh { |
|
+ vctz(@_,1); |
|
+} |
|
+sub vctzf { |
|
+ vctz(@_,2); |
|
+} |
|
+sub vctzg { |
|
+ vctz(@_,3); |
|
+} |
|
+ |
|
+sub vx { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRc(0xe76d,@_); |
|
+} |
|
+ |
|
+sub vgfm { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe7b4,@_); |
|
+} |
|
+sub vgfmb { |
|
+ vgfm(@_,0); |
|
+} |
|
+sub vgfmh { |
|
+ vgfm(@_,1); |
|
+} |
|
+sub vgfmf { |
|
+ vgfm(@_,2); |
|
+} |
|
+sub vgfmg { |
|
+ vgfm(@_,3); |
|
+} |
|
+ |
|
+sub vgfma { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRRd(0xe7bc,@_); |
|
+} |
|
+sub vgfmab { |
|
+ vgfma(@_,0); |
|
+} |
|
+sub vgfmah { |
|
+ vgfma(@_,1); |
|
+} |
|
+sub vgfmaf { |
|
+ vgfma(@_,2); |
|
+} |
|
+sub vgfmag { |
|
+ vgfma(@_,3); |
|
+} |
|
+ |
|
+sub vlc { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRa(0xe7de,@_); |
|
+} |
|
+sub vlcb { |
|
+ vlc(@_,0); |
|
+} |
|
+sub vlch { |
|
+ vlc(@_,1); |
|
+} |
|
+sub vlcf { |
|
+ vlc(@_,2); |
|
+} |
|
+sub vlcg { |
|
+ vlc(@_,3); |
|
+} |
|
+ |
|
+sub vlp { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRa(0xe7df,@_); |
|
+} |
|
+sub vlpb { |
|
+ vlp(@_,0); |
|
+} |
|
+sub vlph { |
|
+ vlp(@_,1); |
|
+} |
|
+sub vlpf { |
|
+ vlp(@_,2); |
|
+} |
|
+sub vlpg { |
|
+ vlp(@_,3); |
|
+} |
|
+ |
|
+sub vmx { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe7ff,@_); |
|
+} |
|
+sub vmxb { |
|
+ vmx(@_,0); |
|
+} |
|
+sub vmxh { |
|
+ vmx(@_,1); |
|
+} |
|
+sub vmxf { |
|
+ vmx(@_,2); |
|
+} |
|
+sub vmxg { |
|
+ vmx(@_,3); |
|
+} |
|
+ |
|
+sub vmxl { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe7fd,@_); |
|
+} |
|
+sub vmxlb { |
|
+ vmxl(@_,0); |
|
+} |
|
+sub vmxlh { |
|
+ vmxl(@_,1); |
|
+} |
|
+sub vmxlf { |
|
+ vmxl(@_,2); |
|
+} |
|
+sub vmxlg { |
|
+ vmxl(@_,3); |
|
+} |
|
+ |
|
+sub vmn { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe7fe,@_); |
|
+} |
|
+sub vmnb { |
|
+ vmn(@_,0); |
|
+} |
|
+sub vmnh { |
|
+ vmn(@_,1); |
|
+} |
|
+sub vmnf { |
|
+ vmn(@_,2); |
|
+} |
|
+sub vmng { |
|
+ vmn(@_,3); |
|
+} |
|
+ |
|
+sub vmnl { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe7fc,@_); |
|
+} |
|
+sub vmnlb { |
|
+ vmnl(@_,0); |
|
+} |
|
+sub vmnlh { |
|
+ vmnl(@_,1); |
|
+} |
|
+sub vmnlf { |
|
+ vmnl(@_,2); |
|
+} |
|
+sub vmnlg { |
|
+ vmnl(@_,3); |
|
+} |
|
+ |
|
+sub vmal { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRRd(0xe7aa,@_); |
|
+} |
|
+sub vmalb { |
|
+ vmal(@_,0); |
|
+} |
|
+sub vmalhw { |
|
+ vmal(@_,1); |
|
+} |
|
+sub vmalf { |
|
+ vmal(@_,2); |
|
+} |
|
+ |
|
+sub vmah { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRRd(0xe7ab,@_); |
|
+} |
|
+sub vmahb { |
|
+ vmah(@_,0); |
|
+} |
|
+sub vmahh { |
|
+ vmah(@_,1); |
|
+} |
|
+sub vmahf { |
|
+ vmah(@_,2); |
|
+} |
|
+ |
|
+sub vmalh { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRRd(0xe7a9,@_); |
|
+} |
|
+sub vmalhb { |
|
+ vmalh(@_,0); |
|
+} |
|
+sub vmalhh { |
|
+ vmalh(@_,1); |
|
+} |
|
+sub vmalhf { |
|
+ vmalh(@_,2); |
|
+} |
|
+ |
|
+sub vmae { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRRd(0xe7ae,@_); |
|
+} |
|
+sub vmaeb { |
|
+ vmae(@_,0); |
|
+} |
|
+sub vmaeh { |
|
+ vmae(@_,1); |
|
+} |
|
+sub vmaef { |
|
+ vmae(@_,2); |
|
+} |
|
+ |
|
+sub vmale { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRRd(0xe7ac,@_); |
|
+} |
|
+sub vmaleb { |
|
+ vmale(@_,0); |
|
+} |
|
+sub vmaleh { |
|
+ vmale(@_,1); |
|
+} |
|
+sub vmalef { |
|
+ vmale(@_,2); |
|
+} |
|
+ |
|
+sub vmao { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRRd(0xe7af,@_); |
|
+} |
|
+sub vmaob { |
|
+ vmao(@_,0); |
|
+} |
|
+sub vmaoh { |
|
+ vmao(@_,1); |
|
+} |
|
+sub vmaof { |
|
+ vmao(@_,2); |
|
+} |
|
+ |
|
+sub vmalo { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRRd(0xe7ad,@_); |
|
+} |
|
+sub vmalob { |
|
+ vmalo(@_,0); |
|
+} |
|
+sub vmaloh { |
|
+ vmalo(@_,1); |
|
+} |
|
+sub vmalof { |
|
+ vmalo(@_,2); |
|
+} |
|
+ |
|
+sub vmh { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe7a3,@_); |
|
+} |
|
+sub vmhb { |
|
+ vmh(@_,0); |
|
+} |
|
+sub vmhh { |
|
+ vmh(@_,1); |
|
+} |
|
+sub vmhf { |
|
+ vmh(@_,2); |
|
+} |
|
+ |
|
+sub vmlh { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe7a1,@_); |
|
+} |
|
+sub vmlhb { |
|
+ vmlh(@_,0); |
|
+} |
|
+sub vmlhh { |
|
+ vmlh(@_,1); |
|
+} |
|
+sub vmlhf { |
|
+ vmlh(@_,2); |
|
+} |
|
+ |
|
+sub vml { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe7a2,@_); |
|
+} |
|
+sub vmlb { |
|
+ vml(@_,0); |
|
+} |
|
+sub vmlhw { |
|
+ vml(@_,1); |
|
+} |
|
+sub vmlf { |
|
+ vml(@_,2); |
|
+} |
|
+ |
|
+sub vme { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe7a6,@_); |
|
+} |
|
+sub vmeb { |
|
+ vme(@_,0); |
|
+} |
|
+sub vmeh { |
|
+ vme(@_,1); |
|
+} |
|
+sub vmef { |
|
+ vme(@_,2); |
|
+} |
|
+ |
|
+sub vmle { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe7a4,@_); |
|
+} |
|
+sub vmleb { |
|
+ vmle(@_,0); |
|
+} |
|
+sub vmleh { |
|
+ vmle(@_,1); |
|
+} |
|
+sub vmlef { |
|
+ vmle(@_,2); |
|
+} |
|
+ |
|
+sub vmo { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe7a7,@_); |
|
+} |
|
+sub vmob { |
|
+ vmo(@_,0); |
|
+} |
|
+sub vmoh { |
|
+ vmo(@_,1); |
|
+} |
|
+sub vmof { |
|
+ vmo(@_,2); |
|
+} |
|
+ |
|
+sub vmlo { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe7a5,@_); |
|
+} |
|
+sub vmlob { |
|
+ vmlo(@_,0); |
|
+} |
|
+sub vmloh { |
|
+ vmlo(@_,1); |
|
+} |
|
+sub vmlof { |
|
+ vmlo(@_,2); |
|
+} |
|
+ |
|
+sub vno { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRc(0xe76b,@_); |
|
+} |
|
+sub vnot { |
|
+ vno(@_,$_[1]); |
|
+} |
|
+ |
|
+sub vo { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRc(0xe76a,@_); |
|
+} |
|
+ |
|
+sub vpopct { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRa(0xe750,@_); |
|
+} |
|
+ |
|
+sub verllv { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe773,@_); |
|
+} |
|
+sub verllvb { |
|
+ verllv(@_,0); |
|
+} |
|
+sub verllvh { |
|
+ verllv(@_,1); |
|
+} |
|
+sub verllvf { |
|
+ verllv(@_,2); |
|
+} |
|
+sub verllvg { |
|
+ verllv(@_,3); |
|
+} |
|
+ |
|
+sub verll { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRSa(0xe733,@_); |
|
+} |
|
+sub verllb { |
|
+ verll(@_,0); |
|
+} |
|
+sub verllh { |
|
+ verll(@_,1); |
|
+} |
|
+sub verllf { |
|
+ verll(@_,2); |
|
+} |
|
+sub verllg { |
|
+ verll(@_,3); |
|
+} |
|
+ |
|
+sub verim { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRId(0xe772,@_); |
|
+} |
|
+sub verimb { |
|
+ verim(@_,0); |
|
+} |
|
+sub verimh { |
|
+ verim(@_,1); |
|
+} |
|
+sub verimf { |
|
+ verim(@_,2); |
|
+} |
|
+sub verimg { |
|
+ verim(@_,3); |
|
+} |
|
+ |
|
+sub veslv { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe770,@_); |
|
+} |
|
+sub veslvb { |
|
+ veslv(@_,0); |
|
+} |
|
+sub veslvh { |
|
+ veslv(@_,1); |
|
+} |
|
+sub veslvf { |
|
+ veslv(@_,2); |
|
+} |
|
+sub veslvg { |
|
+ veslv(@_,3); |
|
+} |
|
+ |
|
+sub vesl { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRSa(0xe730,@_); |
|
+} |
|
+sub veslb { |
|
+ vesl(@_,0); |
|
+} |
|
+sub veslh { |
|
+ vesl(@_,1); |
|
+} |
|
+sub veslf { |
|
+ vesl(@_,2); |
|
+} |
|
+sub veslg { |
|
+ vesl(@_,3); |
|
+} |
|
+ |
|
+sub vesrav { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe77a,@_); |
|
+} |
|
+sub vesravb { |
|
+ vesrav(@_,0); |
|
+} |
|
+sub vesravh { |
|
+ vesrav(@_,1); |
|
+} |
|
+sub vesravf { |
|
+ vesrav(@_,2); |
|
+} |
|
+sub vesravg { |
|
+ vesrav(@_,3); |
|
+} |
|
+ |
|
+sub vesra { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRSa(0xe73a,@_); |
|
+} |
|
+sub vesrab { |
|
+ vesra(@_,0); |
|
+} |
|
+sub vesrah { |
|
+ vesra(@_,1); |
|
+} |
|
+sub vesraf { |
|
+ vesra(@_,2); |
|
+} |
|
+sub vesrag { |
|
+ vesra(@_,3); |
|
+} |
|
+ |
|
+sub vesrlv { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe778,@_); |
|
+} |
|
+sub vesrlvb { |
|
+ vesrlv(@_,0); |
|
+} |
|
+sub vesrlvh { |
|
+ vesrlv(@_,1); |
|
+} |
|
+sub vesrlvf { |
|
+ vesrlv(@_,2); |
|
+} |
|
+sub vesrlvg { |
|
+ vesrlv(@_,3); |
|
+} |
|
+ |
|
+sub vesrl { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRSa(0xe738,@_); |
|
+} |
|
+sub vesrlb { |
|
+ vesrl(@_,0); |
|
+} |
|
+sub vesrlh { |
|
+ vesrl(@_,1); |
|
+} |
|
+sub vesrlf { |
|
+ vesrl(@_,2); |
|
+} |
|
+sub vesrlg { |
|
+ vesrl(@_,3); |
|
+} |
|
+ |
|
+sub vsl { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRc(0xe774,@_); |
|
+} |
|
+ |
|
+sub vslb { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRc(0xe775,@_); |
|
+} |
|
+ |
|
+sub vsldb { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRId(0xe777,@_); |
|
+} |
|
+ |
|
+sub vsra { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRc(0xe77e,@_); |
|
+} |
|
+ |
|
+sub vsrab { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRc(0xe77f,@_); |
|
+} |
|
+ |
|
+sub vsrl { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRc(0xe77c,@_); |
|
+} |
|
+ |
|
+sub vsrlb { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRc(0xe77d,@_); |
|
+} |
|
+ |
|
+sub vs { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe7f7,@_); |
|
+} |
|
+sub vsb { |
|
+ vs(@_,0); |
|
+} |
|
+sub vsh { |
|
+ vs(@_,1); |
|
+} |
|
+sub vsf { |
|
+ vs(@_,2); |
|
+} |
|
+sub vsg { |
|
+ vs(@_,3); |
|
+} |
|
+sub vsq { |
|
+ vs(@_,4); |
|
+} |
|
+ |
|
+sub vscbi { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe7f5,@_); |
|
+} |
|
+sub vscbib { |
|
+ vscbi(@_,0); |
|
+} |
|
+sub vscbih { |
|
+ vscbi(@_,1); |
|
+} |
|
+sub vscbif { |
|
+ vscbi(@_,2); |
|
+} |
|
+sub vscbig { |
|
+ vscbi(@_,3); |
|
+} |
|
+sub vscbiq { |
|
+ vscbi(@_,4); |
|
+} |
|
+ |
|
+sub vsbi { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRRd(0xe7bf,@_); |
|
+} |
|
+sub vsbiq { |
|
+ vsbi(@_,4); |
|
+} |
|
+ |
|
+sub vsbcbi { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRRd(0xe7bd,@_); |
|
+} |
|
+sub vsbcbiq { |
|
+ vsbcbi(@_,4); |
|
+} |
|
+ |
|
+sub vsumg { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe765,@_); |
|
+} |
|
+sub vsumgh { |
|
+ vsumg(@_,1); |
|
+} |
|
+sub vsumgf { |
|
+ vsumg(@_,2); |
|
+} |
|
+ |
|
+sub vsumq { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe767,@_); |
|
+} |
|
+sub vsumqf { |
|
+ vsumq(@_,2); |
|
+} |
|
+sub vsumqg { |
|
+ vsumq(@_,3); |
|
+} |
|
+ |
|
+sub vsum { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRc(0xe764,@_); |
|
+} |
|
+sub vsumb { |
|
+ vsum(@_,0); |
|
+} |
|
+sub vsumh { |
|
+ vsum(@_,1); |
|
+} |
|
+ |
|
+sub vtm { |
|
+ confess(err("ARGNUM")) if ($#_!=1); |
|
+ VRRa(0xe7d8,@_); |
|
+} |
|
+ |
|
+# VX - String Instructions |
|
+ |
|
+sub vfae { |
|
+ confess(err("ARGNUM")) if ($#_<3||$#_>4); |
|
+ VRRb(0xe782,@_); |
|
+} |
|
+sub vfaeb { |
|
+ vfae(@_[0..2],0,$_[3]); |
|
+} |
|
+sub vfaeh { |
|
+ vfae(@_[0..2],1,$_[3]); |
|
+} |
|
+sub vfaef { |
|
+ vfae(@_[0..2],2,$_[3]); |
|
+} |
|
+sub vfaebs { |
|
+ $_[3]=0 if (!defined($_[3])); |
|
+ vfae(@_[0..2],0,0x1|$_[3]); |
|
+} |
|
+sub vfaehs { |
|
+ $_[3]=0 if (!defined($_[3])); |
|
+ vfae(@_[0..2],1,0x1|$_[3]); |
|
+} |
|
+sub vfaefs { |
|
+ $_[3]=0 if (!defined($_[3])); |
|
+ vfae(@_[0..2],2,0x1|$_[3]); |
|
+} |
|
+sub vfaezb { |
|
+ $_[3]=0 if (!defined($_[3])); |
|
+ vfae(@_[0..2],0,0x2|$_[3]); |
|
+} |
|
+sub vfaezh { |
|
+ $_[3]=0 if (!defined($_[3])); |
|
+ vfae(@_[0..2],1,0x2|$_[3]); |
|
+} |
|
+sub vfaezf { |
|
+ $_[3]=0 if (!defined($_[3])); |
|
+ vfae(@_[0..2],2,0x2|$_[3]); |
|
+} |
|
+sub vfaezbs { |
|
+ $_[3]=0 if (!defined($_[3])); |
|
+ vfae(@_[0..2],0,0x3|$_[3]); |
|
+} |
|
+sub vfaezhs { |
|
+ $_[3]=0 if (!defined($_[3])); |
|
+ vfae(@_[0..2],1,0x3|$_[3]); |
|
+} |
|
+sub vfaezfs { |
|
+ $_[3]=0 if (!defined($_[3])); |
|
+ vfae(@_[0..2],2,0x3|$_[3]); |
|
+} |
|
+ |
|
+sub vfee { |
|
+ confess(err("ARGNUM")) if ($#_<3||$#_>4); |
|
+ VRRb(0xe780,@_); |
|
+} |
|
+sub vfeeb { |
|
+ vfee(@_[0..2],0,$_[3]); |
|
+} |
|
+sub vfeeh { |
|
+ vfee(@_[0..2],1,$_[3]); |
|
+} |
|
+sub vfeef { |
|
+ vfee(@_[0..2],2,$_[3]); |
|
+} |
|
+sub vfeebs { |
|
+ vfee(@_,0,1); |
|
+} |
|
+sub vfeehs { |
|
+ vfee(@_,1,1); |
|
+} |
|
+sub vfeefs { |
|
+ vfee(@_,2,1); |
|
+} |
|
+sub vfeezb { |
|
+ vfee(@_,0,2); |
|
+} |
|
+sub vfeezh { |
|
+ vfee(@_,1,2); |
|
+} |
|
+sub vfeezf { |
|
+ vfee(@_,2,2); |
|
+} |
|
+sub vfeezbs { |
|
+ vfee(@_,0,3); |
|
+} |
|
+sub vfeezhs { |
|
+ vfee(@_,1,3); |
|
+} |
|
+sub vfeezfs { |
|
+ vfee(@_,2,3); |
|
+} |
|
+ |
|
+sub vfene { |
|
+ confess(err("ARGNUM")) if ($#_<3||$#_>4); |
|
+ VRRb(0xe781,@_); |
|
+} |
|
+sub vfeneb { |
|
+ vfene(@_[0..2],0,$_[3]); |
|
+} |
|
+sub vfeneh { |
|
+ vfene(@_[0..2],1,$_[3]); |
|
+} |
|
+sub vfenef { |
|
+ vfene(@_[0..2],2,$_[3]); |
|
+} |
|
+sub vfenebs { |
|
+ vfene(@_,0,1); |
|
+} |
|
+sub vfenehs { |
|
+ vfene(@_,1,1); |
|
+} |
|
+sub vfenefs { |
|
+ vfene(@_,2,1); |
|
+} |
|
+sub vfenezb { |
|
+ vfene(@_,0,2); |
|
+} |
|
+sub vfenezh { |
|
+ vfene(@_,1,2); |
|
+} |
|
+sub vfenezf { |
|
+ vfene(@_,2,2); |
|
+} |
|
+sub vfenezbs { |
|
+ vfene(@_,0,3); |
|
+} |
|
+sub vfenezhs { |
|
+ vfene(@_,1,3); |
|
+} |
|
+sub vfenezfs { |
|
+ vfene(@_,2,3); |
|
+} |
|
+ |
|
+sub vistr { |
|
+ confess(err("ARGNUM")) if ($#_<2||$#_>3); |
|
+ VRRa(0xe75c,@_[0..2],0,$_[3]); |
|
+} |
|
+sub vistrb { |
|
+ vistr(@_[0..1],0,$_[2]); |
|
+} |
|
+sub vistrh { |
|
+ vistr(@_[0..1],1,$_[2]); |
|
+} |
|
+sub vistrf { |
|
+ vistr(@_[0..1],2,$_[2]); |
|
+} |
|
+sub vistrbs { |
|
+ vistr(@_,0,1); |
|
+} |
|
+sub vistrhs { |
|
+ vistr(@_,1,1); |
|
+} |
|
+sub vistrfs { |
|
+ vistr(@_,2,1); |
|
+} |
|
+ |
|
+sub vstrc { |
|
+ confess(err("ARGNUM")) if ($#_<4||$#_>5); |
|
+ VRRd(0xe78a,@_); |
|
+} |
|
+sub vstrcb { |
|
+ vstrc(@_[0..3],0,$_[4]); |
|
+} |
|
+sub vstrch { |
|
+ vstrc(@_[0..3],1,$_[4]); |
|
+} |
|
+sub vstrcf { |
|
+ vstrc(@_[0..3],2,$_[4]); |
|
+} |
|
+sub vstrcbs { |
|
+ $_[4]=0 if (!defined($_[4])); |
|
+ vstrc(@_[0..3],0,0x1|$_[4]); |
|
+} |
|
+sub vstrchs { |
|
+ $_[4]=0 if (!defined($_[4])); |
|
+ vstrc(@_[0..3],1,0x1|$_[4]); |
|
+} |
|
+sub vstrcfs { |
|
+ $_[4]=0 if (!defined($_[4])); |
|
+ vstrc(@_[0..3],2,0x1|$_[4]); |
|
+} |
|
+sub vstrczb { |
|
+ $_[4]=0 if (!defined($_[4])); |
|
+ vstrc(@_[0..3],0,0x2|$_[4]); |
|
+} |
|
+sub vstrczh { |
|
+ $_[4]=0 if (!defined($_[4])); |
|
+ vstrc(@_[0..3],1,0x2|$_[4]); |
|
+} |
|
+sub vstrczf { |
|
+ $_[4]=0 if (!defined($_[4])); |
|
+ vstrc(@_[0..3],2,0x2|$_[4]); |
|
+} |
|
+sub vstrczbs { |
|
+ $_[4]=0 if (!defined($_[4])); |
|
+ vstrc(@_[0..3],0,0x3|$_[4]); |
|
+} |
|
+sub vstrczhs { |
|
+ $_[4]=0 if (!defined($_[4])); |
|
+ vstrc(@_[0..3],1,0x3|$_[4]); |
|
+} |
|
+sub vstrczfs { |
|
+ $_[4]=0 if (!defined($_[4])); |
|
+ vstrc(@_[0..3],2,0x3|$_[4]); |
|
+} |
|
+ |
|
+# VX - Floating-point Instructions |
|
+ |
|
+sub vfa { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRRc(0xe7e3,@_); |
|
+} |
|
+sub vfadb { |
|
+ vfa(@_,3,0); |
|
+} |
|
+sub wfadb { |
|
+ vfa(@_,3,8); |
|
+} |
|
+ |
|
+sub wfc { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRa(0xe7cb,@_); |
|
+} |
|
+sub wfcdb { |
|
+ wfc(@_,3,0); |
|
+} |
|
+ |
|
+sub wfk { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRa(0xe7ca,@_); |
|
+} |
|
+sub wfksb { |
|
+ wfk(@_,2,0); |
|
+} |
|
+sub wfkdb { |
|
+ wfk(@_,3,0); |
|
+} |
|
+sub wfkxb { |
|
+ wfk(@_,4,0); |
|
+} |
|
+ |
|
+sub vfce { |
|
+ confess(err("ARGNUM")) if ($#_!=5); |
|
+ VRRc(0xe7e8,@_); |
|
+} |
|
+sub vfcedb { |
|
+ vfce(@_,3,0,0); |
|
+} |
|
+sub vfcedbs { |
|
+ vfce(@_,3,0,1); |
|
+} |
|
+sub wfcedb { |
|
+ vfce(@_,3,8,0); |
|
+} |
|
+sub wfcedbs { |
|
+ vfce(@_,3,8,1); |
|
+} |
|
+ |
|
+sub vfch { |
|
+ confess(err("ARGNUM")) if ($#_!=5); |
|
+ VRRc(0xe7eb,@_); |
|
+} |
|
+sub vfchdb { |
|
+ vfch(@_,3,0,0); |
|
+} |
|
+sub vfchdbs { |
|
+ vfch(@_,3,0,1); |
|
+} |
|
+sub wfchdb { |
|
+ vfch(@_,3,8,0); |
|
+} |
|
+sub wfchdbs { |
|
+ vfch(@_,3,8,1); |
|
+} |
|
+ |
|
+sub vfche { |
|
+ confess(err("ARGNUM")) if ($#_!=5); |
|
+ VRRc(0xe7ea,@_); |
|
+} |
|
+sub vfchedb { |
|
+ vfche(@_,3,0,0); |
|
+} |
|
+sub vfchedbs { |
|
+ vfche(@_,3,0,1); |
|
+} |
|
+sub wfchedb { |
|
+ vfche(@_,3,8,0); |
|
+} |
|
+sub wfchedbs { |
|
+ vfche(@_,3,8,1); |
|
+} |
|
+ |
|
+sub vcdg { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRRa(0xe7c3,@_); |
|
+} |
|
+sub vcdgb { |
|
+ vcdg(@_[0..1],3,@_[2..3]); |
|
+} |
|
+sub wcdgb { |
|
+ vcdg(@_[0..1],3,0x8|$_[2],$_[3]); |
|
+} |
|
+ |
|
+sub vcdlg { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRRa(0xe7c1,@_); |
|
+} |
|
+sub vcdlgb { |
|
+ vcdlg(@_[0..1],3,@_[2..3]); |
|
+} |
|
+sub wcdlgb { |
|
+ vcdlg(@_[0..1],3,0x8|$_[2],$_[3]); |
|
+} |
|
+ |
|
+sub vcgd { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRRa(0xe7c2,@_); |
|
+} |
|
+sub vcgdb { |
|
+ vcgd(@_[0..1],3,@_[2..3]); |
|
+} |
|
+sub wcgdb { |
|
+ vcgd(@_[0..1],3,0x8|$_[2],$_[3]); |
|
+} |
|
+ |
|
+sub vclgd { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRRa(0xe7c0,@_); |
|
+} |
|
+sub vclgdb { |
|
+ vclgd(@_[0..1],3,@_[2..3]); |
|
+} |
|
+sub wclgdb { |
|
+ vclgd(@_[0..1],3,0x8|$_[2],$_[3]); |
|
+} |
|
+ |
|
+sub vfd { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRRc(0xe7e5,@_); |
|
+} |
|
+sub vfddb { |
|
+ vfd(@_,3,0); |
|
+} |
|
+sub wfddb { |
|
+ vfd(@_,3,8); |
|
+} |
|
+ |
|
+sub vfi { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRRa(0xe7c7,@_); |
|
+} |
|
+sub vfidb { |
|
+ vfi(@_[0..1],3,@_[2..3]); |
|
+} |
|
+sub wfidb { |
|
+ vfi(@_[0..1],3,0x8|$_[2],$_[3]); |
|
+} |
|
+ |
|
+sub vlde { # deprecated, use vfll |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRa(0xe7c4,@_); |
|
+} |
|
+sub vldeb { # deprecated, use vflls |
|
+ vlde(@_,2,0); |
|
+} |
|
+sub wldeb { # deprecated, use wflls |
|
+ vlde(@_,2,8); |
|
+} |
|
+ |
|
+sub vled { # deprecated, use vflr |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRRa(0xe7c5,@_); |
|
+} |
|
+sub vledb { # deprecated, use vflrd |
|
+ vled(@_[0..1],3,@_[2..3]); |
|
+} |
|
+sub wledb { # deprecated, use wflrd |
|
+ vled(@_[0..1],3,0x8|$_[2],$_[3]); |
|
+} |
|
+ |
|
+sub vfm { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRRc(0xe7e7,@_); |
|
+} |
|
+sub vfmdb { |
|
+ vfm(@_,3,0); |
|
+} |
|
+sub wfmdb { |
|
+ vfm(@_,3,8); |
|
+} |
|
+ |
|
+sub vfma { |
|
+ confess(err("ARGNUM")) if ($#_!=5); |
|
+ VRRe(0xe78f,@_); |
|
+} |
|
+sub vfmadb { |
|
+ vfma(@_,0,3); |
|
+} |
|
+sub wfmadb { |
|
+ vfma(@_,8,3); |
|
+} |
|
+ |
|
+sub vfms { |
|
+ confess(err("ARGNUM")) if ($#_!=5); |
|
+ VRRe(0xe78e,@_); |
|
+} |
|
+sub vfmsdb { |
|
+ vfms(@_,0,3); |
|
+} |
|
+sub wfmsdb { |
|
+ vfms(@_,8,3); |
|
+} |
|
+ |
|
+sub vfpso { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRRa(0xe7cc,@_); |
|
+} |
|
+sub vfpsodb { |
|
+ vfpso(@_[0..1],3,0,$_[2]); |
|
+} |
|
+sub wfpsodb { |
|
+ vfpso(@_[0..1],3,8,$_[2]); |
|
+} |
|
+sub vflcdb { |
|
+ vfpso(@_,3,0,0); |
|
+} |
|
+sub wflcdb { |
|
+ vfpso(@_,3,8,0); |
|
+} |
|
+sub vflndb { |
|
+ vfpso(@_,3,0,1); |
|
+} |
|
+sub wflndb { |
|
+ vfpso(@_,3,8,1); |
|
+} |
|
+sub vflpdb { |
|
+ vfpso(@_,3,0,2); |
|
+} |
|
+sub wflpdb { |
|
+ vfpso(@_,3,8,2); |
|
+} |
|
+ |
|
+sub vfsq { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRRa(0xe7ce,@_); |
|
+} |
|
+sub vfsqdb { |
|
+ vfsq(@_,3,0); |
|
+} |
|
+sub wfsqdb { |
|
+ vfsq(@_,3,8); |
|
+} |
|
+ |
|
+sub vfs { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRRc(0xe7e2,@_); |
|
+} |
|
+sub vfsdb { |
|
+ vfs(@_,3,0); |
|
+} |
|
+sub wfsdb { |
|
+ vfs(@_,3,8); |
|
+} |
|
+ |
|
+sub vftci { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRIe(0xe74a,@_); |
|
+} |
|
+sub vftcidb { |
|
+ vftci(@_,3,0); |
|
+} |
|
+sub wftcidb { |
|
+ vftci(@_,3,8); |
|
+} |
|
+ |
|
+# VXE - Support Instructions |
|
+ |
|
+sub vbperm { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRc(0xe785,@_); |
|
+} |
|
+ |
|
+sub vllezlf { |
|
+ vllez(@_,6); |
|
+} |
|
+ |
|
+# VXE - Integer Instructions |
|
+ |
|
+sub vmsl { |
|
+ confess(err("ARGNUM")) if ($#_!=5); |
|
+ VRRd(0xe7b8,@_); |
|
+} |
|
+sub vmslg { |
|
+ vmsl(@_[0..3],3,$_[4]); |
|
+} |
|
+ |
|
+sub vnx { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRc(0xe76c,@_); |
|
+} |
|
+ |
|
+sub vnn { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRc(0xe76e,@_); |
|
+} |
|
+ |
|
+sub voc { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRc(0xe76f,@_); |
|
+} |
|
+ |
|
+sub vpopctb { |
|
+ vpopct(@_,0); |
|
+} |
|
+sub vpopcth { |
|
+ vpopct(@_,1); |
|
+} |
|
+sub vpopctf { |
|
+ vpopct(@_,2); |
|
+} |
|
+sub vpopctg { |
|
+ vpopct(@_,3); |
|
+} |
|
+ |
|
+# VXE - Floating-Point Instructions |
|
+ |
|
+sub vfasb { |
|
+ vfa(@_,2,0); |
|
+} |
|
+sub wfasb { |
|
+ vfa(@_,2,8); |
|
+} |
|
+sub wfaxb { |
|
+ vfa(@_,4,8); |
|
+} |
|
+ |
|
+sub wfcsb { |
|
+ wfc(@_,2,0); |
|
+} |
|
+sub wfcxb { |
|
+ wfc(@_,4,0); |
|
+} |
|
+ |
|
+sub vfcesb { |
|
+ vfce(@_,2,0,0); |
|
+} |
|
+sub vfcesbs { |
|
+ vfce(@_,2,0,1); |
|
+} |
|
+sub wfcesb { |
|
+ vfce(@_,2,8,0); |
|
+} |
|
+sub wfcesbs { |
|
+ vfce(@_,2,8,1); |
|
+} |
|
+sub wfcexb { |
|
+ vfce(@_,4,8,0); |
|
+} |
|
+sub wfcexbs { |
|
+ vfce(@_,4,8,1); |
|
+} |
|
+ |
|
+sub vfchsb { |
|
+ vfch(@_,2,0,0); |
|
+} |
|
+sub vfchsbs { |
|
+ vfch(@_,2,0,1); |
|
+} |
|
+sub wfchsb { |
|
+ vfch(@_,2,8,0); |
|
+} |
|
+sub wfchsbs { |
|
+ vfch(@_,2,8,1); |
|
+} |
|
+sub wfchxb { |
|
+ vfch(@_,4,8,0); |
|
+} |
|
+sub wfchxbs { |
|
+ vfch(@_,4,8,1); |
|
+} |
|
+ |
|
+sub vfchesb { |
|
+ vfche(@_,2,0,0); |
|
+} |
|
+sub vfchesbs { |
|
+ vfche(@_,2,0,1); |
|
+} |
|
+sub wfchesb { |
|
+ vfche(@_,2,8,0); |
|
+} |
|
+sub wfchesbs { |
|
+ vfche(@_,2,8,1); |
|
+} |
|
+sub wfchexb { |
|
+ vfche(@_,4,8,0); |
|
+} |
|
+sub wfchexbs { |
|
+ vfche(@_,4,8,1); |
|
+} |
|
+ |
|
+sub vfdsb { |
|
+ vfd(@_,2,0); |
|
+} |
|
+sub wfdsb { |
|
+ vfd(@_,2,8); |
|
+} |
|
+sub wfdxb { |
|
+ vfd(@_,4,8); |
|
+} |
|
+ |
|
+sub vfisb { |
|
+ vfi(@_[0..1],2,@_[2..3]); |
|
+} |
|
+sub wfisb { |
|
+ vfi(@_[0..1],2,0x8|$_[2],$_[3]); |
|
+} |
|
+sub wfixb { |
|
+ vfi(@_[0..1],4,0x8|$_[2],$_[3]); |
|
+} |
|
+ |
|
+sub vfll { |
|
+ vlde(@_); |
|
+} |
|
+sub vflls { |
|
+ vfll(@_,2,0); |
|
+} |
|
+sub wflls { |
|
+ vfll(@_,2,8); |
|
+} |
|
+sub wflld { |
|
+ vfll(@_,3,8); |
|
+} |
|
+ |
|
+sub vflr { |
|
+ vled(@_); |
|
+} |
|
+sub vflrd { |
|
+ vflr(@_[0..1],3,@_[2..3]); |
|
+} |
|
+sub wflrd { |
|
+ vflr(@_[0..1],3,0x8|$_[2],$_[3]); |
|
+} |
|
+sub wflrx { |
|
+ vflr(@_[0..1],4,0x8|$_[2],$_[3]); |
|
+} |
|
+ |
|
+sub vfmax { |
|
+ confess(err("ARGNUM")) if ($#_!=5); |
|
+ VRRc(0xe7ef,@_); |
|
+} |
|
+sub vfmaxsb { |
|
+ vfmax(@_[0..2],2,0,$_[3]); |
|
+} |
|
+sub vfmaxdb { |
|
+ vfmax(@_[0..2],3,0,$_[3]); |
|
+} |
|
+sub wfmaxsb { |
|
+ vfmax(@_[0..2],2,8,$_[3]); |
|
+} |
|
+sub wfmaxdb { |
|
+ vfmax(@_[0..2],3,8,$_[3]); |
|
+} |
|
+sub wfmaxxb { |
|
+ vfmax(@_[0..2],4,8,$_[3]); |
|
+} |
|
+ |
|
+sub vfmin { |
|
+ confess(err("ARGNUM")) if ($#_!=5); |
|
+ VRRc(0xe7ee,@_); |
|
+} |
|
+sub vfminsb { |
|
+ vfmin(@_[0..2],2,0,$_[5]); |
|
+} |
|
+sub vfmindb { |
|
+ vfmin(@_[0..2],3,0,$_[5]); |
|
+} |
|
+sub wfminsb { |
|
+ vfmin(@_[0..2],2,8,$_[5]); |
|
+} |
|
+sub wfmindb { |
|
+ vfmin(@_[0..2],3,8,$_[5]); |
|
+} |
|
+sub wfminxb { |
|
+ vfmin(@_[0..2],4,8,$_[5]); |
|
+} |
|
+ |
|
+sub vfmsb { |
|
+ vfm(@_,2,0); |
|
+} |
|
+sub wfmsb { |
|
+ vfm(@_,2,8); |
|
+} |
|
+sub wfmxb { |
|
+ vfm(@_,4,8); |
|
+} |
|
+ |
|
+sub vfmasb { |
|
+ vfma(@_,0,2); |
|
+} |
|
+sub wfmasb { |
|
+ vfma(@_,8,2); |
|
+} |
|
+sub wfmaxb { |
|
+ vfma(@_,8,4); |
|
+} |
|
+ |
|
+sub vfmssb { |
|
+ vfms(@_,0,2); |
|
+} |
|
+sub wfmssb { |
|
+ vfms(@_,8,2); |
|
+} |
|
+sub wfmsxb { |
|
+ vfms(@_,8,4); |
|
+} |
|
+ |
|
+sub vfnma { |
|
+ confess(err("ARGNUM")) if ($#_!=5); |
|
+ VRRe(0xe79f,@_); |
|
+} |
|
+sub vfnmasb { |
|
+ vfnma(@_,0,2); |
|
+} |
|
+sub vfnmadb { |
|
+ vfnma(@_,0,3); |
|
+} |
|
+sub wfnmasb { |
|
+ vfnma(@_,8,2); |
|
+} |
|
+sub wfnmadb { |
|
+ vfnma(@_,8,3); |
|
+} |
|
+sub wfnmaxb { |
|
+ vfnma(@_,8,4); |
|
+} |
|
+ |
|
+sub vfnms { |
|
+ confess(err("ARGNUM")) if ($#_!=5); |
|
+ VRRe(0xe79e,@_); |
|
+} |
|
+sub vfnmssb { |
|
+ vfnms(@_,0,2); |
|
+} |
|
+sub vfnmsdb { |
|
+ vfnms(@_,0,3); |
|
+} |
|
+sub wfnmssb { |
|
+ vfnms(@_,8,2); |
|
+} |
|
+sub wfnmsdb { |
|
+ vfnms(@_,8,3); |
|
+} |
|
+sub wfnmsxb { |
|
+ vfnms(@_,8,4); |
|
+} |
|
+ |
|
+sub vfpsosb { |
|
+ vfpso(@_[0..1],2,0,$_[2]); |
|
+} |
|
+sub wfpsosb { |
|
+ vfpso(@_[0..1],2,8,$_[2]); |
|
+} |
|
+sub vflcsb { |
|
+ vfpso(@_,2,0,0); |
|
+} |
|
+sub wflcsb { |
|
+ vfpso(@_,2,8,0); |
|
+} |
|
+sub vflnsb { |
|
+ vfpso(@_,2,0,1); |
|
+} |
|
+sub wflnsb { |
|
+ vfpso(@_,2,8,1); |
|
+} |
|
+sub vflpsb { |
|
+ vfpso(@_,2,0,2); |
|
+} |
|
+sub wflpsb { |
|
+ vfpso(@_,2,8,2); |
|
+} |
|
+sub vfpsoxb { |
|
+ vfpso(@_[0..1],4,0,$_[2]); |
|
+} |
|
+sub wfpsoxb { |
|
+ vfpso(@_[0..1],4,8,$_[2]); |
|
+} |
|
+sub vflcxb { |
|
+ vfpso(@_,4,0,0); |
|
+} |
|
+sub wflcxb { |
|
+ vfpso(@_,4,8,0); |
|
+} |
|
+sub vflnxb { |
|
+ vfpso(@_,4,0,1); |
|
+} |
|
+sub wflnxb { |
|
+ vfpso(@_,4,8,1); |
|
+} |
|
+sub vflpxb { |
|
+ vfpso(@_,4,0,2); |
|
+} |
|
+sub wflpxb { |
|
+ vfpso(@_,4,8,2); |
|
+} |
|
+ |
|
+sub vfsqsb { |
|
+ vfsq(@_,2,0); |
|
+} |
|
+sub wfsqsb { |
|
+ vfsq(@_,2,8); |
|
+} |
|
+sub wfsqxb { |
|
+ vfsq(@_,4,8); |
|
+} |
|
+ |
|
+sub vfssb { |
|
+ vfs(@_,2,0); |
|
+} |
|
+sub wfssb { |
|
+ vfs(@_,2,8); |
|
+} |
|
+sub wfsxb { |
|
+ vfs(@_,4,8); |
|
+} |
|
+ |
|
+sub vftcisb { |
|
+ vftci(@_,2,0); |
|
+} |
|
+sub wftcisb { |
|
+ vftci(@_,2,8); |
|
+} |
|
+sub wftcixb { |
|
+ vftci(@_,4,8); |
|
+} |
|
+ |
|
+# VXD - Support Instructions |
|
+ |
|
+sub vlrlr { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRSd(0xe637,@_); |
|
+} |
|
+ |
|
+sub vlrl { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VSI(0xe635,@_); |
|
+} |
|
+ |
|
+sub vstrlr { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRSd(0xe63f,@_); |
|
+} |
|
+ |
|
+sub vstrl { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VSI(0xe63d,@_); |
|
+} |
|
+ |
|
+sub vap { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRIf(0xe671,@_); |
|
+} |
|
+ |
|
+sub vcp { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRh(0xe677,@_); |
|
+} |
|
+ |
|
+sub vcvb { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRi(0xe650,@_); |
|
+} |
|
+ |
|
+sub vcvbg { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRRi(0xe652,@_); |
|
+} |
|
+ |
|
+sub vcvd { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRIi(0xe658,@_); |
|
+} |
|
+ |
|
+sub vcvdg { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ VRIi(0xe65a,@_); |
|
+} |
|
+ |
|
+sub vdp { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRIf(0xe67a,@_); |
|
+} |
|
+ |
|
+sub vlip { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VRIh(0xe649,@_); |
|
+} |
|
+ |
|
+sub vmp { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRIf(0xe678,@_); |
|
+} |
|
+ |
|
+sub vmsp { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRIf(0xe679,@_); |
|
+} |
|
+ |
|
+sub vpkz { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VSI(0xe634,@_); |
|
+} |
|
+ |
|
+sub vpsop { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRIg(0xe65b,@_); |
|
+} |
|
+ |
|
+sub vrp { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRIf(0xe67b,@_); |
|
+} |
|
+ |
|
+sub vsdp { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRIf(0xe67e,@_); |
|
+} |
|
+ |
|
+sub vsrp { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRIg(0xe659,@_); |
|
+} |
|
+ |
|
+sub vsp { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ VRIf(0xe673,@_); |
|
+} |
|
+ |
|
+sub vtp { |
|
+ confess(err("ARGNUM")) if ($#_!=0); |
|
+ VRRg(0xe65f,@_); |
|
+} |
|
+ |
|
+sub vupkz { |
|
+ confess(err("ARGNUM")) if ($#_!=2); |
|
+ VSI(0xe63c,@_); |
|
+} |
|
+ |
|
+# |
|
+# Instruction Formats |
|
+# |
|
+ |
|
+sub RRE { |
|
+ confess(err("ARGNUM")) if ($#_<0||2<$#_); |
|
+ my $ops=join(',',@_[1..$#_]); |
|
+ my $memn=(caller(1))[3]; |
|
+ $memn=~s/^.*:://; |
|
+ my ($opcode,$r1,$r2)=(shift,get_R(shift),get_R(shift)); |
|
+ |
|
+ $out.="\t.long\t".sprintf("%#010x",($opcode<<16|$r1<<4|$r2)); |
|
+ $out.="\t# $memn\t$ops\n" |
|
+} |
|
+ |
|
+sub RRFb { |
|
+ confess(err("ARGNUM")) if ($#_<3||4<$#_); |
|
+ my $ops=join(',',@_[1..$#_]); |
|
+ my $memn=(caller(1))[3]; |
|
+ $memn=~s/^.*:://; |
|
+ my ($opcode,$r1,$r3,$r2,$m4)=(shift,get_R(shift),get_R(shift) |
|
+ ,get_R(shift),get_M(shift)); |
|
+ |
|
+ $out.="\t.long\t" |
|
+ .sprintf("%#010x",($opcode<<16|$r3<<12|$m4<<8|$r1<<4|$r2)); |
|
+ $out.="\t# $memn\t$ops\n" |
|
+} |
|
+ |
|
+sub S { |
|
+ confess(err("ARGNUM")) if ($#_<0||1<$#_); |
|
+ my $ops=join(',',@_[1..$#_]); |
|
+ my $memn=(caller(1))[3]; |
|
+ $memn=~s/^.*:://; |
|
+ my ($opcode,$d2,$b2)=(shift,get_DB(shift)); |
|
+ |
|
+ $out.="\t.long\t".sprintf("%#010x",($opcode<<16|$b2<<12|$d2)); |
|
+ $out.="\t# $memn\t$ops\n" |
|
+} |
|
+ |
|
+sub VRIa { |
|
+ confess(err("ARGNUM")) if ($#_<2||3<$#_); |
|
+ my $ops=join(',',@_[1..$#_]); |
|
+ my $memn=(caller(1))[3]; |
|
+ $memn=~s/^.*:://; |
|
+ my ($opcode,$v1,$i2,$m3)=(shift,get_V(shift),get_I(shift,16), |
|
+ get_M(shift)); |
|
+ |
|
+ $out.="\t.word\t"; |
|
+ $out.=sprintf("%#06x",($opcode&0xff00|($v1&0xf)<<4)).","; |
|
+ $out.=sprintf("%#06x",$i2).","; |
|
+ $out.=sprintf("%#06x",($m3<<12|RXB($v1)<<8|$opcode&0xff)); |
|
+ $out.="\t# $memn\t$ops\n" |
|
+} |
|
+ |
|
+sub VRIb { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ my $ops=join(',',@_[1..$#_]); |
|
+ my $memn=(caller(1))[3]; |
|
+ $memn=~s/^.*:://; |
|
+ my ($opcode,$v1,$i2,$i3,$m4)=(shift,get_V(shift),get_I(shift,8), |
|
+ ,get_I(shift,8),get_M(shift)); |
|
+ |
|
+ $out.="\t.word\t"; |
|
+ $out.=sprintf("%#06x",($opcode&0xff00|($v1&0xf)<<4)).","; |
|
+ $out.=sprintf("%#06x",($i2<<8|$i3)).","; |
|
+ $out.=sprintf("%#06x",($m4<<12|RXB($v1)<<8|$opcode&0xff)); |
|
+ $out.="\t# $memn\t$ops\n" |
|
+} |
|
+ |
|
+sub VRIc { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ my $ops=join(',',@_[1..$#_]); |
|
+ my $memn=(caller(1))[3]; |
|
+ $memn=~s/^.*:://; |
|
+ my ($opcode,$v1,$v3,$i2,$m4)=(shift,get_V(shift),get_V(shift), |
|
+ ,get_I(shift,16),get_M(shift)); |
|
+ |
|
+ $out.="\t.word\t"; |
|
+ $out.=sprintf("%#06x",($opcode&0xff00|($v1&0xf)<<4)|($v3&0xf)).","; |
|
+ $out.=sprintf("%#06x",$i2).","; |
|
+ $out.=sprintf("%#06x",($m4<<12|RXB($v1,$v3)<<8|$opcode&0xff)); |
|
+ $out.="\t# $memn\t$ops\n" |
|
+} |
|
+ |
|
+sub VRId { |
|
+ confess(err("ARGNUM")) if ($#_<4||$#_>5); |
|
+ my $ops=join(',',@_[1..$#_]); |
|
+ my $memn=(caller(1))[3]; |
|
+ $memn=~s/^.*:://; |
|
+ my ($opcode,$v1,$v2,$v3,$i4,$m5)=(shift,get_V(shift),get_V(shift), |
|
+ ,get_V(shift),get_I(shift,8),get_M(shift)); |
|
+ |
|
+ $out.="\t.word\t"; |
|
+ $out.=sprintf("%#06x",($opcode&0xff00|($v1&0xf)<<4)|($v2&0xf)).","; |
|
+ $out.=sprintf("%#06x",(($v3&0xf)<<12|$i4)).","; |
|
+ $out.=sprintf("%#06x",($m5<<12|RXB($v1,$v2,$v3)<<8|$opcode&0xff)); |
|
+ $out.="\t# $memn\t$ops\n" |
|
+} |
|
+ |
|
+sub VRIe { |
|
+ confess(err("ARGNUM")) if ($#_!=5); |
|
+ my $ops=join(',',@_[1..$#_]); |
|
+ my $memn=(caller(1))[3]; |
|
+ $memn=~s/^.*:://; |
|
+ my ($opcode,$v1,$v2,$i3,$m4,$m5)=(shift,get_V(shift),get_V(shift), |
|
+ ,get_I(shift,12),get_M(shift),get_M(shift)); |
|
+ |
|
+ $out.="\t.word\t"; |
|
+ $out.=sprintf("%#06x",($opcode&0xff00|($v1&0xf)<<4)|($v2&0xf)).","; |
|
+ $out.=sprintf("%#06x",($i3<<4|$m5)).","; |
|
+ $out.=sprintf("%#06x",($m4<<12|RXB($v1,$v2)<<8|$opcode&0xff)); |
|
+ $out.="\t# $memn\t$ops\n" |
|
+} |
|
+ |
|
+sub VRIf { |
|
+ confess(err("ARGNUM")) if ($#_!=5); |
|
+ my $ops=join(',',@_[1..$#_]); |
|
+ my $memn=(caller(1))[3]; |
|
+ $memn=~s/^.*:://; |
|
+ my ($opcode,$v1,$v2,$v3,$i4,$m5)=(shift,get_V(shift),get_V(shift), |
|
+ ,get_V(shift),get_I(shift,8),get_M(shift)); |
|
+ |
|
+ $out.="\t.word\t"; |
|
+ $out.=sprintf("%#06x",($opcode&0xff00|($v1&0xf)<<4)|($v2&0xf)).","; |
|
+ $out.=sprintf("%#06x",(($v3&0xf)<<12|$m5<<4)|$i4>>4).","; |
|
+ $out.=sprintf("%#06x",(($i4&0xf)<<12|RXB($v1,$v2,$v3)<<8|$opcode&0xff)); |
|
+ $out.="\t# $memn\t$ops\n" |
|
+} |
|
+ |
|
+sub VRIg { |
|
+ confess(err("ARGNUM")) if ($#_!=5); |
|
+ my $ops=join(',',@_[1..$#_]); |
|
+ my $memn=(caller(1))[3]; |
|
+ $memn=~s/^.*:://; |
|
+ my ($opcode,$v1,$v2,$i3,$i4,$m5)=(shift,get_V(shift),get_V(shift), |
|
+ ,get_I(shift,8),get_I(shift,8),get_M(shift)); |
|
+ |
|
+ $out.="\t.word\t"; |
|
+ $out.=sprintf("%#06x",($opcode&0xff00|($v1&0xf)<<4)|($v2&0xf)).","; |
|
+ $out.=sprintf("%#06x",($i4<<8|$m5<<4|$i3>>4)).","; |
|
+ $out.=sprintf("%#06x",(($i3&0xf)<<12|RXB($v1,$v2)<<8|$opcode&0xff)); |
|
+ $out.="\t# $memn\t$ops\n" |
|
+} |
|
+ |
|
+sub VRIh { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ my $ops=join(',',@_[1..$#_]); |
|
+ my $memn=(caller(1))[3]; |
|
+ $memn=~s/^.*:://; |
|
+ my ($opcode,$v1,$i2,$i3)=(shift,get_V(shift),get_I(shift,16), |
|
+ get_I(shift,4)); |
|
+ |
|
+ $out.="\t.word\t"; |
|
+ $out.=sprintf("%#06x",($opcode&0xff00|($v1&0xf)<<4)).","; |
|
+ $out.=sprintf("%#06x",$i2).","; |
|
+ $out.=sprintf("%#06x",($i3<<12|RXB($v1)<<8|$opcode&0xff)); |
|
+ $out.="\t# $memn\t$ops\n" |
|
+} |
|
+ |
|
+sub VRIi { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ my $ops=join(',',@_[1..$#_]); |
|
+ my $memn=(caller(1))[3]; |
|
+ $memn=~s/^.*:://; |
|
+ my ($opcode,$v1,$r2,$i3,$m4)=(shift,get_V(shift),get_R(shift), |
|
+ ,get_I(shift,8),get_M(shift)); |
|
+ |
|
+ $out.="\t.word\t"; |
|
+ $out.=sprintf("%#06x",($opcode&0xff00|($v1&0xf)<<4)|$r2).","; |
|
+ $out.=sprintf("%#06x",($m4<<4|$i3>>4)).","; |
|
+ $out.=sprintf("%#06x",(($i3&0xf)<<12|RXB($v1)<<8|$opcode&0xff)); |
|
+ $out.="\t# $memn\t$ops\n" |
|
+} |
|
+ |
|
+sub VRRa { |
|
+ confess(err("ARGNUM")) if ($#_<2||5<$#_); |
|
+ my $ops=join(',',@_[1..$#_]); |
|
+ my $memn=(caller(1))[3]; |
|
+ $memn=~s/^.*:://; |
|
+ my ($opcode,$v1,$v2,$m3,$m4,$m5)=(shift,get_V(shift),get_V(shift), |
|
+ get_M(shift),get_M(shift),get_M(shift)); |
|
+ |
|
+ $out.="\t.word\t"; |
|
+ $out.=sprintf("%#06x",($opcode&0xff00|($v1&0xf)<<4|($v2&0xf))).","; |
|
+ $out.=sprintf("%#06x",($m5<<4|$m4)).","; |
|
+ $out.=sprintf("%#06x",($m3<<12|RXB($v1,$v2)<<8|$opcode&0xff)); |
|
+ $out.="\t# $memn\t$ops\n" |
|
+} |
|
+ |
|
+sub VRRb { |
|
+ confess(err("ARGNUM")) if ($#_<3||5<$#_); |
|
+ my $ops=join(',',@_[1..$#_]); |
|
+ my $memn=(caller(1))[3]; |
|
+ $memn=~s/^.*:://; |
|
+ my ($opcode,$v1,$v2,$v3,$m4,$m5)=(shift,get_V(shift),get_V(shift), |
|
+ get_V(shift),get_M(shift),get_M(shift)); |
|
+ |
|
+ $out.="\t.word\t"; |
|
+ $out.=sprintf("%#06x",($opcode&0xff00|($v1&0xf)<<4|($v2&0xf))).","; |
|
+ $out.=sprintf("%#06x",(($v3&0xf)<<12|$m5<<4)).","; |
|
+ $out.=sprintf("%#06x",($m4<<12|RXB($v1,$v2,$v3)<<8|$opcode&0xff)); |
|
+ $out.="\t# $memn\t$ops\n" |
|
+} |
|
+ |
|
+sub VRRc { |
|
+ confess(err("ARGNUM")) if ($#_<3||6<$#_); |
|
+ my $ops=join(',',@_[1..$#_]); |
|
+ my $memn=(caller(1))[3]; |
|
+ $memn=~s/^.*:://; |
|
+ my ($opcode,$v1,$v2,$v3,$m4,$m5,$m6)=(shift,get_V(shift),get_V(shift), |
|
+ get_V(shift),get_M(shift),get_M(shift),get_M(shift)); |
|
+ |
|
+ $out.="\t.word\t"; |
|
+ $out.=sprintf("%#06x",($opcode&0xff00|($v1&0xf)<<4|($v2&0xf))).","; |
|
+ $out.=sprintf("%#06x",(($v3&0xf)<<12|$m6<<4|$m5)).","; |
|
+ $out.=sprintf("%#06x",($m4<<12|RXB($v1,$v2,$v3)<<8|$opcode&0xff)); |
|
+ $out.="\t# $memn\t$ops\n" |
|
+} |
|
+ |
|
+sub VRRd { |
|
+ confess(err("ARGNUM")) if ($#_<4||6<$#_); |
|
+ my $ops=join(',',@_[1..$#_]); |
|
+ my $memn=(caller(1))[3]; |
|
+ $memn=~s/^.*:://; |
|
+ my ($opcode,$v1,$v2,$v3,$v4,$m5,$m6)=(shift,get_V(shift),get_V(shift), |
|
+ get_V(shift),get_V(shift),get_M(shift),get_M(shift)); |
|
+ |
|
+ $out.="\t.word\t"; |
|
+ $out.=sprintf("%#06x",($opcode&0xff00|($v1&0xf)<<4|($v2&0xf))).","; |
|
+ $out.=sprintf("%#06x",(($v3&0xf)<<12|$m5<<8|$m6<<4)).","; |
|
+ $out.=sprintf("%#06x",(($v4&0xf)<<12|RXB($v1,$v2,$v3,$v4)<<8|$opcode&0xff)); |
|
+ $out.="\t# $memn\t$ops\n" |
|
+} |
|
+ |
|
+sub VRRe { |
|
+ confess(err("ARGNUM")) if ($#_<4||6<$#_); |
|
+ my $ops=join(',',@_[1..$#_]); |
|
+ my $memn=(caller(1))[3]; |
|
+ $memn=~s/^.*:://; |
|
+ my ($opcode,$v1,$v2,$v3,$v4,$m5,$m6)=(shift,get_V(shift),get_V(shift), |
|
+ get_V(shift),get_V(shift),get_M(shift),get_M(shift)); |
|
+ |
|
+ $out.="\t.word\t"; |
|
+ $out.=sprintf("%#06x",($opcode&0xff00|($v1&0xf)<<4|($v2&0xf))).","; |
|
+ $out.=sprintf("%#06x",(($v3&0xf)<<12|$m6<<8|$m5)).","; |
|
+ $out.=sprintf("%#06x",(($v4&0xf)<<12|RXB($v1,$v2,$v3,$v4)<<8|$opcode&0xff)); |
|
+ $out.="\t# $memn\t$ops\n" |
|
+} |
|
+ |
|
+sub VRRf { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ my $ops=join(',',@_[1..$#_]); |
|
+ my $memn=(caller(1))[3]; |
|
+ $memn=~s/^.*:://; |
|
+ my ($opcode,$v1,$r2,$r3)=(shift,get_V(shift),get_R(shift), |
|
+ get_R(shift)); |
|
+ |
|
+ $out.="\t.word\t"; |
|
+ $out.=sprintf("%#06x",($opcode&0xff00|($v1&0xf)<<4|$r2)).","; |
|
+ $out.=sprintf("%#06x",($r3<<12)).","; |
|
+ $out.=sprintf("%#06x",(RXB($v1)<<8|$opcode&0xff)); |
|
+ $out.="\t# $memn\t$ops\n" |
|
+} |
|
+ |
|
+sub VRRg { |
|
+ confess(err("ARGNUM")) if ($#_!=1); |
|
+ my $ops=join(',',@_[1..$#_]); |
|
+ my $memn=(caller(1))[3]; |
|
+ $memn=~s/^.*:://; |
|
+ my ($opcode,$v1)=(shift,get_V(shift)); |
|
+ |
|
+ $out.="\t.word\t"; |
|
+ $out.=sprintf("%#06x",($opcode&0xff00|($v1&0xf))).","; |
|
+ $out.=sprintf("%#06x",0x0000).","; |
|
+ $out.=sprintf("%#06x",(RXB(0,$v1)<<8|$opcode&0xff)); |
|
+ $out.="\t# $memn\t$ops\n" |
|
+} |
|
+ |
|
+sub VRRh { |
|
+ confess(err("ARGNUM")) if ($#_<2||$#_>3); |
|
+ my $ops=join(',',@_[1..$#_]); |
|
+ my $memn=(caller(1))[3]; |
|
+ $memn=~s/^.*:://; |
|
+ my ($opcode,$v1,$v2,$m3)=(shift,get_V(shift),get_V(shift), |
|
+ get_M(shift)); |
|
+ |
|
+ $out.="\t.word\t"; |
|
+ $out.=sprintf("%#06x",($opcode&0xff00|($v1&0xf))).","; |
|
+ $out.=sprintf("%#06x",(($v2&0xf)<<12|$m3<<4)).","; |
|
+ $out.=sprintf("%#06x",(RXB(0,$v1,$v2)<<8|$opcode&0xff)); |
|
+ $out.="\t# $memn\t$ops\n" |
|
+} |
|
+ |
|
+sub VRRi { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ my $ops=join(',',@_[1..$#_]); |
|
+ my $memn=(caller(1))[3]; |
|
+ $memn=~s/^.*:://; |
|
+ my ($opcode,$r1,$v2,$m3)=(shift,get_R(shift),get_V(shift), |
|
+ get_M(shift)); |
|
+ |
|
+ $out.="\t.word\t"; |
|
+ $out.=sprintf("%#06x",($opcode&0xff00|$r1<<4|($v2&0xf))).","; |
|
+ $out.=sprintf("%#06x",($m3<<4))."\,"; |
|
+ $out.=sprintf("%#06x",(RXB(0,$v2)<<8|$opcode&0xff)); |
|
+ $out.="\t# $memn\t$ops\n" |
|
+} |
|
+ |
|
+sub VRSa { |
|
+ confess(err("ARGNUM")) if ($#_<3||$#_>4); |
|
+ my $ops=join(',',@_[1..$#_]); |
|
+ my $memn=(caller(1))[3]; |
|
+ $memn=~s/^.*:://; |
|
+ my ($opcode,$v1,$v3,$d2,$b2,$m4)=(shift,get_V(shift),get_V(shift), |
|
+ get_DB(shift),get_M(shift)); |
|
+ |
|
+ $out.="\t.word\t"; |
|
+ $out.=sprintf("%#06x",($opcode&0xff00|($v1&0xf)<<4|($v3&0xf))).","; |
|
+ $out.=sprintf("%#06x",($b2<<12|$d2)).","; |
|
+ $out.=sprintf("%#06x",($m4<<12|RXB($v1,$v3)<<8|$opcode&0xff)); |
|
+ $out.="\t# $memn\t$ops\n" |
|
+} |
|
+ |
|
+sub VRSb { |
|
+ confess(err("ARGNUM")) if ($#_<3||$#_>4); |
|
+ my $ops=join(',',@_[1..$#_]); |
|
+ my $memn=(caller(1))[3]; |
|
+ $memn=~s/^.*:://; |
|
+ my ($opcode,$v1,$r3,$d2,$b2,$m4)=(shift,get_V(shift),get_R(shift), |
|
+ get_DB(shift),get_M(shift)); |
|
+ |
|
+ $out.="\t.word\t"; |
|
+ $out.=sprintf("%#06x",($opcode&0xff00|($v1&0xf)<<4|$r3)).","; |
|
+ $out.=sprintf("%#06x",($b2<<12|$d2)).","; |
|
+ $out.=sprintf("%#06x",($m4<<12|RXB($v1)<<8|$opcode&0xff)); |
|
+ $out.="\t# $memn\t$ops\n" |
|
+} |
|
+ |
|
+sub VRSc { |
|
+ confess(err("ARGNUM")) if ($#_!=4); |
|
+ my $ops=join(',',@_[1..$#_]); |
|
+ my $memn=(caller(1))[3]; |
|
+ $memn=~s/^.*:://; |
|
+ my ($opcode,$r1,$v3,$d2,$b2,$m4)=(shift,get_R(shift),get_V(shift), |
|
+ get_DB(shift),get_M(shift)); |
|
+ |
|
+ $out.="\t.word\t"; |
|
+ $out.=sprintf("%#06x",($opcode&0xff00|$r1<<4|($v3&0xf))).","; |
|
+ $out.=sprintf("%#06x",($b2<<12|$d2)).","; |
|
+ $out.=sprintf("%#06x",($m4<<12|RXB(0,$v3)<<8|$opcode&0xff)); |
|
+ $out.="\t# $memn\t$ops\n" |
|
+} |
|
+ |
|
+sub VRSd { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ my $ops=join(',',@_[1..$#_]); |
|
+ my $memn=(caller(1))[3]; |
|
+ $memn=~s/^.*:://; |
|
+ my ($opcode,$v1,$r3,$d2,$b2)=(shift,get_V(shift),get_R(shift), |
|
+ get_DB(shift)); |
|
+ |
|
+ $out.="\t.word\t"; |
|
+ $out.=sprintf("%#06x",($opcode&0xff00|$r3)).","; |
|
+ $out.=sprintf("%#06x",($b2<<12|$d2)).","; |
|
+ $out.=sprintf("%#06x",(($v1&0xf)<<12|RXB(0,0,0,$v1)<<8|$opcode&0xff)); |
|
+ $out.="\t# $memn\t$ops\n" |
|
+} |
|
+ |
|
+sub VRV { |
|
+ confess(err("ARGNUM")) if ($#_<2||$#_>3); |
|
+ my $ops=join(',',@_[1..$#_]); |
|
+ my $memn=(caller(1))[3]; |
|
+ $memn=~s/^.*:://; |
|
+ my ($opcode,$v1,$d2,$v2,$b2,$m3)=(shift,get_V(shift),get_DVB(shift), |
|
+ get_M(shift)); |
|
+ |
|
+ $out.="\t.word\t"; |
|
+ $out.=sprintf("%#06x",($opcode&0xff00|($v1&0xf)<<4|($v2&0xf))).","; |
|
+ $out.=sprintf("%#06x",($b2<<12|$d2)).","; |
|
+ $out.=sprintf("%#06x",($m3<<12|RXB($v1,$v2)<<8|$opcode&0xff)); |
|
+ $out.="\t# $memn\t$ops\n" |
|
+} |
|
+ |
|
+sub VRX { |
|
+ confess(err("ARGNUM")) if ($#_<2||$#_>3); |
|
+ my $ops=join(',',@_[1..$#_]); |
|
+ my $memn=(caller(1))[3]; |
|
+ $memn=~s/^.*:://; |
|
+ my ($opcode,$v1,$d2,$x2,$b2,$m3)=(shift,get_V(shift),get_DXB(shift), |
|
+ get_M(shift)); |
|
+ |
|
+ $out.="\t.word\t"; |
|
+ $out.=sprintf("%#06x",($opcode&0xff00|($v1&0xf)<<4|($x2))).","; |
|
+ $out.=sprintf("%#06x",($b2<<12|$d2)).","; |
|
+ $out.=sprintf("%#06x",($m3<<12|RXB($v1)<<8|$opcode&0xff)); |
|
+ $out.="\t# $memn\t$ops\n" |
|
+} |
|
+ |
|
+sub VSI { |
|
+ confess(err("ARGNUM")) if ($#_!=3); |
|
+ my $ops=join(',',@_[1..$#_]); |
|
+ my $memn=(caller(1))[3]; |
|
+ $memn=~s/^.*:://; |
|
+ my ($opcode,$v1,$d2,$b2,$i3)=(shift,get_V(shift),get_DB(shift), |
|
+ get_I(shift,8)); |
|
+ |
|
+ $out.="\t.word\t"; |
|
+ $out.=sprintf("%#06x",($opcode&0xff00|$i3)).","; |
|
+ $out.=sprintf("%#06x",($b2<<12|$d2)).","; |
|
+ $out.=sprintf("%#06x",(($v1&0xf)<<12|RXB(0,0,0,$v1)<<8|$opcode&0xff)); |
|
+ $out.="\t# $memn\t$ops\n" |
|
+} |
|
+ |
|
+# |
|
+# Internal |
|
+# |
|
+ |
|
+sub get_R { |
|
+ confess(err("ARGNUM")) if ($#_!=0); |
|
+ my $r; |
|
+ |
|
+ for (shift) { |
|
+ if (!defined) { |
|
+ $r=0; |
|
+ } elsif (/^$GR$/) { |
|
+ $r=$1; |
|
+ } else { |
|
+ confess(err("PARSE")); |
|
+ } |
|
+ } |
|
+ confess(err("ARGRANGE")) if ($r&~0xf); |
|
+ |
|
+ return $r; |
|
+} |
|
+ |
|
+sub get_V { |
|
+ confess(err("ARGNUM")) if ($#_!=0); |
|
+ my $v; |
|
+ |
|
+ for (shift) { |
|
+ if (!defined) { |
|
+ $v=0; |
|
+ } elsif (/^$VR$/) { |
|
+ $v=$1; |
|
+ } else { |
|
+ confess(err("PARSE")); |
|
+ } |
|
+ } |
|
+ confess(err("ARGRANGE")) if ($v&~0x1f); |
|
+ |
|
+ return $v; |
|
+} |
|
+ |
|
+sub get_I { |
|
+ confess(err("ARGNUM")) if ($#_!=1); |
|
+ my ($i,$bits)=(shift,shift); |
|
+ |
|
+ $i=defined($i)?(eval($i)):(0); |
|
+ confess(err("PARSE")) if (!defined($i)); |
|
+ confess(err("ARGRANGE")) if (abs($i)&~(2**$bits-1)); |
|
+ |
|
+ return $i&(2**$bits-1); |
|
+} |
|
+ |
|
+sub get_M { |
|
+ confess(err("ARGNUM")) if ($#_!=0); |
|
+ my $m=shift; |
|
+ |
|
+ $m=defined($m)?(eval($m)):(0); |
|
+ confess(err("PARSE")) if (!defined($m)); |
|
+ confess(err("ARGRANGE")) if ($m&~0xf); |
|
+ |
|
+ return $m; |
|
+} |
|
+ |
|
+sub get_DB |
|
+{ |
|
+ confess(err("ARGNUM")) if ($#_!=0); |
|
+ my ($d,$b); |
|
+ |
|
+ for (shift) { |
|
+ if (!defined) { |
|
+ ($d,$b)=(0,0); |
|
+ } elsif (/^(.+)\($GR\)$/) { |
|
+ ($d,$b)=(eval($1),$2); |
|
+ confess(err("PARSE")) if (!defined($d)); |
|
+ } elsif (/^(.+)$/) { |
|
+ ($d,$b)=(eval($1),0); |
|
+ confess(err("PARSE")) if (!defined($d)); |
|
+ } else { |
|
+ confess(err("PARSE")); |
|
+ } |
|
+ } |
|
+ confess(err("ARGRANGE")) if ($d&~0xfff||$b&~0xf); |
|
+ |
|
+ return ($d,$b); |
|
+} |
|
+ |
|
+sub get_DVB |
|
+{ |
|
+ confess(err("ARGNUM")) if ($#_!=0); |
|
+ my ($d,$v,$b); |
|
+ |
|
+ for (shift) { |
|
+ if (!defined) { |
|
+ ($d,$v,$b)=(0,0,0); |
|
+ } elsif (/^(.+)\($VR,$GR\)$/) { |
|
+ ($d,$v,$b)=(eval($1),$2,$3); |
|
+ confess(err("PARSE")) if (!defined($d)); |
|
+ } elsif (/^(.+)\($GR\)$/) { |
|
+ ($d,$v,$b)=(eval($1),0,$2); |
|
+ confess(err("PARSE")) if (!defined($d)); |
|
+ } elsif (/^(.+)$/) { |
|
+ ($d,$v,$b)=(eval($1),0,0); |
|
+ confess(err("PARSE")) if (!defined($d)); |
|
+ } else { |
|
+ confess(err("PARSE")); |
|
+ } |
|
+ } |
|
+ confess(err("ARGRANGE")) if ($d&~0xfff||$v&~0x1f||$b&~0xf); |
|
+ |
|
+ return ($d,$v,$b); |
|
+} |
|
+ |
|
+sub get_DXB |
|
+{ |
|
+ confess(err("ARGNUM")) if ($#_!=0); |
|
+ my ($d,$x,$b); |
|
+ |
|
+ for (shift) { |
|
+ if (!defined) { |
|
+ ($d,$x,$b)=(0,0,0); |
|
+ } elsif (/^(.+)\($GR,$GR\)$/) { |
|
+ ($d,$x,$b)=(eval($1),$2,$3); |
|
+ confess(err("PARSE")) if (!defined($d)); |
|
+ } elsif (/^(.+)\($GR\)$/) { |
|
+ ($d,$x,$b)=(eval($1),0,$2); |
|
+ confess(err("PARSE")) if (!defined($d)); |
|
+ } elsif (/^(.+)$/) { |
|
+ ($d,$x,$b)=(eval($1),0,0); |
|
+ confess(err("PARSE")) if (!defined($d)); |
|
+ } else { |
|
+ confess(err("PARSE")); |
|
+ } |
|
+ } |
|
+ confess(err("ARGRANGE")) if ($d&~0xfff||$x&~0xf||$b&~0xf); |
|
+ |
|
+ return ($d,$x,$b); |
|
+} |
|
+ |
|
+sub RXB |
|
+{ |
|
+ confess(err("ARGNUM")) if ($#_<0||3<$#_); |
|
+ my $rxb=0; |
|
+ |
|
+ $rxb|=0x08 if (defined($_[0])&&($_[0]&0x10)); |
|
+ $rxb|=0x04 if (defined($_[1])&&($_[1]&0x10)); |
|
+ $rxb|=0x02 if (defined($_[2])&&($_[2]&0x10)); |
|
+ $rxb|=0x01 if (defined($_[3])&&($_[3]&0x10)); |
|
+ |
|
+ return $rxb; |
|
+} |
|
+ |
|
+sub err { |
|
+ my %ERR = |
|
+ ( |
|
+ ARGNUM => 'Wrong number of arguments', |
|
+ ARGRANGE=> 'Argument out of range', |
|
+ PARSE => 'Parse error', |
|
+ ); |
|
+ confess($ERR{ARGNUM}) if ($#_!=0); |
|
+ |
|
+ return $ERR{$_[0]}; |
|
+} |
|
+ |
|
+1; |
|
diff -up openssl-1.1.1e/crypto/poly1305/asm/poly1305-s390x.pl.s390x-update openssl-1.1.1e/crypto/poly1305/asm/poly1305-s390x.pl |
|
--- openssl-1.1.1e/crypto/poly1305/asm/poly1305-s390x.pl.s390x-update 2020-03-19 16:20:22.041227359 +0100 |
|
+++ openssl-1.1.1e/crypto/poly1305/asm/poly1305-s390x.pl 2020-03-19 16:23:22.364098257 +0100 |
|
@@ -24,204 +24,961 @@ |
|
# |
|
# On side note, z13 enables vector base 2^26 implementation... |
|
|
|
-$flavour = shift; |
|
+# |
|
+# January 2019 |
|
+# |
|
+# Add vx code path (base 2^26). |
|
+# |
|
+# Copyright IBM Corp. 2019 |
|
+# Author: Patrick Steuer <patrick.steuer@de.ibm.com> |
|
+ |
|
+# |
|
+# January 2019 |
|
+# |
|
+# Add vector base 2^26 implementation. It's problematic to accurately |
|
+# measure performance, because reference system is hardly idle. But |
|
+# it's sub-cycle, i.e. less than 1 cycle per processed byte, and it's |
|
+# >=20% faster than IBM's submission on long inputs, and much faster on |
|
+# short ones, because calculation of key powers is postponed till we |
|
+# know that input is long enough to justify the additional overhead. |
|
+ |
|
+use strict; |
|
+use FindBin qw($Bin); |
|
+use lib "$Bin/../.."; |
|
+use perlasm::s390x qw(:DEFAULT :VX AUTOLOAD LABEL INCLUDE); |
|
+ |
|
+my $flavour = shift; |
|
|
|
+my ($z,$SIZE_T); |
|
if ($flavour =~ /3[12]/) { |
|
+ $z=0; # S/390 ABI |
|
$SIZE_T=4; |
|
- $g=""; |
|
} else { |
|
+ $z=1; # zSeries ABI |
|
$SIZE_T=8; |
|
- $g="g"; |
|
} |
|
|
|
+my $output; |
|
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} |
|
-open STDOUT,">$output"; |
|
|
|
-$sp="%r15"; |
|
+my $stdframe=16*$SIZE_T+4*8; |
|
+my $sp="%r15"; |
|
|
|
my ($ctx,$inp,$len,$padbit) = map("%r$_",(2..5)); |
|
|
|
-$code.=<<___; |
|
-.text |
|
+PERLASM_BEGIN($output); |
|
|
|
-.globl poly1305_init |
|
-.type poly1305_init,\@function |
|
-.align 16 |
|
-poly1305_init: |
|
- lghi %r0,0 |
|
- lghi %r1,-1 |
|
- stg %r0,0($ctx) # zero hash value |
|
- stg %r0,8($ctx) |
|
- stg %r0,16($ctx) |
|
- |
|
- cl${g}r $inp,%r0 |
|
- je .Lno_key |
|
- |
|
- lrvg %r4,0($inp) # load little-endian key |
|
- lrvg %r5,8($inp) |
|
- |
|
- nihl %r1,0xffc0 # 0xffffffc0ffffffff |
|
- srlg %r0,%r1,4 # 0x0ffffffc0fffffff |
|
- srlg %r1,%r1,4 |
|
- nill %r1,0xfffc # 0x0ffffffc0ffffffc |
|
- |
|
- ngr %r4,%r0 |
|
- ngr %r5,%r1 |
|
- |
|
- stg %r4,32($ctx) |
|
- stg %r5,40($ctx) |
|
- |
|
-.Lno_key: |
|
- lghi %r2,0 |
|
- br %r14 |
|
-.size poly1305_init,.-poly1305_init |
|
-___ |
|
+INCLUDE ("s390x_arch.h"); |
|
+TEXT (); |
|
+ |
|
+################ |
|
+# static void poly1305_init(void *ctx, const unsigned char key[16]) |
|
+{ |
|
+GLOBL ("poly1305_init"); |
|
+TYPE ("poly1305_init","\@function"); |
|
+ALIGN (16); |
|
+LABEL ("poly1305_init"); |
|
+ lghi ("%r0",0); |
|
+ lghi ("%r1",-1); |
|
+ stg ("%r0","0($ctx)"); # zero hash value |
|
+ stg ("%r0","8($ctx)"); |
|
+ stg ("%r0","16($ctx)"); |
|
+ st ("%r0","24($ctx)"); # clear is_base2_26 |
|
+ lgr ("%r5",$ctx); # reassign $ctx |
|
+ lghi ("%r2",0); |
|
+ |
|
+&{$z? \&clgr:\&clr} ($inp,"%r0"); |
|
+ je (".Lno_key"); |
|
+ |
|
+ lrvg ("%r2","0($inp)"); # load little-endian key |
|
+ lrvg ("%r3","8($inp)"); |
|
+ |
|
+ nihl ("%r1",0xffc0); # 0xffffffc0ffffffff |
|
+ srlg ("%r0","%r1",4); # 0x0ffffffc0fffffff |
|
+ srlg ("%r1","%r1",4); |
|
+ nill ("%r1",0xfffc); # 0x0ffffffc0ffffffc |
|
+ |
|
+ ngr ("%r2","%r0"); |
|
+ ngr ("%r3","%r1"); |
|
+ |
|
+ stmg ("%r2","%r3","32(%r5)"); |
|
+ |
|
+ larl ("%r1","OPENSSL_s390xcap_P"); |
|
+ lg ("%r0","16(%r1)"); |
|
+ srlg ("%r0","%r0",62); |
|
+ nill ("%r0",1); # extract vx bit |
|
+ lcgr ("%r0","%r0"); |
|
+ larl ("%r1",".Lpoly1305_blocks"); |
|
+ larl ("%r2",".Lpoly1305_blocks_vx"); |
|
+ larl ("%r3",".Lpoly1305_emit"); |
|
+&{$z? \&xgr:\&xr} ("%r2","%r1"); # select between scalar and vector |
|
+&{$z? \&ngr:\&nr} ("%r2","%r0"); |
|
+&{$z? \&xgr:\&xr} ("%r2","%r1"); |
|
+&{$z? \&stmg:\&stm} ("%r2","%r3","0(%r4)"); |
|
+ lghi ("%r2",1); |
|
+LABEL (".Lno_key"); |
|
+ br ("%r14"); |
|
+SIZE ("poly1305_init",".-poly1305_init"); |
|
+} |
|
+ |
|
+################ |
|
+# static void poly1305_blocks(void *ctx, const unsigned char *inp, |
|
+# size_t len, u32 padbit) |
|
{ |
|
my ($d0hi,$d0lo,$d1hi,$d1lo,$t0,$h0,$t1,$h1,$h2) = map("%r$_",(6..14)); |
|
my ($r0,$r1,$s1) = map("%r$_",(0..2)); |
|
|
|
-$code.=<<___; |
|
-.globl poly1305_blocks |
|
-.type poly1305_blocks,\@function |
|
-.align 16 |
|
-poly1305_blocks: |
|
- srl${g} $len,4 # fixed-up in 64-bit build |
|
- lghi %r0,0 |
|
- cl${g}r $len,%r0 |
|
- je .Lno_data |
|
- |
|
- stm${g} %r6,%r14,`6*$SIZE_T`($sp) |
|
- |
|
- llgfr $padbit,$padbit # clear upper half, much needed with |
|
- # non-64-bit ABI |
|
- lg $r0,32($ctx) # load key |
|
- lg $r1,40($ctx) |
|
- |
|
- lg $h0,0($ctx) # load hash value |
|
- lg $h1,8($ctx) |
|
- lg $h2,16($ctx) |
|
- |
|
- st$g $ctx,`2*$SIZE_T`($sp) # off-load $ctx |
|
- srlg $s1,$r1,2 |
|
- algr $s1,$r1 # s1 = r1 + r1>>2 |
|
- j .Loop |
|
- |
|
-.align 16 |
|
-.Loop: |
|
- lrvg $d0lo,0($inp) # load little-endian input |
|
- lrvg $d1lo,8($inp) |
|
- la $inp,16($inp) |
|
- |
|
- algr $d0lo,$h0 # accumulate input |
|
- alcgr $d1lo,$h1 |
|
- |
|
- lgr $h0,$d0lo |
|
- mlgr $d0hi,$r0 # h0*r0 -> $d0hi:$d0lo |
|
- lgr $h1,$d1lo |
|
- mlgr $d1hi,$s1 # h1*5*r1 -> $d1hi:$d1lo |
|
- |
|
- mlgr $t0,$r1 # h0*r1 -> $t0:$h0 |
|
- mlgr $t1,$r0 # h1*r0 -> $t1:$h1 |
|
- alcgr $h2,$padbit |
|
- |
|
- algr $d0lo,$d1lo |
|
- lgr $d1lo,$h2 |
|
- alcgr $d0hi,$d1hi |
|
- lghi $d1hi,0 |
|
- |
|
- algr $h1,$h0 |
|
- alcgr $t1,$t0 |
|
- |
|
- msgr $d1lo,$s1 # h2*s1 |
|
- msgr $h2,$r0 # h2*r0 |
|
- |
|
- algr $h1,$d1lo |
|
- alcgr $t1,$d1hi # $d1hi is zero |
|
- |
|
- algr $h1,$d0hi |
|
- alcgr $h2,$t1 |
|
- |
|
- lghi $h0,-4 # final reduction step |
|
- ngr $h0,$h2 |
|
- srlg $t0,$h2,2 |
|
- algr $h0,$t0 |
|
- lghi $t1,3 |
|
- ngr $h2,$t1 |
|
- |
|
- algr $h0,$d0lo |
|
- alcgr $h1,$d1hi # $d1hi is still zero |
|
- alcgr $h2,$d1hi # $d1hi is still zero |
|
- |
|
- brct$g $len,.Loop |
|
- |
|
- l$g $ctx,`2*$SIZE_T`($sp) # restore $ctx |
|
- |
|
- stg $h0,0($ctx) # store hash value |
|
- stg $h1,8($ctx) |
|
- stg $h2,16($ctx) |
|
- |
|
- lm${g} %r6,%r14,`6*$SIZE_T`($sp) |
|
-.Lno_data: |
|
- br %r14 |
|
-.size poly1305_blocks,.-poly1305_blocks |
|
-___ |
|
+GLOBL ("poly1305_blocks"); |
|
+TYPE ("poly1305_blocks","\@function"); |
|
+ALIGN (16); |
|
+LABEL ("poly1305_blocks"); |
|
+LABEL (".Lpoly1305_blocks"); |
|
+&{$z? \&ltgr:\&ltr} ("%r0",$len); |
|
+ jz (".Lno_data"); |
|
+ |
|
+&{$z? \&stmg:\&stm} ("%r6","%r14","6*$SIZE_T($sp)"); |
|
+ |
|
+ lg ($h0,"0($ctx)"); # load hash value |
|
+ lg ($h1,"8($ctx)"); |
|
+ lg ($h2,"16($ctx)"); |
|
+ |
|
+LABEL (".Lpoly1305_blocks_entry"); |
|
+if ($z) { |
|
+ srlg ($len,$len,4); |
|
+} else { |
|
+ srl ($len,4); |
|
+} |
|
+ llgfr ($padbit,$padbit); # clear upper half, much needed with |
|
+ # non-64-bit ABI |
|
+ lg ($r0,"32($ctx)"); # load key |
|
+ lg ($r1,"40($ctx)"); |
|
+ |
|
+&{$z? \&stg:\&st} ($ctx,"2*$SIZE_T($sp)"); # off-load $ctx |
|
+ srlg ($s1,$r1,2); |
|
+ algr ($s1,$r1); # s1 = r1 + r1>>2 |
|
+ j (".Loop"); |
|
+ |
|
+ALIGN (16); |
|
+LABEL (".Loop"); |
|
+ lrvg ($d0lo,"0($inp)"); # load little-endian input |
|
+ lrvg ($d1lo,"8($inp)"); |
|
+ la ($inp,"16($inp)"); |
|
+ |
|
+ algr ($d0lo,$h0); # accumulate input |
|
+ alcgr ($d1lo,$h1); |
|
+ alcgr ($h2,$padbit); |
|
+ |
|
+ lgr ($h0,$d0lo); |
|
+ mlgr ($d0hi,$r0); # h0*r0 -> $d0hi:$d0lo |
|
+ lgr ($h1,$d1lo); |
|
+ mlgr ($d1hi,$s1); # h1*5*r1 -> $d1hi:$d1lo |
|
+ |
|
+ mlgr ($t0,$r1); # h0*r1 -> $t0:$h0 |
|
+ mlgr ($t1,$r0); # h1*r0 -> $t1:$h1 |
|
+ |
|
+ algr ($d0lo,$d1lo); |
|
+ lgr ($d1lo,$h2); |
|
+ alcgr ($d0hi,$d1hi); |
|
+ lghi ($d1hi,0); |
|
+ |
|
+ algr ($h1,$h0); |
|
+ alcgr ($t1,$t0); |
|
+ |
|
+ msgr ($d1lo,$s1); # h2*s1 |
|
+ msgr ($h2,$r0); # h2*r0 |
|
+ |
|
+ algr ($h1,$d1lo); |
|
+ alcgr ($t1,$d1hi); # $d1hi is zero |
|
+ |
|
+ algr ($h1,$d0hi); |
|
+ alcgr ($h2,$t1); |
|
+ |
|
+ lghi ($h0,-4); # final reduction step |
|
+ ngr ($h0,$h2); |
|
+ srlg ($t0,$h2,2); |
|
+ algr ($h0,$t0); |
|
+ lghi ($t1,3); |
|
+ ngr ($h2,$t1); |
|
+ |
|
+ algr ($h0,$d0lo); |
|
+ alcgr ($h1,$d1hi); # $d1hi is still zero |
|
+ alcgr ($h2,$d1hi); # $d1hi is still zero |
|
+ |
|
+&{$z? \&brctg:\&brct} ($len,".Loop"); |
|
+ |
|
+&{$z? \&lg:\&l} ($ctx,"2*$SIZE_T($sp)");# restore $ctx |
|
+ |
|
+ stg ($h0,"0($ctx)"); # store hash value |
|
+ stg ($h1,"8($ctx)"); |
|
+ stg ($h2,"16($ctx)"); |
|
+ |
|
+&{$z? \&lmg:\&lm} ("%r6","%r14","6*$SIZE_T($sp)"); |
|
+LABEL (".Lno_data"); |
|
+ br ("%r14"); |
|
+SIZE ("poly1305_blocks",".-poly1305_blocks"); |
|
} |
|
+ |
|
+################ |
|
+# static void poly1305_blocks_vx(void *ctx, const unsigned char *inp, |
|
+# size_t len, u32 padbit) |
|
+{ |
|
+my ($H0, $H1, $H2, $H3, $H4) = map("%v$_",(0..4)); |
|
+my ($I0, $I1, $I2, $I3, $I4) = map("%v$_",(5..9)); |
|
+my ($R0, $R1, $S1, $R2, $S2) = map("%v$_",(10..14)); |
|
+my ($R3, $S3, $R4, $S4) = map("%v$_",(15..18)); |
|
+my ($ACC0, $ACC1, $ACC2, $ACC3, $ACC4) = map("%v$_",(19..23)); |
|
+my ($T1, $T2, $T3, $T4) = map("%v$_",(24..27)); |
|
+my ($mask26,$bswaplo,$bswaphi,$bswapmi) = map("%v$_",(28..31)); |
|
+ |
|
+my ($d2,$d0,$h0,$d1,$h1,$h2)=map("%r$_",(9..14)); |
|
+ |
|
+TYPE ("poly1305_blocks_vx","\@function"); |
|
+ALIGN (16); |
|
+LABEL ("poly1305_blocks_vx"); |
|
+LABEL (".Lpoly1305_blocks_vx"); |
|
+&{$z? \&clgfi:\&clfi} ($len,128); |
|
+ jhe ("__poly1305_blocks_vx"); |
|
+ |
|
+&{$z? \&stmg:\&stm} ("%r6","%r14","6*$SIZE_T($sp)"); |
|
+ |
|
+ lg ($d0,"0($ctx)"); |
|
+ lg ($d1,"8($ctx)"); |
|
+ lg ($d2,"16($ctx)"); |
|
+ |
|
+ llgfr ("%r0",$d0); # base 2^26 -> base 2^64 |
|
+ srlg ($h0,$d0,32); |
|
+ llgfr ("%r1",$d1); |
|
+ srlg ($h1,$d1,32); |
|
+ srlg ($h2,$d2,32); |
|
+ |
|
+ sllg ("%r0","%r0",26); |
|
+ algr ($h0,"%r0"); |
|
+ sllg ("%r0",$h1,52); |
|
+ srlg ($h1,$h1,12); |
|
+ sllg ("%r1","%r1",14); |
|
+ algr ($h0,"%r0"); |
|
+ alcgr ($h1,"%r1"); |
|
+ sllg ("%r0",$h2,40); |
|
+ srlg ($h2,$h2,24); |
|
+ lghi ("%r1",0); |
|
+ algr ($h1,"%r0"); |
|
+ alcgr ($h2,"%r1"); |
|
+ |
|
+ llgf ("%r0","24($ctx)"); # is_base2_26 |
|
+ lcgr ("%r0","%r0"); |
|
+ |
|
+ xgr ($h0,$d0); # choose between radixes |
|
+ xgr ($h1,$d1); |
|
+ xgr ($h2,$d2); |
|
+ ngr ($h0,"%r0"); |
|
+ ngr ($h1,"%r0"); |
|
+ ngr ($h2,"%r0"); |
|
+ xgr ($h0,$d0); |
|
+ xgr ($h1,$d1); |
|
+ xgr ($h2,$d2); |
|
+ |
|
+ lhi ("%r0",0); |
|
+ st ("%r0","24($ctx)"); # clear is_base2_26 |
|
+ |
|
+ j (".Lpoly1305_blocks_entry"); |
|
+SIZE ("poly1305_blocks_vx",".-poly1305_blocks_vx"); |
|
+ |
|
+TYPE ("__poly1305_mul","\@function"); |
|
+ALIGN (16); |
|
+LABEL ("__poly1305_mul"); |
|
+ vmlof ($ACC0,$H0,$R0); |
|
+ vmlof ($ACC1,$H0,$R1); |
|
+ vmlof ($ACC2,$H0,$R2); |
|
+ vmlof ($ACC3,$H0,$R3); |
|
+ vmlof ($ACC4,$H0,$R4); |
|
+ |
|
+ vmalof ($ACC0,$H1,$S4,$ACC0); |
|
+ vmalof ($ACC1,$H1,$R0,$ACC1); |
|
+ vmalof ($ACC2,$H1,$R1,$ACC2); |
|
+ vmalof ($ACC3,$H1,$R2,$ACC3); |
|
+ vmalof ($ACC4,$H1,$R3,$ACC4); |
|
+ |
|
+ vmalof ($ACC0,$H2,$S3,$ACC0); |
|
+ vmalof ($ACC1,$H2,$S4,$ACC1); |
|
+ vmalof ($ACC2,$H2,$R0,$ACC2); |
|
+ vmalof ($ACC3,$H2,$R1,$ACC3); |
|
+ vmalof ($ACC4,$H2,$R2,$ACC4); |
|
+ |
|
+ vmalof ($ACC0,$H3,$S2,$ACC0); |
|
+ vmalof ($ACC1,$H3,$S3,$ACC1); |
|
+ vmalof ($ACC2,$H3,$S4,$ACC2); |
|
+ vmalof ($ACC3,$H3,$R0,$ACC3); |
|
+ vmalof ($ACC4,$H3,$R1,$ACC4); |
|
+ |
|
+ vmalof ($ACC0,$H4,$S1,$ACC0); |
|
+ vmalof ($ACC1,$H4,$S2,$ACC1); |
|
+ vmalof ($ACC2,$H4,$S3,$ACC2); |
|
+ vmalof ($ACC3,$H4,$S4,$ACC3); |
|
+ vmalof ($ACC4,$H4,$R0,$ACC4); |
|
+ |
|
+ ################################################################ |
|
+ # lazy reduction |
|
+ |
|
+ vesrlg ($H4,$ACC3,26); |
|
+ vesrlg ($H1,$ACC0,26); |
|
+ vn ($H3,$ACC3,$mask26); |
|
+ vn ($H0,$ACC0,$mask26); |
|
+ vag ($H4,$H4,$ACC4); # h3 -> h4 |
|
+ vag ($H1,$H1,$ACC1); # h0 -> h1 |
|
+ |
|
+ vesrlg ($ACC4,$H4,26); |
|
+ vesrlg ($ACC1,$H1,26); |
|
+ vn ($H4,$H4,$mask26); |
|
+ vn ($H1,$H1,$mask26); |
|
+ vag ($H0,$H0,$ACC4); |
|
+ vag ($H2,$ACC2,$ACC1); # h1 -> h2 |
|
+ |
|
+ veslg ($ACC4,$ACC4,2); # <<2 |
|
+ vesrlg ($ACC2,$H2,26); |
|
+ vn ($H2,$H2,$mask26); |
|
+ vag ($H0,$H0,$ACC4); # h4 -> h0 |
|
+ vag ($H3,$H3,$ACC2); # h2 -> h3 |
|
+ |
|
+ vesrlg ($ACC0,$H0,26); |
|
+ vesrlg ($ACC3,$H3,26); |
|
+ vn ($H0,$H0,$mask26); |
|
+ vn ($H3,$H3,$mask26); |
|
+ vag ($H1,$H1,$ACC0); # h0 -> h1 |
|
+ vag ($H4,$H4,$ACC3); # h3 -> h4 |
|
+ br ("%r14"); |
|
+SIZE ("__poly1305_mul",".-__poly1305_mul"); |
|
+ |
|
+TYPE ("__poly1305_blocks_vx","\@function"); |
|
+ALIGN (16); |
|
+LABEL ("__poly1305_blocks_vx"); |
|
+&{$z? \&lgr:\&lr} ("%r0",$sp); |
|
+&{$z? \&stmg:\&stm} ("%r10","%r15","10*$SIZE_T($sp)"); |
|
+if (!$z) { |
|
+ std ("%f4","16*$SIZE_T+2*8($sp)"); |
|
+ std ("%f6","16*$SIZE_T+3*8($sp)"); |
|
+ ahi ($sp,-$stdframe); |
|
+ st ("%r0","0($sp)"); # back-chain |
|
+ |
|
+ llgfr ($len,$len); # so that srlg works on $len |
|
+} else { |
|
+ aghi ($sp,"-($stdframe+8*8)"); |
|
+ stg ("%r0","0($sp)"); # back-chain |
|
+ |
|
+ std ("%f8","$stdframe+0*8($sp)"); |
|
+ std ("%f9","$stdframe+1*8($sp)"); |
|
+ std ("%f10","$stdframe+2*8($sp)"); |
|
+ std ("%f11","$stdframe+3*8($sp)"); |
|
+ std ("%f12","$stdframe+4*8($sp)"); |
|
+ std ("%f13","$stdframe+5*8($sp)"); |
|
+ std ("%f14","$stdframe+6*8($sp)"); |
|
+ std ("%f15","$stdframe+7*8($sp)"); |
|
+} |
|
+ larl ("%r1",".Lconst"); |
|
+ vgmg ($mask26,38,63); |
|
+ vlm ($bswaplo,$bswapmi,"16(%r1)"); |
|
+ |
|
+ lt ("%r0","24($ctx)"); # is_base2_26? |
|
+ jnz (".Lskip_init"); |
|
+ |
|
+ lg ($h0,"32($ctx)"); # load key base 2^64 |
|
+ lg ($h1,"40($ctx)"); |
|
+ |
|
+ risbg ($d0,$h0,38,0x80+63,38); # base 2^64 -> 2^26 |
|
+ srlg ($d1,$h0,52); |
|
+ risbg ($h0,$h0,38,0x80+63,0); |
|
+ vlvgg ($R0,$h0,0); |
|
+ risbg ($d1,$h1,38,51,12); |
|
+ vlvgg ($R1,$d0,0); |
|
+ risbg ($d0,$h1,38,63,50); |
|
+ vlvgg ($R2,$d1,0); |
|
+ srlg ($d1,$h1,40); |
|
+ vlvgg ($R3,$d0,0); |
|
+ vlvgg ($R4,$d1,0); |
|
+ |
|
+ veslg ($S1,$R1,2); |
|
+ veslg ($S2,$R2,2); |
|
+ veslg ($S3,$R3,2); |
|
+ veslg ($S4,$R4,2); |
|
+ vlr ($H0,$R0); |
|
+ vlr ($H1,$R1); |
|
+ vlr ($H2,$R2); |
|
+ vlr ($H3,$R3); |
|
+ vlr ($H4,$R4); |
|
+ vag ($S1,$S1,$R1); # * 5 |
|
+ vag ($S2,$S2,$R2); |
|
+ vag ($S3,$S3,$R3); |
|
+ vag ($S4,$S4,$R4); |
|
+ |
|
+ brasl ("%r14","__poly1305_mul"); # r^1:- * r^1:- |
|
+ |
|
+ vpdi ($R0,$H0,$R0,0); # r^2:r^1 |
|
+ vpdi ($R1,$H1,$R1,0); |
|
+ vpdi ($R2,$H2,$R2,0); |
|
+ vpdi ($R3,$H3,$R3,0); |
|
+ vpdi ($R4,$H4,$R4,0); |
|
+ vpdi ($H0,$H0,$H0,0); # r^2:r^2 |
|
+ vpdi ($H1,$H1,$H1,0); |
|
+ vpdi ($H2,$H2,$H2,0); |
|
+ vpdi ($H3,$H3,$H3,0); |
|
+ vpdi ($H4,$H4,$H4,0); |
|
+ veslg ($S1,$R1,2); |
|
+ veslg ($S2,$R2,2); |
|
+ veslg ($S3,$R3,2); |
|
+ veslg ($S4,$R4,2); |
|
+ vag ($S1,$S1,$R1); # * 5 |
|
+ vag ($S2,$S2,$R2); |
|
+ vag ($S3,$S3,$R3); |
|
+ vag ($S4,$S4,$R4); |
|
+ |
|
+ brasl ("%r14","__poly1305_mul"); # r^2:r^2 * r^2:r^1 |
|
+ |
|
+ vl ($I0,"0(%r1)"); # borrow $I0 |
|
+ vperm ($R0,$R0,$H0,$I0); # r^2:r^4:r^1:r^3 |
|
+ vperm ($R1,$R1,$H1,$I0); |
|
+ vperm ($R2,$R2,$H2,$I0); |
|
+ vperm ($R3,$R3,$H3,$I0); |
|
+ vperm ($R4,$R4,$H4,$I0); |
|
+ veslf ($S1,$R1,2); |
|
+ veslf ($S2,$R2,2); |
|
+ veslf ($S3,$R3,2); |
|
+ veslf ($S4,$R4,2); |
|
+ vaf ($S1,$S1,$R1); # * 5 |
|
+ vaf ($S2,$S2,$R2); |
|
+ vaf ($S3,$S3,$R3); |
|
+ vaf ($S4,$S4,$R4); |
|
+ |
|
+ lg ($h0,"0($ctx)"); # load hash base 2^64 |
|
+ lg ($h1,"8($ctx)"); |
|
+ lg ($h2,"16($ctx)"); |
|
+ |
|
+ vzero ($H0); |
|
+ vzero ($H1); |
|
+ vzero ($H2); |
|
+ vzero ($H3); |
|
+ vzero ($H4); |
|
+ |
|
+ risbg ($d0,$h0,38,0x80+63,38); # base 2^64 -> 2^26 |
|
+ srlg ($d1,$h0,52); |
|
+ risbg ($h0,$h0,38,0x80+63,0); |
|
+ vlvgg ($H0,$h0,0); |
|
+ risbg ($d1,$h1,38,51,12); |
|
+ vlvgg ($H1,$d0,0); |
|
+ risbg ($d0,$h1,38,63,50); |
|
+ vlvgg ($H2,$d1,0); |
|
+ srlg ($d1,$h1,40); |
|
+ vlvgg ($H3,$d0,0); |
|
+ risbg ($d1,$h2,37,39,24); |
|
+ vlvgg ($H4,$d1,0); |
|
+ |
|
+ lhi ("%r0",1); |
|
+ st ("%r0","24($ctx)"); # set is_base2_26 |
|
+ |
|
+ vstm ($R0,$S4,"48($ctx)"); # save key schedule base 2^26 |
|
+ |
|
+ vpdi ($R0,$R0,$R0,0); # broadcast r^2:r^4 |
|
+ vpdi ($R1,$R1,$R1,0); |
|
+ vpdi ($S1,$S1,$S1,0); |
|
+ vpdi ($R2,$R2,$R2,0); |
|
+ vpdi ($S2,$S2,$S2,0); |
|
+ vpdi ($R3,$R3,$R3,0); |
|
+ vpdi ($S3,$S3,$S3,0); |
|
+ vpdi ($R4,$R4,$R4,0); |
|
+ vpdi ($S4,$S4,$S4,0); |
|
+ |
|
+ j (".Loaded_hash"); |
|
+ |
|
+ALIGN (16); |
|
+LABEL (".Lskip_init"); |
|
+ vllezf ($H0,"0($ctx)"); # load hash base 2^26 |
|
+ vllezf ($H1,"4($ctx)"); |
|
+ vllezf ($H2,"8($ctx)"); |
|
+ vllezf ($H3,"12($ctx)"); |
|
+ vllezf ($H4,"16($ctx)"); |
|
+ |
|
+ vlrepg ($R0,"0x30($ctx)"); # broadcast r^2:r^4 |
|
+ vlrepg ($R1,"0x40($ctx)"); |
|
+ vlrepg ($S1,"0x50($ctx)"); |
|
+ vlrepg ($R2,"0x60($ctx)"); |
|
+ vlrepg ($S2,"0x70($ctx)"); |
|
+ vlrepg ($R3,"0x80($ctx)"); |
|
+ vlrepg ($S3,"0x90($ctx)"); |
|
+ vlrepg ($R4,"0xa0($ctx)"); |
|
+ vlrepg ($S4,"0xb0($ctx)"); |
|
+ |
|
+LABEL (".Loaded_hash"); |
|
+ vzero ($I1); |
|
+ vzero ($I3); |
|
+ |
|
+ vlm ($T1,$T4,"0x00($inp)"); # load first input block |
|
+ la ($inp,"0x40($inp)"); |
|
+ vgmg ($mask26,6,31); |
|
+ vgmf ($I4,5,5); # padbit<<2 |
|
+ |
|
+ vperm ($I0,$T3,$T4,$bswaplo); |
|
+ vperm ($I2,$T3,$T4,$bswapmi); |
|
+ vperm ($T3,$T3,$T4,$bswaphi); |
|
+ |
|
+ verimg ($I1,$I0,$mask26,6); # >>26 |
|
+ veslg ($I0,$I0,32); |
|
+ veslg ($I2,$I2,28); # >>4 |
|
+ verimg ($I3,$T3,$mask26,18); # >>14 |
|
+ verimg ($I4,$T3,$mask26,58); # >>38 |
|
+ vn ($I0,$I0,$mask26); |
|
+ vn ($I2,$I2,$mask26); |
|
+ vesrlf ($I4,$I4,2); # >>2 |
|
+ |
|
+ vgmg ($mask26,38,63); |
|
+ vperm ($T3,$T1,$T2,$bswaplo); |
|
+ vperm ($T4,$T1,$T2,$bswaphi); |
|
+ vperm ($T2,$T1,$T2,$bswapmi); |
|
+ |
|
+ verimg ($I0,$T3,$mask26,0); |
|
+ verimg ($I1,$T3,$mask26,38); # >>26 |
|
+ verimg ($I2,$T2,$mask26,60); # >>4 |
|
+ verimg ($I3,$T4,$mask26,50); # >>14 |
|
+ vesrlg ($T4,$T4,40); |
|
+ vo ($I4,$I4,$T4); |
|
+ |
|
+ srlg ("%r0",$len,6); |
|
+&{$z? \&aghi:\&ahi} ("%r0",-1); |
|
+ |
|
+ALIGN (16); |
|
+LABEL (".Loop_vx"); |
|
+ vmlef ($ACC0,$I0,$R0); |
|
+ vmlef ($ACC1,$I0,$R1); |
|
+ vmlef ($ACC2,$I0,$R2); |
|
+ vmlef ($ACC3,$I0,$R3); |
|
+ vmlef ($ACC4,$I0,$R4); |
|
+ |
|
+ vmalef ($ACC0,$I1,$S4,$ACC0); |
|
+ vmalef ($ACC1,$I1,$R0,$ACC1); |
|
+ vmalef ($ACC2,$I1,$R1,$ACC2); |
|
+ vmalef ($ACC3,$I1,$R2,$ACC3); |
|
+ vmalef ($ACC4,$I1,$R3,$ACC4); |
|
+ |
|
+ vaf ($H2,$H2,$I2); |
|
+ vaf ($H0,$H0,$I0); |
|
+ vaf ($H3,$H3,$I3); |
|
+ vaf ($H1,$H1,$I1); |
|
+ vaf ($H4,$H4,$I4); |
|
+ |
|
+ vmalef ($ACC0,$I2,$S3,$ACC0); |
|
+ vmalef ($ACC1,$I2,$S4,$ACC1); |
|
+ vmalef ($ACC2,$I2,$R0,$ACC2); |
|
+ vmalef ($ACC3,$I2,$R1,$ACC3); |
|
+ vmalef ($ACC4,$I2,$R2,$ACC4); |
|
+ |
|
+ vlm ($T1,$T4,"0x00($inp)"); # load next input block |
|
+ la ($inp,"0x40($inp)"); |
|
+ vgmg ($mask26,6,31); |
|
+ |
|
+ vmalef ($ACC0,$I3,$S2,$ACC0); |
|
+ vmalef ($ACC1,$I3,$S3,$ACC1); |
|
+ vmalef ($ACC2,$I3,$S4,$ACC2); |
|
+ vmalef ($ACC3,$I3,$R0,$ACC3); |
|
+ vmalef ($ACC4,$I3,$R1,$ACC4); |
|
+ |
|
+ vperm ($I0,$T3,$T4,$bswaplo); |
|
+ vperm ($I2,$T3,$T4,$bswapmi); |
|
+ vperm ($T3,$T3,$T4,$bswaphi); |
|
+ |
|
+ vmalef ($ACC0,$I4,$S1,$ACC0); |
|
+ vmalef ($ACC1,$I4,$S2,$ACC1); |
|
+ vmalef ($ACC2,$I4,$S3,$ACC2); |
|
+ vmalef ($ACC3,$I4,$S4,$ACC3); |
|
+ vmalef ($ACC4,$I4,$R0,$ACC4); |
|
+ |
|
+ verimg ($I1,$I0,$mask26,6); # >>26 |
|
+ veslg ($I0,$I0,32); |
|
+ veslg ($I2,$I2,28); # >>4 |
|
+ verimg ($I3,$T3,$mask26,18); # >>14 |
|
+ |
|
+ vmalof ($ACC0,$H0,$R0,$ACC0); |
|
+ vmalof ($ACC1,$H0,$R1,$ACC1); |
|
+ vmalof ($ACC2,$H0,$R2,$ACC2); |
|
+ vmalof ($ACC3,$H0,$R3,$ACC3); |
|
+ vmalof ($ACC4,$H0,$R4,$ACC4); |
|
+ |
|
+ vgmf ($I4,5,5); # padbit<<2 |
|
+ verimg ($I4,$T3,$mask26,58); # >>38 |
|
+ vn ($I0,$I0,$mask26); |
|
+ vn ($I2,$I2,$mask26); |
|
+ vesrlf ($I4,$I4,2); # >>2 |
|
+ |
|
+ vmalof ($ACC0,$H1,$S4,$ACC0); |
|
+ vmalof ($ACC1,$H1,$R0,$ACC1); |
|
+ vmalof ($ACC2,$H1,$R1,$ACC2); |
|
+ vmalof ($ACC3,$H1,$R2,$ACC3); |
|
+ vmalof ($ACC4,$H1,$R3,$ACC4); |
|
+ |
|
+ vgmg ($mask26,38,63); |
|
+ vperm ($T3,$T1,$T2,$bswaplo); |
|
+ vperm ($T4,$T1,$T2,$bswaphi); |
|
+ vperm ($T2,$T1,$T2,$bswapmi); |
|
+ |
|
+ vmalof ($ACC0,$H2,$S3,$ACC0); |
|
+ vmalof ($ACC1,$H2,$S4,$ACC1); |
|
+ vmalof ($ACC2,$H2,$R0,$ACC2); |
|
+ vmalof ($ACC3,$H2,$R1,$ACC3); |
|
+ vmalof ($ACC4,$H2,$R2,$ACC4); |
|
+ |
|
+ verimg ($I0,$T3,$mask26,0); |
|
+ verimg ($I1,$T3,$mask26,38); # >>26 |
|
+ verimg ($I2,$T2,$mask26,60); # >>4 |
|
+ |
|
+ vmalof ($ACC0,$H3,$S2,$ACC0); |
|
+ vmalof ($ACC1,$H3,$S3,$ACC1); |
|
+ vmalof ($ACC2,$H3,$S4,$ACC2); |
|
+ vmalof ($ACC3,$H3,$R0,$ACC3); |
|
+ vmalof ($ACC4,$H3,$R1,$ACC4); |
|
+ |
|
+ verimg ($I3,$T4,$mask26,50); # >>14 |
|
+ vesrlg ($T4,$T4,40); |
|
+ vo ($I4,$I4,$T4); |
|
+ |
|
+ vmalof ($ACC0,$H4,$S1,$ACC0); |
|
+ vmalof ($ACC1,$H4,$S2,$ACC1); |
|
+ vmalof ($ACC2,$H4,$S3,$ACC2); |
|
+ vmalof ($ACC3,$H4,$S4,$ACC3); |
|
+ vmalof ($ACC4,$H4,$R0,$ACC4); |
|
+ |
|
+ ################################################################ |
|
+ # lazy reduction as discussed in "NEON crypto" by D.J. Bernstein |
|
+ # and P. Schwabe |
|
+ |
|
+ vesrlg ($H4,$ACC3,26); |
|
+ vesrlg ($H1,$ACC0,26); |
|
+ vn ($H3,$ACC3,$mask26); |
|
+ vn ($H0,$ACC0,$mask26); |
|
+ vag ($H4,$H4,$ACC4); # h3 -> h4 |
|
+ vag ($H1,$H1,$ACC1); # h0 -> h1 |
|
+ |
|
+ vesrlg ($ACC4,$H4,26); |
|
+ vesrlg ($ACC1,$H1,26); |
|
+ vn ($H4,$H4,$mask26); |
|
+ vn ($H1,$H1,$mask26); |
|
+ vag ($H0,$H0,$ACC4); |
|
+ vag ($H2,$ACC2,$ACC1); # h1 -> h2 |
|
+ |
|
+ veslg ($ACC4,$ACC4,2); # <<2 |
|
+ vesrlg ($ACC2,$H2,26); |
|
+ vn ($H2,$H2,$mask26); |
|
+ vag ($H0,$H0,$ACC4); # h4 -> h0 |
|
+ vag ($H3,$H3,$ACC2); # h2 -> h3 |
|
+ |
|
+ vesrlg ($ACC0,$H0,26); |
|
+ vesrlg ($ACC3,$H3,26); |
|
+ vn ($H0,$H0,$mask26); |
|
+ vn ($H3,$H3,$mask26); |
|
+ vag ($H1,$H1,$ACC0); # h0 -> h1 |
|
+ vag ($H4,$H4,$ACC3); # h3 -> h4 |
|
+ |
|
+&{$z? \&brctg:\&brct} ("%r0",".Loop_vx"); |
|
+ |
|
+ vlm ($R0,$S4,"48($ctx)"); # load all powers |
|
+ |
|
+ lghi ("%r0",0x30); |
|
+&{$z? \&lcgr:\&lcr} ($len,$len); |
|
+&{$z? \&ngr:\&nr} ($len,"%r0"); |
|
+&{$z? \&slgr:\&slr} ($inp,$len); |
|
+ |
|
+LABEL (".Last"); |
|
+ vmlef ($ACC0,$I0,$R0); |
|
+ vmlef ($ACC1,$I0,$R1); |
|
+ vmlef ($ACC2,$I0,$R2); |
|
+ vmlef ($ACC3,$I0,$R3); |
|
+ vmlef ($ACC4,$I0,$R4); |
|
+ |
|
+ vmalef ($ACC0,$I1,$S4,$ACC0); |
|
+ vmalef ($ACC1,$I1,$R0,$ACC1); |
|
+ vmalef ($ACC2,$I1,$R1,$ACC2); |
|
+ vmalef ($ACC3,$I1,$R2,$ACC3); |
|
+ vmalef ($ACC4,$I1,$R3,$ACC4); |
|
+ |
|
+ vaf ($H0,$H0,$I0); |
|
+ vaf ($H1,$H1,$I1); |
|
+ vaf ($H2,$H2,$I2); |
|
+ vaf ($H3,$H3,$I3); |
|
+ vaf ($H4,$H4,$I4); |
|
+ |
|
+ vmalef ($ACC0,$I2,$S3,$ACC0); |
|
+ vmalef ($ACC1,$I2,$S4,$ACC1); |
|
+ vmalef ($ACC2,$I2,$R0,$ACC2); |
|
+ vmalef ($ACC3,$I2,$R1,$ACC3); |
|
+ vmalef ($ACC4,$I2,$R2,$ACC4); |
|
+ |
|
+ vmalef ($ACC0,$I3,$S2,$ACC0); |
|
+ vmalef ($ACC1,$I3,$S3,$ACC1); |
|
+ vmalef ($ACC2,$I3,$S4,$ACC2); |
|
+ vmalef ($ACC3,$I3,$R0,$ACC3); |
|
+ vmalef ($ACC4,$I3,$R1,$ACC4); |
|
+ |
|
+ vmalef ($ACC0,$I4,$S1,$ACC0); |
|
+ vmalef ($ACC1,$I4,$S2,$ACC1); |
|
+ vmalef ($ACC2,$I4,$S3,$ACC2); |
|
+ vmalef ($ACC3,$I4,$S4,$ACC3); |
|
+ vmalef ($ACC4,$I4,$R0,$ACC4); |
|
+ |
|
+ vmalof ($ACC0,$H0,$R0,$ACC0); |
|
+ vmalof ($ACC1,$H0,$R1,$ACC1); |
|
+ vmalof ($ACC2,$H0,$R2,$ACC2); |
|
+ vmalof ($ACC3,$H0,$R3,$ACC3); |
|
+ vmalof ($ACC4,$H0,$R4,$ACC4); |
|
+ |
|
+ vmalof ($ACC0,$H1,$S4,$ACC0); |
|
+ vmalof ($ACC1,$H1,$R0,$ACC1); |
|
+ vmalof ($ACC2,$H1,$R1,$ACC2); |
|
+ vmalof ($ACC3,$H1,$R2,$ACC3); |
|
+ vmalof ($ACC4,$H1,$R3,$ACC4); |
|
+ |
|
+ vmalof ($ACC0,$H2,$S3,$ACC0); |
|
+ vmalof ($ACC1,$H2,$S4,$ACC1); |
|
+ vmalof ($ACC2,$H2,$R0,$ACC2); |
|
+ vmalof ($ACC3,$H2,$R1,$ACC3); |
|
+ vmalof ($ACC4,$H2,$R2,$ACC4); |
|
+ |
|
+ vmalof ($ACC0,$H3,$S2,$ACC0); |
|
+ vmalof ($ACC1,$H3,$S3,$ACC1); |
|
+ vmalof ($ACC2,$H3,$S4,$ACC2); |
|
+ vmalof ($ACC3,$H3,$R0,$ACC3); |
|
+ vmalof ($ACC4,$H3,$R1,$ACC4); |
|
+ |
|
+ vmalof ($ACC0,$H4,$S1,$ACC0); |
|
+ vmalof ($ACC1,$H4,$S2,$ACC1); |
|
+ vmalof ($ACC2,$H4,$S3,$ACC2); |
|
+ vmalof ($ACC3,$H4,$S4,$ACC3); |
|
+ vmalof ($ACC4,$H4,$R0,$ACC4); |
|
+ |
|
+ ################################################################ |
|
+ # horizontal addition |
|
+ |
|
+ vzero ($H0); |
|
+ vsumqg ($ACC0,$ACC0,$H0); |
|
+ vsumqg ($ACC1,$ACC1,$H0); |
|
+ vsumqg ($ACC2,$ACC2,$H0); |
|
+ vsumqg ($ACC3,$ACC3,$H0); |
|
+ vsumqg ($ACC4,$ACC4,$H0); |
|
+ |
|
+ ################################################################ |
|
+ # lazy reduction |
|
+ |
|
+ vesrlg ($H4,$ACC3,26); |
|
+ vesrlg ($H1,$ACC0,26); |
|
+ vn ($H3,$ACC3,$mask26); |
|
+ vn ($H0,$ACC0,$mask26); |
|
+ vag ($H4,$H4,$ACC4); # h3 -> h4 |
|
+ vag ($H1,$H1,$ACC1); # h0 -> h1 |
|
+ |
|
+ vesrlg ($ACC4,$H4,26); |
|
+ vesrlg ($ACC1,$H1,26); |
|
+ vn ($H4,$H4,$mask26); |
|
+ vn ($H1,$H1,$mask26); |
|
+ vag ($H0,$H0,$ACC4); |
|
+ vag ($H2,$ACC2,$ACC1); # h1 -> h2 |
|
+ |
|
+ veslg ($ACC4,$ACC4,2); # <<2 |
|
+ vesrlg ($ACC2,$H2,26); |
|
+ vn ($H2,$H2,$mask26); |
|
+ vag ($H0,$H0,$ACC4); # h4 -> h0 |
|
+ vag ($H3,$H3,$ACC2); # h2 -> h3 |
|
+ |
|
+ vesrlg ($ACC0,$H0,26); |
|
+ vesrlg ($ACC3,$H3,26); |
|
+ vn ($H0,$H0,$mask26); |
|
+ vn ($H3,$H3,$mask26); |
|
+ vag ($H1,$H1,$ACC0); # h0 -> h1 |
|
+ vag ($H4,$H4,$ACC3); # h3 -> h4 |
|
+ |
|
+&{$z? \&clgfi:\&clfi} ($len,0); |
|
+ je (".Ldone"); |
|
+ |
|
+ vlm ($T1,$T4,"0x00($inp)"); # load last partial block |
|
+ vgmg ($mask26,6,31); |
|
+ vgmf ($I4,5,5); # padbit<<2 |
|
+ |
|
+ vperm ($I0,$T3,$T4,$bswaplo); |
|
+ vperm ($I2,$T3,$T4,$bswapmi); |
|
+ vperm ($T3,$T3,$T4,$bswaphi); |
|
+ |
|
+ vl ($ACC0,"0x30($len,%r1)"); # borrow $ACC0,1 |
|
+ vl ($ACC1,"0x60($len,%r1)"); |
|
+ |
|
+ verimg ($I1,$I0,$mask26,6); # >>26 |
|
+ veslg ($I0,$I0,32); |
|
+ veslg ($I2,$I2,28); # >>4 |
|
+ verimg ($I3,$T3,$mask26,18); # >>14 |
|
+ verimg ($I4,$T3,$mask26,58); # >>38 |
|
+ vn ($I0,$I0,$mask26); |
|
+ vn ($I2,$I2,$mask26); |
|
+ vesrlf ($I4,$I4,2); # >>2 |
|
+ |
|
+ vgmg ($mask26,38,63); |
|
+ vperm ($T3,$T1,$T2,$bswaplo); |
|
+ vperm ($T4,$T1,$T2,$bswaphi); |
|
+ vperm ($T2,$T1,$T2,$bswapmi); |
|
+ |
|
+ verimg ($I0,$T3,$mask26,0); |
|
+ verimg ($I1,$T3,$mask26,38); # >>26 |
|
+ verimg ($I2,$T2,$mask26,60); # >>4 |
|
+ verimg ($I3,$T4,$mask26,50); # >>14 |
|
+ vesrlg ($T4,$T4,40); |
|
+ vo ($I4,$I4,$T4); |
|
+ |
|
+ vperm ($H0,$H0,$H0,$ACC0); # move hash to right lane |
|
+ vn ($I0,$I0,$ACC1); # mask redundant lane[s] |
|
+ vperm ($H1,$H1,$H1,$ACC0); |
|
+ vn ($I1,$I1,$ACC1); |
|
+ vperm ($H2,$H2,$H2,$ACC0); |
|
+ vn ($I2,$I2,$ACC1); |
|
+ vperm ($H3,$H3,$H3,$ACC0); |
|
+ vn ($I3,$I3,$ACC1); |
|
+ vperm ($H4,$H4,$H4,$ACC0); |
|
+ vn ($I4,$I4,$ACC1); |
|
+ |
|
+ vaf ($I0,$I0,$H0); # accumulate hash |
|
+ vzero ($H0); # wipe hash value |
|
+ vaf ($I1,$I1,$H1); |
|
+ vzero ($H1); |
|
+ vaf ($I2,$I2,$H2); |
|
+ vzero ($H2); |
|
+ vaf ($I3,$I3,$H3); |
|
+ vzero ($H3); |
|
+ vaf ($I4,$I4,$H4); |
|
+ vzero ($H4); |
|
+ |
|
+&{$z? \&lghi:\&lhi} ($len,0); |
|
+ j (".Last"); |
|
+ # I don't bother to tell apart cases when only one multiplication |
|
+ # pass is sufficient, because I argue that mispredicted branch |
|
+ # penalties are comparable to overhead of sometimes redundant |
|
+ # multiplication pass... |
|
+ |
|
+LABEL (".Ldone"); |
|
+ vstef ($H0,"0($ctx)",3); # store hash base 2^26 |
|
+ vstef ($H1,"4($ctx)",3); |
|
+ vstef ($H2,"8($ctx)",3); |
|
+ vstef ($H3,"12($ctx)",3); |
|
+ vstef ($H4,"16($ctx)",3); |
|
+ |
|
+if ($z) { |
|
+ ld ("%f8","$stdframe+0*8($sp)"); |
|
+ ld ("%f9","$stdframe+1*8($sp)"); |
|
+ ld ("%f10","$stdframe+2*8($sp)"); |
|
+ ld ("%f11","$stdframe+3*8($sp)"); |
|
+ ld ("%f12","$stdframe+4*8($sp)"); |
|
+ ld ("%f13","$stdframe+5*8($sp)"); |
|
+ ld ("%f14","$stdframe+6*8($sp)"); |
|
+ ld ("%f15","$stdframe+7*8($sp)"); |
|
+&{$z? \&lmg:\&lm} ("%r10","%r15","$stdframe+8*8+10*$SIZE_T($sp)"); |
|
+} else { |
|
+ ld ("%f4","$stdframe+16*$SIZE_T+2*8($sp)"); |
|
+ ld ("%f6","$stdframe+16*$SIZE_T+3*8($sp)"); |
|
+&{$z? \&lmg:\&lm} ("%r10","%r15","$stdframe+10*$SIZE_T($sp)"); |
|
+} |
|
+ br ("%r14"); |
|
+SIZE ("__poly1305_blocks_vx",".-__poly1305_blocks_vx"); |
|
+} |
|
+ |
|
+################ |
|
+# static void poly1305_emit(void *ctx, unsigned char mac[16], |
|
+# const u32 nonce[4]) |
|
{ |
|
my ($mac,$nonce)=($inp,$len); |
|
-my ($h0,$h1,$h2,$d0,$d1)=map("%r$_",(5..9)); |
|
+my ($h0,$h1,$h2,$d0,$d1,$d2)=map("%r$_",(5..10)); |
|
|
|
-$code.=<<___; |
|
-.globl poly1305_emit |
|
-.type poly1305_emit,\@function |
|
-.align 16 |
|
-poly1305_emit: |
|
- stm${g} %r6,%r9,`6*$SIZE_T`($sp) |
|
- |
|
- lg $h0,0($ctx) |
|
- lg $h1,8($ctx) |
|
- lg $h2,16($ctx) |
|
- |
|
- lghi %r0,5 |
|
- lghi %r1,0 |
|
- lgr $d0,$h0 |
|
- lgr $d1,$h1 |
|
- |
|
- algr $h0,%r0 # compare to modulus |
|
- alcgr $h1,%r1 |
|
- alcgr $h2,%r1 |
|
- |
|
- srlg $h2,$h2,2 # did it borrow/carry? |
|
- slgr %r1,$h2 # 0-$h2>>2 |
|
- lg $h2,0($nonce) # load nonce |
|
- lghi %r0,-1 |
|
- lg $ctx,8($nonce) |
|
- xgr %r0,%r1 # ~%r1 |
|
- |
|
- ngr $h0,%r1 |
|
- ngr $d0,%r0 |
|
- ngr $h1,%r1 |
|
- ngr $d1,%r0 |
|
- ogr $h0,$d0 |
|
- rllg $d0,$h2,32 # flip nonce words |
|
- ogr $h1,$d1 |
|
- rllg $d1,$ctx,32 |
|
- |
|
- algr $h0,$d0 # accumulate nonce |
|
- alcgr $h1,$d1 |
|
- |
|
- strvg $h0,0($mac) # write little-endian result |
|
- strvg $h1,8($mac) |
|
- |
|
- lm${g} %r6,%r9,`6*$SIZE_T`($sp) |
|
- br %r14 |
|
-.size poly1305_emit,.-poly1305_emit |
|
- |
|
-.string "Poly1305 for s390x, CRYPTOGAMS by <appro\@openssl.org>" |
|
-___ |
|
+GLOBL ("poly1305_emit"); |
|
+TYPE ("poly1305_emit","\@function"); |
|
+ALIGN (16); |
|
+LABEL ("poly1305_emit"); |
|
+LABEL (".Lpoly1305_emit"); |
|
+&{$z? \&stmg:\&stm} ("%r6","%r10","6*$SIZE_T($sp)"); |
|
+ |
|
+ lg ($d0,"0($ctx)"); |
|
+ lg ($d1,"8($ctx)"); |
|
+ lg ($d2,"16($ctx)"); |
|
+ |
|
+ llgfr ("%r0",$d0); # base 2^26 -> base 2^64 |
|
+ srlg ($h0,$d0,32); |
|
+ llgfr ("%r1",$d1); |
|
+ srlg ($h1,$d1,32); |
|
+ srlg ($h2,$d2,32); |
|
+ |
|
+ sllg ("%r0","%r0",26); |
|
+ algr ($h0,"%r0"); |
|
+ sllg ("%r0",$h1,52); |
|
+ srlg ($h1,$h1,12); |
|
+ sllg ("%r1","%r1",14); |
|
+ algr ($h0,"%r0"); |
|
+ alcgr ($h1,"%r1"); |
|
+ sllg ("%r0",$h2,40); |
|
+ srlg ($h2,$h2,24); |
|
+ lghi ("%r1",0); |
|
+ algr ($h1,"%r0"); |
|
+ alcgr ($h2,"%r1"); |
|
+ |
|
+ llgf ("%r0","24($ctx)"); # is_base2_26 |
|
+ lcgr ("%r0","%r0"); |
|
+ |
|
+ xgr ($h0,$d0); # choose between radixes |
|
+ xgr ($h1,$d1); |
|
+ xgr ($h2,$d2); |
|
+ ngr ($h0,"%r0"); |
|
+ ngr ($h1,"%r0"); |
|
+ ngr ($h2,"%r0"); |
|
+ xgr ($h0,$d0); |
|
+ xgr ($h1,$d1); |
|
+ xgr ($h2,$d2); |
|
+ |
|
+ lghi ("%r0",5); |
|
+ lgr ($d0,$h0); |
|
+ lgr ($d1,$h1); |
|
+ |
|
+ algr ($h0,"%r0"); # compare to modulus |
|
+ alcgr ($h1,"%r1"); |
|
+ alcgr ($h2,"%r1"); |
|
+ |
|
+ srlg ($h2,$h2,2); # did it borrow/carry? |
|
+ slgr ("%r1",$h2); # 0-$h2>>2 |
|
+ lg ($d2,"0($nonce)"); # load nonce |
|
+ lg ($ctx,"8($nonce)"); |
|
+ |
|
+ xgr ($h0,$d0); |
|
+ xgr ($h1,$d1); |
|
+ ngr ($h0,"%r1"); |
|
+ ngr ($h1,"%r1"); |
|
+ xgr ($h0,$d0); |
|
+ rllg ($d0,$d2,32); # flip nonce words |
|
+ xgr ($h1,$d1); |
|
+ rllg ($d1,$ctx,32); |
|
+ |
|
+ algr ($h0,$d0); # accumulate nonce |
|
+ alcgr ($h1,$d1); |
|
+ |
|
+ strvg ($h0,"0($mac)"); # write little-endian result |
|
+ strvg ($h1,"8($mac)"); |
|
+ |
|
+&{$z? \&lmg:\&lm} ("%r6","%r10","6*$SIZE_T($sp)"); |
|
+ br ("%r14"); |
|
+SIZE ("poly1305_emit",".-poly1305_emit"); |
|
} |
|
|
|
-$code =~ s/\`([^\`]*)\`/eval $1/gem; |
|
-$code =~ s/\b(srlg\s+)(%r[0-9]+\s*,)\s*([0-9]+)/$1$2$2$3/gm; |
|
+################ |
|
+ |
|
+ALIGN (16); |
|
+LABEL (".Lconst"); |
|
+LONG (0x04050607,0x14151617,0x0c0d0e0f,0x1c1d1e1f); # merge odd |
|
+LONG (0x07060504,0x03020100,0x17161514,0x13121110); # byte swap masks |
|
+LONG (0x0f0e0d0c,0x0b0a0908,0x1f1e1d1c,0x1b1a1918); |
|
+LONG (0x00000000,0x09080706,0x00000000,0x19181716); |
|
+ |
|
+LONG (0x00000000,0x00000000,0x00000000,0x0c0d0e0f); # magic tail masks |
|
+LONG (0x0c0d0e0f,0x00000000,0x00000000,0x00000000); |
|
+LONG (0x00000000,0x00000000,0x0c0d0e0f,0x00000000); |
|
+ |
|
+LONG (0xffffffff,0x00000000,0xffffffff,0xffffffff); |
|
+LONG (0xffffffff,0x00000000,0xffffffff,0x00000000); |
|
+LONG (0x00000000,0x00000000,0xffffffff,0x00000000); |
|
+ |
|
+STRING ("\"Poly1305 for s390x, CRYPTOGAMS by <appro\@openssl.org>\""); |
|
|
|
-print $code; |
|
-close STDOUT or die "error closing STDOUT: $!"; |
|
+PERLASM_END(); |
|
diff -up openssl-1.1.1e/crypto/poly1305/build.info.s390x-update openssl-1.1.1e/crypto/poly1305/build.info |
|
--- openssl-1.1.1e/crypto/poly1305/build.info.s390x-update 2020-03-17 15:31:17.000000000 +0100 |
|
+++ openssl-1.1.1e/crypto/poly1305/build.info 2020-03-19 16:20:22.042227342 +0100 |
|
@@ -18,6 +18,7 @@ INCLUDE[poly1305-armv8.o]=.. |
|
GENERATE[poly1305-mips.S]=asm/poly1305-mips.pl $(PERLASM_SCHEME) |
|
INCLUDE[poly1305-mips.o]=.. |
|
GENERATE[poly1305-s390x.S]=asm/poly1305-s390x.pl $(PERLASM_SCHEME) |
|
+INCLUDE[poly1305-s390x.o]=.. |
|
|
|
BEGINRAW[Makefile(unix)] |
|
{- $builddir -}/poly1305-%.S: {- $sourcedir -}/asm/poly1305-%.pl
|
|
|