You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
662 lines
14 KiB
662 lines
14 KiB
diff --git a/crypto/bn/asm/ppc64-mont-fixed.pl b/crypto/bn/asm/ppc64-mont-fixed.pl |
|
index 56df89dc27da..e69de29bb2d1 100755 |
|
--- a/crypto/bn/asm/ppc64-mont-fixed.pl |
|
+++ b/crypto/bn/asm/ppc64-mont-fixed.pl |
|
@@ -1,581 +0,0 @@ |
|
-#! /usr/bin/env perl |
|
-# Copyright 2021 The OpenSSL Project Authors. All Rights Reserved. |
|
-# |
|
-# Licensed under the Apache License 2.0 (the "License"). You may not use |
|
-# this file except in compliance with the License. You can obtain a copy |
|
-# in the file LICENSE in the source distribution or at |
|
-# https://www.openssl.org/source/license.html |
|
- |
|
-# ==================================================================== |
|
-# Written by Amitay Isaacs <amitay@ozlabs.org>, Martin Schwenke |
|
-# <martin@meltin.net> & Alastair D'Silva <alastair@d-silva.org> for |
|
-# the OpenSSL project. |
|
-# ==================================================================== |
|
- |
|
-# |
|
-# Fixed length (n=6), unrolled PPC Montgomery Multiplication |
|
-# |
|
- |
|
-# 2021 |
|
-# |
|
-# Although this is a generic implementation for unrolling Montgomery |
|
-# Multiplication for arbitrary values of n, this is currently only |
|
-# used for n = 6 to improve the performance of ECC p384. |
|
-# |
|
-# Unrolling allows intermediate results to be stored in registers, |
|
-# rather than on the stack, improving performance by ~7% compared to |
|
-# the existing PPC assembly code. |
|
-# |
|
-# The ISA 3.0 implementation uses combination multiply/add |
|
-# instructions (maddld, maddhdu) to improve performance by an |
|
-# additional ~10% on Power 9. |
|
-# |
|
-# Finally, saving non-volatile registers into volatile vector |
|
-# registers instead of onto the stack saves a little more. |
|
-# |
|
-# On a Power 9 machine we see an overall improvement of ~18%. |
|
-# |
|
- |
|
-use strict; |
|
-use warnings; |
|
- |
|
-my ($flavour, $output, $dir, $xlate); |
|
- |
|
-# $output is the last argument if it looks like a file (it has an extension) |
|
-# $flavour is the first argument if it doesn't look like a file |
|
-$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef; |
|
-$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef; |
|
- |
|
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; |
|
-( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or |
|
-( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or |
|
-die "can't locate ppc-xlate.pl"; |
|
- |
|
-open STDOUT,"| $^X $xlate $flavour \"$output\"" |
|
- or die "can't call $xlate: $!"; |
|
- |
|
-if ($flavour !~ /64/) { |
|
- die "bad flavour ($flavour) - only ppc64 permitted"; |
|
-} |
|
- |
|
-my $SIZE_T= 8; |
|
- |
|
-# Registers are global so the code is remotely readable |
|
- |
|
-# Parameters for Montgomery multiplication |
|
-my $sp = "r1"; |
|
-my $toc = "r2"; |
|
-my $rp = "r3"; |
|
-my $ap = "r4"; |
|
-my $bp = "r5"; |
|
-my $np = "r6"; |
|
-my $n0 = "r7"; |
|
-my $num = "r8"; |
|
- |
|
-my $i = "r9"; |
|
-my $c0 = "r10"; |
|
-my $bp0 = "r11"; |
|
-my $bpi = "r11"; |
|
-my $bpj = "r11"; |
|
-my $tj = "r12"; |
|
-my $apj = "r12"; |
|
-my $npj = "r12"; |
|
-my $lo = "r14"; |
|
-my $c1 = "r14"; |
|
- |
|
-# Non-volatile registers used for tp[i] |
|
-# |
|
-# 12 registers are available but the limit on unrolling is 10, |
|
-# since registers from $tp[0] to $tp[$n+1] are used. |
|
-my @tp = ("r20" .. "r31"); |
|
- |
|
-# volatile VSRs for saving non-volatile GPRs - faster than stack |
|
-my @vsrs = ("v32" .. "v46"); |
|
- |
|
-package Mont; |
|
- |
|
-sub new($$) |
|
-{ |
|
- my ($class, $n) = @_; |
|
- |
|
- if ($n > 10) { |
|
- die "Can't unroll for BN length ${n} (maximum 10)" |
|
- } |
|
- |
|
- my $self = { |
|
- code => "", |
|
- n => $n, |
|
- }; |
|
- bless $self, $class; |
|
- |
|
- return $self; |
|
-} |
|
- |
|
-sub add_code($$) |
|
-{ |
|
- my ($self, $c) = @_; |
|
- |
|
- $self->{code} .= $c; |
|
-} |
|
- |
|
-sub get_code($) |
|
-{ |
|
- my ($self) = @_; |
|
- |
|
- return $self->{code}; |
|
-} |
|
- |
|
-sub get_function_name($) |
|
-{ |
|
- my ($self) = @_; |
|
- |
|
- return "bn_mul_mont_fixed_n" . $self->{n}; |
|
-} |
|
- |
|
-sub get_label($$) |
|
-{ |
|
- my ($self, $l) = @_; |
|
- |
|
- return "L" . $l . "_" . $self->{n}; |
|
-} |
|
- |
|
-sub get_labels($@) |
|
-{ |
|
- my ($self, @labels) = @_; |
|
- |
|
- my %out = (); |
|
- |
|
- foreach my $l (@labels) { |
|
- $out{"$l"} = $self->get_label("$l"); |
|
- } |
|
- |
|
- return \%out; |
|
-} |
|
- |
|
-sub nl($) |
|
-{ |
|
- my ($self) = @_; |
|
- |
|
- $self->add_code("\n"); |
|
-} |
|
- |
|
-sub copy_result($) |
|
-{ |
|
- my ($self) = @_; |
|
- |
|
- my ($n) = $self->{n}; |
|
- |
|
- for (my $j = 0; $j < $n; $j++) { |
|
- $self->add_code(<<___); |
|
- std $tp[$j],`$j*$SIZE_T`($rp) |
|
-___ |
|
- } |
|
- |
|
-} |
|
- |
|
-sub mul_mont_fixed($) |
|
-{ |
|
- my ($self) = @_; |
|
- |
|
- my ($n) = $self->{n}; |
|
- my $fname = $self->get_function_name(); |
|
- my $label = $self->get_labels("outer", "enter", "sub", "copy", "end"); |
|
- |
|
- $self->add_code(<<___); |
|
- |
|
-.globl .${fname} |
|
-.align 5 |
|
-.${fname}: |
|
- |
|
-___ |
|
- |
|
- $self->save_registers(); |
|
- |
|
- $self->add_code(<<___); |
|
- ld $n0,0($n0) |
|
- |
|
- ld $bp0,0($bp) |
|
- |
|
- ld $apj,0($ap) |
|
-___ |
|
- |
|
- $self->mul_c_0($tp[0], $apj, $bp0, $c0); |
|
- |
|
- for (my $j = 1; $j < $n - 1; $j++) { |
|
- $self->add_code(<<___); |
|
- ld $apj,`$j*$SIZE_T`($ap) |
|
-___ |
|
- $self->mul($tp[$j], $apj, $bp0, $c0); |
|
- } |
|
- |
|
- $self->add_code(<<___); |
|
- ld $apj,`($n-1)*$SIZE_T`($ap) |
|
-___ |
|
- |
|
- $self->mul_last($tp[$n-1], $tp[$n], $apj, $bp0, $c0); |
|
- |
|
- $self->add_code(<<___); |
|
- li $tp[$n+1],0 |
|
- |
|
-___ |
|
- |
|
- $self->add_code(<<___); |
|
- li $i,0 |
|
- mtctr $num |
|
- b $label->{"enter"} |
|
- |
|
-.align 4 |
|
-$label->{"outer"}: |
|
- ldx $bpi,$bp,$i |
|
- |
|
- ld $apj,0($ap) |
|
-___ |
|
- |
|
- $self->mul_add_c_0($tp[0], $tp[0], $apj, $bpi, $c0); |
|
- |
|
- for (my $j = 1; $j < $n; $j++) { |
|
- $self->add_code(<<___); |
|
- ld $apj,`$j*$SIZE_T`($ap) |
|
-___ |
|
- $self->mul_add($tp[$j], $tp[$j], $apj, $bpi, $c0); |
|
- } |
|
- |
|
- $self->add_code(<<___); |
|
- addc $tp[$n],$tp[$n],$c0 |
|
- addze $tp[$n+1],$tp[$n+1] |
|
-___ |
|
- |
|
- $self->add_code(<<___); |
|
-.align 4 |
|
-$label->{"enter"}: |
|
- mulld $bpi,$tp[0],$n0 |
|
- |
|
- ld $npj,0($np) |
|
-___ |
|
- |
|
- $self->mul_add_c_0($lo, $tp[0], $bpi, $npj, $c0); |
|
- |
|
- for (my $j = 1; $j < $n; $j++) { |
|
- $self->add_code(<<___); |
|
- ld $npj,`$j*$SIZE_T`($np) |
|
-___ |
|
- $self->mul_add($tp[$j-1], $tp[$j], $npj, $bpi, $c0); |
|
- } |
|
- |
|
- $self->add_code(<<___); |
|
- addc $tp[$n-1],$tp[$n],$c0 |
|
- addze $tp[$n],$tp[$n+1] |
|
- |
|
- addi $i,$i,$SIZE_T |
|
- bdnz $label->{"outer"} |
|
- |
|
- and. $tp[$n],$tp[$n],$tp[$n] |
|
- bne $label->{"sub"} |
|
- |
|
- cmpld $tp[$n-1],$npj |
|
- blt $label->{"copy"} |
|
- |
|
-$label->{"sub"}: |
|
-___ |
|
- |
|
- # |
|
- # Reduction |
|
- # |
|
- |
|
- $self->add_code(<<___); |
|
- ld $bpj,`0*$SIZE_T`($np) |
|
- subfc $c1,$bpj,$tp[0] |
|
- std $c1,`0*$SIZE_T`($rp) |
|
- |
|
-___ |
|
- for (my $j = 1; $j < $n - 1; $j++) { |
|
- $self->add_code(<<___); |
|
- ld $bpj,`$j*$SIZE_T`($np) |
|
- subfe $c1,$bpj,$tp[$j] |
|
- std $c1,`$j*$SIZE_T`($rp) |
|
- |
|
-___ |
|
- } |
|
- |
|
- $self->add_code(<<___); |
|
- subfe $c1,$npj,$tp[$n-1] |
|
- std $c1,`($n-1)*$SIZE_T`($rp) |
|
- |
|
-___ |
|
- |
|
- $self->add_code(<<___); |
|
- addme. $tp[$n],$tp[$n] |
|
- beq $label->{"end"} |
|
- |
|
-$label->{"copy"}: |
|
-___ |
|
- |
|
- $self->copy_result(); |
|
- |
|
- $self->add_code(<<___); |
|
- |
|
-$label->{"end"}: |
|
-___ |
|
- |
|
- $self->restore_registers(); |
|
- |
|
- $self->add_code(<<___); |
|
- li r3,1 |
|
- blr |
|
-.size .${fname},.-.${fname} |
|
-___ |
|
- |
|
-} |
|
- |
|
-package Mont::GPR; |
|
- |
|
-our @ISA = ('Mont'); |
|
- |
|
-sub new($$) |
|
-{ |
|
- my ($class, $n) = @_; |
|
- |
|
- return $class->SUPER::new($n); |
|
-} |
|
- |
|
-sub save_registers($) |
|
-{ |
|
- my ($self) = @_; |
|
- |
|
- my $n = $self->{n}; |
|
- |
|
- $self->add_code(<<___); |
|
- std $lo,-8($sp) |
|
-___ |
|
- |
|
- for (my $j = 0; $j <= $n+1; $j++) { |
|
- $self->{code}.=<<___; |
|
- std $tp[$j],-`($j+2)*8`($sp) |
|
-___ |
|
- } |
|
- |
|
- $self->add_code(<<___); |
|
- |
|
-___ |
|
-} |
|
- |
|
-sub restore_registers($) |
|
-{ |
|
- my ($self) = @_; |
|
- |
|
- my $n = $self->{n}; |
|
- |
|
- $self->add_code(<<___); |
|
- ld $lo,-8($sp) |
|
-___ |
|
- |
|
- for (my $j = 0; $j <= $n+1; $j++) { |
|
- $self->{code}.=<<___; |
|
- ld $tp[$j],-`($j+2)*8`($sp) |
|
-___ |
|
- } |
|
- |
|
- $self->{code} .=<<___; |
|
- |
|
-___ |
|
-} |
|
- |
|
-# Direct translation of C mul() |
|
-sub mul($$$$$) |
|
-{ |
|
- my ($self, $r, $a, $w, $c) = @_; |
|
- |
|
- $self->add_code(<<___); |
|
- mulld $lo,$a,$w |
|
- addc $r,$lo,$c |
|
- mulhdu $c,$a,$w |
|
- addze $c,$c |
|
- |
|
-___ |
|
-} |
|
- |
|
-# Like mul() but $c is ignored as an input - an optimisation to save a |
|
-# preliminary instruction that would set input $c to 0 |
|
-sub mul_c_0($$$$$) |
|
-{ |
|
- my ($self, $r, $a, $w, $c) = @_; |
|
- |
|
- $self->add_code(<<___); |
|
- mulld $r,$a,$w |
|
- mulhdu $c,$a,$w |
|
- |
|
-___ |
|
-} |
|
- |
|
-# Like mul() but does not to the final addition of CA into $c - an |
|
-# optimisation to save an instruction |
|
-sub mul_last($$$$$$) |
|
-{ |
|
- my ($self, $r1, $r2, $a, $w, $c) = @_; |
|
- |
|
- $self->add_code(<<___); |
|
- mulld $lo,$a,$w |
|
- addc $r1,$lo,$c |
|
- mulhdu $c,$a,$w |
|
- |
|
- addze $r2,$c |
|
-___ |
|
-} |
|
- |
|
-# Like C mul_add() but allow $r_out and $r_in to be different |
|
-sub mul_add($$$$$$) |
|
-{ |
|
- my ($self, $r_out, $r_in, $a, $w, $c) = @_; |
|
- |
|
- $self->add_code(<<___); |
|
- mulld $lo,$a,$w |
|
- addc $lo,$lo,$c |
|
- mulhdu $c,$a,$w |
|
- addze $c,$c |
|
- addc $r_out,$r_in,$lo |
|
- addze $c,$c |
|
- |
|
-___ |
|
-} |
|
- |
|
-# Like mul_add() but $c is ignored as an input - an optimisation to save a |
|
-# preliminary instruction that would set input $c to 0 |
|
-sub mul_add_c_0($$$$$$) |
|
-{ |
|
- my ($self, $r_out, $r_in, $a, $w, $c) = @_; |
|
- |
|
- $self->add_code(<<___); |
|
- mulld $lo,$a,$w |
|
- addc $r_out,$r_in,$lo |
|
- mulhdu $c,$a,$w |
|
- addze $c,$c |
|
- |
|
-___ |
|
-} |
|
- |
|
-package Mont::GPR_300; |
|
- |
|
-our @ISA = ('Mont::GPR'); |
|
- |
|
-sub new($$) |
|
-{ |
|
- my ($class, $n) = @_; |
|
- |
|
- my $mont = $class->SUPER::new($n); |
|
- |
|
- return $mont; |
|
-} |
|
- |
|
-sub get_function_name($) |
|
-{ |
|
- my ($self) = @_; |
|
- |
|
- return "bn_mul_mont_300_fixed_n" . $self->{n}; |
|
-} |
|
- |
|
-sub get_label($$) |
|
-{ |
|
- my ($self, $l) = @_; |
|
- |
|
- return "L" . $l . "_300_" . $self->{n}; |
|
-} |
|
- |
|
-# Direct translation of C mul() |
|
-sub mul($$$$$) |
|
-{ |
|
- my ($self, $r, $a, $w, $c, $last) = @_; |
|
- |
|
- $self->add_code(<<___); |
|
- maddld $r,$a,$w,$c |
|
- maddhdu $c,$a,$w,$c |
|
- |
|
-___ |
|
-} |
|
- |
|
-# Save the last carry as the final entry |
|
-sub mul_last($$$$$) |
|
-{ |
|
- my ($self, $r1, $r2, $a, $w, $c) = @_; |
|
- |
|
- $self->add_code(<<___); |
|
- maddld $r1,$a,$w,$c |
|
- maddhdu $r2,$a,$w,$c |
|
- |
|
-___ |
|
-} |
|
- |
|
-# Like mul() but $c is ignored as an input - an optimisation to save a |
|
-# preliminary instruction that would set input $c to 0 |
|
-sub mul_c_0($$$$$) |
|
-{ |
|
- my ($self, $r, $a, $w, $c) = @_; |
|
- |
|
- $self->add_code(<<___); |
|
- mulld $r,$a,$w |
|
- mulhdu $c,$a,$w |
|
- |
|
-___ |
|
-} |
|
- |
|
-# Like C mul_add() but allow $r_out and $r_in to be different |
|
-sub mul_add($$$$$$) |
|
-{ |
|
- my ($self, $r_out, $r_in, $a, $w, $c) = @_; |
|
- |
|
- $self->add_code(<<___); |
|
- maddld $lo,$a,$w,$c |
|
- maddhdu $c,$a,$w,$c |
|
- addc $r_out,$r_in,$lo |
|
- addze $c,$c |
|
- |
|
-___ |
|
-} |
|
- |
|
-# Like mul_add() but $c is ignored as an input - an optimisation to save a |
|
-# preliminary instruction that would set input $c to 0 |
|
-sub mul_add_c_0($$$$$$) |
|
-{ |
|
- my ($self, $r_out, $r_in, $a, $w, $c) = @_; |
|
- |
|
- $self->add_code(<<___); |
|
- maddld $lo,$a,$w,$r_in |
|
- maddhdu $c,$a,$w,$r_in |
|
-___ |
|
- |
|
- if ($r_out ne $lo) { |
|
- $self->add_code(<<___); |
|
- mr $r_out,$lo |
|
-___ |
|
- } |
|
- |
|
- $self->nl(); |
|
-} |
|
- |
|
- |
|
-package main; |
|
- |
|
-my $code; |
|
- |
|
-$code.=<<___; |
|
-.machine "any" |
|
-.text |
|
-___ |
|
- |
|
-my $mont; |
|
- |
|
-$mont = new Mont::GPR(6); |
|
-$mont->mul_mont_fixed(); |
|
-$code .= $mont->get_code(); |
|
- |
|
-$mont = new Mont::GPR_300(6); |
|
-$mont->mul_mont_fixed(); |
|
-$code .= $mont->get_code(); |
|
- |
|
-$code =~ s/\`([^\`]*)\`/eval $1/gem; |
|
- |
|
-$code.=<<___; |
|
-.asciz "Montgomery Multiplication for PPC by <amitay\@ozlabs.org>, <alastair\@d-silva.org>" |
|
-___ |
|
- |
|
-print $code; |
|
-close STDOUT or die "error closing STDOUT: $!"; |
|
diff --git a/crypto/bn/bn_ppc.c b/crypto/bn/bn_ppc.c |
|
index 1e9421bee213..3ee76ea96574 100644 |
|
--- a/crypto/bn/bn_ppc.c |
|
+++ b/crypto/bn/bn_ppc.c |
|
@@ -19,12 +19,6 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, |
|
const BN_ULONG *np, const BN_ULONG *n0, int num); |
|
int bn_mul4x_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, |
|
const BN_ULONG *np, const BN_ULONG *n0, int num); |
|
- int bn_mul_mont_fixed_n6(BN_ULONG *rp, const BN_ULONG *ap, |
|
- const BN_ULONG *bp, const BN_ULONG *np, |
|
- const BN_ULONG *n0, int num); |
|
- int bn_mul_mont_300_fixed_n6(BN_ULONG *rp, const BN_ULONG *ap, |
|
- const BN_ULONG *bp, const BN_ULONG *np, |
|
- const BN_ULONG *n0, int num); |
|
|
|
if (num < 4) |
|
return 0; |
|
@@ -40,14 +34,5 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, |
|
* no opportunity to figure it out... |
|
*/ |
|
|
|
-#if defined(_ARCH_PPC64) |
|
- if (num == 6) { |
|
- if (OPENSSL_ppccap_P & PPC_MADD300) |
|
- return bn_mul_mont_300_fixed_n6(rp, ap, bp, np, n0, num); |
|
- else |
|
- return bn_mul_mont_fixed_n6(rp, ap, bp, np, n0, num); |
|
- } |
|
-#endif |
|
- |
|
return bn_mul_mont_int(rp, ap, bp, np, n0, num); |
|
} |
|
diff --git a/crypto/bn/build.info b/crypto/bn/build.info |
|
index 987a70ae263b..4f8d0689b5ea 100644 |
|
--- a/crypto/bn/build.info |
|
+++ b/crypto/bn/build.info |
|
@@ -79,7 +79,7 @@ IF[{- !$disabled{asm} -}] |
|
|
|
$BNASM_ppc32=bn_ppc.c bn-ppc.s ppc-mont.s |
|
$BNDEF_ppc32=OPENSSL_BN_ASM_MONT |
|
- $BNASM_ppc64=$BNASM_ppc32 ppc64-mont-fixed.s |
|
+ $BNASM_ppc64=$BNASM_ppc32 |
|
$BNDEF_ppc64=$BNDEF_ppc32 |
|
|
|
$BNASM_c64xplus=asm/bn-c64xplus.asm |
|
@@ -173,7 +173,6 @@ GENERATE[parisc-mont.s]=asm/parisc-mont.pl |
|
GENERATE[bn-ppc.s]=asm/ppc.pl |
|
GENERATE[ppc-mont.s]=asm/ppc-mont.pl |
|
GENERATE[ppc64-mont.s]=asm/ppc64-mont.pl |
|
-GENERATE[ppc64-mont-fixed.s]=asm/ppc64-mont-fixed.pl |
|
|
|
GENERATE[alpha-mont.S]=asm/alpha-mont.pl |
|
|
|
diff --git a/test/recipes/30-test_evp_data/evppkey_ecdsa.txt b/test/recipes/30-test_evp_data/evppkey_ecdsa.txt |
|
index f36982845db4..1543ed9f7534 100644 |
|
--- a/test/recipes/30-test_evp_data/evppkey_ecdsa.txt |
|
+++ b/test/recipes/30-test_evp_data/evppkey_ecdsa.txt |
|
@@ -97,6 +97,18 @@ Key = P-256-PUBLIC |
|
Input = "Hello World" |
|
Output = 3046022100e7515177ec3817b77a4a94066ab3070817b7aa9d44a8a09f040da250116e8972022100ba59b0f631258e59a9026be5d84f60685f4cf22b9165a0c2736d5c21c8ec1862 |
|
|
|
+PublicKey=P-384-PUBLIC |
|
+-----BEGIN PUBLIC KEY----- |
|
+MHYwEAYHKoZIzj0CAQYFK4EEACIDYgAES/TlL5WEJ+u1kV+4yVlVUbTTo/2rZ7rd |
|
+nWwwk/QlukNjDfcfQvDrfOqpTZ9kSKhd0wMxWIJJ/S/cCzCex+2EgbwW8ngAwT19 |
|
+twD8guGxyFRaoMDTtW47/nifwYqRaIfC |
|
+-----END PUBLIC KEY----- |
|
+ |
|
+DigestVerify = SHA384 |
|
+Key = P-384-PUBLIC |
|
+Input = "123400" |
|
+Output = 304d0218389cb27e0bc8d21fa7e5f24cb74f58851313e696333ad68b023100ffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf581a0db248b0a77aecec196accc52970 |
|
+ |
|
# Oneshot tests |
|
OneShotDigestVerify = SHA256 |
|
Key = P-256-PUBLIC
|
|
|