You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
3156 lines
60 KiB
3156 lines
60 KiB
7 years ago
|
From e078642ddea29bbb6ba29788a6a513796387fbbb Mon Sep 17 00:00:00 2001
|
||
|
From: Andy Polyakov <appro@openssl.org>
|
||
|
Date: Mon, 5 Jan 2015 14:52:56 +0100
|
||
|
Subject: [PATCH] Fix for CVE-2014-3570.
|
||
|
|
||
|
Reviewed-by: Emilia Kasper <emilia@openssl.org>
|
||
|
(cherry picked from commit e793809ba50c1e90ab592fb640a856168e50f3de)
|
||
|
(with 1.0.1-specific addendum)
|
||
|
---
|
||
|
crypto/bn/asm/mips.pl | 611 +++---------
|
||
|
crypto/bn/asm/mips3.s | 2201 --------------------------------------------
|
||
|
crypto/bn/asm/x86_64-gcc.c | 34 +-
|
||
|
crypto/bn/bn_asm.c | 16 +-
|
||
|
crypto/bn/bntest.c | 102 +-
|
||
|
5 files changed, 234 insertions(+), 2730 deletions(-)
|
||
|
delete mode 100644 crypto/bn/asm/mips3.s
|
||
|
|
||
|
diff --git a/crypto/bn/asm/mips.pl b/crypto/bn/asm/mips.pl
|
||
|
index d2f3ef7..215c9a7 100644
|
||
|
--- a/crypto/bn/asm/mips.pl
|
||
|
+++ b/crypto/bn/asm/mips.pl
|
||
|
@@ -1872,6 +1872,41 @@ ___
|
||
|
|
||
|
($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3);
|
||
|
|
||
|
+sub add_c2 () {
|
||
|
+my ($hi,$lo,$c0,$c1,$c2,
|
||
|
+ $warm, # !$warm denotes first call with specific sequence of
|
||
|
+ # $c_[XYZ] when there is no Z-carry to accumulate yet;
|
||
|
+ $an,$bn # these two are arguments for multiplication which
|
||
|
+ # result is used in *next* step [which is why it's
|
||
|
+ # commented as "forward multiplication" below];
|
||
|
+ )=@_;
|
||
|
+$code.=<<___;
|
||
|
+ mflo $lo
|
||
|
+ mfhi $hi
|
||
|
+ $ADDU $c0,$lo
|
||
|
+ sltu $at,$c0,$lo
|
||
|
+ $MULTU $an,$bn # forward multiplication
|
||
|
+ $ADDU $c0,$lo
|
||
|
+ $ADDU $at,$hi
|
||
|
+ sltu $lo,$c0,$lo
|
||
|
+ $ADDU $c1,$at
|
||
|
+ $ADDU $hi,$lo
|
||
|
+___
|
||
|
+$code.=<<___ if (!$warm);
|
||
|
+ sltu $c2,$c1,$at
|
||
|
+ $ADDU $c1,$hi
|
||
|
+ sltu $hi,$c1,$hi
|
||
|
+ $ADDU $c2,$hi
|
||
|
+___
|
||
|
+$code.=<<___ if ($warm);
|
||
|
+ sltu $at,$c1,$at
|
||
|
+ $ADDU $c1,$hi
|
||
|
+ $ADDU $c2,$at
|
||
|
+ sltu $hi,$c1,$hi
|
||
|
+ $ADDU $c2,$hi
|
||
|
+___
|
||
|
+}
|
||
|
+
|
||
|
$code.=<<___;
|
||
|
|
||
|
.align 5
|
||
|
@@ -1920,21 +1955,10 @@ $code.=<<___;
|
||
|
sltu $at,$c_2,$t_1
|
||
|
$ADDU $c_3,$t_2,$at
|
||
|
$ST $c_2,$BNSZ($a0)
|
||
|
-
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $c_2,$t_2,$zero
|
||
|
- $SLL $t_2,1
|
||
|
- $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2);
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_3,$t_1
|
||
|
- sltu $at,$c_3,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_1,$t_2
|
||
|
- sltu $at,$c_1,$t_2
|
||
|
- $ADDU $c_2,$at
|
||
|
+___
|
||
|
+ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
|
||
|
+ $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2);
|
||
|
+$code.=<<___;
|
||
|
mflo $t_1
|
||
|
mfhi $t_2
|
||
|
$ADDU $c_3,$t_1
|
||
|
@@ -1945,67 +1969,19 @@ $code.=<<___;
|
||
|
sltu $at,$c_1,$t_2
|
||
|
$ADDU $c_2,$at
|
||
|
$ST $c_3,2*$BNSZ($a0)
|
||
|
-
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $c_3,$t_2,$zero
|
||
|
- $SLL $t_2,1
|
||
|
- $MULTU $a_1,$a_2 # mul_add_c2(a[1],b[2],c1,c2,c3);
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_1,$t_1
|
||
|
- sltu $at,$c_1,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_2,$t_2
|
||
|
- sltu $at,$c_2,$t_2
|
||
|
- $ADDU $c_3,$at
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $at,$t_2,$zero
|
||
|
- $ADDU $c_3,$at
|
||
|
- $MULTU $a_4,$a_0 # mul_add_c2(a[4],b[0],c2,c3,c1);
|
||
|
- $SLL $t_2,1
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_1,$t_1
|
||
|
- sltu $at,$c_1,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_2,$t_2
|
||
|
- sltu $at,$c_2,$t_2
|
||
|
- $ADDU $c_3,$at
|
||
|
+___
|
||
|
+ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
|
||
|
+ $a_1,$a_2); # mul_add_c2(a[1],b[2],c1,c2,c3);
|
||
|
+ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
|
||
|
+ $a_4,$a_0); # mul_add_c2(a[4],b[0],c2,c3,c1);
|
||
|
+$code.=<<___;
|
||
|
$ST $c_1,3*$BNSZ($a0)
|
||
|
-
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $c_1,$t_2,$zero
|
||
|
- $SLL $t_2,1
|
||
|
- $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1);
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_2,$t_1
|
||
|
- sltu $at,$c_2,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_3,$t_2
|
||
|
- sltu $at,$c_3,$t_2
|
||
|
- $ADDU $c_1,$at
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $at,$t_2,$zero
|
||
|
- $ADDU $c_1,$at
|
||
|
- $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1);
|
||
|
- $SLL $t_2,1
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_2,$t_1
|
||
|
- sltu $at,$c_2,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_3,$t_2
|
||
|
- sltu $at,$c_3,$t_2
|
||
|
- $ADDU $c_1,$at
|
||
|
+___
|
||
|
+ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
|
||
|
+ $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1);
|
||
|
+ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
|
||
|
+ $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1);
|
||
|
+$code.=<<___;
|
||
|
mflo $t_1
|
||
|
mfhi $t_2
|
||
|
$ADDU $c_2,$t_1
|
||
|
@@ -2016,97 +1992,23 @@ $code.=<<___;
|
||
|
sltu $at,$c_3,$t_2
|
||
|
$ADDU $c_1,$at
|
||
|
$ST $c_2,4*$BNSZ($a0)
|
||
|
-
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $c_2,$t_2,$zero
|
||
|
- $SLL $t_2,1
|
||
|
- $MULTU $a_1,$a_4 # mul_add_c2(a[1],b[4],c3,c1,c2);
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_3,$t_1
|
||
|
- sltu $at,$c_3,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_1,$t_2
|
||
|
- sltu $at,$c_1,$t_2
|
||
|
- $ADDU $c_2,$at
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $at,$t_2,$zero
|
||
|
- $ADDU $c_2,$at
|
||
|
- $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2);
|
||
|
- $SLL $t_2,1
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_3,$t_1
|
||
|
- sltu $at,$c_3,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_1,$t_2
|
||
|
- sltu $at,$c_1,$t_2
|
||
|
- $ADDU $c_2,$at
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $at,$t_2,$zero
|
||
|
- $MULTU $a_6,$a_0 # mul_add_c2(a[6],b[0],c1,c2,c3);
|
||
|
- $ADDU $c_2,$at
|
||
|
- $SLL $t_2,1
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_3,$t_1
|
||
|
- sltu $at,$c_3,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_1,$t_2
|
||
|
- sltu $at,$c_1,$t_2
|
||
|
- $ADDU $c_2,$at
|
||
|
+___
|
||
|
+ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
|
||
|
+ $a_1,$a_4); # mul_add_c2(a[1],b[4],c3,c1,c2);
|
||
|
+ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
|
||
|
+ $a_2,$a_3); # mul_add_c2(a[2],b[3],c3,c1,c2);
|
||
|
+ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
|
||
|
+ $a_6,$a_0); # mul_add_c2(a[6],b[0],c1,c2,c3);
|
||
|
+$code.=<<___;
|
||
|
$ST $c_3,5*$BNSZ($a0)
|
||
|
-
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $c_3,$t_2,$zero
|
||
|
- $SLL $t_2,1
|
||
|
- $MULTU $a_5,$a_1 # mul_add_c2(a[5],b[1],c1,c2,c3);
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_1,$t_1
|
||
|
- sltu $at,$c_1,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_2,$t_2
|
||
|
- sltu $at,$c_2,$t_2
|
||
|
- $ADDU $c_3,$at
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $at,$t_2,$zero
|
||
|
- $ADDU $c_3,$at
|
||
|
- $MULTU $a_4,$a_2 # mul_add_c2(a[4],b[2],c1,c2,c3);
|
||
|
- $SLL $t_2,1
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_1,$t_1
|
||
|
- sltu $at,$c_1,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_2,$t_2
|
||
|
- sltu $at,$c_2,$t_2
|
||
|
- $ADDU $c_3,$at
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $at,$t_2,$zero
|
||
|
- $ADDU $c_3,$at
|
||
|
- $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3);
|
||
|
- $SLL $t_2,1
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_1,$t_1
|
||
|
- sltu $at,$c_1,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_2,$t_2
|
||
|
- sltu $at,$c_2,$t_2
|
||
|
- $ADDU $c_3,$at
|
||
|
+___
|
||
|
+ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
|
||
|
+ $a_5,$a_1); # mul_add_c2(a[5],b[1],c1,c2,c3);
|
||
|
+ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
|
||
|
+ $a_4,$a_2); # mul_add_c2(a[4],b[2],c1,c2,c3);
|
||
|
+ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
|
||
|
+ $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3);
|
||
|
+$code.=<<___;
|
||
|
mflo $t_1
|
||
|
mfhi $t_2
|
||
|
$ADDU $c_1,$t_1
|
||
|
@@ -2117,112 +2019,25 @@ $code.=<<___;
|
||
|
sltu $at,$c_2,$t_2
|
||
|
$ADDU $c_3,$at
|
||
|
$ST $c_1,6*$BNSZ($a0)
|
||
|
-
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $c_1,$t_2,$zero
|
||
|
- $SLL $t_2,1
|
||
|
- $MULTU $a_1,$a_6 # mul_add_c2(a[1],b[6],c2,c3,c1);
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_2,$t_1
|
||
|
- sltu $at,$c_2,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_3,$t_2
|
||
|
- sltu $at,$c_3,$t_2
|
||
|
- $ADDU $c_1,$at
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $at,$t_2,$zero
|
||
|
- $ADDU $c_1,$at
|
||
|
- $MULTU $a_2,$a_5 # mul_add_c2(a[2],b[5],c2,c3,c1);
|
||
|
- $SLL $t_2,1
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_2,$t_1
|
||
|
- sltu $at,$c_2,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_3,$t_2
|
||
|
- sltu $at,$c_3,$t_2
|
||
|
- $ADDU $c_1,$at
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $at,$t_2,$zero
|
||
|
- $ADDU $c_1,$at
|
||
|
- $MULTU $a_3,$a_4 # mul_add_c2(a[3],b[4],c2,c3,c1);
|
||
|
- $SLL $t_2,1
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_2,$t_1
|
||
|
- sltu $at,$c_2,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_3,$t_2
|
||
|
- sltu $at,$c_3,$t_2
|
||
|
- $ADDU $c_1,$at
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $at,$t_2,$zero
|
||
|
- $ADDU $c_1,$at
|
||
|
- $MULTU $a_7,$a_1 # mul_add_c2(a[7],b[1],c3,c1,c2);
|
||
|
- $SLL $t_2,1
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_2,$t_1
|
||
|
- sltu $at,$c_2,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_3,$t_2
|
||
|
- sltu $at,$c_3,$t_2
|
||
|
- $ADDU $c_1,$at
|
||
|
+___
|
||
|
+ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
|
||
|
+ $a_1,$a_6); # mul_add_c2(a[1],b[6],c2,c3,c1);
|
||
|
+ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
|
||
|
+ $a_2,$a_5); # mul_add_c2(a[2],b[5],c2,c3,c1);
|
||
|
+ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
|
||
|
+ $a_3,$a_4); # mul_add_c2(a[3],b[4],c2,c3,c1);
|
||
|
+ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
|
||
|
+ $a_7,$a_1); # mul_add_c2(a[7],b[1],c3,c1,c2);
|
||
|
+$code.=<<___;
|
||
|
$ST $c_2,7*$BNSZ($a0)
|
||
|
-
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $c_2,$t_2,$zero
|
||
|
- $SLL $t_2,1
|
||
|
- $MULTU $a_6,$a_2 # mul_add_c2(a[6],b[2],c3,c1,c2);
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_3,$t_1
|
||
|
- sltu $at,$c_3,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_1,$t_2
|
||
|
- sltu $at,$c_1,$t_2
|
||
|
- $ADDU $c_2,$at
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $at,$t_2,$zero
|
||
|
- $ADDU $c_2,$at
|
||
|
- $MULTU $a_5,$a_3 # mul_add_c2(a[5],b[3],c3,c1,c2);
|
||
|
- $SLL $t_2,1
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_3,$t_1
|
||
|
- sltu $at,$c_3,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_1,$t_2
|
||
|
- sltu $at,$c_1,$t_2
|
||
|
- $ADDU $c_2,$at
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $at,$t_2,$zero
|
||
|
- $ADDU $c_2,$at
|
||
|
- $MULTU $a_4,$a_4 # mul_add_c(a[4],b[4],c3,c1,c2);
|
||
|
- $SLL $t_2,1
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_3,$t_1
|
||
|
- sltu $at,$c_3,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_1,$t_2
|
||
|
- sltu $at,$c_1,$t_2
|
||
|
- $ADDU $c_2,$at
|
||
|
+___
|
||
|
+ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
|
||
|
+ $a_6,$a_2); # mul_add_c2(a[6],b[2],c3,c1,c2);
|
||
|
+ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
|
||
|
+ $a_5,$a_3); # mul_add_c2(a[5],b[3],c3,c1,c2);
|
||
|
+ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
|
||
|
+ $a_4,$a_4); # mul_add_c(a[4],b[4],c3,c1,c2);
|
||
|
+$code.=<<___;
|
||
|
mflo $t_1
|
||
|
mfhi $t_2
|
||
|
$ADDU $c_3,$t_1
|
||
|
@@ -2233,82 +2048,21 @@ $code.=<<___;
|
||
|
sltu $at,$c_1,$t_2
|
||
|
$ADDU $c_2,$at
|
||
|
$ST $c_3,8*$BNSZ($a0)
|
||
|
-
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $c_3,$t_2,$zero
|
||
|
- $SLL $t_2,1
|
||
|
- $MULTU $a_3,$a_6 # mul_add_c2(a[3],b[6],c1,c2,c3);
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_1,$t_1
|
||
|
- sltu $at,$c_1,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_2,$t_2
|
||
|
- sltu $at,$c_2,$t_2
|
||
|
- $ADDU $c_3,$at
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $at,$t_2,$zero
|
||
|
- $ADDU $c_3,$at
|
||
|
- $MULTU $a_4,$a_5 # mul_add_c2(a[4],b[5],c1,c2,c3);
|
||
|
- $SLL $t_2,1
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_1,$t_1
|
||
|
- sltu $at,$c_1,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_2,$t_2
|
||
|
- sltu $at,$c_2,$t_2
|
||
|
- $ADDU $c_3,$at
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $at,$t_2,$zero
|
||
|
- $ADDU $c_3,$at
|
||
|
- $MULTU $a_7,$a_3 # mul_add_c2(a[7],b[3],c2,c3,c1);
|
||
|
- $SLL $t_2,1
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_1,$t_1
|
||
|
- sltu $at,$c_1,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_2,$t_2
|
||
|
- sltu $at,$c_2,$t_2
|
||
|
- $ADDU $c_3,$at
|
||
|
+___
|
||
|
+ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
|
||
|
+ $a_3,$a_6); # mul_add_c2(a[3],b[6],c1,c2,c3);
|
||
|
+ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
|
||
|
+ $a_4,$a_5); # mul_add_c2(a[4],b[5],c1,c2,c3);
|
||
|
+ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
|
||
|
+ $a_7,$a_3); # mul_add_c2(a[7],b[3],c2,c3,c1);
|
||
|
+$code.=<<___;
|
||
|
$ST $c_1,9*$BNSZ($a0)
|
||
|
-
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $c_1,$t_2,$zero
|
||
|
- $SLL $t_2,1
|
||
|
- $MULTU $a_6,$a_4 # mul_add_c2(a[6],b[4],c2,c3,c1);
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_2,$t_1
|
||
|
- sltu $at,$c_2,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_3,$t_2
|
||
|
- sltu $at,$c_3,$t_2
|
||
|
- $ADDU $c_1,$at
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $at,$t_2,$zero
|
||
|
- $ADDU $c_1,$at
|
||
|
- $MULTU $a_5,$a_5 # mul_add_c(a[5],b[5],c2,c3,c1);
|
||
|
- $SLL $t_2,1
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_2,$t_1
|
||
|
- sltu $at,$c_2,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_3,$t_2
|
||
|
- sltu $at,$c_3,$t_2
|
||
|
- $ADDU $c_1,$at
|
||
|
+___
|
||
|
+ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
|
||
|
+ $a_6,$a_4); # mul_add_c2(a[6],b[4],c2,c3,c1);
|
||
|
+ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
|
||
|
+ $a_5,$a_5); # mul_add_c(a[5],b[5],c2,c3,c1);
|
||
|
+$code.=<<___;
|
||
|
mflo $t_1
|
||
|
mfhi $t_2
|
||
|
$ADDU $c_2,$t_1
|
||
|
@@ -2319,52 +2073,17 @@ $code.=<<___;
|
||
|
sltu $at,$c_3,$t_2
|
||
|
$ADDU $c_1,$at
|
||
|
$ST $c_2,10*$BNSZ($a0)
|
||
|
-
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $c_2,$t_2,$zero
|
||
|
- $SLL $t_2,1
|
||
|
- $MULTU $a_5,$a_6 # mul_add_c2(a[5],b[6],c3,c1,c2);
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_3,$t_1
|
||
|
- sltu $at,$c_3,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_1,$t_2
|
||
|
- sltu $at,$c_1,$t_2
|
||
|
- $ADDU $c_2,$at
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $at,$t_2,$zero
|
||
|
- $ADDU $c_2,$at
|
||
|
- $MULTU $a_7,$a_5 # mul_add_c2(a[7],b[5],c1,c2,c3);
|
||
|
- $SLL $t_2,1
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_3,$t_1
|
||
|
- sltu $at,$c_3,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_1,$t_2
|
||
|
- sltu $at,$c_1,$t_2
|
||
|
- $ADDU $c_2,$at
|
||
|
+___
|
||
|
+ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
|
||
|
+ $a_5,$a_6); # mul_add_c2(a[5],b[6],c3,c1,c2);
|
||
|
+ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
|
||
|
+ $a_7,$a_5); # mul_add_c2(a[7],b[5],c1,c2,c3);
|
||
|
+$code.=<<___;
|
||
|
$ST $c_3,11*$BNSZ($a0)
|
||
|
-
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $c_3,$t_2,$zero
|
||
|
- $SLL $t_2,1
|
||
|
- $MULTU $a_6,$a_6 # mul_add_c(a[6],b[6],c1,c2,c3);
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_1,$t_1
|
||
|
- sltu $at,$c_1,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_2,$t_2
|
||
|
- sltu $at,$c_2,$t_2
|
||
|
- $ADDU $c_3,$at
|
||
|
+___
|
||
|
+ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
|
||
|
+ $a_6,$a_6); # mul_add_c(a[6],b[6],c1,c2,c3);
|
||
|
+$code.=<<___;
|
||
|
mflo $t_1
|
||
|
mfhi $t_2
|
||
|
$ADDU $c_1,$t_1
|
||
|
@@ -2375,21 +2094,10 @@ $code.=<<___;
|
||
|
sltu $at,$c_2,$t_2
|
||
|
$ADDU $c_3,$at
|
||
|
$ST $c_1,12*$BNSZ($a0)
|
||
|
-
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $c_1,$t_2,$zero
|
||
|
- $SLL $t_2,1
|
||
|
- $MULTU $a_7,$a_7 # mul_add_c(a[7],b[7],c3,c1,c2);
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_2,$t_1
|
||
|
- sltu $at,$c_2,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_3,$t_2
|
||
|
- sltu $at,$c_3,$t_2
|
||
|
- $ADDU $c_1,$at
|
||
|
+___
|
||
|
+ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
|
||
|
+ $a_7,$a_7); # mul_add_c(a[7],b[7],c3,c1,c2);
|
||
|
+$code.=<<___;
|
||
|
$ST $c_2,13*$BNSZ($a0)
|
||
|
|
||
|
mflo $t_1
|
||
|
@@ -2457,21 +2165,10 @@ $code.=<<___;
|
||
|
sltu $at,$c_2,$t_1
|
||
|
$ADDU $c_3,$t_2,$at
|
||
|
$ST $c_2,$BNSZ($a0)
|
||
|
-
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $c_2,$t_2,$zero
|
||
|
- $SLL $t_2,1
|
||
|
- $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2);
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_3,$t_1
|
||
|
- sltu $at,$c_3,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_1,$t_2
|
||
|
- sltu $at,$c_1,$t_2
|
||
|
- $ADDU $c_2,$at
|
||
|
+___
|
||
|
+ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
|
||
|
+ $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2);
|
||
|
+$code.=<<___;
|
||
|
mflo $t_1
|
||
|
mfhi $t_2
|
||
|
$ADDU $c_3,$t_1
|
||
|
@@ -2482,52 +2179,17 @@ $code.=<<___;
|
||
|
sltu $at,$c_1,$t_2
|
||
|
$ADDU $c_2,$at
|
||
|
$ST $c_3,2*$BNSZ($a0)
|
||
|
-
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $c_3,$t_2,$zero
|
||
|
- $SLL $t_2,1
|
||
|
- $MULTU $a_1,$a_2 # mul_add_c(a2[1],b[2],c1,c2,c3);
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_1,$t_1
|
||
|
- sltu $at,$c_1,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_2,$t_2
|
||
|
- sltu $at,$c_2,$t_2
|
||
|
- $ADDU $c_3,$at
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $at,$t_2,$zero
|
||
|
- $ADDU $c_3,$at
|
||
|
- $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1);
|
||
|
- $SLL $t_2,1
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_1,$t_1
|
||
|
- sltu $at,$c_1,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_2,$t_2
|
||
|
- sltu $at,$c_2,$t_2
|
||
|
- $ADDU $c_3,$at
|
||
|
+___
|
||
|
+ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
|
||
|
+ $a_1,$a_2); # mul_add_c2(a2[1],b[2],c1,c2,c3);
|
||
|
+ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
|
||
|
+ $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1);
|
||
|
+$code.=<<___;
|
||
|
$ST $c_1,3*$BNSZ($a0)
|
||
|
-
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $c_1,$t_2,$zero
|
||
|
- $SLL $t_2,1
|
||
|
- $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1);
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_2,$t_1
|
||
|
- sltu $at,$c_2,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_3,$t_2
|
||
|
- sltu $at,$c_3,$t_2
|
||
|
- $ADDU $c_1,$at
|
||
|
+___
|
||
|
+ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
|
||
|
+ $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1);
|
||
|
+$code.=<<___;
|
||
|
mflo $t_1
|
||
|
mfhi $t_2
|
||
|
$ADDU $c_2,$t_1
|
||
|
@@ -2538,21 +2200,10 @@ $code.=<<___;
|
||
|
sltu $at,$c_3,$t_2
|
||
|
$ADDU $c_1,$at
|
||
|
$ST $c_2,4*$BNSZ($a0)
|
||
|
-
|
||
|
- mflo $t_1
|
||
|
- mfhi $t_2
|
||
|
- slt $c_2,$t_2,$zero
|
||
|
- $SLL $t_2,1
|
||
|
- $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3);
|
||
|
- slt $a2,$t_1,$zero
|
||
|
- $ADDU $t_2,$a2
|
||
|
- $SLL $t_1,1
|
||
|
- $ADDU $c_3,$t_1
|
||
|
- sltu $at,$c_3,$t_1
|
||
|
- $ADDU $t_2,$at
|
||
|
- $ADDU $c_1,$t_2
|
||
|
- sltu $at,$c_1,$t_2
|
||
|
- $ADDU $c_2,$at
|
||
|
+___
|
||
|
+ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
|
||
|
+ $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3);
|
||
|
+$code.=<<___;
|
||
|
$ST $c_3,5*$BNSZ($a0)
|
||
|
|
||
|
mflo $t_1
|
||
|
diff --git a/crypto/bn/asm/mips3.s b/crypto/bn/asm/mips3.s
|
||
|
deleted file mode 100644
|
||
|
index dca4105..0000000
|
||
|
--- a/crypto/bn/asm/mips3.s
|
||
|
+++ /dev/null
|
||
|
@@ -1,2201 +0,0 @@
|
||
|
-.rdata
|
||
|
-.asciiz "mips3.s, Version 1.1"
|
||
|
-.asciiz "MIPS III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
|
||
|
-
|
||
|
-/*
|
||
|
- * ====================================================================
|
||
|
- * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
|
||
|
- * project.
|
||
|
- *
|
||
|
- * Rights for redistribution and usage in source and binary forms are
|
||
|
- * granted according to the OpenSSL license. Warranty of any kind is
|
||
|
- * disclaimed.
|
||
|
- * ====================================================================
|
||
|
- */
|
||
|
-
|
||
|
-/*
|
||
|
- * This is my modest contributon to the OpenSSL project (see
|
||
|
- * http://www.openssl.org/ for more information about it) and is
|
||
|
- * a drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c
|
||
|
- * module. For updates see http://fy.chalmers.se/~appro/hpe/.
|
||
|
- *
|
||
|
- * The module is designed to work with either of the "new" MIPS ABI(5),
|
||
|
- * namely N32 or N64, offered by IRIX 6.x. It's not ment to work under
|
||
|
- * IRIX 5.x not only because it doesn't support new ABIs but also
|
||
|
- * because 5.x kernels put R4x00 CPU into 32-bit mode and all those
|
||
|
- * 64-bit instructions (daddu, dmultu, etc.) found below gonna only
|
||
|
- * cause illegal instruction exception:-(
|
||
|
- *
|
||
|
- * In addition the code depends on preprocessor flags set up by MIPSpro
|
||
|
- * compiler driver (either as or cc) and therefore (probably?) can't be
|
||
|
- * compiled by the GNU assembler. GNU C driver manages fine though...
|
||
|
- * I mean as long as -mmips-as is specified or is the default option,
|
||
|
- * because then it simply invokes /usr/bin/as which in turn takes
|
||
|
- * perfect care of the preprocessor definitions. Another neat feature
|
||
|
- * offered by the MIPSpro assembler is an optimization pass. This gave
|
||
|
- * me the opportunity to have the code looking more regular as all those
|
||
|
- * architecture dependent instruction rescheduling details were left to
|
||
|
- * the assembler. Cool, huh?
|
||
|
- *
|
||
|
- * Performance improvement is astonishing! 'apps/openssl speed rsa dsa'
|
||
|
- * goes way over 3 times faster!
|
||
|
- *
|
||
|
- * <appro@fy.chalmers.se>
|
||
|
- */
|
||
|
-#include <asm.h>
|
||
|
-#include <regdef.h>
|
||
|
-
|
||
|
-#if _MIPS_ISA>=4
|
||
|
-#define MOVNZ(cond,dst,src) \
|
||
|
- movn dst,src,cond
|
||
|
-#else
|
||
|
-#define MOVNZ(cond,dst,src) \
|
||
|
- .set noreorder; \
|
||
|
- bnezl cond,.+8; \
|
||
|
- move dst,src; \
|
||
|
- .set reorder
|
||
|
-#endif
|
||
|
-
|
||
|
-.text
|
||
|
-
|
||
|
-.set noat
|
||
|
-.set reorder
|
||
|
-
|
||
|
-#define MINUS4 v1
|
||
|
-
|
||
|
-.align 5
|
||
|
-LEAF(bn_mul_add_words)
|
||
|
- .set noreorder
|
||
|
- bgtzl a2,.L_bn_mul_add_words_proceed
|
||
|
- ld t0,0(a1)
|
||
|
- jr ra
|
||
|
- move v0,zero
|
||
|
- .set reorder
|
||
|
-
|
||
|
-.L_bn_mul_add_words_proceed:
|
||
|
- li MINUS4,-4
|
||
|
- and ta0,a2,MINUS4
|
||
|
- move v0,zero
|
||
|
- beqz ta0,.L_bn_mul_add_words_tail
|
||
|
-
|
||
|
-.L_bn_mul_add_words_loop:
|
||
|
- dmultu t0,a3
|
||
|
- ld t1,0(a0)
|
||
|
- ld t2,8(a1)
|
||
|
- ld t3,8(a0)
|
||
|
- ld ta0,16(a1)
|
||
|
- ld ta1,16(a0)
|
||
|
- daddu t1,v0
|
||
|
- sltu v0,t1,v0 /* All manuals say it "compares 32-bit
|
||
|
- * values", but it seems to work fine
|
||
|
- * even on 64-bit registers. */
|
||
|
- mflo AT
|
||
|
- mfhi t0
|
||
|
- daddu t1,AT
|
||
|
- daddu v0,t0
|
||
|
- sltu AT,t1,AT
|
||
|
- sd t1,0(a0)
|
||
|
- daddu v0,AT
|
||
|
-
|
||
|
- dmultu t2,a3
|
||
|
- ld ta2,24(a1)
|
||
|
- ld ta3,24(a0)
|
||
|
- daddu t3,v0
|
||
|
- sltu v0,t3,v0
|
||
|
- mflo AT
|
||
|
- mfhi t2
|
||
|
- daddu t3,AT
|
||
|
- daddu v0,t2
|
||
|
- sltu AT,t3,AT
|
||
|
- sd t3,8(a0)
|
||
|
- daddu v0,AT
|
||
|
-
|
||
|
- dmultu ta0,a3
|
||
|
- subu a2,4
|
||
|
- PTR_ADD a0,32
|
||
|
- PTR_ADD a1,32
|
||
|
- daddu ta1,v0
|
||
|
- sltu v0,ta1,v0
|
||
|
- mflo AT
|
||
|
- mfhi ta0
|
||
|
- daddu ta1,AT
|
||
|
- daddu v0,ta0
|
||
|
- sltu AT,ta1,AT
|
||
|
- sd ta1,-16(a0)
|
||
|
- daddu v0,AT
|
||
|
-
|
||
|
-
|
||
|
- dmultu ta2,a3
|
||
|
- and ta0,a2,MINUS4
|
||
|
- daddu ta3,v0
|
||
|
- sltu v0,ta3,v0
|
||
|
- mflo AT
|
||
|
- mfhi ta2
|
||
|
- daddu ta3,AT
|
||
|
- daddu v0,ta2
|
||
|
- sltu AT,ta3,AT
|
||
|
- sd ta3,-8(a0)
|
||
|
- daddu v0,AT
|
||
|
- .set noreorder
|
||
|
- bgtzl ta0,.L_bn_mul_add_words_loop
|
||
|
- ld t0,0(a1)
|
||
|
-
|
||
|
- bnezl a2,.L_bn_mul_add_words_tail
|
||
|
- ld t0,0(a1)
|
||
|
- .set reorder
|
||
|
-
|
||
|
-.L_bn_mul_add_words_return:
|
||
|
- jr ra
|
||
|
-
|
||
|
-.L_bn_mul_add_words_tail:
|
||
|
- dmultu t0,a3
|
||
|
- ld t1,0(a0)
|
||
|
- subu a2,1
|
||
|
- daddu t1,v0
|
||
|
- sltu v0,t1,v0
|
||
|
- mflo AT
|
||
|
- mfhi t0
|
||
|
- daddu t1,AT
|
||
|
- daddu v0,t0
|
||
|
- sltu AT,t1,AT
|
||
|
- sd t1,0(a0)
|
||
|
- daddu v0,AT
|
||
|
- beqz a2,.L_bn_mul_add_words_return
|
||
|
-
|
||
|
- ld t0,8(a1)
|
||
|
- dmultu t0,a3
|
||
|
- ld t1,8(a0)
|
||
|
- subu a2,1
|
||
|
- daddu t1,v0
|
||
|
- sltu v0,t1,v0
|
||
|
- mflo AT
|
||
|
- mfhi t0
|
||
|
- daddu t1,AT
|
||
|
- daddu v0,t0
|
||
|
- sltu AT,t1,AT
|
||
|
- sd t1,8(a0)
|
||
|
- daddu v0,AT
|
||
|
- beqz a2,.L_bn_mul_add_words_return
|
||
|
-
|
||
|
- ld t0,16(a1)
|
||
|
- dmultu t0,a3
|
||
|
- ld t1,16(a0)
|
||
|
- daddu t1,v0
|
||
|
- sltu v0,t1,v0
|
||
|
- mflo AT
|
||
|
- mfhi t0
|
||
|
- daddu t1,AT
|
||
|
- daddu v0,t0
|
||
|
- sltu AT,t1,AT
|
||
|
- sd t1,16(a0)
|
||
|
- daddu v0,AT
|
||
|
- jr ra
|
||
|
-END(bn_mul_add_words)
|
||
|
-
|
||
|
-.align 5
|
||
|
-LEAF(bn_mul_words)
|
||
|
- .set noreorder
|
||
|
- bgtzl a2,.L_bn_mul_words_proceed
|
||
|
- ld t0,0(a1)
|
||
|
- jr ra
|
||
|
- move v0,zero
|
||
|
- .set reorder
|
||
|
-
|
||
|
-.L_bn_mul_words_proceed:
|
||
|
- li MINUS4,-4
|
||
|
- and ta0,a2,MINUS4
|
||
|
- move v0,zero
|
||
|
- beqz ta0,.L_bn_mul_words_tail
|
||
|
-
|
||
|
-.L_bn_mul_words_loop:
|
||
|
- dmultu t0,a3
|
||
|
- ld t2,8(a1)
|
||
|
- ld ta0,16(a1)
|
||
|
- ld ta2,24(a1)
|
||
|
- mflo AT
|
||
|
- mfhi t0
|
||
|
- daddu v0,AT
|
||
|
- sltu t1,v0,AT
|
||
|
- sd v0,0(a0)
|
||
|
- daddu v0,t1,t0
|
||
|
-
|
||
|
- dmultu t2,a3
|
||
|
- subu a2,4
|
||
|
- PTR_ADD a0,32
|
||
|
- PTR_ADD a1,32
|
||
|
- mflo AT
|
||
|
- mfhi t2
|
||
|
- daddu v0,AT
|
||
|
- sltu t3,v0,AT
|
||
|
- sd v0,-24(a0)
|
||
|
- daddu v0,t3,t2
|
||
|
-
|
||
|
- dmultu ta0,a3
|
||
|
- mflo AT
|
||
|
- mfhi ta0
|
||
|
- daddu v0,AT
|
||
|
- sltu ta1,v0,AT
|
||
|
- sd v0,-16(a0)
|
||
|
- daddu v0,ta1,ta0
|
||
|
-
|
||
|
-
|
||
|
- dmultu ta2,a3
|
||
|
- and ta0,a2,MINUS4
|
||
|
- mflo AT
|
||
|
- mfhi ta2
|
||
|
- daddu v0,AT
|
||
|
- sltu ta3,v0,AT
|
||
|
- sd v0,-8(a0)
|
||
|
- daddu v0,ta3,ta2
|
||
|
- .set noreorder
|
||
|
- bgtzl ta0,.L_bn_mul_words_loop
|
||
|
- ld t0,0(a1)
|
||
|
-
|
||
|
- bnezl a2,.L_bn_mul_words_tail
|
||
|
- ld t0,0(a1)
|
||
|
- .set reorder
|
||
|
-
|
||
|
-.L_bn_mul_words_return:
|
||
|
- jr ra
|
||
|
-
|
||
|
-.L_bn_mul_words_tail:
|
||
|
- dmultu t0,a3
|
||
|
- subu a2,1
|
||
|
- mflo AT
|
||
|
- mfhi t0
|
||
|
- daddu v0,AT
|
||
|
- sltu t1,v0,AT
|
||
|
- sd v0,0(a0)
|
||
|
- daddu v0,t1,t0
|
||
|
- beqz a2,.L_bn_mul_words_return
|
||
|
-
|
||
|
- ld t0,8(a1)
|
||
|
- dmultu t0,a3
|
||
|
- subu a2,1
|
||
|
- mflo AT
|
||
|
- mfhi t0
|
||
|
- daddu v0,AT
|
||
|
- sltu t1,v0,AT
|
||
|
- sd v0,8(a0)
|
||
|
- daddu v0,t1,t0
|
||
|
- beqz a2,.L_bn_mul_words_return
|
||
|
-
|
||
|
- ld t0,16(a1)
|
||
|
- dmultu t0,a3
|
||
|
- mflo AT
|
||
|
- mfhi t0
|
||
|
- daddu v0,AT
|
||
|
- sltu t1,v0,AT
|
||
|
- sd v0,16(a0)
|
||
|
- daddu v0,t1,t0
|
||
|
- jr ra
|
||
|
-END(bn_mul_words)
|
||
|
-
|
||
|
-.align 5
|
||
|
-LEAF(bn_sqr_words)
|
||
|
- .set noreorder
|
||
|
- bgtzl a2,.L_bn_sqr_words_proceed
|
||
|
- ld t0,0(a1)
|
||
|
- jr ra
|
||
|
- move v0,zero
|
||
|
- .set reorder
|
||
|
-
|
||
|
-.L_bn_sqr_words_proceed:
|
||
|
- li MINUS4,-4
|
||
|
- and ta0,a2,MINUS4
|
||
|
- move v0,zero
|
||
|
- beqz ta0,.L_bn_sqr_words_tail
|
||
|
-
|
||
|
-.L_bn_sqr_words_loop:
|
||
|
- dmultu t0,t0
|
||
|
- ld t2,8(a1)
|
||
|
- ld ta0,16(a1)
|
||
|
- ld ta2,24(a1)
|
||
|
- mflo t1
|
||
|
- mfhi t0
|
||
|
- sd t1,0(a0)
|
||
|
- sd t0,8(a0)
|
||
|
-
|
||
|
- dmultu t2,t2
|
||
|
- subu a2,4
|
||
|
- PTR_ADD a0,64
|
||
|
- PTR_ADD a1,32
|
||
|
- mflo t3
|
||
|
- mfhi t2
|
||
|
- sd t3,-48(a0)
|
||
|
- sd t2,-40(a0)
|
||
|
-
|
||
|
- dmultu ta0,ta0
|
||
|
- mflo ta1
|
||
|
- mfhi ta0
|
||
|
- sd ta1,-32(a0)
|
||
|
- sd ta0,-24(a0)
|
||
|
-
|
||
|
-
|
||
|
- dmultu ta2,ta2
|
||
|
- and ta0,a2,MINUS4
|
||
|
- mflo ta3
|
||
|
- mfhi ta2
|
||
|
- sd ta3,-16(a0)
|
||
|
- sd ta2,-8(a0)
|
||
|
-
|
||
|
- .set noreorder
|
||
|
- bgtzl ta0,.L_bn_sqr_words_loop
|
||
|
- ld t0,0(a1)
|
||
|
-
|
||
|
- bnezl a2,.L_bn_sqr_words_tail
|
||
|
- ld t0,0(a1)
|
||
|
- .set reorder
|
||
|
-
|
||
|
-.L_bn_sqr_words_return:
|
||
|
- move v0,zero
|
||
|
- jr ra
|
||
|
-
|
||
|
-.L_bn_sqr_words_tail:
|
||
|
- dmultu t0,t0
|
||
|
- subu a2,1
|
||
|
- mflo t1
|
||
|
- mfhi t0
|
||
|
- sd t1,0(a0)
|
||
|
- sd t0,8(a0)
|
||
|
- beqz a2,.L_bn_sqr_words_return
|
||
|
-
|
||
|
- ld t0,8(a1)
|
||
|
- dmultu t0,t0
|
||
|
- subu a2,1
|
||
|
- mflo t1
|
||
|
- mfhi t0
|
||
|
- sd t1,16(a0)
|
||
|
- sd t0,24(a0)
|
||
|
- beqz a2,.L_bn_sqr_words_return
|
||
|
-
|
||
|
- ld t0,16(a1)
|
||
|
- dmultu t0,t0
|
||
|
- mflo t1
|
||
|
- mfhi t0
|
||
|
- sd t1,32(a0)
|
||
|
- sd t0,40(a0)
|
||
|
- jr ra
|
||
|
-END(bn_sqr_words)
|
||
|
-
|
||
|
-.align 5
|
||
|
-LEAF(bn_add_words)
|
||
|
- .set noreorder
|
||
|
- bgtzl a3,.L_bn_add_words_proceed
|
||
|
- ld t0,0(a1)
|
||
|
- jr ra
|
||
|
- move v0,zero
|
||
|
- .set reorder
|
||
|
-
|
||
|
-.L_bn_add_words_proceed:
|
||
|
- li MINUS4,-4
|
||
|
- and AT,a3,MINUS4
|
||
|
- move v0,zero
|
||
|
- beqz AT,.L_bn_add_words_tail
|
||
|
-
|
||
|
-.L_bn_add_words_loop:
|
||
|
- ld ta0,0(a2)
|
||
|
- subu a3,4
|
||
|
- ld t1,8(a1)
|
||
|
- and AT,a3,MINUS4
|
||
|
- ld t2,16(a1)
|
||
|
- PTR_ADD a2,32
|
||
|
- ld t3,24(a1)
|
||
|
- PTR_ADD a0,32
|
||
|
- ld ta1,-24(a2)
|
||
|
- PTR_ADD a1,32
|
||
|
- ld ta2,-16(a2)
|
||
|
- ld ta3,-8(a2)
|
||
|
- daddu ta0,t0
|
||
|
- sltu t8,ta0,t0
|
||
|
- daddu t0,ta0,v0
|
||
|
- sltu v0,t0,ta0
|
||
|
- sd t0,-32(a0)
|
||
|
- daddu v0,t8
|
||
|
-
|
||
|
- daddu ta1,t1
|
||
|
- sltu t9,ta1,t1
|
||
|
- daddu t1,ta1,v0
|
||
|
- sltu v0,t1,ta1
|
||
|
- sd t1,-24(a0)
|
||
|
- daddu v0,t9
|
||
|
-
|
||
|
- daddu ta2,t2
|
||
|
- sltu t8,ta2,t2
|
||
|
- daddu t2,ta2,v0
|
||
|
- sltu v0,t2,ta2
|
||
|
- sd t2,-16(a0)
|
||
|
- daddu v0,t8
|
||
|
-
|
||
|
- daddu ta3,t3
|
||
|
- sltu t9,ta3,t3
|
||
|
- daddu t3,ta3,v0
|
||
|
- sltu v0,t3,ta3
|
||
|
- sd t3,-8(a0)
|
||
|
- daddu v0,t9
|
||
|
-
|
||
|
- .set noreorder
|
||
|
- bgtzl AT,.L_bn_add_words_loop
|
||
|
- ld t0,0(a1)
|
||
|
-
|
||
|
- bnezl a3,.L_bn_add_words_tail
|
||
|
- ld t0,0(a1)
|
||
|
- .set reorder
|
||
|
-
|
||
|
-.L_bn_add_words_return:
|
||
|
- jr ra
|
||
|
-
|
||
|
-.L_bn_add_words_tail:
|
||
|
- ld ta0,0(a2)
|
||
|
- daddu ta0,t0
|
||
|
- subu a3,1
|
||
|
- sltu t8,ta0,t0
|
||
|
- daddu t0,ta0,v0
|
||
|
- sltu v0,t0,ta0
|
||
|
- sd t0,0(a0)
|
||
|
- daddu v0,t8
|
||
|
- beqz a3,.L_bn_add_words_return
|
||
|
-
|
||
|
- ld t1,8(a1)
|
||
|
- ld ta1,8(a2)
|
||
|
- daddu ta1,t1
|
||
|
- subu a3,1
|
||
|
- sltu t9,ta1,t1
|
||
|
- daddu t1,ta1,v0
|
||
|
- sltu v0,t1,ta1
|
||
|
- sd t1,8(a0)
|
||
|
- daddu v0,t9
|
||
|
- beqz a3,.L_bn_add_words_return
|
||
|
-
|
||
|
- ld t2,16(a1)
|
||
|
- ld ta2,16(a2)
|
||
|
- daddu ta2,t2
|
||
|
- sltu t8,ta2,t2
|
||
|
- daddu t2,ta2,v0
|
||
|
- sltu v0,t2,ta2
|
||
|
- sd t2,16(a0)
|
||
|
- daddu v0,t8
|
||
|
- jr ra
|
||
|
-END(bn_add_words)
|
||
|
-
|
||
|
-.align 5
|
||
|
-LEAF(bn_sub_words)
|
||
|
- .set noreorder
|
||
|
- bgtzl a3,.L_bn_sub_words_proceed
|
||
|
- ld t0,0(a1)
|
||
|
- jr ra
|
||
|
- move v0,zero
|
||
|
- .set reorder
|
||
|
-
|
||
|
-.L_bn_sub_words_proceed:
|
||
|
- li MINUS4,-4
|
||
|
- and AT,a3,MINUS4
|
||
|
- move v0,zero
|
||
|
- beqz AT,.L_bn_sub_words_tail
|
||
|
-
|
||
|
-.L_bn_sub_words_loop:
|
||
|
- ld ta0,0(a2)
|
||
|
- subu a3,4
|
||
|
- ld t1,8(a1)
|
||
|
- and AT,a3,MINUS4
|
||
|
- ld t2,16(a1)
|
||
|
- PTR_ADD a2,32
|
||
|
- ld t3,24(a1)
|
||
|
- PTR_ADD a0,32
|
||
|
- ld ta1,-24(a2)
|
||
|
- PTR_ADD a1,32
|
||
|
- ld ta2,-16(a2)
|
||
|
- ld ta3,-8(a2)
|
||
|
- sltu t8,t0,ta0
|
||
|
- dsubu t0,ta0
|
||
|
- dsubu ta0,t0,v0
|
||
|
- sd ta0,-32(a0)
|
||
|
- MOVNZ (t0,v0,t8)
|
||
|
-
|
||
|
- sltu t9,t1,ta1
|
||
|
- dsubu t1,ta1
|
||
|
- dsubu ta1,t1,v0
|
||
|
- sd ta1,-24(a0)
|
||
|
- MOVNZ (t1,v0,t9)
|
||
|
-
|
||
|
-
|
||
|
- sltu t8,t2,ta2
|
||
|
- dsubu t2,ta2
|
||
|
- dsubu ta2,t2,v0
|
||
|
- sd ta2,-16(a0)
|
||
|
- MOVNZ (t2,v0,t8)
|
||
|
-
|
||
|
- sltu t9,t3,ta3
|
||
|
- dsubu t3,ta3
|
||
|
- dsubu ta3,t3,v0
|
||
|
- sd ta3,-8(a0)
|
||
|
- MOVNZ (t3,v0,t9)
|
||
|
-
|
||
|
- .set noreorder
|
||
|
- bgtzl AT,.L_bn_sub_words_loop
|
||
|
- ld t0,0(a1)
|
||
|
-
|
||
|
- bnezl a3,.L_bn_sub_words_tail
|
||
|
- ld t0,0(a1)
|
||
|
- .set reorder
|
||
|
-
|
||
|
-.L_bn_sub_words_return:
|
||
|
- jr ra
|
||
|
-
|
||
|
-.L_bn_sub_words_tail:
|
||
|
- ld ta0,0(a2)
|
||
|
- subu a3,1
|
||
|
- sltu t8,t0,ta0
|
||
|
- dsubu t0,ta0
|
||
|
- dsubu ta0,t0,v0
|
||
|
- MOVNZ (t0,v0,t8)
|
||
|
- sd ta0,0(a0)
|
||
|
- beqz a3,.L_bn_sub_words_return
|
||
|
-
|
||
|
- ld t1,8(a1)
|
||
|
- subu a3,1
|
||
|
- ld ta1,8(a2)
|
||
|
- sltu t9,t1,ta1
|
||
|
- dsubu t1,ta1
|
||
|
- dsubu ta1,t1,v0
|
||
|
- MOVNZ (t1,v0,t9)
|
||
|
- sd ta1,8(a0)
|
||
|
- beqz a3,.L_bn_sub_words_return
|
||
|
-
|
||
|
- ld t2,16(a1)
|
||
|
- ld ta2,16(a2)
|
||
|
- sltu t8,t2,ta2
|
||
|
- dsubu t2,ta2
|
||
|
- dsubu ta2,t2,v0
|
||
|
- MOVNZ (t2,v0,t8)
|
||
|
- sd ta2,16(a0)
|
||
|
- jr ra
|
||
|
-END(bn_sub_words)
|
||
|
-
|
||
|
-#undef MINUS4
|
||
|
-
|
||
|
-.align 5
|
||
|
-LEAF(bn_div_3_words)
|
||
|
- .set reorder
|
||
|
- move a3,a0 /* we know that bn_div_words doesn't
|
||
|
- * touch a3, ta2, ta3 and preserves a2
|
||
|
- * so that we can save two arguments
|
||
|
- * and return address in registers
|
||
|
- * instead of stack:-)
|
||
|
- */
|
||
|
- ld a0,(a3)
|
||
|
- move ta2,a1
|
||
|
- ld a1,-8(a3)
|
||
|
- bne a0,a2,.L_bn_div_3_words_proceed
|
||
|
- li v0,-1
|
||
|
- jr ra
|
||
|
-.L_bn_div_3_words_proceed:
|
||
|
- move ta3,ra
|
||
|
- bal bn_div_words
|
||
|
- move ra,ta3
|
||
|
- dmultu ta2,v0
|
||
|
- ld t2,-16(a3)
|
||
|
- move ta0,zero
|
||
|
- mfhi t1
|
||
|
- mflo t0
|
||
|
- sltu t8,t1,v1
|
||
|
-.L_bn_div_3_words_inner_loop:
|
||
|
- bnez t8,.L_bn_div_3_words_inner_loop_done
|
||
|
- sgeu AT,t2,t0
|
||
|
- seq t9,t1,v1
|
||
|
- and AT,t9
|
||
|
- sltu t3,t0,ta2
|
||
|
- daddu v1,a2
|
||
|
- dsubu t1,t3
|
||
|
- dsubu t0,ta2
|
||
|
- sltu t8,t1,v1
|
||
|
- sltu ta0,v1,a2
|
||
|
- or t8,ta0
|
||
|
- .set noreorder
|
||
|
- beqzl AT,.L_bn_div_3_words_inner_loop
|
||
|
- dsubu v0,1
|
||
|
- .set reorder
|
||
|
-.L_bn_div_3_words_inner_loop_done:
|
||
|
- jr ra
|
||
|
-END(bn_div_3_words)
|
||
|
-
|
||
|
-.align 5
|
||
|
-LEAF(bn_div_words)
|
||
|
- .set noreorder
|
||
|
- bnezl a2,.L_bn_div_words_proceed
|
||
|
- move v1,zero
|
||
|
- jr ra
|
||
|
- li v0,-1 /* I'd rather signal div-by-zero
|
||
|
- * which can be done with 'break 7' */
|
||
|
-
|
||
|
-.L_bn_div_words_proceed:
|
||
|
- bltz a2,.L_bn_div_words_body
|
||
|
- move t9,v1
|
||
|
- dsll a2,1
|
||
|
- bgtz a2,.-4
|
||
|
- addu t9,1
|
||
|
-
|
||
|
- .set reorder
|
||
|
- negu t1,t9
|
||
|
- li t2,-1
|
||
|
- dsll t2,t1
|
||
|
- and t2,a0
|
||
|
- dsrl AT,a1,t1
|
||
|
- .set noreorder
|
||
|
- bnezl t2,.+8
|
||
|
- break 6 /* signal overflow */
|
||
|
- .set reorder
|
||
|
- dsll a0,t9
|
||
|
- dsll a1,t9
|
||
|
- or a0,AT
|
||
|
-
|
||
|
-#define QT ta0
|
||
|
-#define HH ta1
|
||
|
-#define DH v1
|
||
|
-.L_bn_div_words_body:
|
||
|
- dsrl DH,a2,32
|
||
|
- sgeu AT,a0,a2
|
||
|
- .set noreorder
|
||
|
- bnezl AT,.+8
|
||
|
- dsubu a0,a2
|
||
|
- .set reorder
|
||
|
-
|
||
|
- li QT,-1
|
||
|
- dsrl HH,a0,32
|
||
|
- dsrl QT,32 /* q=0xffffffff */
|
||
|
- beq DH,HH,.L_bn_div_words_skip_div1
|
||
|
- ddivu zero,a0,DH
|
||
|
- mflo QT
|
||
|
-.L_bn_div_words_skip_div1:
|
||
|
- dmultu a2,QT
|
||
|
- dsll t3,a0,32
|
||
|
- dsrl AT,a1,32
|
||
|
- or t3,AT
|
||
|
- mflo t0
|
||
|
- mfhi t1
|
||
|
-.L_bn_div_words_inner_loop1:
|
||
|
- sltu t2,t3,t0
|
||
|
- seq t8,HH,t1
|
||
|
- sltu AT,HH,t1
|
||
|
- and t2,t8
|
||
|
- sltu v0,t0,a2
|
||
|
- or AT,t2
|
||
|
- .set noreorder
|
||
|
- beqz AT,.L_bn_div_words_inner_loop1_done
|
||
|
- dsubu t1,v0
|
||
|
- dsubu t0,a2
|
||
|
- b .L_bn_div_words_inner_loop1
|
||
|
- dsubu QT,1
|
||
|
- .set reorder
|
||
|
-.L_bn_div_words_inner_loop1_done:
|
||
|
-
|
||
|
- dsll a1,32
|
||
|
- dsubu a0,t3,t0
|
||
|
- dsll v0,QT,32
|
||
|
-
|
||
|
- li QT,-1
|
||
|
- dsrl HH,a0,32
|
||
|
- dsrl QT,32 /* q=0xffffffff */
|
||
|
- beq DH,HH,.L_bn_div_words_skip_div2
|
||
|
- ddivu zero,a0,DH
|
||
|
- mflo QT
|
||
|
-.L_bn_div_words_skip_div2:
|
||
|
-#undef DH
|
||
|
- dmultu a2,QT
|
||
|
- dsll t3,a0,32
|
||
|
- dsrl AT,a1,32
|
||
|
- or t3,AT
|
||
|
- mflo t0
|
||
|
- mfhi t1
|
||
|
-.L_bn_div_words_inner_loop2:
|
||
|
- sltu t2,t3,t0
|
||
|
- seq t8,HH,t1
|
||
|
- sltu AT,HH,t1
|
||
|
- and t2,t8
|
||
|
- sltu v1,t0,a2
|
||
|
- or AT,t2
|
||
|
- .set noreorder
|
||
|
- beqz AT,.L_bn_div_words_inner_loop2_done
|
||
|
- dsubu t1,v1
|
||
|
- dsubu t0,a2
|
||
|
- b .L_bn_div_words_inner_loop2
|
||
|
- dsubu QT,1
|
||
|
- .set reorder
|
||
|
-.L_bn_div_words_inner_loop2_done:
|
||
|
-#undef HH
|
||
|
-
|
||
|
- dsubu a0,t3,t0
|
||
|
- or v0,QT
|
||
|
- dsrl v1,a0,t9 /* v1 contains remainder if anybody wants it */
|
||
|
- dsrl a2,t9 /* restore a2 */
|
||
|
- jr ra
|
||
|
-#undef QT
|
||
|
-END(bn_div_words)
|
||
|
-
|
||
|
-#define a_0 t0
|
||
|
-#define a_1 t1
|
||
|
-#define a_2 t2
|
||
|
-#define a_3 t3
|
||
|
-#define b_0 ta0
|
||
|
-#define b_1 ta1
|
||
|
-#define b_2 ta2
|
||
|
-#define b_3 ta3
|
||
|
-
|
||
|
-#define a_4 s0
|
||
|
-#define a_5 s2
|
||
|
-#define a_6 s4
|
||
|
-#define a_7 a1 /* once we load a[7] we don't need a anymore */
|
||
|
-#define b_4 s1
|
||
|
-#define b_5 s3
|
||
|
-#define b_6 s5
|
||
|
-#define b_7 a2 /* once we load b[7] we don't need b anymore */
|
||
|
-
|
||
|
-#define t_1 t8
|
||
|
-#define t_2 t9
|
||
|
-
|
||
|
-#define c_1 v0
|
||
|
-#define c_2 v1
|
||
|
-#define c_3 a3
|
||
|
-
|
||
|
-#define FRAME_SIZE 48
|
||
|
-
|
||
|
-.align 5
|
||
|
-LEAF(bn_mul_comba8)
|
||
|
- .set noreorder
|
||
|
- PTR_SUB sp,FRAME_SIZE
|
||
|
- .frame sp,64,ra
|
||
|
- .set reorder
|
||
|
- ld a_0,0(a1) /* If compiled with -mips3 option on
|
||
|
- * R5000 box assembler barks on this
|
||
|
- * line with "shouldn't have mult/div
|
||
|
- * as last instruction in bb (R10K
|
||
|
- * bug)" warning. If anybody out there
|
||
|
- * has a clue about how to circumvent
|
||
|
- * this do send me a note.
|
||
|
- * <appro@fy.chalmers.se>
|
||
|
- */
|
||
|
- ld b_0,0(a2)
|
||
|
- ld a_1,8(a1)
|
||
|
- ld a_2,16(a1)
|
||
|
- ld a_3,24(a1)
|
||
|
- ld b_1,8(a2)
|
||
|
- ld b_2,16(a2)
|
||
|
- ld b_3,24(a2)
|
||
|
- dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
|
||
|
- sd s0,0(sp)
|
||
|
- sd s1,8(sp)
|
||
|
- sd s2,16(sp)
|
||
|
- sd s3,24(sp)
|
||
|
- sd s4,32(sp)
|
||
|
- sd s5,40(sp)
|
||
|
- mflo c_1
|
||
|
- mfhi c_2
|
||
|
-
|
||
|
- dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */
|
||
|
- ld a_4,32(a1)
|
||
|
- ld a_5,40(a1)
|
||
|
- ld a_6,48(a1)
|
||
|
- ld a_7,56(a1)
|
||
|
- ld b_4,32(a2)
|
||
|
- ld b_5,40(a2)
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu c_3,t_2,AT
|
||
|
- dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */
|
||
|
- ld b_6,48(a2)
|
||
|
- ld b_7,56(a2)
|
||
|
- sd c_1,0(a0) /* r[0]=c1; */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu c_1,c_3,t_2
|
||
|
- sd c_2,8(a0) /* r[1]=c2; */
|
||
|
-
|
||
|
- dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu c_2,c_1,t_2
|
||
|
- dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- sd c_3,16(a0) /* r[2]=c3; */
|
||
|
-
|
||
|
- dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu c_3,c_2,t_2
|
||
|
- dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- sd c_1,24(a0) /* r[3]=c1; */
|
||
|
-
|
||
|
- dmultu a_4,b_0 /* mul_add_c(a[4],b[0],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu c_1,c_3,t_2
|
||
|
- dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- dmultu a_0,b_4 /* mul_add_c(a[0],b[4],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- sd c_2,32(a0) /* r[4]=c2; */
|
||
|
-
|
||
|
- dmultu a_0,b_5 /* mul_add_c(a[0],b[5],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu c_2,c_1,t_2
|
||
|
- dmultu a_1,b_4 /* mul_add_c(a[1],b[4],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- dmultu a_4,b_1 /* mul_add_c(a[4],b[1],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- dmultu a_5,b_0 /* mul_add_c(a[5],b[0],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- sd c_3,40(a0) /* r[5]=c3; */
|
||
|
-
|
||
|
- dmultu a_6,b_0 /* mul_add_c(a[6],b[0],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu c_3,c_2,t_2
|
||
|
- dmultu a_5,b_1 /* mul_add_c(a[5],b[1],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- dmultu a_4,b_2 /* mul_add_c(a[4],b[2],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- dmultu a_2,b_4 /* mul_add_c(a[2],b[4],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- dmultu a_1,b_5 /* mul_add_c(a[1],b[5],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- dmultu a_0,b_6 /* mul_add_c(a[0],b[6],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- sd c_1,48(a0) /* r[6]=c1; */
|
||
|
-
|
||
|
- dmultu a_0,b_7 /* mul_add_c(a[0],b[7],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu c_1,c_3,t_2
|
||
|
- dmultu a_1,b_6 /* mul_add_c(a[1],b[6],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- dmultu a_2,b_5 /* mul_add_c(a[2],b[5],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- dmultu a_3,b_4 /* mul_add_c(a[3],b[4],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- dmultu a_4,b_3 /* mul_add_c(a[4],b[3],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- dmultu a_5,b_2 /* mul_add_c(a[5],b[2],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- dmultu a_6,b_1 /* mul_add_c(a[6],b[1],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- dmultu a_7,b_0 /* mul_add_c(a[7],b[0],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- sd c_2,56(a0) /* r[7]=c2; */
|
||
|
-
|
||
|
- dmultu a_7,b_1 /* mul_add_c(a[7],b[1],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu c_2,c_1,t_2
|
||
|
- dmultu a_6,b_2 /* mul_add_c(a[6],b[2],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- dmultu a_5,b_3 /* mul_add_c(a[5],b[3],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- dmultu a_4,b_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- dmultu a_3,b_5 /* mul_add_c(a[3],b[5],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- dmultu a_2,b_6 /* mul_add_c(a[2],b[6],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- dmultu a_1,b_7 /* mul_add_c(a[1],b[7],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- sd c_3,64(a0) /* r[8]=c3; */
|
||
|
-
|
||
|
- dmultu a_2,b_7 /* mul_add_c(a[2],b[7],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu c_3,c_2,t_2
|
||
|
- dmultu a_3,b_6 /* mul_add_c(a[3],b[6],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- dmultu a_4,b_5 /* mul_add_c(a[4],b[5],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- dmultu a_5,b_4 /* mul_add_c(a[5],b[4],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- dmultu a_6,b_3 /* mul_add_c(a[6],b[3],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- dmultu a_7,b_2 /* mul_add_c(a[7],b[2],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- sd c_1,72(a0) /* r[9]=c1; */
|
||
|
-
|
||
|
- dmultu a_7,b_3 /* mul_add_c(a[7],b[3],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu c_1,c_3,t_2
|
||
|
- dmultu a_6,b_4 /* mul_add_c(a[6],b[4],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- dmultu a_5,b_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- dmultu a_4,b_6 /* mul_add_c(a[4],b[6],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- dmultu a_3,b_7 /* mul_add_c(a[3],b[7],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- sd c_2,80(a0) /* r[10]=c2; */
|
||
|
-
|
||
|
- dmultu a_4,b_7 /* mul_add_c(a[4],b[7],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu c_2,c_1,t_2
|
||
|
- dmultu a_5,b_6 /* mul_add_c(a[5],b[6],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- dmultu a_6,b_5 /* mul_add_c(a[6],b[5],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- dmultu a_7,b_4 /* mul_add_c(a[7],b[4],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- sd c_3,88(a0) /* r[11]=c3; */
|
||
|
-
|
||
|
- dmultu a_7,b_5 /* mul_add_c(a[7],b[5],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu c_3,c_2,t_2
|
||
|
- dmultu a_6,b_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- dmultu a_5,b_7 /* mul_add_c(a[5],b[7],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- sd c_1,96(a0) /* r[12]=c1; */
|
||
|
-
|
||
|
- dmultu a_6,b_7 /* mul_add_c(a[6],b[7],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu c_1,c_3,t_2
|
||
|
- dmultu a_7,b_6 /* mul_add_c(a[7],b[6],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- sd c_2,104(a0) /* r[13]=c2; */
|
||
|
-
|
||
|
- dmultu a_7,b_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */
|
||
|
- ld s0,0(sp)
|
||
|
- ld s1,8(sp)
|
||
|
- ld s2,16(sp)
|
||
|
- ld s3,24(sp)
|
||
|
- ld s4,32(sp)
|
||
|
- ld s5,40(sp)
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sd c_3,112(a0) /* r[14]=c3; */
|
||
|
- sd c_1,120(a0) /* r[15]=c1; */
|
||
|
-
|
||
|
- PTR_ADD sp,FRAME_SIZE
|
||
|
-
|
||
|
- jr ra
|
||
|
-END(bn_mul_comba8)
|
||
|
-
|
||
|
-.align 5
|
||
|
-LEAF(bn_mul_comba4)
|
||
|
- .set reorder
|
||
|
- ld a_0,0(a1)
|
||
|
- ld b_0,0(a2)
|
||
|
- ld a_1,8(a1)
|
||
|
- ld a_2,16(a1)
|
||
|
- dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
|
||
|
- ld a_3,24(a1)
|
||
|
- ld b_1,8(a2)
|
||
|
- ld b_2,16(a2)
|
||
|
- ld b_3,24(a2)
|
||
|
- mflo c_1
|
||
|
- mfhi c_2
|
||
|
- sd c_1,0(a0)
|
||
|
-
|
||
|
- dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu c_3,t_2,AT
|
||
|
- dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu c_1,c_3,t_2
|
||
|
- sd c_2,8(a0)
|
||
|
-
|
||
|
- dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu c_2,c_1,t_2
|
||
|
- dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- sd c_3,16(a0)
|
||
|
-
|
||
|
- dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu c_3,c_2,t_2
|
||
|
- dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- sd c_1,24(a0)
|
||
|
-
|
||
|
- dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu c_1,c_3,t_2
|
||
|
- dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- sd c_2,32(a0)
|
||
|
-
|
||
|
- dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu c_2,c_1,t_2
|
||
|
- dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- sd c_3,40(a0)
|
||
|
-
|
||
|
- dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sd c_1,48(a0)
|
||
|
- sd c_2,56(a0)
|
||
|
-
|
||
|
- jr ra
|
||
|
-END(bn_mul_comba4)
|
||
|
-
|
||
|
-#undef a_4
|
||
|
-#undef a_5
|
||
|
-#undef a_6
|
||
|
-#undef a_7
|
||
|
-#define a_4 b_0
|
||
|
-#define a_5 b_1
|
||
|
-#define a_6 b_2
|
||
|
-#define a_7 b_3
|
||
|
-
|
||
|
-.align 5
|
||
|
-LEAF(bn_sqr_comba8)
|
||
|
- .set reorder
|
||
|
- ld a_0,0(a1)
|
||
|
- ld a_1,8(a1)
|
||
|
- ld a_2,16(a1)
|
||
|
- ld a_3,24(a1)
|
||
|
-
|
||
|
- dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
|
||
|
- ld a_4,32(a1)
|
||
|
- ld a_5,40(a1)
|
||
|
- ld a_6,48(a1)
|
||
|
- ld a_7,56(a1)
|
||
|
- mflo c_1
|
||
|
- mfhi c_2
|
||
|
- sd c_1,0(a0)
|
||
|
-
|
||
|
- dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt c_1,t_2,zero
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu c_3,t_2,AT
|
||
|
- sd c_2,8(a0)
|
||
|
-
|
||
|
- dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt c_2,t_2,zero
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- sd c_3,16(a0)
|
||
|
-
|
||
|
- dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt c_3,t_2,zero
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- dmultu a_1,a_2 /* mul_add_c2(a[1],b[2],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt AT,t_2,zero
|
||
|
- daddu c_3,AT
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- sd c_1,24(a0)
|
||
|
-
|
||
|
- dmultu a_4,a_0 /* mul_add_c2(a[4],b[0],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt c_1,t_2,zero
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt AT,t_2,zero
|
||
|
- daddu c_1,AT
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- sd c_2,32(a0)
|
||
|
-
|
||
|
- dmultu a_0,a_5 /* mul_add_c2(a[0],b[5],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt c_2,t_2,zero
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- dmultu a_1,a_4 /* mul_add_c2(a[1],b[4],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt AT,t_2,zero
|
||
|
- daddu c_2,AT
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt AT,t_2,zero
|
||
|
- daddu c_2,AT
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- sd c_3,40(a0)
|
||
|
-
|
||
|
- dmultu a_6,a_0 /* mul_add_c2(a[6],b[0],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt c_3,t_2,zero
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- dmultu a_5,a_1 /* mul_add_c2(a[5],b[1],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt AT,t_2,zero
|
||
|
- daddu c_3,AT
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- dmultu a_4,a_2 /* mul_add_c2(a[4],b[2],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt AT,t_2,zero
|
||
|
- daddu c_3,AT
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- sd c_1,48(a0)
|
||
|
-
|
||
|
- dmultu a_0,a_7 /* mul_add_c2(a[0],b[7],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt c_1,t_2,zero
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- dmultu a_1,a_6 /* mul_add_c2(a[1],b[6],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt AT,t_2,zero
|
||
|
- daddu c_1,AT
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- dmultu a_2,a_5 /* mul_add_c2(a[2],b[5],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt AT,t_2,zero
|
||
|
- daddu c_1,AT
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- dmultu a_3,a_4 /* mul_add_c2(a[3],b[4],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt AT,t_2,zero
|
||
|
- daddu c_1,AT
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- sd c_2,56(a0)
|
||
|
-
|
||
|
- dmultu a_7,a_1 /* mul_add_c2(a[7],b[1],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt c_2,t_2,zero
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- dmultu a_6,a_2 /* mul_add_c2(a[6],b[2],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt AT,t_2,zero
|
||
|
- daddu c_2,AT
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- dmultu a_5,a_3 /* mul_add_c2(a[5],b[3],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt AT,t_2,zero
|
||
|
- daddu c_2,AT
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- dmultu a_4,a_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- sd c_3,64(a0)
|
||
|
-
|
||
|
- dmultu a_2,a_7 /* mul_add_c2(a[2],b[7],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt c_3,t_2,zero
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- dmultu a_3,a_6 /* mul_add_c2(a[3],b[6],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt AT,t_2,zero
|
||
|
- daddu c_3,AT
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- dmultu a_4,a_5 /* mul_add_c2(a[4],b[5],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt AT,t_2,zero
|
||
|
- daddu c_3,AT
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- sd c_1,72(a0)
|
||
|
-
|
||
|
- dmultu a_7,a_3 /* mul_add_c2(a[7],b[3],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt c_1,t_2,zero
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- dmultu a_6,a_4 /* mul_add_c2(a[6],b[4],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt AT,t_2,zero
|
||
|
- daddu c_1,AT
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- dmultu a_5,a_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- sd c_2,80(a0)
|
||
|
-
|
||
|
- dmultu a_4,a_7 /* mul_add_c2(a[4],b[7],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt c_2,t_2,zero
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- dmultu a_5,a_6 /* mul_add_c2(a[5],b[6],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt AT,t_2,zero
|
||
|
- daddu c_2,AT
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- sd c_3,88(a0)
|
||
|
-
|
||
|
- dmultu a_7,a_5 /* mul_add_c2(a[7],b[5],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt c_3,t_2,zero
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- dmultu a_6,a_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- sd c_1,96(a0)
|
||
|
-
|
||
|
- dmultu a_6,a_7 /* mul_add_c2(a[6],b[7],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt c_1,t_2,zero
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- sd c_2,104(a0)
|
||
|
-
|
||
|
- dmultu a_7,a_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sd c_3,112(a0)
|
||
|
- sd c_1,120(a0)
|
||
|
-
|
||
|
- jr ra
|
||
|
-END(bn_sqr_comba8)
|
||
|
-
|
||
|
-.align 5
|
||
|
-LEAF(bn_sqr_comba4)
|
||
|
- .set reorder
|
||
|
- ld a_0,0(a1)
|
||
|
- ld a_1,8(a1)
|
||
|
- ld a_2,16(a1)
|
||
|
- ld a_3,24(a1)
|
||
|
- dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */
|
||
|
- mflo c_1
|
||
|
- mfhi c_2
|
||
|
- sd c_1,0(a0)
|
||
|
-
|
||
|
- dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt c_1,t_2,zero
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu c_3,t_2,AT
|
||
|
- sd c_2,8(a0)
|
||
|
-
|
||
|
- dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt c_2,t_2,zero
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- sd c_3,16(a0)
|
||
|
-
|
||
|
- dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt c_3,t_2,zero
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- dmultu a_1,a_2 /* mul_add_c(a2[1],b[2],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt AT,t_2,zero
|
||
|
- daddu c_3,AT
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sltu AT,c_2,t_2
|
||
|
- daddu c_3,AT
|
||
|
- sd c_1,24(a0)
|
||
|
-
|
||
|
- dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt c_1,t_2,zero
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_2,t_1
|
||
|
- sltu AT,c_2,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_3,t_2
|
||
|
- sltu AT,c_3,t_2
|
||
|
- daddu c_1,AT
|
||
|
- sd c_2,32(a0)
|
||
|
-
|
||
|
- dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- slt c_2,t_2,zero
|
||
|
- dsll t_2,1
|
||
|
- slt a2,t_1,zero
|
||
|
- daddu t_2,a2
|
||
|
- dsll t_1,1
|
||
|
- daddu c_3,t_1
|
||
|
- sltu AT,c_3,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_1,t_2
|
||
|
- sltu AT,c_1,t_2
|
||
|
- daddu c_2,AT
|
||
|
- sd c_3,40(a0)
|
||
|
-
|
||
|
- dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */
|
||
|
- mflo t_1
|
||
|
- mfhi t_2
|
||
|
- daddu c_1,t_1
|
||
|
- sltu AT,c_1,t_1
|
||
|
- daddu t_2,AT
|
||
|
- daddu c_2,t_2
|
||
|
- sd c_1,48(a0)
|
||
|
- sd c_2,56(a0)
|
||
|
-
|
||
|
- jr ra
|
||
|
-END(bn_sqr_comba4)
|
||
|
diff --git a/crypto/bn/asm/x86_64-gcc.c b/crypto/bn/asm/x86_64-gcc.c
|
||
|
index 31476ab..2d39407 100644
|
||
|
--- a/crypto/bn/asm/x86_64-gcc.c
|
||
|
+++ b/crypto/bn/asm/x86_64-gcc.c
|
||
|
@@ -273,6 +273,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
|
||
|
/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
|
||
|
/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
|
||
|
|
||
|
+/*
|
||
|
+ * Keep in mind that carrying into high part of multiplication result
|
||
|
+ * can not overflow, because it cannot be all-ones.
|
||
|
+ */
|
||
|
#if 0
|
||
|
/* original macros are kept for reference purposes */
|
||
|
#define mul_add_c(a,b,c0,c1,c2) { \
|
||
|
@@ -287,10 +291,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
|
||
|
BN_ULONG ta=(a),tb=(b),t0; \
|
||
|
t1 = BN_UMULT_HIGH(ta,tb); \
|
||
|
t0 = ta * tb; \
|
||
|
- t2 = t1+t1; c2 += (t2<t1)?1:0; \
|
||
|
- t1 = t0+t0; t2 += (t1<t0)?1:0; \
|
||
|
- c0 += t1; t2 += (c0<t1)?1:0; \
|
||
|
+ c0 += t0; t2 = t1+((c0<t0)?1:0);\
|
||
|
c1 += t2; c2 += (c1<t2)?1:0; \
|
||
|
+ c0 += t0; t1 += (c0<t0)?1:0; \
|
||
|
+ c1 += t1; c2 += (c1<t1)?1:0; \
|
||
|
}
|
||
|
#else
|
||
|
#define mul_add_c(a,b,c0,c1,c2) do { \
|
||
|
@@ -328,22 +332,14 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
|
||
|
: "=a"(t1),"=d"(t2) \
|
||
|
: "a"(a),"m"(b) \
|
||
|
: "cc"); \
|
||
|
- asm ("addq %0,%0; adcq %2,%1" \
|
||
|
- : "+d"(t2),"+r"(c2) \
|
||
|
- : "g"(0) \
|
||
|
- : "cc"); \
|
||
|
- asm ("addq %0,%0; adcq %2,%1" \
|
||
|
- : "+a"(t1),"+d"(t2) \
|
||
|
- : "g"(0) \
|
||
|
- : "cc"); \
|
||
|
- asm ("addq %2,%0; adcq %3,%1" \
|
||
|
- : "+r"(c0),"+d"(t2) \
|
||
|
- : "a"(t1),"g"(0) \
|
||
|
- : "cc"); \
|
||
|
- asm ("addq %2,%0; adcq %3,%1" \
|
||
|
- : "+r"(c1),"+r"(c2) \
|
||
|
- : "d"(t2),"g"(0) \
|
||
|
- : "cc"); \
|
||
|
+ asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \
|
||
|
+ : "+r"(c0),"+r"(c1),"+r"(c2) \
|
||
|
+ : "r"(t1),"r"(t2),"g"(0) \
|
||
|
+ : "cc"); \
|
||
|
+ asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \
|
||
|
+ : "+r"(c0),"+r"(c1),"+r"(c2) \
|
||
|
+ : "r"(t1),"r"(t2),"g"(0) \
|
||
|
+ : "cc"); \
|
||
|
} while (0)
|
||
|
#endif
|
||
|
|
||
|
diff --git a/crypto/bn/bn_asm.c b/crypto/bn/bn_asm.c
|
||
|
index c43c91c..a33b634 100644
|
||
|
--- a/crypto/bn/bn_asm.c
|
||
|
+++ b/crypto/bn/bn_asm.c
|
||
|
@@ -438,6 +438,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
|
||
|
/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
|
||
|
/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
|
||
|
|
||
|
+/*
|
||
|
+ * Keep in mind that carrying into high part of multiplication result
|
||
|
+ * can not overflow, because it cannot be all-ones.
|
||
|
+ */
|
||
|
#ifdef BN_LLONG
|
||
|
#define mul_add_c(a,b,c0,c1,c2) \
|
||
|
t=(BN_ULLONG)a*b; \
|
||
|
@@ -478,10 +482,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
|
||
|
#define mul_add_c2(a,b,c0,c1,c2) { \
|
||
|
BN_ULONG ta=(a),tb=(b),t0; \
|
||
|
BN_UMULT_LOHI(t0,t1,ta,tb); \
|
||
|
- t2 = t1+t1; c2 += (t2<t1)?1:0; \
|
||
|
- t1 = t0+t0; t2 += (t1<t0)?1:0; \
|
||
|
- c0 += t1; t2 += (c0<t1)?1:0; \
|
||
|
+ c0 += t0; t2 = t1+((c0<t0)?1:0);\
|
||
|
c1 += t2; c2 += (c1<t2)?1:0; \
|
||
|
+ c0 += t0; t1 += (c0<t0)?1:0; \
|
||
|
+ c1 += t1; c2 += (c1<t1)?1:0; \
|
||
|
}
|
||
|
|
||
|
#define sqr_add_c(a,i,c0,c1,c2) { \
|
||
|
@@ -508,10 +512,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
|
||
|
BN_ULONG ta=(a),tb=(b),t0; \
|
||
|
t1 = BN_UMULT_HIGH(ta,tb); \
|
||
|
t0 = ta * tb; \
|
||
|
- t2 = t1+t1; c2 += (t2<t1)?1:0; \
|
||
|
- t1 = t0+t0; t2 += (t1<t0)?1:0; \
|
||
|
- c0 += t1; t2 += (c0<t1)?1:0; \
|
||
|
+ c0 += t0; t2 = t1+((c0<t0)?1:0);\
|
||
|
c1 += t2; c2 += (c1<t2)?1:0; \
|
||
|
+ c0 += t0; t1 += (c0<t0)?1:0; \
|
||
|
+ c1 += t1; c2 += (c1<t1)?1:0; \
|
||
|
}
|
||
|
|
||
|
#define sqr_add_c(a,i,c0,c1,c2) { \
|
||
|
diff --git a/crypto/bn/bntest.c b/crypto/bn/bntest.c
|
||
|
index 7771e92..48bc633 100644
|
||
|
--- a/crypto/bn/bntest.c
|
||
|
+++ b/crypto/bn/bntest.c
|
||
|
@@ -678,44 +678,98 @@ int test_mul(BIO *bp)
|
||
|
|
||
|
int test_sqr(BIO *bp, BN_CTX *ctx)
|
||
|
{
|
||
|
- BIGNUM a,c,d,e;
|
||
|
- int i;
|
||
|
+ BIGNUM *a,*c,*d,*e;
|
||
|
+ int i, ret = 0;
|
||
|
|
||
|
- BN_init(&a);
|
||
|
- BN_init(&c);
|
||
|
- BN_init(&d);
|
||
|
- BN_init(&e);
|
||
|
+ a = BN_new();
|
||
|
+ c = BN_new();
|
||
|
+ d = BN_new();
|
||
|
+ e = BN_new();
|
||
|
+ if (a == NULL || c == NULL || d == NULL || e == NULL)
|
||
|
+ {
|
||
|
+ goto err;
|
||
|
+ }
|
||
|
|
||
|
for (i=0; i<num0; i++)
|
||
|
{
|
||
|
- BN_bntest_rand(&a,40+i*10,0,0);
|
||
|
- a.neg=rand_neg();
|
||
|
- BN_sqr(&c,&a,ctx);
|
||
|
+ BN_bntest_rand(a,40+i*10,0,0);
|
||
|
+ a->neg=rand_neg();
|
||
|
+ BN_sqr(c,a,ctx);
|
||
|
if (bp != NULL)
|
||
|
{
|
||
|
if (!results)
|
||
|
{
|
||
|
- BN_print(bp,&a);
|
||
|
+ BN_print(bp,a);
|
||
|
BIO_puts(bp," * ");
|
||
|
- BN_print(bp,&a);
|
||
|
+ BN_print(bp,a);
|
||
|
BIO_puts(bp," - ");
|
||
|
}
|
||
|
- BN_print(bp,&c);
|
||
|
+ BN_print(bp,c);
|
||
|
BIO_puts(bp,"\n");
|
||
|
}
|
||
|
- BN_div(&d,&e,&c,&a,ctx);
|
||
|
- BN_sub(&d,&d,&a);
|
||
|
- if(!BN_is_zero(&d) || !BN_is_zero(&e))
|
||
|
- {
|
||
|
- fprintf(stderr,"Square test failed!\n");
|
||
|
- return 0;
|
||
|
- }
|
||
|
+ BN_div(d,e,c,a,ctx);
|
||
|
+ BN_sub(d,d,a);
|
||
|
+ if(!BN_is_zero(d) || !BN_is_zero(e))
|
||
|
+ {
|
||
|
+ fprintf(stderr,"Square test failed!\n");
|
||
|
+ goto err;
|
||
|
+ }
|
||
|
}
|
||
|
- BN_free(&a);
|
||
|
- BN_free(&c);
|
||
|
- BN_free(&d);
|
||
|
- BN_free(&e);
|
||
|
- return(1);
|
||
|
+
|
||
|
+ /* Regression test for a BN_sqr overflow bug. */
|
||
|
+ BN_hex2bn(&a,
|
||
|
+ "80000000000000008000000000000001FFFFFFFFFFFFFFFE0000000000000000");
|
||
|
+ BN_sqr(c, a, ctx);
|
||
|
+ if (bp != NULL)
|
||
|
+ {
|
||
|
+ if (!results)
|
||
|
+ {
|
||
|
+ BN_print(bp,a);
|
||
|
+ BIO_puts(bp," * ");
|
||
|
+ BN_print(bp,a);
|
||
|
+ BIO_puts(bp," - ");
|
||
|
+ }
|
||
|
+ BN_print(bp,c);
|
||
|
+ BIO_puts(bp,"\n");
|
||
|
+ }
|
||
|
+ BN_mul(d, a, a, ctx);
|
||
|
+ if (BN_cmp(c, d))
|
||
|
+ {
|
||
|
+ fprintf(stderr, "Square test failed: BN_sqr and BN_mul produce "
|
||
|
+ "different results!\n");
|
||
|
+ goto err;
|
||
|
+ }
|
||
|
+
|
||
|
+ /* Regression test for a BN_sqr overflow bug. */
|
||
|
+ BN_hex2bn(&a,
|
||
|
+ "80000000000000000000000080000001FFFFFFFE000000000000000000000000");
|
||
|
+ BN_sqr(c, a, ctx);
|
||
|
+ if (bp != NULL)
|
||
|
+ {
|
||
|
+ if (!results)
|
||
|
+ {
|
||
|
+ BN_print(bp,a);
|
||
|
+ BIO_puts(bp," * ");
|
||
|
+ BN_print(bp,a);
|
||
|
+ BIO_puts(bp," - ");
|
||
|
+ }
|
||
|
+ BN_print(bp,c);
|
||
|
+ BIO_puts(bp,"\n");
|
||
|
+ }
|
||
|
+ BN_mul(d, a, a, ctx);
|
||
|
+ if (BN_cmp(c, d))
|
||
|
+ {
|
||
|
+ fprintf(stderr, "Square test failed: BN_sqr and BN_mul produce "
|
||
|
+ "different results!\n");
|
||
|
+ goto err;
|
||
|
+ }
|
||
|
+ ret = 1;
|
||
|
+err:
|
||
|
+ if (a != NULL) BN_free(a);
|
||
|
+ if (c != NULL) BN_free(c);
|
||
|
+ if (d != NULL) BN_free(d);
|
||
|
+ if (e != NULL) BN_free(e);
|
||
|
+ return ret;
|
||
|
}
|
||
|
|
||
|
int test_mont(BIO *bp, BN_CTX *ctx)
|
||
|
--
|
||
|
1.8.3.1
|
||
|
|