You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
949 lines
30 KiB
949 lines
30 KiB
diff --git a/lib/freebl/mpi/mpi-priv.h b/lib/freebl/mpi/mpi-priv.h |
|
--- a/lib/freebl/mpi/mpi-priv.h |
|
+++ b/lib/freebl/mpi/mpi-priv.h |
|
@@ -199,16 +199,19 @@ void MPI_ASM_DECL s_mpv_mul_d(const mp_d |
|
void MPI_ASM_DECL s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, |
|
mp_digit b, mp_digit *c); |
|
|
|
#endif |
|
|
|
void MPI_ASM_DECL s_mpv_mul_d_add_prop(const mp_digit *a, |
|
mp_size a_len, mp_digit b, |
|
mp_digit *c); |
|
+void MPI_ASM_DECL s_mpv_mul_d_add_propCT(const mp_digit *a, |
|
+ mp_size a_len, mp_digit b, |
|
+ mp_digit *c, mp_size c_len); |
|
void MPI_ASM_DECL s_mpv_sqr_add_prop(const mp_digit *a, |
|
mp_size a_len, |
|
mp_digit *sqrs); |
|
|
|
mp_err MPI_ASM_DECL s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, |
|
mp_digit divisor, mp_digit *quot, mp_digit *rem); |
|
|
|
/* c += a * b * (MP_RADIX ** offset); */ |
|
diff --git a/lib/freebl/mpi/mpi.c b/lib/freebl/mpi/mpi.c |
|
--- a/lib/freebl/mpi/mpi.c |
|
+++ b/lib/freebl/mpi/mpi.c |
|
@@ -5,16 +5,18 @@ |
|
* |
|
* This Source Code Form is subject to the terms of the Mozilla Public |
|
* License, v. 2.0. If a copy of the MPL was not distributed with this |
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
|
|
#include "mpi-priv.h" |
|
#include "mplogic.h" |
|
|
|
+#include <assert.h> |
|
+ |
|
#if defined(__arm__) && \ |
|
((defined(__thumb__) && !defined(__thumb2__)) || defined(__ARM_ARCH_3__)) |
|
/* 16-bit thumb or ARM v3 doesn't work inlined assember version */ |
|
#undef MP_ASSEMBLY_MULTIPLY |
|
#undef MP_ASSEMBLY_SQUARE |
|
#endif |
|
|
|
#if MP_LOGTAB |
|
@@ -797,25 +799,28 @@ mp_sub(const mp_int *a, const mp_int *b, |
|
|
|
CLEANUP: |
|
return res; |
|
|
|
} /* end mp_sub() */ |
|
|
|
/* }}} */ |
|
|
|
-/* {{{ mp_mul(a, b, c) */ |
|
+/* {{{ s_mp_mulg(a, b, c) */ |
|
|
|
/* |
|
- mp_mul(a, b, c) |
|
- |
|
- Compute c = a * b. All parameters may be identical. |
|
+ s_mp_mulg(a, b, c) |
|
+ |
|
+ Compute c = a * b. All parameters may be identical. If constantTime is set, |
|
+ then the operations are done in constant time. The original is mostly |
|
+ constant time as long as s_mpv_mul_d_add() is constant time. This is true |
|
+ of the x86 assembler, as well as the current C code. |
|
*/ |
|
mp_err |
|
-mp_mul(const mp_int *a, const mp_int *b, mp_int *c) |
|
+s_mp_mulg(const mp_int *a, const mp_int *b, mp_int *c, int constantTime) |
|
{ |
|
mp_digit *pb; |
|
mp_int tmp; |
|
mp_err res; |
|
mp_size ib; |
|
mp_size useda, usedb; |
|
|
|
ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); |
|
@@ -841,17 +846,24 @@ mp_mul(const mp_int *a, const mp_int *b, |
|
} |
|
|
|
MP_USED(c) = 1; |
|
MP_DIGIT(c, 0) = 0; |
|
if ((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY) |
|
goto CLEANUP; |
|
|
|
#ifdef NSS_USE_COMBA |
|
- if ((MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) { |
|
+ /* comba isn't constant time because it clamps! If we cared |
|
+ * (i.e. we needed a constant time version of multiply that was 'faster') |
|
+ * we could easily pass constantTime down to the comba code and |
|
+ * get it to skip the clamp... but there are assembler versions |
|
+ * which add comba to platforms that can't compile the normal |
|
+ * comba's embedded assembler which would also need to change, so |
|
+ * for now we just skip comba when we are running constant time. */ |
|
+ if (!constantTime && (MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) { |
|
if (MP_USED(a) == 4) { |
|
s_mp_mul_comba_4(a, b, c); |
|
goto CLEANUP; |
|
} |
|
if (MP_USED(a) == 8) { |
|
s_mp_mul_comba_8(a, b, c); |
|
goto CLEANUP; |
|
} |
|
@@ -871,36 +883,82 @@ mp_mul(const mp_int *a, const mp_int *b, |
|
|
|
/* Outer loop: Digits of b */ |
|
useda = MP_USED(a); |
|
usedb = MP_USED(b); |
|
for (ib = 1; ib < usedb; ib++) { |
|
mp_digit b_i = *pb++; |
|
|
|
/* Inner product: Digits of a */ |
|
- if (b_i) |
|
+ if (constantTime || b_i) |
|
s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib); |
|
else |
|
MP_DIGIT(c, ib + useda) = b_i; |
|
} |
|
|
|
- s_mp_clamp(c); |
|
+ if (!constantTime) { |
|
+ s_mp_clamp(c); |
|
+ } |
|
|
|
if (SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ) |
|
SIGN(c) = ZPOS; |
|
else |
|
SIGN(c) = NEG; |
|
|
|
CLEANUP: |
|
mp_clear(&tmp); |
|
return res; |
|
+} /* end s_mp_mulg() */ |
|
+ |
|
+/* }}} */ |
|
+ |
|
+/* {{{ mp_mul(a, b, c) */ |
|
+ |
|
+/* |
|
+ mp_mul(a, b, c) |
|
+ |
|
+ Compute c = a * b. All parameters may be identical. |
|
+ */ |
|
+ |
|
+mp_err |
|
+mp_mul(const mp_int *a, const mp_int *b, mp_int *c) |
|
+{ |
|
+ return s_mp_mulg(a, b, c, 0); |
|
} /* end mp_mul() */ |
|
|
|
/* }}} */ |
|
|
|
+/* {{{ mp_mulCT(a, b, c) */ |
|
+ |
|
+/* |
|
+ mp_mulCT(a, b, c) |
|
+ |
|
+ Compute c = a * b. In constant time. Parameters may not be identical. |
|
+ NOTE: a and b may be modified. |
|
+ */ |
|
+ |
|
+mp_err |
|
+mp_mulCT(mp_int *a, mp_int *b, mp_int *c, mp_size setSize) |
|
+{ |
|
+ mp_err res; |
|
+ |
|
+ /* make the multiply values fixed length so multiply |
|
+ * doesn't leak the length. at this point all the |
|
+ * values are blinded, but once we finish we want the |
|
+ * output size to be hidden (so no clamping the output) */ |
|
+ MP_CHECKOK(s_mp_pad(a, setSize)); |
|
+ MP_CHECKOK(s_mp_pad(b, setSize)); |
|
+ MP_CHECKOK(s_mp_pad(c, 2*setSize)); |
|
+ MP_CHECKOK(s_mp_mulg(a, b, c, 1)); |
|
+CLEANUP: |
|
+ return res; |
|
+} /* end mp_mulCT() */ |
|
+ |
|
+/* }}} */ |
|
+ |
|
/* {{{ mp_sqr(a, sqr) */ |
|
|
|
#if MP_SQUARE |
|
/* |
|
Computes the square of a. This can be done more |
|
efficiently than a general multiplication, because many of the |
|
computation steps are redundant when squaring. The inner product |
|
step is a bit more complicated, but we save a fair number of |
|
@@ -1263,16 +1321,174 @@ mp_mod(const mp_int *a, const mp_int *m, |
|
} |
|
|
|
return MP_OKAY; |
|
|
|
} /* end mp_mod() */ |
|
|
|
/* }}} */ |
|
|
|
+/* {{{ s_mp_subCT_d(a, b, borrow, c) */ |
|
+ |
|
+/* |
|
+ s_mp_subCT_d(a, b, borrow, c) |
|
+ |
|
+ Compute c = a - b - borrow in constant time. Returns the resulting borrow. |
|
+ */ |
|
+mp_digit |
|
+s_mp_subCT_d(mp_digit a, mp_digit b, mp_digit borrow, mp_digit *ret) { |
|
+ mp_digit borrow1, borrow2, t; |
|
+#ifdef MP_COMPILER_USES_CARRY |
|
+ /* while it doesn't look constant-time, this is idiomatic code |
|
+ * to tell compilers to use the carry bit from subtraction */ |
|
+ t = a - borrow; |
|
+ if (t > a) { |
|
+ borrow1 = 1; |
|
+ } else { |
|
+ borrow1 = 0; |
|
+ } |
|
+ *ret = t - b; |
|
+ if (*ret > t) { |
|
+ borrow2 = 1; |
|
+ } else { |
|
+ borrow2 = 0; |
|
+ } |
|
+#else |
|
+ mp_digit bitr, bitb, nbitt; |
|
+ /* this is constant time independent of compiler */ |
|
+ t = a - borrow; |
|
+ borrow1 = ((~a) >> (MP_DIGIT_BIT-1)) & ((t) >> (MP_DIGIT_BIT-1)); |
|
+ *ret = t - b; |
|
+ bitb = b >> (MP_DIGIT_BIT-1); |
|
+ bitr = *ret >> (MP_DIGIT_BIT-1); |
|
+ nbitt = (~t) >> (MP_DIGIT_BIT-1); |
|
+ borrow2 = (nbitt & bitb) | (bitb & bitr) | (nbitt & bitr); |
|
+#endif |
|
+ /* only borrow 1 or borrow 2 should be 1, we want to guarantee |
|
+ * the overall borrow is 1, so use | here */ |
|
+ return borrow1 | borrow2; |
|
+} /* s_mp_subCT_d() */ |
|
+ |
|
+/* }}} */ |
|
+ |
|
+/* {{{ mp_subCT(a, b, ret, borrow) */ |
|
+ |
|
+/* return ret= a - b and borrow in borrow. done in constant time. |
|
+ * b could be modified. |
|
+ */ |
|
+mp_err |
|
+mp_subCT(const mp_int *a, mp_int *b, mp_int *ret, mp_digit *borrow) |
|
+{ |
|
+ mp_size used_a = MP_USED(a); |
|
+ mp_size i; |
|
+ mp_err res; |
|
+ |
|
+ MP_CHECKOK(s_mp_pad(b, used_a)); |
|
+ MP_CHECKOK(s_mp_pad(ret, used_a)); |
|
+ *borrow = 0; |
|
+ for (i=0; i < used_a; i++) { |
|
+ *borrow = s_mp_subCT_d(MP_DIGIT(a,i), MP_DIGIT(b,i), *borrow, |
|
+ &MP_DIGIT(ret,i)); |
|
+ } |
|
+ |
|
+ res = MP_OKAY; |
|
+CLEANUP: |
|
+ return res; |
|
+} /* end mp_subCT() */ |
|
+ |
|
+/* }}} */ |
|
+ |
|
+/* {{{ mp_selectCT(cond, a, b, ret) */ |
|
+ |
|
+/* |
|
+ * return ret= cond ? a : b; cond should be either 0 or 1 |
|
+ */ |
|
+mp_err |
|
+mp_selectCT(mp_digit cond, const mp_int *a, const mp_int *b, mp_int *ret) |
|
+{ |
|
+ mp_size used_a = MP_USED(a); |
|
+ mp_err res; |
|
+ mp_size i; |
|
+ |
|
+ cond *= MP_DIGIT_MAX; |
|
+ |
|
+ /* we currently require these to be equal on input, |
|
+ * we could use pad to extend one of them, but that might |
|
+ * leak data as it wouldn't be constant time */ |
|
+ assert(used_a == MP_USED(b)); |
|
+ |
|
+ MP_CHECKOK(s_mp_pad(ret, used_a)); |
|
+ for (i=0; i < used_a; i++) { |
|
+ MP_DIGIT(ret,i) = (MP_DIGIT(a,i)&cond) | (MP_DIGIT(b,i)&~cond); |
|
+ } |
|
+ res = MP_OKAY; |
|
+CLEANUP: |
|
+ return res; |
|
+} /* end mp_selectCT() */ |
|
+ |
|
+ |
|
+/* {{{ mp_reduceCT(a, m, c) */ |
|
+ |
|
+/* |
|
+ mp_reduceCT(a, m, c) |
|
+ |
|
+ Compute c = aR^-1 (mod m) in constant time. |
|
+ input should be in montgomery form. If input is the |
|
+ result of a montgomery multiply then output will be |
|
+ in montgomery form. |
|
+ Result will be reduced to MP_USED(m), but not be |
|
+ clamped. |
|
+ */ |
|
+ |
|
+mp_err |
|
+mp_reduceCT(const mp_int *a, const mp_int *m, mp_digit n0i, mp_int *c) |
|
+{ |
|
+ mp_size used_m = MP_USED(m); |
|
+ mp_size used_c = used_m*2+1; |
|
+ mp_digit *m_digits, *c_digits; |
|
+ mp_size i; |
|
+ mp_digit borrow, carry; |
|
+ mp_err res; |
|
+ mp_int sub; |
|
+ |
|
+ MP_DIGITS(&sub) = 0; |
|
+ MP_CHECKOK(mp_init_size(&sub,used_m)); |
|
+ |
|
+ if (a != c) { |
|
+ MP_CHECKOK(mp_copy(a, c)); |
|
+ } |
|
+ MP_CHECKOK(s_mp_pad(c, used_c)); |
|
+ m_digits = MP_DIGITS(m); |
|
+ c_digits = MP_DIGITS(c); |
|
+ for (i=0; i < used_m; i++) { |
|
+ mp_digit m_i = MP_DIGIT(c,i)*n0i; |
|
+ s_mpv_mul_d_add_propCT(m_digits, used_m, m_i, c_digits++, used_c--); |
|
+ } |
|
+ s_mp_rshd(c, used_m); |
|
+ /* MP_USED(c) should be used_m+1 with the high word being any carry |
|
+ * from the previous multiply, save that carry and drop the high |
|
+ * word for the subtraction below */ |
|
+ carry = MP_DIGIT(c,used_m); |
|
+ MP_DIGIT(c,used_m) = 0; |
|
+ MP_USED(c) = used_m; |
|
+ /* mp_subCT wants c and m to be the same size, we've already |
|
+ * guaranteed that in the previous statement, so mp_subCT won't actually |
|
+ * modify m, so it's safe to recast */ |
|
+ MP_CHECKOK(mp_subCT(c, (mp_int *)m, &sub, &borrow)); |
|
+ |
|
+ /* we return c-m if c >= m (no borrow), or if there was both a borrow and a carry */ |
|
+ MP_CHECKOK(mp_selectCT(borrow ^ carry, c, &sub, c)); |
|
+ res = MP_OKAY; |
|
+CLEANUP: |
|
+ mp_clear(&sub); |
|
+ return res; |
|
+} /* end mp_reduceCT() */ |
|
+ |
|
+/* }}} */ |
|
+ |
|
/* {{{ mp_mod_d(a, d, c) */ |
|
|
|
/* |
|
mp_mod_d(a, d, c) |
|
|
|
Compute c = a (mod d). Result will always be 0 <= c < d |
|
*/ |
|
mp_err |
|
@@ -1379,16 +1595,47 @@ mp_mulmod(const mp_int *a, const mp_int |
|
if ((res = mp_mod(c, m, c)) != MP_OKAY) |
|
return res; |
|
|
|
return MP_OKAY; |
|
} |
|
|
|
/* }}} */ |
|
|
|
+/* {{{ mp_mulmontmodCT(a, b, m, c) */ |
|
+ |
|
+/* |
|
+ mp_mulmontmodCT(a, b, m, c) |
|
+ |
|
+ Compute c = (a * b) mod m in constant time wrt a and b. Either a or b |
|
+ should be in montgomery form and the output is native. If both a and b |
|
+ are in montgomery form, then the output will also be in montgomery form |
|
+ and can be recovered with an mp_reduceCT call. |
|
+ NOTE: a and b may be modified. |
|
+ */ |
|
+ |
|
+mp_err |
|
+mp_mulmontmodCT(mp_int *a, mp_int *b, const mp_int *m, mp_digit n0i, |
|
+ mp_int *c) |
|
+{ |
|
+ mp_err res; |
|
+ |
|
+ ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG); |
|
+ |
|
+ if ((res = mp_mulCT(a, b, c, MP_USED(m))) != MP_OKAY) |
|
+ return res; |
|
+ |
|
+ if ((res = mp_reduceCT(c, m, n0i, c)) != MP_OKAY) |
|
+ return res; |
|
+ |
|
+ return MP_OKAY; |
|
+} |
|
+ |
|
+/* }}} */ |
|
+ |
|
/* {{{ mp_sqrmod(a, m, c) */ |
|
|
|
#if MP_SQUARE |
|
mp_err |
|
mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c) |
|
{ |
|
mp_err res; |
|
|
|
@@ -3936,25 +4183,73 @@ s_mp_mul(mp_int *a, const mp_int *b) |
|
{ \ |
|
mp_digit a0b1, a1b0; \ |
|
Plo = (a & MP_HALF_DIGIT_MAX) * (b & MP_HALF_DIGIT_MAX); \ |
|
Phi = (a >> MP_HALF_DIGIT_BIT) * (b >> MP_HALF_DIGIT_BIT); \ |
|
a0b1 = (a & MP_HALF_DIGIT_MAX) * (b >> MP_HALF_DIGIT_BIT); \ |
|
a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \ |
|
a1b0 += a0b1; \ |
|
Phi += a1b0 >> MP_HALF_DIGIT_BIT; \ |
|
- if (a1b0 < a0b1) \ |
|
- Phi += MP_HALF_RADIX; \ |
|
+ Phi += (MP_CT_LTU(a1b0, a0b1)) << MP_HALF_DIGIT_BIT; \ |
|
a1b0 <<= MP_HALF_DIGIT_BIT; \ |
|
Plo += a1b0; \ |
|
- if (Plo < a1b0) \ |
|
- ++Phi; \ |
|
+ Phi += MP_CT_LTU(Plo, a1b0); \ |
|
} |
|
#endif |
|
|
|
+/* Constant time version of s_mpv_mul_d_add_prop. |
|
+ * Presently, this is only used by the Constant time Montgomery arithmetic code. */ |
|
+/* c += a * b */ |
|
+void |
|
+s_mpv_mul_d_add_propCT(const mp_digit *a, mp_size a_len, mp_digit b, |
|
+ mp_digit *c, mp_size c_len) |
|
+{ |
|
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD) |
|
+ mp_digit d = 0; |
|
+ |
|
+ c_len -= a_len; |
|
+ /* Inner product: Digits of a */ |
|
+ while (a_len--) { |
|
+ mp_word w = ((mp_word)b * *a++) + *c + d; |
|
+ *c++ = ACCUM(w); |
|
+ d = CARRYOUT(w); |
|
+ } |
|
+ |
|
+ /* propagate the carry to the end, even if carry is zero */ |
|
+ while (c_len--) { |
|
+ mp_word w = (mp_word)*c + d; |
|
+ *c++ = ACCUM(w); |
|
+ d = CARRYOUT(w); |
|
+ } |
|
+#else |
|
+ mp_digit carry = 0; |
|
+ c_len -= a_len; |
|
+ while (a_len--) { |
|
+ mp_digit a_i = *a++; |
|
+ mp_digit a0b0, a1b1; |
|
+ MP_MUL_DxD(a_i, b, a1b1, a0b0); |
|
+ |
|
+ a0b0 += carry; |
|
+ a1b1 += MP_CT_LTU(a0b0, carry); |
|
+ a0b0 += a_i = *c; |
|
+ a1b1 += MP_CT_LTU(a0b0, a_i); |
|
+ |
|
+ *c++ = a0b0; |
|
+ carry = a1b1; |
|
+ } |
|
+ /* propagate the carry to the end, even if carry is zero */ |
|
+ while (c_len--) { |
|
+ mp_digit c_i = *c; |
|
+ carry += c_i; |
|
+ *c++ = carry; |
|
+ carry = MP_CT_LTU(carry, c_i); |
|
+ } |
|
+#endif |
|
+} |
|
+ |
|
#if !defined(MP_ASSEMBLY_MULTIPLY) |
|
/* c = a * b */ |
|
void |
|
s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) |
|
{ |
|
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD) |
|
mp_digit d = 0; |
|
|
|
@@ -3969,18 +4264,17 @@ s_mpv_mul_d(const mp_digit *a, mp_size a |
|
mp_digit carry = 0; |
|
while (a_len--) { |
|
mp_digit a_i = *a++; |
|
mp_digit a0b0, a1b1; |
|
|
|
MP_MUL_DxD(a_i, b, a1b1, a0b0); |
|
|
|
a0b0 += carry; |
|
- if (a0b0 < carry) |
|
- ++a1b1; |
|
+ a1b1 += a0b0 < carry; |
|
*c++ = a0b0; |
|
carry = a1b1; |
|
} |
|
*c = carry; |
|
#endif |
|
} |
|
|
|
/* c += a * b */ |
|
@@ -4002,21 +4296,19 @@ s_mpv_mul_d_add(const mp_digit *a, mp_si |
|
mp_digit carry = 0; |
|
while (a_len--) { |
|
mp_digit a_i = *a++; |
|
mp_digit a0b0, a1b1; |
|
|
|
MP_MUL_DxD(a_i, b, a1b1, a0b0); |
|
|
|
a0b0 += carry; |
|
- if (a0b0 < carry) |
|
- ++a1b1; |
|
+ a1b1 += (a0b0 < carry); |
|
a0b0 += a_i = *c; |
|
- if (a0b0 < a_i) |
|
- ++a1b1; |
|
+ a1b1 += (a0b0 < a_i); |
|
*c++ = a0b0; |
|
carry = a1b1; |
|
} |
|
*c = carry; |
|
#endif |
|
} |
|
|
|
/* Presently, this is only used by the Montgomery arithmetic code. */ |
|
diff --git a/lib/freebl/mpi/mpi.h b/lib/freebl/mpi/mpi.h |
|
--- a/lib/freebl/mpi/mpi.h |
|
+++ b/lib/freebl/mpi/mpi.h |
|
@@ -145,16 +145,54 @@ typedef int mp_sword; |
|
#define MP_USED(MP) ((MP)->used) |
|
#define MP_ALLOC(MP) ((MP)->alloc) |
|
#define MP_DIGITS(MP) ((MP)->dp) |
|
#define MP_DIGIT(MP, N) (MP)->dp[(N)] |
|
|
|
/* This defines the maximum I/O base (minimum is 2) */ |
|
#define MP_MAX_RADIX 64 |
|
|
|
+/* Constant Time Macros on mp_digits */ |
|
+#define MP_CT_HIGH_TO_LOW(x) ((mp_digit)((mp_digit)(x) >> (MP_DIGIT_BIT - 1))) |
|
+ |
|
+/* basic zero and non zero tests */ |
|
+#define MP_CT_NOT_ZERO(x) (MP_CT_HIGH_TO_LOW(((x) | (((mp_digit)0) - (x))))) |
|
+#define MP_CT_ZERO(x) (~MP_CT_HIGH_TO_LOW(((x) | (((mp_digit)0) - (x))))) |
|
+ |
|
+ |
|
+/* basic constant-time helper macro for equalities and inequalities. |
|
+ * The inequalities will produce incorrect results if |
|
+ * abs(a-b) >= MP_DIGIT_SIZE/2. This can be avoided if unsigned values stay |
|
+ * within the range 0-MP_DIGIT_MAX/2. */ |
|
+#define MP_CT_EQ(a, b) MP_CT_ZERO(((a) - (b))) |
|
+#define MP_CT_NE(a, b) MP_CT_NOT_ZERO(((a) - (b))) |
|
+#define MP_CT_GT(a, b) MP_CT_HIGH_TO_LOW((b) - (a)) |
|
+#define MP_CT_LT(a, b) MP_CT_HIGH_TO_LOW((a) - (b)) |
|
+#define MP_CT_GE(a, b) (1^MP_CT_LT(a, b)) |
|
+#define MP_CT_LE(a, b) (1^MP_CT_GT(a, b)) |
|
+#define MP_CT_TRUE ((mp_digit)1) |
|
+#define MP_CT_FALSE ((mp_digit)0) |
|
+ |
|
+/* use constant time result to select a boolean value */ |
|
+#define MP_CT_SELB(m, l, r) (((m) & (l)) | (~(m) & (r))) |
|
+ |
|
+/* full inequalities that work with full mp_digit values */ |
|
+#define MP_CT_OVERFLOW(a,b,c,d) \ |
|
+ MP_CT_SELB(MP_CT_HIGH_TO_LOW((a)^(b)), \ |
|
+ (MP_CT_HIGH_TO_LOW(d)),c) |
|
+#define MP_CT_GTU(a,b) MP_CT_OVERFLOW(a,b,MP_CT_GT(a,b),a) |
|
+#define MP_CT_LTU(a,b) MP_CT_OVERFLOW(a,b,MP_CT_LT(a,b),b) |
|
+#define MP_CT_GEU(a,b) MP_CT_OVERFLOW(a,b,MP_CT_GE(a,b),a) |
|
+#define MP_CT_LEU(a,b) MP_CT_OVERFLOW(a,b,MP_CT_LE(a,b),b) |
|
+#define MP_CT_GTS(a,b) MP_CT_OVERFLOW(a,b,MP_CT_GT(a,b),b) |
|
+#define MP_CT_LTS(a,b) MP_CT_OVERFLOW(a,b,MP_CT_LT(a,b),a) |
|
+#define MP_CT_GES(a,b) MP_CT_OVERFLOW(a,b,MP_CT_GE(a,b),b) |
|
+#define MP_CT_LES(a,b) MP_CT_OVERFLOW(a,b,MP_CT_LE(a,b),a) |
|
+ |
|
+ |
|
typedef struct { |
|
mp_sign sign; /* sign of this quantity */ |
|
mp_size alloc; /* how many digits allocated */ |
|
mp_size used; /* how many digits used */ |
|
mp_digit *dp; /* the digits themselves */ |
|
} mp_int; |
|
|
|
/* Default precision */ |
|
@@ -185,17 +223,19 @@ mp_err mp_expt_d(const mp_int *a, mp_dig |
|
|
|
/* Sign manipulations */ |
|
mp_err mp_abs(const mp_int *a, mp_int *b); |
|
mp_err mp_neg(const mp_int *a, mp_int *b); |
|
|
|
/* Full arithmetic */ |
|
mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c); |
|
mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c); |
|
+mp_err mp_subCT(const mp_int *a, mp_int *b, mp_int *c, mp_digit *borrow); |
|
mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int *c); |
|
+mp_err mp_mulCT(mp_int *a, mp_int *b, mp_int *c, mp_size setSize); |
|
#if MP_SQUARE |
|
mp_err mp_sqr(const mp_int *a, mp_int *b); |
|
#else |
|
#define mp_sqr(a, b) mp_mul(a, a, b) |
|
#endif |
|
mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r); |
|
mp_err mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r); |
|
mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c); |
|
@@ -212,23 +252,30 @@ mp_err mp_mulmod(const mp_int *a, const |
|
mp_err mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c); |
|
#else |
|
#define mp_sqrmod(a, m, c) mp_mulmod(a, a, m, c) |
|
#endif |
|
mp_err mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c); |
|
mp_err mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c); |
|
#endif /* MP_MODARITH */ |
|
|
|
+/* montgomery math */ |
|
+mp_err mp_to_mont(const mp_int *x, const mp_int *N, mp_int *xMont); |
|
+mp_digit mp_calculate_mont_n0i(const mp_int *N); |
|
+mp_err mp_reduceCT(const mp_int *a, const mp_int *m, mp_digit n0i, mp_int *ct); |
|
+mp_err mp_mulmontmodCT(mp_int *a, mp_int *b, const mp_int *m, mp_digit n0i, mp_int *c); |
|
+ |
|
/* Comparisons */ |
|
int mp_cmp_z(const mp_int *a); |
|
int mp_cmp_d(const mp_int *a, mp_digit d); |
|
int mp_cmp(const mp_int *a, const mp_int *b); |
|
int mp_cmp_mag(const mp_int *a, const mp_int *b); |
|
int mp_isodd(const mp_int *a); |
|
int mp_iseven(const mp_int *a); |
|
+mp_err mp_selectCT(mp_digit cond, const mp_int *a, const mp_int *b, mp_int *ret); |
|
|
|
/* Number theoretic */ |
|
mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c); |
|
mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c); |
|
mp_err mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y); |
|
mp_err mp_invmod(const mp_int *a, const mp_int *m, mp_int *c); |
|
mp_err mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c); |
|
|
|
diff --git a/lib/freebl/mpi/mpmontg.c b/lib/freebl/mpi/mpmontg.c |
|
--- a/lib/freebl/mpi/mpmontg.c |
|
+++ b/lib/freebl/mpi/mpmontg.c |
|
@@ -124,30 +124,37 @@ s_mp_mul_mont(const mp_int *a, const mp_ |
|
} |
|
res = MP_OKAY; |
|
|
|
CLEANUP: |
|
return res; |
|
} |
|
#endif |
|
|
|
-STATIC |
|
mp_err |
|
-s_mp_to_mont(const mp_int *x, mp_mont_modulus *mmm, mp_int *xMont) |
|
+mp_to_mont(const mp_int *x, const mp_int *N, mp_int *xMont) |
|
{ |
|
mp_err res; |
|
|
|
/* xMont = x * R mod N where N is modulus */ |
|
- MP_CHECKOK(mp_copy(x, xMont)); |
|
- MP_CHECKOK(s_mp_lshd(xMont, MP_USED(&mmm->N))); /* xMont = x << b */ |
|
- MP_CHECKOK(mp_div(xMont, &mmm->N, 0, xMont)); /* mod N */ |
|
+ if (x != xMont) { |
|
+ MP_CHECKOK(mp_copy(x, xMont)); |
|
+ } |
|
+ MP_CHECKOK(s_mp_lshd(xMont, MP_USED(N))); /* xMont = x << b */ |
|
+ MP_CHECKOK(mp_div(xMont, N, 0, xMont)); /* mod N */ |
|
CLEANUP: |
|
return res; |
|
} |
|
|
|
+mp_digit |
|
+mp_calculate_mont_n0i(const mp_int *N) |
|
+{ |
|
+ return 0 - s_mp_invmod_radix(MP_DIGIT(N,0)); |
|
+} |
|
+ |
|
#ifdef MP_USING_MONT_MULF |
|
|
|
/* the floating point multiply is already cache safe, |
|
* don't turn on cache safe unless we specifically |
|
* force it */ |
|
#ifndef MP_FORCE_CACHE_SAFE |
|
#undef MP_USING_CACHE_SAFE_MOD_EXP |
|
#endif |
|
@@ -193,17 +200,17 @@ mp_exptmod_f(const mp_int *montBase, |
|
MP_DIGITS(&accum1) = 0; |
|
|
|
for (i = 0; i < MAX_ODD_INTS; ++i) |
|
oddPowers[i] = 0; |
|
|
|
MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2)); |
|
|
|
mp_set(&accum1, 1); |
|
- MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1)); |
|
+ MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1)); |
|
MP_CHECKOK(s_mp_pad(&accum1, nLen)); |
|
|
|
oddPowSize = 2 * nLen + 1; |
|
dTmpSize = 2 * oddPowSize; |
|
dSize = sizeof(double) * (nLen * 4 + 1 + |
|
((odd_ints + 1) * oddPowSize) + dTmpSize); |
|
dBuf = malloc(dSize); |
|
if (!dBuf) { |
|
@@ -473,17 +480,17 @@ mp_exptmod_i(const mp_int *montBase, |
|
for (i = 1; i < odd_ints; ++i) { |
|
MP_CHECKOK(mp_init_size(oddPowers + i, nLen + 2 * MP_USED(&power2) + 2)); |
|
MP_CHECKOK(mp_mul(oddPowers + (i - 1), &power2, oddPowers + i)); |
|
MP_CHECKOK(s_mp_redc(oddPowers + i, mmm)); |
|
} |
|
|
|
/* set accumulator to montgomery residue of 1 */ |
|
mp_set(&accum1, 1); |
|
- MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1)); |
|
+ MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1)); |
|
pa1 = &accum1; |
|
pa2 = &accum2; |
|
|
|
for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) { |
|
mp_size smallExp; |
|
MP_CHECKOK(mpl_get_bits(exponent, expOff, window_bits)); |
|
smallExp = (mp_size)res; |
|
|
|
@@ -862,17 +869,17 @@ mp_exptmod_safe_i(const mp_int *montBase |
|
/* build the first WEAVE_WORD powers inline */ |
|
/* if WEAVE_WORD_SIZE is not 4, this code will have to change */ |
|
if (num_powers > 2) { |
|
MP_CHECKOK(mp_init_size(&accum[0], 3 * nLen + 2)); |
|
MP_CHECKOK(mp_init_size(&accum[1], 3 * nLen + 2)); |
|
MP_CHECKOK(mp_init_size(&accum[2], 3 * nLen + 2)); |
|
MP_CHECKOK(mp_init_size(&accum[3], 3 * nLen + 2)); |
|
mp_set(&accum[0], 1); |
|
- MP_CHECKOK(s_mp_to_mont(&accum[0], mmm, &accum[0])); |
|
+ MP_CHECKOK(mp_to_mont(&accum[0], &(mmm->N), &accum[0])); |
|
MP_CHECKOK(mp_copy(montBase, &accum[1])); |
|
SQR(montBase, &accum[2]); |
|
MUL_NOWEAVE(montBase, &accum[2], &accum[3]); |
|
powersArray = (mp_digit *)malloc(num_powers * (nLen * sizeof(mp_digit) + 1)); |
|
if (!powersArray) { |
|
res = MP_MEM; |
|
goto CLEANUP; |
|
} |
|
@@ -881,17 +888,17 @@ mp_exptmod_safe_i(const mp_int *montBase |
|
MP_CHECKOK(mpi_to_weave(accum, powers, nLen, num_powers)); |
|
if (first_window < 4) { |
|
MP_CHECKOK(mp_copy(&accum[first_window], &accum1)); |
|
first_window = num_powers; |
|
} |
|
} else { |
|
if (first_window == 0) { |
|
mp_set(&accum1, 1); |
|
- MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1)); |
|
+ MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1)); |
|
} else { |
|
/* assert first_window == 1? */ |
|
MP_CHECKOK(mp_copy(montBase, &accum1)); |
|
} |
|
} |
|
|
|
/* |
|
* calculate all the powers in the powers array. |
|
@@ -1054,19 +1061,19 @@ mp_exptmod(const mp_int *inBase, const m |
|
nLen = MP_USED(modulus); |
|
MP_CHECKOK(mp_init_size(&montBase, 2 * nLen + 2)); |
|
|
|
mmm.N = *modulus; /* a copy of the mp_int struct */ |
|
|
|
/* compute n0', given n0, n0' = -(n0 ** -1) mod MP_RADIX |
|
** where n0 = least significant mp_digit of N, the modulus. |
|
*/ |
|
- mmm.n0prime = 0 - s_mp_invmod_radix(MP_DIGIT(modulus, 0)); |
|
+ mmm.n0prime = mp_calculate_mont_n0i(modulus); |
|
|
|
- MP_CHECKOK(s_mp_to_mont(base, &mmm, &montBase)); |
|
+ MP_CHECKOK(mp_to_mont(base, modulus, &montBase)); |
|
|
|
bits_in_exponent = mpl_significant_bits(exponent); |
|
#ifdef MP_USING_CACHE_SAFE_MOD_EXP |
|
if (mp_using_cache_safe_exp) { |
|
if (bits_in_exponent > 780) |
|
window_bits = 6; |
|
else if (bits_in_exponent > 256) |
|
window_bits = 5; |
|
diff --git a/lib/freebl/rsa.c b/lib/freebl/rsa.c |
|
--- a/lib/freebl/rsa.c |
|
+++ b/lib/freebl/rsa.c |
|
@@ -65,16 +65,18 @@ struct blindingParamsStr { |
|
** the Handbook of Applied Cryptography, 11.118-11.119. |
|
*/ |
|
struct RSABlindingParamsStr { |
|
/* Blinding-specific parameters */ |
|
PRCList link; /* link to list of structs */ |
|
SECItem modulus; /* list element "key" */ |
|
blindingParams *free, *bp; /* Blinding parameters queue */ |
|
blindingParams array[RSA_BLINDING_PARAMS_MAX_CACHE_SIZE]; |
|
+ /* precalculate montgomery reduction value */ |
|
+ mp_digit n0i; /* n0i = -( n & MP_DIGIT) ** -1 mod mp_RADIX */ |
|
}; |
|
typedef struct RSABlindingParamsStr RSABlindingParams; |
|
|
|
/* |
|
** RSABlindingParamsListStr |
|
** |
|
** List of key-specific blinding params. The arena holds the volatile pool |
|
** of memory for each entry and the list itself. The lock is for list |
|
@@ -1210,16 +1212,18 @@ generate_blinding_params(RSAPrivateKey * |
|
CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(kb, modLen)); |
|
CHECK_MPI_OK(mp_read_unsigned_octets(&k, kb, modLen)); |
|
/* k < n */ |
|
CHECK_MPI_OK(mp_mod(&k, n, &k)); |
|
/* f = k**e mod n */ |
|
CHECK_MPI_OK(mp_exptmod(&k, &e, n, f)); |
|
/* g = k**-1 mod n */ |
|
CHECK_MPI_OK(mp_invmod(&k, n, g)); |
|
+ /* g in montgomery form.. */ |
|
+ CHECK_MPI_OK(mp_to_mont(g, n, g)); |
|
cleanup: |
|
if (kb) |
|
PORT_ZFree(kb, modLen); |
|
mp_clear(&k); |
|
mp_clear(&e); |
|
if (err) { |
|
MP_TO_SEC_ERROR(err); |
|
rv = SECFailure; |
|
@@ -1246,23 +1250,26 @@ init_blinding_params(RSABlindingParams * |
|
* of rsabp->array pointer and must be set to NULL |
|
*/ |
|
rsabp->array[RSA_BLINDING_PARAMS_MAX_CACHE_SIZE - 1].next = NULL; |
|
|
|
bp = rsabp->array; |
|
rsabp->bp = NULL; |
|
rsabp->free = bp; |
|
|
|
+ /* precalculate montgomery reduction parameter */ |
|
+ rsabp->n0i = mp_calculate_mont_n0i(n); |
|
+ |
|
/* List elements are keyed using the modulus */ |
|
return SECITEM_CopyItem(NULL, &rsabp->modulus, &key->modulus); |
|
} |
|
|
|
static SECStatus |
|
get_blinding_params(RSAPrivateKey *key, mp_int *n, unsigned int modLen, |
|
- mp_int *f, mp_int *g) |
|
+ mp_int *f, mp_int *g, mp_digit *n0i) |
|
{ |
|
RSABlindingParams *rsabp = NULL; |
|
blindingParams *bpUnlinked = NULL; |
|
blindingParams *bp; |
|
PRCList *el; |
|
SECStatus rv = SECSuccess; |
|
mp_err err = MP_OKAY; |
|
int cmp = -1; |
|
@@ -1312,16 +1319,17 @@ get_blinding_params(RSAPrivateKey *key, |
|
** head (since el would have looped back to the head). |
|
*/ |
|
PR_INSERT_BEFORE(&rsabp->link, el); |
|
} |
|
|
|
/* We've found (or created) the RSAblindingParams struct for this key. |
|
* Now, search its list of ready blinding params for a usable one. |
|
*/ |
|
+ *n0i = rsabp->n0i; |
|
while (0 != (bp = rsabp->bp)) { |
|
#ifdef UNSAFE_FUZZER_MODE |
|
/* Found a match and there are still remaining uses left */ |
|
/* Return the parameters */ |
|
CHECK_MPI_OK(mp_copy(&bp->f, f)); |
|
CHECK_MPI_OK(mp_copy(&bp->g, g)); |
|
|
|
PZ_Unlock(blindingParamsList.lock); |
|
@@ -1426,16 +1434,17 @@ cleanup: |
|
rsabp->free = bp; |
|
} |
|
if (holdingLock) { |
|
PZ_Unlock(blindingParamsList.lock); |
|
} |
|
if (err) { |
|
MP_TO_SEC_ERROR(err); |
|
} |
|
+ *n0i = 0; |
|
return SECFailure; |
|
} |
|
|
|
/* |
|
** Perform a raw private-key operation |
|
** Length of input and output buffers are equal to key's modulus len. |
|
*/ |
|
static SECStatus |
|
@@ -1445,16 +1454,17 @@ rsa_PrivateKeyOp(RSAPrivateKey *key, |
|
PRBool check) |
|
{ |
|
unsigned int modLen; |
|
unsigned int offset; |
|
SECStatus rv = SECSuccess; |
|
mp_err err; |
|
mp_int n, c, m; |
|
mp_int f, g; |
|
+ mp_digit n0i; |
|
if (!key || !output || !input) { |
|
PORT_SetError(SEC_ERROR_INVALID_ARGS); |
|
return SECFailure; |
|
} |
|
/* check input out of range (needs to be in range [0..n-1]) */ |
|
modLen = rsa_modulusLen(&key->modulus); |
|
if (modLen == 0) { |
|
PORT_SetError(SEC_ERROR_INVALID_ARGS); |
|
@@ -1476,17 +1486,17 @@ rsa_PrivateKeyOp(RSAPrivateKey *key, |
|
CHECK_MPI_OK(mp_init(&f)); |
|
CHECK_MPI_OK(mp_init(&g)); |
|
SECITEM_TO_MPINT(key->modulus, &n); |
|
OCTETS_TO_MPINT(input, &c, modLen); |
|
/* If blinding, compute pre-image of ciphertext by multiplying by |
|
** blinding factor |
|
*/ |
|
if (nssRSAUseBlinding) { |
|
- CHECK_SEC_OK(get_blinding_params(key, &n, modLen, &f, &g)); |
|
+ CHECK_SEC_OK(get_blinding_params(key, &n, modLen, &f, &g, &n0i)); |
|
/* c' = c*f mod n */ |
|
CHECK_MPI_OK(mp_mulmod(&c, &f, &n, &c)); |
|
} |
|
/* Do the private key operation m = c**d mod n */ |
|
if (key->prime1.len == 0 || |
|
key->prime2.len == 0 || |
|
key->exponent1.len == 0 || |
|
key->exponent2.len == 0 || |
|
@@ -1497,17 +1507,17 @@ rsa_PrivateKeyOp(RSAPrivateKey *key, |
|
} else { |
|
CHECK_SEC_OK(rsa_PrivateKeyOpCRTNoCheck(key, &m, &c)); |
|
} |
|
/* If blinding, compute post-image of plaintext by multiplying by |
|
** blinding factor |
|
*/ |
|
if (nssRSAUseBlinding) { |
|
/* m = m'*g mod n */ |
|
- CHECK_MPI_OK(mp_mulmod(&m, &g, &n, &m)); |
|
+ CHECK_MPI_OK(mp_mulmontmodCT(&m, &g, &n, n0i, &m)); |
|
} |
|
err = mp_to_fixlen_octets(&m, output, modLen); |
|
if (err >= 0) |
|
err = MP_OKAY; |
|
cleanup: |
|
mp_clear(&n); |
|
mp_clear(&c); |
|
mp_clear(&m);
|
|
|