From 9a3743c7c444cbc264d8ba0417c7bd7106d2665a Mon Sep 17 00:00:00 2001 From: Sean Bowe Date: Mon, 3 Apr 2017 21:41:38 -0600 Subject: [PATCH] Various improvements to BLS implementation: * Switch from rayon to crossbeam * Allow windows to be reused per batch exp * Allow batchexp to take vector by value * Allow access to thread-local engine context * Allow cloning of Engine * Clean up wNAF abstractions to reduce heap allocation --- Cargo.toml | 3 +- src/curves/bls381/ec.rs | 21 +++- src/curves/bls381/fp.rs | 2 + src/curves/bls381/mod.rs | 201 ++++++++++++++++++--------------- src/curves/bls381/tests/mod.rs | 4 +- src/curves/mod.rs | 63 +++++++++-- src/curves/tests/mod.rs | 12 +- 7 files changed, 201 insertions(+), 105 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 31092f6..85aaa76 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,9 +10,10 @@ version = "0.0.1" [dependencies] rand = "0.3.*" -rayon = "0.6.*" byteorder = "1.*" serde = "0.9.*" +crossbeam = "0.2" +num_cpus = "1.0" [dev-dependencies.bincode] git = "https://github.com/TyOverby/bincode.git" diff --git a/src/curves/bls381/ec.rs b/src/curves/bls381/ec.rs index 0820b9d..f0a7652 100644 --- a/src/curves/bls381/ec.rs +++ b/src/curves/bls381/ec.rs @@ -26,11 +26,13 @@ macro_rules! curve_impl { z: $basefield } + #[derive(Clone)] struct $params_name { zero: $name, one: $name, coeff_b: $basefield, - windows: Vec + windows: Vec, + batch_windows: (usize, Vec) } impl Convert<$name_affine, $engine> for $name { @@ -130,6 +132,23 @@ macro_rules! curve_impl { None } + fn optimal_window_batch(&self, engine: &$engine, scalars: usize) -> WindowTable<$engine, $name, Vec<$name>> { + let mut window = engine.$params_field.batch_windows.0; + + for i in &engine.$params_field.batch_windows.1 { + if scalars >= *i { + window += 1; + } else { + break; + } + } + + let mut table = WindowTable::new(); + table.set_base(engine, self, window); + + table + } + fn zero(engine: &$engine) -> Self { engine.$params_field.zero } diff --git a/src/curves/bls381/fp.rs b/src/curves/bls381/fp.rs index ee4a45d..0612e1a 100644 --- a/src/curves/bls381/fp.rs +++ b/src/curves/bls381/fp.rs @@ -11,6 +11,7 @@ macro_rules! fp_params_impl { modulus_minus_1_over_2 = $modulus_minus_1_over_2:expr, inv = $inv:expr ) => { + #[derive(Clone)] struct $params_name { modulus: [u64; $limbs], r1: $name, @@ -57,6 +58,7 @@ macro_rules! fp_params_impl { t_plus_1_over_2 = $t_plus_1_over_2:expr, inv = $inv:expr ) => { + #[derive(Clone)] struct $params_name { modulus: [u64; $limbs], r1: $name, diff --git a/src/curves/bls381/mod.rs b/src/curves/bls381/mod.rs index 05e843a..7a2a8ee 100644 --- a/src/curves/bls381/mod.rs +++ b/src/curves/bls381/mod.rs @@ -3,6 +3,7 @@ use std::fmt; use std::borrow::Borrow; use super::{ + WindowTable, Engine, Group, GroupAffine, @@ -690,6 +691,7 @@ impl G2Prepared { } } +#[derive(Clone)] pub struct Bls381 { fqparams: FqParams, frparams: FrParams, @@ -724,6 +726,12 @@ impl Engine for Bls381 { type G1 = G1; type G2 = G2; + fn with FnOnce(&'a Self) -> R>(cb: F) -> R { + ENGINE.with(|e| { + cb(e) + }) + } + fn new() -> Bls381 { let mut tmp = Bls381 { fqparams: FqParams::partial_init(), @@ -732,13 +740,15 @@ impl Engine for Bls381 { zero: G1 { x: Fq::zero(), y: Fq::zero(), z: Fq::zero() }, one: G1 { x: Fq::zero(), y: Fq::zero(), z: Fq::zero() }, coeff_b: Fq::zero(), - windows: vec![11, 35, 110] + windows: vec![11, 35, 110], + batch_windows: (4, vec![2, 3, 10, 20, 53, 111, 266, 426, 1273, 4742, 6054, 6054, 6054]) }, g2params: G2Params { zero: G2 { x: Fq2::zero(), y: Fq2::zero(), z: Fq2::zero() }, one: G2 { x: Fq2::zero(), y: Fq2::zero(), z: Fq2::zero() }, coeff_b: Fq2::zero(), - windows: vec![11, 35, 114] + windows: vec![11, 35, 114], + batch_windows: (4, vec![2, 4, 10, 29, 54, 120, 314, 314, 314, 314]) }, frobenius_coeff_fq2: [Fq::zero(); 2], frobenius_coeff_fq6_c1: [Fq2::zero(); 6], @@ -999,36 +1009,58 @@ impl Engine for Bls381 { f } - fn batch_baseexp>(&self, base: &G, s: &[Fr]) -> Vec + fn batch_baseexp, S: AsRef<[Self::Fr]>>(&self, table: &WindowTable>, s: S) -> Vec { - // TODO: pick an optimal window size based on number of elements and - // considering the exact group - const WINDOW_SIZE_BASE: usize = 18; + use crossbeam; + use num_cpus; + + let s = s.as_ref(); + let mut ret = vec![G::zero(self).to_affine(self); s.len()]; - use rayon::prelude::*; + crossbeam::scope(|scope| { + let chunk = (s.len() / num_cpus::get()) + 1; - let mut table = vec![]; - window_table(self, WINDOW_SIZE_BASE, base, &mut table); + for (s, b) in s.chunks(chunk).zip(ret.chunks_mut(chunk)) { + let mut table = table.shared(); - s.par_iter().map(|s| { - let mut b = G::zero(self); - windowed_exp(self, WINDOW_SIZE_BASE, &table, &mut b, &s.into_repr(self)); - b.to_affine(self) - }).collect() + scope.spawn(move || { + for (s, b) in s.iter().zip(b.iter_mut()) { + let mut tmp = G::zero(self); + table.exp(self, &mut tmp, s.into_repr(self)); + *b = tmp.to_affine(self); + } + }); + } + }); + + ret } - fn multiexp>(&self, g: &[G::Affine], s: &[Fr]) -> G { - use rayon::prelude::*; - use rayon::par_iter::zip::ZipIter; - - return ZipIter::new( - g.par_chunks((g.len() / 32) + 1), - s.par_chunks((g.len() / 32) + 1) - ).map(|(g, s)| { - multiexp_inner::(self, g, s) - }).reduce(|| G::zero(self), |mut a, b| { - a.add_assign(self, &b); - a + fn multiexp>(&self, g: &[G::Affine], s: &[Fr]) -> Result { + if g.len() != s.len() { + return Err(()); + } + + use crossbeam; + use num_cpus; + + return crossbeam::scope(|scope| { + let mut threads = vec![]; + + let chunk = (s.len() / num_cpus::get()) + 1; + + for (g, s) in g.chunks(chunk).zip(s.chunks(chunk)) { + threads.push(scope.spawn(move || { + multiexp_inner(self, g, s) + })); + } + + let mut acc = G::zero(self); + for t in threads { + acc.add_assign(self, &t.join()); + } + + Ok(acc) }); fn multiexp_inner>(engine: &Bls381, g: &[G::Affine], s: &[Fr]) -> G @@ -1132,7 +1164,7 @@ impl Engine for Bls381 { }); } - let mut table_space = vec![]; + let mut table = WindowTable::new(); while let Some(mut greatest) = heap.pop() { { @@ -1140,7 +1172,7 @@ impl Engine for Bls381 { if second_greatest.is_none() || greatest.justexp(second_greatest.unwrap()) { // Either this is the last value or multiplying is considered more efficient than // rewriting and reinsertion into the heap. - opt_exp(engine, &mut elements[greatest.index], &greatest.value, &mut table_space); + opt_exp(engine, &mut elements[greatest.index], greatest.value, &mut table); result.add_assign(engine, &elements[greatest.index]); continue; } else { @@ -1164,87 +1196,72 @@ impl Engine for Bls381 { } } -// Converts a scalar into wNAF form based on given window size. -// TODO: instead of a function, and allocating a vector, create a smart -// iterator. -fn wnaf(e: &Bls381, window: usize, s: &>::Repr) -> Vec -{ - use std::default::Default; - let mut res = Vec::with_capacity(Fr::num_bits(e) + 1); - let mut c = *s; +impl, B: Borrow<[G]>> WindowTable { + fn exp(&mut self, e: &Bls381, into: &mut G, mut c: >::Repr) { + assert!(self.window > 1); - let mut tmp = >::Repr::default(); + self.wnaf.truncate(0); + self.wnaf.reserve(Fr::num_bits(e) + 1); - while !c.iter().all(|&e| e==0) { - let mut u; - if fr_arith::odd(&c) { - u = (c[0] % (1 << (window+1))) as i64; + // Convert the scalar `c` into wNAF form. + { + use std::default::Default; + let mut tmp = >::Repr::default(); - if u > (1 << window) { - u -= 1 << (window+1); - } + while !c.iter().all(|&e| e==0) { + let mut u; + if fr_arith::odd(&c) { + u = (c[0] % (1 << (self.window+1))) as i64; + + if u > (1 << self.window) { + u -= 1 << (self.window+1); + } + + if u > 0 { + tmp[0] = u as u64; + fr_arith::sub_noborrow(&mut c, &tmp); + } else { + tmp[0] = (-u) as u64; + fr_arith::add_nocarry(&mut c, &tmp); + } + } else { + u = 0; + } + + self.wnaf.push(u); - if u > 0 { - tmp[0] = u as u64; - fr_arith::sub_noborrow(&mut c, &tmp); - } else { - tmp[0] = (-u) as u64; - fr_arith::add_nocarry(&mut c, &tmp); + fr_arith::div2(&mut c); } - } else { - u = 0; } - res.push(u); + // Perform wNAF exponentiation. + *into = G::zero(e); - fr_arith::div2(&mut c); - } + for n in self.wnaf.iter().rev() { + into.double(e); - res + if *n != 0 { + if *n > 0 { + into.add_assign(e, &self.table.borrow()[(n/2) as usize]); + } else { + into.sub_assign(e, &self.table.borrow()[((-n)/2) as usize]); + } + } + } + } } // Performs optimal exponentiation -fn opt_exp>(e: &Bls381, base: &mut G, scalar: &>::Repr, table: &mut Vec) +fn opt_exp>(e: &Bls381, base: &mut G, scalar: >::Repr, table: &mut WindowTable>) { - let bits = fr_arith::num_bits(scalar); + let bits = fr_arith::num_bits(&scalar); match G::optimal_window(e, bits) { Some(window) => { - window_table(e, window, base, table); - windowed_exp(e, window, &table, base, scalar); + table.set_base(e, base, window); + table.exp(e, base, scalar); }, None => { - base.mul_assign(e, scalar); - } - } -} - -fn window_table>(e: &Bls381, window: usize, base: &G, table: &mut Vec) -{ - table.truncate(0); - - let mut tmp = *base; - let mut dbl = tmp; - dbl.double(e); - - for _ in 0..(1 << (window-1)) { - table.push(tmp); - tmp.add_assign(e, &dbl); - } -} - -fn windowed_exp>(e: &Bls381, window: usize, table: &[G], base: &mut G, scalar: &>::Repr) -{ - *base = G::zero(e); - - for n in wnaf(e, window, scalar).into_iter().rev() { - base.double(e); - - if n != 0 { - if n > 0 { - base.add_assign(e, &table[(n/2) as usize]); - } else { - base.sub_assign(e, &table[((-n)/2) as usize]); - } + base.mul_assign(e, &scalar); } } } diff --git a/src/curves/bls381/tests/mod.rs b/src/curves/bls381/tests/mod.rs index befe4b7..99aca36 100644 --- a/src/curves/bls381/tests/mod.rs +++ b/src/curves/bls381/tests/mod.rs @@ -12,12 +12,12 @@ fn test_vectors>(e: &E, expected: &[u8]) { { let acc = acc.to_affine(e); let exp: >::Uncompressed = - bincode::deserialize_from(&mut expected_reader, bincode::SizeLimit::Infinite).unwrap(); + bincode::deserialize_from(&mut expected_reader, bincode::Infinite).unwrap(); assert!(acc == exp.to_affine(e).unwrap()); let acc = acc.to_uncompressed(e); - bincode::serialize_into(&mut bytes, &acc, bincode::SizeLimit::Infinite).unwrap(); + bincode::serialize_into(&mut bytes, &acc, bincode::Infinite).unwrap(); } acc.double(e); acc.add_assign(e, &G::one(e)); diff --git a/src/curves/mod.rs b/src/curves/mod.rs index f4c4309..21b93b7 100644 --- a/src/curves/mod.rs +++ b/src/curves/mod.rs @@ -1,11 +1,14 @@ use rand; use std::fmt; +use std::ops::Deref; +use std::borrow::Borrow; +use std::marker::PhantomData; use serde::{Serialize, Deserialize}; pub mod bls381; -pub trait Engine: Sized +pub trait Engine: Sized + Clone { type Fq: PrimeField; type Fr: SnarkField; @@ -16,6 +19,9 @@ pub trait Engine: Sized fn new() -> Self; + /// Operate over the thread-local engine instance + fn with FnOnce(&'a Self) -> R>(F) -> R; + fn pairing(&self, p: &G1, q: &G2) -> Self::Fqk where G1: Convert<>::Affine, Self>, G2: Convert<>::Affine, Self> @@ -34,8 +40,9 @@ pub trait Engine: Sized )>; fn final_exponentiation(&self, &Self::Fqk) -> Self::Fqk; - fn multiexp>(&self, &[G::Affine], &[Self::Fr]) -> G; - fn batch_baseexp>(&self, base: &G, scalars: &[Self::Fr]) -> Vec; + /// Perform multi-exponentiation. g and s must have the same length. + fn multiexp>(&self, g: &[G::Affine], s: &[Self::Fr]) -> Result; + fn batch_baseexp, S: AsRef<[Self::Fr]>>(&self, table: &WindowTable>, scalars: S) -> Vec; } pub trait Group: Sized + @@ -67,6 +74,7 @@ pub trait Group: Sized + fn mul_assign>(&mut self, &E, other: &S); fn optimal_window(&E, scalar_bits: usize) -> Option; + fn optimal_window_batch(&self, &E, scalars: usize) -> WindowTable>; } pub trait GroupAffine>: Copy + @@ -163,7 +171,7 @@ pub trait SqrtField: Field pub trait PrimeField: SqrtField + Convert<[u64], E> { /// Little endian representation of a field element. - type Repr: Convert<[u64], E>; + type Repr: Convert<[u64], E> + Eq + Clone; fn from_u64(&E, u64) -> Self; fn from_str(&E, s: &str) -> Result; fn from_repr(&E, Self::Repr) -> Result; @@ -196,6 +204,50 @@ pub struct BitIterator { n: usize } +pub struct WindowTable> { + table: Table, + wnaf: Vec, + window: usize, + _marker: PhantomData<(E, G)> +} + +impl> WindowTable> { + fn new() -> Self { + WindowTable { + table: vec![], + wnaf: vec![], + window: 0, + _marker: PhantomData + } + } + + fn set_base(&mut self, e: &E, base: &G, window: usize) { + assert!(window > 1); + + self.window = window; + self.table.truncate(0); + self.table.reserve(1 << (window-1)); + + let mut tmp = *base; + let mut dbl = tmp; + dbl.double(e); + + for _ in 0..(1 << (window-1)) { + self.table.push(tmp); + tmp.add_assign(e, &dbl); + } + } + + fn shared(&self) -> WindowTable { + WindowTable { + table: &self.table[..], + wnaf: vec![], + window: self.window, + _marker: PhantomData + } + } +} + impl> Iterator for BitIterator { type Item = bool; @@ -224,9 +276,6 @@ impl<'a> From<&'a [u64]> for BitIterator<&'a [u64]> } } -use std::ops::Deref; -use std::borrow::Borrow; - pub enum Cow<'a, T: 'a> { Owned(T), Borrowed(&'a T) diff --git a/src/curves/tests/mod.rs b/src/curves/tests/mod.rs index cced874..dec568f 100644 --- a/src/curves/tests/mod.rs +++ b/src/curves/tests/mod.rs @@ -24,7 +24,7 @@ fn test_multiexp>(e: &E) { let s: Vec = (0..1000).map(|_| E::Fr::random(e, rng)).collect(); let naive = naiveexp::(e, &g, &s); - let multi = e.multiexp::(&g, &s); + let multi = e.multiexp::(&g, &s).unwrap(); assert!(naive.is_equal(e, &multi)); assert!(multi.is_equal(e, &naive)); @@ -36,11 +36,19 @@ fn test_multiexp>(e: &E) { let s = vec![E::Fr::from_str(e, "3435973836800000000000000000000000").unwrap(), E::Fr::from_str(e, "3435973836700000000000000000000000").unwrap()]; let naive = naiveexp::(e, &g, &s); - let multi = e.multiexp::(&g, &s); + let multi = e.multiexp::(&g, &s).unwrap(); assert!(naive.is_equal(e, &multi)); assert!(multi.is_equal(e, &naive)); } + + { + let rng = &mut rand::thread_rng(); + let s = vec![E::Fr::one(e); 100]; + let g = vec![G::random(e, rng).to_affine(e); 101]; + + assert!(e.multiexp::(&g, &s).is_err()); + } } fn test_bilinearity(e: &E) {