argon2: add parallelism (#547)

Adds a `parallel` feature, with an optional dependency on `rayon`, and
parallelizes the filling of blocks.
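
For callers nothing changes beyond enabling the feature: the hashing API stays the same, and a `p_cost` greater than 1 is what gives the lanes something to run concurrently. A minimal usage sketch (the parameter values and salt are arbitrary examples, and it assumes the crate is built with `features = ["parallel"]`):

    use argon2::{Algorithm, Argon2, Params, Version};

    fn main() -> Result<(), argon2::Error> {
        // 64 MiB of memory, 3 iterations, 4 lanes; with the `parallel` feature
        // enabled, the 4 lanes of each slice can be filled on rayon worker threads.
        let params = Params::new(64 * 1024, 3, 4, Some(32))?;
        let argon2 = Argon2::new(Algorithm::Argon2id, Version::V0x13, params);

        let mut out = [0u8; 32];
        argon2.hash_password_into(b"password", b"example salt", &mut out)?;
        Ok(())
    }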

Coordinated shared access to the memory blocks is implemented by a
`Memory::for_each_segment` extension method, which iterates over the
lanes of each slice (in parallel via `rayon::iter::ParallelIterator`
when the `parallel` feature is enabled, sequentially otherwise) and
hands out `SegmentView` views into the Argon2 block memory that are
safe to use concurrently.

The views alias in the regions that are read-only, but are disjoint in
the regions where mutation happens. Effectively, with a combination of
mutable borrowing and runtime checks, they implement the cooperative
contract outlined in section 3.4 of RFC 9106. This is similar to what
was suggested in #380.

To avoid creating aliasing mutable references into the entire buffer of
blocks (which would be UB), raw pointers are used up to the moment where
a reference (shared or mutable) into a specific block is handed out; at
that point, the runtime checks ensure problematic aliasing cannot occur.
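
The scheme is easiest to see in a stripped-down, sequential model. The sketch below is not the crate's code: `Block` shrinks to a single `u64`, the view type is a hypothetical `SegmentDemo`, and a toy fill rule stands in for the Argon2 compression step; only the pointer-plus-runtime-check pattern mirrors the real `argon2/src/memory.rs` further down.

    use core::marker::PhantomData;
    use core::ptr::NonNull;

    /// Number of vertical slices per lane, as in RFC 9106.
    const SYNC_POINTS: usize = 4;

    /// Hypothetical stand-in for the real 1 KiB Argon2 block.
    #[derive(Clone, Copy, Default)]
    struct Block(u64);

    /// Simplified segment view: reads may alias (own lane, or any other slice),
    /// writes are confined to this view's own segment (slice x lane).
    struct SegmentDemo<'a> {
        base: NonNull<Block>,
        block_count: usize,
        lane_length: usize,
        slice: usize,
        lane: usize,
        _marker: PhantomData<&'a mut Block>,
    }

    impl SegmentDemo<'_> {
        fn lane_of(&self, i: usize) -> usize {
            i / self.lane_length
        }

        fn slice_of(&self, i: usize) -> usize {
            i / (self.lane_length / SYNC_POINTS) % SYNC_POINTS
        }

        fn get(&self, i: usize) -> &Block {
            assert!(i < self.block_count);
            assert!(self.lane_of(i) == self.lane || self.slice_of(i) != self.slice);
            // SAFETY: in bounds, and no view can be writing this block, because
            // writers stay inside their own segment of the current slice.
            unsafe { self.base.add(i).as_ref() }
        }

        fn get_mut(&mut self, i: usize) -> &mut Block {
            assert!(i < self.block_count);
            assert_eq!(self.lane_of(i), self.lane);
            assert_eq!(self.slice_of(i), self.slice);
            // SAFETY: in bounds, and this is the only view for this segment.
            unsafe { self.base.add(i).as_mut() }
        }
    }

    fn main() {
        let (lanes, lane_length) = (2usize, 8usize);
        let segment_length = lane_length / SYNC_POINTS;
        let mut blocks = vec![Block::default(); lanes * lane_length];
        let block_count = blocks.len();
        // Derived from exclusive access to `blocks`; no reference into the whole
        // buffer is created while the views are alive.
        let base = NonNull::new(blocks.as_mut_ptr()).unwrap();

        // Slices run one after another; within a slice, the per-lane views have
        // disjoint writable regions, so this inner loop is what rayon could run
        // in parallel.
        for slice in 0..SYNC_POINTS {
            for lane in 0..lanes {
                let mut view = SegmentDemo {
                    base,
                    block_count,
                    lane_length,
                    slice,
                    lane,
                    _marker: PhantomData,
                };
                for offset in 0..segment_length {
                    let cur = lane * lane_length + slice * segment_length + offset;
                    // Toy fill rule standing in for the Argon2 compression step.
                    let prev = if cur % lane_length == 0 {
                        cur + lane_length - 1
                    } else {
                        cur - 1
                    };
                    let value = view.get(prev).0.wrapping_add(1);
                    view.get_mut(cur).0 = value;
                }
            }
        }

        assert_eq!(blocks[lane_length - 1].0, lane_length as u64);
    }

The real `SegmentView` keeps the same two checks (`lane_of`/`slice_of`) and the same `NonNull` base pointer derived from the exclusive `&mut [Block]` borrow; only the block type and the work done per block differ.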

The following tests have been run under Miri and pass (modulo unrelated
warnings):

    reference_argon2i_v0x13_2_8_2
    reference_argon2id_v0x13_2_8_2
Author: Jonas Malaco
Date: 2025-07-21 13:37:02 -03:00
Committed by: GitHub
Parent: 6527622507
Commit: e75b27dbd8
6 changed files with 297 additions and 146 deletions

Cargo.lock (generated)

@@ -11,6 +11,7 @@ dependencies = [
"cpufeatures",
"hex-literal",
"password-hash",
"rayon",
"zeroize",
]

argon2/Cargo.toml

@@ -21,6 +21,7 @@ base64ct = "1.7"
blake2 = { version = "0.11.0-rc.0", default-features = false }
# optional dependencies
rayon = { version = "1.7", optional = true }
password-hash = { version = "0.6.0-rc.1", optional = true }
zeroize = { version = "1", default-features = false, optional = true }
@@ -36,6 +37,7 @@ default = ["alloc", "password-hash", "rand"]
alloc = ["password-hash?/alloc"]
std = ["alloc", "password-hash?/os_rng", "base64ct/std"]
parallel = ["dep:rayon"]
rand = ["password-hash?/rand_core"]
simple = ["password-hash"]
zeroize = ["dep:zeroize"]

argon2/src/lib.rs

@@ -13,8 +13,10 @@
clippy::cast_sign_loss,
clippy::checked_conversions,
clippy::implicit_saturating_sub,
clippy::missing_safety_doc,
clippy::panic,
clippy::panic_in_result_fn,
clippy::undocumented_unsafe_blocks,
clippy::unwrap_used,
missing_docs,
rust_2018_idioms,
@@ -153,6 +155,7 @@ mod algorithm;
mod blake2b_long;
mod block;
mod error;
mod memory;
mod params;
mod version;
@@ -173,6 +176,7 @@ pub use {
use crate::blake2b_long::blake2b_long;
use blake2::{Blake2b512, Digest, digest};
use core::fmt;
use memory::Memory;
#[cfg(all(feature = "alloc", feature = "password-hash"))]
use password_hash::{Decimal, Ident, ParamsString, Salt};
@@ -347,7 +351,7 @@ impl<'key> Argon2<'key> {
mut initial_hash: digest::Output<Blake2b512>,
) -> Result<()> {
let block_count = self.params.block_count();
let memory_blocks = memory_blocks
let mut memory_blocks = memory_blocks
.get_mut(..block_count)
.ok_or(Error::MemoryTooLittle)?;
@@ -381,31 +385,59 @@ impl<'key> Argon2<'key> {
// Run passes on blocks
for pass in 0..iterations {
for slice in 0..SYNC_POINTS {
memory_blocks.for_each_segment(lanes, |mut memory_view, slice, lane| {
let data_independent_addressing = self.algorithm == Algorithm::Argon2i
|| (self.algorithm == Algorithm::Argon2id
&& pass == 0
&& slice < SYNC_POINTS / 2);
for lane in 0..lanes {
let mut address_block = Block::default();
let mut input_block = Block::default();
let zero_block = Block::default();
let mut address_block = Block::default();
let mut input_block = Block::default();
let zero_block = Block::default();
if data_independent_addressing {
input_block.as_mut()[..6].copy_from_slice(&[
pass as u64,
lane as u64,
slice as u64,
block_count as u64,
iterations as u64,
self.algorithm as u64,
]);
}
let first_block = if pass == 0 && slice == 0 {
if data_independent_addressing {
input_block.as_mut()[..6].copy_from_slice(&[
pass as u64,
lane as u64,
slice as u64,
memory_blocks.len() as u64,
iterations as u64,
self.algorithm as u64,
]);
// Generate first set of addresses
self.update_address_block(
&mut address_block,
&mut input_block,
&zero_block,
);
}
let first_block = if pass == 0 && slice == 0 {
if data_independent_addressing {
// Generate first set of addresses
// The first two blocks of each lane are already initialized
2
} else {
0
};
let mut cur_index = lane * lane_length + slice * segment_length + first_block;
let mut prev_index = if slice == 0 && first_block == 0 {
// Last block in current lane
cur_index + lane_length - 1
} else {
// Previous block
cur_index - 1
};
// Fill blocks in the segment
for block in first_block..segment_length {
// Extract entropy
let rand = if data_independent_addressing {
let address_index = block % ADDRESSES_IN_BLOCK;
if address_index == 0 {
self.update_address_block(
&mut address_block,
&mut input_block,
@@ -413,101 +445,73 @@ impl<'key> Argon2<'key> {
);
}
// The first two blocks of each lane are already initialized
2
address_block.as_ref()[address_index]
} else {
memory_view.get_block(prev_index).as_ref()[0]
};
// Calculate source block index for compress function
let ref_lane = if pass == 0 && slice == 0 {
// Cannot reference other lanes yet
lane
} else {
(rand >> 32) as usize % lanes
};
let reference_area_size = if pass == 0 {
// First pass
if slice == 0 {
// First slice
block - 1 // all but the previous
} else if ref_lane == lane {
// The same lane => add current segment
slice * segment_length + block - 1
} else {
slice * segment_length - if block == 0 { 1 } else { 0 }
}
} else {
// Second pass
if ref_lane == lane {
lane_length - segment_length + block - 1
} else {
lane_length - segment_length - if block == 0 { 1 } else { 0 }
}
};
// 1.2.4. Mapping rand to 0..<reference_area_size-1> and produce
// relative position
let mut map = rand & 0xFFFFFFFF;
map = (map * map) >> 32;
let relative_position = reference_area_size
- 1
- ((reference_area_size as u64 * map) >> 32) as usize;
// 1.2.5 Computing starting position
let start_position = if pass != 0 && slice != SYNC_POINTS - 1 {
(slice + 1) * segment_length
} else {
0
};
let mut cur_index = lane * lane_length + slice * segment_length + first_block;
let mut prev_index = if slice == 0 && first_block == 0 {
// Last block in current lane
cur_index + lane_length - 1
let lane_index = (start_position + relative_position) % lane_length;
let ref_index = ref_lane * lane_length + lane_index;
// Calculate new block
let result = self.compress(
memory_view.get_block(prev_index),
memory_view.get_block(ref_index),
);
if self.version == Version::V0x10 || pass == 0 {
*memory_view.get_block_mut(cur_index) = result;
} else {
// Previous block
cur_index - 1
*memory_view.get_block_mut(cur_index) ^= &result;
};
// Fill blocks in the segment
for block in first_block..segment_length {
// Extract entropy
let rand = if data_independent_addressing {
let address_index = block % ADDRESSES_IN_BLOCK;
if address_index == 0 {
self.update_address_block(
&mut address_block,
&mut input_block,
&zero_block,
);
}
address_block.as_ref()[address_index]
} else {
memory_blocks[prev_index].as_ref()[0]
};
// Calculate source block index for compress function
let ref_lane = if pass == 0 && slice == 0 {
// Cannot reference other lanes yet
lane
} else {
(rand >> 32) as usize % lanes
};
let reference_area_size = if pass == 0 {
// First pass
if slice == 0 {
// First slice
block - 1 // all but the previous
} else if ref_lane == lane {
// The same lane => add current segment
slice * segment_length + block - 1
} else {
slice * segment_length - if block == 0 { 1 } else { 0 }
}
} else {
// Second pass
if ref_lane == lane {
lane_length - segment_length + block - 1
} else {
lane_length - segment_length - if block == 0 { 1 } else { 0 }
}
};
// 1.2.4. Mapping rand to 0..<reference_area_size-1> and produce
// relative position
let mut map = rand & 0xFFFFFFFF;
map = (map * map) >> 32;
let relative_position = reference_area_size
- 1
- ((reference_area_size as u64 * map) >> 32) as usize;
// 1.2.5 Computing starting position
let start_position = if pass != 0 && slice != SYNC_POINTS - 1 {
(slice + 1) * segment_length
} else {
0
};
let lane_index = (start_position + relative_position) % lane_length;
let ref_index = ref_lane * lane_length + lane_index;
// Calculate new block
let result =
self.compress(&memory_blocks[prev_index], &memory_blocks[ref_index]);
if self.version == Version::V0x10 || pass == 0 {
memory_blocks[cur_index] = result;
} else {
memory_blocks[cur_index] ^= &result;
};
prev_index = cur_index;
cur_index += 1;
}
prev_index = cur_index;
cur_index += 1;
}
}
});
}
Ok(())
@@ -523,6 +527,7 @@ impl<'key> Argon2<'key> {
}
if self.cpu_feat_avx2.get() {
// SAFETY: checked that AVX2 was detected.
return unsafe { compress_avx2(rhs, lhs) };
}
}

argon2/src/memory.rs (new file)

@@ -0,0 +1,156 @@
//! Views into Argon2 memory that can be processed in parallel.
//!
//! This module implements, with a combination of compile-time borrowing and runtime checking, the
//! cooperative contract described in section 3.4 (Indexing) of RFC 9106:
//!
//! > To enable parallel block computation, we further partition the memory matrix into SL = 4
//! > vertical slices. The intersection of a slice and a lane is called a segment, which has a
//! > length of q/SL. Segments of the same slice can be computed in parallel and do not reference
//! > blocks from each other. All other blocks can be referenced.
use core::marker::PhantomData;
use core::ptr::NonNull;
#[cfg(feature = "parallel")]
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use crate::{Block, SYNC_POINTS};
/// Extension trait for Argon2 memory blocks.
pub(crate) trait Memory<'a> {
/// Compute each Argon2 segment.
///
/// By default computation is single threaded. Parallel computation can be enabled with the
/// `parallel` feature, in which case [rayon] is used to compute as many lanes in parallel as
/// possible.
fn for_each_segment<F>(&mut self, lanes: usize, f: F)
where
F: Fn(SegmentView<'_>, usize, usize) + Sync + Send;
}
impl Memory<'_> for &mut [Block] {
#[cfg(not(feature = "parallel"))]
fn for_each_segment<F>(&mut self, lanes: usize, f: F)
where
F: Fn(SegmentView<'_>, usize, usize) + Sync + Send,
{
let inner = MemoryInner::new(self, lanes);
for slice in 0..SYNC_POINTS {
for lane in 0..lanes {
// SAFETY: `self` exclusively borrows the blocks, and we sequentially process
// slices and segments.
let segment = unsafe { SegmentView::new(inner, slice, lane) };
f(segment, slice, lane);
}
}
}
#[cfg(feature = "parallel")]
fn for_each_segment<F>(&mut self, lanes: usize, f: F)
where
F: Fn(SegmentView<'_>, usize, usize) + Sync + Send,
{
let inner = MemoryInner::new(self, lanes);
for slice in 0..SYNC_POINTS {
(0..lanes).into_par_iter().for_each(|lane| {
// SAFETY: `self` exclusively borrows the blocks, we sequentially process slices,
// and we create exactly one segment view per lane in a slice.
let segment = unsafe { SegmentView::new(inner, slice, lane) };
f(segment, slice, lane);
});
}
}
}
/// Low-level pointer and metadata for an Argon2 memory region.
#[derive(Clone, Copy)]
struct MemoryInner<'a> {
blocks: NonNull<Block>,
block_count: usize,
lane_length: usize,
phantom: PhantomData<&'a mut Block>,
}
impl MemoryInner<'_> {
fn new(memory_blocks: &mut [Block], lanes: usize) -> Self {
let block_count = memory_blocks.len();
let lane_length = block_count / lanes;
// SAFETY: the pointer needs to be derived from a mutable reference because (later)
// mutating the blocks through a pointer derived from a shared reference would be UB.
let blocks = NonNull::from(memory_blocks);
MemoryInner {
blocks: blocks.cast(),
block_count,
lane_length,
phantom: PhantomData,
}
}
fn lane_of(&self, index: usize) -> usize {
index / self.lane_length
}
fn slice_of(&self, index: usize) -> usize {
index / (self.lane_length / SYNC_POINTS) % SYNC_POINTS
}
}
// SAFETY: private type, and just a pointer with some metadata.
unsafe impl Send for MemoryInner<'_> {}
// SAFETY: private type, and just a pointer with some metadata.
unsafe impl Sync for MemoryInner<'_> {}
/// A view into Argon2 memory for a particular segment (i.e. slice × lane).
pub(crate) struct SegmentView<'a> {
inner: MemoryInner<'a>,
slice: usize,
lane: usize,
}
impl<'a> SegmentView<'a> {
/// Create a view into Argon2 memory for a particular segment (i.e. slice × lane).
///
/// # Safety
///
/// At any time, there can be at most one view for a given Argon2 segment. Additionally, all
/// concurrent segment views must be for the same slice.
unsafe fn new(inner: MemoryInner<'a>, slice: usize, lane: usize) -> Self {
SegmentView { inner, slice, lane }
}
/// Get a shared reference to a block.
///
/// # Panics
///
/// Panics if the index is out of bounds or if the desired block *could* be mutably aliased (if
/// it is on the current slice but on a different lane/segment).
pub fn get_block(&self, index: usize) -> &Block {
assert!(index < self.inner.block_count);
assert!(self.inner.lane_of(index) == self.lane || self.inner.slice_of(index) != self.slice);
// SAFETY: by construction, the base pointer is valid for reads, and we assert that the
// index is in bounds. We also assert that the index either lies on this lane, or is on
// another slice. Finally, we're the only view into this segment, and mutating through it
// requires `&mut self` and is restricted to blocks within the segment.
unsafe { self.inner.blocks.add(index).as_ref() }
}
/// Get a mutable reference to a block.
///
/// # Panics
///
/// Panics if the index is out of bounds or if the desired block lies outside this segment.
pub fn get_block_mut(&mut self, index: usize) -> &mut Block {
assert!(index < self.inner.block_count);
assert_eq!(self.inner.lane_of(index), self.lane);
assert_eq!(self.inner.slice_of(index), self.slice);
// SAFETY: by construction, the base pointer is valid for reads and writes, and we assert
// that the index is in bounds. We also assert that the index lies on this segment, and
// we're the only view for it, taking `&mut self`.
unsafe { self.inner.blocks.add(index).as_mut() }
}
}

benches/Cargo.toml

@@ -8,10 +8,17 @@ publish = false
[dev-dependencies]
argon2 = { path = "../argon2" }
criterion = { version = "0.4", features = ["html_reports"] }
criterion = { version = "0.5", features = ["html_reports"] }
pprof = { version = "0.14", features = ["flamegraph", "criterion"] }
[features]
default = []
parallel = ["argon2/parallel"]
[[bench]]
name = "argon2"
path = "src/argon2.rs"
harness = false
[patch.crates-io]
password-hash = { git = "https://github.com/RustCrypto/traits.git" }

benches/src/argon2.rs

@@ -1,3 +1,5 @@
use std::collections::BTreeSet;
use argon2::*;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use pprof::criterion::{Output, PProfProfiler};
@@ -26,46 +28,26 @@ fn bench_default_params(c: &mut Criterion) {
}
}
fn bench_vary_m(c: &mut Criterion) {
let t_cost = 4;
let p_cost = 4;
for m_cost in [2 * 1024, 16 * 1024, 64 * 1024, 256 * 1024] {
let test_name = format!("argon2id V0x13 m={m_cost} t={t_cost} p={p_cost}");
c.bench_function(&test_name, |b| {
let mut out = [0u8; 32];
let params = Params::new(m_cost, t_cost, p_cost, Some(32)).unwrap();
let argon2 = Argon2::new(Algorithm::Argon2id, Version::V0x13, params);
b.iter(|| {
argon2
.hash_password_into(black_box(BENCH_PASSWORD), black_box(BENCH_SALT), &mut out)
.unwrap()
})
});
fn bench_vary_params(c: &mut Criterion) {
let mut tests = BTreeSet::new();
// Vary `m_cost`.
for m_cost in [2 * 1024, 16 * 1024, 32 * 1024, 64 * 1024, 256 * 1024] {
tests.insert((m_cost, 4, 4));
}
}
fn bench_vary_t(c: &mut Criterion) {
let m_cost = 32 * 1024;
let p_cost = 4;
for t_cost in [2, 8, 16, 24] {
let test_name = format!("argon2id V0x13 m={m_cost} t={t_cost} p={p_cost}");
c.bench_function(&test_name, |b| {
let mut out = [0u8; 32];
let params = Params::new(m_cost, t_cost, p_cost, Some(32)).unwrap();
let argon2 = Argon2::new(Algorithm::Argon2id, Version::V0x13, params);
b.iter(|| {
argon2
.hash_password_into(black_box(BENCH_PASSWORD), black_box(BENCH_SALT), &mut out)
.unwrap()
})
});
// Vary `t_cost`.
for t_cost in [1, 2, 4, 8, 16] {
tests.insert((32 * 1024, t_cost, 4));
}
}
fn bench_vary_p(c: &mut Criterion) {
let m_cost = 32 * 1024;
let t_cost = 4;
for p_cost in [2, 8, 16, 64] {
// Vary `p_cost`.
for p_cost in [1, 2, 4, 8, 16] {
for m_mib in [256 * 1024, 1024 * 1024] {
tests.insert((m_mib, 1, p_cost));
}
for t_cost in [1, 2, 4] {
tests.insert((32 * 1024, t_cost, p_cost));
}
}
for (m_cost, t_cost, p_cost) in tests {
let test_name = format!("argon2id V0x13 m={m_cost} t={t_cost} p={p_cost}");
c.bench_function(&test_name, |b| {
let mut out = [0u8; 32];
@@ -85,8 +67,6 @@ criterion_group!(
config = Criterion::default().with_profiler(PProfProfiler::new(300, Output::Flamegraph(None)));
targets =
bench_default_params,
bench_vary_m,
bench_vary_t,
bench_vary_p,
bench_vary_params,
);
criterion_main!(benches);