Skip to content

Instantly share code, notes, and snippets.

@jedisct1
Created June 22, 2026 14:42
Show Gist options
  • Select an option

  • Save jedisct1/4f689c97c78e26d19c07d7f76df9f7d6 to your computer and use it in GitHub Desktop.

Select an option

Save jedisct1/4f689c97c78e26d19c07d7f76df9f7d6 to your computer and use it in GitHub Desktop.
const std = @import("std");
const mem = std.mem;
const math = std.math;
const Blake2b512 = std.crypto.hash.blake2.Blake2b512;
/// Number of 64-bit words in one memory block (128 * 8 bytes = 1 KiB).
const block_length = 128;
/// Number of slices each lane is split into; `fill` walks every lane once per
/// slice within a pass, and `blockCount` rounds memory to a multiple of
/// `sync_points * lanes`.
const sync_points = 4;
/// Version number hashed into the seed by `initHash` (0x13 is Argon2 v1.3).
const version = 0x13;
/// One Argon2 memory block: `block_length` 64-bit words, i.e. 1 KiB. The array
/// type itself carries no extra alignment; the 16-byte alignment comes from the
/// `[]align(16) Block` slices that the functions in this file accept.
pub const Block = [block_length]u64;
/// Seed buffer: a Blake2b-512 digest followed by 8 bytes that `initBlocks`
/// overwrites with the little-endian block index and lane index.
const H0 = [Blake2b512.digest_length + 8]u8;
/// Returns the number of `Block`s in the memory matrix for the requested
/// memory cost and lane count. Allocate exactly this many blocks for `fill`.
///
/// `m_cost` is rounded down to a whole multiple of `sync_points * lanes`, and
/// the result is never smaller than `2 * sync_points * lanes`, so every
/// segment of every lane holds at least two blocks.
/// `lanes` must be nonzero: it is used as a divisor.
pub fn blockCount(m_cost: u32, lanes: u32) u32 {
    // One block in every segment of every lane: the matrix's size granularity.
    const granule = sync_points * lanes;
    const rounded_down = (m_cost / granule) * granule;
    const minimum = 2 * granule;
    return @max(rounded_down, minimum);
}
/// Build the Argon2d memory matrix in `blocks` and leave it there; no final
/// tag is derived from it.
///
/// - `blocks`: destination matrix; its length must equal
///   `blockCount(m_cost, lanes)` (checked with an assertion).
/// - `password`, `salt`: inputs hashed into the seed.
/// - `t_cost`: number of passes over the whole matrix.
/// - `m_cost`, `lanes`: memory and parallelism parameters; they determine the
///   matrix size and are hashed into the seed.
/// - `tag_len`: the Argon2 output length folded into the seed (RandomX uses 0).
pub fn fill(
    blocks: []align(16) Block,
    password: []const u8,
    salt: []const u8,
    t_cost: u32,
    m_cost: u32,
    lanes: u32,
    tag_len: u32,
) void {
    const total_blocks = blockCount(m_cost, lanes);
    std.debug.assert(blocks.len == total_blocks);

    // Every block is produced by XOR-ing into its destination, so starting
    // from zeroes makes the first pass behave like a plain store.
    @memset(mem.sliceAsBytes(blocks), 0);

    var seed = initHash(password, salt, t_cost, m_cost, lanes, tag_len);
    initBlocks(blocks, &seed, total_blocks, lanes);

    const blocks_per_lane = total_blocks / lanes;
    const blocks_per_segment = blocks_per_lane / sync_points;

    // Pass -> slice -> lane: all lanes finish a slice before the next begins.
    for (0..t_cost) |pass_index| {
        const pass: u32 = @intCast(pass_index);
        for (0..sync_points) |slice_index| {
            const slice: u32 = @intCast(slice_index);
            for (0..lanes) |lane_index| {
                const lane: u32 = @intCast(lane_index);
                fillSegment(blocks, blocks_per_lane, blocks_per_segment, lanes, pass, slice, lane);
            }
        }
    }
}
/// Compute the Blake2b-512 seed over the parameters and inputs.
///
/// The hashed stream is, with every integer a little-endian u32:
/// `lanes, tag_len, m_cost, t_cost, version, 0` (type 0 = Argon2d), then the
/// length-prefixed password, the length-prefixed salt, and two zero lengths
/// for the absent secret and associated data.
///
/// Only the first `Blake2b512.digest_length` bytes of the returned buffer are
/// written here; the trailing 8 bytes are left undefined.
/// `password.len` and `salt.len` must fit in a u32.
fn initHash(password: []const u8, salt: []const u8, t_cost: u32, m_cost: u32, lanes: u32, tag_len: u32) H0 {
    var hasher = Blake2b512.init(.{});
    var word: [4]u8 = undefined;

    // Fixed header fields, in hashing order. The trailing 0 selects Argon2d.
    const header = [_]u32{ lanes, tag_len, m_cost, t_cost, version, 0 };
    for (header) |field| {
        mem.writeInt(u32, &word, field, .little);
        hasher.update(&word);
    }

    // Password and salt are each preceded by their byte length.
    mem.writeInt(u32, &word, @intCast(password.len), .little);
    hasher.update(&word);
    hasher.update(password);

    mem.writeInt(u32, &word, @intCast(salt.len), .little);
    hasher.update(&word);
    hasher.update(salt);

    // No secret and no associated data: each contributes only a zero length.
    mem.writeInt(u32, &word, 0, .little);
    hasher.update(&word);
    hasher.update(&word);

    var seed: H0 = undefined;
    hasher.final(seed[0..Blake2b512.digest_length]);
    return seed;
}
/// Seed the first two blocks of every lane from `h0`.
///
/// For each lane and each block index in {0, 1}, the trailing 8 bytes of `h0`
/// are set to (block index, lane), both little-endian u32. The whole buffer is
/// then expanded to 1024 bytes with `blake2bLong`, and the result is stored
/// into the block as little-endian u64 words. `h0`'s trailing bytes are left
/// holding the last values written.
fn initBlocks(blocks: []align(16) Block, h0: *H0, memory: u32, lanes: u32) void {
    const index_field = h0[Blake2b512.digest_length..][0..4];
    const lane_field = h0[Blake2b512.digest_length + 4 ..][0..4];
    var expanded: [1024]u8 = undefined;

    var lane: u32 = 0;
    while (lane < lanes) : (lane += 1) {
        // Index of this lane's first block in the matrix.
        const lane_start = lane * (memory / lanes);
        mem.writeInt(u32, lane_field, lane, .little);

        var index: u32 = 0;
        while (index < 2) : (index += 1) {
            mem.writeInt(u32, index_field, index, .little);
            blake2bLong(&expanded, h0);
            for (&blocks[lane_start + index], 0..) |*word, i| {
                word.* = mem.readInt(u64, expanded[i * 8 ..][0..8], .little);
            }
        }
    }
}
/// Compute every block of one segment (`slice` of `lane`) during pass `n`.
///
/// Each block is derived from the block before it in the lane and from a
/// reference block chosen by `indexAlpha`. The selector is word 0 of that
/// previous block, which makes the addressing data-dependent.
fn fillSegment(blocks: []align(16) Block, lane_len: u32, segment: u32, lanes: u32, n: u32, slice: u32, lane: u32) void {
    // In the very first segment the loop starts at index 2: `fill` has already
    // seeded the lane's first two blocks through `initBlocks`.
    const is_first_segment = n == 0 and slice == 0;
    var index: u32 = if (is_first_segment) 2 else 0;
    var cur = lane * lane_len + slice * segment + index;
    while (index < segment) : ({
        index += 1;
        cur += 1;
    }) {
        // At the first block of a lane, "previous" wraps to the lane's last block.
        const at_lane_start = index == 0 and slice == 0;
        const prev = if (at_lane_start) (cur -% 1) +% lane_len else cur -% 1;
        const selector = blocks[prev][0];
        const ref = indexAlpha(selector, lane_len, segment, lanes, n, slice, lane, index);
        processBlockXor(&blocks[cur], &blocks[prev], &blocks[ref]);
    }
}
/// Choose the reference block for position `index` of segment `slice` in
/// `lane` during pass `n`, and return its absolute index into the matrix.
///
/// `rand` is the 64-bit selector supplied by the caller: its high 32 bits pick
/// the lane, its low 32 bits pick a position inside a window of `m` candidate
/// blocks that starts at lane-relative offset `s`.
/// `lanes` and `lane_len` must be nonzero (both are used as moduli).
fn indexAlpha(rand: u64, lane_len: u32, segment: u32, lanes: u32, n: u32, slice: u32, lane: u32, index: u32) u32 {
// High half of `rand` selects the lane to read from ...
var ref_lane = @as(u32, @intCast(rand >> 32)) % lanes;
// ... except in the first segment of the first pass, which stays in its own lane.
if (n == 0 and slice == 0) ref_lane = lane;
// Defaults for passes after the first: the window covers the three other
// segments and starts at the segment after the current one (wrapping).
var m = 3 * segment;
var s = ((slice + 1) % sync_points) * segment;
// In the current lane, the `index` blocks already written in this segment
// are candidates as well.
if (lane == ref_lane) m += index;
if (n == 0) {
// First pass: the window starts at the lane's first block and covers the
// `slice` segments before the current one (plus `index` blocks, below).
m = slice * segment;
s = 0;
if (slice == 0 or lane == ref_lane) m += index;
}
// Shrink the window by one. In the same lane the newest candidate would be
// the previous block, which the caller already uses as the other input.
// NOTE(review): the `index == 0` cross-lane case presumably follows the
// Argon2 specification's exclusion rule — confirm against the spec.
if (index == 0 or lane == ref_lane) m -= 1;
// Map the low 32 bits x of `rand` to p = (m * ((x * x) >> 32)) >> 32, which
// lies in [0, m) and is skewed toward small values.
var p = @as(u64, @as(u32, @truncate(rand)));
p = (p * p) >> 32;
p = (p * m) >> 32;
// Small p lands near the end of the window (s + m - 1). Wrap within the lane,
// then convert to an absolute block index.
return ref_lane * lane_len + @as(u32, @intCast((s + m - (p + 1)) % lane_len));
}
/// Mixing primitive: `x + y + 2 * lo32(x) * lo32(y)`, with all additions and
/// the doubling wrapping modulo 2^64.
fn fBlaMka(x: u64, y: u64) u64 {
    const low_mask: u64 = 0xFFFF_FFFF;
    // The product of two 32-bit values always fits in 64 bits.
    const low_product = (x & low_mask) * (y & low_mask);
    return x +% y +% (low_product *% 2);
}
/// Permute 16 words in place, treating them as a row-major 4x4 matrix: four
/// quarter-rounds down the columns, then four along the diagonals.
///
/// Each quarter-round mixes the words (a, b, c, d) with `fBlaMka` and
/// right-rotations by 32, 24, 16 and 63 bits.
fn blamka(x: *[16]u64) void {
    const Quad = struct { a: usize, b: usize, c: usize, d: usize };
    const quads = [_]Quad{
        // Columns.
        .{ .a = 0, .b = 4, .c = 8, .d = 12 },
        .{ .a = 1, .b = 5, .c = 9, .d = 13 },
        .{ .a = 2, .b = 6, .c = 10, .d = 14 },
        .{ .a = 3, .b = 7, .c = 11, .d = 15 },
        // Diagonals.
        .{ .a = 0, .b = 5, .c = 10, .d = 15 },
        .{ .a = 1, .b = 6, .c = 11, .d = 12 },
        .{ .a = 2, .b = 7, .c = 8, .d = 13 },
        .{ .a = 3, .b = 4, .c = 9, .d = 14 },
    };
    inline for (quads) |q| {
        const a = &x[q.a];
        const b = &x[q.b];
        const c = &x[q.c];
        const d = &x[q.d];

        a.* = fBlaMka(a.*, b.*);
        d.* = math.rotr(u64, d.* ^ a.*, 32);
        c.* = fBlaMka(c.*, d.*);
        b.* = math.rotr(u64, b.* ^ c.*, 24);

        a.* = fBlaMka(a.*, b.*);
        d.* = math.rotr(u64, d.* ^ a.*, 16);
        c.* = fBlaMka(c.*, d.*);
        b.* = math.rotr(u64, b.* ^ c.*, 63);
    }
}
/// Compress `in1` and `in2` and XOR the result into `out`.
///
/// With R = in1 ^ in2 and P the two-stage `blamka` permutation (first over
/// each 16-word row, then over each column of word pairs), this performs
/// `out ^= R ^ P(R)`. `out` is read as well as written, so a zeroed `out`
/// ends up holding `R ^ P(R)`.
fn processBlockXor(out: *align(16) Block, in1: *const Block, in2: *const Block) void {
    const row_words = 16;
    const rows = block_length / row_words;
    const pairs_per_row = row_words / 2;

    var work: Block = undefined;
    for (&work, 0..) |*w, k| w.* = in1[k] ^ in2[k];

    // Row pass: permute each run of 16 consecutive words.
    var row: usize = 0;
    while (row < rows) : (row += 1) {
        blamka(work[row * row_words ..][0..row_words]);
    }

    // Column pass: column `col` is word pair (2*col, 2*col + 1) of every row.
    var column: [row_words]u64 = undefined;
    var col: usize = 0;
    while (col < pairs_per_row) : (col += 1) {
        for (0..rows) |r| {
            const src = r * row_words + 2 * col;
            column[2 * r] = work[src];
            column[2 * r + 1] = work[src + 1];
        }
        blamka(&column);
        for (0..rows) |r| {
            const dst = r * row_words + 2 * col;
            work[dst] = column[2 * r];
            work[dst + 1] = column[2 * r + 1];
        }
    }

    // Feed-forward: fold the original XOR and the permuted state into `out`.
    for (work, 0..) |w, k| out[k] ^= in1[k] ^ in2[k] ^ w;
}
/// Variable-length hash built on Blake2b-512: fill `out` with a digest of
/// (little-endian u32 `out.len`) || `in`.
///
/// - `out.len <= 64`: a single Blake2b call parameterised with that output
///   length; the first `out.len` bytes of its result are written.
/// - Otherwise: hash once, then keep re-hashing the previous 64-byte digest.
///   Each step but the last emits the first 32 bytes of its digest. The last
///   step is parameterised with the remaining length (33..64 bytes) and
///   emits all of it.
///
/// `out.len` must fit in a u32.
fn blake2bLong(out: []u8, in: []const u8) void {
    const digest_len = Blake2b512.digest_length;
    const half = digest_len / 2;

    var len_prefix: [4]u8 = undefined;
    mem.writeInt(u32, &len_prefix, @intCast(out.len), .little);

    var digest: [digest_len]u8 = undefined;

    if (out.len <= digest_len) {
        var st = Blake2b512.init(.{ .expected_out_bits = out.len * 8 });
        st.update(&len_prefix);
        st.update(in);
        st.final(&digest);
        @memcpy(out, digest[0..out.len]);
        return;
    }

    var st = Blake2b512.init(.{});
    st.update(&len_prefix);
    st.update(in);
    st.final(&digest);

    // `written` is the number of output bytes produced so far.
    @memcpy(out[0..half], digest[0..half]);
    var written: usize = half;

    while (out.len - written > digest_len) {
        const prev = digest;
        Blake2b512.hash(&prev, &digest, .{});
        @memcpy(out[written..][0..half], digest[0..half]);
        written += half;
    }

    // Final link of the chain supplies everything that is still missing.
    const tail = out[written..];
    const prev = digest;
    Blake2b512.hash(&prev, &digest, .{ .expected_out_bits = tail.len * 8 });
    @memcpy(tail, digest[0..tail.len]);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment