Created
June 22, 2026 14:42
-
-
Save jedisct1/4f689c97c78e26d19c07d7f76df9f7d6 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| const std = @import("std"); | |
| const mem = std.mem; | |
| const math = std.math; | |
| const Blake2b512 = std.crypto.hash.blake2.Blake2b512; | |
/// Number of 64-bit words in one memory block (128 * 8 bytes = 1 KiB).
const block_length = 128;
/// Number of slices each lane is split into; `fill` walks the slices in order
/// within every pass.
const sync_points = 4;
/// Version word that `initHash` mixes into the seed.
const version = 0x13;
/// One Argon2 memory block: 128 `u64` words, i.e. 1 KiB. The array type itself
/// only carries `u64` alignment; the 16-byte alignment is a requirement of the
/// `[]align(16) Block` slices that `fill` and its helpers accept.
pub const Block = [block_length]u64;
/// Seed buffer: a Blake2b-512 digest followed by 8 trailing bytes that
/// `initBlocks` overwrites with a block index and a lane index (4 bytes each,
/// little-endian) before every expansion.
const H0 = [Blake2b512.digest_length + 8]u8;
/// Return how many `Block`s the memory matrix occupies for the requested
/// memory cost and lane count: `m_cost` rounded down to a multiple of
/// `sync_points * lanes`, but never less than `2 * sync_points * lanes`.
/// Allocate exactly this many blocks before calling `fill`.
pub fn blockCount(m_cost: u32, lanes: u32) u32 {
    // One "row" of the matrix layout: every lane contributes one block per slice.
    const stride = sync_points * lanes;
    const rounded = m_cost / stride * stride;
    const minimum = 2 * stride;
    return if (rounded > minimum) rounded else minimum;
}
/// Populate `blocks` with the Argon2d memory matrix and stop there: the matrix
/// is left in place and no final tag is derived from it.
///
/// `blocks.len` must equal `blockCount(m_cost, lanes)` (checked with an
/// assertion). `tag_len` is the Argon2 output length that gets hashed into the
/// seed (RandomX uses 0). `t_cost` is the number of passes over the matrix.
pub fn fill(
    blocks: []align(16) Block,
    password: []const u8,
    salt: []const u8,
    t_cost: u32,
    m_cost: u32,
    lanes: u32,
    tag_len: u32,
) void {
    const total_blocks = blockCount(m_cost, lanes);
    std.debug.assert(total_blocks == blocks.len);

    // Blocks are produced by XOR-ing into their previous contents, so the
    // first pass needs a zeroed matrix to start from.
    @memset(mem.sliceAsBytes(blocks), 0);

    var seed = initHash(password, salt, t_cost, m_cost, lanes, tag_len);
    initBlocks(blocks, &seed, total_blocks, lanes);

    const blocks_per_lane = total_blocks / lanes;
    const blocks_per_segment = blocks_per_lane / sync_points;

    // Order matters: pass -> slice -> lane, one segment at a time.
    var pass: u32 = 0;
    while (pass < t_cost) : (pass += 1) {
        var slice: u32 = 0;
        while (slice < sync_points) : (slice += 1) {
            var lane: u32 = 0;
            while (lane < lanes) : (lane += 1) {
                fillSegment(blocks, blocks_per_lane, blocks_per_segment, lanes, pass, slice, lane);
            }
        }
    }
}
/// Compute the initial seed: a Blake2b-512 digest over the parameter words,
/// the length-prefixed password, the length-prefixed salt, and two zero length
/// words (no secret, no associated data). All integers are 32-bit
/// little-endian. Only the first `Blake2b512.digest_length` bytes of the
/// returned buffer are written here; the trailing 8 bytes are left undefined.
fn initHash(password: []const u8, salt: []const u8, t_cost: u32, m_cost: u32, lanes: u32, tag_len: u32) H0 {
    var hasher = Blake2b512.init(.{});
    var word: [4]u8 = undefined;

    // Parameter header, in this exact order. The final 0 selects Argon2d.
    const header = [_]u32{ lanes, tag_len, m_cost, t_cost, version, 0 };
    for (header) |value| {
        mem.writeInt(u32, &word, value, .little);
        hasher.update(&word);
    }

    mem.writeInt(u32, &word, @intCast(password.len), .little);
    hasher.update(&word);
    hasher.update(password);

    mem.writeInt(u32, &word, @intCast(salt.len), .little);
    hasher.update(&word);
    hasher.update(salt);

    // Secret and associated data are both absent: two zero-length prefixes.
    const zero_len = [_]u8{0} ** 4;
    hasher.update(&zero_len);
    hasher.update(&zero_len);

    var seed: H0 = undefined;
    hasher.final(seed[0..Blake2b512.digest_length]);
    return seed;
}
/// Write the first two blocks of every lane. For each (lane, index) pair with
/// index in {0, 1}, the trailing 8 bytes of `h0` are set to `index` and `lane`
/// (little-endian u32 each), `h0` is expanded to 1024 bytes with
/// `blake2bLong`, and the result is stored as 128 little-endian `u64` words.
/// `memory` is the total block count; `h0`'s trailing bytes are modified.
fn initBlocks(blocks: []align(16) Block, h0: *H0, memory: u32, lanes: u32) void {
    const index_field = h0[Blake2b512.digest_length..][0..4];
    const lane_field = h0[Blake2b512.digest_length + 4 ..][0..4];
    var expanded: [1024]u8 = undefined;

    var lane: u32 = 0;
    while (lane < lanes) : (lane += 1) {
        const lane_start = lane * (memory / lanes);
        mem.writeInt(u32, lane_field, lane, .little);

        var index: u32 = 0;
        while (index < 2) : (index += 1) {
            mem.writeInt(u32, index_field, index, .little);
            blake2bLong(&expanded, h0);

            const target = &blocks[lane_start + index];
            for (target, 0..) |*word, i| {
                word.* = mem.readInt(u64, expanded[i * 8 ..][0..8], .little);
            }
        }
    }
}
/// Fill one segment (the part of lane `lane` that lies in slice `slice`)
/// during pass `n`. Each block is produced from the block before it and a
/// reference block whose position is derived from the first word of that
/// previous block. In the very first segment of the first pass the two
/// leading blocks are skipped, since `initBlocks` already wrote them.
fn fillSegment(blocks: []align(16) Block, lane_len: u32, segment: u32, lanes: u32, n: u32, slice: u32, lane: u32) void {
    const skip_seeded = n == 0 and slice == 0;
    var index: u32 = if (skip_seeded) 2 else 0;
    var cur = lane * lane_len + slice * segment + index;

    while (index < segment) {
        // The predecessor of a lane's first block is the lane's last block,
        // hence the wrapping step back followed by a full lane forward.
        const wraps = index == 0 and slice == 0;
        const prev = if (wraps) (cur -% 1) +% lane_len else cur -% 1;

        const pseudo_rand = blocks[prev][0];
        const ref = indexAlpha(pseudo_rand, lane_len, segment, lanes, n, slice, lane, index);
        processBlockXor(&blocks[cur], &blocks[prev], &blocks[ref]);

        index += 1;
        cur += 1;
    }
}
/// Map the pseudo-random word `rand` to the absolute index (into the whole
/// block matrix) of the reference block for the block currently being built.
///
/// - `rand`: first word of the previous block (supplied by `fillSegment`).
/// - `lane_len` / `segment`: blocks per lane and blocks per segment.
/// - `lanes`: number of lanes; must be non-zero.
/// - `n`, `slice`, `lane`, `index`: pass, slice, lane and position inside the
///   segment of the block being built.
///
/// The high 32 bits of `rand` choose the lane, the low 32 bits choose a
/// position inside a window of `m` candidate blocks that starts at offset `s`
/// within that lane.
fn indexAlpha(rand: u64, lane_len: u32, segment: u32, lanes: u32, n: u32, slice: u32, lane: u32, index: u32) u32 {
    var ref_lane = @as(u32, @intCast(rand >> 32)) % lanes;
    // The first slice of the first pass may only reference its own lane.
    if (n == 0 and slice == 0) ref_lane = lane;
    const same_lane = lane == ref_lane;

    // Later passes: the window spans the three other slices and starts right
    // after the current one; blocks already written in this segment are added
    // when staying in the same lane.
    var m = 3 * segment;
    var s = ((slice + 1) % sync_points) * segment;
    if (same_lane) m += index;

    // First pass: only the slices completed so far exist, starting at 0.
    if (n == 0) {
        m = slice * segment;
        s = 0;
        if (slice == 0 or same_lane) m += index;
    }

    // Drop the most recent candidate block from the window.
    if (index == 0 or same_lane) m -= 1;

    // Skew the low 32 bits of `rand` towards recent blocks: p = (x^2 >> 32) * m >> 32.
    var p = @as(u64, @as(u32, @truncate(rand)));
    p = (p * p) >> 32;
    p = (p * m) >> 32;

    // `s + m` can approach 7 * segment, which does not fit in u32 for very
    // large lanes, so the sum is carried out in 64 bits before reducing.
    const position = (@as(u64, s) + m - (p + 1)) % lane_len;
    return ref_lane * lane_len + @as(u32, @intCast(position));
}
/// Mixing primitive: `x + y + 2 * lo32(x) * lo32(y)`, with every addition and
/// the doubling wrapping modulo 2^64.
fn fBlaMka(x: u64, y: u64) u64 {
    const low_mask: u64 = 0xFFFF_FFFF;
    // Product of two 32-bit values always fits in 64 bits.
    const product = (x & low_mask) * (y & low_mask);
    // Shifting left by one is the wrapping doubling of the product.
    return x +% y +% (product << 1);
}
/// Permute 16 words in place: the quarter-round below is applied to the four
/// columns (0,4,8,12) … (3,7,11,15) and then to the four diagonals
/// (0,5,10,15), (1,6,11,12), (2,7,8,13), (3,4,9,14), in that order.
fn blamka(x: *[16]u64) void {
    const QuarterRound = struct {
        /// One quarter-round on words a, b, c, d using `fBlaMka` in place of
        /// plain addition and right rotations by 32, 24, 16 and 63 bits.
        fn apply(v: *[16]u64, comptime a: usize, comptime b: usize, comptime c: usize, comptime d: usize) void {
            v[a] = fBlaMka(v[a], v[b]);
            v[d] = math.rotr(u64, v[d] ^ v[a], 32);
            v[c] = fBlaMka(v[c], v[d]);
            v[b] = math.rotr(u64, v[b] ^ v[c], 24);
            v[a] = fBlaMka(v[a], v[b]);
            v[d] = math.rotr(u64, v[d] ^ v[a], 16);
            v[c] = fBlaMka(v[c], v[d]);
            v[b] = math.rotr(u64, v[b] ^ v[c], 63);
        }
    };

    inline for (0..4) |col| {
        QuarterRound.apply(x, col, col + 4, col + 8, col + 12);
    }
    inline for (0..4) |diag| {
        QuarterRound.apply(x, diag, 4 + (diag + 1) % 4, 8 + (diag + 2) % 4, 12 + (diag + 3) % 4);
    }
}
/// Compression step: with R = in1 ^ in2 and P(R) the permuted copy of R,
/// XOR `R ^ P(R)` into `out` (existing contents of `out` are kept and mixed
/// in). The permutation treats the block as 8 rows of 16 words: `blamka` is
/// run on every row, then on every "column" made of one adjacent word pair
/// taken from each row.
fn processBlockXor(out: *align(16) Block, in1: *const Block, in2: *const Block) void {
    const row_words = 16;
    const row_count = block_length / row_words;
    const pair_count = row_words / 2;

    var work: Block = undefined;
    for (&work, in1, in2) |*w, a, b| w.* = a ^ b;

    // Row pass.
    for (0..row_count) |row| {
        blamka(work[row * row_words ..][0..row_words]);
    }

    // Column pass: gather pair `pair` from each row, permute, scatter back.
    for (0..pair_count) |pair| {
        var column: [row_words]u64 = undefined;
        for (0..row_count) |row| {
            const src = row * row_words + 2 * pair;
            column[2 * row] = work[src];
            column[2 * row + 1] = work[src + 1];
        }
        blamka(&column);
        for (0..row_count) |row| {
            const dst = row * row_words + 2 * pair;
            work[dst] = column[2 * row];
            work[dst + 1] = column[2 * row + 1];
        }
    }

    for (out, work, 0..) |*o, permuted, k| {
        o.* ^= in1[k] ^ in2[k] ^ permuted;
    }
}
/// Variable-length hash built on Blake2b: fill all of `out` with bytes derived
/// from `in`. The output length (32-bit little-endian) is always hashed ahead
/// of `in`.
///
/// - Up to 64 bytes: a single Blake2b call configured for exactly `out.len`
///   bytes.
/// - Longer: a chain of 64-byte digests, each the hash of the previous one.
///   Every link contributes its first 32 bytes, except the last link, which
///   is configured for the remaining length and contributes all of it.
fn blake2bLong(out: []u8, in: []const u8) void {
    const Hash = Blake2b512;
    const full_len = Hash.digest_length;
    const half_len = full_len / 2;

    var prefix: [4]u8 = undefined;
    mem.writeInt(u32, &prefix, @intCast(out.len), .little);

    var digest: [full_len]u8 = undefined;

    if (out.len > full_len) {
        var first = Hash.init(.{});
        first.update(&prefix);
        first.update(in);
        first.final(&digest);

        var pos: usize = 0;
        @memcpy(out[pos..][0..half_len], digest[0..half_len]);
        pos += half_len;

        var previous: [full_len]u8 = undefined;
        while (out.len - pos > full_len) : (pos += half_len) {
            previous = digest;
            Hash.hash(&previous, &digest, .{});
            @memcpy(out[pos..][0..half_len], digest[0..half_len]);
        }

        // Last link: sized to whatever is still missing (33..64 bytes).
        const tail = out[pos..];
        previous = digest;
        Hash.hash(&previous, &digest, .{ .expected_out_bits = tail.len * 8 });
        @memcpy(tail, digest[0..tail.len]);
    } else {
        var only = Hash.init(.{ .expected_out_bits = out.len * 8 });
        only.update(&prefix);
        only.update(in);
        only.final(&digest);
        @memcpy(out, digest[0..out.len]);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment