Skip to content

Instantly share code, notes, and snippets.

@jjrv
Last active March 2, 2025 16:31
Show Gist options
  • Save jjrv/b9cc022faec13109a99691b2c38ceeff to your computer and use it in GitHub Desktop.
Save jjrv/b9cc022faec13109a99691b2c38ceeff to your computer and use it in GitHub Desktop.
// Copyright 2025- Juha Järvi
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
// AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
// OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
// PERFORMANCE OF THIS SOFTWARE.
const std = @import("std");
// Repeat given bit mask to fill an unsigned 64-bit int.
inline fn splat64(Type: type, n: Type) u64 {
return @bitCast(@as(@Vector(64 / @typeInfo(Type).int.bits, Type), @splat(n)));
}
const sign_mask: u64 = splat64(u8, 1 << 7);
/// Branchless bit parallel varint decode, up to 64 bits input and 56 bits output.
/// Increments data pointer by number of consumed bytes and returns decoded value.
/// Always reads 8 bytes, make sure enough bytes are readable.
pub fn decodeVar(data: *[*]const u8, T: type) T {
// Read 8 unaligned bytes.
var n = std.mem.readInt(u64, data.*[0..8], .little);
// Varint ends when the high byte is zero, so mask off all bytes after
// (more significant than) such byte.
n &= (~n & sign_mask) - 1;
// Varint byte length is number of high bits set in included bytes, plus one.
// Equal alternative: data.* += ((n & sign_mask) >> 7) % 255 + 1;
data.* += @popCount(n & sign_mask) + 1;
// Pack 4 pairs of u8 together to remove now useless continuation bit in between.
var mask = splat64(u16, (1 << 7) - 1);
n = (n & mask) | ((n >> 1) & (mask << 7));
// Pack 2 pairs of u16 to remove 2 useless bits in between.
mask = splat64(u32, (1 << 14) - 1);
n = (n & mask) | ((n >> 2) & (mask << 14));
// Pack pair of u32 to remove 4 useless bits in between.
mask = (1 << 28) - 1;
n = (n & mask) | ((n >> 4) & (mask << 28));
// If output is signed, zigzag decode sign from least significant bit.
if(@typeInfo(T).int.signedness == .signed) {
const s: i64 = @bitCast(n);
return @intCast((s >> 1) ^ -(s & 1));
}
return @intCast(n);
}
/// Branchless bit parallel varint encode, up to 56 bits input and 64 bits output.
/// Always writes 8 bytes and returns number of valid bytes written, rest are zero.
/// Caller must ensure output buffer has enough space.
pub fn encodeVar(data: [*]u8, value: anytype) u8 {
var n: u64 = undefined;
const info = @typeInfo(@TypeOf(value)).int;
// If input is signed, zigzag encode sign to least significant bit.
if(info.signedness == .signed) {
n = @intCast((value >> (info.bits - 1)) ^ (value << 1));
} else n = @intCast(value);
// Number of groups of 7 bits required to represent entire input number.
const len = @divFloor(70 - @clz(n | 1), 7);
// Split into 2 groups of 4x7 bits with a 4-bit gap.
var mask: u64 = (1 << 28) - 1;
n = (n & mask) | ((n << 4) & (mask << 32));
// Split into 4 groups of 2x7 bits with 2-bit gaps.
mask = splat64(u32, (1 << 14) - 1);
n = (n & mask) | ((n << 2) & (mask << 16));
// Split into 8 groups of 7 bits with 1-bit gaps and set the bits in the gaps.
mask = splat64(u16, (1 << 7) - 1);
n = (n & mask) | ((n << 1) & (mask << 8)) | sign_mask;
// Mask out everything but populated 7-bit groups and gaps in between.
n &= (@as(u64, 1) << @intCast(len * 8 - 1)) - 1;
// Write 8 unaligned bytes.
std.mem.writeInt(u64, data[0..8], n, .little);
return len;
}
test {
var buffer: [32]u8 = undefined;
var rng = std.Random.DefaultPrng.init(1);
const expectEqual = std.testing.expectEqual;
const rand = rng.random();
inline for(1..57) |size| {
comptime var info = @typeInfo(i32);
info.int.bits = size;
const Type = @Type(info);
info.int.signedness = .unsigned;
const UType = @Type(info);
for(0..100) |_| {
const a = @as(u64, rand.int(UType));
const b = @as(u64, rand.int(UType));
const c = @as(i64, rand.int(Type));
const d = @as(i64, rand.int(Type));
// std.debug.print("{} {}\n", .{ a, b });
var write_stream: [*]u8 = &buffer;
write_stream += encodeVar(write_stream, a);
write_stream += encodeVar(write_stream, b);
write_stream += encodeVar(write_stream, c);
write_stream += encodeVar(write_stream, d);
var read_stream: [*]const u8 = &buffer;
try expectEqual(a, decodeVar(&read_stream, u64));
try expectEqual(b, decodeVar(&read_stream, u64));
try expectEqual(c, decodeVar(&read_stream, i64));
try expectEqual(d, decodeVar(&read_stream, i64));
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment