Last active
March 2, 2025 16:31
-
-
Save jjrv/b9cc022faec13109a99691b2c38ceeff to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright 2025- Juha Järvi | |
// | |
// Permission to use, copy, modify, and/or distribute this software for any | |
// purpose with or without fee is hereby granted. | |
// | |
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH | |
// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY | |
// AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, | |
// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM | |
// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR | |
// OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | |
// PERFORMANCE OF THIS SOFTWARE. | |
const std = @import("std"); | |
// Repeat given bit mask to fill an unsigned 64-bit int. | |
inline fn splat64(Type: type, n: Type) u64 { | |
return @bitCast(@as(@Vector(64 / @typeInfo(Type).int.bits, Type), @splat(n))); | |
} | |
const sign_mask: u64 = splat64(u8, 1 << 7); | |
/// Branchless bit parallel varint decode, up to 64 bits input and 56 bits output. | |
/// Increments data pointer by number of consumed bytes and returns decoded value. | |
/// Always reads 8 bytes, make sure enough bytes are readable. | |
pub fn decodeVar(data: *[*]const u8, T: type) T { | |
// Read 8 unaligned bytes. | |
var n = std.mem.readInt(u64, data.*[0..8], .little); | |
// Varint ends when the high byte is zero, so mask off all bytes after | |
// (more significant than) such byte. | |
n &= (~n & sign_mask) - 1; | |
// Varint byte length is number of high bits set in included bytes, plus one. | |
// Equal alternative: data.* += ((n & sign_mask) >> 7) % 255 + 1; | |
data.* += @popCount(n & sign_mask) + 1; | |
// Pack 4 pairs of u8 together to remove now useless continuation bit in between. | |
var mask = splat64(u16, (1 << 7) - 1); | |
n = (n & mask) | ((n >> 1) & (mask << 7)); | |
// Pack 2 pairs of u16 to remove 2 useless bits in between. | |
mask = splat64(u32, (1 << 14) - 1); | |
n = (n & mask) | ((n >> 2) & (mask << 14)); | |
// Pack pair of u32 to remove 4 useless bits in between. | |
mask = (1 << 28) - 1; | |
n = (n & mask) | ((n >> 4) & (mask << 28)); | |
// If output is signed, zigzag decode sign from least significant bit. | |
if(@typeInfo(T).int.signedness == .signed) { | |
const s: i64 = @bitCast(n); | |
return @intCast((s >> 1) ^ -(s & 1)); | |
} | |
return @intCast(n); | |
} | |
/// Branchless bit parallel varint encode, up to 56 bits input and 64 bits output. | |
/// Always writes 8 bytes and returns number of valid bytes written, rest are zero. | |
/// Caller must ensure output buffer has enough space. | |
pub fn encodeVar(data: [*]u8, value: anytype) u8 { | |
var n: u64 = undefined; | |
const info = @typeInfo(@TypeOf(value)).int; | |
// If input is signed, zigzag encode sign to least significant bit. | |
if(info.signedness == .signed) { | |
n = @intCast((value >> (info.bits - 1)) ^ (value << 1)); | |
} else n = @intCast(value); | |
// Number of groups of 7 bits required to represent entire input number. | |
const len = @divFloor(70 - @clz(n | 1), 7); | |
// Split into 2 groups of 4x7 bits with a 4-bit gap. | |
var mask: u64 = (1 << 28) - 1; | |
n = (n & mask) | ((n << 4) & (mask << 32)); | |
// Split into 4 groups of 2x7 bits with 2-bit gaps. | |
mask = splat64(u32, (1 << 14) - 1); | |
n = (n & mask) | ((n << 2) & (mask << 16)); | |
// Split into 8 groups of 7 bits with 1-bit gaps and set the bits in the gaps. | |
mask = splat64(u16, (1 << 7) - 1); | |
n = (n & mask) | ((n << 1) & (mask << 8)) | sign_mask; | |
// Mask out everything but populated 7-bit groups and gaps in between. | |
n &= (@as(u64, 1) << @intCast(len * 8 - 1)) - 1; | |
// Write 8 unaligned bytes. | |
std.mem.writeInt(u64, data[0..8], n, .little); | |
return len; | |
} | |
test { | |
var buffer: [32]u8 = undefined; | |
var rng = std.Random.DefaultPrng.init(1); | |
const expectEqual = std.testing.expectEqual; | |
const rand = rng.random(); | |
inline for(1..57) |size| { | |
comptime var info = @typeInfo(i32); | |
info.int.bits = size; | |
const Type = @Type(info); | |
info.int.signedness = .unsigned; | |
const UType = @Type(info); | |
for(0..100) |_| { | |
const a = @as(u64, rand.int(UType)); | |
const b = @as(u64, rand.int(UType)); | |
const c = @as(i64, rand.int(Type)); | |
const d = @as(i64, rand.int(Type)); | |
// std.debug.print("{} {}\n", .{ a, b }); | |
var write_stream: [*]u8 = &buffer; | |
write_stream += encodeVar(write_stream, a); | |
write_stream += encodeVar(write_stream, b); | |
write_stream += encodeVar(write_stream, c); | |
write_stream += encodeVar(write_stream, d); | |
var read_stream: [*]const u8 = &buffer; | |
try expectEqual(a, decodeVar(&read_stream, u64)); | |
try expectEqual(b, decodeVar(&read_stream, u64)); | |
try expectEqual(c, decodeVar(&read_stream, i64)); | |
try expectEqual(d, decodeVar(&read_stream, i64)); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment