3 BYTE TANGENT FRAMES + bitmask array for TBN sign. Doom Eternal 2020

3-Byte Tangent Frame Encoding (Doom Eternal 2020)

This method stores a full TBN frame using just 3 bytes per vertex (plus a bitfield), instead of the traditional 28 bytes (normal: vec3, tangent: vec4). It leverages octahedral encoding for normals and a rotation angle for tangent reconstruction.

Motivation

Storing both glm::vec3 normal and glm::vec4 tangent is costly. Since the bitangent can be reconstructed via a cross product, we only need:

A unit normal (encoded in 2 bytes)
An angle to rotate a base tangent around the normal (1 byte)
A sign bit for the handedness (one bit per vertex, stored in a separate packed bit array)

CPU-Side Code (C++ with GLM + meshoptimizer)

/// Integer division of `value` by `div`, rounded up to the next whole number.
///
/// Computed as quotient plus a remainder check rather than
/// `(value + div - 1) / div`, because the latter wraps around when `value`
/// is close to UINT32_MAX and then returns a result that is far too small.
///
/// @param value Dividend.
/// @param div   Divisor; must be non-zero (division by zero is undefined).
/// @return ceil(value / div).
uint32_t RoundUpDiv(uint32_t value, uint32_t div) noexcept {
    return value / div + ((value % div != 0u) ? 1u : 0u);
}

/// Builds a deterministic unit vector perpendicular to `normal`.
///
/// The vector is obtained by swapping two components of the normal and
/// negating one of them; which pair is used depends on whether |x| exceeds
/// |z|, so the chosen pair is never (0, 0) for a non-zero normal.
/// The GPU decoder rebuilds the same vector with the same rule, so any change
/// here must be mirrored there.
///
/// @param normal Surface normal (expected to be non-zero).
/// @return Normalized reference tangent lying in the plane orthogonal to `normal`.
glm::vec3 CalculateTangentBase(const glm::vec3& normal) noexcept {
    const bool xDominatesZ = glm::abs(normal.x) > glm::abs(normal.z);
    if (xDominatesZ) {
        // Rotate (x, y) by 90 degrees inside the XY plane.
        return glm::normalize(glm::vec3{-normal.y, normal.x, 0.0f});
    }
    // Rotate (y, z) by 90 degrees inside the YZ plane.
    return glm::normalize(glm::vec3{0.0f, -normal.z, normal.y});
}

/// Computes how far the reference tangent must be rotated around `normal`
/// to reach `tangent`, expressed as a fraction of a full turn.
///
/// The rotation is measured in the basis (Tb, cross(normal, Tb)), i.e. the
/// tangent is reconstructed as
///     Tb * cos(a) + cross(normal, Tb) * sin(a),  a = result * 2*pi.
/// A consumer that stores the result as an 8-bit unorm must therefore scale
/// the unpacked value by 2*pi before taking sin/cos.
///
/// atan2 is used instead of acos(dot) because acos only covers [0, pi]
/// (the rotation direction would be lost) and yields NaN when rounding pushes
/// the dot product slightly outside [-1, 1]. atan2 is also insensitive to the
/// length of `tangent`, so the tangent does not have to be normalized.
///
/// @param normal  Unit surface normal.
/// @param tangent Tangent direction (any non-zero length).
/// @return Rotation in [0, 1], where 0 and 1 both denote "no rotation".
float EncodeTangentAngle(const glm::vec3& normal, const glm::vec3& tangent) noexcept {
    constexpr float kTwoPi = 6.28318530717958647692f;

    const glm::vec3 Tb = CalculateTangentBase(normal);
    const glm::vec3 Bb = glm::cross(normal, Tb);

    // Project the tangent onto the two in-plane basis vectors.
    const float cosTerm = glm::dot(tangent, Tb);
    const float sinTerm = glm::dot(tangent, Bb);

    float angle = std::atan2(sinTerm, cosTerm); // [-pi, pi]
    if (angle < 0.0f) {
        angle += kTwoPi;                        // remap to [0, 2*pi]
    }
    return angle / kTwoPi;
}

/// Folds a point of the lower octahedron half over the diagonals of the unit
/// square so that it lands in the outer triangles of the octahedral map.
///
/// Each output component is `1 - |other component|`, carrying the sign of the
/// corresponding input component (non-negative inputs map to a positive sign).
glm::vec2 OctWrap(const glm::vec2& v) noexcept {
    const float foldedX = 1.0f - glm::abs(v.y);
    const float foldedY = 1.0f - glm::abs(v.x);
    return glm::vec2(
        (v.x < 0.0f) ? -foldedX : foldedX,
        (v.y < 0.0f) ? -foldedY : foldedY);
}

/// Octahedral-encodes a direction into two values in [0, 1].
///
/// The vector is projected onto the octahedron |x| + |y| + |z| = 1; the upper
/// half (z > 0) keeps its xy coordinates, the lower half is folded outwards
/// with OctWrap. The resulting [-1, 1] coordinates are remapped to [0, 1].
///
/// @param n Direction to encode; it is divided by its L1 norm, so it need not
///          be unit length but must not be the zero vector.
/// @return Encoded coordinates, each in [0, 1].
glm::vec2 EncodeOct(glm::vec3 n) noexcept {
    const float l1Norm = glm::abs(n.x) + glm::abs(n.y) + glm::abs(n.z);
    n /= l1Norm;

    glm::vec2 projected(n.x, n.y);
    if (!(n.z > 0.0f)) {
        projected = OctWrap(projected);
    }
    return projected * 0.5f + 0.5f;
}

Encoding Pass

// Given array of normals and tangents.
std::vector<glm::vec3> rawNormals;
std::vector<glm::vec4> rawTangents;

// Encoding normals into 2 bytes.
std::vector<uint16_t> encodedNormals(rawNormals.size());
for (size_t i = 0; i < rawNormals.size(); ++i) {
    const glm::vec2 onv = EncodeOct(rawNormals[i]);
    const uint8_t x = meshopt_quantizeSnorm(onv.x, 8) + 127;
    const uint8_t y = meshopt_quantizeSnorm(onv.y, 8) + 127;
    encodedNormals[i] = static_cast<uint16_t>((y << 8) | x);
}

// Encoding tanget rotation angle into 1 byte.
// Build tangent sign bit array.
std::vector<uint8_t> encodedAngles(rawTangents.size());
std::vector<uint8_t> tangentSigns(RoundUpDiv((uint32_t)rawTangents.size(), 8), 0);

for (size_t i = 0; i < rawTangents.size(); ++i) {
    const glm::vec4& t = rawTangents[i];
    if (t.w > 0.0f)
        tangentSigns[i / 8] |= (1u << (i % 8));

    const glm::vec3 normalizedNormal = glm::normalize(rawNormals[i]);
    const glm::vec3 T(t);
    const float angle = EncodeTangentAngle(normalizedNormal, T);
    encodedAngles[i] = glm::packUnorm1x8(angle);
}

GPU-Side Code (Slang/HLSL Style)

// GPU-side inputs used by the decode functions; every buffer is indexed by vertex.
// Normals:  one 16-bit value per vertex, octahedral x in the low byte and y in the high byte.
// Tangents: one byte per vertex holding the quantized tangent rotation angle.
// TSigns:   packed bit array, one bit per vertex (bit (i % 8) of byte (i / 8)).
const uint16_t* Normals;
const uint8_t* Tangents;
const uint8_t* TSigns;  // Bit array. 0 - negative, 1 - positive.

// Converts an 8-bit unsigned-normalized value back to a float in [0, 1]
// (0 -> 0.0, 255 -> 1.0).
float UnpackUnorm1x8(in const uint8_t p) {
    const float byteValue = float(p);
    return byteValue / 255.0f;
}

// Decodes an octahedral-encoded direction.
//
// f: encoded coordinates in [0, 1] (as produced by the CPU-side EncodeOct).
// Returns the reconstructed unit vector.
float3 DecodeOct(in float2 f) {
    // Back to the [-1, 1] octahedral square.
    f = f * 2.0f - 1.0f;

    // On the octahedron |x| + |y| + |z| = 1, so z follows from x and y.
    // A negative z marks a point of the folded lower hemisphere.
    float3 n = float3(f.x, f.y, 1.0f - abs(f.x) - abs(f.y));

    // t is |z| for the lower hemisphere and 0 for the upper one.
    const float t = max(-n.z, 0.0f);

    // Undo the fold: the encoder pushed lower-hemisphere points AWAY from the
    // origin by |z| along each axis, so the magnitude has to be pulled back
    // towards zero. (Adding sign(n) * t instead would double the offset and,
    // for example, decode (0, 0, -1) as a diagonal vector.)
    n.x += (n.x >= 0.0f) ? -t : t;
    n.y += (n.y >= 0.0f) ? -t : t;

    return normalize(n);
}

// Fetches and decodes the normal of the given vertex.
//
// The low byte of the packed value holds the octahedral x coordinate and the
// high byte holds y. Dividing by 127 and subtracting 1 undoes the encoder's
// "+127" bias and yields the [0, 1] coordinates DecodeOct expects.
// DecodeOct already returns a normalized vector, so no further normalization
// is applied here.
float3 UnpackNormal(in const uint vertexIndex) {
    const uint16_t packed = Normals[vertexIndex];
    float2 f;
    f.x = float(packed & 0xFF) / 127.0f - 1.0f;
    f.y = float((packed >> 8) & 0xFF) / 127.0f - 1.0f;
    return DecodeOct(f);
}

// Reads the handedness of a vertex from the packed sign bit array.
// Vertex i is stored in bit (i % 8) of byte (i / 8).
// Returns +1.0 when the bit is set and -1.0 otherwise.
float UnpackTSign(in const uint vertexIndex) {
    const uint containingByte = TSigns[vertexIndex >> 3];
    const uint bitInByte = vertexIndex & 7u;
    const bool isPositive = ((containingByte >> bitInByte) & 1u) != 0u;
    return isPositive ? 1.0f : -1.0f;
}

// Reconstructs the tangent of the given vertex.
//
// Returns xyz = unit tangent, w = handedness (+1 or -1), so the bitangent can
// be derived as cross(normal, tangent.xyz) * tangent.w.
//
// Storage contract for Tangents[]: the byte is an 8-bit unorm holding the
// rotation around the normal as a FRACTION OF A FULL TURN (0 -> 0, 255 -> 2*pi).
// It therefore has to be scaled by 2*pi before it is used as an angle; using
// the raw [0, 1] value as radians would limit the rotation to about 57 degrees.
float4 UnpackTangent(in const uint vertexIndex) {
    const float kTwoPi = 6.28318530718f;

    const float3 n = UnpackNormal(vertexIndex);

    // Reference tangent perpendicular to n; must use the same selection rule
    // as the CPU-side CalculateTangentBase.
    const float3 Tb = abs(n.x) > abs(n.z)
        ? normalize(float3(-n.y, n.x, 0.0f))
        : normalize(float3(0.0f, -n.z, n.y));

    const float angle = UnpackUnorm1x8(Tangents[vertexIndex]) * kTwoPi;
    // Named "handedness" rather than "sign" to avoid shadowing the sign() intrinsic.
    const float handedness = UnpackTSign(vertexIndex);

    // Rotate the reference tangent around n by `angle` inside the tangent plane.
    const float3 t = normalize(Tb * cos(angle) + cross(n, Tb) * sin(angle));
    return float4(t, handedness);
}

✅ Summary

Efficient: Only 3 bytes + 1 bit per vertex
Portable: Works across CPU and GPU
Flexible: Full TBN reconstruction at runtime

wiseConst/3_byte_tangent_frames.md