Skip to content

Instantly share code, notes, and snippets.

@Bigfoot71
Created February 17, 2026 20:55
Show Gist options
  • Select an option

  • Save Bigfoot71/52bc3a075af40c48d17d009660364a6a to your computer and use it in GitHub Desktop.

Select an option

Save Bigfoot71/52bc3a075af40c48d17d009660364a6a to your computer and use it in GitHub Desktop.
Interleaved SSGI - v1
#!/usr/bin/env python3
import argparse
import math
import sys
import warnings
from math import gcd
import numpy as np
# One full turn in radians (2*pi); shared by the azimuth and rotation-phase steps.
TAU = math.tau
def is_pow2(x: int) -> bool:
    """Return True when *x* is a positive integer power of two."""
    if x <= 0:
        return False
    # A power of two has a single set bit, so clearing the lowest set bit
    # (x & (x - 1)) must leave nothing behind.
    return (x & (x - 1)) == 0
def warn(msg: str) -> None:
    """Emit *msg* as a RuntimeWarning attributed to the caller of this helper."""
    # stacklevel=2 makes the warning point at the call site instead of this wrapper.
    warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2)
def fmt_f(x: float) -> str:
    """
    Format *x* as a compact fixed-point literal with at most 7 decimals.

    Trailing zeros and a dangling decimal point are stripped, so 0.5 becomes
    "0.5" and 1.0 becomes "1". Values that round to zero are always rendered
    as "0", never "-0".
    """
    text = f"{x:.7f}".rstrip("0").rstrip(".")
    # A tiny negative input (or -0.0) formats as "-0.0000000", which the
    # stripping above reduces to "-0"; normalize that to a plain zero.
    return "0" if text == "-0" else text
def ring_cos(ring: int, ring_count: int, ring_power: float) -> float:
    """
    Return the cosine of the elevation angle for ring *ring* of *ring_count*.

    The ring is sampled at the midpoint of its stratum, (ring + 0.5) / ring_count,
    and shaped by the exponent 1 / (ring_power + 1).
    """
    midpoint = (ring + 0.5) / ring_count
    exponent = 1.0 / (ring_power + 1.0)
    return (1.0 - midpoint) ** exponent
def rotate_z(x: float, y: float, ang: float) -> tuple[float, float]:
    """Rotate the point (x, y) by *ang* radians about the Z axis and return the new (x, y)."""
    cos_a, sin_a = math.cos(ang), math.sin(ang)
    # Standard 2D rotation matrix [[c, -s], [s, c]] applied to (x, y).
    rx = cos_a * x - sin_a * y
    ry = sin_a * x + cos_a * y
    return (rx, ry)
def iter_dirs(azN: int, rgN: int, phN: int, ring_power: float):
    """
    Return an iterator over unit (x, y, z) directions in LUT order.

    Directions are produced in exactly this indexing order:
        index = phase*(azN*rgN) + ring*azN + az

    Args:
        azN: number of azimuth slices (must be >= 1).
        rgN: number of elevation rings (must be >= 1).
        phN: number of rotation phases (must be >= 1).
        ring_power: ring shaping exponent forwarded to ``ring_cos``.

    Raises:
        ValueError: if any count is below 1. The check runs at call time,
            before the first direction is requested, so a bad count is
            reported clearly instead of as a division by zero (count == 0)
            or as a silently empty sequence (count < 0).
    """
    for label, count in (("azN", azN), ("rgN", rgN), ("phN", phN)):
        if count < 1:
            raise ValueError(f"{label} must be >= 1 (got {count})")

    az_step = TAU / azN
    ph_step = TAU / phN

    # Elevation depends only on the ring index, so compute each ring's
    # (cos, sin) pair once instead of once per rotation phase.
    rings = []
    for ring in range(rgN):
        cosT = ring_cos(ring, rgN, ring_power)
        sinT = math.sqrt(max(0.0, 1.0 - cosT * cosT))
        rings.append((cosT, sinT))

    def generate():
        for phase in range(phN):
            # Phases are sampled at the centre of their angular bucket.
            ang = (phase + 0.5) * ph_step
            for cosT, sinT in rings:
                for az in range(azN):
                    phi = az * az_step
                    x = math.cos(phi) * sinT
                    y = math.sin(phi) * sinT
                    z = cosT
                    x, y = rotate_z(x, y, ang)
                    # Re-normalize to absorb floating-point drift.
                    inv_len = 1.0 / math.sqrt(x * x + y * y + z * z)
                    yield (x * inv_len, y * inv_len, z * inv_len)

    return generate()
def main() -> int:
    """
    Command-line entry point: generate the combined AZIM/RING/ROT direction LUT.

    Parses the command line, validates the parameters, then writes the LUT
    either as GLSL source (to stdout or ``--out``) or as a float16 ``.raw``
    file (RGBA16F by default, RGB16F with ``--raw-rgb``).

    Returns:
        0 on success. Invalid arguments terminate through ``argparse``
        (usage message on stderr, exit status 2).
    """
    ap = argparse.ArgumentParser(
        description="Generate combined AZIM/RING/ROT direction LUT as GLSL or float16 .raw"
    )
    ap.add_argument("--format", choices=("glsl", "raw"), default="glsl", help="Output format (default: glsl)")
    ap.add_argument("--out", default="-", help="Output file path. '-' = stdout for glsl (default: '-')")
    # RAW layout: RGBA16F by default (alignment-friendly), optional RGB16F
    g = ap.add_mutually_exclusive_group()
    g.add_argument("--raw-rgb", action="store_true", help="RAW only: write RGB16F (3 half / entry)")
    g.add_argument("--raw-rgba", action="store_true", help="RAW only: write RGBA16F (4 half / entry) [default]")
    # NOTE: --tile-log2 is accepted but not read anywhere in this function.
    ap.add_argument("--tile-log2", type=int, default=2)
    ap.add_argument("--azim-count", type=int, default=16)
    ap.add_argument("--ring-count", type=int, default=4)
    ap.add_argument("--azim-step", type=int, default=5)
    ap.add_argument("--ring-step", type=int, default=3)
    ap.add_argument("--ring-power", type=float, default=3.0)
    ap.add_argument("--rot-phases", type=int, default=64)
    ap.add_argument("--rot-bits", type=int, default=8)
    args = ap.parse_args()

    azN = args.azim_count
    rgN = args.ring_count
    phN = args.rot_phases

    # Hard errors: these values cannot produce a usable LUT at all
    # (division by zero, or an empty / negatively sized table).
    for flag, v in (("--azim-count", azN), ("--ring-count", rgN), ("--rot-phases", phN)):
        if v < 1:
            ap.error(f"{flag} must be >= 1 (got {v})")
    # ring_cos() uses the exponent 1 / (ring_power + 1); written as "not >"
    # so that NaN is rejected as well.
    if not args.ring_power > -1.0:
        ap.error(f"--ring-power must be > -1 (got {args.ring_power})")

    # Soft validations: the output is still generated.
    for name, v in [("AZIM_COUNT", azN), ("RING_COUNT", rgN), ("ROT_PHASES", phN)]:
        if not is_pow2(v):
            warn(f"{name} should be a power of two (got {v}). Output will still be generated.")
    if gcd(args.azim_step, azN) != 1:
        warn("AZIM_STEP should be coprime with AZIM_COUNT (sequence may not cycle nicely).")
    if gcd(args.ring_step, rgN) != 1:
        warn("RING_STEP should be coprime with RING_COUNT (sequence may not cycle nicely).")
    # Bits needed to index phN phases, i.e. ceil(log2(phN)). Truncating
    # log2 would under-report for non-power-of-two counts (48 -> 5, needs 6).
    min_bits = (phN - 1).bit_length()
    if args.rot_bits < min_bits:
        warn(f"ROT_BITS should be >= log2(ROT_PHASES) (need >= {min_bits}, got {args.rot_bits}).")
    if args.format != "raw" and (args.raw_rgb or args.raw_rgba):
        warn("--raw-rgb/--raw-rgba only apply to --format raw and are ignored for glsl output.")

    ring_stride = azN
    phase_stride = azN * rgN
    lut_size = azN * rgN * phN
    dirs = iter_dirs(azN, rgN, phN, args.ring_power)

    if args.format == "raw":
        out_path = args.out if args.out != "-" else "dir_lut.raw"
        # Default = RGBA unless explicitly --raw-rgb
        channels = 3 if args.raw_rgb else 4
        layout = "RGB" if channels == 3 else "RGBA"
        bytes_per_entry = channels * 2  # float16 per channel
        total_bytes = lut_size * bytes_per_entry
        print("[LUT]", file=sys.stderr)
        print(f" entries : {lut_size}", file=sys.stderr)
        print(f" layout : {layout} float16 ({channels} half / entry)", file=sys.stderr)
        print(f" bytes/entry : {bytes_per_entry}", file=sys.stderr)
        print(f" total bytes : {total_bytes}", file=sys.stderr)
        print(f" AZIM/RING/PHASE: {azN}/{rgN}/{phN}", file=sys.stderr)
        print(f" strides : ring={ring_stride}, phase={phase_stride}", file=sys.stderr)
        print(f" indexing : idx = phase*{phase_stride} + ring*{ring_stride} + az", file=sys.stderr)
        print(f" out : {out_path}", file=sys.stderr)

        # RGBA16F aligned: W = 0.0 (unused); RGB16F has no padding.
        pad = () if channels == 3 else (0.0,)
        chunk = []
        chunk_limit = 8192 * channels  # floats buffered before each flush
        with open(out_path, "wb") as f:
            for x, y, z in dirs:
                chunk.extend((x, y, z, *pad))
                if len(chunk) >= chunk_limit:
                    np.array(chunk, dtype=np.float16).tofile(f)
                    chunk.clear()
            if chunk:
                np.array(chunk, dtype=np.float16).tofile(f)
        return 0

    # GLSL output
    out = sys.stdout if args.out == "-" else open(args.out, "w", encoding="utf-8")
    try:
        print("// ---- Generated AZIM/RING/ROT direction LUT ----", file=out)
        print("// Indexing:", file=out)
        print(f"// index = phase * {phase_stride} + ring * {ring_stride} + az;", file=out)
        print("", file=out)
        print(f"const uint LUT_AZIM_COUNT = {azN}u;", file=out)
        print(f"const uint LUT_RING_COUNT = {rgN}u;", file=out)
        print(f"const uint LUT_ROT_PHASES = {phN}u;", file=out)
        print(f"const uint LUT_RING_STRIDE = {ring_stride}u;", file=out)
        print(f"const uint LUT_PHASE_STRIDE = {phase_stride}u;", file=out)
        print(f"const uint LUT_SIZE = {lut_size}u;", file=out)
        print("", file=out)
        print("const vec3 DIR_LUT[LUT_SIZE] = vec3[](", file=out)
        for i, (x, y, z) in enumerate(dirs):
            if i:
                # Separator goes before every entry except the first.
                print(",", file=out)
            print(f" vec3({fmt_f(x)}, {fmt_f(y)}, {fmt_f(z)})", end="", file=out)
        print("\n);", file=out)
    finally:
        # Close the file even when generation fails part-way; never close stdout.
        if out is not sys.stdout:
            out.close()
    return 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
/*
MIT License
Copyright (c) 2026 Le Juez Victor
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
/*
Screen-Space GI (raymarching) - Proof of Concept (no TAA / no temporal accumulation)
This shader traces a few screen-space rays per pixel and accumulates a cheap
indirect-light estimate from hit surfaces (diffuse + optional history term).
The key goal is maximum spatial stability without temporal reprojection.
We use a deterministic interleaved sampling pattern, plus a structured,
tile-stable rotation field to break structured artifacts.
--------------------------------------------------------------------------
PARAMETER OVERVIEW (constant parameters)
--------------------------------------------------------------------------
TILE_LOG2
Size of the interleaving tile in pixels (power of two).
2 => 4x4 tile. This defines the pixel-stable interleaved pattern.
AZIM_COUNT / RING_COUNT
Discrete hemisphere direction set size:
- AZIM_COUNT: number of azimuth slices (around the normal).
- RING_COUNT: number of elevation rings (from tight to wide).
Both are powers of two for cheap masking.
AZIM_STEP / RING_STEP
Step increments used to walk the direction set across samples.
They must be coprime with AZIM_COUNT / RING_COUNT so that the sequence
cycles through all directions instead of repeating early.
RING_POWER
Controls how much the rings concentrate near the normal.
Higher = more samples near the normal (tighter lobes).
Lower = more spread toward grazing directions.
ROT_PHASES / ROT_BITS
Structured rotation field used to rotate the local direction pattern.
- ROT_PHASES: number of quantized rotation angles (power of two).
- ROT_BITS: number of bits extracted from the hash (must be >= log2(ROT_PHASES)).
Rotation is tile-stable (same tiling as interleaving) to avoid shimmer.
Using masked low bits for the phase is intentional (helps avoid banding).
--------------------------------------------------------------------------
UNIFORMS (runtime knobs)
--------------------------------------------------------------------------
uSampleCount : rays per pixel (typical default: 2)
uMaxRaySteps : max raymarch iterations
uStepSize : step length in view space
uThickness : depth thickness test (view-space z gap)
uMaxDistance : max ray length (view space)
uFadeStart/End : depth-based fade for the final GI output
*/
#version 330 core
/* === Includes === */
#include "../include/blocks/view.glsl"
#include "../include/math.glsl"
/* === Varyings === */
noperspective in vec2 vTexCoord;
/* === Uniforms === */
// Colour sampled at the ray hit point and added to the diffuse term in TraceRay.
uniform sampler2D uHistoryTex;
// Diffuse colour sampled at the ray hit point in TraceRay.
uniform sampler2D uDiffuseTex;
// Normal buffer, decoded by V_GetViewNormal() in main().
uniform sampler2D uNormalTex;
// Depth buffer; only .r is read. TraceRay negates it before comparing with
// view-space z — presumably positive linear view depth, TODO confirm.
uniform sampler2D uDepthTex;
// Rays per pixel (main() clamps it to at least 1).
uniform int uSampleCount;
// Upper bound of the raymarch loop counter in TraceRay.
uniform int uMaxRaySteps;
// Step length in view space.
uniform float uStepSize;
// Depth thickness test (view-space z gap accepted as a hit).
uniform float uThickness;
// Max ray length (view space).
uniform float uMaxDistance;
// Depth-based fade range for the final GI output; fragments with
// depth >= uFadeEnd are skipped entirely.
uniform float uFadeStart;
uniform float uFadeEnd;
/* === Constant Parameters === */
// Interleaving / stability tile (power of two)
const uint TILE_LOG2 = 2u; // 2 => 4x4 tile
// Direction set (power of two counts, so indices can wrap with a bit mask)
const uint AZIM_COUNT = 16u;
const uint RING_COUNT = 4u;
// Must be coprime with AZIM_COUNT / RING_COUNT (so you cycle)
const uint AZIM_STEP = 5u;
const uint RING_STEP = 3u;
// Ring distribution shaping
const float RING_POWER = 3.0; // larger => tighter toward normal
// Rotation quantization (power of two)
const uint ROT_PHASES = 64u; // 32/64/128...
const uint ROT_BITS = 8u; // must be >= log2(ROT_PHASES)
/* === Derived Constants === */
// Tile edge length in pixels and the mask extracting the tile-local coordinate.
const uint TILE_SIZE = 1u << TILE_LOG2;
const uint TILE_MASK = TILE_SIZE - 1u;
// Wrap masks; valid only while the matching counts are powers of two.
const uint AZIM_MASK = AZIM_COUNT - 1u;
const uint RING_MASK = RING_COUNT - 1u;
const uint ROT_MASK = ROT_PHASES - 1u;
// Angular size of one azimuth slice / one rotation phase.
// M_TAU is not declared in this file — presumably 2*pi from ../include/math.glsl.
const float AZIM_PHI_STEP = M_TAU / float(AZIM_COUNT);
const float ROT_ANG_STEP = M_TAU / float(ROT_PHASES);
/* === Outputs === */
out vec4 FragColor;
/* === Helper Functions === */
// Maps a tile cell coordinate to a stable integer state in 0..(2^bits - 1).
// Used as a structured (non-random) rotation field.
// The top `bits` bits of the mixed value are returned; `bits` is expected to
// stay within 1..32 so that the shift amount stays below the word size.
uint TileCellHash(uvec2 cell, uint bits)
{
    // Per-axis multiply by a large odd constant; unsigned overflow wraps.
    uvec2 scaled = cell * uvec2(0x9E3779B9u, 0xBB67AE85u);
    uint mixed = scaled.x + scaled.y;
    uint shift = 32u - bits;
    return mixed >> shift;
}
// Stratified ring elevation (tight -> wide).
// Returns cos(theta) for the given ring, sampled at the midpoint of its
// stratum and shaped by RING_POWER; works for any RING_COUNT.
float RingCos(uint ring)
{
    float exponent = 1.0 / (RING_POWER + 1.0);
    float u = (float(ring) + 0.5) / float(RING_COUNT); // (0..1)
    float base = 1.0 - u;
    return pow(base, exponent);
}
// Builds a tangent-space hemisphere direction (Z = local normal) from an
// azimuth index and a ring index.
vec3 DirFromAzRing(uint az, uint ring)
{
    // Elevation from the ring; sin is clamped against tiny negative radicands.
    float cosTheta = RingCos(ring);
    float sinTheta = sqrt(max(0.0, 1.0 - cosTheta * cosTheta));

    // Azimuth around the normal.
    float phi = float(az) * AZIM_PHI_STEP;
    vec2 planar = vec2(cos(phi) * sinTheta, sin(phi) * sinTheta);

    return vec3(planar, cosTheta);
}
// Rotates v by angle a (radians) around the Z axis; in tangent space this
// spins the direction around the local normal. The z component is untouched.
vec3 RotateAroundZ(vec3 v, float a)
{
    float cosA = cos(a);
    float sinA = sin(a);
    float rx = cosA * v.x - sinA * v.y;
    float ry = sinA * v.x + cosA * v.y;
    return vec3(rx, ry, v.z);
}
// Simple linear screen-space raymarch.
// This is intentionally kept basic for clarity; any other traversal method can be
// substituted here without changing the sampling/distribution logic of this shader.
//
// startViewPos : ray origin in view space
// dirVS        : ray direction in view space
// Returns the (diffuse + history) colour at the first hit, attenuated by the
// travelled distance, or vec3(0.0) when nothing is hit.
vec3 TraceRay(vec3 startViewPos, vec3 dirVS)
{
    vec3 stepVS = dirVS * uStepSize;
    // Track the travelled distance itself. After k equal steps the distance is
    // k * stepLen, so a squared length accumulated additively (k * stepLen^2)
    // would under-estimate it and let rays run far past uMaxDistance.
    float stepLen = length(stepVS);

    // The first sample sits one step away from the origin.
    vec3 posVS = startViewPos + stepVS;
    float dist = stepLen;

    vec2 hitUV = vec2(0.0);
    bool hit = false;

    for (int i = 1; i < uMaxRaySteps; i++)
    {
        if (dist > uMaxDistance) break;

        vec2 uv = V_ViewToScreen(posVS);
        if (V_OffScreen(uv)) break;

        // Depth is negated to compare against view-space z.
        float sceneZ = -textureLod(uDepthTex, uv, 0).r;
        float dz = sceneZ - posVS.z;

        // Hit when the ray is behind the surface but within the thickness slab.
        if (dz > 0.0 && dz < uThickness) {
            hitUV = uv;
            hit = true;
            break;
        }

        posVS += stepVS;
        dist += stepLen;
    }

    if (!hit) return vec3(0.0);

    vec3 hist = textureLod(uHistoryTex, hitUV, 0).rgb;
    vec3 diff = textureLod(uDiffuseTex, hitUV, 0).rgb;

    // Contribution fades to zero as the hit approaches uMaxDistance.
    float distFade = 1.0 - smoothstep(0.0, uMaxDistance, dist);
    return (diff + hist) * distFade;
}
/* === Main Program === */
// Fragment entry point: traces uSampleCount (at least 1) screen-space rays
// from the current pixel and writes the averaged, depth-faded GI estimate.
void main()
{
    // Early depth fade-out.
    ivec2 pix = ivec2(gl_FragCoord.xy);
    float depth = texelFetch(uDepthTex, pix, 0).r;
    if (depth >= uFadeEnd) { FragColor = vec4(0.0); return; }

    // G-buffer reconstruction.
    vec3 Nvs = V_GetViewNormal(uNormalTex, pix);
    vec3 Pvs = V_GetViewPosition(vTexCoord, depth);
    mat3 TBN = M_OrthonormalBasis(Nvs);

    // 1) Interleaved per-pixel base index (tile-local, perfectly stable in screen space).
    //    Every pixel in the tile gets a different base direction.
    uint idx = (uint(pix.x) & TILE_MASK) | ((uint(pix.y) & TILE_MASK) << TILE_LOG2);
    uint baseAz = idx & AZIM_MASK;
    uint baseRing = idx & RING_MASK;

    // 2) Structured rotation field (tile-stable).
    //    The rotation is computed per tile cell (same tiling as idx) for maximum stability.
    //    Masking to ROT_MASK yields a phase in [0..ROT_PHASES-1].
    uvec2 cell = uvec2(pix) >> TILE_LOG2;
    uint h = TileCellHash(cell, ROT_BITS) & ROT_MASK;

    // 3) For each sample:
    //    - Walk the azimuth/ring sequence deterministically (stratified across samples).
    //    - Apply the tile-stable quantized rotation phase (offset by sample index).
    //    - Transform to view space and raymarch.
    vec3 gi = vec3(0.0);
    uint S = uint(max(uSampleCount, 1));
    for (uint s = 0u; s < S; ++s)
    {
        uint az = (baseAz + s * AZIM_STEP) & AZIM_MASK;
        uint ring = (baseRing + s * RING_STEP) & RING_MASK;
        vec3 dirLocal = DirFromAzRing(az, ring);

        uint phase = (h + s) & ROT_MASK;
        float ang = (float(phase) + 0.5) * ROT_ANG_STEP;
        dirLocal = RotateAroundZ(dirLocal, ang);

        gi += TraceRay(Pvs, TBN * dirLocal);
    }

    // 4) Normalize, apply depth fade (1 at uFadeStart, 0 at uFadeEnd).
    //    smoothstep() is undefined when edge0 >= edge1, so the edges are passed
    //    in ascending order and the result inverted; this is the same curve.
    float fade = 1.0 - smoothstep(uFadeStart, uFadeEnd, depth);
    FragColor = vec4(gi * (1.0 / float(S)) * fade, 1.0);
}
/*
MIT License
Copyright (c) 2026 Le Juez Victor
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
#version 330 core
/* === Includes === */
#include "../include/blocks/view.glsl"
#include "../include/math.glsl"
/* === Varyings === */
noperspective in vec2 vTexCoord;
/* === Uniforms === */
// Colour sampled at the ray hit point and added to the diffuse term in TraceRay.
uniform sampler2D uHistoryTex;
// Diffuse colour sampled at the ray hit point in TraceRay.
uniform sampler2D uDiffuseTex;
// Normal buffer, decoded by V_GetViewNormal() in main().
uniform sampler2D uNormalTex;
// Depth buffer; only .r is read. TraceRay negates it before comparing with
// view-space z — presumably positive linear view depth, TODO confirm.
uniform sampler2D uDepthTex;
// Precomputed direction table, read with texelFetch at
// phase * LUT_PHASE_STRIDE + ring * LUT_RING_STRIDE + az (see DirFromLUT).
uniform sampler1D uLutTex;
// Rays per pixel.
uniform int uSampleCount;
// Upper bound of the raymarch loop counter in TraceRay.
uniform int uMaxRaySteps;
// Step length applied to the ray direction on each march iteration.
uniform float uStepSize;
// Maximum z gap behind a surface that still counts as a hit.
uniform float uThickness;
// Maximum ray length; also the far end of the distance fade.
uniform float uMaxDistance;
// Depth-based fade range for the final output; fragments with
// depth >= uFadeEnd are skipped entirely.
uniform float uFadeStart;
uniform float uFadeEnd;
/* === Constants === */
// These constants correspond to the default parameters of the LUT generator.
// They must be kept in sync with the values the LUT bound to uLutTex was
// generated with, otherwise DirFromLUT indexes the wrong entries.

// Interleaving tile: 2 => 4x4 pixels.
const uint TILE_LOG2 = 2u;
const uint TILE_SIZE = 1u << TILE_LOG2;
const uint TILE_MASK = TILE_SIZE - 1u;
// Direction set size (azimuth slices / elevation rings).
const uint AZIM_COUNT = 16u;
const uint RING_COUNT = 4u;
// Per-sample increments used to walk the direction set in main().
const uint AZIM_STEP = 5u;
const uint RING_STEP = 3u;
// Number of rotation phases and number of hash bits requested from TileCellHash.
const uint ROT_PHASES = 64u;
const uint ROT_BITS = 8u;
// Wrap masks; valid only while the matching counts are powers of two.
const uint AZIM_MASK = AZIM_COUNT - 1u;
const uint RING_MASK = RING_COUNT - 1u;
const uint ROT_MASK = ROT_PHASES - 1u;
// LUT layout strides: index = phase * LUT_PHASE_STRIDE + ring * LUT_RING_STRIDE + az.
const uint LUT_RING_STRIDE = AZIM_COUNT;
const uint LUT_PHASE_STRIDE = AZIM_COUNT * RING_COUNT;
/* === Outputs === */
out vec4 FragColor;
/* === Helper Functions === */
// Maps a tile cell coordinate to a stable integer state in 0..(2^bits - 1).
// The top `bits` bits of the mixed value are returned; `bits` is expected to
// stay within 1..32 so that the shift amount stays below the word size.
uint TileCellHash(uvec2 cell, uint bits)
{
    // Per-axis multiply by a large odd constant; unsigned overflow wraps.
    uint hx = cell.x * 0x9E3779B9u;
    uint hy = cell.y * 0xBB67AE85u;
    uint mixed = hx + hy;
    return mixed >> (32u - bits);
}
// Fetches the precomputed direction for (phase, ring, az) from the 1D LUT.
// Entries are addressed as phase * LUT_PHASE_STRIDE + ring * LUT_RING_STRIDE + az;
// only the xyz channels of the texel are used.
vec3 DirFromLUT(uint phase, uint ring, uint az)
{
    uint texel = az + ring * LUT_RING_STRIDE + phase * LUT_PHASE_STRIDE;
    vec4 entry = texelFetch(uLutTex, int(texel), 0);
    return entry.xyz;
}
// Simple linear screen-space raymarch.
//
// startViewPos : ray origin in view space
// dirVS        : ray direction in view space (need not be exactly unit
//                length; the real step length is measured)
// Returns the (diffuse + history) colour at the first hit, attenuated by the
// travelled distance, or vec3(0.0) when nothing is hit.
vec3 TraceRay(vec3 startViewPos, vec3 dirVS)
{
    vec3 stepVS = dirVS * uStepSize;
    // Track the travelled distance itself. After k equal steps the distance is
    // k * stepLen, so a squared length accumulated additively (k * stepLen^2)
    // would under-estimate it and let rays run far past uMaxDistance.
    float stepLen = length(stepVS);

    // The first sample sits one step away from the origin.
    vec3 posVS = startViewPos + stepVS;
    float dist = stepLen;

    vec2 hitUV = vec2(0.0);
    bool hit = false;

    for (int i = 1; i < uMaxRaySteps; i++)
    {
        if (dist > uMaxDistance) break;

        vec2 uv = V_ViewToScreen(posVS);
        if (V_OffScreen(uv)) break;

        // Depth is negated to compare against view-space z.
        float sceneZ = -textureLod(uDepthTex, uv, 0).r;
        float dz = sceneZ - posVS.z;

        // Hit when the ray is behind the surface but within the thickness slab.
        if (dz > 0.0 && dz < uThickness) {
            hitUV = uv;
            hit = true;
            break;
        }

        posVS += stepVS;
        dist += stepLen;
    }

    if (!hit) return vec3(0.0);

    vec3 hist = textureLod(uHistoryTex, hitUV, 0).rgb;
    vec3 diff = textureLod(uDiffuseTex, hitUV, 0).rgb;

    // Contribution fades to zero as the hit approaches uMaxDistance.
    float distFade = 1.0 - smoothstep(0.0, uMaxDistance, dist);
    return (diff + hist) * distFade;
}
/* === Main Program === */
// Fragment entry point (LUT variant): traces at least one screen-space ray per
// pixel using directions fetched from uLutTex and writes the averaged,
// depth-faded GI estimate.
void main()
{
    // Early depth fade-out.
    ivec2 pix = ivec2(gl_FragCoord.xy);
    float depth = texelFetch(uDepthTex, pix, 0).r;
    if (depth >= uFadeEnd) { FragColor = vec4(0.0); return; }

    // G-buffer reconstruction.
    vec3 Nvs = V_GetViewNormal(uNormalTex, pix);
    vec3 Pvs = V_GetViewPosition(vTexCoord, depth);
    mat3 TBN = M_OrthonormalBasis(Nvs);

    // Tile-local interleaved base index: each pixel of a tile starts from a
    // different azimuth / ring.
    uint idx = (uint(pix.x) & TILE_MASK) | ((uint(pix.y) & TILE_MASK) << TILE_LOG2);
    uint baseAz = idx & AZIM_MASK;
    uint baseRing = idx & RING_MASK;

    // Tile-stable rotation phase in [0..ROT_PHASES-1].
    uvec2 cell = uvec2(pix) >> TILE_LOG2;
    uint h = TileCellHash(cell, ROT_BITS) & ROT_MASK;

    vec3 gi = vec3(0.0);
    // Clamp to at least one sample: with uSampleCount == 0 the normalization
    // below would compute 0 * (1 / 0) and write NaN.
    uint S = uint(max(uSampleCount, 1));
    for (uint s = 0u; s < S; ++s)
    {
        uint az = (baseAz + s * AZIM_STEP) & AZIM_MASK;
        uint ring = (baseRing + s * RING_STEP) & RING_MASK;
        uint phase = (h + s) & ROT_MASK;

        vec3 dirLocal = DirFromLUT(phase, ring, az);
        gi += TraceRay(Pvs, TBN * dirLocal);
    }

    // Normalize, apply depth fade (1 at uFadeStart, 0 at uFadeEnd).
    // smoothstep() is undefined when edge0 >= edge1, so the edges are passed
    // in ascending order and the result inverted; this is the same curve.
    float fade = 1.0 - smoothstep(uFadeStart, uFadeEnd, depth);
    FragColor = vec4(gi * (1.0 / float(S)) * fade, 1.0);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment