CptPotato · May 13, 2021 18:34
diff --git a/shadow_3x3_pcf.hlsl b/shadow_3x3_pcf.hlsl
 // Turns four depth comparisons into four soft shade factors at once.
 //
 // occluder_depth - four depths fetched from the shadow map
 // receiver_depth - depth they are compared against (a scalar argument is
 //                  broadcast to all four lanes)
 // bias           - x: offset subtracted from the depth difference,
 //                  y: divisor that sets the width of the 0..1 ramp
 //
 // Per lane the result is 0 while (occluder - receiver) <= bias.x and rises
 // linearly to 1 at bias.x + bias.y (for bias.y > 0); saturate clamps it.
 float4 shade_shadow_4(float4 occluder_depth, float4 receiver_depth, float2 bias)
 {
    float4 biased_gap = occluder_depth - receiver_depth - bias.x;
    float4 ramp = biased_gap / bias.y;
    return saturate(ramp);
 }

 // PCF shadow lookup: 3x3 box blur of bilinearly filtered taps, built from
 // four gathers (16 texels).
 //
 // shadow_uv  - lookup position in normalized shadow-map uv
 // shadow_res - shadow-map resolution in texels
 // ref_depth  - receiver depth every fetched texel is compared against
 // bias       - forwarded unchanged to shade_shadow_4
 //
 // Returns the filtered shade factor in [0, 1] (each tap is saturated, nine
 // taps are summed per corner area and the blend is scaled by 1/9).
 //
 // Reads tex_shadow / smp, which are declared outside this function.
 //
 // Texel centers sit at half-integer pixel coordinates, so the bilinear cell
 // and its blend weights are derived from (uv_px - 0.5). Using floor(uv_px)
 // directly would place the whole filter half a texel towards negative u/v.
 float sample_shadow_3x3(float2 shadow_uv, float2 shadow_res, float ref_depth, float2 bias)
 {
    float2 shadow_pixel = 1.0f / shadow_res; // size of one texel in uv
    float2 uv_px = shadow_uv * shadow_res - 0.5f; // position relative to texel centers
    float2 uv_min = floor(uv_px); // lower texel of the bilinear cell
    float2 deltaf = uv_px - uv_min; // bilinear delta in [0, 1)

    // Corner shared by the four texels of the bilinear cell. Gathering exactly
    // on a corner keeps the 2x2 selection away from the texel-center switch
    // points; with the +-1 offsets below the footprint is uv_min - 1 .. uv_min + 2.
    float2 uv_gather = (uv_min + 1.0f) * shadow_pixel;

    // gather and shade a 4x4 sample area (rows: top-left, top-right,
    // bottom-left, bottom-right 2x2 quads)
    float4x4 kernel = float4x4(
        shade_shadow_4(tex_shadow.GatherRed(smp, uv_gather, int2(-1, -1)), ref_depth, bias),
        shade_shadow_4(tex_shadow.GatherRed(smp, uv_gather, int2(1, -1)), ref_depth, bias),
        shade_shadow_4(tex_shadow.GatherRed(smp, uv_gather, int2(-1, 1)), ref_depth, bias),
        shade_shadow_4(tex_shadow.GatherRed(smp, uv_gather, int2(1, 1)), ref_depth, bias)
    );

    // partial sums shared between the four overlapping 3x3 areas
    float center = kernel[0].y + kernel[1].x + kernel[2].z + kernel[3].w; // inner 2x2
    float left = kernel[0].x + kernel[2].w; // column 0, rows 1-2
    float top = kernel[0].z + kernel[1].w; // row 0, columns 1-2
    float right = kernel[1].y + kernel[3].z; // column 3, rows 1-2
    float bottom = kernel[2].y + kernel[3].x; // row 3, columns 1-2

    // one 9-tap sum per corner area; the last term is that area's outer corner
    float4 bilinear = float4(
        center + top + left + kernel[0].w,
        center + top + right + kernel[1].z,
        center + bottom + left + kernel[2].x,
        center + bottom + right + kernel[3].y
    );

    // blend the four areas bilinearly & normalize by the 9 taps per area
    return (1.0f / 9.0f) * lerp(lerp(bilinear.x, bilinear.y, deltaf.x), lerp(bilinear.z, bilinear.w, deltaf.x), deltaf.y);
 }

 /* Explanation:

 4 gather fetches result in a 4x4 area (here float4x4) with this addressing:

    kernel[n]              kernel[..][m]
 +---+---+---+---+        +---+---+---+---+
 | 0 | 0 | 1 | 1 |        | W | Z | W | Z |
 +---+---+---+---+        +---+---+---+---+
 | 0 | 0 | 1 | 1 |        | X | Y | X | Y |
 +---+---+---+---+        +---+---+---+---+
 | 2 | 2 | 3 | 3 |        | W | Z | W | Z |
 +---+---+---+---+        +---+---+---+---+
 | 2 | 2 | 3 | 3 |        | X | Y | X | Y |
 +---+---+---+---+        +---+---+---+---+

 Each sample is compared to the receiver's depth and shaded individually.
 Then accumulate into four smaller areas for bilinear filtering (one float for each):

    top left                 top right
 +---+---+---+---+        +---+---+---+---+
 | # | # | # |   |        |   | # | # | # |
 +---+---+---+---+        +---+---+---+---+
 | # | # | # |   |        |   | # | # | # |
 +---+---+---+---+        +---+---+---+---+
 | # | # | # |   |        |   | # | # | # |
 +---+---+---+---+        +---+---+---+---+
 |   |   |   |   |        |   |   |   |   |
 +---+---+---+---+        +---+---+---+---+

   bottom left              bottom right
 +---+---+---+---+        +---+---+---+---+
 |   |   |   |   |        |   |   |   |   |
 +---+---+---+---+        +---+---+---+---+
 | # | # | # |   |        |   | # | # | # |
 +---+---+---+---+        +---+---+---+---+
 | # | # | # |   |        |   | # | # | # |
 +---+---+---+---+        +---+---+---+---+
 | # | # | # |   |        |   | # | # | # |
 +---+---+---+---+        +---+---+---+---+

 Finish by bilinearly blending these four floats (blends all 9 taps at once).
 */
	// Turns four depth comparisons into four soft shade factors at once.
	//
	// occluder_depth - four depths fetched from the shadow map
	// receiver_depth - depth they are compared against (a scalar argument is
	//                  broadcast to all four lanes)
	// bias           - x: offset subtracted from the depth difference,
	//                  y: divisor that sets the width of the 0..1 ramp
	//
	// Per lane the result is 0 while (occluder - receiver) <= bias.x and rises
	// linearly to 1 at bias.x + bias.y (for bias.y > 0); saturate clamps it.
	float4 shade_shadow_4(float4 occluder_depth, float4 receiver_depth, float2 bias)
	{
		float4 biased_gap = occluder_depth - receiver_depth - bias.x;
		float4 ramp = biased_gap / bias.y;
		return saturate(ramp);
	}

	// PCF shadow lookup: 3x3 box blur of bilinearly filtered taps, built from
	// four gathers (16 texels).
	//
	// shadow_uv  - lookup position in normalized shadow-map uv
	// shadow_res - shadow-map resolution in texels
	// ref_depth  - receiver depth every fetched texel is compared against
	// bias       - forwarded unchanged to shade_shadow_4
	//
	// Returns the filtered shade factor in [0, 1] (each tap is saturated, nine
	// taps are summed per corner area and the blend is scaled by 1/9).
	//
	// Reads tex_shadow / smp, which are declared outside this function.
	//
	// Texel centers sit at half-integer pixel coordinates, so the bilinear cell
	// and its blend weights are derived from (uv_px - 0.5). Using floor(uv_px)
	// directly would place the whole filter half a texel towards negative u/v.
	float sample_shadow_3x3(float2 shadow_uv, float2 shadow_res, float ref_depth, float2 bias)
	{
		float2 shadow_pixel = 1.0f / shadow_res; // size of one texel in uv
		float2 uv_px = shadow_uv * shadow_res - 0.5f; // position relative to texel centers
		float2 uv_min = floor(uv_px); // lower texel of the bilinear cell
		float2 deltaf = uv_px - uv_min; // bilinear delta in [0, 1)

		// Corner shared by the four texels of the bilinear cell. Gathering exactly
		// on a corner keeps the 2x2 selection away from the texel-center switch
		// points; with the +-1 offsets below the footprint is uv_min - 1 .. uv_min + 2.
		float2 uv_gather = (uv_min + 1.0f) * shadow_pixel;

		// gather and shade a 4x4 sample area (rows: top-left, top-right,
		// bottom-left, bottom-right 2x2 quads)
		float4x4 kernel = float4x4(
			shade_shadow_4(tex_shadow.GatherRed(smp, uv_gather, int2(-1, -1)), ref_depth, bias),
			shade_shadow_4(tex_shadow.GatherRed(smp, uv_gather, int2(1, -1)), ref_depth, bias),
			shade_shadow_4(tex_shadow.GatherRed(smp, uv_gather, int2(-1, 1)), ref_depth, bias),
			shade_shadow_4(tex_shadow.GatherRed(smp, uv_gather, int2(1, 1)), ref_depth, bias)
		);

		// partial sums shared between the four overlapping 3x3 areas
		float center = kernel[0].y + kernel[1].x + kernel[2].z + kernel[3].w; // inner 2x2
		float left = kernel[0].x + kernel[2].w; // column 0, rows 1-2
		float top = kernel[0].z + kernel[1].w; // row 0, columns 1-2
		float right = kernel[1].y + kernel[3].z; // column 3, rows 1-2
		float bottom = kernel[2].y + kernel[3].x; // row 3, columns 1-2

		// one 9-tap sum per corner area; the last term is that area's outer corner
		float4 bilinear = float4(
			center + top + left + kernel[0].w,
			center + top + right + kernel[1].z,
			center + bottom + left + kernel[2].x,
			center + bottom + right + kernel[3].y
		);

		// blend the four areas bilinearly & normalize by the 9 taps per area
		return (1.0f / 9.0f) * lerp(lerp(bilinear.x, bilinear.y, deltaf.x), lerp(bilinear.z, bilinear.w, deltaf.x), deltaf.y);
	}

	/* Explanation:

	4 gather fetches result in a 4x4 area (here float4x4) with this addressing:

	   kernel[n]              kernel[..][m]
	+---+---+---+---+        +---+---+---+---+
	| 0 | 0 | 1 | 1 |        | W | Z | W | Z |
	+---+---+---+---+        +---+---+---+---+
	| 0 | 0 | 1 | 1 |        | X | Y | X | Y |
	+---+---+---+---+        +---+---+---+---+
	| 2 | 2 | 3 | 3 |        | W | Z | W | Z |
	+---+---+---+---+        +---+---+---+---+
	| 2 | 2 | 3 | 3 |        | X | Y | X | Y |
	+---+---+---+---+        +---+---+---+---+

	Each sample is compared to the receiver's depth and shaded individually.
	Then accumulate into four smaller areas for bilinear filtering (one float for each):

	   top left                 top right
	+---+---+---+---+        +---+---+---+---+
	| # | # | # |   |        |   | # | # | # |
	+---+---+---+---+        +---+---+---+---+
	| # | # | # |   |        |   | # | # | # |
	+---+---+---+---+        +---+---+---+---+
	| # | # | # |   |        |   | # | # | # |
	+---+---+---+---+        +---+---+---+---+
	|   |   |   |   |        |   |   |   |   |
	+---+---+---+---+        +---+---+---+---+

	  bottom left              bottom right
	+---+---+---+---+        +---+---+---+---+
	|   |   |   |   |        |   |   |   |   |
	+---+---+---+---+        +---+---+---+---+
	| # | # | # |   |        |   | # | # | # |
	+---+---+---+---+        +---+---+---+---+
	| # | # | # |   |        |   | # | # | # |
	+---+---+---+---+        +---+---+---+---+
	| # | # | # |   |        |   | # | # | # |
	+---+---+---+---+        +---+---+---+---+

	Finish by bilinearly blending these four floats (blends all 9 taps at once).
	*/