Skip to content

Instantly share code, notes, and snippets.

@Bloodwyn
Last active July 14, 2025 22:39
Show Gist options
  • Save Bloodwyn/08156a49ef335a990d3679c4fcc7ad87 to your computer and use it in GitHub Desktop.
Save Bloodwyn/08156a49ef335a990d3679c4fcc7ad87 to your computer and use it in GitHub Desktop.
Mesh Shader Triangulation
#include "Common.h"
/*
author: Bastian Kuth
description: A d3d12 mesh shader that receives a polygon of up to 32 points and triangulates it using ear clipping.
Threads of a wave perform the checks in parallel, so should be fairly fast.
Requires 32 wave size, so WARP or some intel hardware might not work.
how to run: Meant to be used with the WorkGraphPlayground: https://github.com/GPUOpen-LibrariesAndSDKs/WorkGraphPlayground
licence: CC0 / Public Domain
comment this gist if you end up using it!
*/
static const int maxNumPoints = 32;
struct Polygon {
int numPoints;
float2 points[maxNumPoints];
};
struct PersistentState {
uint lastInputState;
Polygon polygon;
};
PersistentState LoadState(){
return PersistentScratchBuffer.Load<PersistentState>(0);
}
void StoreState(in const PersistentState data){
PersistentScratchBuffer.Store<PersistentState>(0, data);
}
struct Vertex {
float4 position : SV_POSITION;
};
bool PointInTriangle(float2 P, float2 A, float2 B, float2 C) {
float d1 = (P.x - B.x) * (A.y - B.y) - (A.x - B.x) * (P.y - B.y);
float d2 = (P.x - C.x) * (B.y - C.y) - (B.x - C.x) * (P.y - C.y);
float d3 = (P.x - A.x) * (C.y - A.y) - (C.x - A.x) * (P.y - A.y);
bool hasNeg = (d1 < 0) || (d2 < 0) || (d3 < 0);
bool hasPos = (d1 > 0) || (d2 > 0) || (d3 > 0);
return !(hasNeg && hasPos);
}
[Shader("node")]
[NodeLaunch("mesh")]
[NodeId("TriangleMeshNode", 0)]
[NodeDispatchGrid(1, 1, 1)]
[NumThreads(32, 1, 1)]
[WaveSize(32)]
[OutputTopology("triangle")]
void TriangleMeshShader(
DispatchNodeInputRecord<Polygon> inputRecord,
uint gtid : SV_GroupThreadID,
out indices uint3 outputIndices[30],
out vertices Vertex outputVertices[32])
{
const Polygon poly = inputRecord.Get();
// crashes your driver if not handled
if(poly.numPoints < 2) return;
SetMeshOutputCounts(poly.numPoints, poly.numPoints - 2);
if(gtid >= poly.numPoints) return;
// write points
Vertex v;
float2 tmp = (poly.points[gtid] / RenderSize) * 2 - 1;
v.position = float4(tmp.x, -tmp.y, 0.5, 1);
outputVertices[gtid] = v;
int posIdx = gtid;
int count = poly.numPoints;
for(int ti = 0; ti < (poly.numPoints - 3) && gtid < count; ++ti){
int prevThreadIdx = (gtid + count - 1) % count;
int nextThreadIdx = (gtid + + 1) % count;
int prevPosIdx = WaveReadLaneAt(posIdx, prevThreadIdx);
int nextPosIdx = WaveReadLaneAt(posIdx, nextThreadIdx);
// create ballot with 1s where poly is convex
float det = determinant(float2x2(poly.points[prevPosIdx] - poly.points[posIdx], poly.points[nextPosIdx] - poly.points[posIdx]));
uint convexBallot = WaveActiveBallot(det > 0).x;
while(convexBallot != 0){
// find first thread with convex point
int earThreadIdx = firstbitlow(convexBallot);
// I've been told broadcasts are fast, but can the compiler proof it? X Doubt, might need WaveReadLaneFirst
int earThreadPrevPosIdx = WaveReadLaneAt(prevPosIdx, earThreadIdx);
int earThreadPosIdx = WaveReadLaneAt( posIdx, earThreadIdx);
int earThreadNextPosIdx = WaveReadLaneAt(nextPosIdx, earThreadIdx);
// Check if any point is inside the ear
bool posInTriangle = PointInTriangle(poly.points[posIdx],
poly.points[earThreadPrevPosIdx],
poly.points[earThreadPosIdx],
poly.points[earThreadNextPosIdx]);
// Ignore points that make the ear
posInTriangle = posInTriangle &&
earThreadIdx != gtid &&
earThreadIdx != prevThreadIdx &&
earThreadIdx != nextThreadIdx;
// sync!
bool collides = WaveActiveAnyTrue(posInTriangle);
if(!collides){ // thread has an ear, write and clip it!
if(gtid == earThreadIdx) outputIndices[ti] = uint3(prevPosIdx, posIdx, nextPosIdx);
int j = (gtid < earThreadIdx) ? gtid : gtid+1;
posIdx = WaveReadLaneAt(posIdx, j);
--count;
break;
}
// thread has no valid ear, lets invalidate it
convexBallot = convexBallot ^ (1u << earThreadIdx);
}
}
uint3 tri = uint3(WaveReadLaneAt(posIdx, 0), WaveReadLaneAt(posIdx, 1), WaveReadLaneAt(posIdx, 2));
if(WaveIsFirstLane()) outputIndices[poly.numPoints - 3] = tri;
}
float4 TrianglePixelShader(float3 bary : SV_Barycentrics) : SV_TARGET { return float4(bary, 1); }
[Shader("node")]
[NodeIsProgramEntry]
[NodeLaunch("broadcasting")]
[NodeDispatchGrid(1,1,1)]
[NumThreads(1,1,1)]
void Entry(
[MaxRecords(1)] NodeOutput<Polygon> TriangleMeshNode
){
PersistentState state = LoadState();
// input handling
if(input::IsKeySpaceDown()){
state.polygon.numPoints = 0;
}
if(input::IsMouseLeftDown()){
int dragPoint = -1;
for(int i = 0; i < state.polygon.numPoints; ++i){
float d = distance(state.polygon.points[i], MousePosition);
if(d < 40){
dragPoint = i;
}
}
if(dragPoint != -1){
state.polygon.points[dragPoint] = MousePosition;
}else if(!(state.lastInputState & (1 << 0))){
if(state.polygon.numPoints < maxNumPoints){
state.polygon.points[state.polygon.numPoints++] = MousePosition;
}
}
}
state.lastInputState = InputState;
StoreState(state);
for(int i = 0; i < state.polygon.numPoints; ++i){
FillCircle(state.polygon.points[i], 5);
}
if(state.polygon.numPoints > 2){
ThreadNodeOutputRecords<Polygon> triangleRecord = TriangleMeshNode.GetThreadNodeOutputRecords(1);
triangleRecord.Get() = state.polygon;
triangleRecord.OutputComplete();
}
}
@Bloodwyn
Copy link
Author

Bloodwyn commented Jul 3, 2025

grafik

@seanisom
Copy link

Screenshot 2025-07-14 183520

We are using successfully to render roofs of 3D buildings!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment