Created
September 12, 2024 20:35
-
-
Save monteslu/4f48ee02def3e146f2385b7d44a0715c to your computer and use it in GitHub Desktop.
Single-page WebGPU compute shader example that runs much faster than the equivalent CPU computation.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>WebGPU Compute Shader Example</title>
</head>
<body>
<h1>WebGPU Compute Shader Example</h1>
<p id="output"></p>
<script type="module">
// Entry point: show a message when the browser exposes no WebGPU
// implementation; otherwise start the CPU-vs-GPU benchmark.
if (!navigator.gpu) {
  document.getElementById('output').innerText = "WebGPU not supported in this browser.";
} else {
  // run() is async. Without a rejection handler a failure would only show up
  // in the console and the page would stay stuck on its last progress line,
  // so the error is appended to the output element as well.
  run().catch((err) => {
    console.error(err);
    const message = err instanceof Error ? err.message : String(err);
    document.getElementById('output').innerText += `\nError: ${message}`;
  });
}
/**
 * Runs the benchmark end to end and reports progress in the #output element:
 * builds random growth rates, times the CPU implementation, times the GPU
 * implementation, then prints the largest absolute difference between the
 * two result arrays.
 *
 * The GPU timing spans the whole gpuComputation() call, not just the
 * dispatch itself.
 *
 * @returns {Promise<void>}
 */
async function run() {
  const out = document.getElementById('output');

  // Appends text to the progress log shown on the page.
  const report = (text) => {
    out.innerText += text;
  };
  // Short pause so the browser can render the latest message before the
  // next blocking computation starts.
  const letUiRepaint = () => new Promise((done) => setTimeout(done, 100));

  // Parameters
  const numElements = 100000; // 100,000 elements
  const iterations = 10000; // 10,000 iterations per element

  // One growth rate per element, each drawn from [3.5, 4.0).
  const rValues = Float32Array.from(
    { length: numElements },
    () => 3.5 + Math.random() * 0.5,
  );

  // CPU pass
  out.innerText = "Running CPU computation...";
  await letUiRepaint();
  const cpuStart = performance.now();
  const cpuResults = cpuComputation(rValues, iterations);
  const cpuTime = performance.now() - cpuStart;
  report(`\nCPU computation took ${cpuTime.toFixed(2)} ms`);

  // GPU pass
  report("\nRunning GPU computation...");
  await letUiRepaint();
  const gpuStart = performance.now();
  const gpuResults = await gpuComputation(rValues, iterations);
  const gpuTime = performance.now() - gpuStart;
  report(`\nGPU computation took ${gpuTime.toFixed(2)} ms`);

  // Compare results (optional)
  report("\nComparing results...");
  let maxDifference = 0;
  cpuResults.forEach((cpuValue, i) => {
    const gap = Math.abs(cpuValue - gpuResults[i]);
    if (gap > maxDifference) {
      maxDifference = gap;
    }
  });
  report(`\nMaximum difference between CPU and GPU results: ${maxDifference}`);
}
/**
 * CPU reference for the GPU kernel: for every growth rate r, iterates the
 * logistic map x <- r * x * (1 - x) starting from x = 0.5.
 *
 * JavaScript numbers are float64, whereas the WGSL shader works in f32. For
 * the growth rates used here the map is chaotic, so a float64 run and an f32
 * run decorrelate after a few dozen iterations and comparing them says
 * nothing about correctness. Each intermediate is therefore rounded to
 * single precision with Math.fround, in the same order the shader evaluates
 * `r * x * (1.0 - x)`. A GPU may still legally evaluate the expression
 * slightly differently, so exact equality is not guaranteed.
 *
 * @param {Float32Array} rValues - Growth rate per element.
 * @param {number} iterations - Number of map iterations per element.
 * @returns {Float32Array} Final x for each growth rate, same length as rValues.
 */
function cpuComputation(rValues, iterations) {
  const results = new Float32Array(rValues.length);
  for (let i = 0; i < rValues.length; i++) {
    // fround is a no-op for Float32Array input but keeps plain arrays in f32.
    const r = Math.fround(rValues[i]);
    let x = 0.5;
    for (let n = 0; n < iterations; n++) {
      x = Math.fround(Math.fround(r * x) * Math.fround(1 - x));
    }
    results[i] = x;
  }
  return results;
}
/**
 * Runs the logistic-map iteration x <- r * x * (1 - x) on the GPU, one
 * shader invocation per growth rate, and returns the final x values.
 *
 * A fresh WebGPU device is requested for every call and destroyed before the
 * function returns, so no GPU buffers outlive the computation.
 *
 * @param {Float32Array} rValues - Growth rate per element.
 * @param {number} iterations - Number of map iterations per element
 *   (uploaded to the shader as a u32).
 * @returns {Promise<Float32Array>} Final x for each growth rate, same length
 *   as rValues.
 * @throws {Error} If no WebGPU adapter is available.
 * @throws {RangeError} If the input needs more workgroups than the device
 *   allows in a single dispatch dimension.
 */
async function gpuComputation(rValues, iterations) {
  // Nothing to compute: skip device setup altogether.
  if (rValues.length === 0) {
    return new Float32Array(0);
  }

  // Single source of truth for the workgroup size; it is interpolated into
  // the shader and used for the dispatch count so the two cannot drift apart.
  const workgroupSize = 64;

  // Get WebGPU device. requestAdapter() resolves to null when no adapter
  // is available, so check before dereferencing it.
  const adapter = await navigator.gpu.requestAdapter();
  if (!adapter) {
    throw new Error('No suitable WebGPU adapter found.');
  }
  const device = await adapter.requestDevice();

  try {
    const numWorkgroups = Math.ceil(rValues.length / workgroupSize);
    const maxWorkgroups = device.limits.maxComputeWorkgroupsPerDimension;
    if (numWorkgroups > maxWorkgroups) {
      throw new RangeError(
        `Input needs ${numWorkgroups} workgroups but the device allows at most ${maxWorkgroups} per dimension.`,
      );
    }

    const rValuesBufferSize = rValues.byteLength;
    const resultsBufferSize = rValues.byteLength;
    const uniformBufferSize = 8; // Two u32 values

    // Create and upload rValues buffer
    const gpuBufferRValues = device.createBuffer({
      size: rValuesBufferSize,
      usage: GPUBufferUsage.STORAGE,
      mappedAtCreation: true,
    });
    new Float32Array(gpuBufferRValues.getMappedRange()).set(rValues);
    gpuBufferRValues.unmap();

    // Create results buffer (written by the shader, then copied out)
    const gpuBufferResults = device.createBuffer({
      size: resultsBufferSize,
      usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC,
    });

    // Create uniform buffer holding { numElements, iterations }
    const uniformsData = new Uint32Array([rValues.length, iterations]);
    const uniformBuffer = device.createBuffer({
      size: uniformBufferSize,
      usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
    });
    device.queue.writeBuffer(
      uniformBuffer,
      0,
      uniformsData.buffer,
      uniformsData.byteOffset,
      uniformsData.byteLength,
    );

    // Create shader module
    const shaderCode = `
      struct Uniforms {
        numElements: u32,
        iterations: u32,
      };

      @group(0) @binding(0) var<uniform> uniforms: Uniforms;
      @group(0) @binding(1) var<storage, read> rValues: array<f32>;
      @group(0) @binding(2) var<storage, read_write> results: array<f32>;

      @compute @workgroup_size(${workgroupSize})
      fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
        let index = global_id.x;
        // The last workgroup may extend past the end of the data.
        if (index >= uniforms.numElements) {
          return;
        }
        var x = 0.5;
        let r = rValues[index];
        for (var i = 0u; i < uniforms.iterations; i = i + 1u) {
          x = r * x * (1.0 - x);
        }
        results[index] = x;
      }
    `;
    const shaderModule = device.createShaderModule({ code: shaderCode });

    // Create compute pipeline
    const computePipeline = device.createComputePipeline({
      layout: 'auto',
      compute: {
        module: shaderModule,
        entryPoint: 'main',
      },
    });

    // Create bind group
    const bindGroup = device.createBindGroup({
      layout: computePipeline.getBindGroupLayout(0),
      entries: [
        { binding: 0, resource: { buffer: uniformBuffer } },
        { binding: 1, resource: { buffer: gpuBufferRValues } },
        { binding: 2, resource: { buffer: gpuBufferResults } },
      ],
    });

    // Create command encoder and compute pass
    const commandEncoder = device.createCommandEncoder();
    const passEncoder = commandEncoder.beginComputePass();
    passEncoder.setPipeline(computePipeline);
    passEncoder.setBindGroup(0, bindGroup);
    passEncoder.dispatchWorkgroups(numWorkgroups);
    passEncoder.end();

    // Copy results from the storage buffer into a CPU-mappable buffer
    const gpuReadBuffer = device.createBuffer({
      size: resultsBufferSize,
      usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ,
    });
    commandEncoder.copyBufferToBuffer(
      gpuBufferResults,
      0,
      gpuReadBuffer,
      0,
      resultsBufferSize,
    );

    // Submit commands
    device.queue.submit([commandEncoder.finish()]);

    // Wait for GPU to finish
    await device.queue.onSubmittedWorkDone();

    // Read buffer. slice(0) copies the data out, because the mapped range is
    // detached as soon as the buffer is unmapped.
    await gpuReadBuffer.mapAsync(GPUMapMode.READ);
    const resultArray = new Float32Array(gpuReadBuffer.getMappedRange().slice(0));
    gpuReadBuffer.unmap();
    return resultArray;
  } finally {
    // Release the device and the buffers created on it, on success and on
    // failure alike.
    device.destroy();
  }
}
</script>
</body>
</html>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment