This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push: new afb6416234 [WebGPU] Handle device OOM in createBuffer (#17005) afb6416234 is described below commit afb64162342bc911cb101a5038139441cbbd8bbc Author: Charlie Ruan <53290280+charliefr...@users.noreply.github.com> AuthorDate: Fri May 17 09:41:57 2024 -0700 [WebGPU] Handle device OOM in createBuffer (#17005) --- web/src/runtime.ts | 15 +++++++++++++++ web/src/webgpu.ts | 29 ++++++++++++++++++++++++++--- 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/web/src/runtime.ts b/web/src/runtime.ts index ff4dce497d..080003b4f0 100644 --- a/web/src/runtime.ts +++ b/web/src/runtime.ts @@ -1014,6 +1014,7 @@ export class Instance implements Disposable { private asyncifyHandler: AsyncifyHandler; private initProgressCallback: Array<InitProgressCallback> = []; private rng: LinearCongruentialGenerator; + private deviceLostIsError = true; // whether device.lost is due to actual error or dispose() /** * Internal function(registered by the runtime) @@ -1107,11 +1108,14 @@ export class Instance implements Disposable { } dispose(): void { + this.deviceLostIsError = false; // prevent dispose to trigger device.lost error // order matters // ctx release goes back into lib. this.ctx.dispose(); this.lib.dispose(); + this.deviceLostIsError = true; } + /** * Obtain the runtime information in readable format. */ @@ -2094,6 +2098,17 @@ export class Instance implements Disposable { * @param device The given GPU device. */ initWebGPU(device: GPUDevice): void { + device.addEventListener("uncapturederror", (event) => { + console.error("A WebGPU error was not captured: ", event); + }); + + device.lost.then((info: any) => { + if (this.deviceLostIsError) { + console.error("Device lost, calling Instance.dispose(). Please initialize again. ", info); + this.dispose(); + } + }); + const webGPUContext = new WebGPUContext( this.memory, device ); diff --git a/web/src/webgpu.ts b/web/src/webgpu.ts index 55c53bb8d5..8d699c4c48 100644 --- a/web/src/webgpu.ts +++ b/web/src/webgpu.ts @@ -120,6 +120,29 @@ export async function detectGPUDevice(): Promise<GPUDeviceDetectOutput | undefin } } +/** + * Create GPU buffer with `createBuffer()` but with error catching; destroy if error caught. + * @param device The GPUDevice used to create a buffer. + * @param descriptor The GPUBufferDescriptor passed to `createBuffer()`. + * @returns The buffer created by `createBuffer()`. + * + * @note We treat any error occurred at `createBuffer()` fatal and expect the user to handle + * `device.destroy()` with `device.lost.then()`. + */ +function tryCreateBuffer(device: GPUDevice, descriptor: GPUBufferDescriptor) { + device.pushErrorScope("out-of-memory"); + device.pushErrorScope("validation"); + device.pushErrorScope("internal"); + + const buffer = device.createBuffer(descriptor); + + device.popErrorScope().then((error) => {if (error) {device.destroy(); console.error(error);}}); + device.popErrorScope().then((error) => {if (error) {device.destroy(); console.error(error);}}); + device.popErrorScope().then((error) => {if (error) {device.destroy(); console.error(error);}}); + + return buffer; +} + const canvasRenderWGSL = ` @group(0) @binding(0) var my_sampler : sampler; @group(0) @binding(1) var my_texture : texture_2d<f32>; @@ -504,7 +527,7 @@ export class WebGPUContext { if (buffer == undefined) { // create uniform buffer - buffer = this.device.createBuffer({ + buffer = tryCreateBuffer(this.device, { size: allocSize, usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, }); @@ -779,7 +802,7 @@ export class WebGPUContext { if (nbytes == 0) { nbytes = 1; } - const buffer = this.device.createBuffer({ + const buffer = tryCreateBuffer(this.device, { size: nbytes, usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST, }); @@ -833,7 +856,7 @@ export class WebGPUContext { nbytes: number ): void { // Perhaps it would be more useful to resuse a staging buffer? - const gpuTemp = this.device.createBuffer({ + const gpuTemp = tryCreateBuffer(this.device, { size: nbytes, usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST, });