This is an automated email from the ASF dual-hosted git repository.

tqchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new afb6416234 [WebGPU] Handle device OOM in createBuffer (#17005)
afb6416234 is described below

commit afb64162342bc911cb101a5038139441cbbd8bbc
Author: Charlie Ruan <53290280+charliefr...@users.noreply.github.com>
AuthorDate: Fri May 17 09:41:57 2024 -0700

    [WebGPU] Handle device OOM in createBuffer (#17005)
---
 web/src/runtime.ts | 15 +++++++++++++++
 web/src/webgpu.ts  | 29 ++++++++++++++++++++++++++---
 2 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/web/src/runtime.ts b/web/src/runtime.ts
index ff4dce497d..080003b4f0 100644
--- a/web/src/runtime.ts
+++ b/web/src/runtime.ts
@@ -1014,6 +1014,7 @@ export class Instance implements Disposable {
   private asyncifyHandler: AsyncifyHandler;
   private initProgressCallback: Array<InitProgressCallback> = [];
   private rng: LinearCongruentialGenerator;
+  private deviceLostIsError = true;  // whether device.lost is due to actual error or dispose()
 
   /**
    * Internal function(registered by the runtime)
@@ -1107,11 +1108,14 @@ export class Instance implements Disposable {
   }
 
   dispose(): void {
+    this.deviceLostIsError = false;  // prevent dispose to trigger device.lost error
     // order matters
     // ctx release goes back into lib.
     this.ctx.dispose();
     this.lib.dispose();
+    this.deviceLostIsError = true;
   }
+
   /**
    * Obtain the runtime information in readable format.
    */
@@ -2094,6 +2098,17 @@ export class Instance implements Disposable {
    * @param device The given GPU device.
    */
   initWebGPU(device: GPUDevice): void {
+    device.addEventListener("uncapturederror", (event) => {
+      console.error("A WebGPU error was not captured: ", event);
+    });
+
+    device.lost.then((info: any) => {
+      if (this.deviceLostIsError) {
+        console.error("Device lost, calling Instance.dispose(). Please initialize again. ", info);
+        this.dispose();
+      }
+    });
+
     const webGPUContext = new WebGPUContext(
       this.memory, device
     );
diff --git a/web/src/webgpu.ts b/web/src/webgpu.ts
index 55c53bb8d5..8d699c4c48 100644
--- a/web/src/webgpu.ts
+++ b/web/src/webgpu.ts
@@ -120,6 +120,29 @@ export async function detectGPUDevice(): Promise<GPUDeviceDetectOutput | undefin
   }
 }
 
+/**
+ * Create GPU buffer with `createBuffer()` but with error catching; destroy if error caught.
+ * @param device The GPUDevice used to create a buffer.
+ * @param descriptor The GPUBufferDescriptor passed to `createBuffer()`.
+ * @returns The buffer created by `createBuffer()`.
+ *
+ * @note We treat any error occurred at `createBuffer()` fatal and expect the user to handle
+ *   `device.destroy()` with `device.lost.then()`.
+ */
+function tryCreateBuffer(device: GPUDevice, descriptor: GPUBufferDescriptor) {
+  device.pushErrorScope("out-of-memory");
+  device.pushErrorScope("validation");
+  device.pushErrorScope("internal");
+
+  const buffer = device.createBuffer(descriptor);
+
+  device.popErrorScope().then((error) => {if (error) {device.destroy(); console.error(error);}});
+  device.popErrorScope().then((error) => {if (error) {device.destroy(); console.error(error);}});
+  device.popErrorScope().then((error) => {if (error) {device.destroy(); console.error(error);}});
+
+  return buffer;
+}
+
 const canvasRenderWGSL = `
 @group(0) @binding(0) var my_sampler : sampler;
 @group(0) @binding(1) var my_texture : texture_2d<f32>;
@@ -504,7 +527,7 @@ export class WebGPUContext {
 
     if (buffer == undefined) {
       // create uniform buffer
-      buffer = this.device.createBuffer({
+      buffer = tryCreateBuffer(this.device, {
         size: allocSize,
         usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
       });
@@ -779,7 +802,7 @@ export class WebGPUContext {
     if (nbytes == 0) {
       nbytes = 1;
     }
-    const buffer = this.device.createBuffer({
+    const buffer = tryCreateBuffer(this.device, {
       size: nbytes,
       usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST,
     });
@@ -833,7 +856,7 @@ export class WebGPUContext {
     nbytes: number
   ): void {
     // Perhaps it would be more useful to resuse a staging buffer?
-    const gpuTemp = this.device.createBuffer({
+    const gpuTemp = tryCreateBuffer(this.device, {
       size: nbytes,
       usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST,
     });

Reply via email to