This is an automated email from the ASF dual-hosted git repository.

junrushao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm-ffi.git


The following commit(s) were added to refs/heads/main by this push:
     new 369ff23  doc: Stable C ABI (#191)
369ff23 is described below

commit 369ff23f0581baf257c29a5cedacea9c6003e4b6
Author: Junru Shao <[email protected]>
AuthorDate: Sun Oct 26 16:35:10 2025 -0700

    doc: Stable C ABI (#191)
---
 docs/.rstcheck.cfg                                 |   2 +-
 docs/get_started/quickstart.rst                    |  17 +-
 docs/get_started/stable_c_abi.rst                  | 254 +++++++++++++++++++++
 docs/guides/stable_c_abi.md                        |  81 -------
 docs/index.rst                                     |   2 +-
 examples/quickstart/CMakeLists.txt                 |   2 +-
 examples/quickstart/load/load_cpp.cc               |  14 +-
 .../{quickstart => stable_c_abi}/CMakeLists.txt    |  43 ++--
 examples/stable_c_abi/README.md                    |  39 ++++
 examples/stable_c_abi/raw_compile.sh               |  46 ++++
 examples/stable_c_abi/run_all.sh                   |  27 +++
 examples/stable_c_abi/src/add_one_cpu.c            |  56 +++++
 examples/stable_c_abi/src/load.c                   | 120 ++++++++++
 13 files changed, 581 insertions(+), 122 deletions(-)

diff --git a/docs/.rstcheck.cfg b/docs/.rstcheck.cfg
index 29a712f..5d48d42 100644
--- a/docs/.rstcheck.cfg
+++ b/docs/.rstcheck.cfg
@@ -1,5 +1,5 @@
 [rstcheck]
 report_level = warning
-ignore_directives = automodule, autosummary, currentmodule, toctree, ifconfig, 
tab-set, collapse, tabs
+ignore_directives = automodule, autosummary, currentmodule, toctree, ifconfig, 
tab-set, collapse, tabs, dropdown
 ignore_roles = ref, cpp:class, cpp:func, py:func, c:macro
 ignore_languages = cpp, python
diff --git a/docs/get_started/quickstart.rst b/docs/get_started/quickstart.rst
index 6f1ce00..650b750 100644
--- a/docs/get_started/quickstart.rst
+++ b/docs/get_started/quickstart.rst
@@ -49,8 +49,6 @@ We can build a single shared library that works across:
 Write a Simple ``add_one``
 --------------------------
 
-.. _sec-cpp-source-code:
-
 Source Code
 ~~~~~~~~~~~
 
@@ -60,6 +58,8 @@ Suppose we implement a C++ function ``AddOne`` that performs 
elementwise ``y = x
 
   .. group-tab:: C++
 
+    .. _cpp_add_one_kernel:
+
     .. literalinclude:: ../../examples/quickstart/compile/add_one_cpu.cc
       :language: cpp
       :emphasize-lines: 8, 17
@@ -246,6 +246,8 @@ As shown in the :ref:`previous 
section<sec-use-across-framework>`, :py:func:`tvm
 and framework-independent ``add_one_cpu.so`` or ``add_one_cuda.so`` and can be 
used to incorporate it into all Python
 array frameworks that implement the standard `DLPack protocol 
<https://data-apis.org/array-api/2024.12/design_topics/data_interchange.html>`_.
 
+.. _cpp_load:
+
 C++
 ~~~
 
@@ -254,8 +256,15 @@ can be used directly in C/C++ with no Python dependency.
 
 .. literalinclude:: ../../examples/quickstart/load/load_cpp.cc
    :language: cpp
-   :start-after: [example.begin]
-   :end-before: [example.end]
+   :start-after: [main.begin]
+   :end-before: [main.end]
+
+.. dropdown:: Auxiliary Logic
+
+  .. literalinclude:: ../../examples/quickstart/load/load_cpp.cc
+    :language: cpp
+    :start-after: [aux.begin]
+    :end-before: [aux.end]
 
 Compile and run it with:
 
diff --git a/docs/get_started/stable_c_abi.rst 
b/docs/get_started/stable_c_abi.rst
new file mode 100644
index 0000000..f83db93
--- /dev/null
+++ b/docs/get_started/stable_c_abi.rst
@@ -0,0 +1,254 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements.  See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership.  The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License.  You may obtain a copy of the License at
+..
+..   http://www.apache.org/licenses/LICENSE-2.0
+..
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied.  See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+Stable C ABI
+============
+
+.. note::
+
+  All code used in this guide lives under
+  `examples/stable_c_abi 
<https://github.com/apache/tvm-ffi/tree/main/examples/stable_c_abi>`_.
+
+.. admonition:: Prerequisite
+   :class: hint
+
+   - Python: 3.9 or newer (for the ``tvm_ffi.config``/``tvm-ffi-config`` 
helpers)
+   - Compiler: C11-capable toolchain (GCC/Clang/MSVC)
+   - TVM-FFI installed via
+
+     .. code-block:: bash
+
+        pip install --force-reinstall --upgrade apache-tvm-ffi
+
+This guide introduces TVM-FFI's stable C ABI: a single, minimal, and stable
+ABI that can represent any cross-language call, designed with DSL and ML
+compiler codegen in mind.
+
+TVM-FFI builds on the following key idea:
+
+.. _tvm_ffi_c_abi:
+
+.. admonition:: Key Idea: A Single C ABI for all Functions
+  :class: important
+
+  Every function call can be represented by a single stable C ABI:
+
+  .. code-block:: c
+
+      int tvm_ffi_c_abi(          // returns 0 on success; non-zero on failure
+        void*            handle,  // library handle
+        const TVMFFIAny* args,    // inputs: args[0 ... N - 1]
+        int              N,       // number of inputs
+        TVMFFIAny*       result   // output: *result
+      );
+
+  where :cpp:class:`TVMFFIAny` is a tagged union of all supported types, e.g. 
integers, floats, Tensors, strings, etc., and can be further extended to 
arbitrary user-defined types.
+
+Built on top of this stable C ABI, TVM-FFI defines a common C ABI protocol for 
all functions, and further provides an extensible, performant, and 
ecosystem-friendly open solution for all.
+
+The rest of this guide covers:
+
+- The stable C layout and calling convention of ``tvm_ffi_c_abi``;
+- C examples from both the callee and caller sides of this ABI.
+
+Stable C Layout
+---------------
+
+TVM-FFI's :ref:`C ABI <tvm_ffi_c_abi>` uses a stable layout for all the input 
and output arguments.
+
+Layout of :cpp:class:`TVMFFIAny`
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+:cpp:class:`TVMFFIAny` is a fixed-size (128-bit) tagged union that represents 
all supported types.
+
+- First 32 bits: type index indicating which value is stored (supports up to 
2^32 types).
+- Next 32 bits: reserved (used for flags in rare cases, e.g. small-string 
optimization).
+- Last 64 bits: payload that is either a 64-bit integer, a 64-bit 
floating-point number, or a pointer to a heap-allocated object.
+
+.. figure:: 
https://raw.githubusercontent.com/tlc-pack/web-data/refs/heads/main/images/tvm-ffi/stable-c-abi-layout-any.svg
+   :alt: Layout of the 128-bit Any tagged union
+   :name: fig:layout-any
+
+   Figure 1. Layout spec for the :cpp:class:`TVMFFIAny` tagged union.
+
+The following conventions apply when representing values in 
:cpp:class:`TVMFFIAny`:
+
+- Primitive types: the last 64 bits directly store the value, for example:
+
+  * Integers
+  * Floating-point numbers
+
+- Heap-allocated objects: the last 64 bits store a pointer to the actual 
object, for example:
+
+  * Managed tensor objects that follow `DLPack 
<https://data-apis.org/array-api/2024.12/design_topics/data_interchange.html#dlpack-an-in-memory-tensor-structure>`_
 (i.e. `DLTensor 
<https://dmlc.github.io/dlpack/latest/c_api.html#c.DLTensor>`_) layout.
+
+- Arbitrary objects: the type index identifies the concrete type, and the last 
64 bits store a pointer to a reference-counted object in TVM-FFI's object 
format, for example:
+
+  * :py:class:`tvm_ffi.Function`, representing all functions, such as 
Python/C++ functions/lambdas, etc.;
+  * :py:class:`tvm_ffi.Array` and :py:class:`tvm_ffi.Map` (list/dict 
containers of :cpp:class:`TVMFFIAny` values);
+  * Extending to up to 2^32 types is supported.
+
+Function Calling Convention
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Function calls in TVM-FFI share the same calling convention, 
:ref:`tvm_ffi_c_abi <tvm_ffi_c_abi>`, as described above.
+
+- ``handle: void*``: optional library/closure handle passed to the callee. For 
exported symbols this is typically ``NULL``; closures may use it to capture 
context.
+- ``args: TVMFFIAny*``: pointer to a contiguous array of input arguments.
+- ``N: int``: number of input arguments (named ``num_args`` in the C examples in this guide).
+- ``result: TVMFFIAny*``: out-parameter that receives the function result (use 
``kTVMFFINone`` for "no return value").
+
+.. figure:: 
https://raw.githubusercontent.com/tlc-pack/web-data/refs/heads/main/images/tvm-ffi/stable-c-abi-layout-func.svg
+   :alt: Layout and calling convention for tvm_ffi_c_abi
+   :name: fig:layout-func
+
+   Figure 2. Layout and calling convention of :ref:`tvm_ffi_c_abi 
<tvm_ffi_c_abi>`, where ``Any`` in this figure refers to :cpp:class:`TVMFFIAny`.
+
+
+Stability and Interoperability
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**Stability.** The pure C layout and the calling convention are stable across 
compiler versions and independent of host languages or frameworks.
+
+**Cross-language.** TVM-FFI implements this calling convention in multiple 
languages (C, C++, Python, Rust, ...), enabling code written in one language—or 
generated by a DSL targeting the ABI—to be called from another language.
+
+**Cross-framework.** TVM-FFI uses standard data structures such as `DLPack 
tensors 
<https://data-apis.org/array-api/2024.12/design_topics/data_interchange.html#dlpack-an-in-memory-tensor-structure>`_
 to represent arrays, so compiled functions can be used from any array 
framework that implements the DLPack protocol (NumPy, PyTorch, TensorFlow, 
CuPy, JAX, and others).
+
+
+Stable ABI in C Code
+--------------------
+
+.. hint::
+
+  You can build and run the examples either with raw compiler commands or with 
CMake.
+  Both approaches are demonstrated below.
+
+TVM FFI's :ref:`C ABI <tvm_ffi_c_abi>` is designed with DSL and ML compilers 
in mind. DSL codegen usually relies on MLIR, LLVM or low-level C as the 
compilation target, where no access to C++ features is available, and where 
stable C ABIs are preferred for simplicity and stability.
+
+This section shows how to write C code that follows the stable C ABI. 
Specifically, we provide two examples:
+
+- Callee side: A CPU ``add_one_cpu`` kernel in C that is equivalent to the 
:ref:`C++ example <cpp_add_one_kernel>`.
+- Caller side: A loader and runner in C that invokes the kernel, a direct C 
translation of the :ref:`C++ example <cpp_load>`.
+
+The C code is minimal and dependency-free, so it can serve as a direct 
reference for DSL compilers that want to expose or invoke kernels through the 
ABI.
+
+Callee: ``add_one_cpu`` Kernel
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Below is a minimal ``add_one_cpu`` kernel in C that follows the stable C ABI. 
It has three steps:
+
+- **Step 1**. Extract input ``x`` and output ``y`` as DLPack tensors;
+- **Step 2**. Implement the kernel ``y = x + 1`` on CPU with a simple for-loop;
+- **Step 3**. Return ``0`` to signal success; ``result`` is left unset because the output is written in place to ``y``.
+
+.. literalinclude:: ../../examples/stable_c_abi/src/add_one_cpu.c
+   :language: c
+   :start-after: [example.begin]
+   :end-before: [example.end]
+
+Build it with either approach:
+
+.. tabs::
+
+  .. group-tab:: Raw command
+
+    .. literalinclude:: ../../examples/stable_c_abi/raw_compile.sh
+      :language: bash
+      :start-after: [kernel.begin]
+      :end-before: [kernel.end]
+
+  .. group-tab:: CMake
+
+    .. code-block:: bash
+
+       cmake . -B build -DEXAMPLE_NAME="kernel" 
-DCMAKE_BUILD_TYPE=RelWithDebInfo
+       cmake --build build --config RelWithDebInfo
+
+
+**Compiler codegen.** This C code serves as a direct reference for DSL 
compilers. To emit a function that follows the stable C ABI, ensure the 
following:
+
+- Symbol naming: define the exported symbol name as ``__tvm_ffi_{func_name}``;
+- Type checking: check input types via :cpp:member:`TVMFFIAny::type_index`, 
then marshal inputs from :cpp:class:`TVMFFIAny` to the desired types;
+- Error handling: return 0 on success, or a non-zero code on failure. When an 
error occurs, set an error message via :cpp:func:`TVMFFIErrorSetRaisedFromCStr` 
or :cpp:func:`TVMFFIErrorSetRaisedFromCStrParts`.
+
+**C vs. C++.** Compared to the :ref:`C++ example <cpp_add_one_kernel>`, there 
are a few key differences:
+
+- The explicit marshalling in **Step 1** is only needed in C. In C++, 
templates hide these details.
+- The C++ macro :c:macro:`TVM_FFI_DLL_EXPORT_TYPED_FUNC` (used to export 
``add_one_cpu``) is not needed in C, because this example directly defines the 
exported C symbol ``__tvm_ffi_add_one_cpu``.
+
+.. hint::
+
+  In TVM-FFI's C++ APIs, many invocables (functions, lambdas, functors) are 
automatically converted into the universal C ABI form by 
:cpp:class:`tvm::ffi::Function` and :cpp:class:`tvm::ffi::TypedFunction`.
+
+  Rule of thumb: if an invocable's arguments and result can be converted 
to/from :cpp:class:`tvm::ffi::Any` (the C++ equivalent of 
:cpp:class:`TVMFFIAny`), it can be wrapped as a universal C ABI function.
+
+
+Caller: Kernel Loader
+~~~~~~~~~~~~~~~~~~~~~
+
+Next, a minimal C loader invokes the ``add_one_cpu`` kernel. It is 
functionally identical to the :ref:`C++ example <cpp_load>` and performs:
+
+- **Step 1**. Load the shared library ``build/add_one_cpu.so`` that contains 
the kernel;
+- **Step 2**. Get function ``add_one_cpu`` from the library;
+- **Step 3**. Invoke the function with two `DLTensor 
<https://dmlc.github.io/dlpack/latest/c_api.html#c.DLTensor>`_ arguments, input ``x`` and 
output ``y``.
+
+.. literalinclude:: ../../examples/stable_c_abi/src/load.c
+   :language: c
+   :start-after: [main.begin]
+   :end-before: [main.end]
+
+
+.. dropdown:: Auxiliary Logic
+
+  .. literalinclude:: ../../examples/stable_c_abi/src/load.c
+    :language: c
+    :start-after: [aux.begin]
+    :end-before: [aux.end]
+
+Build and run the loader with either approach:
+
+.. tabs::
+
+  .. group-tab:: Raw command
+
+    .. literalinclude:: ../../examples/stable_c_abi/raw_compile.sh
+      :language: bash
+      :start-after: [load.begin]
+      :end-before: [load.end]
+
+  .. group-tab:: CMake
+
+    .. code-block:: bash
+
+       cmake . -B build -DEXAMPLE_NAME="load" -DCMAKE_BUILD_TYPE=RelWithDebInfo
+       cmake --build build --config RelWithDebInfo
+       build/load
+
+To call a function via the stable C ABI in C, idiomatically:
+
+- Convert input arguments to the :cpp:class:`TVMFFIAny` type;
+- Call the target function (e.g., ``add_one_cpu``) via 
:cpp:func:`TVMFFIFunctionCall`;
+- Optionally convert the output :cpp:class:`TVMFFIAny` back to the desired 
type, if the function returns a value.
+
+What's Next
+-----------
+
+**ABI specification.** See the complete ABI specification in 
:doc:`../concepts/abi_overview`.
+
+**Convenient compiler target.** The stable C ABI is a simple, portable codegen 
target for DSL compilers. Emit C that follows this ABI to integrate with 
TVM-FFI and call the result from multiple languages and frameworks. See 
:doc:`../guides/compiler_integration`.
+
+**Rich and extensible type system.** TVM-FFI supports a rich set of types in 
the stable C ABI: primitive types (integers, floats), DLPack tensors, strings, 
built-in reference-counted objects (functions, arrays, maps), and user-defined 
reference-counted objects. See :doc:`../guides/cpp_guide`.
diff --git a/docs/guides/stable_c_abi.md b/docs/guides/stable_c_abi.md
deleted file mode 100644
index ae46a8b..0000000
--- a/docs/guides/stable_c_abi.md
+++ /dev/null
@@ -1,81 +0,0 @@
-<!--- Licensed to the Apache Software Foundation (ASF) under one -->
-<!--- or more contributor license agreements.  See the NOTICE file -->
-<!--- distributed with this work for additional information -->
-<!--- regarding copyright ownership.  The ASF licenses this file -->
-<!--- to you under the Apache License, Version 2.0 (the -->
-<!--- "License"); you may not use this file except in compliance -->
-<!--- with the License.  You may obtain a copy of the License at -->
-
-<!---   http://www.apache.org/licenses/LICENSE-2.0 -->
-
-<!--- Unless required by applicable law or agreed to in writing, -->
-<!--- software distributed under the License is distributed on an -->
-<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
-<!--- KIND, either express or implied.  See the License for the -->
-<!--- specific language governing permissions and limitations -->
-<!--- under the License. -->
-# Stable C ABI
-
-**C ABI** is provided for easy low-level integration.
-
-For those who need to understand the low-level C ABI or are implementing
-compiler codegen, we also provided an example that is C only as follows:
-
-```c
-#include <tvm/ffi/c_api.h>
-#include <tvm/ffi/extra/c_env_api.h>
-
-// Helper to extract DLTensor from TVMFFIAny
-int ReadDLTensorPtr(const TVMFFIAny *value, DLTensor** out) {
-  if (value->type_index == kTVMFFIDLTensorPtr) {
-    *out = (DLTensor*)(value->v_ptr);
-    return 0;
-  }
-  if (value->type_index != kTVMFFITensor) {
-    TVMFFIErrorSetRaisedFromCStr("ValueError", "Expects a Tensor input");
-    return -1;
-  }
-  *out = (DLTensor*)((char*)(value->v_obj) + sizeof(TVMFFIObject));
-  return 0;
-}
-
-// Raw C FFI function
-int __tvm_ffi_add_one_c(
-  void* handle, const TVMFFIAny* args, int32_t num_args, TVMFFIAny* result
-) {
-  DLTensor *x, *y;
-
-  // Extract tensor arguments
-  if (ReadDLTensorPtr(&args[0], &x) == -1) return -1;
-  if (ReadDLTensorPtr(&args[1], &y) == -1) return -1;
-
-  // Get current stream for device synchronization (e.g., CUDA)
-  // not needed for CPU, just keep here for demonstration purpose
-  void* stream = TVMFFIEnvGetStream(x->device.device_type, 
x->device.device_id);
-
-  // Perform computation
-  for (int i = 0; i < x->shape[0]; ++i) {
-    ((float*)(y->data))[i] = ((float*)(x->data))[i] + 1;
-  }
-  return 0;  // Success
-}
-```
-
-To compile this code, you need to add 
{py:func}`tvm_ffi.libinfo.find_include_paths` to your include
-path and link the shared library that can be found through 
{py:func}`tvm_ffi.libinfo.find_libtvm_ffi`.
-We also provide command line tools to link, so you can compile with the 
following command:
-
-```bash
-gcc -shared -fPIC `tvm-ffi-config --cflags`  \
-    src/add_one_c.c -o build/add_one_c.so    \
-    `tvm-ffi-config --ldflags` `tvm-ffi-config --libs`
-```
-
-The main takeaway points are:
-
-- Function symbols follow name `int __tvm_ffi_<name>`
-- The function follows signature of `TVMFFISafeCallType`
-- Use `TVMFFIAny` to handle dynamic argument types
-- Return `0` for success, `-1` for error (set via 
`TVMFFIErrorSetRaisedFromCStr`)
-- This function can be compiled using a c compiler and loaded in the same one 
as
-  other libraries in this example.
diff --git a/docs/index.rst b/docs/index.rst
index f885799..14ec2ee 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -40,6 +40,7 @@ Table of Contents
    :caption: Get Started
 
    get_started/quickstart.rst
+   get_started/stable_c_abi.rst
 
 .. toctree::
    :maxdepth: 1
@@ -49,7 +50,6 @@ Table of Contents
    guides/cpp_guide.md
    guides/python_guide.md
    guides/rust_guide.md
-   guides/stable_c_abi.md
    guides/compiler_integration.md
    guides/build_from_source.md
 
diff --git a/examples/quickstart/CMakeLists.txt 
b/examples/quickstart/CMakeLists.txt
index 71b1b27..ba2d7a4 100644
--- a/examples/quickstart/CMakeLists.txt
+++ b/examples/quickstart/CMakeLists.txt
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 cmake_minimum_required(VERSION 3.20)
-project(tvm_ffi_example)
+project(quickstart LANGUAGES CXX)
 
 option(EXAMPLE_NAME "Which example to build 
('compile_cpu'/'compile_cuda'/'load_cpp')"
        "compile_cpu"
diff --git a/examples/quickstart/load/load_cpp.cc 
b/examples/quickstart/load/load_cpp.cc
index afa4343..b00db42 100644
--- a/examples/quickstart/load/load_cpp.cc
+++ b/examples/quickstart/load/load_cpp.cc
@@ -16,16 +16,13 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-// [example.begin]
+// [main.begin]
 // File: load/load_cpp.cc
 #include <tvm/ffi/container/tensor.h>
 #include <tvm/ffi/extra/module.h>
 
 namespace {
 namespace ffi = tvm::ffi;
-
-/************* Main logics *************/
-
 /*!
  * \brief Main logics of library loading and function calling.
  * \param x The input tensor.
@@ -39,9 +36,10 @@ void Run(tvm::ffi::TensorView x, tvm::ffi::TensorView y) {
   // Call the function
   add_one_cpu(x, y);
 }
-
+}  // namespace
+// [main.end]
 /************* Auxiliary Logics *************/
-
+// [aux.begin]
 /*!
  * \brief Allocate a 1D float32 `tvm::ffi::Tensor` on CPU from an braced 
initializer list.
  * \param data The input data.
@@ -65,8 +63,6 @@ ffi::Tensor Alloc1DTensor(std::initializer_list<float> data) {
   return x;
 }
 
-}  // namespace
-
 int main() {
   ffi::Tensor x = Alloc1DTensor({1, 2, 3, 4, 5});
   ffi::Tensor y = Alloc1DTensor({0, 0, 0, 0, 0});
@@ -79,4 +75,4 @@ int main() {
   std::cout << "]" << std::endl;
   return 0;
 }
-// [example.end]
+// [aux.end]
diff --git a/examples/quickstart/CMakeLists.txt 
b/examples/stable_c_abi/CMakeLists.txt
similarity index 63%
copy from examples/quickstart/CMakeLists.txt
copy to examples/stable_c_abi/CMakeLists.txt
index 71b1b27..81bbd1d 100644
--- a/examples/quickstart/CMakeLists.txt
+++ b/examples/stable_c_abi/CMakeLists.txt
@@ -15,11 +15,9 @@
 # specific language governing permissions and limitations
 # under the License.
 cmake_minimum_required(VERSION 3.20)
-project(tvm_ffi_example)
+project(tvm_ffi_example LANGUAGES C)
 
-option(EXAMPLE_NAME "Which example to build 
('compile_cpu'/'compile_cuda'/'load_cpp')"
-       "compile_cpu"
-)
+option(EXAMPLE_NAME "Which example to build ('kernel'/'load')" "kernel")
 message(STATUS "Building example: ${EXAMPLE_NAME}")
 
 # Run `tvm_ffi.config --cmakedir` to find tvm-ffi package
@@ -35,9 +33,9 @@ execute_process(
 )
 find_package(tvm_ffi CONFIG REQUIRED)
 
-if (EXAMPLE_NAME STREQUAL "compile_cpu")
-  # Example 1. C++ `add_one`
-  add_library(add_one_cpu SHARED compile/add_one_cpu.cc)
+if (EXAMPLE_NAME STREQUAL "kernel")
+  # Example 1. `add_one_cpu` in C
+  add_library(add_one_cpu SHARED src/add_one_cpu.c)
   target_link_libraries(add_one_cpu PRIVATE tvm_ffi_header)
   target_link_libraries(add_one_cpu PRIVATE tvm_ffi_shared)
   set_target_properties(
@@ -45,31 +43,26 @@ if (EXAMPLE_NAME STREQUAL "compile_cpu")
     PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/"
                PREFIX ""
                SUFFIX ".so"
+               C_STANDARD 11
+               C_STANDARD_REQUIRED YES
+               C_EXTENSIONS NO
   )
-elseif (EXAMPLE_NAME STREQUAL "compile_cuda")
-  # Example 2. CUDA `add_one`
-  enable_language(CUDA)
-  add_library(add_one_cuda SHARED compile/add_one_cuda.cu)
-  target_link_libraries(add_one_cuda PRIVATE tvm_ffi_shared)
-  set_target_properties(
-    add_one_cuda
-    PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/"
-               PREFIX ""
-               SUFFIX ".so"
-  )
-elseif (EXAMPLE_NAME STREQUAL "load_cpp")
-  # Example 3. Load C++ shared library
-  add_executable(load_cpp load/load_cpp.cc)
-  target_link_libraries(load_cpp PRIVATE tvm_ffi_header)
-  target_link_libraries(load_cpp PRIVATE tvm_ffi_shared)
+elseif (EXAMPLE_NAME STREQUAL "load")
+  # Example 2. Load `add_one_cpu` shared library in C
+  add_executable(load src/load.c)
+  target_link_libraries(load PRIVATE tvm_ffi_header)
+  target_link_libraries(load PRIVATE tvm_ffi_shared)
   set_target_properties(
-    load_cpp
+    load
     PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/"
                PREFIX ""
                SUFFIX ""
+               C_STANDARD 11
+               C_STANDARD_REQUIRED YES
+               C_EXTENSIONS NO
   )
 else ()
   message(FATAL_ERROR "Unknown EXAMPLE_NAME option: ${EXAMPLE_NAME}. "
-                      "Expected: 'compile_cpu', 'compile_cuda', 'load_cpp'."
+                      "Expected: 'kernel' or 'load'."
   )
 endif ()
diff --git a/examples/stable_c_abi/README.md b/examples/stable_c_abi/README.md
new file mode 100644
index 0000000..3a93a7d
--- /dev/null
+++ b/examples/stable_c_abi/README.md
@@ -0,0 +1,39 @@
+<!--- Licensed to the Apache Software Foundation (ASF) under one -->
+<!--- or more contributor license agreements.  See the NOTICE file -->
+<!--- distributed with this work for additional information -->
+<!--- regarding copyright ownership.  The ASF licenses this file -->
+<!--- to you under the Apache License, Version 2.0 (the -->
+<!--- "License"); you may not use this file except in compliance -->
+<!--- with the License.  You may obtain a copy of the License at -->
+
+<!---   http://www.apache.org/licenses/LICENSE-2.0 -->
+
+<!--- Unless required by applicable law or agreed to in writing, -->
+<!--- software distributed under the License is distributed on an -->
+<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
+<!--- KIND, either express or implied.  See the License for the -->
+<!--- specific language governing permissions and limitations -->
+<!--- under the License. -->
+
+# Stable C ABI Code Example
+
+This directory contains all the source code for the 
[Stable C ABI tutorial](https://tvm.apache.org/ffi/get_started/stable_c_abi.html).
+
+## Compile and Distribute `add_one_cpu`
+
+To compile the C example:
+
+```bash
+cmake . -B build -DEXAMPLE_NAME="kernel" -DCMAKE_BUILD_TYPE=RelWithDebInfo
+cmake --build build --config RelWithDebInfo
+```
+
+## Load the Distributed `add_one_cpu`
+
+To run the library loading example in C:
+
+```bash
+cmake . -B build -DEXAMPLE_NAME="load" -DCMAKE_BUILD_TYPE=RelWithDebInfo
+cmake --build build --config RelWithDebInfo
+build/load
+```
diff --git a/examples/stable_c_abi/raw_compile.sh 
b/examples/stable_c_abi/raw_compile.sh
new file mode 100755
index 0000000..0a7c378
--- /dev/null
+++ b/examples/stable_c_abi/raw_compile.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# shellcheck disable=SC2046
+set -ex
+
+BUILD_DIR=build
+mkdir -p $BUILD_DIR
+
+# Example 1. Compile C `src/add_one_cpu.c` to shared library `add_one_cpu.so`
+# [kernel.begin]
+gcc -shared -O3 -std=c11 src/add_one_cpu.c  \
+    -fPIC -fvisibility=hidden               \
+    $(tvm-ffi-config --cflags)              \
+    $(tvm-ffi-config --ldflags)             \
+    $(tvm-ffi-config --libs)                \
+    -o $BUILD_DIR/add_one_cpu.so
+# [kernel.end]
+
+# Example 2. Load and run `add_one_cpu.so` in C
+if [ -f "$BUILD_DIR/add_one_cpu.so" ]; then
+# [load.begin]
+gcc -fvisibility=hidden -O3 -std=c11        \
+    src/load.c                              \
+    $(tvm-ffi-config --cflags)              \
+    $(tvm-ffi-config --ldflags)             \
+    $(tvm-ffi-config --libs)                \
+    -Wl,-rpath,$(tvm-ffi-config --libdir)   \
+    -o build/load
+build/load
+# [load.end]
+fi
diff --git a/examples/stable_c_abi/run_all.sh b/examples/stable_c_abi/run_all.sh
new file mode 100755
index 0000000..a1c727e
--- /dev/null
+++ b/examples/stable_c_abi/run_all.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+set -ex
+
+# To compile `src/add_one_cpu.c` to shared library `build/add_one_cpu.so`
+cmake . -B build -DEXAMPLE_NAME="kernel" -DCMAKE_BUILD_TYPE=RelWithDebInfo
+cmake --build build --config RelWithDebInfo
+
+# To compile `src/load.c` to executable `build/load`
+cmake . -B build -DEXAMPLE_NAME="load" -DCMAKE_BUILD_TYPE=RelWithDebInfo
+cmake --build build --config RelWithDebInfo
+./build/load
diff --git a/examples/stable_c_abi/src/add_one_cpu.c 
b/examples/stable_c_abi/src/add_one_cpu.c
new file mode 100644
index 0000000..f273014
--- /dev/null
+++ b/examples/stable_c_abi/src/add_one_cpu.c
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+// 
NOLINTBEGIN(bugprone-reserved-identifier,google-readability-braces-around-statements)
+
+#include <tvm/ffi/c_api.h>
+#include <tvm/ffi/extra/c_env_api.h>
+
+// clang-format off
+// [example.begin]
+// File: src/add_one_cpu.c
+TVM_FFI_DLL int __tvm_ffi_add_one_cpu(void* handle, const TVMFFIAny* args, 
int32_t num_args,
+                                      TVMFFIAny* result) {
+  // Step 1. Extract inputs from `Any`
+  // Step 1.1. Extract `x := args[0]`
+  DLTensor* x;
+  if (args[0].type_index == kTVMFFIDLTensorPtr) x = (DLTensor*)(args[0].v_ptr);
+  else if (args[0].type_index == kTVMFFITensor) x = 
(DLTensor*)(args[0].v_c_str + sizeof(TVMFFIObject));
+  else { TVMFFIErrorSetRaisedFromCStr("ValueError", "Expects a Tensor input"); 
return -1; }
+  // Step 1.2. Extract `y := args[1]`
+  DLTensor* y;
+  if (args[1].type_index == kTVMFFIDLTensorPtr) y = (DLTensor*)(args[1].v_ptr);
+  else if (args[1].type_index == kTVMFFITensor) y = 
(DLTensor*)(args[1].v_c_str + sizeof(TVMFFIObject));
+  else { TVMFFIErrorSetRaisedFromCStr("ValueError", "Expects a Tensor 
output"); return -1; }
+
+  // Step 2. Perform add one: y = x + 1
+  for (int64_t i = 0; i < x->shape[0]; ++i) {
+    ((float*)y->data)[i] = ((float*)x->data)[i] + 1.0f;
+  }
+
+  // Step 3. Return error code 0 (success)
+  //
+  // Note that `result` is not set, as the output is passed in via `y` 
argument,
+  // which is functionally similar to a Python function with signature:
+  //
+  //   def add_one(x: Tensor, y: Tensor) -> None: ...
+  return 0;
+}
+// [example.end]
+// clang-format on
+// 
NOLINTEND(bugprone-reserved-identifier,google-readability-braces-around-statements)
diff --git a/examples/stable_c_abi/src/load.c b/examples/stable_c_abi/src/load.c
new file mode 100644
index 0000000..5f207e9
--- /dev/null
+++ b/examples/stable_c_abi/src/load.c
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+// NOLINTBEGIN(modernize-deprecated-headers,modernize-use-nullptr,bugprone-assignment-in-if-condition,modernize-loop-convert)
+// [main.begin]
+// File: src/load.c
+#include <stdio.h>
+#include <tvm/ffi/c_api.h>
+#include <tvm/ffi/extra/c_env_api.h>
+
+// Global functions are looked up during `Initialize` and released during `Finalize`
+// - "ffi.Module.load_from_file.so": NULL until set by `Initialize`; `Run` calls it to load the module
+static TVMFFIObjectHandle fn_load_module = NULL;
+// - "ffi.ModuleGetFunction": NULL until set by `Initialize`; `Run` calls it to look up a function
+static TVMFFIObjectHandle fn_get_function = NULL;
+
+// Loads "build/add_one_cpu.so", looks up `add_one_cpu` in it, and calls `add_one_cpu(x, y)`.
+//
+// Parameters:
+//   x - input tensor, passed as the first argument of `add_one_cpu`
+//   y - output tensor, passed as the second argument of `add_one_cpu`
+//
+// Returns 0 on success, otherwise the non-zero code of the first failing step.
+// Uses the global handles `fn_load_module` and `fn_get_function`, which `Initialize`
+// must have set beforehand. Every object obtained here is released before returning.
+int Run(DLTensor* x, DLTensor* y) {
+  int ret_code = 0;
+  TVMFFIAny call_args[3] = {0};
+  TVMFFIAny mod = {.type_index = kTVMFFINone, .v_obj = NULL};
+  TVMFFIAny func = {.type_index = kTVMFFINone, .v_obj = NULL};
+  TVMFFIAny none = {.type_index = kTVMFFINone};  // ignore the return value
+
+  // Step 1. Load module
+  // Equivalent to:
+  //    mod = tvm::ffi::Module::LoadFromFile("build/add_one_cpu.so")
+  call_args[0] = (TVMFFIAny){.type_index = kTVMFFIRawStr, .v_c_str = "build/add_one_cpu.so"};
+  call_args[1] = (TVMFFIAny){.type_index = kTVMFFISmallStr, .v_int64 = 0};
+  if ((ret_code = TVMFFIFunctionCall(fn_load_module, call_args, 2, &mod))) goto cleanup;
+
+  // Step 2. Get function `add_one_cpu` from module
+  // Equivalent to:
+  //    func = mod->GetFunction("add_one_cpu", /*query_imports=*/false).value()
+  call_args[0] = (TVMFFIAny){.type_index = mod.type_index, .v_obj = mod.v_obj};
+  call_args[1] = (TVMFFIAny){.type_index = kTVMFFIRawStr, .v_c_str = "add_one_cpu"};
+  call_args[2] = (TVMFFIAny){.type_index = kTVMFFIBool, .v_int64 = 0};
+  if ((ret_code = TVMFFIFunctionCall(fn_get_function, call_args, 3, &func))) goto cleanup;
+  // The lookup can succeed yet yield a non-object value (e.g. None when the function is
+  // absent); calling through such a handle would dereference an invalid pointer.
+  if (func.type_index < kTVMFFIObject) {
+    TVMFFIErrorSetRaisedFromCStr("ValueError", "Function `add_one_cpu` not found in module");
+    ret_code = -1;
+    goto cleanup;
+  }
+
+  // Step 3. Call function `add_one_cpu(x, y)`
+  // Equivalent to:
+  //    func(x, y)
+  call_args[0] = (TVMFFIAny){.type_index = kTVMFFIDLTensorPtr, .v_ptr = x};
+  call_args[1] = (TVMFFIAny){.type_index = kTVMFFIDLTensorPtr, .v_ptr = y};
+  if ((ret_code = TVMFFIFunctionCall(func.v_obj, call_args, 2, &none))) goto cleanup;
+
+cleanup:
+  // Only values whose type index marks them as objects carry a reference to drop
+  if (mod.type_index >= kTVMFFIObject) TVMFFIObjectDecRef(mod.v_obj);
+  if (func.type_index >= kTVMFFIObject) TVMFFIObjectDecRef(func.v_obj);
+  if (none.type_index >= kTVMFFIObject) TVMFFIObjectDecRef(none.v_obj);
+  return ret_code;
+}
+// [main.end]
+
+/************* Auxiliary Logic *************/
+
+// [aux.begin]
+// Looks up the global functions "ffi.Module.load_from_file.so" and "ffi.ModuleGetFunction"
+// and stores their handles in `fn_load_module` and `fn_get_function`.
+//
+// Returns 0 on success, otherwise the non-zero code of the failing
+// `TVMFFIFunctionGetGlobal` call (later lookups are then skipped).
+static inline int Initialize(void) {
+  // Keep the names in arrays so their byte length comes from `sizeof` (minus the
+  // trailing NUL) instead of a hand-counted constant.
+  static const char kNameLoadModule[] = "ffi.Module.load_from_file.so";
+  static const char kNameGetFunction[] = "ffi.ModuleGetFunction";
+  int ret_code = 0;
+  TVMFFIByteArray name_load_module = {.data = kNameLoadModule,
+                                      .size = sizeof(kNameLoadModule) - 1};
+  TVMFFIByteArray name_get_function = {.data = kNameGetFunction,
+                                       .size = sizeof(kNameGetFunction) - 1};
+  if ((ret_code = TVMFFIFunctionGetGlobal(&name_load_module, &fn_load_module))) return ret_code;
+  if ((ret_code = TVMFFIFunctionGetGlobal(&name_get_function, &fn_get_function))) return ret_code;
+  return 0;
+}
+
+static inline void Finalize(int ret_code) {
+  TVMFFIObjectHandle err = NULL;
+  TVMFFIErrorCell* cell = NULL;
+  if (fn_load_module) TVMFFIObjectDecRef(fn_load_module);
+  if (fn_get_function) TVMFFIObjectDecRef(fn_get_function);
+  if (ret_code) {
+    TVMFFIErrorMoveFromRaised(&err);
+    cell = (TVMFFIErrorCell*)((char*)(err) + sizeof(TVMFFIObject));
+    printf("%.*s: %.*s\n", (int)(cell->kind.size), cell->kind.data, 
(int)(cell->message.size),
+           cell->message.data);
+  }
+}
+
+// Builds two 5-element float32 CPU tensors over stack buffers, runs
+// `Initialize` and `Run`, and prints the contents of the output buffer on success.
+//
+// Returns 0 on success, otherwise the non-zero code of the failing step.
+// `Finalize` is called on every path.
+int main(void) {
+  int ret_code = 0;
+  float x_data[5] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f};
+  float y_data[5] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
+  int64_t shape[1] = {5};
+  int64_t strides[1] = {1};
+  // `DLDataType.code` takes a DLPack type code (`kDLFloat`), not a TVM FFI type index
+  DLDataType f32 = {.code = kDLFloat, .bits = 32, .lanes = 1};
+  DLDevice cpu = {.device_type = kDLCPU, .device_id = 0};
+  DLTensor x = {//
+                .data = x_data, .device = cpu,      .ndim = 1,       .dtype = f32,
+                .shape = shape, .strides = strides, .byte_offset = 0};
+  DLTensor y = {//
+                .data = y_data, .device = cpu,      .ndim = 1,       .dtype = f32,
+                .shape = shape, .strides = strides, .byte_offset = 0};
+  if ((ret_code = Initialize())) goto cleanup;
+  if ((ret_code = Run(&x, &y))) goto cleanup;
+
+  printf("[ ");
+  for (int i = 0; i < 5; ++i) printf("%f ", y_data[i]);
+  printf("]\n");
+
+cleanup:
+  Finalize(ret_code);
+  return ret_code;
+}
+// [aux.end]
+// NOLINTEND(modernize-deprecated-headers,modernize-use-nullptr,bugprone-assignment-in-if-condition,modernize-loop-convert)


Reply via email to