This is an automated email from the ASF dual-hosted git repository.

ruihangl pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new 19395eedd4 [REFACTOR][FFI] Cleanup PackedFunc redirections (#18022)
19395eedd4 is described below

commit 19395eedd4888c93072861a04811e3fb6595bb2f
Author: Tianqi Chen <[email protected]>
AuthorDate: Thu May 29 15:57:47 2025 -0400

    [REFACTOR][FFI] Cleanup PackedFunc redirections (#18022)
    
    This PR cleans up the packed_func.h redirections:
    - Move module-related macros to module.h
    - Remove most includes
    - TVM_DLL_EXPORT_TYPED_FUNC => TVM_FFI_DLL_EXPORT_TYPED_FUNC
    - Once downstream bindings get updated, we can remove
      TVM_DLL_EXPORT_TYPED_FUNC
---
 3rdparty/cutlass_fpA_intB_gemm                     |   2 +-
 apps/hexagon_launcher/launcher_core.cc             |   1 -
 apps/hexagon_launcher/launcher_core.h              |   2 +-
 apps/ios_rpc/tvmrpc/RPCServer.mm                   |   1 -
 docs/arch/runtime.rst                              |   2 +-
 ffi/include/tvm/ffi/any.h                          |   6 +
 ffi/include/tvm/ffi/c_api.h                        |   7 ++
 ffi/include/tvm/ffi/function.h                     |  48 ++++++++
 include/tvm/ir/attrs.h                             |   2 +-
 include/tvm/ir/env_func.h                          |   6 +-
 include/tvm/ir/source_map.h                        |   1 -
 include/tvm/meta_schedule/builder.h                |   2 +-
 include/tvm/meta_schedule/cost_model.h             |   2 +-
 include/tvm/meta_schedule/database.h               |   2 +-
 include/tvm/meta_schedule/feature_extractor.h      |   2 +-
 include/tvm/meta_schedule/measure_callback.h       |   2 +-
 include/tvm/meta_schedule/mutator.h                |   2 +-
 include/tvm/meta_schedule/postproc.h               |   2 +-
 include/tvm/meta_schedule/profiler.h               |   2 +-
 include/tvm/meta_schedule/runner.h                 |   2 +-
 include/tvm/meta_schedule/schedule_rule.h          |   2 +-
 include/tvm/meta_schedule/search_strategy.h        |   2 +-
 include/tvm/meta_schedule/space_generator.h        |   2 +-
 include/tvm/meta_schedule/task_scheduler.h         |   2 +-
 include/tvm/meta_schedule/tune_context.h           |   2 +-
 include/tvm/node/reflection.h                      |   4 +-
 include/tvm/node/script_printer.h                  |   3 +-
 include/tvm/relax/dataflow_matcher.h               |   2 +-
 include/tvm/runtime/disco/disco_worker.h           |   2 +-
 include/tvm/runtime/disco/session.h                |   7 +-
 include/tvm/runtime/module.h                       |  77 +++++++++++-
 include/tvm/runtime/packed_func.h                  | 137 +--------------------
 include/tvm/runtime/profiling.h                    |   3 +-
 include/tvm/runtime/relax_vm/executable.h          |   2 +-
 include/tvm/runtime/relax_vm/vm.h                  |   2 +-
 include/tvm/script/printer/ir_docsifier_functor.h  |   2 +-
 include/tvm/support/span.h                         | 109 ----------------
 include/tvm/target/codegen.h                       |   2 +-
 include/tvm/tir/op_attr_types.h                    |   2 +-
 include/tvm/topi/utils.h                           |   2 +-
 python/tvm/relax/frontend/nn/extern.py             |   6 +-
 src/ir/op.cc                                       |   2 +-
 src/node/attr_registry.h                           |   2 +-
 src/node/serialization.cc                          |   1 -
 src/relax/backend/contrib/codegen_c/codegen_c.h    |   6 +-
 src/relax/backend/contrib/cutlass/codegen.cc       |   4 +-
 src/relax/transform/fold_constant.cc               |   1 +
 src/relax/transform/run_codegen.cc                 |   1 +
 src/runtime/const_loader_module.cc                 |   2 +-
 src/runtime/contrib/amx/amx_config.cc              |   1 -
 src/runtime/contrib/coreml/coreml_runtime.h        |   2 +-
 src/runtime/contrib/cutlass/fp16_group_gemm.cu     |   2 +-
 .../contrib/cutlass/fp8_blockwise_scaled_gemm.cu   |   2 +-
 src/runtime/contrib/cutlass/fp8_gemm.cu            |   2 +-
 src/runtime/contrib/cutlass/fp8_group_gemm.cu      |   2 +-
 src/runtime/contrib/cutlass/weight_preprocess.cc   |   1 -
 src/runtime/contrib/mrvl/mrvl_sw_runtime_lib.h     |   2 +-
 src/runtime/contrib/mscclpp/allreduce.cu           |   2 +-
 src/runtime/contrib/nvshmem/init.cc                |   1 -
 src/runtime/contrib/nvshmem/memory_allocator.cc    |   1 -
 src/runtime/contrib/tflite/tflite_runtime.h        |   3 +-
 src/runtime/contrib/vllm/attention_kernels.cu      |   2 +-
 src/runtime/contrib/vllm/cache_kernels.cu          |   2 +-
 src/runtime/cuda/cuda_common.h                     |   2 +-
 src/runtime/debug_compile.cc                       |   4 +-
 src/runtime/device_api.cc                          |   7 +-
 src/runtime/disco/bcast_session.cc                 |   5 +-
 src/runtime/disco/bcast_session.h                  |   2 +-
 src/runtime/disco/builtin.cc                       |   1 -
 src/runtime/disco/disco_worker.cc                  |  15 ++-
 src/runtime/disco/disco_worker_thread.h            |   2 +-
 src/runtime/disco/loader.cc                        |   5 +-
 src/runtime/disco/message_queue.h                  |   4 +-
 src/runtime/disco/process_session.cc               |   7 +-
 src/runtime/disco/protocol.h                       |   3 +-
 src/runtime/disco/session.cc                       |   1 -
 src/runtime/disco/threaded_session.cc              |   4 +-
 src/runtime/dso_library.cc                         |   1 -
 src/runtime/hexagon/hexagon_buffer.h               |   2 +-
 src/runtime/hexagon/hexagon_common.h               |   3 +-
 src/runtime/hexagon/hexagon_thread_manager.h       |   2 +-
 src/runtime/hexagon/hexagon_vtcm_pool.h            |   2 +-
 src/runtime/hexagon/rpc/hexagon/rpc_server.cc      |   1 -
 src/runtime/hexagon/rpc/simulator/session.cc       |   1 -
 src/runtime/meta_data.h                            |   2 +-
 src/runtime/metal/metal_common.h                   |   2 +-
 src/runtime/metal/metal_module.h                   |   2 +-
 src/runtime/module.cc                              |   1 -
 src/runtime/opencl/opencl_common.h                 |   2 +-
 src/runtime/opencl/opencl_module.h                 |   2 +-
 src/runtime/pack_args.h                            |   2 +-
 src/runtime/packed_func.cc                         |  32 -----
 src/runtime/profiling.cc                           |  85 ++++++-------
 src/runtime/relax_vm/attn_backend.h                |   2 +-
 src/runtime/relax_vm/builtin.cc                    |  50 ++++----
 src/runtime/relax_vm/cuda/cuda_graph_builtin.cc    |   1 -
 src/runtime/relax_vm/hexagon/builtin.cc            |   1 -
 src/runtime/relax_vm/ndarray_cache_support.cc      |   4 +-
 src/runtime/relax_vm/paged_kv_cache.cc             |   2 +-
 src/runtime/relax_vm/vm.cc                         |  30 ++---
 src/runtime/rocm/rocm_common.h                     |   2 +-
 src/runtime/rpc/rpc_channel.cc                     |   2 +
 src/runtime/rpc/rpc_channel.h                      |   2 +-
 src/runtime/rpc/rpc_endpoint.cc                    |   1 -
 src/runtime/rpc/rpc_endpoint.h                     |   2 +-
 src/runtime/rpc/rpc_local_session.cc               |   3 +-
 src/runtime/rpc/rpc_local_session.h                |   2 +-
 src/runtime/rpc/rpc_session.cc                     |   8 +-
 src/runtime/rpc/rpc_session.h                      |   4 +-
 src/runtime/spirv/spirv_shader.h                   |   3 +-
 src/runtime/static_library.cc                      |   1 -
 src/runtime/thread_pool.cc                         |   1 -
 src/runtime/thread_storage_scope.h                 |   2 +-
 src/runtime/vulkan/vulkan_common.h                 |   1 -
 src/target/datatype/registry.cc                    |   1 +
 src/target/datatype/registry.h                     |   1 -
 src/target/llvm/llvm_module.cc                     |   1 -
 src/target/source/source_module.cc                 |   1 -
 src/target/spirv/ir_builder.h                      |   2 +-
 src/tir/transforms/make_packed_api.cc              |   1 +
 src/topi/broadcast.cc                              |   1 -
 src/topi/elemwise.cc                               |   1 -
 src/topi/nn.cc                                     |   1 -
 src/topi/reduction.cc                              |   1 -
 src/topi/transform.cc                              |   1 -
 src/topi/utils.cc                                  |   1 -
 src/topi/vision.cc                                 |   1 -
 tests/cpp-runtime/hexagon/run_all_tests.cc         |   1 -
 tests/cpp-runtime/hexagon/run_unit_tests.cc        |   1 -
 tests/cpp/llvm_codegen_registry_test.cc            |   1 -
 tests/python/relax/frontend_nn_extern_module.cc    |   6 +-
 131 files changed, 353 insertions(+), 510 deletions(-)

diff --git a/3rdparty/cutlass_fpA_intB_gemm b/3rdparty/cutlass_fpA_intB_gemm
index 3e07e778d7..412a22bded 160000
--- a/3rdparty/cutlass_fpA_intB_gemm
+++ b/3rdparty/cutlass_fpA_intB_gemm
@@ -1 +1 @@
-Subproject commit 3e07e778d78f0fcd047533c1fdaed571a68a396f
+Subproject commit 412a22bded6631d02fa40e3994a8096a5b8a6c7c
diff --git a/apps/hexagon_launcher/launcher_core.cc 
b/apps/hexagon_launcher/launcher_core.cc
index aebde97a51..56242082cc 100644
--- a/apps/hexagon_launcher/launcher_core.cc
+++ b/apps/hexagon_launcher/launcher_core.cc
@@ -21,7 +21,6 @@
 
 #include <tvm/ffi/function.h>
 #include <tvm/runtime/c_backend_api.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <fstream>
 #include <ios>
diff --git a/apps/hexagon_launcher/launcher_core.h 
b/apps/hexagon_launcher/launcher_core.h
index ae9e4108cd..5e62774607 100644
--- a/apps/hexagon_launcher/launcher_core.h
+++ b/apps/hexagon_launcher/launcher_core.h
@@ -22,10 +22,10 @@
 
 #include <dlpack/dlpack.h>
 #include <dmlc/json.h>
+#include <tvm/ffi/function.h>
 #include <tvm/runtime/data_type.h>
 #include <tvm/runtime/module.h>
 #include <tvm/runtime/ndarray.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <string>
 #include <vector>
diff --git a/apps/ios_rpc/tvmrpc/RPCServer.mm b/apps/ios_rpc/tvmrpc/RPCServer.mm
index 4717d71032..da689d4c70 100644
--- a/apps/ios_rpc/tvmrpc/RPCServer.mm
+++ b/apps/ios_rpc/tvmrpc/RPCServer.mm
@@ -24,7 +24,6 @@
 #import "RPCServer.h"
 
 #include <tvm/ffi/function.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <random>
 #include <string>
diff --git a/docs/arch/runtime.rst b/docs/arch/runtime.rst
index 55c523cb4c..f1642827cd 100644
--- a/docs/arch/runtime.rst
+++ b/docs/arch/runtime.rst
@@ -52,7 +52,7 @@ The following code block provides an example in C++
 
 .. code:: c
 
-    #include <tvm/runtime/packed_func.h>
+    #include <tvm/ffi/function.h>
 
     void MyAdd(ffi::PackedArgs args, ffi::Any* rv) {
       // automatically convert arguments to desired type.
diff --git a/ffi/include/tvm/ffi/any.h b/ffi/include/tvm/ffi/any.h
index 7897d62898..5b56b003e8 100644
--- a/ffi/include/tvm/ffi/any.h
+++ b/ffi/include/tvm/ffi/any.h
@@ -583,5 +583,11 @@ struct AnyEqual {
 };
 
 }  // namespace ffi
+
+// Expose to the tvm namespace for usability
+// Rationale: no ambiguity even in root
+using tvm::ffi::Any;
+using tvm::ffi::AnyView;
+
 }  // namespace tvm
 #endif  // TVM_FFI_ANY_H_
diff --git a/ffi/include/tvm/ffi/c_api.h b/ffi/include/tvm/ffi/c_api.h
index df8265d0b9..996eaa369b 100644
--- a/ffi/include/tvm/ffi/c_api.h
+++ b/ffi/include/tvm/ffi/c_api.h
@@ -34,9 +34,14 @@
 #define TVM_FFI_WEAK __attribute__((weak))
 #endif
 
+// Defines two macros
+// TVM_FFI_DLL: marks the function as a DLL export/import
+//              depending on whether TVM_FFI_EXPORTS is defined
+// TVM_FFI_DLL_EXPORT: always marks the function as a DLL export
 #if !defined(TVM_FFI_DLL) && defined(__EMSCRIPTEN__)
 #include <emscripten/emscripten.h>
 #define TVM_FFI_DLL EMSCRIPTEN_KEEPALIVE
+#define TVM_FFI_DLL_EXPORT EMSCRIPTEN_KEEPALIVE
 #endif
 #if !defined(TVM_FFI_DLL) && defined(_MSC_VER)
 #ifdef TVM_FFI_EXPORTS
@@ -44,9 +49,11 @@
 #else
 #define TVM_FFI_DLL __declspec(dllimport)
 #endif
+#define TVM_FFI_DLL_EXPORT __declspec(dllexport)
 #endif
 #ifndef TVM_FFI_DLL
 #define TVM_FFI_DLL __attribute__((visibility("default")))
+#define TVM_FFI_DLL_EXPORT __attribute__((visibility("default")))
 #endif
 
 #ifdef __cplusplus
diff --git a/ffi/include/tvm/ffi/function.h b/ffi/include/tvm/ffi/function.h
index 753d4f50f1..128c67830e 100644
--- a/ffi/include/tvm/ffi/function.h
+++ b/ffi/include/tvm/ffi/function.h
@@ -910,6 +910,54 @@ inline int32_t TypeKeyToIndex(std::string_view type_key) {
 #define TVM_FFI_REGISTER_GLOBAL(OpName) \
   TVM_FFI_STR_CONCAT(TVM_FFI_FUNC_REG_VAR_DEF, __COUNTER__) = 
::tvm::ffi::Function::Registry(OpName)
 
+/*!
+ * \brief Export typed function as a SafeCallType symbol.
+ *
+ * \param ExportName The symbol name to be exported.
+ * \param Function The typed function.
+ * \note ExportName and Function must be different,
+ *       see code examples below.
+ *
+ * \sa ffi::TypedFunction
+ *
+ * \code
+ *
+ * int AddOne_(int x) {
+ *   return x + 1;
+ * }
+ *
+ * // Expose the function as "AddOne"
+ * TVM_FFI_DLL_EXPORT_TYPED_FUNC(AddOne, AddOne_);
+ *
+ * // Expose the function as "SubOne"
+ * TVM_FFI_DLL_EXPORT_TYPED_FUNC(SubOne, [](int x) {
+ *   return x - 1;
+ * });
+ *
+ * // The following code will cause compilation error.
+ * // Because the same Function and ExportName
+ * // TVM_FFI_DLL_EXPORT_TYPED_FUNC(AddOne_, AddOne_);
+ *
+ * // The following code is OK, assuming the macro
+ * // is in a different namespace from xyz
+ * // TVM_FFI_DLL_EXPORT_TYPED_FUNC(AddOne_, xyz::AddOne_);
+ *
+ * \endcode
+ */
+#define TVM_FFI_DLL_EXPORT_TYPED_FUNC(ExportName, Function)                    
    \
+  extern "C" {                                                                 
    \
+  TVM_FFI_DLL_EXPORT int ExportName(void* self, TVMFFIAny* args, int32_t 
num_args, \
+                                    TVMFFIAny* result) {                       
    \
+    TVM_FFI_SAFE_CALL_BEGIN();                                                 
    \
+    using FuncInfo = ::tvm::ffi::details::FunctionInfo<decltype(Function)>;    
    \
+    static std::string name = #ExportName;                                     
    \
+    ::tvm::ffi::details::unpack_call<typename FuncInfo::RetType>(              
    \
+        std::make_index_sequence<FuncInfo::num_args>{}, &name, Function,       
    \
+        reinterpret_cast<const ::tvm::ffi::AnyView*>(args), num_args,          
    \
+        reinterpret_cast<::tvm::ffi::Any*>(result));                           
    \
+    TVM_FFI_SAFE_CALL_END();                                                   
    \
+  }                                                                            
    \
+  }
 }  // namespace ffi
 }  // namespace tvm
 #endif  // TVM_FFI_FUNCTION_H_
diff --git a/include/tvm/ir/attrs.h b/include/tvm/ir/attrs.h
index dc8e99fceb..6378d6f74a 100644
--- a/include/tvm/ir/attrs.h
+++ b/include/tvm/ir/attrs.h
@@ -46,10 +46,10 @@
 
 #include <dmlc/common.h>
 #include <tvm/ffi/container/map.h>
+#include <tvm/ffi/function.h>
 #include <tvm/ir/expr.h>
 #include <tvm/node/structural_equal.h>
 #include <tvm/node/structural_hash.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <functional>
 #include <string>
diff --git a/include/tvm/ir/env_func.h b/include/tvm/ir/env_func.h
index 52fab11636..ab5cf31c6c 100644
--- a/include/tvm/ir/env_func.h
+++ b/include/tvm/ir/env_func.h
@@ -24,8 +24,8 @@
 #ifndef TVM_IR_ENV_FUNC_H_
 #define TVM_IR_ENV_FUNC_H_
 
+#include <tvm/ffi/function.h>
 #include <tvm/node/reflection.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <string>
 #include <utility>
@@ -142,8 +142,8 @@ class TypedEnvFunc<R(Args...)> : public ObjectRef {
     if constexpr (std::is_same_v<R, void>) {
       n->func(std::forward<Args>(args)...);
     } else {
-      Any res = n->func(std::forward<Args>(args)...);
-      if constexpr (std::is_same_v<R, Any>) {
+      ffi::Any res = n->func(std::forward<Args>(args)...);
+      if constexpr (std::is_same_v<R, ffi::Any>) {
         return res;
       } else {
         return std::move(res).cast<R>();
diff --git a/include/tvm/ir/source_map.h b/include/tvm/ir/source_map.h
index 2752d9951a..83e2f4f375 100644
--- a/include/tvm/ir/source_map.h
+++ b/include/tvm/ir/source_map.h
@@ -26,7 +26,6 @@
 #include <tvm/ffi/function.h>
 #include <tvm/node/node.h>
 #include <tvm/runtime/object.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <fstream>
 #include <string>
diff --git a/include/tvm/meta_schedule/builder.h 
b/include/tvm/meta_schedule/builder.h
index 5ac1fd1753..24e136f9d3 100644
--- a/include/tvm/meta_schedule/builder.h
+++ b/include/tvm/meta_schedule/builder.h
@@ -21,13 +21,13 @@
 
 #include <tvm/ffi/container/array.h>
 #include <tvm/ffi/container/map.h>
+#include <tvm/ffi/function.h>
 #include <tvm/ffi/optional.h>
 #include <tvm/ffi/string.h>
 #include <tvm/ir/module.h>
 #include <tvm/node/reflection.h>
 #include <tvm/runtime/ndarray.h>
 #include <tvm/runtime/object.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/target/target.h>
 
 namespace tvm {
diff --git a/include/tvm/meta_schedule/cost_model.h 
b/include/tvm/meta_schedule/cost_model.h
index a3386df5e9..300f53e113 100644
--- a/include/tvm/meta_schedule/cost_model.h
+++ b/include/tvm/meta_schedule/cost_model.h
@@ -21,13 +21,13 @@
 #define TVM_META_SCHEDULE_COST_MODEL_H_
 
 #include <tvm/ffi/container/array.h>
+#include <tvm/ffi/function.h>
 #include <tvm/ffi/string.h>
 #include <tvm/meta_schedule/arg_info.h>
 #include <tvm/meta_schedule/measure_candidate.h>
 #include <tvm/meta_schedule/runner.h>
 #include <tvm/node/reflection.h>
 #include <tvm/runtime/object.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/tir/schedule/schedule.h>
 
 #include <vector>
diff --git a/include/tvm/meta_schedule/database.h 
b/include/tvm/meta_schedule/database.h
index 15d92c4e3c..570da2cf06 100644
--- a/include/tvm/meta_schedule/database.h
+++ b/include/tvm/meta_schedule/database.h
@@ -20,13 +20,13 @@
 #define TVM_META_SCHEDULE_DATABASE_H_
 
 #include <tvm/ffi/container/array.h>
+#include <tvm/ffi/function.h>
 #include <tvm/ffi/string.h>
 #include <tvm/ir/expr.h>
 #include <tvm/ir/module.h>
 #include <tvm/meta_schedule/arg_info.h>
 #include <tvm/node/reflection.h>
 #include <tvm/runtime/object.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/target/target.h>
 #include <tvm/tir/schedule/schedule.h>
 #include <tvm/tir/schedule/trace.h>
diff --git a/include/tvm/meta_schedule/feature_extractor.h 
b/include/tvm/meta_schedule/feature_extractor.h
index 8eecb2e582..e45cb4eab1 100644
--- a/include/tvm/meta_schedule/feature_extractor.h
+++ b/include/tvm/meta_schedule/feature_extractor.h
@@ -21,12 +21,12 @@
 #define TVM_META_SCHEDULE_FEATURE_EXTRACTOR_H_
 
 #include <tvm/ffi/container/array.h>
+#include <tvm/ffi/function.h>
 #include <tvm/ffi/string.h>
 #include <tvm/meta_schedule/measure_candidate.h>
 #include <tvm/node/reflection.h>
 #include <tvm/runtime/ndarray.h>
 #include <tvm/runtime/object.h>
-#include <tvm/runtime/packed_func.h>
 
 namespace tvm {
 namespace meta_schedule {
diff --git a/include/tvm/meta_schedule/measure_callback.h 
b/include/tvm/meta_schedule/measure_callback.h
index 1b3bbb0f5c..3a3d83cbf9 100644
--- a/include/tvm/meta_schedule/measure_callback.h
+++ b/include/tvm/meta_schedule/measure_callback.h
@@ -21,6 +21,7 @@
 #define TVM_META_SCHEDULE_MEASURE_CALLBACK_H_
 
 #include <tvm/ffi/container/array.h>
+#include <tvm/ffi/function.h>
 #include <tvm/ffi/string.h>
 #include <tvm/meta_schedule/builder.h>
 #include <tvm/meta_schedule/measure_candidate.h>
@@ -29,7 +30,6 @@
 #include <tvm/meta_schedule/tune_context.h>
 #include <tvm/node/reflection.h>
 #include <tvm/runtime/object.h>
-#include <tvm/runtime/packed_func.h>
 
 namespace tvm {
 namespace meta_schedule {
diff --git a/include/tvm/meta_schedule/mutator.h 
b/include/tvm/meta_schedule/mutator.h
index 525c812f90..0f8e446784 100644
--- a/include/tvm/meta_schedule/mutator.h
+++ b/include/tvm/meta_schedule/mutator.h
@@ -20,10 +20,10 @@
 #ifndef TVM_META_SCHEDULE_MUTATOR_H_
 #define TVM_META_SCHEDULE_MUTATOR_H_
 
+#include <tvm/ffi/function.h>
 #include <tvm/ffi/optional.h>
 #include <tvm/node/reflection.h>
 #include <tvm/runtime/object.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/support/random_engine.h>
 #include <tvm/tir/schedule/schedule.h>
 #include <tvm/tir/schedule/trace.h>
diff --git a/include/tvm/meta_schedule/postproc.h 
b/include/tvm/meta_schedule/postproc.h
index 5a2b96caf8..e8648f038e 100644
--- a/include/tvm/meta_schedule/postproc.h
+++ b/include/tvm/meta_schedule/postproc.h
@@ -20,9 +20,9 @@
 #ifndef TVM_META_SCHEDULE_POSTPROC_H_
 #define TVM_META_SCHEDULE_POSTPROC_H_
 
+#include <tvm/ffi/function.h>
 #include <tvm/node/reflection.h>
 #include <tvm/runtime/object.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/tir/schedule/schedule.h>
 
 namespace tvm {
diff --git a/include/tvm/meta_schedule/profiler.h 
b/include/tvm/meta_schedule/profiler.h
index b9905cf06f..6f8072b3f3 100644
--- a/include/tvm/meta_schedule/profiler.h
+++ b/include/tvm/meta_schedule/profiler.h
@@ -20,12 +20,12 @@
 #define TVM_META_SCHEDULE_PROFILER_H_
 
 #include <tvm/ffi/container/array.h>
+#include <tvm/ffi/function.h>
 #include <tvm/ffi/optional.h>
 #include <tvm/ffi/string.h>
 #include <tvm/ir/module.h>
 #include <tvm/node/reflection.h>
 #include <tvm/runtime/object.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/target/target.h>
 
 #include <string>
diff --git a/include/tvm/meta_schedule/runner.h 
b/include/tvm/meta_schedule/runner.h
index 64974d16c6..c8331a3a60 100644
--- a/include/tvm/meta_schedule/runner.h
+++ b/include/tvm/meta_schedule/runner.h
@@ -20,13 +20,13 @@
 #define TVM_META_SCHEDULE_RUNNER_H_
 
 #include <tvm/ffi/container/array.h>
+#include <tvm/ffi/function.h>
 #include <tvm/ffi/optional.h>
 #include <tvm/ffi/string.h>
 #include <tvm/ir/expr.h>
 #include <tvm/meta_schedule/arg_info.h>
 #include <tvm/node/reflection.h>
 #include <tvm/runtime/object.h>
-#include <tvm/runtime/packed_func.h>
 
 namespace tvm {
 namespace meta_schedule {
diff --git a/include/tvm/meta_schedule/schedule_rule.h 
b/include/tvm/meta_schedule/schedule_rule.h
index 2d369656b0..1a759c1b50 100644
--- a/include/tvm/meta_schedule/schedule_rule.h
+++ b/include/tvm/meta_schedule/schedule_rule.h
@@ -22,12 +22,12 @@
 
 #include <tvm/ffi/container/array.h>
 #include <tvm/ffi/container/map.h>
+#include <tvm/ffi/function.h>
 #include <tvm/ffi/optional.h>
 #include <tvm/ffi/string.h>
 #include <tvm/ir/expr.h>
 #include <tvm/node/reflection.h>
 #include <tvm/runtime/object.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/tir/schedule/schedule.h>
 
 namespace tvm {
diff --git a/include/tvm/meta_schedule/search_strategy.h 
b/include/tvm/meta_schedule/search_strategy.h
index ed08f5729e..c0b4677f84 100644
--- a/include/tvm/meta_schedule/search_strategy.h
+++ b/include/tvm/meta_schedule/search_strategy.h
@@ -20,6 +20,7 @@
 #define TVM_META_SCHEDULE_SEARCH_STRATEGY_H_
 
 #include <tvm/ffi/container/array.h>
+#include <tvm/ffi/function.h>
 #include <tvm/ffi/optional.h>
 #include <tvm/meta_schedule/arg_info.h>
 #include <tvm/meta_schedule/cost_model.h>
@@ -28,7 +29,6 @@
 #include <tvm/meta_schedule/runner.h>
 #include <tvm/node/reflection.h>
 #include <tvm/runtime/object.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/tir/schedule/schedule.h>
 
 namespace tvm {
diff --git a/include/tvm/meta_schedule/space_generator.h 
b/include/tvm/meta_schedule/space_generator.h
index efdca187ca..4ba3c0b089 100644
--- a/include/tvm/meta_schedule/space_generator.h
+++ b/include/tvm/meta_schedule/space_generator.h
@@ -20,13 +20,13 @@
 #define TVM_META_SCHEDULE_SPACE_GENERATOR_H_
 
 #include <tvm/ffi/container/array.h>
+#include <tvm/ffi/function.h>
 #include <tvm/ir/module.h>
 #include <tvm/meta_schedule/mutator.h>
 #include <tvm/meta_schedule/postproc.h>
 #include <tvm/meta_schedule/schedule_rule.h>
 #include <tvm/node/reflection.h>
 #include <tvm/runtime/object.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/target/target.h>
 #include <tvm/tir/schedule/schedule.h>
 
diff --git a/include/tvm/meta_schedule/task_scheduler.h 
b/include/tvm/meta_schedule/task_scheduler.h
index 8c0e35f639..7bf36873b3 100644
--- a/include/tvm/meta_schedule/task_scheduler.h
+++ b/include/tvm/meta_schedule/task_scheduler.h
@@ -20,6 +20,7 @@
 #define TVM_META_SCHEDULE_TASK_SCHEDULER_H_
 
 #include <tvm/ffi/container/array.h>
+#include <tvm/ffi/function.h>
 #include <tvm/ffi/optional.h>
 #include <tvm/meta_schedule/builder.h>
 #include <tvm/meta_schedule/cost_model.h>
@@ -28,7 +29,6 @@
 #include <tvm/meta_schedule/tune_context.h>
 #include <tvm/node/reflection.h>
 #include <tvm/runtime/object.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/support/random_engine.h>
 
 #include <string>
diff --git a/include/tvm/meta_schedule/tune_context.h 
b/include/tvm/meta_schedule/tune_context.h
index 24f06c9d5c..9045d4188a 100644
--- a/include/tvm/meta_schedule/tune_context.h
+++ b/include/tvm/meta_schedule/tune_context.h
@@ -21,6 +21,7 @@
 
 #include <tvm/ffi/container/array.h>
 #include <tvm/ffi/container/map.h>
+#include <tvm/ffi/function.h>
 #include <tvm/ffi/optional.h>
 #include <tvm/ffi/string.h>
 #include <tvm/ir/expr.h>
@@ -31,7 +32,6 @@
 #include <tvm/meta_schedule/space_generator.h>
 #include <tvm/node/reflection.h>
 #include <tvm/runtime/object.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/support/random_engine.h>
 #include <tvm/target/target.h>
 
diff --git a/include/tvm/node/reflection.h b/include/tvm/node/reflection.h
index ab197078f3..e56639570e 100644
--- a/include/tvm/node/reflection.h
+++ b/include/tvm/node/reflection.h
@@ -24,6 +24,7 @@
 #define TVM_NODE_REFLECTION_H_
 
 #include <tvm/ffi/container/map.h>
+#include <tvm/ffi/function.h>
 #include <tvm/ffi/memory.h>
 #include <tvm/node/structural_equal.h>
 #include <tvm/node/structural_hash.h>
@@ -31,7 +32,6 @@
 #include <tvm/runtime/data_type.h>
 #include <tvm/runtime/ndarray.h>
 #include <tvm/runtime/object.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <string>
 #include <type_traits>
@@ -164,7 +164,7 @@ class ReflectionVTable {
    * \param kwargs The field arguments.
    * \return The created object.
    */
-  TVM_DLL ObjectRef CreateObject(const std::string& type_key, const 
Map<String, Any>& kwargs);
+  TVM_DLL ObjectRef CreateObject(const std::string& type_key, const 
Map<String, ffi::Any>& kwargs);
   /*!
    * \brief Get an field object by the attr name.
    * \param self The pointer to the object.
diff --git a/include/tvm/node/script_printer.h 
b/include/tvm/node/script_printer.h
index 5ee6b177f0..721ae0932c 100644
--- a/include/tvm/node/script_printer.h
+++ b/include/tvm/node/script_printer.h
@@ -23,6 +23,7 @@
 #ifndef TVM_NODE_SCRIPT_PRINTER_H_
 #define TVM_NODE_SCRIPT_PRINTER_H_
 
+#include <tvm/ffi/any.h>
 #include <tvm/ffi/container/array.h>
 #include <tvm/ffi/container/map.h>
 #include <tvm/ffi/string.h>
@@ -151,7 +152,7 @@ class PrinterConfigNode : public Object {
 
 class PrinterConfig : public ObjectRef {
  public:
-  explicit PrinterConfig(Map<String, Any> config_dict = Map<String, Any>());
+  explicit PrinterConfig(Map<String, ffi::Any> config_dict = Map<String, 
ffi::Any>());
 
   TVM_DEFINE_MUTABLE_NOTNULLABLE_OBJECT_REF_METHODS(PrinterConfig, 
runtime::ObjectRef,
                                                     PrinterConfigNode);
diff --git a/include/tvm/relax/dataflow_matcher.h 
b/include/tvm/relax/dataflow_matcher.h
index d5a0de0493..80359135c2 100644
--- a/include/tvm/relax/dataflow_matcher.h
+++ b/include/tvm/relax/dataflow_matcher.h
@@ -24,9 +24,9 @@
 #ifndef TVM_RELAX_DATAFLOW_MATCHER_H_
 #define TVM_RELAX_DATAFLOW_MATCHER_H_
 
+#include <tvm/ffi/function.h>
 #include <tvm/ffi/optional.h>
 #include <tvm/relax/dataflow_pattern.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <memory>
 
diff --git a/include/tvm/runtime/disco/disco_worker.h 
b/include/tvm/runtime/disco/disco_worker.h
index c7aeb4e284..078c061b7b 100644
--- a/include/tvm/runtime/disco/disco_worker.h
+++ b/include/tvm/runtime/disco/disco_worker.h
@@ -25,8 +25,8 @@
 #ifndef TVM_RUNTIME_DISCO_DISCO_WORKER_H_
 #define TVM_RUNTIME_DISCO_DISCO_WORKER_H_
 
+#include <tvm/ffi/function.h>
 #include <tvm/runtime/disco/session.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <vector>
 
diff --git a/include/tvm/runtime/disco/session.h 
b/include/tvm/runtime/disco/session.h
index 9d213312c8..0c1ed7ca0a 100644
--- a/include/tvm/runtime/disco/session.h
+++ b/include/tvm/runtime/disco/session.h
@@ -72,9 +72,10 @@
 #ifndef TVM_RUNTIME_DISCO_SESSION_H_
 #define TVM_RUNTIME_DISCO_SESSION_H_
 
+#include <tvm/ffi/function.h>
 #include <tvm/runtime/int_tuple.h>
+#include <tvm/runtime/ndarray.h>
 #include <tvm/runtime/object.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <queue>
 #include <string>
@@ -243,7 +244,7 @@ class SessionObj : public Object {
    * \param value The value to be set.
    * \param worker_id The id of the worker to be set.
    */
-  TVM_DLL virtual void DebugSetRegister(int64_t reg_id, AnyView value, int 
worker_id) = 0;
+  TVM_DLL virtual void DebugSetRegister(int64_t reg_id, ffi::AnyView value, 
int worker_id) = 0;
 
   struct FFI;
   friend struct SessionObj::FFI;
@@ -338,7 +339,7 @@ template <typename... Args>
 DRef SessionObj::CallPacked(const DRef& func, Args&&... args) {
   constexpr int offset = 3;
   constexpr int kNumArgs = offset + sizeof...(Args);
-  AnyView packed_args[kNumArgs];
+  ffi::AnyView packed_args[kNumArgs];
   ffi::PackedArgs::Fill(packed_args,
                         /*.0=*/static_cast<int>(DiscoAction::kCallPacked),  // 
action
                         /*.1=*/0,     // reg_id, which will be updated by 
this->CallWithPacked
diff --git a/include/tvm/runtime/module.h b/include/tvm/runtime/module.h
index c02e312b71..705fb276d9 100644
--- a/include/tvm/runtime/module.h
+++ b/include/tvm/runtime/module.h
@@ -37,6 +37,7 @@
 #include <mutex>
 #include <string>
 #include <unordered_map>
+#include <utility>
 #include <vector>
 
 namespace tvm {
@@ -325,8 +326,80 @@ inline std::ostream& operator<<(std::ostream& out, const 
Module& module) {
   return out;
 }
 
+namespace details {
+
+template <typename T>
+struct ModuleVTableEntryHelper {};
+
+template <typename T, typename R, typename... Args>
+struct ModuleVTableEntryHelper<R (T::*)(Args...) const> {
+  using MemFnType = R (T::*)(Args...) const;
+  static TVM_ALWAYS_INLINE void Call(ffi::Any* rv, T* self, MemFnType f, 
ffi::PackedArgs args) {
+    auto wrapped = [self, f](Args... args) -> R { return 
(self->*f)(std::forward<Args>(args)...); };
+    ffi::details::unpack_call<R>(std::make_index_sequence<sizeof...(Args)>{}, 
nullptr, wrapped,
+                                 args.data(), args.size(), rv);
+  }
+};
+
+template <typename T, typename R, typename... Args>
+struct ModuleVTableEntryHelper<R (T::*)(Args...)> {
+  using MemFnType = R (T::*)(Args...);
+  static TVM_ALWAYS_INLINE void Call(ffi::Any* rv, T* self, MemFnType f, 
ffi::PackedArgs args) {
+    auto wrapped = [self, f](Args... args) -> R { return 
(self->*f)(std::forward<Args>(args)...); };
+    ffi::details::unpack_call<R>(std::make_index_sequence<sizeof...(Args)>{}, 
nullptr, wrapped,
+                                 args.data(), args.size(), rv);
+  }
+};
+
+template <typename T, typename... Args>
+struct ModuleVTableEntryHelper<void (T::*)(Args...) const> {
+  using MemFnType = void (T::*)(Args...) const;
+  static TVM_ALWAYS_INLINE void Call(ffi::Any* rv, T* self, MemFnType f, 
ffi::PackedArgs args) {
+    auto wrapped = [self, f](Args... args) -> void { 
(self->*f)(std::forward<Args>(args)...); };
+    
ffi::details::unpack_call<void>(std::make_index_sequence<sizeof...(Args)>{}, 
nullptr, wrapped,
+                                    args.data(), args.size(), rv);
+  }
+};
+
+template <typename T, typename... Args>
+struct ModuleVTableEntryHelper<void (T::*)(Args...)> {
+  using MemFnType = void (T::*)(Args...);
+  static TVM_ALWAYS_INLINE void Call(ffi::Any* rv, T* self, MemFnType f, 
ffi::PackedArgs args) {
+    auto wrapped = [self, f](Args... args) -> void { 
(self->*f)(std::forward<Args>(args)...); };
+    
ffi::details::unpack_call<void>(std::make_index_sequence<sizeof...(Args)>{}, 
nullptr, wrapped,
+                                    args.data(), args.size(), rv);
+  }
+};
+}  // namespace details
 }  // namespace runtime
 }  // namespace tvm
 
-#include <tvm/runtime/packed_func.h>  // NOLINT(*)
-#endif                                // TVM_RUNTIME_MODULE_H_
+#define TVM_MODULE_VTABLE_BEGIN(TypeKey)                                       
             \
+  const char* type_key() const final { return TypeKey; }                       
             \
+  ffi::Function GetFunction(const String& _name, const ObjectPtr<Object>& 
_self) override { \
+    using SelfPtr = std::remove_cv_t<decltype(this)>;
+#define TVM_MODULE_VTABLE_END()  \
+  return ffi::Function(nullptr); \
+  }
+#define TVM_MODULE_VTABLE_END_WITH_DEFAULT(MemFunc) \
+  {                                                 \
+    auto f = (MemFunc);                             \
+    return (this->*f)(_name);                       \
+  }                                                 \
+  }  // NOLINT(*)
+#define TVM_MODULE_VTABLE_ENTRY(Name, MemFunc)                                 
            \
+  if (_name == Name) {                                                         
            \
+    return ffi::Function::FromPacked([_self](ffi::PackedArgs args, ffi::Any* 
rv) -> void { \
+      using Helper = 
::tvm::runtime::details::ModuleVTableEntryHelper<decltype(MemFunc)>;  \
+      SelfPtr self = static_cast<SelfPtr>(_self.get());                        
            \
+      Helper::Call(rv, self, MemFunc, args);                                   
            \
+    });                                                                        
            \
+  }
+#define TVM_MODULE_VTABLE_ENTRY_PACKED(Name, MemFunc)                          
\
+  if (_name == Name) {                                                         
\
+    return ffi::Function([_self](ffi::PackedArgs args, ffi::Any* rv) -> void { 
\
+      (static_cast<SelfPtr>(_self.get())->*(MemFunc))(args, rv);               
\
+    });                                                                        
\
+  }
+
+#endif  // TVM_RUNTIME_MODULE_H_
diff --git a/include/tvm/runtime/packed_func.h 
b/include/tvm/runtime/packed_func.h
index 6da06c1191..3f8ec66bc1 100644
--- a/include/tvm/runtime/packed_func.h
+++ b/include/tvm/runtime/packed_func.h
@@ -26,145 +26,16 @@
 
 #include <tvm/ffi/any.h>
 #include <tvm/ffi/function.h>
-#include <tvm/runtime/base.h>
-#include <tvm/runtime/logging.h>
-#include <tvm/runtime/module.h>
-#include <tvm/runtime/ndarray.h>
-
-#include <string>
-#include <utility>
-#include <vector>
 
 namespace tvm {
 namespace runtime {
 
-using ffi::Any;
-using ffi::AnyView;
-
-namespace details {
-
-template <typename T>
-struct ModuleVTableEntryHelper {};
-
-template <typename T, typename R, typename... Args>
-struct ModuleVTableEntryHelper<R (T::*)(Args...) const> {
-  using MemFnType = R (T::*)(Args...) const;
-  static TVM_ALWAYS_INLINE void Call(ffi::Any* rv, T* self, MemFnType f, 
ffi::PackedArgs args) {
-    auto wrapped = [self, f](Args... args) -> R { return 
(self->*f)(std::forward<Args>(args)...); };
-    ffi::details::unpack_call<R>(std::make_index_sequence<sizeof...(Args)>{}, 
nullptr, wrapped,
-                                 args.data(), args.size(), rv);
-  }
-};
+#define TVM_DLL_EXPORT_TYPED_FUNC TVM_FFI_DLL_EXPORT_TYPED_FUNC
 
-template <typename T, typename R, typename... Args>
-struct ModuleVTableEntryHelper<R (T::*)(Args...)> {
-  using MemFnType = R (T::*)(Args...);
-  static TVM_ALWAYS_INLINE void Call(ffi::Any* rv, T* self, MemFnType f, 
ffi::PackedArgs args) {
-    auto wrapped = [self, f](Args... args) -> R { return 
(self->*f)(std::forward<Args>(args)...); };
-    ffi::details::unpack_call<R>(std::make_index_sequence<sizeof...(Args)>{}, 
nullptr, wrapped,
-                                 args.data(), args.size(), rv);
-  }
-};
-
-template <typename T, typename... Args>
-struct ModuleVTableEntryHelper<void (T::*)(Args...) const> {
-  using MemFnType = void (T::*)(Args...) const;
-  static TVM_ALWAYS_INLINE void Call(ffi::Any* rv, T* self, MemFnType f, 
ffi::PackedArgs args) {
-    auto wrapped = [self, f](Args... args) -> void { 
(self->*f)(std::forward<Args>(args)...); };
-    
ffi::details::unpack_call<void>(std::make_index_sequence<sizeof...(Args)>{}, 
nullptr, wrapped,
-                                    args.data(), args.size(), rv);
-  }
-};
-
-template <typename T, typename... Args>
-struct ModuleVTableEntryHelper<void (T::*)(Args...)> {
-  using MemFnType = void (T::*)(Args...);
-  static TVM_ALWAYS_INLINE void Call(ffi::Any* rv, T* self, MemFnType f, 
ffi::PackedArgs args) {
-    auto wrapped = [self, f](Args... args) -> void { 
(self->*f)(std::forward<Args>(args)...); };
-    
ffi::details::unpack_call<void>(std::make_index_sequence<sizeof...(Args)>{}, 
nullptr, wrapped,
-                                    args.data(), args.size(), rv);
-  }
-};
-}  // namespace details
-
-#define TVM_MODULE_VTABLE_BEGIN(TypeKey)                                       
             \
-  const char* type_key() const final { return TypeKey; }                       
             \
-  ffi::Function GetFunction(const String& _name, const ObjectPtr<Object>& 
_self) override { \
-    using SelfPtr = std::remove_cv_t<decltype(this)>;
-#define TVM_MODULE_VTABLE_END()  \
-  return ffi::Function(nullptr); \
-  }
-#define TVM_MODULE_VTABLE_END_WITH_DEFAULT(MemFunc) \
-  {                                                 \
-    auto f = (MemFunc);                             \
-    return (this->*f)(_name);                       \
-  }                                                 \
-  }  // NOLINT(*)
-#define TVM_MODULE_VTABLE_ENTRY(Name, MemFunc)                                 
           \
-  if (_name == Name) {                                                         
           \
-    return ffi::Function::FromPacked([_self](ffi::PackedArgs args, Any* rv) -> 
void {     \
-      using Helper = 
::tvm::runtime::details::ModuleVTableEntryHelper<decltype(MemFunc)>; \
-      SelfPtr self = static_cast<SelfPtr>(_self.get());                        
           \
-      Helper::Call(rv, self, MemFunc, args);                                   
           \
-    });                                                                        
           \
-  }
-#define TVM_MODULE_VTABLE_ENTRY_PACKED(Name, MemFunc)                     \
-  if (_name == Name) {                                                    \
-    return ffi::Function([_self](ffi::PackedArgs args, Any* rv) -> void { \
-      (static_cast<SelfPtr>(_self.get())->*(MemFunc))(args, rv);          \
-    });                                                                   \
-  }
-
-/*!
- * \brief Export typed function as a ffi::Function
- *        that can be loaded by LibraryModule.
- *
- * \param ExportName The symbol name to be exported.
- * \param Function The typed function.
- * \note ExportName and Function must be different,
- *       see code examples below.
- *
- * \sa ffi::TypedFunction
- *
- * \code
- *
- * int AddOne_(int x) {
- *   return x + 1;
- * }
- *
- * // Expose the function as "AddOne"
- * TVM_DLL_EXPORT_TYPED_FUNC(AddOne, AddOne_);
- *
- * // Expose the function as "SubOne"
- * TVM_DLL_EXPORT_TYPED_FUNC(SubOne, [](int x) {
- *   return x - 1;
- * });
- *
- * // The following code will cause compilation error.
- * // Because the same Function and ExportName
- * // TVM_DLL_EXPORT_TYPED_FUNC(AddOne_, AddOne_);
- *
- * // The following code is OK, assuming the macro
- * // is in a different namespace from xyz
- * // TVM_DLL_EXPORT_TYPED_FUNC(AddOne_, xyz::AddOne_);
- *
- * \endcode
- */
-#define TVM_DLL_EXPORT_TYPED_FUNC(ExportName, Function)                        
              \
-  extern "C" {                                                                 
              \
-  TVM_DLL int ExportName(void* self, TVMFFIAny* args, int32_t num_args, 
TVMFFIAny* result) { \
-    TVM_FFI_SAFE_CALL_BEGIN();                                                 
              \
-    using FuncInfo = ::tvm::ffi::details::FunctionInfo<decltype(Function)>;    
              \
-    static std::string name = #ExportName;                                     
              \
-    ::tvm::ffi::details::unpack_call<typename FuncInfo::RetType>(              
              \
-        std::make_index_sequence<FuncInfo::num_args>{}, &name, Function,       
              \
-        reinterpret_cast<const ::tvm::ffi::AnyView*>(args), num_args,          
              \
-        reinterpret_cast<::tvm::ffi::Any*>(result));                           
              \
-    TVM_FFI_SAFE_CALL_END();                                                   
              \
-  }                                                                            
              \
-  }
-}  // namespace runtime  // NOLINT(*)
 using ffi::Any;
 using ffi::AnyView;
+
+}  // namespace runtime
 }  // namespace tvm
+
 #endif  // TVM_RUNTIME_PACKED_FUNC_H_
diff --git a/include/tvm/runtime/profiling.h b/include/tvm/runtime/profiling.h
index 1e950f9f7a..2a6ecc0e4d 100644
--- a/include/tvm/runtime/profiling.h
+++ b/include/tvm/runtime/profiling.h
@@ -29,8 +29,9 @@
 #include <tvm/ffi/function.h>
 #include <tvm/runtime/base.h>
 #include <tvm/runtime/device_api.h>
+#include <tvm/runtime/module.h>
+#include <tvm/runtime/ndarray.h>
 #include <tvm/runtime/object.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <stack>
 #include <string>
diff --git a/include/tvm/runtime/relax_vm/executable.h 
b/include/tvm/runtime/relax_vm/executable.h
index afaaea9e41..8a9fa024ca 100644
--- a/include/tvm/runtime/relax_vm/executable.h
+++ b/include/tvm/runtime/relax_vm/executable.h
@@ -24,8 +24,8 @@
 #define TVM_RUNTIME_RELAX_VM_EXECUTABLE_H_
 
 #include <tvm/ffi/function.h>
+#include <tvm/runtime/module.h>
 #include <tvm/runtime/object.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <string>
 #include <unordered_map>
diff --git a/include/tvm/runtime/relax_vm/vm.h 
b/include/tvm/runtime/relax_vm/vm.h
index ce69548d70..884a8d0f43 100644
--- a/include/tvm/runtime/relax_vm/vm.h
+++ b/include/tvm/runtime/relax_vm/vm.h
@@ -155,7 +155,7 @@ class VirtualMachine : public runtime::ModuleNode {
    * \param rv The return value.
    */
   virtual void InvokeClosurePacked(const ObjectRef& closure_or_packedfunc, 
ffi::PackedArgs args,
-                                   Any* rv) = 0;
+                                   ffi::Any* rv) = 0;
   /*!
    * \brief Set an instrumentation function.
    *
diff --git a/include/tvm/script/printer/ir_docsifier_functor.h 
b/include/tvm/script/printer/ir_docsifier_functor.h
index 40bb245e72..62133ef2c9 100644
--- a/include/tvm/script/printer/ir_docsifier_functor.h
+++ b/include/tvm/script/printer/ir_docsifier_functor.h
@@ -19,9 +19,9 @@
 #ifndef TVM_SCRIPT_PRINTER_IR_DOCSIFIER_FUNCTOR_H_
 #define TVM_SCRIPT_PRINTER_IR_DOCSIFIER_FUNCTOR_H_
 
+#include <tvm/ffi/function.h>
 #include <tvm/node/node.h>
 #include <tvm/runtime/logging.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <optional>
 #include <string>
diff --git a/include/tvm/support/span.h b/include/tvm/support/span.h
deleted file mode 100644
index 768252f77c..0000000000
--- a/include/tvm/support/span.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- *
- * \file tvm/support/span.h
- * \brief Reimplementation of part of C++-20 style span.
- */
-#ifndef TVM_SUPPORT_SPAN_H_
-#define TVM_SUPPORT_SPAN_H_
-
-#include <cstddef>
-#include <iterator>
-#include <type_traits>
-#include <vector>
-
-namespace tvm {
-namespace support {
-
-/*!
- * \brief A partial implementation of the C++20 std::span.
- *
- * At the time of writing, TVM must compile against C++17.
- */
-template <class T, class W>
-class Span {
- public:
-  using value_type = W;
-  using const_W = typename std::add_const<W>::type;
-
-  template <class W1>
-  class iterator_base {
-   public:
-    using iterator_category = std::input_iterator_tag;
-    using value_type = W;
-    using difference_type = std::ptrdiff_t;
-    using pointer = const W*;
-    using reference = const W&;
-
-    inline iterator_base(T* ptr, T* end) : ptr_{ptr}, end_{end} { 
CHECK_GE(end, ptr); }
-
-    inline W1 operator*() { return W1(*ptr_); }
-
-    inline iterator_base<W1>& operator++() {
-      if (ptr_ != end_) ptr_++;
-      return *this;
-    }
-
-    inline bool operator==(iterator_base<W1> other) {
-      return ptr_ == other.ptr_ && end_ == other.end_;
-    }
-
-    inline bool operator!=(iterator_base<W1> other) { return !(*this == 
other); }
-
-    template <class X = W1, typename = 
std::enable_if_t<!std::is_const<X>::value>>
-    inline operator iterator_base<const_W>() const {
-      return iterator_base<const_W>(ptr_, end_);
-    }
-
-   private:
-    T* ptr_;
-    T* end_;
-  };
-
-  using iterator = iterator_base<W>;
-  using const_iterator = iterator_base<const_W>;
-
-  inline Span(T* begin, int num_elements) : begin_{begin}, end_{begin + 
num_elements} {}
-  inline Span(T* begin, T* end) : begin_{begin}, end_{end} {}
-
-  inline iterator begin() const { return iterator(begin_, end_); }
-
-  inline iterator end() const { return iterator(end_, end_); }
-
-  size_t size() const { return end_ - begin_; }
-
-  inline W operator[](int i) {
-    T* to_return = begin_ + i;
-    ICHECK_LT(to_return, end_) << "Span access out of bounds: " << i;
-    return W(*to_return);
-  }
-
-  inline operator std::vector<W>() { return std::vector<W>(begin(), end()); }
-
- protected:
-  T* begin_;
-  T* end_;
-};
-
-}  // namespace support
-}  // namespace tvm
-
-#endif  // TVM_SUPPORT_SPAN_H_
diff --git a/include/tvm/target/codegen.h b/include/tvm/target/codegen.h
index faa58f8487..54f09a081b 100644
--- a/include/tvm/target/codegen.h
+++ b/include/tvm/target/codegen.h
@@ -25,7 +25,7 @@
 #define TVM_TARGET_CODEGEN_H_
 
 #include <tvm/ir/module.h>
-#include <tvm/runtime/packed_func.h>
+#include <tvm/runtime/module.h>
 #include <tvm/target/target.h>
 #include <tvm/tir/expr.h>
 
diff --git a/include/tvm/tir/op_attr_types.h b/include/tvm/tir/op_attr_types.h
index 29877b1458..883477dd64 100644
--- a/include/tvm/tir/op_attr_types.h
+++ b/include/tvm/tir/op_attr_types.h
@@ -28,9 +28,9 @@
 #ifndef TVM_TIR_OP_ATTR_TYPES_H_
 #define TVM_TIR_OP_ATTR_TYPES_H_
 
+#include <tvm/ffi/function.h>
 #include <tvm/ffi/string.h>
 #include <tvm/ir/expr.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <ostream>
 
diff --git a/include/tvm/topi/utils.h b/include/tvm/topi/utils.h
index 23ac27d134..b5f2d6c38d 100644
--- a/include/tvm/topi/utils.h
+++ b/include/tvm/topi/utils.h
@@ -24,8 +24,8 @@
 #ifndef TVM_TOPI_UTILS_H_
 #define TVM_TOPI_UTILS_H_
 
+#include <tvm/ffi/function.h>
 #include <tvm/ir/expr.h>
-#include <tvm/runtime/packed_func.h>
 
 namespace tvm {
 namespace topi {
diff --git a/python/tvm/relax/frontend/nn/extern.py 
b/python/tvm/relax/frontend/nn/extern.py
index d89369c718..e7248b0f4b 100644
--- a/python/tvm/relax/frontend/nn/extern.py
+++ b/python/tvm/relax/frontend/nn/extern.py
@@ -135,14 +135,14 @@ class SourceModule(ExternModule):  # pylint: 
disable=too-few-public-methods
     of in-memory representation of tensors. More details:
     https://github.com/dmlc/dlpack/blob/v0.8/include/dlpack/dlpack.h#L163-L206.
 
-    To expose the symbol, `TVM_DLL_EXPORT_TYPED_FUNC(symbol, function)` is 
guaranteed available:
+    To expose the symbol, `TVM_FFI_DLL_EXPORT_TYPED_FUNC(symbol, function)` is 
guaranteed available:
 
     .. code-block:: C++
 
         // those headers are guaranteed to be available
         #include <dlpack/dlpack.h>
         #include <tvm/runtime/data_type.h>
-        #include <tvm/runtime/packed_func.h>
+        #include <tvm/ffi/function.h>
 
         namespace {
         // anonymous namespace hides the symbol `_my_func_impl` from other 
translation units
@@ -151,7 +151,7 @@ class SourceModule(ExternModule):  # pylint: 
disable=too-few-public-methods
         }
         }
         // expose symbol `my_func` instead of `_my_func_impl`
-        TVM_DLL_EXPORT_TYPED_FUNC(my_func, _my_func_impl);
+        TVM_FFI_DLL_EXPORT_TYPED_FUNC(my_func, _my_func_impl);
 
     **A compiler pass `AttachExternModules`.** It is introduced to attach a 
list of
     `nn.ExternModule`s into an IRModule at any stage of the compilation 
pipeline,
diff --git a/src/ir/op.cc b/src/ir/op.cc
index 4917f8336b..b6d1f39526 100644
--- a/src/ir/op.cc
+++ b/src/ir/op.cc
@@ -21,10 +21,10 @@
  * \file src/ir/op.cc
  * \brief Primitive operators and intrinsics.
  */
+#include <tvm/ffi/function.h>
 #include <tvm/ir/op.h>
 #include <tvm/ir/type.h>
 #include <tvm/runtime/module.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/tir/op_attr_types.h>
 
 #include <memory>
diff --git a/src/node/attr_registry.h b/src/node/attr_registry.h
index 9ec39e9f6a..334c15b3be 100644
--- a/src/node/attr_registry.h
+++ b/src/node/attr_registry.h
@@ -24,8 +24,8 @@
 #ifndef TVM_NODE_ATTR_REGISTRY_H_
 #define TVM_NODE_ATTR_REGISTRY_H_
 
+#include <tvm/ffi/function.h>
 #include <tvm/node/attr_registry_map.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <memory>
 #include <unordered_map>
diff --git a/src/node/serialization.cc b/src/node/serialization.cc
index 631d70e235..986a2d0445 100644
--- a/src/node/serialization.cc
+++ b/src/node/serialization.cc
@@ -28,7 +28,6 @@
 #include <tvm/node/reflection.h>
 #include <tvm/node/serialization.h>
 #include <tvm/runtime/ndarray.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <cctype>
 #include <map>
diff --git a/src/relax/backend/contrib/codegen_c/codegen_c.h 
b/src/relax/backend/contrib/codegen_c/codegen_c.h
index ce26e94cb7..28ca0e3586 100644
--- a/src/relax/backend/contrib/codegen_c/codegen_c.h
+++ b/src/relax/backend/contrib/codegen_c/codegen_c.h
@@ -130,14 +130,14 @@ class CodegenCBase {
    *   return 0;
    * }
    *
-   * TVM_DLL_EXPORT_TYPED_FUNC(foo, foo_wrapper_);
+   * TVM_FFI_DLL_EXPORT_TYPED_FUNC(foo, foo_wrapper_);
    *
    * int foo_init_wrapper_(Array<NDArray> arr) {
    *   foo_consts = arr;
    *   return 0;
    * }
    *
-   * TVM_DLL_EXPORT_TYPED_FUNC(__init_foo, foo_init_wrapper_);
+   * TVM_FFI_DLL_EXPORT_TYPED_FUNC(__init_foo, foo_init_wrapper_);
    *
    * \endcode
    */
@@ -230,7 +230,7 @@ class CodegenCBase {
       code_stream_ << "return 0;\n";
       ExitScope();
       code_stream_ << "}\n\n";
-      code_stream_ << "TVM_DLL_EXPORT_TYPED_FUNC(__init_" << func_name << ", " 
<< func_name
+      code_stream_ << "TVM_FFI_DLL_EXPORT_TYPED_FUNC(__init_" << func_name << 
", " << func_name
                    << "_init_wrapper_);\n\n";
       code_stream_ << "#endif\n";
     }
diff --git a/src/relax/backend/contrib/cutlass/codegen.cc 
b/src/relax/backend/contrib/cutlass/codegen.cc
index a05907a231..8dee9b24f4 100644
--- a/src/relax/backend/contrib/cutlass/codegen.cc
+++ b/src/relax/backend/contrib/cutlass/codegen.cc
@@ -21,12 +21,12 @@
  * \file src/relax/backend/contrib/cutlass/codegen.cc
  * \brief Implementation of the CUTLASS code generator for Relax.
  */
-
 #include <tvm/ir/module.h>
 #include <tvm/ir/name_supply.h>
 #include <tvm/relax/analysis.h>
 #include <tvm/relax/attrs/nn.h>
 #include <tvm/relax/type.h>
+#include <tvm/runtime/module.h>
 
 #include <memory>
 #include <string>
@@ -59,7 +59,7 @@ runtime::Module Finalize(const std::string& code, const 
Array<String>& func_name
       << "Should only create CUTLASS CSourceModule if there is at least one 
CUTLASS partition";
 
   std::ostringstream default_headers;
-  default_headers << "#include <tvm/runtime/packed_func.h>\n";
+  default_headers << "#include <tvm/ffi/function.h>\n";
   default_headers << "#include <dlpack/dlpack.h>\n";
   default_headers << "#include <cuda_fp16.h>\n";
   default_headers << "#include <cutlass/cutlass.h>\n";
diff --git a/src/relax/transform/fold_constant.cc 
b/src/relax/transform/fold_constant.cc
index c2bac5daa7..7fec510865 100644
--- a/src/relax/transform/fold_constant.cc
+++ b/src/relax/transform/fold_constant.cc
@@ -23,6 +23,7 @@
 #include <tvm/relax/op_attr_types.h>
 #include <tvm/relax/transform.h>
 #include <tvm/relax/type.h>
+#include <tvm/runtime/module.h>
 #include <tvm/tir/function.h>
 #include <tvm/tir/op.h>
 
diff --git a/src/relax/transform/run_codegen.cc 
b/src/relax/transform/run_codegen.cc
index 7556f26903..d29bdaacb9 100644
--- a/src/relax/transform/run_codegen.cc
+++ b/src/relax/transform/run_codegen.cc
@@ -26,6 +26,7 @@
 #include <tvm/relax/analysis.h>
 #include <tvm/relax/expr_functor.h>
 #include <tvm/relax/transform.h>
+#include <tvm/runtime/module.h>
 
 #include "../../support/ordered_set.h"
 #include "utils.h"
diff --git a/src/runtime/const_loader_module.cc 
b/src/runtime/const_loader_module.cc
index 50b504d17c..84cd4943c5 100644
--- a/src/runtime/const_loader_module.cc
+++ b/src/runtime/const_loader_module.cc
@@ -31,8 +31,8 @@
 #include <tvm/ffi/container/map.h>
 #include <tvm/ffi/function.h>
 #include <tvm/ffi/string.h>
+#include <tvm/runtime/module.h>
 #include <tvm/runtime/ndarray.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <cstdint>
 
diff --git a/src/runtime/contrib/amx/amx_config.cc 
b/src/runtime/contrib/amx/amx_config.cc
index da5ae3f62c..1eb63a10fa 100644
--- a/src/runtime/contrib/amx/amx_config.cc
+++ b/src/runtime/contrib/amx/amx_config.cc
@@ -22,7 +22,6 @@
  * \brief extraction of AMX configuration on x86 platforms
  */
 #include <tvm/ffi/function.h>
-#include <tvm/runtime/packed_func.h>
 
 namespace tvm {
 namespace runtime {
diff --git a/src/runtime/contrib/coreml/coreml_runtime.h 
b/src/runtime/contrib/coreml/coreml_runtime.h
index b3f7e846e0..5f5eec1d03 100644
--- a/src/runtime/contrib/coreml/coreml_runtime.h
+++ b/src/runtime/contrib/coreml/coreml_runtime.h
@@ -29,8 +29,8 @@
 #import <Foundation/Foundation.h>
 
 #include <dlpack/dlpack.h>
+#include <tvm/ffi/function.h>
 #include <tvm/runtime/ndarray.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <memory>
 #include <string>
diff --git a/src/runtime/contrib/cutlass/fp16_group_gemm.cu 
b/src/runtime/contrib/cutlass/fp16_group_gemm.cu
index b1e152b1b0..dffe7dc4ff 100644
--- a/src/runtime/contrib/cutlass/fp16_group_gemm.cu
+++ b/src/runtime/contrib/cutlass/fp16_group_gemm.cu
@@ -20,7 +20,7 @@
 #include <cuda_fp16.h>
 #include <float.h>
 #include <tvm/runtime/ndarray.h>
-#include <tvm/runtime/packed_func.h>
+#include <tvm/ffi/function.h>
 #include <tvm/ffi/function.h>
 
 #include "group_gemm_runner.cuh"
diff --git a/src/runtime/contrib/cutlass/fp8_blockwise_scaled_gemm.cu 
b/src/runtime/contrib/cutlass/fp8_blockwise_scaled_gemm.cu
index d9bd0a33ee..b8732357c7 100644
--- a/src/runtime/contrib/cutlass/fp8_blockwise_scaled_gemm.cu
+++ b/src/runtime/contrib/cutlass/fp8_blockwise_scaled_gemm.cu
@@ -20,7 +20,7 @@
 #include <cuda_fp16.h>
 #include <float.h>
 #include <tvm/runtime/ndarray.h>
-#include <tvm/runtime/packed_func.h>
+#include <tvm/ffi/function.h>
 #include <tvm/ffi/function.h>
 
 #include "../cublas/cublas_utils.h"
diff --git a/src/runtime/contrib/cutlass/fp8_gemm.cu 
b/src/runtime/contrib/cutlass/fp8_gemm.cu
index 5146e62d8c..4ee31e73ab 100644
--- a/src/runtime/contrib/cutlass/fp8_gemm.cu
+++ b/src/runtime/contrib/cutlass/fp8_gemm.cu
@@ -20,7 +20,7 @@
 #include <cuda_fp16.h>
 #include <float.h>
 #include <tvm/runtime/ndarray.h>
-#include <tvm/runtime/packed_func.h>
+#include <tvm/ffi/function.h>
 #include <tvm/ffi/function.h>
 
 #include "../cublas/cublas_utils.h"
diff --git a/src/runtime/contrib/cutlass/fp8_group_gemm.cu 
b/src/runtime/contrib/cutlass/fp8_group_gemm.cu
index 104010f4c8..62a91dec18 100644
--- a/src/runtime/contrib/cutlass/fp8_group_gemm.cu
+++ b/src/runtime/contrib/cutlass/fp8_group_gemm.cu
@@ -20,7 +20,7 @@
 #include <cuda_fp16.h>
 #include <float.h>
 #include <tvm/runtime/ndarray.h>
-#include <tvm/runtime/packed_func.h>
+#include <tvm/ffi/function.h>
 #include <tvm/ffi/function.h>
 
 #include "group_gemm_runner.cuh"
diff --git a/src/runtime/contrib/cutlass/weight_preprocess.cc 
b/src/runtime/contrib/cutlass/weight_preprocess.cc
index 7cc053712b..5fece61661 100644
--- a/src/runtime/contrib/cutlass/weight_preprocess.cc
+++ b/src/runtime/contrib/cutlass/weight_preprocess.cc
@@ -19,7 +19,6 @@
 
 #include <tvm/ffi/function.h>
 #include <tvm/runtime/ndarray.h>
-#include <tvm/runtime/packed_func.h>
 
 #include "cutlass_kernels/cutlass_preprocessors.h"
 
diff --git a/src/runtime/contrib/mrvl/mrvl_sw_runtime_lib.h 
b/src/runtime/contrib/mrvl/mrvl_sw_runtime_lib.h
index c5242a4e7b..10114802b4 100644
--- a/src/runtime/contrib/mrvl/mrvl_sw_runtime_lib.h
+++ b/src/runtime/contrib/mrvl/mrvl_sw_runtime_lib.h
@@ -25,7 +25,7 @@
 #ifndef TVM_RUNTIME_CONTRIB_MRVL_MRVL_SW_RUNTIME_LIB_H_
 #define TVM_RUNTIME_CONTRIB_MRVL_MRVL_SW_RUNTIME_LIB_H_
 
-#include <tvm/runtime/packed_func.h>
+#include <tvm/ffi/function.h>
 
 #include <cstddef>
 #include <string>
diff --git a/src/runtime/contrib/mscclpp/allreduce.cu 
b/src/runtime/contrib/mscclpp/allreduce.cu
index a5bebbc561..66a6a097f6 100644
--- a/src/runtime/contrib/mscclpp/allreduce.cu
+++ b/src/runtime/contrib/mscclpp/allreduce.cu
@@ -18,7 +18,7 @@
  */
 
 #include <tvm/runtime/ndarray.h>
-#include <tvm/runtime/packed_func.h>
+#include <tvm/ffi/function.h>
 #include <tvm/ffi/function.h>
 
 #include "msccl.cuh"
diff --git a/src/runtime/contrib/nvshmem/init.cc 
b/src/runtime/contrib/nvshmem/init.cc
index 090457829e..7b4a617a25 100644
--- a/src/runtime/contrib/nvshmem/init.cc
+++ b/src/runtime/contrib/nvshmem/init.cc
@@ -21,7 +21,6 @@
 #include <picojson.h>
 #include <tvm/ffi/function.h>
 #include <tvm/runtime/disco/disco_worker.h>
-#include <tvm/runtime/packed_func.h>
 
 #include "../../cuda/cuda_common.h"
 
diff --git a/src/runtime/contrib/nvshmem/memory_allocator.cc 
b/src/runtime/contrib/nvshmem/memory_allocator.cc
index 86427eaa60..facfc95217 100644
--- a/src/runtime/contrib/nvshmem/memory_allocator.cc
+++ b/src/runtime/contrib/nvshmem/memory_allocator.cc
@@ -20,7 +20,6 @@
 #include <nvshmemx.h>
 #include <tvm/ffi/function.h>
 #include <tvm/runtime/memory/memory_manager.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <thread>
 
diff --git a/src/runtime/contrib/tflite/tflite_runtime.h 
b/src/runtime/contrib/tflite/tflite_runtime.h
index 6557fa0797..5e8751a012 100644
--- a/src/runtime/contrib/tflite/tflite_runtime.h
+++ b/src/runtime/contrib/tflite/tflite_runtime.h
@@ -27,8 +27,9 @@
 
 #include <dlpack/dlpack.h>
 #include <tensorflow/lite/interpreter.h>
+#include <tvm/ffi/function.h>
+#include <tvm/runtime/module.h>
 #include <tvm/runtime/ndarray.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <memory>
 #include <string>
diff --git a/src/runtime/contrib/vllm/attention_kernels.cu 
b/src/runtime/contrib/vllm/attention_kernels.cu
index 15e57bd297..9221f46725 100644
--- a/src/runtime/contrib/vllm/attention_kernels.cu
+++ b/src/runtime/contrib/vllm/attention_kernels.cu
@@ -19,7 +19,7 @@
 
 #include <float.h>
 #include <tvm/runtime/ndarray.h>
-#include <tvm/runtime/packed_func.h>
+#include <tvm/ffi/function.h>
 #include <tvm/ffi/function.h>
 
 #include <algorithm>
diff --git a/src/runtime/contrib/vllm/cache_kernels.cu 
b/src/runtime/contrib/vllm/cache_kernels.cu
index d762010427..01320daac6 100644
--- a/src/runtime/contrib/vllm/cache_kernels.cu
+++ b/src/runtime/contrib/vllm/cache_kernels.cu
@@ -17,7 +17,7 @@
  * under the License.
  */
 #include <tvm/runtime/ndarray.h>
-#include <tvm/runtime/packed_func.h>
+#include <tvm/ffi/function.h>
 #include <tvm/ffi/function.h>
 
 #include <algorithm>
diff --git a/src/runtime/cuda/cuda_common.h b/src/runtime/cuda/cuda_common.h
index 037dd94262..a378e53c54 100644
--- a/src/runtime/cuda/cuda_common.h
+++ b/src/runtime/cuda/cuda_common.h
@@ -25,7 +25,7 @@
 #define TVM_RUNTIME_CUDA_CUDA_COMMON_H_
 
 #include <cuda_runtime.h>
-#include <tvm/runtime/packed_func.h>
+#include <tvm/ffi/function.h>
 
 #include <string>
 
diff --git a/src/runtime/debug_compile.cc b/src/runtime/debug_compile.cc
index 483b8cdb59..4b22e26494 100644
--- a/src/runtime/debug_compile.cc
+++ b/src/runtime/debug_compile.cc
@@ -25,11 +25,11 @@
 #include <tvm/ffi/container/array.h>
 #include <tvm/ffi/container/map.h>
 #include <tvm/ffi/container/variant.h>
+#include <tvm/ffi/function.h>
 #include <tvm/ffi/optional.h>
 #include <tvm/ffi/string.h>
 #include <tvm/node/reflection.h>
 #include <tvm/node/structural_hash.h>
-#include <tvm/runtime/packed_func.h>
 
 // #include <tvm/node/structural_hash.h>
 //  #include <tvm/runtime/profiling.h>
@@ -37,7 +37,7 @@
 // #include <tvm/ir/expr.h>
 // #include <tvm/tir/expr.h>
 
-// #include <tvm/runtime/packed_func.h>
+// #include <tvm/ffi/function.h>
 // #include <tvm/ffi/function.h>
 // #include <tvm/tir/expr.h>
 
diff --git a/src/runtime/device_api.cc b/src/runtime/device_api.cc
index a80d6ebdbd..3e3145c32f 100644
--- a/src/runtime/device_api.cc
+++ b/src/runtime/device_api.cc
@@ -18,9 +18,10 @@
  */
 
 /*!
- * \file c_runtime_api.cc
+ * \file device_api.cc
  * \brief Device specific implementations
  */
+#include <tvm/ffi/container/ndarray.h>
 #include <tvm/ffi/function.h>
 #include <tvm/ffi/optional.h>
 #include <tvm/ffi/rvalue_ref.h>
@@ -28,7 +29,7 @@
 #include <tvm/runtime/base.h>
 #include <tvm/runtime/c_backend_api.h>
 #include <tvm/runtime/device_api.h>
-#include <tvm/runtime/packed_func.h>
+#include <tvm/runtime/module.h>
 
 #include <algorithm>
 #include <array>
@@ -144,7 +145,7 @@ void DeviceAPI::CopyDataFromTo(DLTensor* from, DLTensor* 
to, TVMStreamHandle str
   size_t nbytes = GetDataSize(*from);
   ICHECK_EQ(nbytes, GetDataSize(*to));
 
-  ICHECK(IsContiguous(*from) && IsContiguous(*to))
+  ICHECK(ffi::IsContiguous(*from) && ffi::IsContiguous(*to))
       << "CopyDataFromTo only support contiguous array for now";
   CopyDataFromTo(from->data, from->byte_offset, to->data, to->byte_offset, 
nbytes, from->device,
                  to->device, from->dtype, stream);
diff --git a/src/runtime/disco/bcast_session.cc 
b/src/runtime/disco/bcast_session.cc
index 034a1cf565..46ecb49f50 100644
--- a/src/runtime/disco/bcast_session.cc
+++ b/src/runtime/disco/bcast_session.cc
@@ -20,7 +20,6 @@
 
 #include <tvm/ffi/function.h>
 #include <tvm/runtime/disco/session.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <sstream>
 
@@ -32,7 +31,7 @@ struct BcastSessionObj::Internal {
   static void TVM_ALWAYS_INLINE BroadcastUnpacked(BcastSessionObj* self, 
DiscoAction action,
                                                   int64_t reg_id, Args&&... 
args) {
     constexpr int kNumArgs = 2 + sizeof...(Args);
-    AnyView packed_args[kNumArgs];
+    ffi::AnyView packed_args[kNumArgs];
     ffi::PackedArgs::Fill(packed_args, static_cast<int>(action), reg_id,
                           std::forward<Args>(args)...);
     self->BroadcastPacked(ffi::PackedArgs(packed_args, kNumArgs));
@@ -88,7 +87,7 @@ void BcastSessionObj::SyncWorker(int worker_id) {
 DRef BcastSessionObj::CallWithPacked(const ffi::PackedArgs& args) {
   // NOTE: this action is not safe unless we know args is not
   // used else where in this case it is oK
-  AnyView* args_vec = const_cast<AnyView*>(args.data());
+  ffi::AnyView* args_vec = const_cast<ffi::AnyView*>(args.data());
   // tranlsate args into remote calling convention
   int reg_id = AllocateReg();
   {
diff --git a/src/runtime/disco/bcast_session.h 
b/src/runtime/disco/bcast_session.h
index bfb1ca24b5..f92369d853 100644
--- a/src/runtime/disco/bcast_session.h
+++ b/src/runtime/disco/bcast_session.h
@@ -43,7 +43,7 @@ class BcastSessionObj : public SessionObj {
   void Shutdown() override;
   void InitCCL(String ccl, IntTuple device_ids) override;
   ffi::Any DebugGetFromRemote(int64_t reg_id, int worker_id) override = 0;
-  void DebugSetRegister(int64_t reg_id, AnyView value, int worker_id) override 
= 0;
+  void DebugSetRegister(int64_t reg_id, ffi::AnyView value, int worker_id) 
override = 0;
 
  protected:
   /*! \brief Deallocate a register id, kill it on all workers, and append it 
to `free_regs_`. */
diff --git a/src/runtime/disco/builtin.cc b/src/runtime/disco/builtin.cc
index b3d04d1d5b..7c769b7dd0 100644
--- a/src/runtime/disco/builtin.cc
+++ b/src/runtime/disco/builtin.cc
@@ -22,7 +22,6 @@
 #include <tvm/runtime/disco/builtin.h>
 #include <tvm/runtime/disco/disco_worker.h>
 #include <tvm/runtime/disco/session.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/runtime/relax_vm/vm.h>
 
 #include <sstream>
diff --git a/src/runtime/disco/disco_worker.cc 
b/src/runtime/disco/disco_worker.cc
index b01d378c44..8e63355283 100644
--- a/src/runtime/disco/disco_worker.cc
+++ b/src/runtime/disco/disco_worker.cc
@@ -20,7 +20,6 @@
 #include <tvm/runtime/disco/builtin.h>
 #include <tvm/runtime/disco/disco_worker.h>
 #include <tvm/runtime/disco/session.h>
-#include <tvm/runtime/packed_func.h>
 
 #include "../../support/process_id.h"
 #include "./protocol.h"
@@ -34,7 +33,7 @@ TVM_DLL DiscoWorker* DiscoWorker::ThreadLocal() {
   return ret;
 }
 
-void DiscoWorker::SetRegister(int reg_id, AnyView value) {
+void DiscoWorker::SetRegister(int reg_id, ffi::AnyView value) {
   ICHECK(0 <= reg_id && reg_id < static_cast<int>(register_file.size()));
   ffi::Any& rv = register_file.at(reg_id);
   if (rv.type_index() == ffi::TypeIndex::kTVMFFINDArray &&
@@ -95,7 +94,7 @@ struct DiscoWorker::Impl {
         }
         case DiscoAction::kDebugSetRegister: {
           int worker_id = args[2].cast<int>();
-          AnyView value = args[3];
+          ffi::AnyView value = args[3];
           DebugSetRegister(self, reg_id, worker_id, value);
           break;
         }
@@ -139,7 +138,7 @@ struct DiscoWorker::Impl {
   static void SyncWorker(DiscoWorker* self, int worker_id) {
     if (worker_id == self->worker_id) {
       ::tvm::runtime::SyncWorker();
-      AnyView packed_args[2];
+      ffi::AnyView packed_args[2];
       ffi::PackedArgs::Fill(packed_args, 
static_cast<int>(DiscoAction::kSyncWorker), worker_id);
       self->channel->Reply(ffi::PackedArgs(packed_args, 2));
     }
@@ -151,17 +150,17 @@ struct DiscoWorker::Impl {
       if (rv.as<ObjectRef>()) {
         rv = DiscoDebugObject::Wrap(rv);
       }
-      AnyView packed_args[2];
+      ffi::AnyView packed_args[2];
       ffi::PackedArgs::Fill(packed_args, 
static_cast<int>(DiscoAction::kDebugGetFromRemote), rv);
       self->channel->Reply(ffi::PackedArgs(packed_args, 2));
     }
   }
 
-  static void DebugSetRegister(DiscoWorker* self, int reg_id, int worker_id, 
AnyView value) {
+  static void DebugSetRegister(DiscoWorker* self, int reg_id, int worker_id, 
ffi::AnyView value) {
     if (worker_id == self->worker_id) {
       ::tvm::runtime::SyncWorker();
       self->SetRegister(reg_id, value);
-      AnyView packed_args[1];
+      ffi::AnyView packed_args[1];
       ffi::PackedArgs::Fill(packed_args, 
static_cast<int>(DiscoAction::kDebugSetRegister));
       self->channel->Reply(ffi::PackedArgs(packed_args, 1));
     }
@@ -171,7 +170,7 @@ struct DiscoWorker::Impl {
                          const ffi::PackedArgs& args) {
     // NOTE: this action is not safe unless we know args is not
     // used else where in this case it is oK
-    AnyView* args_vec = const_cast<AnyView*>(args.data());
+    ffi::AnyView* args_vec = const_cast<ffi::AnyView*>(args.data());
     // translate args into remote calling convention
     for (int i = 0; i < args.size(); ++i) {
       if (auto opt_dref = args_vec[i].as<DRef>()) {
diff --git a/src/runtime/disco/disco_worker_thread.h 
b/src/runtime/disco/disco_worker_thread.h
index 8d6b44396f..99960201b9 100644
--- a/src/runtime/disco/disco_worker_thread.h
+++ b/src/runtime/disco/disco_worker_thread.h
@@ -25,9 +25,9 @@
 #ifndef TVM_RUNTIME_DISCO_DISCO_WORKER_THREAD_H_
 #define TVM_RUNTIME_DISCO_DISCO_WORKER_THREAD_H_
 
+#include <tvm/ffi/function.h>
 #include <tvm/runtime/disco/disco_worker.h>
 #include <tvm/runtime/disco/session.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <memory>
 #include <thread>
diff --git a/src/runtime/disco/loader.cc b/src/runtime/disco/loader.cc
index 59624ac0bf..f93170d02f 100644
--- a/src/runtime/disco/loader.cc
+++ b/src/runtime/disco/loader.cc
@@ -24,7 +24,6 @@
 #include <tvm/ffi/function.h>
 #include <tvm/runtime/data_type.h>
 #include <tvm/runtime/disco/builtin.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/runtime/relax_vm/ndarray_cache_support.h>
 
 #include <functional>
@@ -218,7 +217,7 @@ NDArray ShardLoaderObj::ApplyShardFunc(const 
ShardInfo::ShardFunc& shard_func,
   NDArray o = NDArray::Empty(shard_func.output_info.shape, 
shard_func.output_info.dtype, device);
   ffi::Function f = this->shard_funcs_.at(shard_func.name);
   int n = static_cast<int>(shard_func.params.size());
-  std::vector<AnyView> packed_args(n + 2);
+  std::vector<ffi::AnyView> packed_args(n + 2);
   const DLTensor* w_in = param.operator->();
   const DLTensor* w_out = o.operator->();
   packed_args[0] = const_cast<DLTensor*>(w_in);
@@ -226,7 +225,7 @@ NDArray ShardLoaderObj::ApplyShardFunc(const 
ShardInfo::ShardFunc& shard_func,
     packed_args[i + 1] = shard_func.params[i];
   }
   packed_args[n + 1] = const_cast<DLTensor*>(w_out);
-  Any rv;
+  ffi::Any rv;
   f.CallPacked(ffi::PackedArgs(packed_args.data(), packed_args.size()), &rv);
   return o;
 }
diff --git a/src/runtime/disco/message_queue.h 
b/src/runtime/disco/message_queue.h
index 0fa793c3ab..e8286384ff 100644
--- a/src/runtime/disco/message_queue.h
+++ b/src/runtime/disco/message_queue.h
@@ -45,12 +45,12 @@ class DiscoStreamMessageQueue : private dmlc::Stream,
 
   ffi::PackedArgs Recv() {
     bool is_implicit_shutdown = DequeueNextPacket();
-    AnyView* packed_args = nullptr;
+    ffi::AnyView* packed_args = nullptr;
     int num_args = 0;
 
     if (is_implicit_shutdown) {
       num_args = 2;
-      packed_args = 
reinterpret_cast<AnyView*>(ArenaAlloc<TVMFFIAny>(num_args));
+      packed_args = 
reinterpret_cast<ffi::AnyView*>(ArenaAlloc<TVMFFIAny>(num_args));
       packed_args[0] = static_cast<int>(DiscoAction::kShutDown);
       packed_args[1] = 0;
     } else {
diff --git a/src/runtime/disco/process_session.cc 
b/src/runtime/disco/process_session.cc
index eff03ea253..4563079c30 100644
--- a/src/runtime/disco/process_session.cc
+++ b/src/runtime/disco/process_session.cc
@@ -20,7 +20,6 @@
 #include <tvm/runtime/base.h>
 #include <tvm/runtime/disco/disco_worker.h>
 #include <tvm/runtime/object.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <memory>
 #include <sstream>
@@ -100,7 +99,7 @@ class ProcessSessionObj final : public BcastSessionObj {
       return worker_0_->worker->register_file.at(reg_id);
     }
     {
-      AnyView packed_args[3];
+      ffi::AnyView packed_args[3];
       ffi::PackedArgs::Fill(packed_args, 
static_cast<int>(DiscoAction::kDebugGetFromRemote), reg_id,
                             worker_id);
       workers_[worker_id - 1]->Send(ffi::PackedArgs(packed_args, 3));
@@ -113,7 +112,7 @@ class ProcessSessionObj final : public BcastSessionObj {
     return result;
   }
 
-  void DebugSetRegister(int64_t reg_id, AnyView value, int worker_id) {
+  void DebugSetRegister(int64_t reg_id, ffi::AnyView value, int worker_id) {
     if (worker_id == 0) {
       this->SyncWorker(worker_id);
       worker_0_->worker->SetRegister(reg_id, value);
@@ -125,7 +124,7 @@ class ProcessSessionObj final : public BcastSessionObj {
       value = wrapped;
     }
     {
-      AnyView packed_args[4];
+      ffi::AnyView packed_args[4];
       ffi::PackedArgs::Fill(packed_args, 
static_cast<int>(DiscoAction::kDebugSetRegister), reg_id,
                             worker_id, value);
       SendPacked(worker_id, ffi::PackedArgs(packed_args, 4));
diff --git a/src/runtime/disco/protocol.h b/src/runtime/disco/protocol.h
index 9536c2911d..30a1e6ed66 100644
--- a/src/runtime/disco/protocol.h
+++ b/src/runtime/disco/protocol.h
@@ -24,7 +24,6 @@
 #include <tvm/ffi/function.h>
 #include <tvm/runtime/base.h>
 #include <tvm/runtime/disco/session.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <memory>
 #include <string>
@@ -212,7 +211,7 @@ inline void 
DiscoProtocol<SubClassType>::ReadObject(TVMFFIAny* out) {
     LOG(FATAL) << "ValueError: Object type is not supported in Disco calling 
convention: "
                << Object::TypeIndex2Key(type_index) << " (type_index = " << 
type_index << ")";
   }
-  *reinterpret_cast<AnyView*>(out) = result;
+  *reinterpret_cast<ffi::AnyView*>(out) = result;
   object_arena_.push_back(result);
 }
 
diff --git a/src/runtime/disco/session.cc b/src/runtime/disco/session.cc
index f2edfd59a2..ed2d857538 100644
--- a/src/runtime/disco/session.cc
+++ b/src/runtime/disco/session.cc
@@ -19,7 +19,6 @@
 #include <tvm/ffi/function.h>
 #include <tvm/runtime/disco/disco_worker.h>
 #include <tvm/runtime/disco/session.h>
-#include <tvm/runtime/packed_func.h>
 
 namespace tvm {
 namespace runtime {
diff --git a/src/runtime/disco/threaded_session.cc 
b/src/runtime/disco/threaded_session.cc
index c03a1bd9f4..f7e34e400c 100644
--- a/src/runtime/disco/threaded_session.cc
+++ b/src/runtime/disco/threaded_session.cc
@@ -48,7 +48,7 @@ class DiscoThreadedMessageQueue : private dmlc::Stream,
 
   ffi::PackedArgs Recv() {
     DequeueNextPacket();
-    AnyView* packed_args = nullptr;
+    ffi::AnyView* packed_args = nullptr;
     int num_args = 0;
     RPCReference::RecvPackedSeq(reinterpret_cast<TVMFFIAny**>(&packed_args), 
&num_args, this);
     return ffi::PackedArgs(packed_args, num_args);
@@ -162,7 +162,7 @@ class ThreadedSessionObj final : public BcastSessionObj {
     return this->workers_.at(worker_id).worker->register_file.at(reg_id);
   }
 
-  void DebugSetRegister(int64_t reg_id, AnyView value, int worker_id) {
+  void DebugSetRegister(int64_t reg_id, ffi::AnyView value, int worker_id) {
     this->SyncWorker(worker_id);
     this->workers_.at(worker_id).worker->SetRegister(reg_id, value);
   }
diff --git a/src/runtime/dso_library.cc b/src/runtime/dso_library.cc
index d64d893ce1..8a86666913 100644
--- a/src/runtime/dso_library.cc
+++ b/src/runtime/dso_library.cc
@@ -24,7 +24,6 @@
 #include <tvm/ffi/function.h>
 #include <tvm/ffi/memory.h>
 #include <tvm/runtime/module.h>
-#include <tvm/runtime/packed_func.h>
 
 #include "library_module.h"
 
diff --git a/src/runtime/hexagon/hexagon_buffer.h 
b/src/runtime/hexagon/hexagon_buffer.h
index b426825fc2..986d6b6e5e 100644
--- a/src/runtime/hexagon/hexagon_buffer.h
+++ b/src/runtime/hexagon/hexagon_buffer.h
@@ -20,11 +20,11 @@
 #ifndef TVM_RUNTIME_HEXAGON_HEXAGON_BUFFER_H_
 #define TVM_RUNTIME_HEXAGON_HEXAGON_BUFFER_H_
 
+#include <tvm/ffi/function.h>
 #include <tvm/runtime/base.h>
 #include <tvm/runtime/device_api.h>
 #include <tvm/runtime/logging.h>
 #include <tvm/runtime/ndarray.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <memory>
 #include <vector>
diff --git a/src/runtime/hexagon/hexagon_common.h 
b/src/runtime/hexagon/hexagon_common.h
index 5834093a9e..1e68a93a8b 100644
--- a/src/runtime/hexagon/hexagon_common.h
+++ b/src/runtime/hexagon/hexagon_common.h
@@ -24,9 +24,10 @@
 #define TVM_RUNTIME_HEXAGON_HEXAGON_COMMON_H_
 
 #include <dlpack/dlpack.h>
+#include <tvm/ffi/function.h>
 #include <tvm/runtime/c_backend_api.h>
+#include <tvm/runtime/logging.h>
 #include <tvm/runtime/object.h>
-#include <tvm/runtime/packed_func.h>
 
 #if defined(__hexagon__)
 #include <HAP_farf.h>
diff --git a/src/runtime/hexagon/hexagon_thread_manager.h 
b/src/runtime/hexagon/hexagon_thread_manager.h
index 31f3d04669..7ec3ac6150 100644
--- a/src/runtime/hexagon/hexagon_thread_manager.h
+++ b/src/runtime/hexagon/hexagon_thread_manager.h
@@ -20,9 +20,9 @@
 #ifndef TVM_RUNTIME_HEXAGON_HEXAGON_THREAD_MANAGER_H_
 #define TVM_RUNTIME_HEXAGON_HEXAGON_THREAD_MANAGER_H_
 
+#include <tvm/ffi/function.h>
 #include <tvm/runtime/base.h>
 #include <tvm/runtime/logging.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <memory>
 #include <unordered_map>
diff --git a/src/runtime/hexagon/hexagon_vtcm_pool.h 
b/src/runtime/hexagon/hexagon_vtcm_pool.h
index 18c89722f4..ece8454b85 100644
--- a/src/runtime/hexagon/hexagon_vtcm_pool.h
+++ b/src/runtime/hexagon/hexagon_vtcm_pool.h
@@ -20,11 +20,11 @@
 #ifndef TVM_RUNTIME_HEXAGON_HEXAGON_VTCM_POOL_H_
 #define TVM_RUNTIME_HEXAGON_HEXAGON_VTCM_POOL_H_
 
+#include <tvm/ffi/function.h>
 #include <tvm/runtime/base.h>
 #include <tvm/runtime/device_api.h>
 #include <tvm/runtime/logging.h>
 #include <tvm/runtime/ndarray.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <utility>
 #include <vector>
diff --git a/src/runtime/hexagon/rpc/hexagon/rpc_server.cc 
b/src/runtime/hexagon/rpc/hexagon/rpc_server.cc
index d9a9d007b0..7880018ff8 100644
--- a/src/runtime/hexagon/rpc/hexagon/rpc_server.cc
+++ b/src/runtime/hexagon/rpc/hexagon/rpc_server.cc
@@ -29,7 +29,6 @@ extern "C" {
 #include <stdlib.h>
 #include <tvm/ffi/function.h>
 #include <tvm/runtime/object.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <algorithm>
 #include <fstream>
diff --git a/src/runtime/hexagon/rpc/simulator/session.cc 
b/src/runtime/hexagon/rpc/simulator/session.cc
index 3211b8d047..5eb7beab0f 100644
--- a/src/runtime/hexagon/rpc/simulator/session.cc
+++ b/src/runtime/hexagon/rpc/simulator/session.cc
@@ -19,7 +19,6 @@
 
 #include <HexagonWrapper.h>
 #include <tvm/ffi/function.h>
-#include <tvm/runtime/packed_func.h>
 // POSIX includes
 #include <dirent.h>
 #include <unistd.h>
diff --git a/src/runtime/meta_data.h b/src/runtime/meta_data.h
index b93db5a19c..51120c1f9e 100644
--- a/src/runtime/meta_data.h
+++ b/src/runtime/meta_data.h
@@ -26,9 +26,9 @@
 
 #include <dmlc/io.h>
 #include <dmlc/json.h>
+#include <tvm/ffi/function.h>
 #include <tvm/runtime/module.h>
 #include <tvm/runtime/ndarray.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <string>
 #include <unordered_map>
diff --git a/src/runtime/metal/metal_common.h b/src/runtime/metal/metal_common.h
index e57907e06e..138d312dd4 100644
--- a/src/runtime/metal/metal_common.h
+++ b/src/runtime/metal/metal_common.h
@@ -30,10 +30,10 @@
 #import <Metal/MTLCommandQueue.h>
 #import <Metal/MTLDevice.h>
 #import <Metal/MTLLibrary.h>
+#include <tvm/ffi/function.h>
 #include <tvm/runtime/base.h>
 #include <tvm/runtime/device_api.h>
 #include <tvm/runtime/logging.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <memory>
 #include <mutex>
diff --git a/src/runtime/metal/metal_module.h b/src/runtime/metal/metal_module.h
index d01523b1fa..e2705a7a80 100644
--- a/src/runtime/metal/metal_module.h
+++ b/src/runtime/metal/metal_module.h
@@ -24,7 +24,7 @@
 #ifndef TVM_RUNTIME_METAL_METAL_MODULE_H_
 #define TVM_RUNTIME_METAL_METAL_MODULE_H_
 
-#include <tvm/runtime/packed_func.h>
+#include <tvm/ffi/function.h>
 
 #include <memory>
 #include <string>
diff --git a/src/runtime/module.cc b/src/runtime/module.cc
index d16239079c..d2bc4b2c29 100644
--- a/src/runtime/module.cc
+++ b/src/runtime/module.cc
@@ -23,7 +23,6 @@
  */
 #include <tvm/ffi/function.h>
 #include <tvm/runtime/module.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <cstring>
 #include <unordered_set>
diff --git a/src/runtime/opencl/opencl_common.h 
b/src/runtime/opencl/opencl_common.h
index 91dad2af82..dbef2f518f 100644
--- a/src/runtime/opencl/opencl_common.h
+++ b/src/runtime/opencl/opencl_common.h
@@ -24,12 +24,12 @@
 #ifndef TVM_RUNTIME_OPENCL_OPENCL_COMMON_H_
 #define TVM_RUNTIME_OPENCL_OPENCL_COMMON_H_
 
+#include <tvm/ffi/function.h>
 #include <tvm/runtime/base.h>
 #include <tvm/runtime/device_api.h>
 #include <tvm/runtime/logging.h>
 #include <tvm/runtime/memory/memory_manager.h>
 #include <tvm/runtime/ndarray.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/runtime/profiling.h>
 
 /* There are many OpenCL platforms that do not yet support OpenCL 2.0,
diff --git a/src/runtime/opencl/opencl_module.h 
b/src/runtime/opencl/opencl_module.h
index 22fc119e03..198adc6cb2 100644
--- a/src/runtime/opencl/opencl_module.h
+++ b/src/runtime/opencl/opencl_module.h
@@ -24,7 +24,7 @@
 #ifndef TVM_RUNTIME_OPENCL_OPENCL_MODULE_H_
 #define TVM_RUNTIME_OPENCL_OPENCL_MODULE_H_
 
-#include <tvm/runtime/packed_func.h>
+#include <tvm/ffi/function.h>
 
 #include <memory>
 #include <string>
diff --git a/src/runtime/pack_args.h b/src/runtime/pack_args.h
index 6fbbb05b7d..b77adda4c9 100644
--- a/src/runtime/pack_args.h
+++ b/src/runtime/pack_args.h
@@ -31,8 +31,8 @@
 #ifndef TVM_RUNTIME_PACK_ARGS_H_
 #define TVM_RUNTIME_PACK_ARGS_H_
 
+#include <tvm/ffi/function.h>
 #include <tvm/runtime/base.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <cstring>
 #include <vector>
diff --git a/src/runtime/packed_func.cc b/src/runtime/packed_func.cc
deleted file mode 100644
index 38146227f9..0000000000
--- a/src/runtime/packed_func.cc
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-/*
- * \file src/runtime/packed_func.cc
- * \brief Implementation of non-inlinable ffi::Function pieces.
- */
-#include <tvm/ffi/function.h>
-#include <tvm/runtime/packed_func.h>
-
-namespace tvm {
-namespace runtime {
-
-TVM_REGISTER_OBJECT_TYPE(ffi::FunctionObj);
-
-}  // namespace runtime
-}  // namespace tvm
diff --git a/src/runtime/profiling.cc b/src/runtime/profiling.cc
index 2a12fba0b0..bab1d50db6 100644
--- a/src/runtime/profiling.cc
+++ b/src/runtime/profiling.cc
@@ -23,9 +23,9 @@
  */
 
 #include <dmlc/json.h>
+#include <tvm/ffi/function.h>
 #include <tvm/runtime/c_backend_api.h>
 #include <tvm/runtime/data_type.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/runtime/profiling.h>
 #include <tvm/runtime/threading_backend.h>
 
@@ -495,7 +495,7 @@ String ReportNode::AsTable(bool sort, bool aggregate, bool 
compute_col_sums) con
       }
     }
     for (const auto& p : aggregates) {
-      std::unordered_map<String, Any> aggregated;
+      std::unordered_map<String, ffi::Any> aggregated;
       std::unordered_set<std::string> metrics;
       for (auto& call : calls) {
         for (auto& metric : call) {
@@ -803,46 +803,47 @@ 
TVM_FFI_REGISTER_GLOBAL("runtime.profiling.DeviceWrapper").set_body_typed([](Dev
 ffi::Function ProfileFunction(Module mod, std::string func_name, int 
device_type, int device_id,
                               int warmup_iters, Array<MetricCollector> 
collectors) {
   // Module::GetFunction is not const, so this lambda has to be mutable
-  return ffi::Function::FromPacked([=](const AnyView* args, int32_t num_args, 
Any* ret) mutable {
-    ffi::Function f = mod.GetFunction(func_name);
-    CHECK(f.defined()) << "There is no function called \"" << func_name << "\" 
in the module";
-    Device dev{static_cast<DLDeviceType>(device_type), device_id};
-
-    // warmup
-    for (int i = 0; i < warmup_iters; i++) {
-      f.CallPacked(args, num_args, ret);
-    }
-
-    for (auto& collector : collectors) {
-      collector->Init({DeviceWrapper(dev)});
-    }
-    std::vector<Map<String, ffi::Any>> results;
-    results.reserve(collectors.size());
-    std::vector<std::pair<MetricCollector, ObjectRef>> collector_data;
-    collector_data.reserve(collectors.size());
-    for (auto& collector : collectors) {
-      ObjectRef o = collector->Start(dev);
-      // If not defined, then the collector cannot time this device.
-      if (o.defined()) {
-        collector_data.push_back({collector, o});
-      }
-    }
+  return ffi::Function::FromPacked(
+      [=](const ffi::AnyView* args, int32_t num_args, ffi::Any* ret) mutable {
+        ffi::Function f = mod.GetFunction(func_name);
+        CHECK(f.defined()) << "There is no function called \"" << func_name << 
"\" in the module";
+        Device dev{static_cast<DLDeviceType>(device_type), device_id};
+
+        // warmup
+        for (int i = 0; i < warmup_iters; i++) {
+          f.CallPacked(args, num_args, ret);
+        }
 
-    // TODO(tkonolige): repeated calls if the runtime is small?
-    f.CallPacked(args, num_args, ret);
+        for (auto& collector : collectors) {
+          collector->Init({DeviceWrapper(dev)});
+        }
+        std::vector<Map<String, ffi::Any>> results;
+        results.reserve(collectors.size());
+        std::vector<std::pair<MetricCollector, ObjectRef>> collector_data;
+        collector_data.reserve(collectors.size());
+        for (auto& collector : collectors) {
+          ObjectRef o = collector->Start(dev);
+          // If not defined, then the collector cannot time this device.
+          if (o.defined()) {
+            collector_data.push_back({collector, o});
+          }
+        }
 
-    for (auto& kv : collector_data) {
-      results.push_back(kv.first->Stop(kv.second));
-    }
-    Map<String, ffi::Any> combined_results;
-    for (auto m : results) {
-      for (auto p : m) {
-        // assume that there is no shared metric name between collectors
-        combined_results.Set(p.first, p.second);
-      }
-    }
-    *ret = combined_results;
-  });
+        // TODO(tkonolige): repeated calls if the runtime is small?
+        f.CallPacked(args, num_args, ret);
+
+        for (auto& kv : collector_data) {
+          results.push_back(kv.first->Stop(kv.second));
+        }
+        Map<String, ffi::Any> combined_results;
+        for (auto m : results) {
+          for (auto p : m) {
+            // assume that there is no shared metric name between collectors
+            combined_results.Set(p.first, p.second);
+          }
+        }
+        *ret = combined_results;
+      });
 }
 
 TVM_FFI_REGISTER_GLOBAL("runtime.profiling.ProfileFunction")
@@ -869,8 +870,8 @@ ffi::Function WrapTimeEvaluator(ffi::Function pf, Device 
dev, int number, int re
 
   auto ftimer = [pf, dev, number, repeat, min_repeat_ms, 
limit_zero_time_iterations,
                  cooldown_interval_ms, repeats_to_cooldown, cache_flush_bytes,
-                 f_preproc](const AnyView* args, int num_args, Any* rv) 
mutable {
-    Any temp;
+                 f_preproc](const ffi::AnyView* args, int num_args, ffi::Any* 
rv) mutable {
+    ffi::Any temp;
     std::ostringstream os;
     // skip first time call, to activate lazy compilation components.
     pf.CallPacked(args, num_args, &temp);
diff --git a/src/runtime/relax_vm/attn_backend.h 
b/src/runtime/relax_vm/attn_backend.h
index dd8127525c..2eb9cf3d66 100644
--- a/src/runtime/relax_vm/attn_backend.h
+++ b/src/runtime/relax_vm/attn_backend.h
@@ -26,9 +26,9 @@
 #define TVM_RUNTIME_RELAX_VM_ATTN_BACKEND_H_
 
 #include <tvm/ffi/container/array.h>
+#include <tvm/ffi/function.h>
 #include <tvm/runtime/int_tuple.h>
 #include <tvm/runtime/logging.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <memory>
 #include <tuple>
diff --git a/src/runtime/relax_vm/builtin.cc b/src/runtime/relax_vm/builtin.cc
index f62bb14608..3d7904bd8f 100644
--- a/src/runtime/relax_vm/builtin.cc
+++ b/src/runtime/relax_vm/builtin.cc
@@ -29,7 +29,6 @@
 #include <tvm/runtime/logging.h>
 #include <tvm/runtime/memory/memory_manager.h>
 #include <tvm/runtime/ndarray.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/runtime/relax_vm/builtin.h>
 #include <tvm/runtime/relax_vm/bytecode.h>
 #include <tvm/runtime/relax_vm/vm.h>
@@ -113,7 +112,7 @@ 
TVM_FFI_REGISTER_GLOBAL("vm.builtin.match_prim_value").set_body_typed(MatchPrimV
  *
  * \sa MatchShapeCode
  */
-void MatchShape(ffi::PackedArgs args, Any* rv) {
+void MatchShape(ffi::PackedArgs args, ffi::Any* rv) {
   // input shape the first argument can take in tensor or shape.
   ffi::Shape input_shape;
   if (auto opt_nd = args[0].as<NDArray>()) {
@@ -188,7 +187,7 @@ 
TVM_FFI_REGISTER_GLOBAL("vm.builtin.make_prim_value").set_body_typed(MakePrimVal
  *
  * \sa MakeShapeCode
  */
-void MakeShape(ffi::PackedArgs args, Any* rv) {
+void MakeShape(ffi::PackedArgs args, ffi::Any* rv) {
   // NOTE: heap can be nullptr
   auto heap = args[0].try_cast<DLTensor*>();
   int64_t* heap_data = heap.has_value() ? static_cast<int64_t*>((*heap)->data) 
: nullptr;
@@ -219,8 +218,8 @@ 
TVM_FFI_REGISTER_GLOBAL("vm.builtin.make_shape").set_body_packed(MakeShape);
  * \param dtype The expected content data type.
  * \param err_ctx Additional context if error occurs.
  */
-void CheckTensorInfo(ffi::PackedArgs args, Any* rv) {
-  AnyView arg = args[0];
+void CheckTensorInfo(ffi::PackedArgs args, ffi::Any* rv) {
+  ffi::AnyView arg = args[0];
   int ndim = args[1].cast<int>();
   DataType dtype;
   Optional<String> err_ctx;
@@ -278,7 +277,7 @@ 
TVM_FFI_REGISTER_GLOBAL("vm.builtin.check_shape_info").set_body_typed(CheckShape
  * \param dtype Expected dtype of the PrimValue.  Can be DataType::Void() for 
unknown dtype.
  * \param err_ctx Additional context if error occurs.
  */
-void CheckPrimValueInfo(AnyView arg, DataType dtype, Optional<String> err_ctx) 
{
+void CheckPrimValueInfo(ffi::AnyView arg, DataType dtype, Optional<String> 
err_ctx) {
   if (auto opt_obj = arg.as<ObjectRef>()) {
     LOG(FATAL) << "TypeError: " << err_ctx.value_or("") << ", expected dtype " 
<< dtype
                << ", but received ObjectRef of type " << 
opt_obj.value()->GetTypeKey();
@@ -362,9 +361,9 @@ 
TVM_FFI_REGISTER_GLOBAL("vm.builtin.alloc_tensor").set_body_method(&StorageObj::
 //  Closure function handling, calling convention
 //-------------------------------------------------
 TVM_FFI_REGISTER_GLOBAL("vm.builtin.make_closure")
-    .set_body_packed([](ffi::PackedArgs args, Any* rv) {
+    .set_body_packed([](ffi::PackedArgs args, ffi::Any* rv) {
       VMClosure clo = args[0].cast<VMClosure>();
-      std::vector<Any> saved_args;
+      std::vector<ffi::Any> saved_args;
       saved_args.resize(args.size() - 1);
       for (size_t i = 0; i < saved_args.size(); ++i) {
         saved_args[i] = args[i + 1];
@@ -374,7 +373,7 @@ TVM_FFI_REGISTER_GLOBAL("vm.builtin.make_closure")
     });
 
 TVM_FFI_REGISTER_GLOBAL("vm.builtin.invoke_closure")
-    .set_body_packed([](ffi::PackedArgs args, Any* rv) {
+    .set_body_packed([](ffi::PackedArgs args, ffi::Any* rv) {
       // args[0]: vm; args[1]: closure; args[2, 3, ...]: function arguments
       VirtualMachine* vm = VirtualMachine::GetContextPtr(args[0]);
       ObjectRef vm_closure = args[1].cast<ObjectRef>();
@@ -382,12 +381,12 @@ TVM_FFI_REGISTER_GLOBAL("vm.builtin.invoke_closure")
     });
 
 TVM_FFI_REGISTER_GLOBAL("vm.builtin.call_tir_dyn")
-    .set_body_packed([](ffi::PackedArgs args, Any* rv) {
+    .set_body_packed([](ffi::PackedArgs args, ffi::Any* rv) {
       ffi::Function func = args[0].cast<ffi::Function>();
       ffi::Shape to_unpack = args[args.size() - 1].cast<ffi::Shape>();
       size_t num_tensor_args = args.size() - 2;
 
-      std::vector<AnyView> packed_args(num_tensor_args + to_unpack.size());
+      std::vector<ffi::AnyView> packed_args(num_tensor_args + 
to_unpack.size());
       std::copy(args.data() + 1, args.data() + args.size() - 1, 
packed_args.data());
 
       for (size_t i = 0; i < to_unpack.size(); ++i) {
@@ -401,7 +400,7 @@ TVM_FFI_REGISTER_GLOBAL("vm.builtin.call_tir_dyn")
 //-------------------------------------
 
TVM_FFI_REGISTER_GLOBAL("vm.builtin.shape_of").set_body_method(&NDArray::Shape);
 
-TVM_FFI_REGISTER_GLOBAL("vm.builtin.copy").set_body_typed([](Any a) -> Any { 
return a; });
+TVM_FFI_REGISTER_GLOBAL("vm.builtin.copy").set_body_typed([](ffi::Any a) -> 
ffi::Any { return a; });
 
 TVM_FFI_REGISTER_GLOBAL("vm.builtin.reshape")
     .set_body_typed([](NDArray data, ffi::Shape new_shape) {
@@ -423,7 +422,7 @@ TVM_FFI_REGISTER_GLOBAL("vm.builtin.to_device")
  * \param cond The condition
  * \return Bool
  */
-bool ReadIfCond(AnyView cond) {
+bool ReadIfCond(ffi::AnyView cond) {
   if (auto opt_int = cond.try_cast<bool>()) {
     return opt_int.value();
   }
@@ -468,7 +467,7 @@ 
TVM_FFI_REGISTER_GLOBAL("vm.builtin.read_if_cond").set_body_typed(ReadIfCond);
 //-------------------------------------
 
 TVM_FFI_REGISTER_GLOBAL("vm.builtin.invoke_debug_func")
-    .set_body_packed([](ffi::PackedArgs args, Any* rv) -> void {
+    .set_body_packed([](ffi::PackedArgs args, ffi::Any* rv) -> void {
       ICHECK_GE(args.size(), 3);
       int num_args = args.size() - 3;
       ObjectRef io_effect = args[0].cast<ObjectRef>();
@@ -479,7 +478,7 @@ TVM_FFI_REGISTER_GLOBAL("vm.builtin.invoke_debug_func")
                                     << "Use the decorator 
`@tvm.register_func(\"" << debug_func_name
                                     << "\")` to register it.";
       String line_info = args[2].cast<String>();
-      std::vector<AnyView> call_args(num_args + 1);
+      std::vector<ffi::AnyView> call_args(num_args + 1);
       {
         call_args[0] = line_info;
         for (int i = 0; i < num_args; ++i) {
@@ -494,20 +493,21 @@ TVM_FFI_REGISTER_GLOBAL("vm.builtin.invoke_debug_func")
 //  Data structure API
 //-------------------------------------
 TVM_FFI_REGISTER_GLOBAL("vm.builtin.tuple_getitem")
-    .set_body_typed([](Array<Any> arr, int64_t index) { return arr[index]; });
+    .set_body_typed([](Array<ffi::Any> arr, int64_t index) { return 
arr[index]; });
 
 TVM_FFI_REGISTER_GLOBAL("vm.builtin.tuple_reset_item")
     .set_body_typed([](const ffi::ArrayObj* arr, int64_t index) {
       const_cast<ffi::ArrayObj*>(arr)->SetItem(index, nullptr);
     });
 
-TVM_FFI_REGISTER_GLOBAL("vm.builtin.make_tuple").set_body_packed([](ffi::PackedArgs
 args, Any* rv) {
-  Array<Any> arr;
-  for (int i = 0; i < args.size(); ++i) {
-    arr.push_back(args[i]);
-  }
-  *rv = arr;
-});
+TVM_FFI_REGISTER_GLOBAL("vm.builtin.make_tuple")
+    .set_body_packed([](ffi::PackedArgs args, ffi::Any* rv) {
+      Array<ffi::Any> arr;
+      for (int i = 0; i < args.size(); ++i) {
+        arr.push_back(args[i]);
+      }
+      *rv = arr;
+    });
 
 
TVM_FFI_REGISTER_GLOBAL("vm.builtin.tensor_to_shape").set_body_typed([](NDArray 
data) {
   NDArray arr = data;
@@ -613,7 +613,7 @@ int TVMBackendAnyListSetPackedArg(void* anylist, int index, 
TVMFFIAny* args, int
 int TVMBackendAnyListResetItem(void* anylist, int index) {
   using namespace tvm::runtime;
   TVM_FFI_SAFE_CALL_BEGIN();
-  auto* list = static_cast<Any*>(anylist);
+  auto* list = static_cast<tvm::ffi::Any*>(anylist);
   list[index] = nullptr;
   TVM_FFI_SAFE_CALL_END();
 }
@@ -622,7 +622,7 @@ int TVMBackendAnyListMoveFromPackedReturn(void* anylist, 
int index, TVMFFIAny* a
                                           int ret_offset) {
   using namespace tvm::runtime;
   TVM_FFI_SAFE_CALL_BEGIN();
-  auto* list = static_cast<Any*>(anylist);
+  auto* list = static_cast<tvm::ffi::Any*>(anylist);
   list[index] = 
tvm::ffi::details::AnyUnsafe::MoveTVMFFIAnyToAny(std::move(args[ret_offset]));
   TVM_FFI_SAFE_CALL_END();
 }
diff --git a/src/runtime/relax_vm/cuda/cuda_graph_builtin.cc 
b/src/runtime/relax_vm/cuda/cuda_graph_builtin.cc
index 2fdf514b0e..d3484cbc7b 100644
--- a/src/runtime/relax_vm/cuda/cuda_graph_builtin.cc
+++ b/src/runtime/relax_vm/cuda/cuda_graph_builtin.cc
@@ -24,7 +24,6 @@
 
 #include <tvm/ffi/container/array.h>
 #include <tvm/ffi/function.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/runtime/relax_vm/vm.h>
 
 #include "../../../support/utils.h"
diff --git a/src/runtime/relax_vm/hexagon/builtin.cc 
b/src/runtime/relax_vm/hexagon/builtin.cc
index d2d05a0e82..89f1708b28 100644
--- a/src/runtime/relax_vm/hexagon/builtin.cc
+++ b/src/runtime/relax_vm/hexagon/builtin.cc
@@ -24,7 +24,6 @@
 
 #include <tvm/ffi/function.h>
 #include <tvm/runtime/device_api.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/runtime/relax_vm/vm.h>
 
 #include "../../hexagon/hexagon_device_api.h"
diff --git a/src/runtime/relax_vm/ndarray_cache_support.cc 
b/src/runtime/relax_vm/ndarray_cache_support.cc
index c69b48ccd6..7341507e9a 100644
--- a/src/runtime/relax_vm/ndarray_cache_support.cc
+++ b/src/runtime/relax_vm/ndarray_cache_support.cc
@@ -268,7 +268,7 @@ class NDArrayCache {
 
 
TVM_FFI_REGISTER_GLOBAL("vm.builtin.ndarray_cache.get").set_body_typed(NDArrayCache::Get);
 TVM_FFI_REGISTER_GLOBAL("vm.builtin.ndarray_cache.update")
-    .set_body_packed([](ffi::PackedArgs args, Any* rv) {
+    .set_body_packed([](ffi::PackedArgs args, ffi::Any* rv) {
       CHECK(args.size() == 2 || args.size() == 3);
       String name = args[0].cast<String>();
       bool is_override = args.size() == 2 ? false : args[2].cast<bool>();
@@ -362,7 +362,7 @@ TVM_FFI_REGISTER_GLOBAL("vm.builtin.param_array_from_cache")
 TVM_FFI_REGISTER_GLOBAL("vm.builtin.param_array_from_cache_by_name")
     .set_body_typed(ParamModuleNode::GetParamByName);
 TVM_FFI_REGISTER_GLOBAL("vm.builtin.param_array_from_cache_by_name_unpacked")
-    .set_body_packed([](ffi::PackedArgs args, Any* rv) {
+    .set_body_packed([](ffi::PackedArgs args, ffi::Any* rv) {
       Array<String> names;
       names.reserve(args.size());
       for (int i = 0; i < args.size(); ++i) {
diff --git a/src/runtime/relax_vm/paged_kv_cache.cc 
b/src/runtime/relax_vm/paged_kv_cache.cc
index be9cd59557..2f21e6978a 100644
--- a/src/runtime/relax_vm/paged_kv_cache.cc
+++ b/src/runtime/relax_vm/paged_kv_cache.cc
@@ -2285,7 +2285,7 @@ TVM_REGISTER_OBJECT_TYPE(PagedAttentionKVCacheObj);
 //-------------------------------------------------
 
 TVM_FFI_REGISTER_GLOBAL("vm.builtin.paged_attention_kv_cache_create")
-    .set_body_packed([](ffi::PackedArgs args, Any* rv) {
+    .set_body_packed([](ffi::PackedArgs args, ffi::Any* rv) {
       // Todo: cuda graph arg
       CHECK(args.size() == 28 || args.size() == 29)
           << "Invalid number of KV cache constructor args: " << args.size();
diff --git a/src/runtime/relax_vm/vm.cc b/src/runtime/relax_vm/vm.cc
index 0fef2e4c6d..8d0b928f85 100644
--- a/src/runtime/relax_vm/vm.cc
+++ b/src/runtime/relax_vm/vm.cc
@@ -21,9 +21,9 @@
  * \file src/runtime/relax_vm/vm.cc
  */
 #include <dlpack/dlpack.h>
+#include <tvm/ffi/function.h>
 #include <tvm/runtime/memory/memory_manager.h>
 #include <tvm/runtime/nvtx.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/runtime/profiling.h>
 #include <tvm/runtime/relax_vm/vm.h>
 
@@ -52,9 +52,9 @@ VMClosure::VMClosure(String func_name, ffi::Function impl) {
  * \param last_args The arguments to bound to in the end of the function.
  * \note The new function takes in arguments and append the last_args in the 
end.
  */
-ffi::Function VMClosure::BindLastArgs(ffi::Function func, std::vector<Any> 
last_args) {
+ffi::Function VMClosure::BindLastArgs(ffi::Function func, 
std::vector<ffi::Any> last_args) {
   return ffi::Function([func, last_args](ffi::PackedArgs args, ffi::Any* rv) {
-    std::vector<AnyView> packed_args(args.size() + last_args.size());
+    std::vector<ffi::AnyView> packed_args(args.size() + last_args.size());
     std::copy(args.data(), args.data() + args.size(), packed_args.data());
     for (size_t i = 0; i < last_args.size(); ++i) {
       packed_args[args.size() + i] = last_args[i];
@@ -68,14 +68,14 @@ ffi::Function VMClosure::BindLastArgs(ffi::Function func, 
std::vector<Any> last_
 //-----------------------------------------------------------
 // Use the args after `starting_arg_idx` as a series of indices into `obj`,
 // indexing into nested Array and returning the final indexed object.
-Any IndexIntoNestedObject(Any obj, ffi::PackedArgs args, int starting_arg_idx) 
{
+ffi::Any IndexIntoNestedObject(ffi::Any obj, ffi::PackedArgs args, int 
starting_arg_idx) {
   for (int i = starting_arg_idx; i < args.size(); i++) {
     // the object must be an Array to be able to index into it
     if (!obj.as<ffi::ArrayObj>()) {
       LOG(FATAL) << "ValueError: Attempted to index into an object that is not 
an Array.";
     }
     int index = args[i].cast<int>();
-    auto arr = Downcast<ffi::Array<Any>>(obj);
+    auto arr = Downcast<ffi::Array<ffi::Any>>(obj);
     // make sure the index is in bounds
     if (index >= static_cast<int>(arr.size())) {
       LOG(FATAL) << "IndexError: Invalid index (" << index << " >= " << 
arr.size() << ").";
@@ -110,12 +110,12 @@ Any ConvertObjectToDevice(Any src, const Device& dev, 
Allocator* alloc) {
   }
 }
 
-ffi::Any ConvertArgToDevice(AnyView input, Device dev, Allocator* alloc) {
+ffi::Any ConvertArgToDevice(ffi::AnyView input, Device dev, Allocator* alloc) {
   // in terms of memory-behavior.
   // To be extra careful, we copy DLTensor.
   // The developer can still explicitly allocate NDArray
   // in TVM Native API or NDArray::FromDLPack to regain zero copy behavior.
-  Any ret;
+  ffi::Any ret;
   if (auto opt_obj = input.as<ObjectRef>()) {
     ret = ConvertObjectToDevice(opt_obj.value(), dev, alloc);
   } else if (auto opt_dltensor = input.as<DLTensor*>()) {
@@ -131,7 +131,7 @@ ffi::Any ConvertArgToDevice(AnyView input, Device dev, 
Allocator* alloc) {
 }
 
 ffi::Any ConvertRegToDevice(ffi::Any input, Device dev, Allocator* alloc) {
-  Any ret;
+  ffi::Any ret;
   if (auto opt_obj = input.as<ObjectRef>()) {
     ret = ConvertObjectToDevice(opt_obj.value(), dev, alloc);
   } else {
@@ -163,7 +163,7 @@ struct VMFrame {
   /*! \brief Register in caller's frame to put return value */
   RegName caller_return_register;
   /*! \brief Temporary argument tcode stack for packed func call. */
-  std::vector<AnyView> call_args;
+  std::vector<ffi::AnyView> call_args;
 
   VMFrame(Index pc, Index register_file_size)
       : return_pc(pc), register_file(register_file_size), 
caller_return_register(0) {}
@@ -534,7 +534,7 @@ void VirtualMachineImpl::InvokeClosurePacked(const 
ObjectRef& closure_or_packedf
   auto* clo = closure_or_packedfunc.as<VMClosureObj>();
   ICHECK(clo != nullptr) << "Function expects a closure or ffi::Function ";
 
-  std::vector<AnyView> packed_args(args.size() + 1);
+  std::vector<ffi::AnyView> packed_args(args.size() + 1);
   // per convention, ctx ptr must be VirtualMachine* casted to void.
   // this and VirtualMachine* may or maynot be the same
   // do first cast to VirtualMachine* then to void*
@@ -554,7 +554,7 @@ RegType VirtualMachineImpl::InvokeClosureInternal(const 
ObjectRef& closure_or_pa
   auto* clo = closure_or_packed.as<VMClosureObj>();
   int clo_offset = clo != nullptr ? 1 : 0;
 
-  std::vector<AnyView> packed_args(args.size() + clo_offset);
+  std::vector<ffi::AnyView> packed_args(args.size() + clo_offset);
 
   if (clo != nullptr) {
     packed_args[0] = static_cast<void*>(static_cast<VirtualMachine*>(this));
@@ -726,7 +726,7 @@ void VirtualMachineImpl::RunInstrCall(VMFrame* curr_frame, 
Instruction instr) {
 
   // NOTE: no changes and resize to those vector ref(otherwise can leads to 
segfault)
   //       in the remainder part of the function.
-  std::vector<AnyView>& call_args = curr_frame->call_args;
+  std::vector<ffi::AnyView>& call_args = curr_frame->call_args;
 
   for (Index i = 0; i < instr.num_args; ++i) {
     Instruction::Arg arg = instr.args[i];
@@ -768,7 +768,7 @@ void VirtualMachineImpl::RunInstrCall(VMFrame* curr_frame, 
Instruction instr) {
     call_args[2] = true;
     call_args[3] = nullptr;
 
-    Any rv;
+    ffi::Any rv;
     // store dtype to str since py callback cannot handle dtype atm.
     std::vector<std::unique_ptr<std::string>> temp_dtype;
     for (int i = 0; i < instr.num_args; ++i) {
@@ -906,7 +906,7 @@ void VirtualMachineImpl::_SetInstrument(ffi::PackedArgs 
args, ffi::Any* rv) {
 void VirtualMachineImpl::_GetOutputArity(ffi::PackedArgs args, ffi::Any* rv) {
   std::string func_name = args[0].cast<std::string>();
   RegType out = LookupVMOutput(func_name);
-  Any obj = IndexIntoNestedObject(out, args, 1);
+  ffi::Any obj = IndexIntoNestedObject(out, args, 1);
   if (const auto* arr = obj.as<ffi::ArrayObj>()) {
     *rv = static_cast<int>(arr->size());
   } else {
@@ -917,7 +917,7 @@ void VirtualMachineImpl::_GetOutputArity(ffi::PackedArgs 
args, ffi::Any* rv) {
 void VirtualMachineImpl::_GetOutput(ffi::PackedArgs args, ffi::Any* rv) {
   std::string func_name = args[0].cast<std::string>();
   RegType out = LookupVMOutput(func_name);
-  Any obj = IndexIntoNestedObject(out, args, 1);
+  ffi::Any obj = IndexIntoNestedObject(out, args, 1);
   if (obj.as<ffi::ArrayObj>()) {
     LOG(FATAL) << "ValueError: `get_output` cannot return a tuple for RPC 
compatibility. "
                   "Please specify another index argument.";
diff --git a/src/runtime/rocm/rocm_common.h b/src/runtime/rocm/rocm_common.h
index b258e37508..ec3e744d30 100644
--- a/src/runtime/rocm/rocm_common.h
+++ b/src/runtime/rocm/rocm_common.h
@@ -26,7 +26,7 @@
 
 #include <hip/hip_runtime_api.h>
 #include <hip/hip_version.h>
-#include <tvm/runtime/packed_func.h>
+#include <tvm/ffi/function.h>
 
 #include <string>
 
diff --git a/src/runtime/rpc/rpc_channel.cc b/src/runtime/rpc/rpc_channel.cc
index d757eb718a..50f7195a22 100644
--- a/src/runtime/rpc/rpc_channel.cc
+++ b/src/runtime/rpc/rpc_channel.cc
@@ -22,6 +22,8 @@
  */
 #include "rpc_channel.h"
 
+#include <tvm/runtime/logging.h>
+
 #include <string>
 
 namespace tvm {
diff --git a/src/runtime/rpc/rpc_channel.h b/src/runtime/rpc/rpc_channel.h
index 62af2d92a8..3c8f6b404c 100644
--- a/src/runtime/rpc/rpc_channel.h
+++ b/src/runtime/rpc/rpc_channel.h
@@ -24,7 +24,7 @@
 #ifndef TVM_RUNTIME_RPC_RPC_CHANNEL_H_
 #define TVM_RUNTIME_RPC_RPC_CHANNEL_H_
 
-#include <tvm/runtime/packed_func.h>
+#include <tvm/ffi/function.h>
 
 #include <utility>
 
diff --git a/src/runtime/rpc/rpc_endpoint.cc b/src/runtime/rpc/rpc_endpoint.cc
index 9e54223a09..7ee7214056 100644
--- a/src/runtime/rpc/rpc_endpoint.cc
+++ b/src/runtime/rpc/rpc_endpoint.cc
@@ -26,7 +26,6 @@
 #include <tvm/ffi/function.h>
 #include <tvm/runtime/base.h>
 #include <tvm/runtime/device_api.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/runtime/serializer.h>
 
 #include <algorithm>
diff --git a/src/runtime/rpc/rpc_endpoint.h b/src/runtime/rpc/rpc_endpoint.h
index 5d94aed470..195adef053 100644
--- a/src/runtime/rpc/rpc_endpoint.h
+++ b/src/runtime/rpc/rpc_endpoint.h
@@ -24,7 +24,7 @@
 #ifndef TVM_RUNTIME_RPC_RPC_ENDPOINT_H_
 #define TVM_RUNTIME_RPC_RPC_ENDPOINT_H_
 
-#include <tvm/runtime/packed_func.h>
+#include <tvm/ffi/function.h>
 
 #include <memory>
 #include <mutex>
diff --git a/src/runtime/rpc/rpc_local_session.cc 
b/src/runtime/rpc/rpc_local_session.cc
index 1769ed077f..a64bbb7132 100644
--- a/src/runtime/rpc/rpc_local_session.cc
+++ b/src/runtime/rpc/rpc_local_session.cc
@@ -25,6 +25,7 @@
 
 #include <tvm/ffi/function.h>
 #include <tvm/runtime/device_api.h>
+#include <tvm/runtime/ndarray.h>
 
 #include <memory>
 #include <vector>
@@ -120,7 +121,7 @@ void LocalSession::CopyToRemote(void* from_bytes, DLTensor* 
to, uint64_t nbytes)
 }
 
 void LocalSession::CopyFromRemote(DLTensor* from, void* to_bytes, uint64_t 
nbytes) {
-  ICHECK_EQ(nbytes, GetDataSize(*from));
+  ICHECK_EQ(nbytes, ffi::GetDataSize(*from));
   DLTensor to;
   to.data = to_bytes;
   to.device = {kDLCPU, 0};
diff --git a/src/runtime/rpc/rpc_local_session.h 
b/src/runtime/rpc/rpc_local_session.h
index 4019552ebc..9035b486c9 100644
--- a/src/runtime/rpc/rpc_local_session.h
+++ b/src/runtime/rpc/rpc_local_session.h
@@ -24,8 +24,8 @@
 #ifndef TVM_RUNTIME_RPC_RPC_LOCAL_SESSION_H_
 #define TVM_RUNTIME_RPC_RPC_LOCAL_SESSION_H_
 
+#include <tvm/ffi/function.h>
 #include <tvm/runtime/device_api.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <functional>
 #include <string>
diff --git a/src/runtime/rpc/rpc_session.cc b/src/runtime/rpc/rpc_session.cc
index 76e07e00fb..ace9cf9b94 100644
--- a/src/runtime/rpc/rpc_session.cc
+++ b/src/runtime/rpc/rpc_session.cc
@@ -23,8 +23,8 @@
  */
 #include "rpc_session.h"
 
+#include <tvm/ffi/function.h>
 #include <tvm/runtime/device_api.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <array>
 #include <mutex>
@@ -35,7 +35,7 @@ namespace runtime {
 bool RPCSession::IsAsync() const { return false; }
 
 void RPCSession::SendException(FAsyncCallback callback, const char* msg) {
-  AnyView packed_args[1] = {msg};
+  ffi::AnyView packed_args[1] = {msg};
   callback(RPCCode::kException, ffi::PackedArgs(packed_args, 1));
 }
 
@@ -51,7 +51,7 @@ void RPCSession::AsyncCallFunc(PackedFuncHandle func, 
ffi::PackedArgs packed_arg
 
 void RPCSession::AsyncCopyToRemote(void* local_from_bytes, DLTensor* 
remote_to, uint64_t nbytes,
                                    RPCSession::FAsyncCallback callback) {
-  AnyView packed_args[1] = {nullptr};
+  ffi::AnyView packed_args[1] = {nullptr};
 
   try {
     this->CopyToRemote(local_from_bytes, remote_to, nbytes);
@@ -63,7 +63,7 @@ void RPCSession::AsyncCopyToRemote(void* local_from_bytes, 
DLTensor* remote_to,
 
 void RPCSession::AsyncCopyFromRemote(DLTensor* remote_from, void* 
local_to_bytes, uint64_t nbytes,
                                      RPCSession::FAsyncCallback callback) {
-  AnyView packed_args[1] = {nullptr};
+  ffi::AnyView packed_args[1] = {nullptr};
 
   try {
     this->CopyFromRemote(remote_from, local_to_bytes, nbytes);
diff --git a/src/runtime/rpc/rpc_session.h b/src/runtime/rpc/rpc_session.h
index 271e26dfd0..c0ec2067eb 100644
--- a/src/runtime/rpc/rpc_session.h
+++ b/src/runtime/rpc/rpc_session.h
@@ -24,8 +24,10 @@
 #ifndef TVM_RUNTIME_RPC_RPC_SESSION_H_
 #define TVM_RUNTIME_RPC_RPC_SESSION_H_
 
+#include <tvm/ffi/function.h>
 #include <tvm/runtime/device_api.h>
-#include <tvm/runtime/packed_func.h>
+#include <tvm/runtime/module.h>
+#include <tvm/runtime/object.h>
 
 #include <functional>
 #include <memory>
diff --git a/src/runtime/spirv/spirv_shader.h b/src/runtime/spirv/spirv_shader.h
index d194f70629..06b331d333 100644
--- a/src/runtime/spirv/spirv_shader.h
+++ b/src/runtime/spirv/spirv_shader.h
@@ -20,10 +20,11 @@
 #ifndef TVM_RUNTIME_SPIRV_SPIRV_SHADER_H_
 #define TVM_RUNTIME_SPIRV_SPIRV_SHADER_H_
 
+#include <dmlc/io.h>
+#include <tvm/ffi/function.h>
 #include <tvm/runtime/base.h>
 #include <tvm/runtime/device_api.h>
 #include <tvm/runtime/logging.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <vector>
 
diff --git a/src/runtime/static_library.cc b/src/runtime/static_library.cc
index 3eae0cb739..08beb8cbc5 100644
--- a/src/runtime/static_library.cc
+++ b/src/runtime/static_library.cc
@@ -27,7 +27,6 @@
 #include <tvm/ffi/function.h>
 #include <tvm/ffi/memory.h>
 #include <tvm/runtime/module.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <iostream>
 
diff --git a/src/runtime/thread_pool.cc b/src/runtime/thread_pool.cc
index e3a3a0a8fc..d266fb7da8 100644
--- a/src/runtime/thread_pool.cc
+++ b/src/runtime/thread_pool.cc
@@ -27,7 +27,6 @@
 #include <tvm/runtime/base.h>
 #include <tvm/runtime/c_backend_api.h>
 #include <tvm/runtime/logging.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/runtime/threading_backend.h>
 #if TVM_THREADPOOL_USE_OPENMP
 #include <omp.h>
diff --git a/src/runtime/thread_storage_scope.h 
b/src/runtime/thread_storage_scope.h
index 049e6467d1..914fe67819 100644
--- a/src/runtime/thread_storage_scope.h
+++ b/src/runtime/thread_storage_scope.h
@@ -24,7 +24,7 @@
 #ifndef TVM_RUNTIME_THREAD_STORAGE_SCOPE_H_
 #define TVM_RUNTIME_THREAD_STORAGE_SCOPE_H_
 
-#include <tvm/runtime/packed_func.h>
+#include <tvm/ffi/function.h>
 
 #include <string>
 #include <vector>
diff --git a/src/runtime/vulkan/vulkan_common.h 
b/src/runtime/vulkan/vulkan_common.h
index c1961d8065..fb4776c98a 100644
--- a/src/runtime/vulkan/vulkan_common.h
+++ b/src/runtime/vulkan/vulkan_common.h
@@ -24,7 +24,6 @@
 #include <tvm/runtime/base.h>
 #include <tvm/runtime/device_api.h>
 #include <tvm/runtime/logging.h>
-#include <tvm/runtime/packed_func.h>
 #include <vulkan/vulkan.h>
 
 #include <memory>
diff --git a/src/target/datatype/registry.cc b/src/target/datatype/registry.cc
index 2c1fc84d40..88f96b6a70 100644
--- a/src/target/datatype/registry.cc
+++ b/src/target/datatype/registry.cc
@@ -19,6 +19,7 @@
 #include "registry.h"
 
 #include <tvm/ffi/function.h>
+#include <tvm/runtime/data_type.h>
 
 namespace tvm {
 namespace datatype {
diff --git a/src/target/datatype/registry.h b/src/target/datatype/registry.h
index eba7739a6b..b1a1a4a7f5 100644
--- a/src/target/datatype/registry.h
+++ b/src/target/datatype/registry.h
@@ -21,7 +21,6 @@
 #define TVM_TARGET_DATATYPE_REGISTRY_H_
 
 #include <tvm/ffi/function.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <string>
 #include <unordered_map>
diff --git a/src/target/llvm/llvm_module.cc b/src/target/llvm/llvm_module.cc
index e5b2fc47ec..ed70d86926 100644
--- a/src/target/llvm/llvm_module.cc
+++ b/src/target/llvm/llvm_module.cc
@@ -62,7 +62,6 @@
 #include <tvm/runtime/logging.h>
 #include <tvm/runtime/module.h>
 #include <tvm/runtime/object.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/support/with.h>
 #include <tvm/target/codegen.h>
 #include <tvm/target/target.h>
diff --git a/src/target/source/source_module.cc 
b/src/target/source/source_module.cc
index 054edd861a..5e1f132fb5 100644
--- a/src/target/source/source_module.cc
+++ b/src/target/source/source_module.cc
@@ -26,7 +26,6 @@
 #include <tvm/ffi/function.h>
 #include <tvm/runtime/module.h>
 #include <tvm/runtime/ndarray.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <algorithm>
 #include <functional>
diff --git a/src/target/spirv/ir_builder.h b/src/target/spirv/ir_builder.h
index 057007be72..5df779c595 100644
--- a/src/target/spirv/ir_builder.h
+++ b/src/target/spirv/ir_builder.h
@@ -24,7 +24,7 @@
 #ifndef TVM_TARGET_SPIRV_IR_BUILDER_H_
 #define TVM_TARGET_SPIRV_IR_BUILDER_H_
 
-#include <tvm/runtime/packed_func.h>
+#include <tvm/ffi/function.h>
 #include <tvm/tir/expr.h>
 
 // clang-format off
diff --git a/src/tir/transforms/make_packed_api.cc 
b/src/tir/transforms/make_packed_api.cc
index 83ce75cead..340e018a8d 100644
--- a/src/tir/transforms/make_packed_api.cc
+++ b/src/tir/transforms/make_packed_api.cc
@@ -22,6 +22,7 @@
  */
 #include <tvm/ffi/function.h>
 #include <tvm/runtime/device_api.h>
+#include <tvm/runtime/module.h>
 #include <tvm/target/target.h>
 #include <tvm/tir/analysis.h>
 #include <tvm/tir/buffer.h>
diff --git a/src/topi/broadcast.cc b/src/topi/broadcast.cc
index 2a868145c9..1ee85e7b8c 100644
--- a/src/topi/broadcast.cc
+++ b/src/topi/broadcast.cc
@@ -22,7 +22,6 @@
  * \file broadcast.cc
  */
 #include <tvm/ffi/function.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/topi/broadcast.h>
 #include <tvm/topi/utils.h>
 
diff --git a/src/topi/elemwise.cc b/src/topi/elemwise.cc
index 05e59b9713..13947abcf6 100644
--- a/src/topi/elemwise.cc
+++ b/src/topi/elemwise.cc
@@ -22,7 +22,6 @@
  * \file elemwise.cc
  */
 #include <tvm/ffi/function.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/topi/elemwise.h>
 
 namespace tvm {
diff --git a/src/topi/nn.cc b/src/topi/nn.cc
index 68ba43090a..7fef93550d 100644
--- a/src/topi/nn.cc
+++ b/src/topi/nn.cc
@@ -22,7 +22,6 @@
  * \file nn.cc
  */
 #include <tvm/ffi/function.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/topi/nn.h>
 #include <tvm/topi/nn/bias_add.h>
 #include <tvm/topi/nn/bnn.h>
diff --git a/src/topi/reduction.cc b/src/topi/reduction.cc
index 1720ddd602..f8920bdefd 100644
--- a/src/topi/reduction.cc
+++ b/src/topi/reduction.cc
@@ -22,7 +22,6 @@
  * \file reduction.cc
  */
 #include <tvm/ffi/function.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/topi/reduction.h>
 #include <tvm/topi/utils.h>
 
diff --git a/src/topi/transform.cc b/src/topi/transform.cc
index 50aa506382..5826fdac86 100644
--- a/src/topi/transform.cc
+++ b/src/topi/transform.cc
@@ -22,7 +22,6 @@
  * \file transform.cc
  */
 #include <tvm/ffi/function.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/topi/einsum.h>
 #include <tvm/topi/transform.h>
 #include <tvm/topi/utils.h>
diff --git a/src/topi/utils.cc b/src/topi/utils.cc
index 66da512a66..9a668ad2ac 100644
--- a/src/topi/utils.cc
+++ b/src/topi/utils.cc
@@ -23,7 +23,6 @@
  */
 
 #include <tvm/ffi/function.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/topi/detail/tensor_utils.h>
 
 namespace tvm {
diff --git a/src/topi/vision.cc b/src/topi/vision.cc
index 844f8f9459..57d9362680 100644
--- a/src/topi/vision.cc
+++ b/src/topi/vision.cc
@@ -22,7 +22,6 @@
  * \file vision.cc
  */
 #include <tvm/ffi/function.h>
-#include <tvm/runtime/packed_func.h>
 #include <tvm/topi/vision/reorg.h>
 
 namespace tvm {
diff --git a/tests/cpp-runtime/hexagon/run_all_tests.cc 
b/tests/cpp-runtime/hexagon/run_all_tests.cc
index 313b149e09..cf8160971a 100644
--- a/tests/cpp-runtime/hexagon/run_all_tests.cc
+++ b/tests/cpp-runtime/hexagon/run_all_tests.cc
@@ -19,7 +19,6 @@
 
 #include <gtest/gtest.h>
 #include <tvm/ffi/function.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <string>
 #include <vector>
diff --git a/tests/cpp-runtime/hexagon/run_unit_tests.cc 
b/tests/cpp-runtime/hexagon/run_unit_tests.cc
index 9b55151638..a4c613b411 100644
--- a/tests/cpp-runtime/hexagon/run_unit_tests.cc
+++ b/tests/cpp-runtime/hexagon/run_unit_tests.cc
@@ -19,7 +19,6 @@
 
 #include <gtest/gtest.h>
 #include <tvm/ffi/function.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <string>
 #include <vector>
diff --git a/tests/cpp/llvm_codegen_registry_test.cc 
b/tests/cpp/llvm_codegen_registry_test.cc
index 49457fd0da..b5cea29c6b 100644
--- a/tests/cpp/llvm_codegen_registry_test.cc
+++ b/tests/cpp/llvm_codegen_registry_test.cc
@@ -22,7 +22,6 @@
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
 #include <tvm/ffi/function.h>
-#include <tvm/runtime/packed_func.h>
 
 #include <string>
 
diff --git a/tests/python/relax/frontend_nn_extern_module.cc 
b/tests/python/relax/frontend_nn_extern_module.cc
index 09adbe9780..1bac39b350 100644
--- a/tests/python/relax/frontend_nn_extern_module.cc
+++ b/tests/python/relax/frontend_nn_extern_module.cc
@@ -21,8 +21,8 @@
  * \brief Testing code to be compiled by Relax nn.SourceModule
  */
 #include <dlpack/dlpack.h>
+#include <tvm/ffi/function.h>
 #include <tvm/runtime/data_type.h>
-#include <tvm/runtime/packed_func.h>
 
 namespace {
 
@@ -65,5 +65,5 @@ int _test_sym(DLTensor* a, DLTensor* b, DLTensor* c) {
   return 0;
 }
 }  // namespace
-TVM_DLL_EXPORT_TYPED_FUNC(ext_scalar_add, _scalar_add);
-TVM_DLL_EXPORT_TYPED_FUNC(ext_test_sym, _test_sym);
+TVM_FFI_DLL_EXPORT_TYPED_FUNC(ext_scalar_add, _scalar_add);
+TVM_FFI_DLL_EXPORT_TYPED_FUNC(ext_test_sym, _test_sym);

Reply via email to