https://github.com/KseniyaTikhomirova updated https://github.com/llvm/llvm-project/pull/189068
>From 03a1c675484bf83746ac9cb9b9580e2f3bed238f Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" <[email protected]> Date: Wed, 25 Mar 2026 05:18:49 -0700 Subject: [PATCH 1/5] [libsycl] add single_task Signed-off-by: Tikhomirova, Kseniya <[email protected]> addition to single task Signed-off-by: Tikhomirova, Kseniya <[email protected]> --- libsycl/docs/index.rst | 4 + .../sycl/__impl/detail/arg_wrapper.hpp | 135 ++++++++++++++++++ .../sycl/__impl/detail/unified_range_view.hpp | 52 +++++++ libsycl/include/sycl/__impl/queue.hpp | 96 +++++++++++++ libsycl/src/detail/queue_impl.cpp | 112 +++++++++++++++ libsycl/src/detail/queue_impl.hpp | 37 +++++ libsycl/src/queue.cpp | 19 +++ libsycl/test/basic/get_backend.cpp | 54 +++++++ libsycl/test/basic/submit_fn_ptr.cpp | 18 +++ 9 files changed, 527 insertions(+) create mode 100644 libsycl/include/sycl/__impl/detail/arg_wrapper.hpp create mode 100644 libsycl/include/sycl/__impl/detail/unified_range_view.hpp create mode 100644 libsycl/test/basic/get_backend.cpp create mode 100644 libsycl/test/basic/submit_fn_ptr.cpp diff --git a/libsycl/docs/index.rst b/libsycl/docs/index.rst index 9aa36b4a54c57..5961eeeedcedb 100644 --- a/libsycl/docs/index.rst +++ b/libsycl/docs/index.rst @@ -113,6 +113,10 @@ TODO for added SYCL classes * to implement submit & copy with accessors (low priority) * get_info & properties * ctors that accepts context (blocked by lack of liboffload support) + * nd_range kernel submissions + * cross-context events wait (host tasks are needed) + * implement check if lambda arguments are device copyable (requires clang support of corresponding builtins) + * kernel instantiating on host (debugging purposes) * ``property_list``: to fully implement and integrate with existing SYCL runtime classes supporting it * usm allocations: diff --git a/libsycl/include/sycl/__impl/detail/arg_wrapper.hpp b/libsycl/include/sycl/__impl/detail/arg_wrapper.hpp new file mode 100644 index 0000000000000..96f60a3121787 --- /dev/null +++ b/libsycl/include/sycl/__impl/detail/arg_wrapper.hpp @@ -0,0 +1,135 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains helper functions used to wrap kernel arguments to +/// typeless collection. +/// +//===----------------------------------------------------------------------===// + +#ifndef _LIBSYCL___IMPL_DETAIL_ARG_WRAPPER_HPP +#define _LIBSYCL___IMPL_DETAIL_ARG_WRAPPER_HPP + +#include <sycl/__impl/detail/config.hpp> +#include <sycl/__impl/exception.hpp> + +#include <cassert> +#include <memory> + +_LIBSYCL_BEGIN_NAMESPACE_SYCL + +namespace detail { + +/// Base class is needed for unification, we pass arguments through ABI +/// boundary. +class ArgWrapperBase { +public: + ArgWrapperBase(const ArgWrapperBase &) = delete; + ArgWrapperBase &operator=(const ArgWrapperBase &) = delete; + virtual ~ArgWrapperBase() = default; + + virtual void deepCopy() = 0; + virtual size_t getSize() const = 0; + virtual const void *getPtr() const = 0; + +protected: + ArgWrapperBase() = default; +}; + +/// Helps to manage arguments in a typeless way. +template <typename Type> class ArgWrapper : public ArgWrapperBase { +public: + ArgWrapper(Type &Arg) { Ptr = &Arg; } + ArgWrapper(const ArgWrapper &) = delete; + ArgWrapper &operator=(const ArgWrapper &) = delete; + + /// \return size of argument in bytes. + size_t getSize() const override { return sizeof(Type); } + + /// Returns raw pointer to the corresponding argument. + /// No copy is done by this method. It works with pointer to the memory whose + /// existence must be guaranteed by class user or with copy that must be + /// explicitly requested by class user via deepCopy method. + /// \return pointer to the argument. + const void *getPtr() const override { + assert((!DeepCopy || (DeepCopy.get()) == Ptr) && + "Incorrect state of copied argument"); + return Ptr; + } + + /// Copies agrument to RT owned storage. + void deepCopy() override { + if (DeepCopy) + return; + + DeepCopy.reset(new Type(*Ptr)); + Ptr = DeepCopy.get(); + } + +private: + Type *Ptr; + std::unique_ptr<Type> DeepCopy; +}; + +/// Collection of arguments. Provides functionality to accumulate all arguments +/// data to pass through ABI boundary. +class ArgCollection { +public: + /// Adds argument to the collection. Don't own the memory. Argument lifetime + /// must be guaranteed by class user. If extended lifetime is needed (copy), + /// deepCopy must be called. + template <typename Type> void addArg(Type &Arg) { + MArgs.emplace_back(new ArgWrapper(Arg)); + } + + /// \return array of argument pointers. + const void **getArgPtrArray() { + if (MPtrs.size() != MArgs.size()) { + MPtrs.clear(); + MPtrs.reserve(MArgs.size()); + auto it = MArgs.cbegin(); + while (it != MArgs.cend()) { + MPtrs.push_back((*it++)->getPtr()); + } + } + return MPtrs.data(); + } + + /// \return array of argument sizes. + int64_t *getSizesArray() { + if (MSizes.size() != MArgs.size()) { + MSizes.clear(); + MSizes.reserve(MArgs.size()); + auto it = MArgs.cbegin(); + while (it != MArgs.cend()) { + MSizes.push_back(static_cast<int64_t>((*it++)->getSize())); + } + } + return MSizes.data(); + } + + /// \return count of arguments in collection. + size_t getArgCount() { return MArgs.size(); } + + /// Extends arguments lifetime by doing copy of all arguments. + void deepCopy() { + for (auto &Arg : MArgs) + Arg->deepCopy(); + } + +private: + std::vector<std::unique_ptr<ArgWrapperBase>> MArgs; + std::vector<int64_t> MSizes; + std::vector<const void *> MPtrs; +}; + +} // namespace detail + +_LIBSYCL_END_NAMESPACE_SYCL + +#endif // _LIBSYCL___IMPL_DETAIL_ARG_WRAPPER_HPP diff --git a/libsycl/include/sycl/__impl/detail/unified_range_view.hpp b/libsycl/include/sycl/__impl/detail/unified_range_view.hpp new file mode 100644 index 0000000000000..afa613fc8627b --- /dev/null +++ b/libsycl/include/sycl/__impl/detail/unified_range_view.hpp @@ -0,0 +1,52 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains helper function class to unify ABI for different kernel +/// ranges. +/// +//===----------------------------------------------------------------------===// + +#ifndef _LIBSYCL___IMPL_DETAIL_UNIFIED_RANGE_VIEW_HPP +#define _LIBSYCL___IMPL_DETAIL_UNIFIED_RANGE_VIEW_HPP + +#include <sycl/__impl/detail/config.hpp> + +_LIBSYCL_BEGIN_NAMESPACE_SYCL + +namespace detail { + +/// The structure to keep dimension and references to ranges unified for +/// all dimensions. +class UnifiedRangeView { + +public: + /// Default contructed view matches single task execution range. + UnifiedRangeView() = default; + UnifiedRangeView(const UnifiedRangeView &Desc) = default; + UnifiedRangeView(UnifiedRangeView &&Desc) = default; + UnifiedRangeView &operator=(const UnifiedRangeView &Desc) = default; + UnifiedRangeView &operator=(UnifiedRangeView &&Desc) = default; + + // TODO: ctors with sycl::range and nd::range will be added later. + + UnifiedRangeView(const size_t *GlobalSize, const size_t *LocalSize, + const size_t *Offset, size_t Dims) + : MGlobalSize(GlobalSize), MLocalSize(LocalSize), MOffset(Offset), + MDims(Dims) {} + + const size_t *MGlobalSize = nullptr; + const size_t *MLocalSize = nullptr; + const size_t *MOffset = nullptr; + size_t MDims = 1; +}; +} // namespace detail + +_LIBSYCL_END_NAMESPACE_SYCL + +#endif // _LIBSYCL___IMPL_DETAIL_UNIFIED_RANGE_VIEW_HPP diff --git a/libsycl/include/sycl/__impl/queue.hpp b/libsycl/include/sycl/__impl/queue.hpp index 587f56a8eb245..d1ac320433c38 100644 --- a/libsycl/include/sycl/__impl/queue.hpp +++ b/libsycl/include/sycl/__impl/queue.hpp @@ -20,9 +20,11 @@ #include <sycl/__impl/event.hpp> #include <sycl/__impl/property_list.hpp> +#include <sycl/__impl/detail/arg_wrapper.hpp> #include <sycl/__impl/detail/config.hpp> #include <sycl/__impl/detail/default_async_handler.hpp> #include <sycl/__impl/detail/obj_utils.hpp> +#include <sycl/__impl/detail/unified_range_view.hpp> _LIBSYCL_BEGIN_NAMESPACE_SYCL @@ -31,6 +33,27 @@ class context; namespace detail { class QueueImpl; +template <typename, typename T> struct CheckFunctionSignature { + static_assert(std::integral_constant<T, false>::value, + "Second template parameter is required to be of function type"); +}; + +template <typename F, typename RetT, typename... Args> +struct CheckFunctionSignature<F, RetT(Args...)> { +private: + template <typename T> + static constexpr auto check(T *) -> typename std::is_same< + decltype(std::declval<T>().operator()(std::declval<Args>()...)), + RetT>::type; + + template <typename> static constexpr std::false_type check(...); + + using type = decltype(check<F>(0)); + +public: + static constexpr bool value = type::value; +}; + } // namespace detail // SYCL 2020 4.6.5. Queue class. @@ -138,12 +161,85 @@ class _LIBSYCL_EXPORT queue { template <typename Param> typename Param::return_type get_backend_info() const; + /// Defines and invokes a SYCL kernel function as a lambda expression or a + /// named function object type. + /// + /// \param kernelFunc is the kernel functor or lambda. + /// \return an event that represents the status of the submitted kernel. + template <typename KernelName, typename KernelType> + event single_task(const KernelType &kernelFunc) { + return single_task<KernelName, KernelType>({}, kernelFunc); + } + + /// Defines and invokes a SYCL kernel function as a lambda expression or a + /// named function object type. + /// + /// \param depEvent is an event that specifies the kernel dependency. + /// \param kernelFunc is the kernel functor or lambda. + /// \return an event that represents the status of the submitted kernel. + template <typename KernelName, typename KernelType> + event single_task(event depEvent, const KernelType &kernelFunc) { + return single_task<KernelName, KernelType>({depEvent}, kernelFunc); + } + + /// Defines and invokes a SYCL kernel function as a lambda expression or a + /// named function object type. + /// + /// \param depEvents is a collection of events which specify the kernel + /// dependencies. + /// \param kernelFunc is the kernel functor or lambda. + /// \return an event that represents the status of the submitted kernel. + template <typename KernelName, typename KernelType> + event single_task(const std::vector<event> &depEvents, + const KernelType &kernelFunc) { + static_assert( + (detail::CheckFunctionSignature<std::remove_reference_t<KernelType>, + void()>::value), + "sycl::queue::single_task() requires a kernel instead of command " + "group. "); + + setKernelParameters(depEvents); + submitSingleTask<KernelName, KernelType>(kernelFunc); + return getLastEvent(); + } + /// Blocks the calling thread until all commands previously submitted to this /// queue have completed. Synchronous errors are reported through SYCL /// exceptions. void wait(); private: + // Name of this function is defined by compiler. It generates call to this + // function in the host implementation of KernelFunc in submitSingleTask. + template <typename, typename... Args> + void sycl_kernel_launch(const char *KernelName, Args &&...args) { + static_assert((sizeof...(args) == 1) && + "Only 2 arguments are expected in sycl_kernel_launch."); + detail::ArgCollection TypelessArgs; + (TypelessArgs.addArg(args), ...); + + submitKernelImpl(KernelName, TypelessArgs); + } + +#ifdef SYCL_LANGUAGE_VERSION +# define _LIBSYCL_ENTRY_POINT_ATTR__(KernelName) \ + [[clang::sycl_kernel_entry_point(KernelName)]] +#else +# define _LIBSYCL_ENTRY_POINT_ATTR__(KernelName) +#endif // SYCL_LANGUAGE_VERSION + + template <typename KernelName, typename KernelType> + _LIBSYCL_ENTRY_POINT_ATTR__(KernelName) + void submitSingleTask(const KernelType KernelFunc) { + KernelFunc(); + } + + event getLastEvent(); + void submitKernelImpl(const char *KernelName, + detail::ArgCollection &TypelessArgs); + void setKernelParameters(const std::vector<event> &Events, + const detail::UnifiedRangeView &Range = {}); + queue(const std::shared_ptr<detail::QueueImpl> &Impl) : impl(Impl) {} std::shared_ptr<detail::QueueImpl> impl; diff --git a/libsycl/src/detail/queue_impl.cpp b/libsycl/src/detail/queue_impl.cpp index 74ccc48877c09..243f38612e74c 100644 --- a/libsycl/src/detail/queue_impl.cpp +++ b/libsycl/src/detail/queue_impl.cpp @@ -16,6 +16,32 @@ _LIBSYCL_BEGIN_NAMESPACE_SYCL namespace detail { +static void setKernelLaunchArgs(const detail::UnifiedRangeView &Range, + ol_kernel_launch_size_args_t &ArgsToSet) { + size_t GlobalSize[3] = {1, 1, 1}; + if (Range.MGlobalSize) { + for (uint32_t I = 0; I < Range.MDims; I++) { + GlobalSize[I] = Range.MGlobalSize[I]; + } + } + + size_t GroupSize[3] = {1, 1, 1}; + if (Range.MLocalSize) { + for (uint32_t I = 0; I < Range.MDims; I++) { + GroupSize[I] = Range.MLocalSize[I]; + } + } + + ArgsToSet.Dimensions = Range.MDims; + ArgsToSet.NumGroups.x = GlobalSize[0] / GroupSize[0]; + ArgsToSet.NumGroups.y = GlobalSize[1] / GroupSize[1]; + ArgsToSet.NumGroups.z = GlobalSize[2] / GroupSize[2]; + ArgsToSet.GroupSize.x = GroupSize[0]; + ArgsToSet.GroupSize.y = GroupSize[1]; + ArgsToSet.GroupSize.z = GroupSize[2]; + ArgsToSet.DynSharedMemory = 0; +} + QueueImpl::QueueImpl(DeviceImpl &deviceImpl, const async_handler &asyncHandler, const property_list &propList, PrivateTag) : MIsInorder(false), MAsyncHandler(asyncHandler), MPropList(propList), @@ -34,5 +60,91 @@ backend QueueImpl::getBackend() const noexcept { return MDevice.getBackend(); } void QueueImpl::wait() { callAndThrow(olSyncQueue, MOffloadQueue); } +static bool checkEventsPlatformMatch(std::vector<EventImplPtr> &Events, + const PlatformImpl &QueuePlatform) { + // liboffload limitation to olWaitEvents. We can't do any extra handling for + // cross context/platform events without host task support now. + // "The input events can be from any queue on any device provided by the + // same platform as `Queue`." + return std::all_of(Events.cbegin(), Events.cend(), + [&QueuePlatform](const EventImplPtr &Event) { + return &Event->getPlatformImpl() == &QueuePlatform; + }); +} + +void QueueImpl::setKernelParameters(std::vector<EventImplPtr> &&Events, + const detail::UnifiedRangeView &Range) { + if (!checkEventsPlatformMatch(Events, MDevice.getPlatformImpl())) + throw sycl::exception( + sycl::make_error_code(sycl::errc::feature_not_supported), + "libsycl doesn't support cross-context/platform event dependencies " + "now."); + + // TODO: this convertion and storing only offload events is possible only + // while we don't have host tasks (and features based on host tasks, like + // streams). With them - it is very likely we should copy EventImplPtr + // (shared_ptr) and keep it here. Although it may differ if host tasks will be + // implemented on offload level (no data now). + assert(MCurrentSubmitInfo.DepEvents.empty() && + "Kernel submission must clean up dependencies."); + MCurrentSubmitInfo.DepEvents.reserve(Events.size()); + for (auto &Event : Events) { + assert(Event && "Event impl object can't be nullptr"); + MCurrentSubmitInfo.DepEvents.push_back(Event->getHandle()); + } + setKernelLaunchArgs(Range, MCurrentSubmitInfo.Range); +} + +void QueueImpl::submitKernelImpl(const char *KernelName, + detail::ArgCollection &TypelessArgs) { + ol_symbol_handle_t Kernel = + detail::ProgramManager::getInstance().getOrCreateKernel(KernelName, + MDevice); + assert(Kernel); + + ol_event_handle_t NewEvent{}; + if (!MCurrentSubmitInfo.DepEvents.empty()) { + callAndThrow(olWaitEvents, MOffloadQueue, + MCurrentSubmitInfo.DepEvents.data(), + MCurrentSubmitInfo.DepEvents.size()); + } + + const void *Arguments = nullptr; + int64_t ArgumentsSize = 0; + if (TypelessArgs.getArgCount()) { + // without decomposition and free functions extension we always expect 1 + // argument to the kernel - lambda capture. + assert(TypelessArgs.getArgCount() == 1 && + "No arg decomposition or extensions are supported now."); + // TODO: liboffload doesn't support more than 1 argument without copy now. + // It doesn't expect array of arguments, it requires a contiguous memory + // with args. While we have only 1 argument we don't need extra handling + // here, we just pass the first argument directly. + Arguments = TypelessArgs.getArgPtrArray()[0]; + ArgumentsSize = TypelessArgs.getSizesArray()[0]; + } + + // ol_kernel_launch_prop_t Props[2]; + // Props[0].type = OL_KERNEL_LAUNCH_PROP_TYPE_SIZE; + // Props[0].data = &ArgumentsSize; + // Props[1] = OL_KERNEL_LAUNCH_PROP_END; + auto Result = + olLaunchKernel(MOffloadQueue, MDevice.getHandle(), Kernel, Arguments, + ArgumentsSize, &MCurrentSubmitInfo.Range /*, Props*/); + // Clean up current kernel submit data to prepare structures for next + // submission. + MCurrentSubmitInfo.DepEvents.clear(); + MCurrentSubmitInfo.Range = {}; + if (isFailed(Result)) + throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), + std::string("Kernel submission (") + KernelName + + ") failed with " + formatCodeString(Result)); + + callAndThrow(olCreateEvent, MOffloadQueue, &NewEvent); + + MCurrentSubmitInfo.LastEvent = + EventImpl::createEventWithHandle(NewEvent, MDevice.getPlatformImpl()); +} + } // namespace detail _LIBSYCL_END_NAMESPACE_SYCL diff --git a/libsycl/src/detail/queue_impl.hpp b/libsycl/src/detail/queue_impl.hpp index cdb7595e852ec..6edb40471826a 100644 --- a/libsycl/src/detail/queue_impl.hpp +++ b/libsycl/src/detail/queue_impl.hpp @@ -15,6 +15,7 @@ #include <OffloadAPI.h> #include <memory> +#include <mutex> _LIBSYCL_BEGIN_NAMESPACE_SYCL namespace detail { @@ -62,16 +63,52 @@ class QueueImpl : public std::enable_shared_from_this<QueueImpl> { /// \return true if and only if the queue is in order. bool isInOrder() const { return MIsInorder; } + /// Enqueues kernel to liboffload. + /// Kernel parameters like dependencies and range must be passed in advance by + /// calling setKernelParameters. + /// \param KernelName a name of kernel to be enqueued. + /// \param TypelessArgs data about kernel arguments to be used for enqueue. + void submitKernelImpl(const char *KernelName, + detail::ArgCollection &TypelessArgs); + + /// \return an event impl object that corresponds to the last kernel + /// submission in the calling thread. + EventImplPtr getLastEvent() { + assert(MCurrentSubmitInfo.LastEvent && + "getLastEvent must be called after enqueue"); + return MCurrentSubmitInfo.LastEvent; + } + + /// Sets kernel parameters to be used in the next submitKernelImpl call. + /// Must be called prior to submitKernelImpl call. + /// \param Events a collection of events that kernal depends on. + /// \param Range a unified range view of execution range. + void setKernelParameters(std::vector<EventImplPtr> &&Events, + const detail::UnifiedRangeView &Range); + /// Waits for completion of all kernels submitted to this queue. void wait(); private: + // Queue features. ol_queue_handle_t MOffloadQueue = {}; const bool MIsInorder; const async_handler MAsyncHandler; const property_list MPropList; DeviceImpl &MDevice; ContextImpl &MContext; + + // Submit data. + struct KernelSubmitInfo { + EventImplPtr LastEvent; + ol_kernel_launch_size_args_t Range; + // TODO: consider storing EventImplPtr here, it will work with plain handle + // only because submission is done within queue::submit call. Otherwise we + // need to ensure that event handle is still alive by keeping our own copy + // of EventImpl. + std::vector<ol_event_handle_t> DepEvents; + }; + inline static thread_local KernelSubmitInfo MCurrentSubmitInfo = {}; }; } // namespace detail diff --git a/libsycl/src/queue.cpp b/libsycl/src/queue.cpp index 9fe020eabf2cc..f9d867e9567d7 100644 --- a/libsycl/src/queue.cpp +++ b/libsycl/src/queue.cpp @@ -33,6 +33,25 @@ device queue::get_device() const { bool queue::is_in_order() const { return impl->isInOrder(); } +event queue::getLastEvent() { + return detail::createSyclObjFromImpl<event>(impl->getLastEvent()); +} + +void queue::setKernelParameters(const std::vector<event> &Events, + const detail::UnifiedRangeView &Range) { + std::vector<detail::EventImplPtr> DepEventImplRefs; + DepEventImplRefs.reserve(Events.size()); + for (const auto &Event : Events) { + DepEventImplRefs.push_back(detail::getSyclObjImpl(Event)); + } + return impl->setKernelParameters(std::move(DepEventImplRefs), Range); +} + +void queue::submitKernelImpl(const char *KernelName, + detail::ArgCollection &TypelessArgs) { + impl->submitKernelImpl(KernelName, TypelessArgs); +} + void queue::wait() { return impl->wait(); } _LIBSYCL_END_NAMESPACE_SYCL diff --git a/libsycl/test/basic/get_backend.cpp b/libsycl/test/basic/get_backend.cpp new file mode 100644 index 0000000000000..064149a0c67e8 --- /dev/null +++ b/libsycl/test/basic/get_backend.cpp @@ -0,0 +1,54 @@ +// REQUIRES: any-device +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %t.out + +#include <iostream> + +#include <sycl/sycl.hpp> + +using namespace sycl; + +class Kernel1; + +bool check(backend be) { + switch (be) { + case backend::opencl: + case backend::level_zero: + case backend::cuda: + case backend::hip: + return true; + default: + return false; + } + return false; +} + +inline void return_fail() { + std::cout << "Failed" << std::endl; + exit(1); +} + +int main() { + for (const auto &plt : platform::get_platforms()) { + if (check(plt.get_backend()) == false) { + return_fail(); + } + + auto device = device::get_devices()[0]; + if (device.get_backend() != plt.get_backend()) { + return_fail(); + } + + queue q(device); + if (q.get_backend() != plt.get_backend()) { + return_fail(); + } + + event e = q.single_task<Kernel1>([]() {}); + if (e.get_backend() != plt.get_backend()) { + return_fail(); + } + } + std::cout << "Passed" << std::endl; + return 0; +} diff --git a/libsycl/test/basic/submit_fn_ptr.cpp b/libsycl/test/basic/submit_fn_ptr.cpp new file mode 100644 index 0000000000000..2a5ce832d4db2 --- /dev/null +++ b/libsycl/test/basic/submit_fn_ptr.cpp @@ -0,0 +1,18 @@ +// REQUIRES: any-device +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %t.out + +#include <sycl/sycl.hpp> + +class Test; + +int main() { + sycl::queue q; + int *p = sycl::malloc_shared<int>(1, q); + *p = 0; + q.single_task<Test>([=]() { *p = 42; }); + q.wait(); + assert(*p == 42); + sycl::free(p, q); + return 0; +} >From 104ccef02b0d36581c7a60bcf6d7459284e8db64 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" <[email protected]> Date: Thu, 26 Mar 2026 07:05:33 -0700 Subject: [PATCH 2/5] draft Signed-off-by: Tikhomirova, Kseniya <[email protected]> add tests for parallel_for Signed-off-by: Tikhomirova, Kseniya <[email protected]> remove operators from index space classes Signed-off-by: Tikhomirova, Kseniya <[email protected]> --- libsycl/docs/index.rst | 1 + .../sycl/__impl/detail/kernel_arg_helpers.hpp | 187 ++++++++ .../sycl/__impl/detail/unified_range_view.hpp | 6 +- .../sycl/__impl/index_space_classes.hpp | 413 ++++++++++++++++++ libsycl/include/sycl/__impl/queue.hpp | 223 ++++++++-- libsycl/include/sycl/__spirv/spirv_vars.hpp | 75 ++++ .../test/basic/queue_parallel_for_generic.cpp | 47 ++ libsycl/test/basic/wrapped_usm_pointers.cpp | 111 +++++ 8 files changed, 1031 insertions(+), 32 deletions(-) create mode 100644 libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp create mode 100644 libsycl/include/sycl/__impl/index_space_classes.hpp create mode 100644 libsycl/include/sycl/__spirv/spirv_vars.hpp create mode 100644 libsycl/test/basic/queue_parallel_for_generic.cpp create mode 100644 libsycl/test/basic/wrapped_usm_pointers.cpp diff --git a/libsycl/docs/index.rst b/libsycl/docs/index.rst index 5961eeeedcedb..585d05a78987d 100644 --- a/libsycl/docs/index.rst +++ b/libsycl/docs/index.rst @@ -126,6 +126,7 @@ TODO for added SYCL classes * handle sub devices once they are implemented (blocked by liboffload support) * ``event``: get_wait_list, get_info, get_profiling_info, wait_and_throw & default ctor are not implemented +* ``range``, ``id`` - to add operators * general opens: * define a way to report errors from object dtors. \ No newline at end of file diff --git a/libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp b/libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp new file mode 100644 index 0000000000000..d4a0ea9f63ff2 --- /dev/null +++ b/libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp @@ -0,0 +1,187 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// to add +//===----------------------------------------------------------------------===// + +#ifndef _LIBSYCL___IMPL_DETAIL_KERNEL_ARG_HELPERS +#define _LIBSYCL___IMPL_DETAIL_KERNEL_ARG_HELPERS + +#include <sycl/__impl/index_space_classes.hpp> + +#include <sycl/__impl/detail/config.hpp> + +#ifdef __SYCL_DEVICE_ONLY__ +# include <sycl/__spirv/spirv_vars.hpp> +#endif + +_LIBSYCL_BEGIN_NAMESPACE_SYCL + +namespace detail { + +/// \name Helpers for the unnamed lambda extension. +/// @{ +/// This class is the default kernel name template parameter type for kernel +/// invocation APIs such as single_task. +class AutoName {}; + +/// Helper struct to get a kernel name type based on given Name and Type +/// types: if Name is undefined (is a AutoName) then Type becomes +/// the Name. +template <typename Name, typename Type> struct get_kernel_name_t { + using name = Name; +}; + +/// Specialization for the case when Name is undefined. +/// This is only legal with our compiler with the unnamed lambda extension or if +/// the kernel is a functor object. +template <typename Type> struct get_kernel_name_t<detail::AutoName, Type> { + using name = Type; +}; +/// @} + +/// \name Helpers to verify kernel lambda type. +/// \brief Checks that the function is callable with operator(). +/// @{ +template <typename, typename T> struct CheckFunctionSignature { + static_assert(std::integral_constant<T, false>::value, + "Second template parameter is required to be of function type"); +}; + +template <typename F, typename RetT, typename... Args> +struct CheckFunctionSignature<F, RetT(Args...)> { +private: + template <typename T> + static constexpr auto check(T *) -> typename std::is_same< + decltype(std::declval<T>().operator()(std::declval<Args>()...)), + RetT>::type; + + template <typename> static constexpr std::false_type check(...); + + using type = decltype(check<F>(0)); + +public: + static constexpr bool value = type::value; +}; +/// @} + +/// \name Helpers to extract types of lambda arguments. +/// @{ +template <typename RetType, typename Func, typename Arg> +static Arg member_ptr_helper(RetType (Func::*)(Arg) const); + +// Non-const version of the above template to match functors whose +// 'operator()' is declared w/o the 'const' qualifier. +template <typename RetType, typename Func, typename Arg> +static Arg member_ptr_helper(RetType (Func::*)(Arg)); + +template <typename F, typename SuggestedArgType> +decltype(member_ptr_helper(&F::operator())) argument_helper(int); + +template <typename F, typename SuggestedArgType> +SuggestedArgType argument_helper(...); + +template <typename F, typename SuggestedArgType> +using lambda_arg_type = decltype(argument_helper<F, SuggestedArgType>(0)); + +#if __has_builtin(__type_pack_element) +template <int N, typename... Ts> +using nth_type_t = __type_pack_element<N, Ts...>; +#else +template <int N, typename T, typename... Ts> struct nth_type { + using type = typename nth_type<N - 1, Ts...>::type; +}; + +template <typename T, typename... Ts> struct nth_type<0, T, Ts...> { + using type = T; +}; + +template <int N, typename... Ts> +using nth_type_t = typename nth_type<N, Ts...>::type; +#endif +/// @} + +template <typename T> T *declptr() { return static_cast<T *>(nullptr); } + +template <int N> +static inline constexpr bool isValidDimensions = (N > 0) && (N < 4); + +/// Class provides helper functions for iteration space coordinates in kernel +/// invocation on device. +class Builder { +public: + Builder() = delete; + +#ifdef __SYCL_DEVICE_ONLY__ + /// \return a global index of work item currently being operated on by device. + template <int Dims> static const id<Dims> getElement(id<Dims> *) { + static_assert(isValidDimensions<Dims>, "invalid dimensions"); + return __spirv::initBuiltInGlobalInvocationId<Dims, id<Dims>>(); + } + + /// Constructs item with the given data. + /// \param Extent a range representing the dimensions of the range of possible + /// values of the item. + /// \param Index a constituent id representing the work-item’s position in the + /// iteration space. + /// \param Offset an id representing the n-dimensional offset that should be + /// added to the global-ID of each work-item, if this item represents a global + /// range. Deprecated in SYCL 2020. + template <int Dims, bool WithOffset> + static std::enable_if_t<WithOffset, item<Dims, WithOffset>> + createItem(const range<Dims> &Extent, const id<Dims> &Index, + const id<Dims> &Offset) { + return item<Dims, WithOffset>(Extent, Index, Offset); + } + + /// Constructs item with the given data. + /// \param Extent a range representing the dimensions of the range of possible + /// values of the item. + /// \param Index a constituent id representing the work-item’s position in the + /// iteration space. + template <int Dims, bool WithOffset> + static std::enable_if_t<!WithOffset, item<Dims, WithOffset>> + createItem(const range<Dims> &Extent, const id<Dims> &Index) { + return item<Dims, WithOffset>(Extent, Index); + } + + /// Creates sycl::item instance for work item that is currently being operated + /// on. + template <int Dims, bool WithOffset> + static std::enable_if_t<WithOffset, const item<Dims, WithOffset>> getItem() { + static_assert(isValidDimensions<Dims>, "invalid dimensions"); + id<Dims> GlobalId{__spirv::initBuiltInGlobalInvocationId<Dims, id<Dims>>()}; + range<Dims> GlobalSize{__spirv::initBuiltInGlobalSize<Dims, range<Dims>>()}; + id<Dims> GlobalOffset{__spirv::initBuiltInGlobalOffset<Dims, id<Dims>>()}; + return createItem<Dims, true>(GlobalSize, GlobalId, GlobalOffset); + } + + /// Creates sycl::item instance for work item that is currently being operated + /// on. + template <int Dims, bool WithOffset> + static std::enable_if_t<!WithOffset, const item<Dims, WithOffset>> getItem() { + static_assert(isValidDimensions<Dims>, "invalid dimensions"); + id<Dims> GlobalId{__spirv::initBuiltInGlobalInvocationId<Dims, id<Dims>>()}; + range<Dims> GlobalSize{__spirv::initBuiltInGlobalSize<Dims, range<Dims>>()}; + return createItem<Dims, false>(GlobalSize, GlobalId); + } + + /// \return a work item currently being operated on by device. + template <int Dims, bool WithOffset> + static auto getElement(item<Dims, WithOffset> *) + -> decltype(getItem<Dims, WithOffset>()) { + return getItem<Dims, WithOffset>(); + } + +#endif // __SYCL_DEVICE_ONLY__ +}; + +} // namespace detail + +_LIBSYCL_END_NAMESPACE_SYCL + +#endif // _LIBSYCL___IMPL_DETAIL_KERNEL_ARG_HELPERS diff --git a/libsycl/include/sycl/__impl/detail/unified_range_view.hpp b/libsycl/include/sycl/__impl/detail/unified_range_view.hpp index afa613fc8627b..8f321349d4c2e 100644 --- a/libsycl/include/sycl/__impl/detail/unified_range_view.hpp +++ b/libsycl/include/sycl/__impl/detail/unified_range_view.hpp @@ -17,6 +17,8 @@ #include <sycl/__impl/detail/config.hpp> +#include <sycl/__impl/index_space_classes.hpp> + _LIBSYCL_BEGIN_NAMESPACE_SYCL namespace detail { @@ -33,7 +35,9 @@ class UnifiedRangeView { UnifiedRangeView &operator=(const UnifiedRangeView &Desc) = default; UnifiedRangeView &operator=(UnifiedRangeView &&Desc) = default; - // TODO: ctors with sycl::range and nd::range will be added later. + template <int Dims> + UnifiedRangeView(sycl::range<Dims> &N) + : MGlobalSize(&(N[0])), MDims(size_t(Dims)) {} UnifiedRangeView(const size_t *GlobalSize, const size_t *LocalSize, const size_t *Offset, size_t Dims) diff --git a/libsycl/include/sycl/__impl/index_space_classes.hpp b/libsycl/include/sycl/__impl/index_space_classes.hpp new file mode 100644 index 0000000000000..ef2897cee5307 --- /dev/null +++ b/libsycl/include/sycl/__impl/index_space_classes.hpp @@ -0,0 +1,413 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the declaration of the SYCL 2020 ranges and index space +/// identifiers (4.9.1.). +/// +//===----------------------------------------------------------------------===// + +#ifndef _LIBSYCL___IMPL_INDEX_SPACE_CLASSES_HPP +#define _LIBSYCL___IMPL_INDEX_SPACE_CLASSES_HPP + +#include <sycl/__impl/detail/config.hpp> + +_LIBSYCL_BEGIN_NAMESPACE_SYCL + +namespace detail { + +class Builder; + +/// Helper class for dimensions data management. +template <int Dimensions = 1> class RawArray { + static_assert(Dimensions >= 1 && Dimensions <= 3, + "RawArray can only be 1, 2, or 3 Dimensional."); + +public: + /// Constructs one-dimensional instance and assign corresponding data to Dim0 + /// value. Available only if Dimensions = 1. + template <int N = Dimensions, std::enable_if_t<N == 1, bool> = true> + RawArray(size_t Dim0 = 0) : MArray{Dim0} {} + + /// Constructs two-dimensional instance and assign corresponding data. + /// Available only if Dimensions = 2. + template <int N = Dimensions, std::enable_if_t<N == 2, bool> = true> + RawArray(size_t Dim0, size_t Dim1) : MArray{Dim0, Dim1} {} + + /// Constructs two-dimensional instance with zero-initialized corresponding + /// data. Available only if Dimensions = 2. + template <int N = Dimensions, std::enable_if_t<N == 2, bool> = true> + RawArray() : RawArray(0, 0) {} + + /// Constructs three-dimensional instance and assign corresponding data. + /// Available only if Dimensions = 3. + template <int N = Dimensions, std::enable_if_t<N == 3, bool> = true> + RawArray(size_t Dim0, size_t Dim1, size_t Dim2) : MArray{Dim0, Dim1, Dim2} {} + + /// Constructs three-dimensional instance with zero-initialized corresponding + /// data. Available only if Dimensions = 3. + template <int N = Dimensions, std::enable_if_t<N == 3, bool> = true> + RawArray() : RawArray(0, 0, 0) {} + + /// Returns value for the specified dimension. + /// Results in undefined behavior if dimension is not in the range [0, + /// Dimensions). + /// \param Dimension a dimension to query data for. + /// \return value in array matching requested dimension. + std::size_t get(int Dimension) const noexcept { return MArray[Dimension]; } + + /// Returns value for the specified dimension. + /// Results in undefined behavior if dimension is not in the range [0, + /// Dimensions). + /// \param Dimension a dimension to query data for. + /// \return value in array matching requested dimension. + std::size_t &operator[](int Dimension) noexcept { return MArray[Dimension]; } + + /// Returns value for the specified dimension. + /// Results in undefined behavior if dimension is not in the range [0, + /// Dimensions). + /// \param Dimension a dimension to query data for. + /// \return value in array matching requested dimension. + std::size_t operator[](int Dimension) const noexcept { + return MArray[Dimension]; + } + + RawArray(const RawArray<Dimensions> &rhs) = default; + RawArray(RawArray<Dimensions> &&rhs) = default; + RawArray<Dimensions> &operator=(const RawArray<Dimensions> &rhs) = default; + RawArray<Dimensions> &operator=(RawArray<Dimensions> &&rhs) = default; + ~RawArray() = default; + + friend bool operator==(const RawArray<Dimensions> &lhs, + const RawArray<Dimensions> &rhs) { + for (int i = 0; i < Dimensions; ++i) { + if (lhs.MArray[i] != rhs.MArray[i]) { + return false; + } + } + return true; + } + + friend bool operator!=(const RawArray<Dimensions> &lhs, + const RawArray<Dimensions> &rhs) { + for (int i = 0; i < Dimensions; ++i) { + if (lhs.MArray[i] != rhs.MArray[i]) { + return true; + } + } + return false; + } + +protected: + size_t MArray[Dimensions]; +}; +} // namespace detail + +/// SYCL 2020 4.9.1.1. range class. +/// range<int Dimensions> is a 1D, 2D or 3D vector that defines the iteration +/// domain of either a single work-group in a parallel dispatch, or the overall +/// Dimensions of the dispatch. +template <int Dimensions = 1> +class range : public detail::RawArray<Dimensions> { + static_assert(Dimensions >= 1 && Dimensions <= 3, + "range can only be 1, 2, or 3 Dimensional."); + using Base = detail::RawArray<Dimensions>; + +public: + static constexpr int dimensions = Dimensions; + range() noexcept = default; + range(const range<Dimensions> &rhs) = default; + range(range<Dimensions> &&rhs) = default; + range<Dimensions> &operator=(const range<Dimensions> &rhs) = default; + range<Dimensions> &operator=(range<Dimensions> &&rhs) = default; + + /// Construct a 1D range with value dim0. + /// Only valid when the template parameter Dimensions is equal to 1. + template <int N = Dimensions, std::enable_if_t<N == 1, bool> = true> + range(std::size_t dim0) noexcept : Base(dim0) {} + + /// Construct a 2D range with values dim0 and dim1. + /// Only valid when the template parameter Dimensions is equal to 2. + template <int N = Dimensions, std::enable_if_t<N == 2, bool> = true> + range(std::size_t dim0, std::size_t dim1) noexcept : Base(dim0, dim1) {} + + /// Construct a 3D range with values dim0, dim1 and dim2. + /// Only valid when the template parameter Dimensions is equal to 3. + template <int N = Dimensions, std::enable_if_t<N == 3, bool> = true> + range(std::size_t dim0, std::size_t dim1, std::size_t dim2) noexcept + : Base(dim0, dim1, dim2) {} + + /* + Declared and implemented in detail::RawArray: + std::size_t get(int dimension) const noexcept; + std::size_t& operator[](int dimension) noexcept; + std::size_t operator[](int dimension) const noexcept; + */ + + /// \return the size of the range computed as dimension0*…*dimensionN. + std::size_t size() const noexcept { + std::size_t size = 1; + for (int i = 0; i < Dimensions; ++i) { + size *= Base::MArray[i]; + } + return size; + } + + // TODO: operators to be added +}; + +/// c++ deduction guides. +#ifdef __cpp_deduction_guides +range(std::size_t) -> range<1>; +range(std::size_t, std::size_t) -> range<2>; +range(std::size_t, std::size_t, std::size_t) -> range<3>; +#endif + +template <int Dimensions = 1, bool WithOffset = true> class item; + +/// SYCL 2020 4.9.1.3. id class. +/// id<int Dimensions> is a vector of Dimensions that is used to represent an id +/// into a global or local range. It can be used as an index in an accessor of +/// the same rank. +template <int Dimensions = 1> class id : public detail::RawArray<Dimensions> { + static_assert(Dimensions >= 1 && Dimensions <= 3, + "id can only be 1, 2, or 3 Dimensional."); + using Base = detail::RawArray<Dimensions>; + + // Helper class for conversion operator. Void type is not suitable. User + // cannot even try to get address of the operator PrivateTag(). User + // may try to get an address of operator void() and will get the + // compile-time error + class PrivateTag; + template <bool Condition, typename T> + using EnableIfT = std::conditional_t<Condition, T, PrivateTag>; + +public: + static constexpr int dimensions = Dimensions; + + id() noexcept = default; + id(const id<Dimensions> &rhs) = default; + id(id<Dimensions> &&rhs) = default; + id<Dimensions> &operator=(const id<Dimensions> &rhs) = default; + id<Dimensions> &operator=(id<Dimensions> &&rhs) = default; + + /// Construct a 1D id with value dim0. + /// Only valid when the template parameter Dimensions is equal to 1. + template <int N = Dimensions, std::enable_if_t<N == 1, bool> = true> + id(std::size_t dim0) noexcept : Base(dim0) {} + + /// Construct a 2D id with values dim0, dim1. + /// Only valid when the template parameter Dimensions is equal to 2. + template <int N = Dimensions, std::enable_if_t<N == 2, bool> = true> + id(std::size_t dim0, std::size_t dim1) noexcept : Base(dim0, dim1) {} + + /// Construct a 3D id with values dim0, dim1, dim2. + /// Only valid when the template parameter Dimensions is equal to 3. + template <int N = Dimensions, std::enable_if_t<N == 3, bool> = true> + id(std::size_t dim0, std::size_t dim1, std::size_t dim2) noexcept + : Base(dim0, dim1, dim2) {} + + /// Construct an id from the dimensions of range. + /// Only valid when the template parameter Dimensions is equal to 1. + template <int N = Dimensions, std::enable_if_t<N == 1, bool> = true> + id(const range<Dimensions> &range) noexcept : Base(range.get(0)) {} + + /// Construct an id from the dimensions of range. + /// Only valid when the template parameter Dimensions is equal to 2. + template <int N = Dimensions, std::enable_if_t<N == 2, bool> = true> + id(const range<Dimensions> &range) noexcept + : Base(range.get(0), range.get(1)) {} + + /// Construct an id from the dimensions of range. + /// Only valid when the template parameter Dimensions is equal to 3. + template <int N = Dimensions, std::enable_if_t<N == 3, bool> = true> + id(const range<Dimensions> &range) noexcept + : Base(range.get(0), range.get(1), range.get(2)) {} + + /// Construct an id from item.get_id(). + /// Only valid when the template parameter Dimensions is equal to 1. + template <int N = Dimensions, std::enable_if_t<N == 1, bool> = true> + id(const item<Dimensions> &item) noexcept : Base(item.get_id(0)) {} + + /// Construct an id from item.get_id(). + /// Only valid when the template parameter Dimensions is equal to 2. + template <int N = Dimensions, std::enable_if_t<N == 2, bool> = true> + id(const item<Dimensions> &item) noexcept + : Base(item.get_id(0), item.get_id(1)) {} + + /// Construct an id from item.get_id(). + /// Only valid when the template parameter Dimensions is equal to 3. + template <int N = Dimensions, std::enable_if_t<N == 3, bool> = true> + id(const item<Dimensions> &item) noexcept + : Base(item.get_id(0), item.get_id(1), item.get_id(2)) {} + + /* + Declared and implemented in detail::RawArray: + std::size_t get(int dimension) const noexcept; + std::size_t& operator[](int dimension) noexcept; + std::size_t operator[](int dimension) const noexcept; + */ + + // Template operator is not allowed because it disables further type + // conversion. For example, the next code will not work in case of template + // conversion: + // int a = id<1>(value); + /// Returns the same value as get(0). + /// Available only when: Dimensions == 1. + operator EnableIfT<(Dimensions == 1), std::size_t>() const noexcept { + return Base::get(0); + } + + // TODO: operators to be added +}; + +/// c++ deduction guides. +#ifdef __cpp_deduction_guides +id(std::size_t) -> id<1>; +id(std::size_t, std::size_t) -> id<2>; +id(std::size_t, std::size_t, std::size_t) -> id<3>; +#endif + +/// SYCL 2020 4.9.1.4. item class. +/// item identifies an instance of the function object executing at each point +/// in a range. +template <int Dimensions /* = 1*/, bool WithOffset /* = true*/> class item { + /* Helper class for conversion operator. Void type is not suitable. User + * cannot even try to get address of the operator PrivateTag(). User + * may try to get an address of operator void() and will get the + * compile-time error */ + class PrivateTag; + template <bool Condition, typename T> + using EnableIfT = std::conditional_t<Condition, T, PrivateTag>; + +public: + static constexpr int dimensions = Dimensions; + + item() = delete; + + item(const item &rhs) = default; + + item(item<Dimensions, WithOffset> &&rhs) = default; + + item &operator=(const item &rhs) = default; + + item &operator=(item &&rhs) = default; + + friend bool operator==(const item<Dimensions, WithOffset> &lhs, + const item<Dimensions, WithOffset> &rhs) { + if constexpr (WithOffset) + return (lhs.MId == rhs.MId) && (lhs.MRange == rhs.MRange) && + (lhs.MOffset == rhs.MOffset); + else + return (lhs.MId == rhs.MId) && (lhs.MRange == rhs.MRange); + } + + friend bool operator!=(const item<Dimensions, WithOffset> &lhs, + const item<Dimensions, WithOffset> &rhs) { + return !(lhs == rhs); + } + + /// \return the constituent id representing the work-item’s position in the + /// iteration space. + id<Dimensions> get_id() const noexcept { return MId; } + + /// Equivalent to return get_id()[dimension]. + std::size_t get_id(int dimension) const noexcept { + return MId.get(dimension); + } + + /// Equivalent to return get_id(dimension). + std::size_t operator[](int dimension) const noexcept { + return MId[dimension]; + } + + /// \return a range representing the dimensions of the range of possible + /// values of the item. + range<Dimensions> get_range() const noexcept { return MRange; } + + /// Equivalent to return get_range().get(dimension). + std::size_t get_range(int dimension) const noexcept { + return MRange[dimension]; + } + + /// Deprecated in SYCL 2020. + /// For an item converted from an item with no offset this will always return + /// an id of all 0 values. This member function is only available if + /// WithOffset is true. + /// \return an id representing the n-dimensional offset provided to the + /// parallel_for and that is added by the runtime to the global-ID of each + /// work-item, if this item represents a global range. + template <bool HasOffset = WithOffset, + std::enable_if_t<HasOffset == true, bool> = true> + id<Dimensions> get_offset() const noexcept { + return MOffset; + } + + /// Deprecated in SYCL 2020. + /// This conversion allow users to seamlessly write code that assumes an + /// offset and still provides an offset-less item. Available only when: + /// WithOffset == false. + /// \return an item representing the same information as the object holds but + /// also includes the offset set to 0. + template <bool HasOffset = WithOffset, + std::enable_if_t<HasOffset == false, bool> = true> + operator item<Dimensions, true>() const noexcept { + return item<Dimensions, true>(MRange, MId, id<Dimensions>{}); + } + + /// Equivalent to get_id(0). + /// Available only when: Dimensions == 1. + operator EnableIfT<(Dimensions == 1), std::size_t>() const noexcept { + return get_id(0); + } + + /// \return Return the id as a linear index value. + std::size_t get_linear_id() const noexcept { + if constexpr (WithOffset) { + if constexpr (1 == Dimensions) { + return MId; + } + if constexpr (2 == Dimensions) { + return (MId[0] - MOffset[0]) * MRange[1] + (MId[1] - MOffset[1]); + } + return ((MId[0] - MOffset[0]) * MRange[1] * MRange[2]) + + ((MId[1] - MOffset[1]) * MRange[2]) + (MId[2] - MOffset[2]); + } else { + if constexpr (1 == Dimensions) { + return MId[0]; + } + if constexpr (2 == Dimensions) { + return MId[0] * MRange[1] + MId[1]; + } + return (MId[0] * MRange[1] * MRange[2]) + (MId[1] * MRange[2]) + MId[2]; + } + } + +protected: + template <bool HasOffset = WithOffset, + std::enable_if_t<HasOffset == true, bool> = true> + item(const sycl::range<Dimensions> &range, const sycl::id<Dimensions> &id, + const sycl::id<Dimensions> &offset) + : MRange(range), MId(id), MOffset(offset) {} + + template <bool HasOffset = WithOffset, + std::enable_if_t<HasOffset == false, bool> = true> + item(const range<Dimensions> &range, const id<Dimensions> &id) + : MRange(range), MId(id), MOffset() {} + +private: + range<Dimensions> MRange; + id<Dimensions> MId; + id<Dimensions> MOffset; + + friend class detail::Builder; +}; + +_LIBSYCL_END_NAMESPACE_SYCL + +#endif // _LIBSYCL___IMPL_INDEX_SPACE_CLASSES_HPP diff --git a/libsycl/include/sycl/__impl/queue.hpp b/libsycl/include/sycl/__impl/queue.hpp index d1ac320433c38..95653ab0c34ff 100644 --- a/libsycl/include/sycl/__impl/queue.hpp +++ b/libsycl/include/sycl/__impl/queue.hpp @@ -23,6 +23,7 @@ #include <sycl/__impl/detail/arg_wrapper.hpp> #include <sycl/__impl/detail/config.hpp> #include <sycl/__impl/detail/default_async_handler.hpp> +#include <sycl/__impl/detail/kernel_arg_helpers.hpp> #include <sycl/__impl/detail/obj_utils.hpp> #include <sycl/__impl/detail/unified_range_view.hpp> @@ -32,28 +33,6 @@ class context; namespace detail { class QueueImpl; - -template <typename, typename T> struct CheckFunctionSignature { - static_assert(std::integral_constant<T, false>::value, - "Second template parameter is required to be of function type"); -}; - -template <typename F, typename RetT, typename... Args> -struct CheckFunctionSignature<F, RetT(Args...)> { -private: - template <typename T> - static constexpr auto check(T *) -> typename std::is_same< - decltype(std::declval<T>().operator()(std::declval<Args>()...)), - RetT>::type; - - template <typename> static constexpr std::false_type check(...); - - using type = decltype(check<F>(0)); - -public: - static constexpr bool value = type::value; -}; - } // namespace detail // SYCL 2020 4.6.5. Queue class. @@ -166,7 +145,7 @@ class _LIBSYCL_EXPORT queue { /// /// \param kernelFunc is the kernel functor or lambda. /// \return an event that represents the status of the submitted kernel. - template <typename KernelName, typename KernelType> + template <typename KernelName = detail::AutoName, typename KernelType> event single_task(const KernelType &kernelFunc) { return single_task<KernelName, KernelType>({}, kernelFunc); } @@ -177,7 +156,7 @@ class _LIBSYCL_EXPORT queue { /// \param depEvent is an event that specifies the kernel dependency. /// \param kernelFunc is the kernel functor or lambda. /// \return an event that represents the status of the submitted kernel. - template <typename KernelName, typename KernelType> + template <typename KernelName = detail::AutoName, typename KernelType> event single_task(event depEvent, const KernelType &kernelFunc) { return single_task<KernelName, KernelType>({depEvent}, kernelFunc); } @@ -189,7 +168,7 @@ class _LIBSYCL_EXPORT queue { /// dependencies. /// \param kernelFunc is the kernel functor or lambda. /// \return an event that represents the status of the submitted kernel. - template <typename KernelName, typename KernelType> + template <typename KernelName = detail::AutoName, typename KernelType> event single_task(const std::vector<event> &depEvents, const KernelType &kernelFunc) { static_assert( @@ -199,18 +178,169 @@ class _LIBSYCL_EXPORT queue { "group. "); setKernelParameters(depEvents); - submitSingleTask<KernelName, KernelType>(kernelFunc); + using NameT = + typename detail::get_kernel_name_t<KernelName, KernelType>::name; + submitSingleTask<NameT, KernelType>(kernelFunc); return getLastEvent(); } + /// Defines and invokes a SYCL kernel function as a lambda expression or a + /// named function object type, for the specified range. + /// + /// \param numWorkItems specifies the global work space of the kernel. + /// \param rest acts as-if: const KernelType &KernelFunc". + // TODO: Rest will represent reduction types once it is supported. + template <typename KernelName = detail::AutoName, typename... Rest> + event parallel_for(range<1> numWorkItems, Rest &&...rest) { + return parallel_for<KernelName>(numWorkItems, {}, + std::forward<Rest>(rest)...); + } + + /// Defines and invokes a SYCL kernel function as a lambda expression or a + /// named function object type, for the specified range. + /// + /// \param numWorkItems specifies the global work space of the kernel. + /// \param rest acts as-if: const KernelType &KernelFunc". + // TODO: Rest will represent reduction types once it is supported. + template <typename KernelName = detail::AutoName, typename... Rest> + event parallel_for(range<2> numWorkItems, Rest &&...rest) { + return parallel_for<KernelName>(numWorkItems, {}, + std::forward<Rest>(rest)...); + } + + /// Defines and invokes a SYCL kernel function as a lambda expression or a + /// named function object type, for the specified range. + /// + /// \param numWorkItems specifies the global work space of the kernel. + /// \param rest acts as-if: const KernelType &KernelFunc". + // TODO: Rest will represent reduction types once it is supported. + template <typename KernelName = detail::AutoName, typename... Rest> + event parallel_for(range<3> numWorkItems, Rest &&...rest) { + return parallel_for<KernelName>(numWorkItems, {}, + std::forward<Rest>(rest)...); + } + + /// Defines and invokes a SYCL kernel function as a lambda expression or a + /// named function object type, for the specified range. + /// + /// \param numWorkItems specifies the global work space of the kernel. + /// \param depEvent adds a requirement that the action represented by depEvent + /// must complete before executing this kernel. + /// \param rest acts as-if: const KernelType &KernelFunc". + // TODO: Rest will represent reduction types once it is supported. + template <typename KernelName = detail::AutoName, typename... Rest> + event parallel_for(range<1> numWorkItems, event depEvent, Rest &&...rest) { + return parallel_for<KernelName>(numWorkItems, {depEvent}, + std::forward<Rest>(rest)...); + } + + /// Defines and invokes a SYCL kernel function as a lambda expression or a + /// named function object type, for the specified range. + /// + /// \param numWorkItems specifies the global work space of the kernel. + /// \param depEvent adds a requirement that the action represented by depEvent + /// must complete before executing this kernel. + /// \param rest acts as-if: const KernelType &KernelFunc". + // TODO: Rest will represent reduction types once it is supported. + template <typename KernelName = detail::AutoName, typename... Rest> + event parallel_for(range<2> numWorkItems, event depEvent, Rest &&...rest) { + return parallel_for<KernelName>(numWorkItems, {depEvent}, + std::forward<Rest>(rest)...); + } + + /// Defines and invokes a SYCL kernel function as a lambda expression or a + /// named function object type, for the specified range. + /// + /// \param numWorkItems specifies the global work space of the kernel. + /// \param depEvent adds a requirement that the action represented by depEvent + /// must complete before executing this kernel. + /// \param rest acts as-if: const KernelType &KernelFunc". + // TODO: Rest will represent reduction types once it is supported. + template <typename KernelName = detail::AutoName, typename... Rest> + event parallel_for(range<3> numWorkItems, event depEvent, Rest &&...rest) { + return parallel_for<KernelName>(numWorkItems, {depEvent}, + std::forward<Rest>(rest)...); + } + + /// Defines and invokes a SYCL kernel function as a lambda expression or a + /// named function object type, for the specified range. + /// + /// \param numWorkItems specifies the global work space of the kernel + /// \param depEvents is a vector of events that specifies the kernel + /// dependencies. + /// \param rest acts as-if: const KernelType &KernelFunc". + // TODO: Rest will represent reduction types once it is supported. + template <typename KernelName = detail::AutoName, typename... Rest> + event parallel_for(range<1> numWorkItems, const std::vector<event> &depEvents, + Rest &&...rest) { + return parallelForImpl<KernelName>(numWorkItems, depEvents, + std::forward<Rest>(rest)...); + } + + /// Defines and invokes a SYCL kernel function as a lambda expression or a + /// named function object type, for the specified range. + /// + /// \param numWorkItems specifies the global work space of the kernel + /// \param depEvents is a vector of events that specifies the kernel + /// dependencies. + /// \param rest acts as-if: const KernelType &KernelFunc". + // TODO: Rest will represent reduction types once it is supported. + template <typename KernelName = detail::AutoName, typename... Rest> + event parallel_for(range<2> numWorkItems, const std::vector<event> &depEvents, + Rest &&...rest) { + return parallelForImpl<KernelName>(numWorkItems, depEvents, + std::forward<Rest>(rest)...); + } + + /// Defines and invokes a SYCL kernel function as a lambda expression or a + /// named function object type, for the specified range. + /// + /// \param numWorkItems specifies the global work space of the kernel + /// \param depEvents is a vector of events that specifies the kernel + /// dependencies. + /// \param rest acts as-if: const KernelType &KernelFunc". + // TODO: Rest will represent reduction types once it is supported. + template <typename KernelName = detail::AutoName, typename... Rest> + event parallel_for(range<3> numWorkItems, const std::vector<event> &depEvents, + Rest &&...rest) { + return parallelForImpl<KernelName>(numWorkItems, depEvents, + std::forward<Rest>(rest)...); + } + /// Blocks the calling thread until all commands previously submitted to this /// queue have completed. Synchronous errors are reported through SYCL /// exceptions. void wait(); private: - // Name of this function is defined by compiler. It generates call to this - // function in the host implementation of KernelFunc in submitSingleTask. + template <typename KernelName, int Dims, typename... Rest> + event parallelForImpl(range<Dims> numWorkItems, + const std::vector<event> &depEvents, Rest &&...rest) { + if constexpr (sizeof...(Rest) != 1) + throw sycl::exception(errc::feature_not_supported, + "Reductions are not supported."); + setKernelParameters(depEvents, numWorkItems); + + using KernelType = + std::decay_t<detail::nth_type_t<sizeof...(Rest) - 1, Rest...>>; + using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>; + static_assert( + std::is_convertible_v<sycl::item<Dims>, LambdaArgType>, + "Kernel argument of a sycl::parallel_for with sycl::range " + "must be either sycl::item or be convertible from sycl::item"); + + using NameT = + typename detail::get_kernel_name_t<KernelName, KernelType>::name; + submitParallelFor<NameT, item<Dims>, KernelType>(rest...); + return getLastEvent(); + } + + /// Name of this function is defined by compiler. It generates call to this + /// function in the host implementation of KernelFunc in submitSingleTask or + /// submitParallelFor. + /// \param KernelName a name of the kernel being invoked. + /// \param args kernel arguments for kernel invocation. + // TODO: now `args` always represents single argument - lambda capture. template <typename, typename... Args> void sycl_kernel_launch(const char *KernelName, Args &&...args) { static_assert((sizeof...(args) == 1) && @@ -221,6 +351,10 @@ class _LIBSYCL_EXPORT queue { submitKernelImpl(KernelName, TypelessArgs); } + /// The sycl_kernel_entry_point attribute facilitates the generation of an + /// offload kernel entry point function with parameters corresponding to the + /// (potentially decomposed) kernel arguments and a body that (potentially + /// reconstructs the arguments and) executes the kernel. #ifdef SYCL_LANGUAGE_VERSION # define _LIBSYCL_ENTRY_POINT_ATTR__(KernelName) \ [[clang::sycl_kernel_entry_point(KernelName)]] @@ -228,18 +362,45 @@ class _LIBSYCL_EXPORT queue { # define _LIBSYCL_ENTRY_POINT_ATTR__(KernelName) #endif // SYCL_LANGUAGE_VERSION + /// Specifies the parameters and body of the generated offload kernel entry + /// point for single_task invocations. On host compiler generates call to + /// sycl_kernel_launch instead of KernelFunc invocation. template <typename KernelName, typename KernelType> _LIBSYCL_ENTRY_POINT_ATTR__(KernelName) void submitSingleTask(const KernelType KernelFunc) { KernelFunc(); } - event getLastEvent(); - void submitKernelImpl(const char *KernelName, - detail::ArgCollection &TypelessArgs); + /// Specifies the parameters and body of the generated offload kernel entry + /// point for parallel_for invocations. On host compiler generates call to + /// sycl_kernel_launch instead of KernelFunc invocation. + template <typename KernelName, typename ElementType, typename KernelType> + _LIBSYCL_ENTRY_POINT_ATTR__(KernelName) + void submitParallelFor(const KernelType KernelFunc) { +#ifdef __SYCL_DEVICE_ONLY__ + KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>())); +#endif + (void)KernelFunc; + } + + /// Passes kernel parameters to runtime. + /// \param Events a collection of events representing dependencies of the + /// kernel to submit. + /// \param Range a unified view of range for kernel execution. void setKernelParameters(const std::vector<event> &Events, const detail::UnifiedRangeView &Range = {}); + /// Passes kernel arguments to runtime. + /// If all dependencies are met and kernel can be submitted to backend - it is + /// done in this call. + /// \param KernelName a name of the kernel being invoked. + /// \param TypelessArgs a unified arguments collection. + void submitKernelImpl(const char *KernelName, + detail::ArgCollection &TypelessArgs); + + /// \return an event representing last kernel invocation. + event getLastEvent(); + queue(const std::shared_ptr<detail::QueueImpl> &Impl) : impl(Impl) {} std::shared_ptr<detail::QueueImpl> impl; diff --git a/libsycl/include/sycl/__spirv/spirv_vars.hpp b/libsycl/include/sycl/__spirv/spirv_vars.hpp new file mode 100644 index 0000000000000..ec8c691b35e92 --- /dev/null +++ b/libsycl/include/sycl/__spirv/spirv_vars.hpp @@ -0,0 +1,75 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains SPIRV builtins needed for kernel invocations +/// (parallel_for). +/// +//===----------------------------------------------------------------------===// + +#ifndef _LIBSYCL___SPIRV_SPIRV_VARS +#define _LIBSYCL___SPIRV_SPIRV_VARS + +#ifdef __SYCL_DEVICE_ONLY__ + +# include <cstddef> +# include <cstdint> + +// SPIR-V built-in variables mapped to function call. +# define _LIBSYCL_SYCL_DEVICE_ATTR __attribute__((sycl_external)) + +_LIBSYCL_SYCL_DEVICE_ATTR __attribute__((const)) size_t +__spirv_BuiltInGlobalInvocationId(int); +_LIBSYCL_SYCL_DEVICE_ATTR __attribute__((const)) size_t +__spirv_BuiltInGlobalSize(int); +_LIBSYCL_SYCL_DEVICE_ATTR __attribute__((const)) size_t +__spirv_BuiltInGlobalOffset(int); + +namespace __spirv { + +// Helper function templates to initialize and get vector component from SPIR-V +// built-in variables +# define __SPIRV_DEFINE_INIT_AND_GET_HELPERS(POSTFIX) \ + template <int ID> size_t get##POSTFIX(); \ + template <> size_t get##POSTFIX<0>() { return __spirv_##POSTFIX(0); } \ + template <> size_t get##POSTFIX<1>() { return __spirv_##POSTFIX(1); } \ + template <> size_t get##POSTFIX<2>() { return __spirv_##POSTFIX(2); } \ + \ + template <int Dim, class DstT> struct InitSizesST##POSTFIX; \ + \ + template <class DstT> struct InitSizesST##POSTFIX<1, DstT> { \ + static DstT initSize() { return {get##POSTFIX<0>()}; } \ + }; \ + \ + template <class DstT> struct InitSizesST##POSTFIX<2, DstT> { \ + static DstT initSize() { \ + return {get##POSTFIX<1>(), get##POSTFIX<0>()}; \ + } \ + }; \ + \ + template <class DstT> struct InitSizesST##POSTFIX<3, DstT> { \ + static DstT initSize() { \ + return {get##POSTFIX<2>(), get##POSTFIX<1>(), get##POSTFIX<0>()}; \ + } \ + }; \ + \ + template <int Dims, class DstT> DstT init##POSTFIX() { \ + return InitSizesST##POSTFIX<Dims, DstT>::initSize(); \ + } + +__SPIRV_DEFINE_INIT_AND_GET_HELPERS(BuiltInGlobalSize); +__SPIRV_DEFINE_INIT_AND_GET_HELPERS(BuiltInGlobalInvocationId) +__SPIRV_DEFINE_INIT_AND_GET_HELPERS(BuiltInGlobalOffset) + +# undef __SPIRV_DEFINE_INIT_AND_GET_HELPERS + +} // namespace __spirv + +#endif //__SYCL_DEVICE_ONLY__ + +#endif // _LIBSYCL___SPIRV_SPIRV_VARS diff --git a/libsycl/test/basic/queue_parallel_for_generic.cpp b/libsycl/test/basic/queue_parallel_for_generic.cpp new file mode 100644 index 0000000000000..cac423b85f218 --- /dev/null +++ b/libsycl/test/basic/queue_parallel_for_generic.cpp @@ -0,0 +1,47 @@ +// REQUIRES: any-device +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %t.out + +#include <sycl/sycl.hpp> + +#include <cassert> +#include <iostream> +#include <type_traits> + +int main() { + // TODO: uncomment property once it is implemented. now all sycl::queue + // objects are in-order due to liboffload limitation. Test is intended to + // check in-order execution. + sycl::queue q{/*sycl::property::queue::in_order()*/}; + auto dev = q.get_device(); + auto ctx = q.get_context(); + constexpr int N = 8; + + auto A = static_cast<int *>(sycl::malloc_shared(N * sizeof(int), dev, ctx)); + + for (int i = 0; i < N; i++) { + A[i] = 1; + } + + q.parallel_for<class Bar>(N, [=](auto i) { + static_assert(std::is_same<decltype(i), sycl::item<1>>::value, + "lambda arg type is unexpected"); + A[i]++; + }); + + q.parallel_for<class Foo>({N}, [=](auto i) { + static_assert(std::is_same<decltype(i), sycl::item<1>>::value, + "lambda arg type is unexpected"); + A[i]++; + }); + + // TODO: add kernel with offset and kernel with nd_range once they + // are implemented. + + q.wait(); + + for (int i = 0; i < N; i++) { + assert(A[i] == 3); + } + sycl::free(A, ctx); +} diff --git a/libsycl/test/basic/wrapped_usm_pointers.cpp b/libsycl/test/basic/wrapped_usm_pointers.cpp new file mode 100644 index 0000000000000..16a86963cc976 --- /dev/null +++ b/libsycl/test/basic/wrapped_usm_pointers.cpp @@ -0,0 +1,111 @@ +// REQUIRES: any-device +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %t.out + +#include <sycl/sycl.hpp> + +#include <iostream> + +struct Simple { + int *Data; + int Addition; +}; + +struct WrapperOfSimple { + int Addition; + Simple Obj; +}; + +struct NonTrivial { + int Addition; + int *Data; + + NonTrivial(int *D, int A) : Data(D), Addition(A) {} +}; + +struct NonTrivialDerived : NonTrivial { + int AA = 0; + NonTrivialDerived(int *D, int A) : NonTrivial(D, A) {} +}; + +using namespace sycl; + +int main() { + constexpr int NumOfElements = 7; + + queue Q; + + NonTrivial NonTrivialObj(sycl::malloc_shared<int>(NumOfElements, Q), 38); + NonTrivialDerived NonTrivialDerivedObj( + sycl::malloc_shared<int>(NumOfElements, Q), 39); + Simple SimpleObj = {sycl::malloc_shared<int>(NumOfElements, Q), 42}; + WrapperOfSimple WrapperOfSimpleObj = { + 300, {sycl::malloc_shared<int>(NumOfElements, Q), 100500}}; + + // Test simple struct containing pointer. + Q.parallel_for(NumOfElements, [=](id<1> Idx) { + SimpleObj.Data[Idx] = Idx + SimpleObj.Addition; + }); + + // Test simple non-trivial struct containing pointer. + Q.parallel_for(NumOfElements, [=](id<1> Idx) { + NonTrivialObj.Data[Idx] = Idx + NonTrivialObj.Addition; + }); + + // Test simple non-trivial derived struct containing pointer. + Q.parallel_for(NumOfElements, [=](id<1> Idx) { + NonTrivialDerivedObj.Data[Idx] = Idx + NonTrivialDerivedObj.Addition; + }); + + // Test nested struct containing pointer. + Q.parallel_for(NumOfElements, [=](id<1> Idx) { + WrapperOfSimpleObj.Obj.Data[Idx] = Idx + WrapperOfSimpleObj.Obj.Addition; + }); + + // Test array of structs containing pointers. + Simple SimpleArr[NumOfElements]; + for (int i = 0; i < NumOfElements; ++i) { + SimpleArr[i].Data = sycl::malloc_shared<int>(NumOfElements, Q); + SimpleArr[i].Addition = 38 + i; + } + + Q.parallel_for(range<2>(NumOfElements, NumOfElements), [=](item<2> Idx) { + SimpleArr[Idx.get_id(0)].Data[Idx.get_id(1)] = + Idx.get_id(1) + SimpleArr[Idx.get_id(0)].Addition; + }); + + Q.wait(); + + auto Checker = [](auto Obj) { + for (int i = 0; i < NumOfElements; ++i) { + if (Obj.Data[i] != (i + Obj.Addition)) { + std::cout << "line: " << __LINE__ << " result[" << i << "] is " + << Obj.Data[i] << " expected " << i + Obj.Addition + << std::endl; + return true; // true if fail + } + } + + return false; + }; + + bool Fail = false; + Fail = Checker(SimpleObj); + Fail = Checker(NonTrivialObj); + Fail = Checker(NonTrivialDerivedObj); + Fail = Checker(WrapperOfSimpleObj.Obj); + + for (int i = 0; i < NumOfElements; ++i) + Fail = Checker(SimpleArr[i]); + + // Free allocated memory. + sycl::free(NonTrivialObj.Data, Q); + sycl::free(NonTrivialDerivedObj.Data, Q); + sycl::free(SimpleObj.Data, Q); + sycl::free(WrapperOfSimpleObj.Obj.Data, Q); + + for (int i = 0; i < NumOfElements; ++i) + sycl::free(SimpleArr[i].Data, Q); + + return Fail; +} >From a5b6a4c40d367b6f80d7ec43915d94a0707aa911 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" <[email protected]> Date: Fri, 27 Mar 2026 10:37:44 -0700 Subject: [PATCH 3/5] removed invalid comment Signed-off-by: Tikhomirova, Kseniya <[email protected]> --- libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp b/libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp index d4a0ea9f63ff2..a7478e1300e21 100644 --- a/libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp +++ b/libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp @@ -5,8 +5,6 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// to add -//===----------------------------------------------------------------------===// #ifndef _LIBSYCL___IMPL_DETAIL_KERNEL_ARG_HELPERS #define _LIBSYCL___IMPL_DETAIL_KERNEL_ARG_HELPERS >From 60af5a987839fac6b250e7519a74fe33ef8ff3b4 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" <[email protected]> Date: Tue, 21 Apr 2026 04:40:29 -0700 Subject: [PATCH 4/5] fix merge errors Signed-off-by: Tikhomirova, Kseniya <[email protected]> --- .../sycl/__impl/detail/arg_wrapper.hpp | 135 ------------------ libsycl/include/sycl/__impl/queue.hpp | 43 ------ 2 files changed, 178 deletions(-) delete mode 100644 libsycl/include/sycl/__impl/detail/arg_wrapper.hpp diff --git a/libsycl/include/sycl/__impl/detail/arg_wrapper.hpp b/libsycl/include/sycl/__impl/detail/arg_wrapper.hpp deleted file mode 100644 index 96f60a3121787..0000000000000 --- a/libsycl/include/sycl/__impl/detail/arg_wrapper.hpp +++ /dev/null @@ -1,135 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file contains helper functions used to wrap kernel arguments to -/// typeless collection. -/// -//===----------------------------------------------------------------------===// - -#ifndef _LIBSYCL___IMPL_DETAIL_ARG_WRAPPER_HPP -#define _LIBSYCL___IMPL_DETAIL_ARG_WRAPPER_HPP - -#include <sycl/__impl/detail/config.hpp> -#include <sycl/__impl/exception.hpp> - -#include <cassert> -#include <memory> - -_LIBSYCL_BEGIN_NAMESPACE_SYCL - -namespace detail { - -/// Base class is needed for unification, we pass arguments through ABI -/// boundary. -class ArgWrapperBase { -public: - ArgWrapperBase(const ArgWrapperBase &) = delete; - ArgWrapperBase &operator=(const ArgWrapperBase &) = delete; - virtual ~ArgWrapperBase() = default; - - virtual void deepCopy() = 0; - virtual size_t getSize() const = 0; - virtual const void *getPtr() const = 0; - -protected: - ArgWrapperBase() = default; -}; - -/// Helps to manage arguments in a typeless way. -template <typename Type> class ArgWrapper : public ArgWrapperBase { -public: - ArgWrapper(Type &Arg) { Ptr = &Arg; } - ArgWrapper(const ArgWrapper &) = delete; - ArgWrapper &operator=(const ArgWrapper &) = delete; - - /// \return size of argument in bytes. - size_t getSize() const override { return sizeof(Type); } - - /// Returns raw pointer to the corresponding argument. - /// No copy is done by this method. It works with pointer to the memory whose - /// existence must be guaranteed by class user or with copy that must be - /// explicitly requested by class user via deepCopy method. - /// \return pointer to the argument. - const void *getPtr() const override { - assert((!DeepCopy || (DeepCopy.get()) == Ptr) && - "Incorrect state of copied argument"); - return Ptr; - } - - /// Copies agrument to RT owned storage. - void deepCopy() override { - if (DeepCopy) - return; - - DeepCopy.reset(new Type(*Ptr)); - Ptr = DeepCopy.get(); - } - -private: - Type *Ptr; - std::unique_ptr<Type> DeepCopy; -}; - -/// Collection of arguments. Provides functionality to accumulate all arguments -/// data to pass through ABI boundary. -class ArgCollection { -public: - /// Adds argument to the collection. Don't own the memory. Argument lifetime - /// must be guaranteed by class user. If extended lifetime is needed (copy), - /// deepCopy must be called. - template <typename Type> void addArg(Type &Arg) { - MArgs.emplace_back(new ArgWrapper(Arg)); - } - - /// \return array of argument pointers. - const void **getArgPtrArray() { - if (MPtrs.size() != MArgs.size()) { - MPtrs.clear(); - MPtrs.reserve(MArgs.size()); - auto it = MArgs.cbegin(); - while (it != MArgs.cend()) { - MPtrs.push_back((*it++)->getPtr()); - } - } - return MPtrs.data(); - } - - /// \return array of argument sizes. - int64_t *getSizesArray() { - if (MSizes.size() != MArgs.size()) { - MSizes.clear(); - MSizes.reserve(MArgs.size()); - auto it = MArgs.cbegin(); - while (it != MArgs.cend()) { - MSizes.push_back(static_cast<int64_t>((*it++)->getSize())); - } - } - return MSizes.data(); - } - - /// \return count of arguments in collection. - size_t getArgCount() { return MArgs.size(); } - - /// Extends arguments lifetime by doing copy of all arguments. - void deepCopy() { - for (auto &Arg : MArgs) - Arg->deepCopy(); - } - -private: - std::vector<std::unique_ptr<ArgWrapperBase>> MArgs; - std::vector<int64_t> MSizes; - std::vector<const void *> MPtrs; -}; - -} // namespace detail - -_LIBSYCL_END_NAMESPACE_SYCL - -#endif // _LIBSYCL___IMPL_DETAIL_ARG_WRAPPER_HPP diff --git a/libsycl/include/sycl/__impl/queue.hpp b/libsycl/include/sycl/__impl/queue.hpp index 5f31777c09cf7..e3856d2f5b4b6 100644 --- a/libsycl/include/sycl/__impl/queue.hpp +++ b/libsycl/include/sycl/__impl/queue.hpp @@ -20,7 +20,6 @@ #include <sycl/__impl/event.hpp> #include <sycl/__impl/property_list.hpp> -#include <sycl/__impl/detail/arg_wrapper.hpp> #include <sycl/__impl/detail/config.hpp> #include <sycl/__impl/detail/default_async_handler.hpp> #include <sycl/__impl/detail/get_device_kernel_info.hpp> @@ -311,48 +310,6 @@ class _LIBSYCL_EXPORT queue { /// exceptions. void wait(); - /// Defines and invokes a SYCL kernel function as a lambda expression or a - /// named function object type. - /// - /// \param kernelFunc is the kernel functor or lambda. - /// \return an event that represents the status of the submitted kernel. - template <typename KernelName, typename KernelType> - event single_task(const KernelType &kernelFunc) { - return single_task<KernelName, KernelType>({}, kernelFunc); - } - - /// Defines and invokes a SYCL kernel function as a lambda expression or a - /// named function object type. - /// - /// \param depEvent is an event that specifies the kernel dependency. - /// \param kernelFunc is the kernel functor or lambda. - /// \return an event that represents the status of the submitted kernel. - template <typename KernelName, typename KernelType> - event single_task(event depEvent, const KernelType &kernelFunc) { - return single_task<KernelName, KernelType>({depEvent}, kernelFunc); - } - - /// Defines and invokes a SYCL kernel function as a lambda expression or a - /// named function object type. - /// - /// \param depEvents is a collection of events that specify the kernel - /// dependencies. - /// \param kernelFunc is the kernel functor or lambda. - /// \return an event that represents the status of the submitted kernel. - template <typename KernelName, typename KernelType> - event single_task(const std::vector<event> &depEvents, - const KernelType &kernelFunc) { - static_assert( - detail::CheckFunctionSignature<std::remove_reference_t<KernelType>, - void()>::value, - "sycl::queue::single_task() requires a kernel instead of a command " - "group"); - - setKernelParameters(depEvents); - submitSingleTask<KernelName, KernelType>(kernelFunc); - return getLastEvent(); - } - private: template <typename KernelName, int Dims, typename... Rest> event parallelForImpl(range<Dims> numWorkItems, >From 2861b1031d9c712c8c43c360b1fc88ab3ba388ec Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" <[email protected]> Date: Tue, 21 Apr 2026 08:46:55 -0700 Subject: [PATCH 5/5] fix more comments and revert some merge issues Signed-off-by: Tikhomirova, Kseniya <[email protected]> --- .../sycl/__impl/detail/kernel_arg_helpers.hpp | 11 ++++++++-- .../sycl/__impl/index_space_classes.hpp | 20 +++++++++---------- libsycl/include/sycl/__impl/queue.hpp | 20 +++++++++---------- libsycl/include/sycl/__spirv/spirv_vars.hpp | 10 +++------- libsycl/include/sycl/sycl.hpp | 1 + libsycl/src/detail/queue_impl.cpp | 1 + libsycl/src/detail/queue_impl.hpp | 1 - libsycl/test/basic/wrapped_usm_pointers.cpp | 10 +++++----- 8 files changed, 38 insertions(+), 36 deletions(-) diff --git a/libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp b/libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp index a7478e1300e21..f3d733981922a 100644 --- a/libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp +++ b/libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp @@ -5,6 +5,11 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains helpers for kernel invocation. +/// +//===----------------------------------------------------------------------===// #ifndef _LIBSYCL___IMPL_DETAIL_KERNEL_ARG_HELPERS #define _LIBSYCL___IMPL_DETAIL_KERNEL_ARG_HELPERS @@ -17,11 +22,13 @@ # include <sycl/__spirv/spirv_vars.hpp> #endif +#include <type_traits> + _LIBSYCL_BEGIN_NAMESPACE_SYCL namespace detail { -/// \name Helpers for the unnamed lambda extension. +/// \name Helpers for the unnamed lambda. /// @{ /// This class is the default kernel name template parameter type for kernel /// invocation APIs such as single_task. @@ -35,7 +42,7 @@ template <typename Name, typename Type> struct get_kernel_name_t { }; /// Specialization for the case when Name is undefined. -/// This is only legal with our compiler with the unnamed lambda extension or if +/// This is only legal with our compiler with the unnamed lambda support or if /// the kernel is a functor object. template <typename Type> struct get_kernel_name_t<detail::AutoName, Type> { using name = Type; diff --git a/libsycl/include/sycl/__impl/index_space_classes.hpp b/libsycl/include/sycl/__impl/index_space_classes.hpp index ef2897cee5307..0dc8e90decc3d 100644 --- a/libsycl/include/sycl/__impl/index_space_classes.hpp +++ b/libsycl/include/sycl/__impl/index_space_classes.hpp @@ -17,6 +17,9 @@ #include <sycl/__impl/detail/config.hpp> +#include <cstddef> +#include <type_traits> + _LIBSYCL_BEGIN_NAMESPACE_SYCL namespace detail { @@ -95,12 +98,7 @@ template <int Dimensions = 1> class RawArray { friend bool operator!=(const RawArray<Dimensions> &lhs, const RawArray<Dimensions> &rhs) { - for (int i = 0; i < Dimensions; ++i) { - if (lhs.MArray[i] != rhs.MArray[i]) { - return true; - } - } - return false; + return !(lhs == rhs); } protected: @@ -370,13 +368,13 @@ template <int Dimensions /* = 1*/, bool WithOffset /* = true*/> class item { std::size_t get_linear_id() const noexcept { if constexpr (WithOffset) { if constexpr (1 == Dimensions) { - return MId; + return MId[0] - MOffset[0]; } if constexpr (2 == Dimensions) { - return (MId[0] - MOffset[0]) * MRange[1] + (MId[1] - MOffset[1]); + return (MId[0] - MOffset[0]) * MRange[1] + MId[1] - MOffset[1]; } - return ((MId[0] - MOffset[0]) * MRange[1] * MRange[2]) + - ((MId[1] - MOffset[1]) * MRange[2]) + (MId[2] - MOffset[2]); + return (MId[0] - MOffset[0]) * MRange[1] * MRange[2] + + (MId[1] - MOffset[1]) * MRange[2] + MId[2] - MOffset[2]; } else { if constexpr (1 == Dimensions) { return MId[0]; @@ -384,7 +382,7 @@ template <int Dimensions /* = 1*/, bool WithOffset /* = true*/> class item { if constexpr (2 == Dimensions) { return MId[0] * MRange[1] + MId[1]; } - return (MId[0] * MRange[1] * MRange[2]) + (MId[1] * MRange[2]) + MId[2]; + return MId[0] * MRange[1] * MRange[2] + MId[1] * MRange[2] + MId[2]; } } diff --git a/libsycl/include/sycl/__impl/queue.hpp b/libsycl/include/sycl/__impl/queue.hpp index e3856d2f5b4b6..ea96ee03da5ee 100644 --- a/libsycl/include/sycl/__impl/queue.hpp +++ b/libsycl/include/sycl/__impl/queue.hpp @@ -138,6 +138,11 @@ class _LIBSYCL_EXPORT queue { template <typename Param> typename Param::return_type get_backend_info() const; + /// Blocks the calling thread until all commands previously submitted to this + /// queue have completed. Synchronous errors are reported through SYCL + /// exceptions. + void wait(); + /// Defines and invokes a SYCL kernel function as a lambda expression or a /// named function object type. /// @@ -172,8 +177,8 @@ class _LIBSYCL_EXPORT queue { static_assert( (detail::CheckFunctionSignature<std::remove_reference_t<KernelType>, void()>::value), - "sycl::queue::single_task() requires a kernel instead of command " - "group. "); + "sycl::queue::single_task() requires a kernel instead of a command " + "group"); setKernelParameters(depEvents); using NameT = @@ -305,18 +310,13 @@ class _LIBSYCL_EXPORT queue { std::forward<Rest>(rest)...); } - /// Blocks the calling thread until all commands previously submitted to this - /// queue have completed. Synchronous errors are reported through SYCL - /// exceptions. - void wait(); - private: template <typename KernelName, int Dims, typename... Rest> event parallelForImpl(range<Dims> numWorkItems, const std::vector<event> &depEvents, Rest &&...rest) { if constexpr (sizeof...(Rest) != 1) throw sycl::exception(errc::feature_not_supported, - "Reductions are not supported."); + "Reductions are not supported"); setKernelParameters(depEvents, numWorkItems); using KernelType = @@ -366,7 +366,7 @@ class _LIBSYCL_EXPORT queue { /// sycl_kernel_launch instead of KernelFunc invocation. template <typename KernelName, typename KernelType> _LIBSYCL_ENTRY_POINT_ATTR__(KernelName) - void submitSingleTask(const KernelType KernelFunc) { + void submitSingleTask(const KernelType &KernelFunc) { KernelFunc(); } @@ -375,7 +375,7 @@ class _LIBSYCL_EXPORT queue { /// sycl_kernel_launch instead of KernelFunc invocation. template <typename KernelName, typename ElementType, typename KernelType> _LIBSYCL_ENTRY_POINT_ATTR__(KernelName) - void submitParallelFor(const KernelType KernelFunc) { + void submitParallelFor(const KernelType &KernelFunc) { #ifdef __SYCL_DEVICE_ONLY__ KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>())); #endif diff --git a/libsycl/include/sycl/__spirv/spirv_vars.hpp b/libsycl/include/sycl/__spirv/spirv_vars.hpp index ec8c691b35e92..2c93e510565b3 100644 --- a/libsycl/include/sycl/__spirv/spirv_vars.hpp +++ b/libsycl/include/sycl/__spirv/spirv_vars.hpp @@ -21,14 +21,10 @@ # include <cstdint> // SPIR-V built-in variables mapped to function call. -# define _LIBSYCL_SYCL_DEVICE_ATTR __attribute__((sycl_external)) -_LIBSYCL_SYCL_DEVICE_ATTR __attribute__((const)) size_t -__spirv_BuiltInGlobalInvocationId(int); -_LIBSYCL_SYCL_DEVICE_ATTR __attribute__((const)) size_t -__spirv_BuiltInGlobalSize(int); -_LIBSYCL_SYCL_DEVICE_ATTR __attribute__((const)) size_t -__spirv_BuiltInGlobalOffset(int); +__attribute__((const)) size_t __spirv_BuiltInGlobalInvocationId(int); +__attribute__((const)) size_t __spirv_BuiltInGlobalSize(int); +__attribute__((const)) size_t __spirv_BuiltInGlobalOffset(int); namespace __spirv { diff --git a/libsycl/include/sycl/sycl.hpp b/libsycl/include/sycl/sycl.hpp index ce9fc8defd90b..7e81d952bd41c 100644 --- a/libsycl/include/sycl/sycl.hpp +++ b/libsycl/include/sycl/sycl.hpp @@ -19,6 +19,7 @@ #include <sycl/__impl/device_selector.hpp> #include <sycl/__impl/event.hpp> #include <sycl/__impl/exception.hpp> +#include <sycl/__impl/index_space_classes.hpp> #include <sycl/__impl/platform.hpp> #include <sycl/__impl/queue.hpp> #include <sycl/__impl/usm_functions.hpp> diff --git a/libsycl/src/detail/queue_impl.cpp b/libsycl/src/detail/queue_impl.cpp index 93a1f43d25bf6..623b326637932 100644 --- a/libsycl/src/detail/queue_impl.cpp +++ b/libsycl/src/detail/queue_impl.cpp @@ -20,6 +20,7 @@ namespace detail { static void setKernelLaunchArgs(const detail::UnifiedRangeView &Range, ol_kernel_launch_size_args_t &ArgsToSet) { + assert(Range.MDims < 4 && "Invalid dimensions."); uint32_t GlobalSize[3] = {1, 1, 1}; if (Range.MGlobalSize) { for (size_t I = 0; I < Range.MDims; ++I) { diff --git a/libsycl/src/detail/queue_impl.hpp b/libsycl/src/detail/queue_impl.hpp index a504c467e3927..8800464e96612 100644 --- a/libsycl/src/detail/queue_impl.hpp +++ b/libsycl/src/detail/queue_impl.hpp @@ -21,7 +21,6 @@ #include <OffloadAPI.h> #include <memory> -#include <mutex> _LIBSYCL_BEGIN_NAMESPACE_SYCL namespace detail { diff --git a/libsycl/test/basic/wrapped_usm_pointers.cpp b/libsycl/test/basic/wrapped_usm_pointers.cpp index 16a86963cc976..c936dcada4a6b 100644 --- a/libsycl/test/basic/wrapped_usm_pointers.cpp +++ b/libsycl/test/basic/wrapped_usm_pointers.cpp @@ -90,13 +90,13 @@ int main() { }; bool Fail = false; - Fail = Checker(SimpleObj); - Fail = Checker(NonTrivialObj); - Fail = Checker(NonTrivialDerivedObj); - Fail = Checker(WrapperOfSimpleObj.Obj); + Fail |= Checker(SimpleObj); + Fail |= Checker(NonTrivialObj); + Fail |= Checker(NonTrivialDerivedObj); + Fail |= Checker(WrapperOfSimpleObj.Obj); for (int i = 0; i < NumOfElements; ++i) - Fail = Checker(SimpleArr[i]); + Fail |= Checker(SimpleArr[i]); // Free allocated memory. sycl::free(NonTrivialObj.Data, Q); _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
