about summary refs log tree commit diff
path: root/src/dynamic_fusion/runtime/gpu/cl
diff options
context:
space:
mode:
Diffstat (limited to 'src/dynamic_fusion/runtime/gpu/cl')
-rw-r--r--  src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp | 139
-rw-r--r--  src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h   |  24
2 files changed, 6 insertions, 157 deletions
diff --git a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp
index 9ca20fa152..eab5cddd07 100644
--- a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp
+++ b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,12 +26,11 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "src/core/CL/CLUtils.h"
-#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h"
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h"
#include "src/gpu/cl/ClKernelLibrary.h"
#include "support/Cast.h"
+
namespace arm_compute
{
namespace experimental
@@ -61,128 +60,6 @@ void ClKernelRuntime::configure(const ClCompileContext &compile_ctx, const GpuKe
_arguments = code.arguments();
}
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-
-inline void ClKernelRuntime::add_tensor_argument(unsigned int &idx,
- const GpuKernelArgumentInfo &arg,
- const ICLTensor *tensor,
- const Window &arg_slice,
- std::vector<cl::Image2D> &cl_images)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
-
- switch (arg.type)
- {
- case GpuKernelArgumentInfo::Type::Scalar:
- {
- ARM_COMPUTE_ERROR("Unsupported yet");
- break;
- }
-
- case GpuKernelArgumentInfo::Type::Vector:
- {
- add_1D_tensor_argument(idx, tensor, arg_slice);
- break;
- }
-
- case GpuKernelArgumentInfo::Type::Image:
- {
- add_2D_tensor_argument(idx, tensor, arg_slice);
- break;
- }
- case GpuKernelArgumentInfo::Type::Image_Reinterpret_As_3D:
- {
- add_2D_tensor_argument(idx, tensor, arg_slice);
- const unsigned int total_cross_plane_pad = tensor->info()->padding().top + tensor->info()->padding().bottom;
- _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(total_cross_plane_pad));
- break;
- }
- case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D:
- {
- const TensorShape shape2d(tensor->info()->dimension(0) / 4, tensor->info()->dimension(1) *
- tensor->info()->dimension(2) *
- tensor->info()->dimension(3));
- const size_t image_row_pitch = tensor->info()->strides_in_bytes()[1];
- cl::Image2D tensor_image2d =
- create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), shape2d,
- tensor->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
- cl_images.push_back(tensor_image2d);
- _kernel.setArg(idx++, tensor_image2d);
- break;
- }
-
- case GpuKernelArgumentInfo::Type::Image_3D:
- {
- add_2D_tensor_argument(idx, tensor, arg_slice);
- _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(tensor->info()->strides_in_bytes()[2]));
- break;
- }
- case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
- {
- const TensorShape shape2d(tensor->info()->dimension(0) / 4, tensor->info()->dimension(1) *
- tensor->info()->dimension(2) *
- tensor->info()->dimension(3));
- const size_t image_row_pitch = tensor->info()->strides_in_bytes()[1];
- cl::Image2D tensor_image2d =
- create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), shape2d,
- tensor->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
- cl_images.push_back(tensor_image2d);
- _kernel.setArg(idx++, tensor_image2d);
- _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(tensor->info()->strides_in_bytes()[2]));
- break;
- }
-
- case GpuKernelArgumentInfo::Type::Tensor_3D:
- {
- add_3D_tensor_argument(idx, tensor, arg_slice);
- break;
- }
-
- case GpuKernelArgumentInfo::Type::Tensor_4D:
- {
- add_4D_tensor_argument(idx, tensor, arg_slice);
- break;
- }
- case GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer:
- {
- add_4d_tensor_nhwc_argument(idx, tensor);
- break;
- }
- case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image:
- {
- const size_t image_w = tensor->info()->dimension(0) / 4;
- const size_t image_h = tensor->info()->tensor_shape().total_size_upper(1);
- const size_t image_stride_y = tensor->info()->strides_in_bytes()[1];
-
- cl::Image2D tensor_image2d = create_image2d_from_buffer(
- CLKernelLibrary::get().context(), tensor->cl_buffer(), TensorShape(image_w, image_h),
- tensor->info()->data_type(), image_stride_y, CLImage2DType::ReadOnly);
- cl_images.push_back(tensor_image2d);
-
- _kernel.setArg(idx++, tensor_image2d);
- add_4d_tensor_nhwc_argument(idx, tensor);
- break;
- }
- case GpuKernelArgumentInfo::Type::Tensor_Special_0:
- {
- const ITensorInfo *info = tensor->info();
- const Strides &strides = info->strides_in_bytes();
-
- _kernel.setArg(idx++, tensor->cl_buffer());
- const size_t dim1xdim2 = info->tensor_shape()[1] * info->tensor_shape()[2];
- _kernel.setArg<cl_int>(idx++, static_cast<int32_t>(dim1xdim2));
- const size_t stride1 = strides[1];
- _kernel.setArg<cl_int>(idx++, static_cast<int32_t>(stride1));
- break;
- }
- default:
- {
- ARM_COMPUTE_ERROR("Unsupported");
- }
- }
-}
-
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
inline void ClKernelRuntime::add_kernel_argument(unsigned int &idx,
const GpuKernelArgumentBinding &arg,
const ICLTensor *tensor,
@@ -234,7 +111,6 @@ inline void ClKernelRuntime::add_kernel_argument(unsigned int
}
}
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
@@ -253,17 +129,7 @@ void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::Com
// Set kernel arguments
// CLImages created from tensor arguments. Need to be retained until enqueue
std::vector<cl::Image2D> cl_images;
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- for (auto id_arg : _arguments)
- {
- const auto arg = id_arg.second;
- auto tensor = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(id_arg.first));
- ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
- ARM_COMPUTE_ERROR_ON_NULLPTR(tensor->info());
- add_tensor_argument(idx, *arg.kernel_argument_info(), tensor, slice, cl_images);
- }
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
for (const auto &arg : _arguments)
{
auto tensor = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(arg.id()));
@@ -271,7 +137,6 @@ void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::Com
ARM_COMPUTE_ERROR_ON_NULLPTR(tensor->info());
add_kernel_argument(idx, arg, tensor, cl_images);
}
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
// Dispatch kernel
enqueue(queue, *this, slice, lws_hint(), use_dummy_work_items);
diff --git a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h
index e78567eb9d..148e4db581 100644
--- a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h
+++ b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_DYNAMIC_FUSION_RUNTIME_GPU_CL_CLKERNELRUNTIME
-#define SRC_DYNAMIC_FUSION_RUNTIME_GPU_CL_CLKERNELRUNTIME
+#ifndef ACL_SRC_DYNAMIC_FUSION_RUNTIME_GPU_CL_CLKERNELRUNTIME_H
+#define ACL_SRC_DYNAMIC_FUSION_RUNTIME_GPU_CL_CLKERNELRUNTIME_H
#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h"
@@ -59,21 +59,6 @@ public:
virtual void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
private:
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- /** Set a kernel tensor argument
- *
- * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
- * @param[in] arg Kernel argument descriptor accompanying @p tensor
- * @param[in] tensor Tensor to set as an argument of the object's kernel
- * @param[in] arg_slice Window the kernel will be run on
- * @param[out] cl_images Extra cl images created from the tensor (will need to be retained until the kernel is enqueued)
- */
- inline void add_tensor_argument(unsigned int &idx,
- const GpuKernelArgumentInfo &arg,
- const ICLTensor *tensor,
- const Window &arg_slice,
- std::vector<cl::Image2D> &cl_images);
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
/** Set a kernel argument as part of a tensor
*
* @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
@@ -85,7 +70,6 @@ private:
const GpuKernelArgumentBinding &arg,
const ICLTensor *tensor,
std::vector<cl::Image2D> &cl_images);
-#endif // ACL_INTERNAL_TEST_CKW_IN_DF
private:
GpuKernelArgumentList _arguments{};
@@ -94,4 +78,4 @@ private:
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
-#endif /* SRC_DYNAMIC_FUSION_RUNTIME_GPU_CL_CLKERNELRUNTIME */
+#endif // ACL_SRC_DYNAMIC_FUSION_RUNTIME_GPU_CL_CLKERNELRUNTIME_H