about summary refs log tree commit diff
diff options
context:
space:
mode:
author Jakub Sujak <jakub.sujak@arm.com> 2023-06-16 09:52:50 +0100
committer Jakub Sujak <jakub.sujak@arm.com> 2023-06-26 13:07:05 +0000
commit 8c49f16e5909a9bd5dc6e68638d2e2d8acc2fc66 (patch)
tree 7e6c13c7f4522ea2db1ccdafe7c2858632ee4532
parent 7d9a78ebfb3553b95421a0da5e2686a3923748db (diff)
download ComputeLibrary-8c49f16e5909a9bd5dc6e68638d2e2d8acc2fc66.tar.gz
Add helpers to set CKW tensor components as OpenCL kernel arguments
* Define ckw::TensorStorage. The tensor storage represents the type of tensor memory object.
* Add helper functions for setting the CKW TensorComponent and TensorStorage as OpenCL kernel arguments.
* Refactor CL Image2D method for simpler image object creation.

Resolves: COMPMID-5784
Change-Id: I2d37d06783c1dc55f3b5692b44eb49b151f2401c
Signed-off-by: Jakub Sujak <jakub.sujak@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9807
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: SiCong Li <sicong.li@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- Android.bp 1
-rw-r--r-- SConscript 5
-rw-r--r-- SConstruct 7
-rw-r--r-- compute_kernel_writer/CMakeLists.txt 6
-rw-r--r-- compute_kernel_writer/include/ckw/TensorInfo.h 10
-rw-r--r-- filelist.json 7
-rwxr-xr-x scripts/clang_tidy_rules.py 3
-rw-r--r-- src/core/CL/CLUtils.cpp 40
-rw-r--r-- src/core/CL/CLUtils.h 10
-rw-r--r-- src/core/CL/ckw/KernelArgumentsHelpers.cpp 97
-rw-r--r-- src/core/CL/ckw/KernelArgumentsHelpers.h 62
-rw-r--r-- src/gpu/cl/kernels/ClDirectConv2dKernel.cpp 27
12 files changed, 241 insertions, 34 deletions
diff --git a/Android.bp b/Android.bp
index 3f7bf03d07..cb41b1ac39 100644
--- a/Android.bp
+++ b/Android.bp
@@ -228,6 +228,7 @@ cc_library_static {
"src/core/CL/ICLSimpleKernel.cpp",
"src/core/CL/ICLTensor.cpp",
"src/core/CL/OpenCL.cpp",
+ "src/core/CL/ckw/KernelArgumentsHelpers.cpp",
"src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp",
"src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp",
"src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp",
diff --git a/SConscript b/SConscript
index be062e1d85..63009c6a05 100644
--- a/SConscript
+++ b/SConscript
@@ -529,6 +529,11 @@ if env['fixed_format_kernels']:
if env['experimental_dynamic_fusion']:
lib_files += filelist['experimental']['dynamic_fusion']
+# Compute Kernel Writer integration files
+if env['ckw']:
+ if env['opencl']:
+ lib_files += filelist['experimental']['ckw']['cl']
+
# Logging files
if env["logging"]:
lib_files += filelist['logging']
diff --git a/SConstruct b/SConstruct
index 03b0f918e2..419fa33558 100644
--- a/SConstruct
+++ b/SConstruct
@@ -126,7 +126,7 @@ vars.AddVariables(
    ├── datasets
    ├── fixtures
    └── Neon\n""", "", PathVariable.PathAccept),
- BoolVariable("experimental_dynamic_fusion", "Build the experimental dynamic fusion files", False),
+ BoolVariable("experimental_dynamic_fusion", "Build the experimental dynamic fusion files. This option also enables opencl=1 and ckw=1 on which it has a direct dependency.", False),
BoolVariable("fixed_format_kernels", "Enable fixed format kernels for GEMM", False),
BoolVariable("mapfile", "Generate a map file", False),
ListVariable("custom_options", "Custom options that can be used to turn on/off features", "none", ["disable_mmla_fp"]),
@@ -215,6 +215,11 @@ if env['os'] == 'bare_metal':
print("ERROR: OpenMP and C++11 threads not supported in bare_metal. Use cppthreads=0 openmp=0")
Exit(1)
+if env['experimental_dynamic_fusion']:
+ # Dynamic Fusion on GPU has a direct dependency on OpenCL and Compute Kernel Writer
+ env['opencl'] = 1
+ env['ckw'] = 1
+
if env['opencl'] and env['embed_kernels'] and env['compress_kernels'] and env['os'] not in ['android']:
print("Compressed kernels are supported only for android builds")
Exit(1)
diff --git a/compute_kernel_writer/CMakeLists.txt b/compute_kernel_writer/CMakeLists.txt
index f203a18f56..93372de3db 100644
--- a/compute_kernel_writer/CMakeLists.txt
+++ b/compute_kernel_writer/CMakeLists.txt
@@ -120,12 +120,6 @@ target_include_directories(ckw
PRIVATE ${CMAKE_CURRENT_LIST_DIR}
)
-set_target_properties(ckw
- PROPERTIES
- SOVERSION ${CMAKE_PROJECT_VERSION_MAJOR}
- VERSION ${CMAKE_PROJECT_VERSION}
- )
-
#---------------------------------------------------------------------
# Validation tests
diff --git a/compute_kernel_writer/include/ckw/TensorInfo.h b/compute_kernel_writer/include/ckw/TensorInfo.h
index b5f76cffa5..44846bc94c 100644
--- a/compute_kernel_writer/include/ckw/TensorInfo.h
+++ b/compute_kernel_writer/include/ckw/TensorInfo.h
@@ -86,6 +86,16 @@ enum class TensorComponent : uint32_t
Dim1xDim2xDim3 = 0x08001110
};
+/** Compute Kernel Writer tensor storage. The tensor storage represents the type of tensor memory object.
+ */
+enum class TensorStorage : uint32_t
+{
+ Unknown = 0x00000000,
+ BufferUint8Ptr = 0x01000000,
+ Texture2dReadOnly = 0x02000001,
+ Texture2dWriteOnly = 0x02000010,
+};
+
/** Compute Kernel Writer tensor shape
* Negative dimensions can be interpreted as dynamic dimensions by the Compute Kernel Writer
*/
diff --git a/filelist.json b/filelist.json
index f354e69398..0e18b37f5c 100644
--- a/filelist.json
+++ b/filelist.json
@@ -2337,6 +2337,11 @@
"src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp",
"src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp",
"src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp"
- ]
+ ],
+ "ckw": {
+ "cl": [
+ "src/core/CL/ckw/KernelArgumentsHelpers.cpp"
+ ]
+ }
}
}
diff --git a/scripts/clang_tidy_rules.py b/scripts/clang_tidy_rules.py
index 3e98e85ad3..56b33f3922 100755
--- a/scripts/clang_tidy_rules.py
+++ b/scripts/clang_tidy_rules.py
@@ -28,7 +28,8 @@ import re
import sys
def get_list_includes():
- return "src/cpu/kernels/assembly " \
+ return "compute_kernel_writer/include " \
+ "src/cpu/kernels/assembly " \
"src/core/NEON/kernels/assembly " \
"src/core/NEON/kernels/convolution/winograd " \
"include/linux include " \
diff --git a/src/core/CL/CLUtils.cpp b/src/core/CL/CLUtils.cpp
index 84cf88e099..709f8fa971 100644
--- a/src/core/CL/CLUtils.cpp
+++ b/src/core/CL/CLUtils.cpp
@@ -22,8 +22,8 @@
* SOFTWARE.
*/
#include "arm_compute/core/CL/CLCompileContext.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Types.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Validate.h"
#include "support/StringSupport.h"
@@ -32,8 +32,40 @@
namespace arm_compute
{
-cl::Image2D create_image2d_from_buffer(const cl::Context &ctx, const cl::Buffer &buffer, const TensorShape &shape2d, DataType data_type, size_t image_row_pitch, CLImage2DType type)
+cl::Image2D create_image2d_from_tensor(const ICLTensor *tensor, CLImage2DType image_type)
{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
+
+ const cl::Context &ctx = CLKernelLibrary::get().context();
+ const cl::Buffer &buffer = tensor->cl_buffer();
+ const ITensorInfo *info = tensor->info();
+ ARM_COMPUTE_ERROR_ON_MSG(info->lock_paddings(),
+ "Tensor paddings must not be locked to allow extending paddings to satisfy cl_image pitch alignment requirement");
+
+ const size_t image_w{ info->dimension(0) / 4 };
+ const size_t image_h{ info->tensor_shape().total_size() / info->dimension(0) };
+ const size_t max_image_w{ CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>() };
+ const size_t max_image_h{ CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>() };
+
+ ARM_COMPUTE_UNUSED(max_image_w, max_image_h);
+ ARM_COMPUTE_ERROR_ON_MSG(image_w > max_image_w, "Image width exceeds maximum width for exporting to cl_image");
+ ARM_COMPUTE_ERROR_ON_MSG(image_h > max_image_h, "Image height exceeds maximum height for exporting to cl_image");
+
+ const TensorShape shape2d(image_w, image_h);
+ const size_t image_row_pitch = info->strides_in_bytes()[1];
+
+ return create_image2d_from_buffer(ctx, buffer, shape2d, info->data_type(), image_row_pitch, image_type);
+}
+
+cl::Image2D create_image2d_from_buffer(const cl::Context &ctx, const cl::Buffer &buffer, const TensorShape &shape2d, DataType data_type, size_t image_row_pitch, CLImage2DType image_type)
+{
+ ARM_COMPUTE_ERROR_ON_MSG(!image2d_from_buffer_supported(CLKernelLibrary::get().get_device()),
+ "The extension cl_khr_image2d_from_buffer is not supported on the target platform");
+ ARM_COMPUTE_ERROR_ON_MSG(get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device()) == 0,
+ "Impossible to retrieve the cl_image pitch alignment");
+ ARM_COMPUTE_ERROR_ON_MSG(buffer.get() == nullptr,
+ "Cannot create cl_image from empty cl_buffer");
+
cl_channel_type cl_data_type;
switch(data_type)
@@ -61,7 +93,7 @@ cl::Image2D create_image2d_from_buffer(const cl::Context &ctx, const cl::Buffer
desc.image_width = shape2d[0];
desc.image_height = shape2d[1];
- switch(type)
+ switch(image_type)
{
case CLImage2DType::ReadOnly:
cl_image = clCreateImage(ctx(), CL_MEM_READ_ONLY, &format, &desc, nullptr, &err);
diff --git a/src/core/CL/CLUtils.h b/src/core/CL/CLUtils.h
index b31944c72f..35dbee723e 100644
--- a/src/core/CL/CLUtils.h
+++ b/src/core/CL/CLUtils.h
@@ -33,6 +33,7 @@ namespace arm_compute
class TensorShape;
class CLBuildOptions;
class ITensorInfo;
+class ICLTensor;
/** OpenCL Image2D types */
enum class CLImage2DType
@@ -41,6 +42,15 @@ enum class CLImage2DType
WriteOnly
};
+/** Create a cl::Image2D object from a tensor
+ *
+ * @param[in] tensor Tensor from which to construct Image 2D object
+ * @param[in] image_type Image 2D type (@ref CLImage2DType)
+ *
+ * @return cl::Image2D object
+ */
+cl::Image2D create_image2d_from_tensor(const ICLTensor *tensor, CLImage2DType image_type);
+
/** Create a cl::Image2D object from an OpenCL buffer
*
* @note The following conditions are required to create a OpenCL image object from OpenCL buffer,
diff --git a/src/core/CL/ckw/KernelArgumentsHelpers.cpp b/src/core/CL/ckw/KernelArgumentsHelpers.cpp
new file mode 100644
index 0000000000..3fbdc46a3a
--- /dev/null
+++ b/src/core/CL/ckw/KernelArgumentsHelpers.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "KernelArgumentsHelpers.h"
+
+namespace arm_compute
+{
+
+void cl_add_tensor_component_argument(cl::Kernel &kernel, unsigned int &idx, ICLTensor *tensor, ckw::TensorComponent component)
+{
+ ARM_COMPUTE_ERROR_ON(tensor == nullptr);
+
+ const auto *info = tensor->info();
+ const auto &strides = info->strides_in_bytes();
+
+ switch(component)
+ {
+ case ckw::TensorComponent::OffsetFirstElement:
+ kernel.setArg<cl_uint>(idx++, info->offset_first_element_in_bytes());
+ break;
+ case ckw::TensorComponent::Stride0:
+ kernel.setArg<cl_uint>(idx++, strides[0]);
+ break;
+ case ckw::TensorComponent::Stride1:
+ kernel.setArg<cl_uint>(idx++, strides[1]);
+ break;
+ case ckw::TensorComponent::Stride2:
+ kernel.setArg<cl_uint>(idx++, strides[2]);
+ break;
+ case ckw::TensorComponent::Stride3:
+ kernel.setArg<cl_uint>(idx++, strides[3]);
+ break;
+ case ckw::TensorComponent::Stride4:
+ kernel.setArg<cl_uint>(idx++, strides[4]);
+ break;
+ case ckw::TensorComponent::Dim0:
+ kernel.setArg<cl_uint>(idx++, info->dimension(0));
+ break;
+ case ckw::TensorComponent::Dim1:
+ kernel.setArg<cl_uint>(idx++, info->dimension(1));
+ break;
+ case ckw::TensorComponent::Dim2:
+ kernel.setArg<cl_uint>(idx++, info->dimension(2));
+ break;
+ case ckw::TensorComponent::Dim3:
+ kernel.setArg<cl_uint>(idx++, info->dimension(3));
+ break;
+ case ckw::TensorComponent::Dim4:
+ kernel.setArg<cl_uint>(idx++, info->dimension(4));
+ break;
+ case ckw::TensorComponent::Dim1xDim2:
+ kernel.setArg<cl_uint>(idx++, info->dimension(1) * info->dimension(2));
+ break;
+ case ckw::TensorComponent::Dim2xDim3:
+ kernel.setArg<cl_uint>(idx++, info->dimension(2) * info->dimension(3));
+ break;
+ case ckw::TensorComponent::Dim1xDim2xDim3:
+ kernel.setArg<cl_uint>(idx++, info->dimension(1) * info->dimension(2) * info->dimension(3));
+ break;
+ case ckw::TensorComponent::Unknown:
+ default:
+ ARM_COMPUTE_ERROR("Unknown tensor component");
+ }
+}
+
+void cl_add_buffer_argument(cl::Kernel &kernel, unsigned int &idx, const cl::Buffer &buffer)
+{
+ kernel.setArg(idx++, buffer);
+}
+
+void cl_add_texture_argument(cl::Kernel &kernel, unsigned int &idx, const cl::Image &image)
+{
+ kernel.setArg(idx++, image);
+}
+
+} // namespace arm_compute
diff --git a/src/core/CL/ckw/KernelArgumentsHelpers.h b/src/core/CL/ckw/KernelArgumentsHelpers.h
new file mode 100644
index 0000000000..b681636c26
--- /dev/null
+++ b/src/core/CL/ckw/KernelArgumentsHelpers.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ARM_COMPUTE_SRC_CKW_KERNELARGUMENTSHELPERS_H
+#define ARM_COMPUTE_SRC_CKW_KERNELARGUMENTSHELPERS_H
+
+#include "arm_compute/core/CL/ICLTensor.h"
+
+#include "ckw/TensorInfo.h"
+
+namespace arm_compute
+{
+
+/** Select a Compute Kernel Writer tensor component from a tensor and add it to the kernel's arguments at the specified index @p idx.
+ *
+ * @param[in,out] kernel OpenCL kernel to configure with the provided argument.
+ * @param[in,out] idx Index at which to add the argument.
+ * @param[in] tensor Tensor from which to access the tensor component.
+ * @param[in] component Tensor component to select such as tensor dimensions, strides, etc.
+ */
+void cl_add_tensor_component_argument(cl::Kernel &kernel, unsigned int &idx, ICLTensor *tensor, ckw::TensorComponent component);
+
+/** Add an OpenCL buffer object to the kernel's arguments at the specified index @p idx.
+ *
+ * @param[in,out] kernel OpenCL kernel to configure with the provided argument.
+ * @param[in,out] idx Index at which to add the argument.
+ * @param[in] buffer OpenCL buffer containing the tensor's data.
+ */
+void cl_add_buffer_argument(cl::Kernel &kernel, unsigned int &idx, const cl::Buffer &buffer);
+
+/** Add an OpenCL image object to the kernel's arguments at the specified index @p idx.
+ *
+ * @param[in,out] kernel OpenCL kernel to configure with the provided argument.
+ * @param[in,out] idx Index at which to add the argument.
+ * @param[in] image OpenCL image containing the image's data.
+ */
+void cl_add_texture_argument(cl::Kernel &kernel, unsigned int &idx, const cl::Image &image);
+
+} // namespace arm_compute
+
+#endif //ARM_COMPUTE_SRC_CKW_KERNELARGUMENTSHELPERS_H
diff --git a/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp b/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp
index 68d7e30c9b..f01341c7b5 100644
--- a/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp
+++ b/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp
@@ -431,35 +431,20 @@ void ClDirectConv2dKernel::run_op(ITensorPack &tensors, const Window &window, cl
if(_export_weights_to_cl_image)
{
- const size_t image_w = weights->info()->dimension(0) / 4;
- const size_t image_h = weights->info()->dimension(1) * weights->info()->dimension(2) * weights->info()->dimension(3);
- const TensorShape shape2d(image_w, image_h);
- const size_t image_row_pitch = weights->info()->strides_in_bytes()[1];
-
- // Export cl_buffer to cl_image
- weights_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), weights->cl_buffer(), shape2d, weights->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
+ // Export tensor to cl_image
+ weights_cl_image = create_image2d_from_tensor(weights, CLImage2DType::ReadOnly);
}
if(_export_output_to_cl_image)
{
- const size_t image_w = dst->info()->dimension(0) / 4;
- const size_t image_h = dst->info()->dimension(1) * dst->info()->dimension(2) * dst->info()->dimension(3);
- const TensorShape shape2d(image_w, image_h);
- const size_t image_row_pitch = dst->info()->strides_in_bytes()[1];
-
- // Export cl_buffer to cl_image
- output_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), dst->cl_buffer(), shape2d, dst->info()->data_type(), image_row_pitch, CLImage2DType::WriteOnly);
+ // Export tensor to cl_image
+ output_cl_image = create_image2d_from_tensor(dst, CLImage2DType::WriteOnly);
}
if(_export_input_to_cl_image)
{
- const size_t image_w = src->info()->dimension(0) / 4;
- const size_t image_h = src->info()->dimension(1) * src->info()->dimension(2) * src->info()->dimension(3);
- const TensorShape shape2d(image_w, image_h);
- const size_t image_row_pitch = src->info()->strides_in_bytes()[1];
-
- // Export cl_buffer to cl_image
- input_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), src->cl_buffer(), shape2d, src->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
+ // Export tensor to cl_image
+ input_cl_image = create_image2d_from_tensor(src, CLImage2DType::ReadOnly);
}
unsigned int idx = 0;