about summary refs log tree commit diff
path: root/src/core/CL/CLHelpers.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/CL/CLHelpers.cpp')
-rw-r--r--  src/core/CL/CLHelpers.cpp  158
1 files changed, 121 insertions, 37 deletions
diff --git a/src/core/CL/CLHelpers.cpp b/src/core/CL/CLHelpers.cpp
index 6af378c7ab..5ea99d360a 100644
--- a/src/core/CL/CLHelpers.cpp
+++ b/src/core/CL/CLHelpers.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,13 +22,16 @@
* SOFTWARE.
*/
#include "arm_compute/core/CL/CLHelpers.h"
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/CLTypes.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Log.h"
#include "arm_compute/core/Types.h"
-#include "src/core/gpu/cl/ClCompileContext.h"
+#include "arm_compute/core/utils/DataTypeUtils.h"
-#include "src/core/gpu/cl/ClKernelLibrary.h"
+#include "src/gpu/cl/ClCompileContext.h"
+#include "src/gpu/cl/ClKernelLibrary.h"
#include <utility>
#include <vector>
@@ -37,7 +40,7 @@ namespace arm_compute
{
std::string get_cl_type_from_data_type(const DataType &dt)
{
- switch(dt)
+ switch (dt)
{
case DataType::U8:
case DataType::QASYMM8:
@@ -73,7 +76,7 @@ std::string get_cl_type_from_data_type(const DataType &dt)
std::string get_cl_promoted_type_from_data_type(const DataType &dt)
{
- switch(dt)
+ switch (dt)
{
case DataType::U8:
case DataType::QASYMM8:
@@ -103,7 +106,7 @@ std::string get_cl_promoted_type_from_data_type(const DataType &dt)
std::string get_cl_unsigned_type_from_element_size(size_t element_size)
{
- switch(element_size)
+ switch (element_size)
{
case 1:
return "uchar";
@@ -121,7 +124,7 @@ std::string get_cl_unsigned_type_from_element_size(size_t element_size)
std::string get_cl_signed_type_from_element_size(size_t element_size)
{
- switch(element_size)
+ switch (element_size)
{
case 1:
return "char";
@@ -139,11 +142,10 @@ std::string get_cl_signed_type_from_element_size(size_t element_size)
std::string get_cl_select_type_from_data_type(const DataType &dt)
{
- switch(dt)
+ switch (dt)
{
case DataType::U8:
case DataType::QASYMM8:
- return "uchar";
case DataType::S8:
case DataType::QASYMM8_SIGNED:
case DataType::QSYMM8:
@@ -173,7 +175,7 @@ std::string get_cl_select_type_from_data_type(const DataType &dt)
std::string get_cl_dot8_acc_type_from_data_type(const DataType &dt)
{
- switch(dt)
+ switch (dt)
{
case DataType::U8:
case DataType::QASYMM8:
@@ -191,7 +193,7 @@ std::string get_cl_dot8_acc_type_from_data_type(const DataType &dt)
std::string get_data_size_from_data_type(const DataType &dt)
{
- switch(dt)
+ switch (dt)
{
case DataType::U8:
case DataType::S8:
@@ -243,8 +245,9 @@ bool dot8_supported(const cl::Device &device)
const GPUTarget gpu_target = get_target_from_name(device_name);
// SW_WORKAROUND: Workaround for DDK revision r14p0 to enable cl_arm_integer_dot_product_int8
- std::set<GPUTarget> sw_workaround_issue = { GPUTarget::G76 };
- return (device_supports_extension(device, "cl_arm_integer_dot_product_int8") || sw_workaround_issue.count(gpu_target) != 0);
+ std::set<GPUTarget> sw_workaround_issue = {GPUTarget::G76};
+ return (device_supports_extension(device, "cl_arm_integer_dot_product_int8") ||
+ sw_workaround_issue.count(gpu_target) != 0);
}
bool dot8_acc_supported(const cl::Device &device)
@@ -255,19 +258,23 @@ bool dot8_acc_supported(const cl::Device &device)
CLVersion get_cl_version(const cl::Device &device)
{
std::string version_str = device.getInfo<CL_DEVICE_VERSION>();
- if(version_str.find("OpenCL 2") != std::string::npos)
+ if (version_str.find("OpenCL 3") != std::string::npos)
+ {
+ return CLVersion::CL30;
+ }
+ else if (version_str.find("OpenCL 2") != std::string::npos)
{
return CLVersion::CL20;
}
- else if(version_str.find("OpenCL 1.2") != std::string::npos)
+ else if (version_str.find("OpenCL 1.2") != std::string::npos)
{
return CLVersion::CL12;
}
- else if(version_str.find("OpenCL 1.1") != std::string::npos)
+ else if (version_str.find("OpenCL 1.1") != std::string::npos)
{
return CLVersion::CL11;
}
- else if(version_str.find("OpenCL 1.0") != std::string::npos)
+ else if (version_str.find("OpenCL 1.0") != std::string::npos)
{
return CLVersion::CL10;
}
@@ -282,14 +289,15 @@ bool device_supports_extension(const cl::Device &device, const char *extension_n
return (pos != std::string::npos);
}
-bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Size2D &kernel_size, DataLayout data_layout)
+bool cl_winograd_convolution_layer_supported(const Size2D &output_tile,
+ const Size2D &kernel_size,
+ DataLayout data_layout)
{
ARM_COMPUTE_ERROR_ON(data_layout == DataLayout::UNKNOWN);
using WinogradConfiguration = std::pair<std::pair<int, int>, std::pair<int, int>>;
- std::vector<WinogradConfiguration> winograd_configs_nchw =
- {
+ std::vector<WinogradConfiguration> winograd_configs_nchw = {
WinogradConfiguration(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3)),
WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3)),
WinogradConfiguration(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1)),
@@ -298,11 +306,9 @@ bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Si
WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3)),
WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5)),
WinogradConfiguration(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1)),
- WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5))
- };
+ WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5))};
- std::vector<WinogradConfiguration> winograd_configs_nhwc =
- {
+ std::vector<WinogradConfiguration> winograd_configs_nhwc = {
WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3)),
WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3)),
WinogradConfiguration(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1)),
@@ -319,19 +325,21 @@ bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Si
std::pair<int, int>(kernel_size.width, kernel_size.height));
// Return true if supported
- if(data_layout == DataLayout::NCHW)
+ if (data_layout == DataLayout::NCHW)
{
- return (std::find(winograd_configs_nchw.begin(), winograd_configs_nchw.end(), p) != winograd_configs_nchw.end());
+ return (std::find(winograd_configs_nchw.begin(), winograd_configs_nchw.end(), p) !=
+ winograd_configs_nchw.end());
}
else
{
- return (std::find(winograd_configs_nhwc.begin(), winograd_configs_nhwc.end(), p) != winograd_configs_nhwc.end());
+ return (std::find(winograd_configs_nhwc.begin(), winograd_configs_nhwc.end(), p) !=
+ winograd_configs_nhwc.end());
}
}
size_t preferred_vector_width(const cl::Device &device, const DataType dt)
{
- switch(dt)
+ switch (dt)
{
case DataType::U8:
case DataType::S8:
@@ -377,7 +385,7 @@ size_t get_cl_image_pitch_alignment(const cl::Device &device)
cl_int err = clGetDeviceInfo(device(), CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof(cl_uint), &pixel_aligment, nullptr);
- if(err == CL_SUCCESS)
+ if (err == CL_SUCCESS)
{
return pixel_aligment;
}
@@ -387,7 +395,18 @@ size_t get_cl_image_pitch_alignment(const cl::Device &device)
}
}
-cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set<std::string> &build_opts)
+bool get_cl_non_uniform_work_group_supported(const cl::Device &device)
+{
+ cl_bool supported = CL_FALSE;
+
+ cl_int err =
+ clGetDeviceInfo(device(), CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT, sizeof(cl_bool), &supported, nullptr);
+
+ return (err == CL_SUCCESS && supported == CL_TRUE);
+}
+
+cl::Kernel
+create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set<std::string> &build_opts)
{
opencl::ClKernelLibrary &klib = opencl::ClKernelLibrary::get();
@@ -395,7 +414,8 @@ cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_
auto kernel_src = klib.program(program_name);
const std::string kernel_path = klib.kernel_path();
- return static_cast<cl::Kernel>(ctx.create_kernel(kernel_name, program_name, kernel_src.program, kernel_path, build_opts, kernel_src.is_binary));
+ return static_cast<cl::Kernel>(ctx.create_kernel(kernel_name, program_name, kernel_src.program, kernel_path,
+ build_opts, kernel_src.is_binary));
}
cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimension, unsigned int vector_size)
@@ -409,8 +429,9 @@ cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimensio
bool get_wbsm_support_info(const cl::Device &device)
{
cl_bitfield capabilities = 0;
- cl_int err = clGetDeviceInfo(device.get(), ARM_COMPUTE_LIBRARY_OPENCL_DEVICE_CAPABILITIES_ARM, sizeof(cl_bitfield), &capabilities, nullptr);
- if((err == CL_SUCCESS) && (capabilities & ARM_COMPUTE_LIBRARY_OPENCL_EXEC_WBSM_ARM))
+ cl_int err = clGetDeviceInfo(device.get(), CL_DEVICE_SCHEDULING_CONTROLS_CAPABILITIES_ARM, sizeof(cl_bitfield),
+ &capabilities, nullptr);
+ if ((err == CL_SUCCESS) && (capabilities & CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_MODIFIER_ARM))
{
return true;
}
@@ -419,12 +440,75 @@ bool get_wbsm_support_info(const cl::Device &device)
void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint)
{
- cl_int err = clSetKernelExecInfo(kernel.get(),
- ARM_COMPUTE_LIBRARY_OPENCL_EXEC_WBSM_ARM,
- sizeof(cl_int),
- &wbsm_hint);
+ cl_int err = clSetKernelExecInfo(kernel.get(), CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_MODIFIER_ARM,
+ sizeof(cl_int), &wbsm_hint);
ARM_COMPUTE_UNUSED(err);
ARM_COMPUTE_ERROR_ON(err != CL_SUCCESS);
}
+bool export_to_cl_image(const ITensorInfo *tensor)
+{
+ if (tensor->tensor_shape()[0] % 4 != 0)
+ {
+ return false;
+ }
+
+ // If not floating point
+ if (!is_data_type_float(tensor->data_type()))
+ {
+ return false;
+ }
+
+ // Check if the cl_khr_image2d_from_buffer extension is supported on the target platform
+ if (!image2d_from_buffer_supported(CLKernelLibrary::get().get_device()))
+ {
+ return false;
+ }
+
+ // Check cl image pitch alignment
+ if (get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device()) == 0)
+ {
+ return false;
+ }
+
+ const size_t image_w = tensor->tensor_shape()[0] / 4;
+ const size_t image_h = tensor->tensor_shape().total_size() / tensor->tensor_shape()[0];
+ const size_t max_image_w = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>();
+ const size_t max_image_h = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>();
+
+ if (image_w > max_image_w || image_h > max_image_h)
+ {
+ return false;
+ }
+
+ return true;
+}
+
+void set_unroll_with_pragma(CLBuildOptions &built_opts, std::initializer_list<int> values)
+{
+ for (const int value : values)
+ {
+ if (value > max_manual_loop_unrolling)
+ {
+ built_opts.add_option("-DUNROLL_WITH_PRAGMA");
+ return;
+ }
+ }
+}
+
+bool arm_matrix_multiply_supported(const cl::Device &device)
+{
+ return device_supports_extension(device, "cl_arm_matrix_multiply");
+}
+
+bool command_buffer_supported(const cl::Device &device)
+{
+ return device_supports_extension(device, "cl_khr_command_buffer");
+}
+
+bool command_buffer_mutable_dispatch_supported(const cl::Device &device)
+{
+ return device_supports_extension(device, "cl_khr_command_buffer_mutable_dispatch");
+}
+
} // namespace arm_compute