diff options
Diffstat (limited to 'src/core/CL/CLHelpers.cpp')
-rw-r--r-- | src/core/CL/CLHelpers.cpp | 184 |
1 files changed, 140 insertions, 44 deletions
diff --git a/src/core/CL/CLHelpers.cpp b/src/core/CL/CLHelpers.cpp index 895bb72827..5ea99d360a 100644 --- a/src/core/CL/CLHelpers.cpp +++ b/src/core/CL/CLHelpers.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,12 +22,16 @@ * SOFTWARE. */ #include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLCoreRuntimeContext.h" + #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/CLTypes.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Log.h" #include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/DataTypeUtils.h" + +#include "src/gpu/cl/ClCompileContext.h" +#include "src/gpu/cl/ClKernelLibrary.h" #include <utility> #include <vector> @@ -36,7 +40,7 @@ namespace arm_compute { std::string get_cl_type_from_data_type(const DataType &dt) { - switch(dt) + switch (dt) { case DataType::U8: case DataType::QASYMM8: @@ -72,7 +76,7 @@ std::string get_cl_type_from_data_type(const DataType &dt) std::string get_cl_promoted_type_from_data_type(const DataType &dt) { - switch(dt) + switch (dt) { case DataType::U8: case DataType::QASYMM8: @@ -102,7 +106,7 @@ std::string get_cl_promoted_type_from_data_type(const DataType &dt) std::string get_cl_unsigned_type_from_element_size(size_t element_size) { - switch(element_size) + switch (element_size) { case 1: return "uchar"; @@ -120,7 +124,7 @@ std::string get_cl_unsigned_type_from_element_size(size_t element_size) std::string get_cl_signed_type_from_element_size(size_t element_size) { - switch(element_size) + switch (element_size) { case 1: return "char"; @@ -138,11 +142,10 @@ std::string get_cl_signed_type_from_element_size(size_t element_size) std::string get_cl_select_type_from_data_type(const DataType &dt) { - switch(dt) + switch (dt) { case DataType::U8: case DataType::QASYMM8: - return "uchar"; case DataType::S8: case DataType::QASYMM8_SIGNED: case DataType::QSYMM8: @@ -172,7 +175,7 @@ std::string get_cl_select_type_from_data_type(const DataType &dt) std::string get_cl_dot8_acc_type_from_data_type(const DataType &dt) { - switch(dt) + switch (dt) { case DataType::U8: case DataType::QASYMM8: @@ -190,7 +193,7 @@ std::string get_cl_dot8_acc_type_from_data_type(const DataType &dt) std::string get_data_size_from_data_type(const DataType &dt) { - switch(dt) + switch (dt) { case DataType::U8: case DataType::S8: @@ -242,8 +245,9 @@ bool dot8_supported(const cl::Device &device) const GPUTarget gpu_target = get_target_from_name(device_name); // SW_WORKAROUND: Workaround for DDK revision r14p0.to enable cl_arm_integer_dot_product_int8 - std::set<GPUTarget> sw_workaround_issue = { GPUTarget::G76 }; - return (device_supports_extension(device, "cl_arm_integer_dot_product_int8") || sw_workaround_issue.count(gpu_target) != 0); + std::set<GPUTarget> sw_workaround_issue = {GPUTarget::G76}; + return (device_supports_extension(device, "cl_arm_integer_dot_product_int8") || + sw_workaround_issue.count(gpu_target) != 0); } bool dot8_acc_supported(const cl::Device &device) @@ -254,19 +258,23 @@ bool dot8_acc_supported(const cl::Device &device) CLVersion get_cl_version(const cl::Device &device) { std::string version_str = device.getInfo<CL_DEVICE_VERSION>(); - if(version_str.find("OpenCL 2") != std::string::npos) + if (version_str.find("OpenCL 3") != std::string::npos) + { + return CLVersion::CL30; + } + else if (version_str.find("OpenCL 2") != std::string::npos) { return CLVersion::CL20; } - else if(version_str.find("OpenCL 1.2") != std::string::npos) + else if (version_str.find("OpenCL 1.2") != std::string::npos) { return CLVersion::CL12; } - else if(version_str.find("OpenCL 1.1") != std::string::npos) + else if (version_str.find("OpenCL 1.1") != std::string::npos) { return CLVersion::CL11; } - else if(version_str.find("OpenCL 1.0") != std::string::npos) + else if (version_str.find("OpenCL 1.0") != std::string::npos) { return CLVersion::CL10; } @@ -281,14 +289,15 @@ bool device_supports_extension(const cl::Device &device, const char *extension_n return (pos != std::string::npos); } -bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Size2D &kernel_size, DataLayout data_layout) +bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, + const Size2D &kernel_size, + DataLayout data_layout) { ARM_COMPUTE_ERROR_ON(data_layout == DataLayout::UNKNOWN); using WinogradConfiguration = std::pair<std::pair<int, int>, std::pair<int, int>>; - std::vector<WinogradConfiguration> winograd_configs_nchw = - { + std::vector<WinogradConfiguration> winograd_configs_nchw = { WinogradConfiguration(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3)), WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3)), WinogradConfiguration(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1)), @@ -297,11 +306,9 @@ bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Si WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3)), WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5)), WinogradConfiguration(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1)), - WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5)) - }; + WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5))}; - std::vector<WinogradConfiguration> winograd_configs_nhwc = - { + std::vector<WinogradConfiguration> winograd_configs_nhwc = { WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3)), WinogradConfiguration(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3)), WinogradConfiguration(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1)), @@ -318,19 +325,21 @@ bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Si std::pair<int, int>(kernel_size.width, kernel_size.height)); // Return true if supported - if(data_layout == DataLayout::NCHW) + if (data_layout == DataLayout::NCHW) { - return (std::find(winograd_configs_nchw.begin(), winograd_configs_nchw.end(), p) != winograd_configs_nchw.end()); + return (std::find(winograd_configs_nchw.begin(), winograd_configs_nchw.end(), p) != + winograd_configs_nchw.end()); } else { - return (std::find(winograd_configs_nhwc.begin(), winograd_configs_nhwc.end(), p) != winograd_configs_nhwc.end()); + return (std::find(winograd_configs_nhwc.begin(), winograd_configs_nhwc.end(), p) != + winograd_configs_nhwc.end()); } } size_t preferred_vector_width(const cl::Device &device, const DataType dt) { - switch(dt) + switch (dt) { case DataType::U8: case DataType::S8: @@ -376,7 +385,7 @@ size_t get_cl_image_pitch_alignment(const cl::Device &device) cl_int err = clGetDeviceInfo(device(), CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof(cl_uint), &pixel_aligment, nullptr); - if(err == CL_SUCCESS) + if (err == CL_SUCCESS) { return pixel_aligment; } @@ -386,26 +395,27 @@ size_t get_cl_image_pitch_alignment(const cl::Device &device) } } -cl::Kernel create_opencl_kernel(CLCoreRuntimeContext *ctx, const std::string &kernel_name, const CLBuildOptions &build_opts) +bool get_cl_non_uniform_work_group_supported(const cl::Device &device) { - if(ctx && ctx->kernel_library()) - { - // New api going through the core context - return static_cast<cl::Kernel>(ctx->kernel_library()->create_kernel(kernel_name, build_opts.options())); - } - else - { - // Legacy code through the singleton - return static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); - } + cl_bool supported = CL_FALSE; + + cl_int err = + clGetDeviceInfo(device(), CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT, sizeof(cl_bool), &supported, nullptr); + + return (err == CL_SUCCESS && supported == CL_TRUE); } -cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set<std::string> &build_opts) +cl::Kernel +create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set<std::string> &build_opts) { - const std::string program_name = CLKernelLibrary::get().get_program_name(kernel_name); - std::pair<std::string, bool> kernel_src = CLKernelLibrary::get().get_program(program_name); - const std::string kernel_path = CLKernelLibrary::get().get_kernel_path(); - return static_cast<cl::Kernel>(ctx.create_kernel(kernel_name, program_name, kernel_src.first, kernel_path, build_opts, kernel_src.second)); + opencl::ClKernelLibrary &klib = opencl::ClKernelLibrary::get(); + + const std::string program_name = klib.program_name(kernel_name); + auto kernel_src = klib.program(program_name); + const std::string kernel_path = klib.kernel_path(); + + return static_cast<cl::Kernel>(ctx.create_kernel(kernel_name, program_name, kernel_src.program, kernel_path, + build_opts, kernel_src.is_binary)); } cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimension, unsigned int vector_size) @@ -415,4 +425,90 @@ cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimensio const unsigned int num_of_threads = ((input_dimension + border_width) / 16); return cl::NDRange(std::min(8U, num_of_threads)); } + +bool get_wbsm_support_info(const cl::Device &device) +{ + cl_bitfield capabilities = 0; + cl_int err = clGetDeviceInfo(device.get(), CL_DEVICE_SCHEDULING_CONTROLS_CAPABILITIES_ARM, sizeof(cl_bitfield), + &capabilities, nullptr); + if ((err == CL_SUCCESS) && (capabilities & CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_MODIFIER_ARM)) + { + return true; + } + return false; +} + +void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint) +{ + cl_int err = clSetKernelExecInfo(kernel.get(), CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_MODIFIER_ARM, + sizeof(cl_int), &wbsm_hint); + ARM_COMPUTE_UNUSED(err); + ARM_COMPUTE_ERROR_ON(err != CL_SUCCESS); +} + +bool export_to_cl_image(const ITensorInfo *tensor) +{ + if (tensor->tensor_shape()[0] % 4 != 0) + { + return false; + } + + // If not floating point + if (!is_data_type_float(tensor->data_type())) + { + return false; + } + + // Check if the cl_khr_image2d_from_buffer extension is supported on the target platform + if (!image2d_from_buffer_supported(CLKernelLibrary::get().get_device())) + { + return false; + } + + // Check cl image pitch alignment + if (get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device()) == 0) + { + return false; + } + + const size_t image_w = tensor->tensor_shape()[0] / 4; + const size_t image_h = tensor->tensor_shape().total_size() / tensor->tensor_shape()[0]; + const size_t max_image_w = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>(); + const size_t max_image_h = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>(); + + if (image_w > max_image_w || image_h > max_image_h) + { + return false; + } + + return true; +} + +void set_unroll_with_pragma(CLBuildOptions &built_opts, std::initializer_list<int> values) +{ + for (const int value : values) + { + if (value > max_manual_loop_unrolling) + { + built_opts.add_option("-DUNROLL_WITH_PRAGMA"); + return; + } + } +} + +bool arm_matrix_multiply_supported(const cl::Device &device) +{ + return device_supports_extension(device, "cl_arm_matrix_multiply"); +} + +bool command_buffer_supported(const cl::Device &device) +{ + return device_supports_extension(device, "cl_khr_command_buffer"); +} + +bool command_buffer_mutable_dispatch_supported(const cl::Device &device) +{ + return device_supports_extension(device, "cl_khr_command_buffer_mutable_dispatch"); +} + } // namespace arm_compute |