aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGian Marco Iodice <gianmarco.iodice@arm.com>2022-05-30 14:41:49 +0100
committerGian Marco Iodice <gianmarco.iodice@arm.com>2022-07-08 13:48:51 +0000
commit2cc50b39613cea5e55c8a4851ee08d284a3d4f66 (patch)
tree7faac6aaa4409bb127a9bd8ebc89056dae40066e
parent22dd8b9014112fe446cb8cff6d52933d2603a97f (diff)
downloadComputeLibrary-2cc50b39613cea5e55c8a4851ee08d284a3d4f66.tar.gz
Extended direct conv 2d interface for tuning the OpenCl kernel
Resolves COMPMID-5298 Change-Id: Ie9b907e5dcf86aa6add8d08799fa7ba7c264edea Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7888 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: SiCong Li <sicong.li@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--Android.bp2
-rw-r--r--arm_compute/core/KernelDescriptors.h9
-rw-r--r--filelist.json2
-rw-r--r--src/gpu/cl/kernels/ClDirectConv2dKernel.cpp114
-rw-r--r--src/gpu/cl/kernels/ClDirectConv2dKernel.h11
-rw-r--r--src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.cpp192
-rw-r--r--src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.h55
-rw-r--r--src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp358
-rw-r--r--src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.h55
-rw-r--r--src/gpu/cl/kernels/direct_conv/ClDirectConvKernelConfig.h64
-rw-r--r--src/gpu/cl/kernels/direct_conv/IClDirectConvKernelConfig.h115
-rw-r--r--src/gpu/cl/operators/ClDirectConv2d.cpp32
12 files changed, 929 insertions, 80 deletions
diff --git a/Android.bp b/Android.bp
index 74c2b96c37..2590469673 100644
--- a/Android.bp
+++ b/Android.bp
@@ -637,6 +637,8 @@ cc_library_static {
"src/gpu/cl/kernels/ClWinogradFilterTransformKernel.cpp",
"src/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp",
"src/gpu/cl/kernels/ClWinogradOutputTransformKernel.cpp",
+ "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.cpp",
+ "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp",
"src/gpu/cl/kernels/experimental/dynamic_fusion/ClCompositeKernel.cpp",
"src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp",
"src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.cpp",
diff --git a/arm_compute/core/KernelDescriptors.h b/arm_compute/core/KernelDescriptors.h
index b1086494e4..c45be9c06f 100644
--- a/arm_compute/core/KernelDescriptors.h
+++ b/arm_compute/core/KernelDescriptors.h
@@ -109,6 +109,15 @@ struct DWCComputeKernelInfo
bool export_weights_to_cl_image{ false }; /**< Export the weights to cl_image */
};
+/** Compute descriptor used by the direct convolution kernel */
+struct DirectConvComputeKernelInfo
+{
+ int32_t m0{ 1 }; /**< Number of rows to be processed by the kernel */
+ int32_t n0{ 1 }; /**< Number of columns to be processed by the kernel */
+ int32_t k0{ 1 }; /**< Number of partial accumulations to be processed in a single iteration by the kernel */
+ bool export_weights_to_cl_image{ false }; /**< Flag to export the weights to cl_image */
+};
+
/** Descriptor used by the softmax kernels */
struct SoftmaxKernelInfo
{
diff --git a/filelist.json b/filelist.json
index e22ec1d18f..ab2cc83a84 100644
--- a/filelist.json
+++ b/filelist.json
@@ -460,6 +460,8 @@
"deps": [ "Cast" ],
"files": {
"common": [
+ "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.cpp",
+ "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp",
"src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp",
"src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.cpp",
"src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.cpp",
diff --git a/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp b/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp
index be4c8ef5b7..c4b70ca82b 100644
--- a/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp
+++ b/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp
@@ -23,11 +23,11 @@
*/
#include "src/gpu/cl/kernels/ClDirectConv2dKernel.h"
-#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
@@ -40,6 +40,7 @@
#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
#include "support/Cast.h"
#include "support/StringSupport.h"
+
namespace arm_compute
{
namespace opencl
@@ -49,7 +50,7 @@ namespace kernels
namespace
{
Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
- const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
+ const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info, const DirectConvComputeKernelInfo &desc)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::F16, DataType::F32);
@@ -83,6 +84,21 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, co
}
}
+ if(data_layout == DataLayout::NHWC)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.n0 != 1 && desc.n0 != 2 && desc.n0 != 3 && desc.n0 != 4 && desc.n0 != 8 && desc.n0 != 16,
+ "N0 can only be: 1, 2, 3, 4, 8, and 16");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.k0 != 1 && desc.k0 != 2 && desc.k0 != 3 && desc.k0 != 4 && desc.k0 != 8 && desc.k0 != 16,
+ "K0 can only be: 1, 2, 3, 4, 8, and 16");
+ if(desc.export_weights_to_cl_image)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.k0 != 4 && desc.k0 != 8 && desc.k0 != 16,
+ "K0 can only be: 4, 8, and 16");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(!export_weights_to_cl_image(weights),
+ "Export to CLImage is not supported for this weight configuration");
+ }
+ }
+
if(biases != nullptr)
{
if(is_data_type_quantized_asymmetric(src->data_type()))
@@ -121,50 +137,6 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, co
}
return Status{};
}
-
-bool export_to_cl_image_support(ITensorInfo *tensor, GPUTarget gpu_target, DataLayout data_layout)
-{
- if(tensor->tensor_shape()[0] % 4 || (data_layout != DataLayout::NHWC))
- {
- return false;
- }
-
- // If not floating point
- if(!is_data_type_float(tensor->data_type()))
- {
- return false;
- }
-
- if(gpu_target == GPUTarget::G71 || get_arch_from_target(gpu_target) == GPUTarget::MIDGARD)
- {
- return false;
- }
-
- // Check if the cl_khr_image2d_from_buffer extension is supported on the target platform
- if(!image2d_from_buffer_supported(CLKernelLibrary::get().get_device()))
- {
- return false;
- }
-
- // Check cl image pitch alignment
- if(get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device()) == 0)
- {
- return false;
- }
-
- const size_t image_w = tensor->tensor_shape()[0] / 4;
- const size_t image_h = tensor->tensor_shape()[1] * tensor->tensor_shape()[2] * tensor->tensor_shape()[3];
- const size_t max_image_w = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>();
- const size_t max_image_h = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>();
-
- if(image_w > max_image_w || image_h > max_image_h)
- {
- return false;
- }
-
- return true;
-}
-
} // namespace
ClDirectConv2dKernel::ClDirectConv2dKernel()
@@ -173,12 +145,12 @@ ClDirectConv2dKernel::ClDirectConv2dKernel()
}
void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst,
- const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
+ const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info, const DirectConvComputeKernelInfo &desc)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
// Perform validation
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, weights, biases, dst, conv_info, act_info));
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, weights, biases, dst, conv_info, act_info, desc));
const int conv_stride_x = std::get<0>(conv_info.stride());
const int conv_stride_y = std::get<1>(conv_info.stride());
@@ -208,15 +180,12 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT
Window win;
if(_data_layout == DataLayout::NHWC)
{
- const unsigned int vec_size = std::min(static_cast<unsigned int>(dst->tensor_shape()[0]), 4u);
- unsigned int num_rows = 1U;
- if(dst->tensor_shape()[0] > 16)
- {
- num_rows = src->data_type() == DataType::F32 ? 2U : 4U;
- }
+ output_shape.collapse(2U, 1U);
+ const unsigned int n0 = adjust_vec_size(desc.n0, output_shape[0]);
+ const unsigned int m0 = adjust_vec_size(desc.m0, output_shape[1]);
// Create window and update padding
- win = calculate_max_window(output_shape, Steps(vec_size, num_rows));
+ win = calculate_max_window(output_shape, Steps(n0, m0));
}
else if(_data_layout == DataLayout::NCHW)
{
@@ -233,16 +202,17 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT
{
kernel_name << "direct_convolution_nhwc";
- const unsigned int n0 = win.x().step();
- const unsigned int m0 = win.y().step();
- const unsigned int k0 = adjust_vec_size(is_data_type_quantized(data_type) ? 16u : 8u, src->dimension(channel_idx));
- const unsigned int partial_store_n0 = dst->dimension(channel_idx) % n0;
- const unsigned int pad_left = conv_info.pad_left();
- const unsigned int pad_top = conv_info.pad_top();
- const bool export_to_cl_image = export_to_cl_image_support(weights, gpu_target, _data_layout);
+ const unsigned int n0 = win.x().step();
+ const unsigned int m0 = win.y().step();
+ const unsigned int k0 = adjust_vec_size(desc.k0, src->dimension(channel_idx));
+ const unsigned int partial_store_n0 = dst->dimension(channel_idx) % n0;
+ const unsigned int pad_left = conv_info.pad_left();
+ const unsigned int pad_top = conv_info.pad_top();
+
+ _export_to_cl_image = desc.export_weights_to_cl_image;
// Update the padding for the weights tensor if we can export to cl_image
- if(export_to_cl_image)
+ if(_export_to_cl_image)
{
gemm::update_padding_for_cl_image(weights);
}
@@ -274,7 +244,7 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT
build_options.add_option("-DSRC_DATA_TYPE=" + get_cl_type_from_data_type(src->data_type()));
build_options.add_option("-DDST_TENSOR_TYPE=BUFFER");
build_options.add_option("-DDST_DATA_TYPE=" + get_cl_type_from_data_type(dst_data_type));
- build_options.add_option_if_else(export_to_cl_image, "-DWEI_TENSOR_TYPE=IMAGE", "-DWEI_TENSOR_TYPE=BUFFER");
+ build_options.add_option_if_else(_export_to_cl_image, "-DWEI_TENSOR_TYPE=IMAGE", "-DWEI_TENSOR_TYPE=BUFFER");
build_options.add_option("-DWEI_WIDTH=" + support::cpp11::to_string(weights->dimension(width_idx)));
build_options.add_option("-DWEI_HEIGHT=" + support::cpp11::to_string(weights->dimension(height_idx)));
build_options.add_option("-DWEI_DATA_TYPE=" + get_cl_type_from_data_type(weights->data_type()));
@@ -325,6 +295,8 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT
}
else
{
+ _export_to_cl_image = false;
+
kernel_name << "direct_convolution_nchw";
build_options.add_option_if(biases != nullptr, std::string("-DHAS_BIAS"));
build_options.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(src->dimension(width_idx)));
@@ -393,9 +365,9 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT
}
Status ClDirectConv2dKernel::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
- const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
+ const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info, const DirectConvComputeKernelInfo &desc)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, weights, biases, dst, conv_info, act_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, weights, biases, dst, conv_info, act_info, desc));
return Status{};
}
@@ -416,13 +388,7 @@ void ClDirectConv2dKernel::run_op(ITensorPack &tensors, const Window &window, cl
{
cl::Image2D weights_cl_image;
- const size_t dim_y_collapsed = ceil_to_multiple(dst->info()->dimension(1) * dst->info()->dimension(2), slice.y().step());
- const bool export_to_cl_image = export_to_cl_image_support(weights->info(), get_target(), _data_layout);
-
- slice.set(Window::DimY, Window::Dimension(0, dim_y_collapsed, slice.y().step()));
- slice.set(Window::DimZ, Window::Dimension(0, dst->info()->dimension(3), 1));
-
- if(export_to_cl_image)
+ if(_export_to_cl_image)
{
const size_t image_w = weights->info()->dimension(0) / 4;
const size_t image_h = weights->info()->dimension(1) * weights->info()->dimension(2) * weights->info()->dimension(3);
@@ -436,7 +402,7 @@ void ClDirectConv2dKernel::run_op(ITensorPack &tensors, const Window &window, cl
unsigned int idx = 0;
add_4d_tensor_nhwc_argument(idx, src);
add_4d_tensor_nhwc_argument(idx, dst);
- if(export_to_cl_image)
+ if(_export_to_cl_image)
{
_kernel.setArg(idx++, weights_cl_image);
}
diff --git a/src/gpu/cl/kernels/ClDirectConv2dKernel.h b/src/gpu/cl/kernels/ClDirectConv2dKernel.h
index 5681927816..0cb8aebbe1 100644
--- a/src/gpu/cl/kernels/ClDirectConv2dKernel.h
+++ b/src/gpu/cl/kernels/ClDirectConv2dKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,9 @@
namespace arm_compute
{
+// Forward declaration
+struct DirectConvComputeKernelInfo;
+
namespace opencl
{
namespace kernels
@@ -62,9 +65,10 @@ public:
* The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p src.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
* @param[in] act_info Contains activaton information described in @ref ActivationLayerInfo.
+ * @param[in] desc Direct convolution descriptor used to build the NHWC direct convolution kernel. For NCHW, this parameter is ignored.
*/
void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst,
- const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info);
+ const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info, const DirectConvComputeKernelInfo &desc);
/** Static function to check if given info will lead to a valid configuration
*
* Similar to ClDirectConv2dKernel::configure()
@@ -72,7 +76,7 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
- const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info);
+ const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info, const DirectConvComputeKernelInfo &desc);
// Inherited methods overridden:
void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
@@ -80,6 +84,7 @@ public:
public:
DataLayout _data_layout{};
PadStrideInfo _conv_info{};
+ bool _export_to_cl_image{ false };
};
} // namespace kernels
} // namespace opencl
diff --git a/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.cpp b/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.cpp
new file mode 100644
index 0000000000..4ea198133b
--- /dev/null
+++ b/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.cpp
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/GPUTarget.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include <utility>
+
+namespace arm_compute
+{
+namespace cl_direct_conv
+{
+using namespace arm_compute::misc::shape_calculator;
+
+ClDirectConvDefaultConfigBifrost::ClDirectConvDefaultConfigBifrost(GPUTarget gpu)
+ : IClDirectConvKernelConfig(gpu)
+{
+}
+
+DirectConvComputeKernelInfo ClDirectConvDefaultConfigBifrost::configure(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
+{
+ using ConfigurationFunctionExecutorPtr = DirectConvComputeKernelInfo (ClDirectConvDefaultConfigBifrost::*)(const ITensorInfo * src, const ITensorInfo * wei, const PadStrideInfo & conv_info);
+
+ ClDirectConvConfigArray<ConfigurationFunctionExecutorPtr> configs_G71(&ClDirectConvDefaultConfigBifrost::configure_G71_f32,
+ &ClDirectConvDefaultConfigBifrost::configure_G71_f16,
+ &ClDirectConvDefaultConfigBifrost::configure_G71_u8);
+
+ ClDirectConvConfigArray<ConfigurationFunctionExecutorPtr> configs_default(&ClDirectConvDefaultConfigBifrost::configure_default_f32,
+ &ClDirectConvDefaultConfigBifrost::configure_default_f16,
+ &ClDirectConvDefaultConfigBifrost::configure_G71_u8);
+
+ ConfigurationFunctionExecutorPtr func = nullptr;
+ switch(_target)
+ {
+ case GPUTarget::G71:
+ func = configs_G71.get_function(src->data_type());
+ break;
+ default:
+ func = configs_default.get_function(src->data_type());
+ break;
+ }
+
+ ARM_COMPUTE_ERROR_ON_MSG(func == nullptr, "Data type not supported for direct convolution");
+ return (this->*func)(src, wei, conv_info);
+}
+
+DirectConvComputeKernelInfo ClDirectConvDefaultConfigBifrost::configure_G71_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
+{
+ DirectConvComputeKernelInfo desc;
+
+ if(src->data_layout() == DataLayout::NHWC)
+ {
+ // Get the output shape
+ TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
+
+ desc.n0 = 4;
+
+ if(output_shape[0] > 16)
+ {
+ desc.m0 = 2;
+ }
+
+ desc.k0 = 8;
+
+ desc.export_weights_to_cl_image = false;
+ }
+
+ return desc;
+}
+
+DirectConvComputeKernelInfo ClDirectConvDefaultConfigBifrost::configure_G71_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
+{
+ DirectConvComputeKernelInfo desc;
+
+ if(src->data_layout() == DataLayout::NHWC)
+ {
+ // Get the output shape
+ TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
+
+ desc.n0 = 4;
+
+ if(output_shape[0] > 16)
+ {
+ desc.m0 = 4;
+ }
+
+ desc.k0 = 8;
+
+ desc.export_weights_to_cl_image = false;
+ }
+
+ return desc;
+}
+
+DirectConvComputeKernelInfo ClDirectConvDefaultConfigBifrost::configure_G71_u8(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
+{
+ DirectConvComputeKernelInfo desc;
+
+ if(src->data_layout() == DataLayout::NHWC)
+ {
+ // Get the output shape
+ TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
+
+ desc.n0 = 4;
+
+ if(output_shape[0] > 16)
+ {
+ desc.m0 = 4;
+ }
+
+ desc.k0 = 16;
+
+ desc.export_weights_to_cl_image = false;
+ }
+
+ return desc;
+}
+
+DirectConvComputeKernelInfo ClDirectConvDefaultConfigBifrost::configure_default_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
+{
+ DirectConvComputeKernelInfo desc;
+
+ if(src->data_layout() == DataLayout::NHWC)
+ {
+ // Get the output shape
+ TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
+
+ desc.n0 = 4;
+
+ if(output_shape[0] > 16)
+ {
+ desc.m0 = 2;
+ }
+
+ desc.k0 = 8;
+
+ desc.export_weights_to_cl_image = export_weights_to_cl_image(wei);
+ }
+
+ return desc;
+}
+
+DirectConvComputeKernelInfo ClDirectConvDefaultConfigBifrost::configure_default_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
+{
+ DirectConvComputeKernelInfo desc;
+
+ if(src->data_layout() == DataLayout::NHWC)
+ {
+ // Get the output shape
+ TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
+
+ desc.n0 = 4;
+
+ if(output_shape[0] > 16)
+ {
+ desc.m0 = 4;
+ }
+
+ desc.k0 = 8;
+
+ desc.export_weights_to_cl_image = export_weights_to_cl_image(wei);
+ }
+
+ return desc;
+}
+} // namespace cl_direct_conv
+} // namespace arm_compute
diff --git a/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.h b/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.h
new file mode 100644
index 0000000000..1e4cb66ec0
--- /dev/null
+++ b/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_DIRECT_CONV_DEFAULT_CONFIG_BIFROST_H
+#define ARM_COMPUTE_CL_DIRECT_CONV_DEFAULT_CONFIG_BIFROST_H
+
+#include "src/gpu/cl/kernels/direct_conv/IClDirectConvKernelConfig.h"
+
+namespace arm_compute
+{
+namespace cl_direct_conv
+{
+/** Bifrost based OpenCL direct convolution configuration */
+class ClDirectConvDefaultConfigBifrost final : public IClDirectConvKernelConfig
+{
+public:
+ /** Constructor
+ *
+ * @param[in] gpu GPU target
+ */
+ ClDirectConvDefaultConfigBifrost(GPUTarget gpu);
+
+ // Inherited overridden method
+ DirectConvComputeKernelInfo configure(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info) override;
+
+private:
+ DirectConvComputeKernelInfo configure_G71_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info);
+ DirectConvComputeKernelInfo configure_G71_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info);
+ DirectConvComputeKernelInfo configure_G71_u8(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info);
+ DirectConvComputeKernelInfo configure_default_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info);
+ DirectConvComputeKernelInfo configure_default_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info);
+};
+} // namespace cl_direct_conv
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_DIRECT_CONV_DEFAULT_CONFIG_BIFROST_H */
diff --git a/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp b/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp
new file mode 100644
index 0000000000..d87cada159
--- /dev/null
+++ b/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp
@@ -0,0 +1,358 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/GPUTarget.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include <utility>
+
+namespace arm_compute
+{
+namespace cl_direct_conv
+{
+using namespace arm_compute::misc::shape_calculator;
+
+ClDirectConvDefaultConfigValhall::ClDirectConvDefaultConfigValhall(GPUTarget gpu)
+ : IClDirectConvKernelConfig(gpu)
+{
+}
+
+DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
+{
+ using ConfigurationFunctionExecutorPtr = DirectConvComputeKernelInfo (ClDirectConvDefaultConfigValhall::*)(const ITensorInfo * src, const ITensorInfo * wei, const PadStrideInfo & conv_info);
+
+ ClDirectConvConfigArray<ConfigurationFunctionExecutorPtr> configs_G78(&ClDirectConvDefaultConfigValhall::configure_G78_f32,
+ &ClDirectConvDefaultConfigValhall::configure_G78_f16,
+ &ClDirectConvDefaultConfigValhall::configure_G78_u8);
+
+ ClDirectConvConfigArray<ConfigurationFunctionExecutorPtr> configs_G57(&ClDirectConvDefaultConfigValhall::configure_G57_f32,
+ &ClDirectConvDefaultConfigValhall::configure_G57_f16,
+ &ClDirectConvDefaultConfigValhall::configure_G78_u8);
+
+ ConfigurationFunctionExecutorPtr func = nullptr;
+ switch(_target)
+ {
+ case GPUTarget::G57:
+ func = configs_G57.get_function(src->data_type());
+ break;
+ case GPUTarget::G78:
+ default:
+ func = configs_G78.get_function(src->data_type());
+ break;
+ }
+
+ ARM_COMPUTE_ERROR_ON_MSG(func == nullptr, "Data type not supported for direct convolution");
+ return (this->*func)(src, wei, conv_info);
+}
+
+DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G78_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
+{
+ DirectConvComputeKernelInfo desc;
+
+ if(src->data_layout() == DataLayout::NHWC)
+ {
+ // Get the output shape
+ const TensorShape wei_shape = wei->tensor_shape();
+ const TensorShape dst_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
+ const bool export_to_cl_image = export_weights_to_cl_image(wei);
+
+ const int32_t ofm = dst_shape[0];
+ const int32_t m = dst_shape[1] * dst_shape[2];
+ const bool is_pointwise = (wei_shape[1] == wei_shape[2]) && wei_shape[1] == 1;
+
+ desc.export_weights_to_cl_image = export_to_cl_image;
+
+ if(dst_shape[0] <= 4)
+ {
+ if(is_pointwise)
+ {
+ if(ofm == 4)
+ {
+ desc.m0 = 1;
+ desc.n0 = 4;
+ desc.k0 = 16;
+ }
+ else
+ {
+ desc.m0 = 1;
+ desc.n0 = 1;
+ desc.k0 = 16;
+ }
+ }
+ else
+ {
+ desc.m0 = 1;
+ desc.n0 = 2;
+ desc.k0 = 16;
+ }
+ }
+ else
+ {
+ if(m < 64)
+ {
+ desc.m0 = 1;
+ desc.n0 = 1;
+ desc.k0 = 16;
+ }
+ else
+ {
+ desc.m0 = 4;
+ desc.n0 = 4;
+ desc.k0 = 4;
+ }
+ }
+ }
+
+ return desc;
+}
+
+DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G78_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
+{
+ DirectConvComputeKernelInfo desc;
+
+ if(src->data_layout() == DataLayout::NHWC)
+ {
+ // Get the output shape
+ const TensorShape wei_shape = wei->tensor_shape();
+ const TensorShape dst_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
+ const bool export_to_cl_image = export_weights_to_cl_image(wei);
+
+ const int32_t ofm = dst_shape[0];
+ const int32_t m = dst_shape[1] * dst_shape[2];
+ const bool is_pointwise = (wei_shape[1] == wei_shape[2]) && wei_shape[1] == 1;
+
+ desc.export_weights_to_cl_image = export_to_cl_image;
+
+ if(dst_shape[0] <= 4)
+ {
+ if(is_pointwise)
+ {
+ if(ofm == 4)
+ {
+ desc.m0 = 1;
+ desc.n0 = 4;
+ desc.k0 = 16;
+ }
+ else
+ {
+ desc.m0 = 1;
+ desc.n0 = 1;
+ desc.k0 = 16;
+ }
+ }
+ else
+ {
+ desc.m0 = 1;
+ desc.n0 = dst_shape[0];
+ desc.k0 = 16;
+ }
+ }
+ else
+ {
+ if(m < 64)
+ {
+ desc.m0 = 1;
+ desc.n0 = 1;
+ desc.k0 = 16;
+ }
+ else
+ {
+ if(ofm > 16)
+ {
+ desc.m0 = 4;
+ desc.n0 = 4;
+ desc.k0 = 8;
+ }
+ else
+ {
+ desc.m0 = 4;
+ desc.n0 = 4;
+ desc.k0 = 16;
+ }
+ }
+ }
+ }
+
+ return desc;
+}
+
+DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G78_u8(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
+{
+ DirectConvComputeKernelInfo desc;
+
+ if(src->data_layout() == DataLayout::NHWC)
+ {
+ // Get the output shape
+ TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
+
+ desc.n0 = 4;
+
+ if(output_shape[0] > 16)
+ {
+ desc.m0 = 4;
+ }
+
+ desc.k0 = 16;
+
+ desc.export_weights_to_cl_image = false;
+ }
+
+ return desc;
+}
+
+DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G57_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
+{
+ DirectConvComputeKernelInfo desc;
+
+ if(src->data_layout() == DataLayout::NHWC)
+ {
+ // Get the output shape
+ const TensorShape wei_shape = wei->tensor_shape();
+ const TensorShape dst_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
+ const bool export_to_cl_image = export_weights_to_cl_image(wei);
+
+ const int32_t m = dst_shape[1] * dst_shape[2];
+ const bool is_pointwise = (wei_shape[1] == wei_shape[2]) && wei_shape[1] == 1;
+
+ desc.export_weights_to_cl_image = export_to_cl_image;
+
+ if(dst_shape[0] <= 4)
+ {
+ if(is_pointwise)
+ {
+ desc.m0 = 1;
+ desc.n0 = 1;
+ desc.k0 = 16;
+ }
+ else
+ {
+ desc.m0 = 1;
+ desc.n0 = dst_shape[0];
+ desc.k0 = 16;
+ }
+ }
+ else
+ {
+ if(m < 64)
+ {
+ if(m == 1)
+ {
+ desc.m0 = 1;
+ desc.n0 = 1;
+ desc.k0 = 16;
+ }
+ else
+ {
+ desc.m0 = 4;
+ desc.n0 = 2;
+ desc.k0 = 8;
+ }
+ }
+ else
+ {
+ desc.m0 = 4;
+ desc.n0 = 4;
+ desc.k0 = 4;
+ }
+ }
+ }
+
+ return desc;
+}
+
+DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G57_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
+{
+ DirectConvComputeKernelInfo desc;
+
+ if(src->data_layout() == DataLayout::NHWC)
+ {
+ // Get the output shape
+ const TensorShape wei_shape = wei->tensor_shape();
+ const TensorShape dst_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
+ const bool export_to_cl_image = export_weights_to_cl_image(wei);
+
+ const int32_t ofm = dst_shape[0];
+ const int32_t m = dst_shape[1] * dst_shape[2];
+ const bool is_pointwise = (wei_shape[1] == wei_shape[2]) && wei_shape[1] == 1;
+
+ desc.export_weights_to_cl_image = export_to_cl_image;
+
+ if(dst_shape[0] <= 4)
+ {
+ if(is_pointwise)
+ {
+ desc.m0 = 2;
+ desc.n0 = 1;
+ desc.k0 = 16;
+ }
+ else
+ {
+ desc.m0 = 1;
+ desc.n0 = dst_shape[0];
+ desc.k0 = 16;
+ }
+ }
+ else
+ {
+ if(m < 64)
+ {
+ if(m == 1)
+ {
+ desc.m0 = 1;
+ desc.n0 = 1;
+ desc.k0 = 16;
+ }
+ else
+ {
+ desc.m0 = 4;
+ desc.n0 = 2;
+ desc.k0 = 8;
+ }
+ }
+ else
+ {
+ if(ofm > 16)
+ {
+ desc.m0 = 4;
+ desc.n0 = 8;
+ desc.k0 = 8;
+ }
+ else
+ {
+ desc.m0 = 8;
+ desc.n0 = 4;
+ desc.k0 = 4;
+ }
+ }
+ }
+ }
+
+ return desc;
+}
+} // namespace cl_direct_conv
+} // namespace arm_compute
diff --git a/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.h b/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.h
new file mode 100644
index 0000000000..2c65b88846
--- /dev/null
+++ b/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_DIRECT_CONV_DEFAULT_CONFIG_VALHALL_H
+#define ARM_COMPUTE_CL_DIRECT_CONV_DEFAULT_CONFIG_VALHALL_H
+
+#include "src/gpu/cl/kernels/direct_conv/IClDirectConvKernelConfig.h"
+
+namespace arm_compute
+{
+namespace cl_direct_conv
+{
+/** Valhall based OpenCL direct convolution configuration */
+class ClDirectConvDefaultConfigValhall final : public IClDirectConvKernelConfig
+{
+public:
+    /** Constructor
+     *
+     * Declared explicit so that a GPUTarget value is never implicitly converted into a configuration object.
+     *
+     * @param[in] gpu GPU target
+     */
+    explicit ClDirectConvDefaultConfigValhall(GPUTarget gpu);
+
+    // Inherited overridden method
+    DirectConvComputeKernelInfo configure(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info) override;
+
+private:
+    // Internal heuristics, all sharing the signature of configure().
+    // NOTE(review): the suffixes presumably name the GPU (G78/G57) and the data type (f32/f16/u8)
+    // each heuristic is meant for; how configure() selects among them is not visible in this header.
+    DirectConvComputeKernelInfo configure_G78_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info);
+    DirectConvComputeKernelInfo configure_G78_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info);
+    DirectConvComputeKernelInfo configure_G78_u8(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info);
+    DirectConvComputeKernelInfo configure_G57_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info);
+    DirectConvComputeKernelInfo configure_G57_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info);
+};
+} // namespace cl_direct_conv
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_DIRECT_CONV_DEFAULT_CONFIG_VALHALL_H */
diff --git a/src/gpu/cl/kernels/direct_conv/ClDirectConvKernelConfig.h b/src/gpu/cl/kernels/direct_conv/ClDirectConvKernelConfig.h
new file mode 100644
index 0000000000..c1c2e439c6
--- /dev/null
+++ b/src/gpu/cl/kernels/direct_conv/ClDirectConvKernelConfig.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_DIRECT_CONV_KERNEL_CONFIGURATION_H
+#define ARM_COMPUTE_CL_DIRECT_CONV_KERNEL_CONFIGURATION_H
+
+#include "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.h"
+#include "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.h"
+#include "src/gpu/cl/kernels/direct_conv/IClDirectConvKernelConfig.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+namespace cl_direct_conv
+{
+/** ClDirectConvolution factory class */
+class ClDirectConvKernelConfigurationFactory final
+{
+public:
+    /** Static method creating the ClDirectConvolution kernel configuration object that matches the architecture of the GPU target
+     *
+     * @param[in] gpu GPU target
+     *
+     * @return IClDirectConvKernelConfig
+     */
+    static std::unique_ptr<IClDirectConvKernelConfig> create(GPUTarget gpu)
+    {
+        const auto arch = get_arch_from_target(gpu);
+
+        if(arch == GPUTarget::MIDGARD)
+        {
+            // Midgard is served by the Bifrost configuration, constructed with G71 instead of the actual target
+            return std::make_unique<ClDirectConvDefaultConfigBifrost>(GPUTarget::G71);
+        }
+
+        if(arch == GPUTarget::BIFROST)
+        {
+            return std::make_unique<ClDirectConvDefaultConfigBifrost>(gpu);
+        }
+
+        if(arch == GPUTarget::VALHALL)
+        {
+            return std::make_unique<ClDirectConvDefaultConfigValhall>(gpu);
+        }
+
+        // Any other architecture is rejected
+        ARM_COMPUTE_ERROR("Not supported GPU target");
+    }
+};
+} // namespace cl_direct_conv
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_DIRECT_CONV_KERNEL_CONFIGURATION_H */
diff --git a/src/gpu/cl/kernels/direct_conv/IClDirectConvKernelConfig.h b/src/gpu/cl/kernels/direct_conv/IClDirectConvKernelConfig.h
new file mode 100644
index 0000000000..837fa35341
--- /dev/null
+++ b/src/gpu/cl/kernels/direct_conv/IClDirectConvKernelConfig.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_ICL_DIRECT_CONV_KERNEL_CONFIG_H
+#define ARM_COMPUTE_ICL_DIRECT_CONV_KERNEL_CONFIG_H
+
+#include "arm_compute/core/GPUTarget.h"
+#include "arm_compute/core/KernelDescriptors.h"
+#include "arm_compute/core/Types.h"
+#include "src/core/common/Macros.h"
+
+#include <array>
+#include <cstddef>
+
+namespace arm_compute
+{
+namespace cl_direct_conv
+{
+/** Basic container for the OpenCL direct convolution configuration functions
+ *
+ * Holds one entry of type T per supported data-type group (F32, F16, 8-bit quantized).
+ * T must be constructible from nullptr, since that is what @ref get_function returns
+ * for an unsupported data type.
+ */
+template <class T>
+class ClDirectConvConfigArray
+{
+public:
+    /** Alias for F32 index */
+    static constexpr size_t DT_F32 = 0;
+    /** Alias for F16 index */
+    static constexpr size_t DT_F16 = 1;
+    /** Alias for Int8 index */
+    static constexpr size_t DT_INT8 = 2;
+
+    /** Constructor
+     *
+     * @param[in] func_f32  Function to call for direct convolution F32
+     * @param[in] func_f16  Function to call for direct convolution F16
+     * @param[in] func_int8 Function to call for direct convolution Int8 (QASYMM8, QASYMM8_SIGNED, QSYMM8_PER_CHANNEL)
+     *
+     */
+    ClDirectConvConfigArray(T func_f32, T func_f16, T func_int8)
+        : _configs{ func_f32, func_f16, func_int8 }
+    {
+    }
+
+    /** Method to return the direct convolution configuration function based on data type
+     *
+     * The lookup does not modify the container, hence it is const-qualified and can be used on const instances.
+     *
+     * @param[in] data_type Input data type
+     *
+     * @return the valid function otherwise it returns nullptr if the data type is not valid
+     */
+    T get_function(DataType data_type) const
+    {
+        switch(data_type)
+        {
+            case DataType::F32:
+                return _configs.at(DT_F32);
+            case DataType::F16:
+                return _configs.at(DT_F16);
+            // All the 8-bit quantized types share the same entry
+            case DataType::QASYMM8:
+            case DataType::QASYMM8_SIGNED:
+            case DataType::QSYMM8_PER_CHANNEL:
+                return _configs.at(DT_INT8);
+            default:
+                return nullptr;
+        }
+    }
+
+private:
+    std::array<T, 3> _configs; /**< Entries indexed by DT_F32, DT_F16 and DT_INT8 */
+};
+
+/** Basic interface for the Direct convolution kernel configuration
+ *
+ * Concrete configurations implement @ref configure; the GPU target received by the
+ * constructor is kept in the protected member _target for them to use.
+ */
+class IClDirectConvKernelConfig
+{
+public:
+ /** Constructor
+ *
+ * @param[in] arch GPU target
+ */
+ IClDirectConvKernelConfig(GPUTarget arch)
+ : _target(arch)
+ {
+ }
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(IClDirectConvKernelConfig); // NOTE(review): presumably deletes the copy operations and keeps the move operations - confirm against Macros.h
+ /** Virtual destructor */
+ virtual ~IClDirectConvKernelConfig() = default;
+ /** This method returns the @ref DirectConvComputeKernelInfo for the given inputs
+ *
+ * @param[in] src Source tensor (activation tensor)
+ * @param[in] wei Weights tensor
+ * @param[in] conv_info Convolution info
+ *
+ * @return the @ref DirectConvComputeKernelInfo selected for the given inputs
+ */
+ virtual DirectConvComputeKernelInfo configure(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info) = 0;
+
+protected:
+ GPUTarget _target; /**< GPU target passed to the constructor */
+};
+} // namespace cl_direct_conv
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_ICL_DIRECT_CONV_KERNEL_CONFIG_H */
diff --git a/src/gpu/cl/operators/ClDirectConv2d.cpp b/src/gpu/cl/operators/ClDirectConv2d.cpp
index 53de6fc403..ded275dbae 100644
--- a/src/gpu/cl/operators/ClDirectConv2d.cpp
+++ b/src/gpu/cl/operators/ClDirectConv2d.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,13 +23,22 @@
*/
#include "src/gpu/cl/operators/ClDirectConv2d.h"
+#include "arm_compute/core/KernelDescriptors.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/helpers/AutoConfiguration.h"
#include "src/gpu/cl/kernels/ClActivationKernel.h"
#include "src/gpu/cl/kernels/ClDirectConv2dKernel.h"
+#include "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.h"
+#include "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.h"
+#include "src/gpu/cl/kernels/direct_conv/ClDirectConvKernelConfig.h"
+#include "src/gpu/cl/kernels/direct_conv/IClDirectConvKernelConfig.h"
#include "src/common/utils/Log.h"
+using namespace arm_compute::cl_direct_conv;
+
namespace arm_compute
{
namespace opencl
@@ -43,6 +52,17 @@ ITensorPack select_activation_src_dst(ITensorPack &tensors)
pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(TensorType::ACL_DST));
return pack;
}
+
+DirectConvComputeKernelInfo config_direct_convolution_nhwc(const ITensorInfo *src, const ITensorInfo *weights, const PadStrideInfo &conv_info)
+{
+    // Build the kernel configuration object for the GPU target reported by the CL scheduler
+    const std::unique_ptr<IClDirectConvKernelConfig> kernel_config = ClDirectConvKernelConfigurationFactory::create(CLScheduler::get().target());
+
+    // Let it derive the descriptor from the source/weights info and the convolution info
+    return kernel_config->configure(src, weights, conv_info);
+}
+
} // namespace
void ClDirectConv2d::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst,
@@ -51,11 +71,14 @@ void ClDirectConv2d::configure(const CLCompileContext &compile_context, ITensorI
ARM_COMPUTE_ERROR_ON_NULLPTR(src);
ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, conv_info, act_info);
+ // Initialize the direct convolution descriptor
+ const DirectConvComputeKernelInfo desc = config_direct_convolution_nhwc(src, weights, conv_info);
+
// Configure direct convolution kernel
const ActivationLayerInfo conv2d_act_info = (src->data_layout() == DataLayout::NHWC && is_data_type_float(src->data_type())) ? act_info : ActivationLayerInfo();
auto k = std::make_unique<kernels::ClDirectConv2dKernel>();
k->set_target(CLScheduler::get().target());
- k->configure(compile_context, src, weights, biases, dst, conv_info, conv2d_act_info);
+ k->configure(compile_context, src, weights, biases, dst, conv_info, conv2d_act_info, desc);
_direct_conv_kernel = std::move(k);
// Configure border handler
@@ -83,7 +106,10 @@ void ClDirectConv2d::configure(const CLCompileContext &compile_context, ITensorI
Status ClDirectConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
{
- ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClDirectConv2dKernel::validate(src, weights, biases, dst, conv_info, ActivationLayerInfo()));
+ // Initialize the direct convolution descriptor
+ const DirectConvComputeKernelInfo desc = config_direct_convolution_nhwc(src, weights, conv_info);
+
+ ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClDirectConv2dKernel::validate(src, weights, biases, dst, conv_info, ActivationLayerInfo(), desc));
if(act_info.enabled())
{
ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClActivationKernel::validate(dst, dst, act_info));