Extended direct conv 2d interface for tuning the OpenCl kernel

Resolves COMPMID-5298 Change-Id: Ie9b907e5dcf86aa6add8d08799fa7ba7c264edea Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7888 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: SiCong Li <sicong.li@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
author: Gian Marco Iodice <gianmarco.iodice@arm.com> 2022-05-30 14:41:49 +0100
committer: Gian Marco Iodice <gianmarco.iodice@arm.com> 2022-07-08 13:48:51 +0000
commit: 2cc50b39613cea5e55c8a4851ee08d284a3d4f66 (patch)
tree: 7faac6aaa4409bb127a9bd8ebc89056dae40066e
parent: 22dd8b9014112fe446cb8cff6d52933d2603a97f (diff)
download: ComputeLibrary-2cc50b39613cea5e55c8a4851ee08d284a3d4f66.tar.gz
12 files changed, 929 insertions, 80 deletions
diff --git a/Android.bp b/Android.bp
index 74c2b96c37..2590469673 100644
--- a/Android.bp
+++ b/Android.bp
@@ -637,6 +637,8 @@ cc_library_static {
         "src/gpu/cl/kernels/ClWinogradFilterTransformKernel.cpp",
         "src/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp",
         "src/gpu/cl/kernels/ClWinogradOutputTransformKernel.cpp",
+        "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.cpp",
+        "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp",
         "src/gpu/cl/kernels/experimental/dynamic_fusion/ClCompositeKernel.cpp",
         "src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp",
         "src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.cpp",
diff --git a/arm_compute/core/KernelDescriptors.h b/arm_compute/core/KernelDescriptors.h
index b1086494e4..c45be9c06f 100644
--- a/arm_compute/core/KernelDescriptors.h
+++ b/arm_compute/core/KernelDescriptors.h
@@ -109,6 +109,15 @@ struct DWCComputeKernelInfo
     bool         export_weights_to_cl_image{ false }; /**< Export the weights to cl_image */
 };
 
+/** Compute descriptor used by the direct convolution kernel */
+struct DirectConvComputeKernelInfo
+{
+    int32_t m0{ 1 };                             /**< Number of rows to be processed by the kernel */
+    int32_t n0{ 1 };                             /**< Number of columns to be processed by the kernel */
+    int32_t k0{ 1 };                             /**< Number of partial accumulations to be processed in a single iteration by the kernel */
+    bool    export_weights_to_cl_image{ false }; /**< Flag to export the weights to cl_image */
+};
+
 /** Descriptor used by the softmax kernels */
 struct SoftmaxKernelInfo
 {
diff --git a/filelist.json b/filelist.json
index e22ec1d18f..ab2cc83a84 100644
--- a/filelist.json
+++ b/filelist.json
@@ -460,6 +460,8 @@
       "deps": [ "Cast" ],
       "files": {
         "common": [
+          "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.cpp",
+          "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp",
           "src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp",
           "src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.cpp",
           "src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.cpp",
diff --git a/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp b/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp
index be4c8ef5b7..c4b70ca82b 100644
--- a/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp
+++ b/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp
@@ -23,11 +23,11 @@
  */
 #include "src/gpu/cl/kernels/ClDirectConv2dKernel.h"
 
-#include "arm_compute/core/CL/CLHelpers.h"
 #include "arm_compute/core/CL/CLKernelLibrary.h"
 #include "arm_compute/core/CL/ICLTensor.h"
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/KernelDescriptors.h"
 #include "arm_compute/core/PixelValue.h"
 #include "arm_compute/core/Utils.h"
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
@@ -40,6 +40,7 @@
 #include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
 #include "support/Cast.h"
 #include "support/StringSupport.h"
+
 namespace arm_compute
 {
 namespace opencl
@@ -49,7 +50,7 @@ namespace kernels
 namespace
 {
 Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
-                          const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
+                          const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info, const DirectConvComputeKernelInfo &desc)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::F16, DataType::F32);
@@ -83,6 +84,21 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, co
         }
     }
 
+    if(data_layout == DataLayout::NHWC)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.n0 != 1 && desc.n0 != 2 && desc.n0 != 3 && desc.n0 != 4 && desc.n0 != 8 && desc.n0 != 16,
+                                        "N0 can only be: 1, 2, 3, 4, 8, and 16");
+        ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.k0 != 1 && desc.k0 != 2 && desc.k0 != 3 && desc.k0 != 4 && desc.k0 != 8 && desc.k0 != 16,
+                                        "K0 can only be: 1, 2, 3, 4, 8, and 16");
+        if(desc.export_weights_to_cl_image)
+        {
+            ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.k0 != 4 && desc.k0 != 8 && desc.k0 != 16,
+                                            "K0 can only be: 4, 8, and 16");
+            ARM_COMPUTE_RETURN_ERROR_ON_MSG(!export_weights_to_cl_image(weights),
+                                            "Export to CLImage is not supported for this weight configuration");
+        }
+    }
+
     if(biases != nullptr)
     {
         if(is_data_type_quantized_asymmetric(src->data_type()))
@@ -121,50 +137,6 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, co
     }
     return Status{};
 }
-
-bool export_to_cl_image_support(ITensorInfo *tensor, GPUTarget gpu_target, DataLayout data_layout)
-{
-    if(tensor->tensor_shape()[0] % 4 || (data_layout != DataLayout::NHWC))
-    {
-        return false;
-    }
-
-    // If not floating point
-    if(!is_data_type_float(tensor->data_type()))
-    {
-        return false;
-    }
-
-    if(gpu_target == GPUTarget::G71 || get_arch_from_target(gpu_target) == GPUTarget::MIDGARD)
-    {
-        return false;
-    }
-
-    // Check if the cl_khr_image2d_from_buffer extension is supported on the target platform
-    if(!image2d_from_buffer_supported(CLKernelLibrary::get().get_device()))
-    {
-        return false;
-    }
-
-    // Check cl image pitch alignment
-    if(get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device()) == 0)
-    {
-        return false;
-    }
-
-    const size_t image_w     = tensor->tensor_shape()[0] / 4;
-    const size_t image_h     = tensor->tensor_shape()[1] * tensor->tensor_shape()[2] * tensor->tensor_shape()[3];
-    const size_t max_image_w = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>();
-    const size_t max_image_h = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>();
-
-    if(image_w > max_image_w || image_h > max_image_h)
-    {
-        return false;
-    }
-
-    return true;
-}
-
 } // namespace
 
 ClDirectConv2dKernel::ClDirectConv2dKernel()
@@ -173,12 +145,12 @@ ClDirectConv2dKernel::ClDirectConv2dKernel()
 }
 
 void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst,
-                                     const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
+                                     const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info, const DirectConvComputeKernelInfo &desc)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
 
     // Perform validation
-    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, weights, biases, dst, conv_info, act_info));
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, weights, biases, dst, conv_info, act_info, desc));
 
     const int conv_stride_x = std::get<0>(conv_info.stride());
     const int conv_stride_y = std::get<1>(conv_info.stride());
@@ -208,15 +180,12 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT
     Window win;
     if(_data_layout == DataLayout::NHWC)
     {
-        const unsigned int vec_size = std::min(static_cast<unsigned int>(dst->tensor_shape()[0]), 4u);
-        unsigned int       num_rows = 1U;
-        if(dst->tensor_shape()[0] > 16)
-        {
-            num_rows = src->data_type() == DataType::F32 ? 2U : 4U;
-        }
+        output_shape.collapse(2U, 1U);
+        const unsigned int n0 = adjust_vec_size(desc.n0, output_shape[0]);
+        const unsigned int m0 = adjust_vec_size(desc.m0, output_shape[1]);
 
         // Create window and update padding
-        win = calculate_max_window(output_shape, Steps(vec_size, num_rows));
+        win = calculate_max_window(output_shape, Steps(n0, m0));
     }
     else if(_data_layout == DataLayout::NCHW)
     {
@@ -233,16 +202,17 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT
     {
         kernel_name << "direct_convolution_nhwc";
 
-        const unsigned int n0                 = win.x().step();
-        const unsigned int m0                 = win.y().step();
-        const unsigned int k0                 = adjust_vec_size(is_data_type_quantized(data_type) ? 16u : 8u, src->dimension(channel_idx));
-        const unsigned int partial_store_n0   = dst->dimension(channel_idx) % n0;
-        const unsigned int pad_left           = conv_info.pad_left();
-        const unsigned int pad_top            = conv_info.pad_top();
-        const bool         export_to_cl_image = export_to_cl_image_support(weights, gpu_target, _data_layout);
+        const unsigned int n0               = win.x().step();
+        const unsigned int m0               = win.y().step();
+        const unsigned int k0               = adjust_vec_size(desc.k0, src->dimension(channel_idx));
+        const unsigned int partial_store_n0 = dst->dimension(channel_idx) % n0;
+        const unsigned int pad_left         = conv_info.pad_left();
+        const unsigned int pad_top          = conv_info.pad_top();
+
+        _export_to_cl_image = desc.export_weights_to_cl_image;
 
         // Update the padding for the weights tensor if we can export to cl_image
-        if(export_to_cl_image)
+        if(_export_to_cl_image)
         {
             gemm::update_padding_for_cl_image(weights);
         }
@@ -274,7 +244,7 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT
         build_options.add_option("-DSRC_DATA_TYPE=" + get_cl_type_from_data_type(src->data_type()));
         build_options.add_option("-DDST_TENSOR_TYPE=BUFFER");
         build_options.add_option("-DDST_DATA_TYPE=" + get_cl_type_from_data_type(dst_data_type));
-        build_options.add_option_if_else(export_to_cl_image, "-DWEI_TENSOR_TYPE=IMAGE", "-DWEI_TENSOR_TYPE=BUFFER");
+        build_options.add_option_if_else(_export_to_cl_image, "-DWEI_TENSOR_TYPE=IMAGE", "-DWEI_TENSOR_TYPE=BUFFER");
         build_options.add_option("-DWEI_WIDTH=" + support::cpp11::to_string(weights->dimension(width_idx)));
         build_options.add_option("-DWEI_HEIGHT=" + support::cpp11::to_string(weights->dimension(height_idx)));
         build_options.add_option("-DWEI_DATA_TYPE=" + get_cl_type_from_data_type(weights->data_type()));
@@ -325,6 +295,8 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT
     }
     else
     {
+        _export_to_cl_image = false;
+
         kernel_name << "direct_convolution_nchw";
         build_options.add_option_if(biases != nullptr, std::string("-DHAS_BIAS"));
         build_options.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(src->dimension(width_idx)));
@@ -393,9 +365,9 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT
 }
 
 Status ClDirectConv2dKernel::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
-                                      const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
+                                      const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info, const DirectConvComputeKernelInfo &desc)
 {
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, weights, biases, dst, conv_info, act_info));
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, weights, biases, dst, conv_info, act_info, desc));
     return Status{};
 }
 
@@ -416,13 +388,7 @@ void ClDirectConv2dKernel::run_op(ITensorPack &tensors, const Window &window, cl
     {
         cl::Image2D weights_cl_image;
 
-        const size_t dim_y_collapsed    = ceil_to_multiple(dst->info()->dimension(1) * dst->info()->dimension(2), slice.y().step());
-        const bool   export_to_cl_image = export_to_cl_image_support(weights->info(), get_target(), _data_layout);
-
-        slice.set(Window::DimY, Window::Dimension(0, dim_y_collapsed, slice.y().step()));
-        slice.set(Window::DimZ, Window::Dimension(0, dst->info()->dimension(3), 1));
-
-        if(export_to_cl_image)
+        if(_export_to_cl_image)
         {
             const size_t      image_w = weights->info()->dimension(0) / 4;
             const size_t      image_h = weights->info()->dimension(1) * weights->info()->dimension(2) * weights->info()->dimension(3);
@@ -436,7 +402,7 @@ void ClDirectConv2dKernel::run_op(ITensorPack &tensors, const Window &window, cl
         unsigned int idx = 0;
         add_4d_tensor_nhwc_argument(idx, src);
         add_4d_tensor_nhwc_argument(idx, dst);
-        if(export_to_cl_image)
+        if(_export_to_cl_image)
         {
             _kernel.setArg(idx++, weights_cl_image);
         }
diff --git a/src/gpu/cl/kernels/ClDirectConv2dKernel.h b/src/gpu/cl/kernels/ClDirectConv2dKernel.h
index 5681927816..0cb8aebbe1 100644
--- a/src/gpu/cl/kernels/ClDirectConv2dKernel.h
+++ b/src/gpu/cl/kernels/ClDirectConv2dKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,6 +30,9 @@
 
 namespace arm_compute
 {
+// Forward declaration
+struct DirectConvComputeKernelInfo;
+
 namespace opencl
 {
 namespace kernels
@@ -62,9 +65,10 @@ public:
      *                             The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p src.
      * @param[in]  conv_info       Contains padding and stride information described in @ref PadStrideInfo.
      * @param[in]  act_info        Contains activaton information described in @ref ActivationLayerInfo.
+     * @param[in]  desc            Direct convolution descriptor used to build the NHWC direct convolution kernel. For NCHW, this parameter is ignored.
      */
     void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst,
-                   const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info);
+                   const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info, const DirectConvComputeKernelInfo &desc);
     /** Static function to check if given info will lead to a valid configuration
      *
      * Similar to ClDirectConv2dKernel::configure()
@@ -72,7 +76,7 @@ public:
      * @return a status
      */
     static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
-                           const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info);
+                           const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info, const DirectConvComputeKernelInfo &desc);
 
     // Inherited methods overridden:
     void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
@@ -80,6 +84,7 @@ public:
 public:
     DataLayout    _data_layout{};
     PadStrideInfo _conv_info{};
+    bool          _export_to_cl_image{ false };
 };
 } // namespace kernels
 } // namespace opencl
diff --git a/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.cpp b/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.cpp
new file mode 100644
index 0000000000..4ea198133b
--- /dev/null
+++ b/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.cpp
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/GPUTarget.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include <utility>
+
+namespace arm_compute
+{
+namespace cl_direct_conv
+{
+using namespace arm_compute::misc::shape_calculator;
+
+ClDirectConvDefaultConfigBifrost::ClDirectConvDefaultConfigBifrost(GPUTarget gpu)
+    : IClDirectConvKernelConfig(gpu)
+{
+}
+
+DirectConvComputeKernelInfo ClDirectConvDefaultConfigBifrost::configure(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
+{
+    using ConfigurationFunctionExecutorPtr = DirectConvComputeKernelInfo (ClDirectConvDefaultConfigBifrost::*)(const ITensorInfo * src, const ITensorInfo * wei, const PadStrideInfo & conv_info);
+
+    ClDirectConvConfigArray<ConfigurationFunctionExecutorPtr> configs_G71(&ClDirectConvDefaultConfigBifrost::configure_G71_f32,
+                                                                          &ClDirectConvDefaultConfigBifrost::configure_G71_f16,
+                                                                          &ClDirectConvDefaultConfigBifrost::configure_G71_u8);
+
+    ClDirectConvConfigArray<ConfigurationFunctionExecutorPtr> configs_default(&ClDirectConvDefaultConfigBifrost::configure_default_f32,
+                                                                              &ClDirectConvDefaultConfigBifrost::configure_default_f16,
+                                                                              &ClDirectConvDefaultConfigBifrost::configure_G71_u8);
+
+    ConfigurationFunctionExecutorPtr func = nullptr;
+    switch(_target)
+    {
+        case GPUTarget::G71:
+            func = configs_G71.get_function(src->data_type());
+            break;
+        default:
+            func = configs_default.get_function(src->data_type());
+            break;
+    }
+
+    ARM_COMPUTE_ERROR_ON_MSG(func == nullptr, "Data type not supported for direct convolution");
+    return (this->*func)(src, wei, conv_info);
+}
+
+DirectConvComputeKernelInfo ClDirectConvDefaultConfigBifrost::configure_G71_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
+{
+    DirectConvComputeKernelInfo desc;
+
+    if(src->data_layout() == DataLayout::NHWC)
+    {
+        // Get the output shape
+        TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
+
+        desc.n0 = 4;
+
+        if(output_shape[0] > 16)
+        {
+            desc.m0 = 2;
+        }
+
+        desc.k0 = 8;
+
+        desc.export_weights_to_cl_image = false;
+    }
+
+    return desc;
+}
+
+DirectConvComputeKernelInfo ClDirectConvDefaultConfigBifrost::configure_G71_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
+{
+    DirectConvComputeKernelInfo desc;
+
+    if(src->data_layout() == DataLayout::NHWC)
+    {
+        // Get the output shape
+        TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
+
+        desc.n0 = 4;
+
+        if(output_shape[0] > 16)
+        {
+            desc.m0 = 4;
+        }
+
+        desc.k0 = 8;
+
+        desc.export_weights_to_cl_image = false;
+    }
+
+    return desc;
+}
+
+DirectConvComputeKernelInfo ClDirectConvDefaultConfigBifrost::configure_G71_u8(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
+{
+    DirectConvComputeKernelInfo desc;
+
+    if(src->data_layout() == DataLayout::NHWC)
+    {
+        // Get the output shape
+        TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
+
+        desc.n0 = 4;
+
+        if(output_shape[0] > 16)
+        {
+            desc.m0 = 4;
+        }
+
+        desc.k0 = 16;
+
+        desc.export_weights_to_cl_image = false;
+    }
+
+    return desc;
+}
+
+DirectConvComputeKernelInfo ClDirectConvDefaultConfigBifrost::configure_default_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
+{
+    DirectConvComputeKernelInfo desc;
+
+    if(src->data_layout() == DataLayout::NHWC)
+    {
+        // Get the output shape
+        TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
+
+        desc.n0 = 4;
+
+        if(output_shape[0] > 16)
+        {
+            desc.m0 = 2;
+        }
+
+        desc.k0 = 8;
+
+        desc.export_weights_to_cl_image = export_weights_to_cl_image(wei);
+    }
+
+    return desc;
+}
+
+DirectConvComputeKernelInfo ClDirectConvDefaultConfigBifrost::configure_default_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
+{
+    DirectConvComputeKernelInfo desc;
+
+    if(src->data_layout() == DataLayout::NHWC)
+    {
+        // Get the output shape
+        TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
+
+        desc.n0 = 4;
+
+        if(output_shape[0] > 16)
+        {
+            desc.m0 = 4;
+        }
+
+        desc.k0 = 8;
+
+        desc.export_weights_to_cl_image = export_weights_to_cl_image(wei);
+    }
+
+    return desc;
+}
+} // namespace opencl
+} // namespace arm_compute
diff --git a/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.h b/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.h
new file mode 100644
index 0000000000..1e4cb66ec0
--- /dev/null
+++ b/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_DIRECT_CONV_DEFAULT_CONFIG_BIFROST_H
+#define ARM_COMPUTE_CL_DIRECT_CONV_DEFAULT_CONFIG_BIFROST_H
+
+#include "src/gpu/cl/kernels/direct_conv/IClDirectConvKernelConfig.h"
+
+namespace arm_compute
+{
+namespace cl_direct_conv
+{
+/** Bifrost based OpenCL direct convolution configuration */
+class ClDirectConvDefaultConfigBifrost final : public IClDirectConvKernelConfig
+{
+public:
+    /** Constructor
+     *
+     * @param[in] gpu GPU target
+     */
+    ClDirectConvDefaultConfigBifrost(GPUTarget gpu);
+
+    // Inherited overridden method
+    DirectConvComputeKernelInfo configure(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info) override;
+
+private:
+    DirectConvComputeKernelInfo configure_G71_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info);
+    DirectConvComputeKernelInfo configure_G71_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info);
+    DirectConvComputeKernelInfo configure_G71_u8(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info);
+    DirectConvComputeKernelInfo configure_default_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info);
+    DirectConvComputeKernelInfo configure_default_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info);
+};
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_DIRECT_CONV_DEFAULT_CONFIG_BIFROST_H */
diff --git a/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp b/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp
new file mode 100644
index 0000000000..d87cada159
--- /dev/null
+++ b/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp
@@ -0,0 +1,358 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/GPUTarget.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include <utility>
+
+namespace arm_compute
+{
+namespace cl_direct_conv
+{
+using namespace arm_compute::misc::shape_calculator;
+
+ClDirectConvDefaultConfigValhall::ClDirectConvDefaultConfigValhall(GPUTarget gpu)
+    : IClDirectConvKernelConfig(gpu)
+{
+}
+
+DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
+{
+    using ConfigurationFunctionExecutorPtr = DirectConvComputeKernelInfo (ClDirectConvDefaultConfigValhall::*)(const ITensorInfo * src, const ITensorInfo * wei, const PadStrideInfo & conv_info);
+
+    ClDirectConvConfigArray<ConfigurationFunctionExecutorPtr> configs_G78(&ClDirectConvDefaultConfigValhall::configure_G78_f32,
+                                                                          &ClDirectConvDefaultConfigValhall::configure_G78_f16,
+                                                                          &ClDirectConvDefaultConfigValhall::configure_G78_u8);
+
+    ClDirectConvConfigArray<ConfigurationFunctionExecutorPtr> configs_G57(&ClDirectConvDefaultConfigValhall::configure_G57_f32,
+                                                                          &ClDirectConvDefaultConfigValhall::configure_G57_f16,
+                                                                          &ClDirectConvDefaultConfigValhall::configure_G78_u8);
+
+    ConfigurationFunctionExecutorPtr func = nullptr;
+    switch(_target)
+    {
+        case GPUTarget::G57:
+            func = configs_G57.get_function(src->data_type());
+            break;
+        case GPUTarget::G78:
+        default:
+            func = configs_G78.get_function(src->data_type());
+            break;
+    }
+
+    ARM_COMPUTE_ERROR_ON_MSG(func == nullptr, "Data type not supported for direct convolution");
+    return (this->*func)(src, wei, conv_info);
+}
+
+DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G78_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
+{
+    DirectConvComputeKernelInfo desc;
+
+    if(src->data_layout() == DataLayout::NHWC)
+    {
+        // Get the output shape
+        const TensorShape wei_shape          = wei->tensor_shape();
+        const TensorShape dst_shape          = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
+        const bool        export_to_cl_image = export_weights_to_cl_image(wei);
+
+        const int32_t ofm          = dst_shape[0];
+        const int32_t m            = dst_shape[1] * dst_shape[2];
+        const bool    is_pointwise = (wei_shape[1] == wei_shape[2]) && wei_shape[1] == 1;
+
+        desc.export_weights_to_cl_image = export_to_cl_image;
+
+        if(dst_shape[0] <= 4)
+        {
+            if(is_pointwise)
+            {
+                if(ofm == 4)
+                {
+                    desc.m0 = 1;
+                    desc.n0 = 4;
+                    desc.k0 = 16;
+                }
+                else
+                {
+                    desc.m0 = 1;
+                    desc.n0 = 1;
+                    desc.k0 = 16;
+                }
+            }
+            else
+            {
+                desc.m0 = 1;
+                desc.n0 = 2;
+                desc.k0 = 16;
+            }
+        }
+        else
+        {
+            if(m < 64)
+            {
+                desc.m0 = 1;
+                desc.n0 = 1;
+                desc.k0 = 16;
+            }
+            else
+            {
+                desc.m0 = 4;
+                desc.n0 = 4;
+                desc.k0 = 4;
+            }
+        }
+    }
+
+    return desc;
+}
+
+DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G78_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
+{
+    DirectConvComputeKernelInfo desc;
+
+    if(src->data_layout() == DataLayout::NHWC)
+    {
+        // Get the output shape
+        const TensorShape wei_shape          = wei->tensor_shape();
+        const TensorShape dst_shape          = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
+        const bool        export_to_cl_image = export_weights_to_cl_image(wei);
+
+        const int32_t ofm          = dst_shape[0];
+        const int32_t m            = dst_shape[1] * dst_shape[2];
+        const bool    is_pointwise = (wei_shape[1] == wei_shape[2]) && wei_shape[1] == 1;
+
+        desc.export_weights_to_cl_image = export_to_cl_image;
+
+        if(dst_shape[0] <= 4)
+        {
+            if(is_pointwise)
+            {
+                if(ofm == 4)
+                {
+                    desc.m0 = 1;
+                    desc.n0 = 4;
+                    desc.k0 = 16;
+                }
+                else
+                {
+                    desc.m0 = 1;
+                    desc.n0 = 1;
+                    desc.k0 = 16;
+                }
+            }
+            else
+            {
+                desc.m0 = 1;
+                desc.n0 = dst_shape[0];
+                desc.k0 = 16;
+            }
+        }
+        else
+        {
+            if(m < 64)
+            {
+                desc.m0 = 1;
+                desc.n0 = 1;
+                desc.k0 = 16;
+            }
+            else
+            {
+                if(ofm > 16)
+                {
+                    desc.m0 = 4;
+                    desc.n0 = 4;
+                    desc.k0 = 8;
+                }
+                else
+                {
+                    desc.m0 = 4;
+                    desc.n0 = 4;
+                    desc.k0 = 16;
+                }
+            }
+        }
+    }
+
+    return desc;
+}
+
+DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G78_u8(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
+{
+    DirectConvComputeKernelInfo desc;
+
+    if(src->data_layout() == DataLayout::NHWC)
+    {
+        // Get the output shape
+        TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
+
+        desc.n0 = 4;
+
+        if(output_shape[0] > 16)
+        {
+            desc.m0 = 4;
+        }
+
+        desc.k0 = 16;
+
+        desc.export_weights_to_cl_image = false;
+    }
+
+    return desc;
+}
+
+DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G57_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
+{
+    DirectConvComputeKernelInfo desc;
+
+    if(src->data_layout() == DataLayout::NHWC)
+    {
+        // Get the output shape
+        const TensorShape wei_shape          = wei->tensor_shape();
+        const TensorShape dst_shape          = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
+        const bool        export_to_cl_image = export_weights_to_cl_image(wei);
+
+        const int32_t m            = dst_shape[1] * dst_shape[2];
+        const bool    is_pointwise = (wei_shape[1] == wei_shape[2]) && wei_shape[1] == 1;
+
+        desc.export_weights_to_cl_image = export_to_cl_image;
+
+        if(dst_shape[0] <= 4)
+        {
+            if(is_pointwise)
+            {
+                desc.m0 = 1;
+                desc.n0 = 1;
+                desc.k0 = 16;
+            }
+            else
+            {
+                desc.m0 = 1;
+                desc.n0 = dst_shape[0];
+                desc.k0 = 16;
+            }
+        }
+        else
+        {
+            if(m < 64)
+            {
+                if(m == 1)
+                {
+                    desc.m0 = 1;
+                    desc.n0 = 1;
+                    desc.k0 = 16;
+                }
+                else
+                {
+                    desc.m0 = 4;
+                    desc.n0 = 2;
+                    desc.k0 = 8;
+                }
+            }
+            else
+            {
+                desc.m0 = 4;
+                desc.n0 = 4;
+                desc.k0 = 4;
+            }
+        }
+    }
+
+    return desc;
+}
+
+DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G57_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
+{
+    DirectConvComputeKernelInfo desc;
+
+    if(src->data_layout() == DataLayout::NHWC)
+    {
+        // Get the output shape
+        const TensorShape wei_shape          = wei->tensor_shape();
+        const TensorShape dst_shape          = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
+        const bool        export_to_cl_image = export_weights_to_cl_image(wei);
+
+        const int32_t ofm          = dst_shape[0];
+        const int32_t m            = dst_shape[1] * dst_shape[2];
+        const bool    is_pointwise = (wei_shape[1] == wei_shape[2]) && wei_shape[1] == 1;
+
+        desc.export_weights_to_cl_image = export_to_cl_image;
+
+        if(dst_shape[0] <= 4)
+        {
+            if(is_pointwise)
+            {
+                desc.m0 = 2;
+                desc.n0 = 1;
+                desc.k0 = 16;
+            }
+            else
+            {
+                desc.m0 = 1;
+                desc.n0 = dst_shape[0];
+                desc.k0 = 16;
+            }
+        }
+        else
+        {
+            if(m < 64)
+            {
+                if(m == 1)
+                {
+                    desc.m0 = 1;
+                    desc.n0 = 1;
+                    desc.k0 = 16;
+                }
+                else
+                {
+                    desc.m0 = 4;
+                    desc.n0 = 2;
+                    desc.k0 = 8;
+                }
+            }
+            else
+            {
+                if(ofm > 16)
+                {
+                    desc.m0 = 4;
+                    desc.n0 = 8;
+                    desc.k0 = 8;
+                }
+                else
+                {
+                    desc.m0 = 8;
+                    desc.n0 = 4;
+                    desc.k0 = 4;
+                }
+            }
+        }
+    }
+
+    return desc;
+}
+} // namespace opencl
+} // namespace arm_compute
diff --git a/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.h b/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.h
new file mode 100644
index 0000000000..2c65b88846
--- /dev/null
+++ b/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_DIRECT_CONV_DEFAULT_CONFIG_VALHALL_H
+#define ARM_COMPUTE_CL_DIRECT_CONV_DEFAULT_CONFIG_VALHALL_H
+
+#include "src/gpu/cl/kernels/direct_conv/IClDirectConvKernelConfig.h"
+
+namespace arm_compute
+{
+namespace cl_direct_conv
+{
+/** Valhall based OpenCL direct convolution configuration */
+class ClDirectConvDefaultConfigValhall final : public IClDirectConvKernelConfig
+{
+public:
+    /** Constructor
+     *
+     * @param[in] gpu GPU target
+     */
+    ClDirectConvDefaultConfigValhall(GPUTarget gpu);
+
+    // Inherited overridden method
+    DirectConvComputeKernelInfo configure(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info) override;
+
+private:
+    DirectConvComputeKernelInfo configure_G78_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info);
+    DirectConvComputeKernelInfo configure_G78_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info);
+    DirectConvComputeKernelInfo configure_G78_u8(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info);
+    DirectConvComputeKernelInfo configure_G57_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info);
+    DirectConvComputeKernelInfo configure_G57_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info);
+};
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_DIRECT_CONV_DEFAULT_CONFIG_VALHALL_H */
diff --git a/src/gpu/cl/kernels/direct_conv/ClDirectConvKernelConfig.h b/src/gpu/cl/kernels/direct_conv/ClDirectConvKernelConfig.h
new file mode 100644
index 0000000000..c1c2e439c6
--- /dev/null
+++ b/src/gpu/cl/kernels/direct_conv/ClDirectConvKernelConfig.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_DIRECT_CONV_KERNEL_CONFIGURATION_H
+#define ARM_COMPUTE_CL_DIRECT_CONV_KERNEL_CONFIGURATION_H
+
+#include "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.h"
+#include "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.h"
+#include "src/gpu/cl/kernels/direct_conv/IClDirectConvKernelConfig.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+namespace cl_direct_conv
+{
+/** ClDirectConvolution factory class */
+class ClDirectConvKernelConfigurationFactory final
+{
+public:
+    /** Static method to call the ClDirectConvolution kernel configuration class accordingly with the GPU target
+     *
+     * @param[in] gpu GPU target
+     *
+     * @return IClDirectConvKernelConfig
+     */
+    static std::unique_ptr<IClDirectConvKernelConfig> create(GPUTarget gpu)
+    {
+        switch(get_arch_from_target(gpu))
+        {
+            case GPUTarget::MIDGARD:
+                return std::make_unique<ClDirectConvDefaultConfigBifrost>(GPUTarget::G71);
+            case GPUTarget::BIFROST:
+                return std::make_unique<ClDirectConvDefaultConfigBifrost>(gpu);
+            case GPUTarget::VALHALL:
+                return std::make_unique<ClDirectConvDefaultConfigValhall>(gpu);
+            default:
+                ARM_COMPUTE_ERROR("Not supported GPU target");
+        }
+    }
+};
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_DIRECT_CONV_KERNEL_CONFIGURATION_H */
diff --git a/src/gpu/cl/kernels/direct_conv/IClDirectConvKernelConfig.h b/src/gpu/cl/kernels/direct_conv/IClDirectConvKernelConfig.h
new file mode 100644
index 0000000000..837fa35341
--- /dev/null
+++ b/src/gpu/cl/kernels/direct_conv/IClDirectConvKernelConfig.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_ICL_DIRECT_CONV_KERNEL_CONFIG_H
+#define ARM_COMPUTE_ICL_DIRECT_CONV_KERNEL_CONFIG_H
+
+#include "arm_compute/core/GPUTarget.h"
+#include "arm_compute/core/KernelDescriptors.h"
+#include "arm_compute/core/Types.h"
+#include "src/core/common/Macros.h"
+
+namespace arm_compute
+{
+namespace cl_direct_conv
+{
+/** Basic container for the OpenCL direct convolution configuration functions */
+template <class T>
+class ClDirectConvConfigArray
+{
+public:
+    /** Alias for F32 index */
+    static constexpr size_t DT_F32 = 0;
+    /** Alias for F16 index */
+    static constexpr size_t DT_F16 = 1;
+    /** Alias for Int8 index */
+    static constexpr size_t DT_INT8 = 2;
+
+    /** Constructor
+     *
+     * @param[in] func_f32  Function to call for direct convolution F32
+     * @param[in] func_f16  Function to call for direct convolution F16
+     * @param[in] func_int8 Function to call for direct convolution Int8 (QASYMM8, QASYMM8_SIGNED, QSYMM8_PER_CHANNEL)
+     *
+     */
+    ClDirectConvConfigArray(T func_f32, T func_f16, T func_int8)
+        : _configs{ func_f32, func_f16, func_int8 }
+    {
+    }
+
+    /** Method to return the direct convolution configuration function based on data type
+     *
+     * @param[in] data_type Input data type
+     *
+     * @return the valid function otherwise it returns nullptr if the data type is not valid
+     */
+    T get_function(DataType data_type)
+    {
+        switch(data_type)
+        {
+            case DataType::F32:
+                return _configs.at(DT_F32);
+            case DataType::F16:
+                return _configs.at(DT_F16);
+            case DataType::QASYMM8:
+            case DataType::QASYMM8_SIGNED:
+            case DataType::QSYMM8_PER_CHANNEL:
+                return _configs.at(DT_INT8);
+            default:
+                return nullptr;
+        }
+    }
+
+private:
+    std::array<T, 3> _configs;
+};
+
+/** Basic interface for the Direct convolution kernel configuration */
+class IClDirectConvKernelConfig
+{
+public:
+    /** Constructor
+     *
+     * @param[in] arch GPU target
+     */
+    IClDirectConvKernelConfig(GPUTarget arch)
+        : _target(arch)
+    {
+    }
+    ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(IClDirectConvKernelConfig);
+    /** Virtual destructor */
+    virtual ~IClDirectConvKernelConfig() = default;
+    /** This method returns the @ref DirectConvComputeKernelInfo for the given inputs
+     *
+     * @param[in] src       Source tensor (activation tensor)
+     * @param[in] wei       Weights tensor
+     * @param[in] conv_info Convolution info
+     */
+    virtual DirectConvComputeKernelInfo configure(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info) = 0;
+
+protected:
+    GPUTarget _target;
+};
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_ICL_DIRECT_CONV_KERNEL_CONFIG_H */
diff --git a/src/gpu/cl/operators/ClDirectConv2d.cpp b/src/gpu/cl/operators/ClDirectConv2d.cpp
index 53de6fc403..ded275dbae 100644
--- a/src/gpu/cl/operators/ClDirectConv2d.cpp
+++ b/src/gpu/cl/operators/ClDirectConv2d.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,13 +23,22 @@
  */
 #include "src/gpu/cl/operators/ClDirectConv2d.h"
 
+#include "arm_compute/core/KernelDescriptors.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/helpers/AutoConfiguration.h"
 #include "src/gpu/cl/kernels/ClActivationKernel.h"
 #include "src/gpu/cl/kernels/ClDirectConv2dKernel.h"
+#include "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.h"
+#include "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.h"
+#include "src/gpu/cl/kernels/direct_conv/ClDirectConvKernelConfig.h"
+#include "src/gpu/cl/kernels/direct_conv/IClDirectConvKernelConfig.h"
 
 #include "src/common/utils/Log.h"
 
+using namespace arm_compute::cl_direct_conv;
+
 namespace arm_compute
 {
 namespace opencl
@@ -43,6 +52,17 @@ ITensorPack select_activation_src_dst(ITensorPack &tensors)
     pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(TensorType::ACL_DST));
     return pack;
 }
+
+DirectConvComputeKernelInfo config_direct_convolution_nhwc(const ITensorInfo *src, const ITensorInfo *weights, const PadStrideInfo &conv_info)
+{
+    // Get GPU target
+    GPUTarget gpu_target = CLScheduler::get().target();
+
+    std::unique_ptr<IClDirectConvKernelConfig> t = ClDirectConvKernelConfigurationFactory::create(gpu_target);
+
+    return t->configure(src, weights, conv_info);
+}
+
 } // namespace
 
 void ClDirectConv2d::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst,
@@ -51,11 +71,14 @@ void ClDirectConv2d::configure(const CLCompileContext &compile_context, ITensorI
     ARM_COMPUTE_ERROR_ON_NULLPTR(src);
     ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, conv_info, act_info);
 
+    // Initialize the direct convolution descriptor
+    const DirectConvComputeKernelInfo desc = config_direct_convolution_nhwc(src, weights, conv_info);
+
     // Configure direct convolution kernel
     const ActivationLayerInfo conv2d_act_info = (src->data_layout() == DataLayout::NHWC && is_data_type_float(src->data_type())) ? act_info : ActivationLayerInfo();
     auto                      k               = std::make_unique<kernels::ClDirectConv2dKernel>();
     k->set_target(CLScheduler::get().target());
-    k->configure(compile_context, src, weights, biases, dst, conv_info, conv2d_act_info);
+    k->configure(compile_context, src, weights, biases, dst, conv_info, conv2d_act_info, desc);
     _direct_conv_kernel = std::move(k);
 
     // Configure border handler
@@ -83,7 +106,10 @@ void ClDirectConv2d::configure(const CLCompileContext &compile_context, ITensorI
 Status ClDirectConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
                                 const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
 {
-    ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClDirectConv2dKernel::validate(src, weights, biases, dst, conv_info, ActivationLayerInfo()));
+    // Initialize the direct convolution descriptor
+    const DirectConvComputeKernelInfo desc = config_direct_convolution_nhwc(src, weights, conv_info);
+
+    ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClDirectConv2dKernel::validate(src, weights, biases, dst, conv_info, ActivationLayerInfo(), desc));
     if(act_info.enabled())
     {
         ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClActivationKernel::validate(dst, dst, act_info));
author	Gian Marco Iodice <gianmarco.iodice@arm.com>	2022-05-30 14:41:49 +0100
committer	Gian Marco Iodice <gianmarco.iodice@arm.com>	2022-07-08 13:48:51 +0000
commit	2cc50b39613cea5e55c8a4851ee08d284a3d4f66 (patch)
tree	7faac6aaa4409bb127a9bd8ebc89056dae40066e
parent	22dd8b9014112fe446cb8cff6d52933d2603a97f (diff)
download	ComputeLibrary-2cc50b39613cea5e55c8a4851ee08d284a3d4f66.tar.gz