14 files changed, 92 insertions, 51 deletions
diff --git a/Android.bp b/Android.bp
index 46cdb06a25..8094c8a660 100644
--- a/Android.bp
+++ b/Android.bp
@@ -667,8 +667,6 @@ cc_library_static {
         "src/gpu/cl/kernels/ClWinogradFilterTransformKernel.cpp",
         "src/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp",
         "src/gpu/cl/kernels/ClWinogradOutputTransformKernel.cpp",
-        "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.cpp",
-        "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp",
         "src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp",
         "src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.cpp",
         "src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.cpp",
@@ -941,6 +939,8 @@ cc_library_static {
         "src/runtime/Tensor.cpp",
         "src/runtime/TensorAllocator.cpp",
         "src/runtime/Utils.cpp",
+        "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigBifrost.cpp",
+        "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigValhall.cpp",
         "utils/CommonGraphOptions.cpp",
         "utils/GraphUtils.cpp",
         "utils/Utils.cpp",
diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h
index df3177867f..833f341b2f 100644
--- a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h
+++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h
@@ -68,11 +68,7 @@ public:
                           ITensorInfo       *rhs,
                           ITensorInfo       *dst);
     /** Check if the operator configuration is supported, irrespective of fusion
-     *
-     * @param[in]  context Workload context within which the operator is running
-     * @param[in]  lhs     Left hand side tensor info. Data types supported: U8/S16/S32/F16/F32.
-     * @param[in]  rhs     Right hand side tensor info. Data types supported: U8/S16/S32/F16/F32.
-     * @param[out] dst     Destination tensor info. Data types supported: U8/S16/S32/F16/F32. If an uninitialized ITensorInfo is passed in, it will be auto-initialized
+     * Parameters are similar to @ref GpuAdd::create_op()
      */
     static Status is_supported_op(const GpuWorkloadContext &context,
                                   const ITensorInfo        *lhs,
diff --git a/filelist.json b/filelist.json
index beb6f77daf..2db128791f 100644
--- a/filelist.json
+++ b/filelist.json
@@ -466,8 +466,6 @@
       "deps": [ "Cast" ],
       "files": {
         "common": [
-          "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.cpp",
-          "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp",
           "src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp",
           "src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.cpp",
           "src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.cpp",
@@ -501,7 +499,9 @@
           "src/runtime/CL/gemm_auto_heuristics/CLGEMMAutoHeuristics.cpp",
           "src/runtime/CL/functions/CLGEMM.cpp",
           "src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp",
-          "src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp"
+          "src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp",
+          "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigBifrost.cpp",
+          "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigValhall.cpp"
         ]
       }
     },
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp
index dc05825500..1fbcb41028 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp
@@ -57,13 +57,24 @@ bool ClComponentDirectConv2dSettings::fast_relaxed_math() const
     return _fast_relaxed_math;
 }
 
+ClComponentDirectConv2dSettings &ClComponentDirectConv2dSettings::direct_conv_descriptor(const DirectConvComputeKernelInfo &desc)
+{
+    _desc = desc; // Keep a copy of the descriptor passed in by the caller
+    return *this; // Hand back this settings object by reference
+}
+
+DirectConvComputeKernelInfo ClComponentDirectConv2dSettings::direct_conv_descriptor() const
+{
+    return _desc; // Returned by value: the caller receives a copy of the stored descriptor
+}
+
 Status ClComponentDirectConv2d::validate(
     const Properties                &properties,
     const ArgumentPack<ITensorInfo> &tensors,
     const Attributes                &attributes,
     const Settings                  &settings)
 {
-    ARM_COMPUTE_UNUSED(properties, settings);
+    ARM_COMPUTE_UNUSED(properties);
     const auto src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
     const auto wei = tensors.get_const_tensor(TensorType::ACL_SRC_1);
     const auto bia = tensors.get_const_tensor(TensorType::ACL_SRC_2);
@@ -125,6 +136,11 @@ Status ClComponentDirectConv2d::validate(
     // Data layout
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC);
 
+    const auto desc = settings.direct_conv_descriptor();
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.n0 != 1 && desc.n0 != 2 && desc.n0 != 3 && desc.n0 != 4 && desc.n0 != 8 && desc.n0 != 16,
+                                    "N0 can only be: 1, 2, 3, 4, 8, and 16");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.k0 != 1 && desc.k0 != 2 && desc.k0 != 3 && desc.k0 != 4 && desc.k0 != 8 && desc.k0 != 16,
+                                    "K0 can only be: 1, 2, 3, 4, 8, and 16");
     return Status{};
 }
 
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h
index fec22b84a5..c3a70ef3ae 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h
@@ -25,6 +25,7 @@
 #define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D
 
 #include "arm_compute/core/Error.h"
+#include "arm_compute/core/KernelDescriptors.h"
 #include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
 #include <memory>
 
@@ -56,9 +57,15 @@ public:
     /** Get fast_relaxed_math flag */
     bool fast_relaxed_math() const;
 
+    /** Set direct convolution descriptor */
+    ClComponentDirectConv2dSettings &direct_conv_descriptor(const DirectConvComputeKernelInfo &desc);
+    /** Get direct convolution descriptor */
+    DirectConvComputeKernelInfo direct_conv_descriptor() const;
+
 private:
-    bool _export_to_cl_image{ false };
-    bool _fast_relaxed_math{ true };
+    bool                        _export_to_cl_image{ false };
+    bool                        _fast_relaxed_math{ true };
+    DirectConvComputeKernelInfo _desc{}; // Direct convolution descriptor
 };
 
 /** Forward declaration */
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
index 9cb4ee7815..048ee01f35 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
@@ -23,16 +23,19 @@
  */
 #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h"
 
+#include "arm_compute/core/KernelDescriptors.h"
 #include "arm_compute/core/Validate.h"
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
 
+#include "src/common/utils/Log.h"
 #include "src/core/helpers/AutoConfiguration.h"
 #include "src/dynamic_fusion/sketch/ArgumentPack.h"
 #include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
 #include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
 #include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
-
-#include "src/common/utils/Log.h"
+#include "src/runtime/heuristics/direct_conv/ClDirectConvKernelConfig.h"
+#include "src/runtime/heuristics/direct_conv/IClDirectConvKernelConfig.h"
 
 namespace arm_compute
 {
@@ -85,6 +88,16 @@ bool export_to_cl_image_support(const ITensorInfo *tensor, GPUTarget gpu_target,
     return true;
 }
 
+/** Ask the direct convolution heuristic for the kernel descriptor to use with the given src, weights and conv_info */
+DirectConvComputeKernelInfo config_direct_convolution_nhwc(const ITensorInfo *src, const ITensorInfo *weights, const PadStrideInfo &conv_info)
+{
+    // The heuristic is created for the GPU target reported by the CL scheduler
+    const GPUTarget target    = CLScheduler::get().target();
+    const auto      heuristic = arm_compute::cl_direct_conv::ClDirectConvKernelConfigurationFactory::create(target);
+
+    return heuristic->configure(src, weights, conv_info);
+}
+
 constexpr GpuOperatorType operator_type = GpuOperatorType::Complex;
 } // namespace
 
@@ -112,6 +125,11 @@ Status GpuConv2d::validate_op(const GpuWorkloadSketch &sketch,
                                                                                           attributes.pad().right,
                                                                                           attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR)); // use the default DimensionRoundingType
 
+        // Checks performed when dst is configured
+        if(dst->total_size() != 0)
+        {
+            ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(dst->tensor_shape(), shape);
+        }
         auto_init_if_empty(dst_info_to_validate, src->clone()->set_tensor_shape(shape));
     }
 
@@ -175,6 +193,12 @@ void GpuConv2d::create_op(GpuWorkloadSketch      &sketch,
                           const Conv2dAttributes &attributes)
 {
     ARM_COMPUTE_LOG_PARAMS(src, wei, bia, dst, attributes);
+    PadStrideInfo conv_info(attributes.stride().x(), attributes.stride().y(), attributes.pad().left,
+                            attributes.pad().right,
+                            attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR);
+    // Initialize the direct convolution descriptor
+    const DirectConvComputeKernelInfo desc = config_direct_convolution_nhwc(src, wei, conv_info);
+
     // Assert validation
     ARM_COMPUTE_ERROR_THROW_ON(GpuConv2d::validate_op(sketch, src, wei, bia, dst, attributes));
     ARM_COMPUTE_ERROR_ON_NULLPTR(src, wei, dst);
@@ -182,10 +206,7 @@ void GpuConv2d::create_op(GpuWorkloadSketch      &sketch,
 
     // Auto initialize dst tensor
     {
-        auto shape = misc::shape_calculator::compute_deep_convolution_shape(src->tensor_shape(), data_layout, wei->tensor_shape(),
-                                                                            PadStrideInfo(attributes.stride().x(), attributes.stride().y(), attributes.pad().left,
-                                                                                          attributes.pad().right,
-                                                                                          attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR)); // use the default DimensionRoundingType
+        auto shape = misc::shape_calculator::compute_deep_convolution_shape(src->tensor_shape(), data_layout, wei->tensor_shape(), conv_info); // conv_info is built with the default DimensionRoundingType
 
         auto_init_if_empty(*dst, src->clone()->set_tensor_shape(shape));
     }
@@ -221,6 +242,8 @@ void GpuConv2d::create_op(GpuWorkloadSketch      &sketch,
                 arm_compute::opencl::kernels::gemm::update_padding_for_cl_image(wei);
             }
 
+            settings.direct_conv_descriptor(desc);
+
             ArgumentPack<ITensorInfo> arguments;
             arguments.add_const_tensor(ACL_SRC_0, src);
             arguments.add_const_tensor(ACL_SRC_1, wei);
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
index 75e812af9f..6f7bf72df8 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
@@ -69,7 +69,7 @@ std::string ClTemplateDirectConv2d::get_component_code(const ComponentGroup &com
     ARM_COMPUTE_UNUSED(comp_group);
 
     const auto channel_idx   = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::CHANNEL);
-    const auto k0            = adjust_vec_size(is_data_type_quantized(_src->data_type()) ? 16u : 8u, _src->dimension(channel_idx));
+    const auto k0            = adjust_vec_size(_settings.direct_conv_descriptor().k0, _src->dimension(channel_idx));
     const bool leftover_loop = (_src->dimension(channel_idx) % k0) != 0;
 
     std::string code = R"_(
@@ -303,13 +303,11 @@ TagLUT ClTemplateDirectConv2d::get_tag_lut(const GpuKernelVariableTable &vtable,
 CLBuildOptions ClTemplateDirectConv2d::get_build_options(const ComponentGroup &comp_group) const
 {
     const unsigned int channel_idx = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::CHANNEL);
-    const DataType     data_type   = _src->data_type();
 
-    /// NOTE: For now tile sizes (n0, m0, k0) are set by the execution window. This may change in the future
     const auto         root_window      = comp_group.get_root_component()->template_writer()->get_window();
     const unsigned int n0               = root_window.x().step();
     const unsigned int m0               = root_window.y().step();
-    const unsigned int k0               = adjust_vec_size(is_data_type_quantized(data_type) ? 16u : 8u, _src->dimension(channel_idx));
+    const unsigned int k0               = adjust_vec_size(_settings.direct_conv_descriptor().k0, _src->dimension(channel_idx));
     const unsigned int partial_store_n0 = _dst->dimension(0) % n0;
 
     CLBuildOptions build_opts{};
@@ -369,15 +367,16 @@ Window ClTemplateDirectConv2d::get_window() const
     ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
 
     const auto output_shape = _dst->tensor_shape();
+    const auto desc         = _settings.direct_conv_descriptor();
 
-    const unsigned int vec_size = std::min(static_cast<unsigned int>(output_shape[0]), 4u);
-    const unsigned int num_rows = (_dst->tensor_shape()[0] > 16) ? ((_src->data_type() == DataType::F32) ? 2U : 4U) : 1U;
+    const unsigned int n0 = adjust_vec_size(desc.n0, output_shape[0]);
+    const unsigned int m0 = adjust_vec_size(desc.m0, output_shape[1] * output_shape[2]);
 
     // Create and configure kernel window
-    Window win = calculate_max_window(output_shape, Steps(vec_size, num_rows));
+    Window win = calculate_max_window(output_shape, Steps(n0, m0));
 
-    const size_t dim_y_collapsed = ceil_to_multiple(output_shape[1] * output_shape[2], num_rows);
-    win.set(Window::DimY, Window::Dimension(0, dim_y_collapsed, num_rows));
+    const size_t dim_y_collapsed = ceil_to_multiple(output_shape[1] * output_shape[2], m0);
+    win.set(Window::DimY, Window::Dimension(0, dim_y_collapsed, m0));
     win.set(Window::DimZ, Window::Dimension(0, output_shape.total_size_upper(3), 1));
 
     return win;
diff --git a/src/gpu/cl/operators/ClDirectConv2d.cpp b/src/gpu/cl/operators/ClDirectConv2d.cpp
index ded275dbae..0215dba422 100644
--- a/src/gpu/cl/operators/ClDirectConv2d.cpp
+++ b/src/gpu/cl/operators/ClDirectConv2d.cpp
@@ -30,10 +30,10 @@
 #include "src/core/helpers/AutoConfiguration.h"
 #include "src/gpu/cl/kernels/ClActivationKernel.h"
 #include "src/gpu/cl/kernels/ClDirectConv2dKernel.h"
-#include "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.h"
-#include "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.h"
-#include "src/gpu/cl/kernels/direct_conv/ClDirectConvKernelConfig.h"
-#include "src/gpu/cl/kernels/direct_conv/IClDirectConvKernelConfig.h"
+#include "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigBifrost.h"
+#include "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigValhall.h"
+#include "src/runtime/heuristics/direct_conv/ClDirectConvKernelConfig.h"
+#include "src/runtime/heuristics/direct_conv/IClDirectConvKernelConfig.h"
 
 #include "src/common/utils/Log.h"
 
diff --git a/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.cpp b/src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigBifrost.cpp
index ba176f8c5f..1bfb8124e9 100644
--- a/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.cpp
+++ b/src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigBifrost.cpp
@@ -21,7 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#include "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.h"
+#include "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigBifrost.h"
 
 #include "arm_compute/core/CL/CLHelpers.h"
 #include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.h b/src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigBifrost.h
index 1e4cb66ec0..6b60b2c007 100644
--- a/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.h
+++ b/src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigBifrost.h
@@ -21,10 +21,10 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_CL_DIRECT_CONV_DEFAULT_CONFIG_BIFROST_H
-#define ARM_COMPUTE_CL_DIRECT_CONV_DEFAULT_CONFIG_BIFROST_H
+#ifndef SRC_RUNTIME_HEURISTICS_DIRECT_CONV_CLDIRECTCONVDEFAULTCONFIGBIFROST
+#define SRC_RUNTIME_HEURISTICS_DIRECT_CONV_CLDIRECTCONVDEFAULTCONFIGBIFROST
 
-#include "src/gpu/cl/kernels/direct_conv/IClDirectConvKernelConfig.h"
+#include "src/runtime/heuristics/direct_conv/IClDirectConvKernelConfig.h"
 
 namespace arm_compute
 {
@@ -52,4 +52,4 @@ private:
 };
 } // namespace opencl
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_DIRECT_CONV_DEFAULT_CONFIG_BIFROST_H */
+#endif /* SRC_RUNTIME_HEURISTICS_DIRECT_CONV_CLDIRECTCONVDEFAULTCONFIGBIFROST */
diff --git a/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp b/src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigValhall.cpp
index b693568c67..8f2fd82412 100644
--- a/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.cpp
+++ b/src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigValhall.cpp
@@ -21,7 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#include "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.h"
+#include "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigValhall.h"
 
 #include "arm_compute/core/CL/CLHelpers.h"
 #include "arm_compute/core/CL/CLKernelLibrary.h"
diff --git a/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.h b/src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigValhall.h
index 2c65b88846..f9d5c5299e 100644
--- a/src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.h
+++ b/src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigValhall.h
@@ -21,10 +21,10 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_CL_DIRECT_CONV_DEFAULT_CONFIG_VALHALL_H
-#define ARM_COMPUTE_CL_DIRECT_CONV_DEFAULT_CONFIG_VALHALL_H
+#ifndef SRC_RUNTIME_HEURISTICS_DIRECT_CONV_CLDIRECTCONVDEFAULTCONFIGVALHALL
+#define SRC_RUNTIME_HEURISTICS_DIRECT_CONV_CLDIRECTCONVDEFAULTCONFIGVALHALL
 
-#include "src/gpu/cl/kernels/direct_conv/IClDirectConvKernelConfig.h"
+#include "src/runtime/heuristics/direct_conv/IClDirectConvKernelConfig.h"
 
 namespace arm_compute
 {
@@ -52,4 +52,4 @@ private:
 };
 } // namespace opencl
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_DIRECT_CONV_DEFAULT_CONFIG_VALHALL_H */
+#endif /* SRC_RUNTIME_HEURISTICS_DIRECT_CONV_CLDIRECTCONVDEFAULTCONFIGVALHALL */
diff --git a/src/gpu/cl/kernels/direct_conv/ClDirectConvKernelConfig.h b/src/runtime/heuristics/direct_conv/ClDirectConvKernelConfig.h
index c1c2e439c6..232167fc59 100644
--- a/src/gpu/cl/kernels/direct_conv/ClDirectConvKernelConfig.h
+++ b/src/runtime/heuristics/direct_conv/ClDirectConvKernelConfig.h
@@ -21,12 +21,12 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_CL_DIRECT_CONV_KERNEL_CONFIGURATION_H
-#define ARM_COMPUTE_CL_DIRECT_CONV_KERNEL_CONFIGURATION_H
+#ifndef SRC_RUNTIME_HEURISTICS_DIRECT_CONV_CLDIRECTCONVKERNELCONFIG
+#define SRC_RUNTIME_HEURISTICS_DIRECT_CONV_CLDIRECTCONVKERNELCONFIG
 
-#include "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigBifrost.h"
-#include "src/gpu/cl/kernels/direct_conv/ClDirectConvDefaultConfigValhall.h"
-#include "src/gpu/cl/kernels/direct_conv/IClDirectConvKernelConfig.h"
+#include "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigBifrost.h"
+#include "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigValhall.h"
+#include "src/runtime/heuristics/direct_conv/IClDirectConvKernelConfig.h"
 
 #include <memory>
 
@@ -61,4 +61,4 @@ public:
 };
 } // namespace opencl
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_DIRECT_CONV_KERNEL_CONFIGURATION_H */
+#endif /* SRC_RUNTIME_HEURISTICS_DIRECT_CONV_CLDIRECTCONVKERNELCONFIG */
diff --git a/src/gpu/cl/kernels/direct_conv/IClDirectConvKernelConfig.h b/src/runtime/heuristics/direct_conv/IClDirectConvKernelConfig.h
index 837fa35341..6104d73594 100644
--- a/src/gpu/cl/kernels/direct_conv/IClDirectConvKernelConfig.h
+++ b/src/runtime/heuristics/direct_conv/IClDirectConvKernelConfig.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_ICL_DIRECT_CONV_KERNEL_CONFIG_H
-#define ARM_COMPUTE_ICL_DIRECT_CONV_KERNEL_CONFIG_H
+#ifndef SRC_RUNTIME_HEURISTICS_DIRECT_CONV_ICLDIRECTCONVKERNELCONFIG
+#define SRC_RUNTIME_HEURISTICS_DIRECT_CONV_ICLDIRECTCONVKERNELCONFIG
 
 #include "arm_compute/core/GPUTarget.h"
 #include "arm_compute/core/KernelDescriptors.h"
@@ -112,4 +112,4 @@ protected:
 };
 } // namespace opencl
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_ICL_DIRECT_CONV_KERNEL_CONFIG_H */
+#endif /* SRC_RUNTIME_HEURISTICS_DIRECT_CONV_ICLDIRECTCONVKERNELCONFIG */