about | summary | refs | log | tree | commit | diff
path: root/src/runtime/heuristics/dwc_native
diff options
context:
space:
mode:
Diffstat (limited to 'src/runtime/heuristics/dwc_native')
-rw-r--r-- src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigBifrost.cpp 309
-rw-r--r-- src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigBifrost.h 79
-rw-r--r-- src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.cpp 326
-rw-r--r-- src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.h 74
-rw-r--r-- src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.cpp 61
-rw-r--r-- src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.h 45
-rw-r--r-- src/runtime/heuristics/dwc_native/ClDWCNativeKernelConfig.h 66
-rw-r--r-- src/runtime/heuristics/dwc_native/IClDWCNativeKernelConfig.h 120
8 files changed, 1080 insertions, 0 deletions
diff --git a/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigBifrost.cpp b/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigBifrost.cpp
new file mode 100644
index 0000000000..98ebf3ebbe
--- /dev/null
+++ b/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigBifrost.cpp
@@ -0,0 +1,309 @@
+/*
+ * Copyright (c) 2022-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigBifrost.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/GPUTarget.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+
+#include "src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.h"
+
+namespace arm_compute
+{
+namespace cl_dwc
+{
+namespace
+{
+/** F32 depthwise-convolution heuristic shared by the Bifrost configurations.
+ *
+ * Only the NHWC data layout is tuned; for any other layout the default-constructed
+ * descriptor is returned untouched.
+ *
+ * @param[in] src              Source tensor info (only its data layout is read)
+ * @param[in] wei              Weights tensor info (channel and width dimensions are read)
+ * @param[in] conv_info        Convolution info (only the x stride is read)
+ * @param[in] dilation         Kernel dilation (only the x component is read)
+ * @param[in] depth_multiplier Depth multiplier
+ * @param[in] is_g71           When true the weights are never exported to cl_image
+ *
+ * @return The selected kernel configuration
+ */
+DWCComputeKernelInfo configure_f32(const ITensorInfo   *src,
+                                   const ITensorInfo   *wei,
+                                   const PadStrideInfo &conv_info,
+                                   const Size2D        &dilation,
+                                   unsigned int         depth_multiplier,
+                                   bool                 is_g71)
+{
+    DWCComputeKernelInfo info;
+
+    // Nothing is tuned outside NHWC
+    if (src->data_layout() != DataLayout::NHWC)
+    {
+        return info;
+    }
+
+    const DataLayout  wei_layout    = wei->data_layout();
+    const TensorShape weights_shape = wei->tensor_shape();
+    const size_t      channels =
+        weights_shape[get_data_layout_dimension_index(wei_layout, DataLayoutDimension::CHANNEL)];
+    const size_t filter_w = weights_shape[get_data_layout_dimension_index(wei_layout, DataLayoutDimension::WIDTH)];
+
+    info.export_input_to_cl_image = false;
+    // The weights helper is only consulted when the target is not G71
+    info.export_weights_to_cl_image = !is_g71 && use_cl_image_for_weights(wei, depth_multiplier);
+
+    // Vector size along the channels: the widest of 4/2/1 that divides the depth multiplier
+    // (a depth multiplier of 1 always gets 4)
+    if ((depth_multiplier == 1) || ((depth_multiplier % 4) == 0))
+    {
+        info.n0 = 4;
+    }
+    else if ((depth_multiplier % 2) == 0)
+    {
+        info.n0 = 2;
+    }
+    else
+    {
+        info.n0 = 1;
+    }
+
+    // Note: If we reduce n0, export to cl_image must be false
+    const auto fitted_n0 = adjust_vec_size(info.n0, channels);
+    ARM_COMPUTE_ERROR_ON((fitted_n0 != info.n0) && (info.export_weights_to_cl_image == true));
+    info.n0 = fitted_n0;
+
+    // m0 is raised above 1 only for stride_x == 1, dilation_x == 1 and a kernel width in [2, 8]
+    const bool unit_step       = (conv_info.stride().first == 1) && (dilation.x() == 1);
+    const bool width_excluded  = (filter_w >= 9) || (filter_w == 1);
+    info.m0                    = (unit_step && !width_excluded) ? 2 : 1;
+
+    return info;
+}
+
+/** F16 depthwise-convolution heuristic shared by the Bifrost configurations.
+ *
+ * Only the NHWC data layout is tuned; for any other layout the default-constructed
+ * descriptor is returned untouched.
+ *
+ * @param[in] src              Source tensor info (data layout and width are read)
+ * @param[in] wei              Weights tensor info (channel and width dimensions are read)
+ * @param[in] conv_info        Convolution info (only the x stride is read)
+ * @param[in] dilation         Kernel dilation (only the x component is read)
+ * @param[in] depth_multiplier Depth multiplier
+ * @param[in] is_g71           When true the weights are never exported to cl_image
+ *
+ * @return The selected kernel configuration
+ */
+DWCComputeKernelInfo configure_f16(const ITensorInfo   *src,
+                                   const ITensorInfo   *wei,
+                                   const PadStrideInfo &conv_info,
+                                   const Size2D        &dilation,
+                                   unsigned int         depth_multiplier,
+                                   bool                 is_g71)
+{
+    DWCComputeKernelInfo info;
+
+    // Nothing is tuned outside NHWC
+    if (src->data_layout() != DataLayout::NHWC)
+    {
+        return info;
+    }
+
+    // Src and weights have the same dimension indices, so the weights layout is used for both
+    const DataLayout  wei_layout    = wei->data_layout();
+    const size_t      channel_dim   = get_data_layout_dimension_index(wei_layout, DataLayoutDimension::CHANNEL);
+    const size_t      width_dim     = get_data_layout_dimension_index(wei_layout, DataLayoutDimension::WIDTH);
+    const TensorShape input_shape   = src->tensor_shape();
+    const TensorShape weights_shape = wei->tensor_shape();
+    const size_t      input_w       = input_shape[width_dim];
+    const size_t      channels      = weights_shape[channel_dim];
+    const size_t      filter_w      = weights_shape[width_dim];
+
+    info.export_input_to_cl_image = false;
+    // The weights helper is only consulted when the target is not G71
+    info.export_weights_to_cl_image = !is_g71 && use_cl_image_for_weights(wei, depth_multiplier);
+
+    // Vector size along the channels
+    if (depth_multiplier == 1)
+    {
+        // 8 elements when the weights stay in a buffer, 4 when they are read through cl_image
+        info.n0 = info.export_weights_to_cl_image ? 4 : 8;
+    }
+    else if ((depth_multiplier % 4) == 0)
+    {
+        info.n0 = 4;
+    }
+    else if ((depth_multiplier % 2) == 0)
+    {
+        info.n0 = 2;
+    }
+    else
+    {
+        info.n0 = 1;
+    }
+
+    // Note: If we reduce n0, export to cl_image must be false
+    const auto fitted_n0 = adjust_vec_size(info.n0, channels);
+    ARM_COMPUTE_ERROR_ON((fitted_n0 != info.n0) && (info.export_weights_to_cl_image == true));
+    info.n0 = fitted_n0;
+
+    // m0 is raised above 1 only for stride_x == 1, dilation_x == 1 and a kernel width in [2, 8]
+    const bool unit_step      = (conv_info.stride().first == 1) && (dilation.x() == 1);
+    const bool width_excluded = (filter_w >= 9) || (filter_w == 1);
+    if (unit_step && !width_excluded)
+    {
+        // Use 5 when it divides the source width exactly, 4 otherwise
+        info.m0 = ((input_w % 5) == 0) ? 5 : 4;
+    }
+    else
+    {
+        info.m0 = 1;
+    }
+
+    return info;
+}
+} // namespace
+
+// The constructor only forwards the GPU target to the IClDWCNativeKernelConfig base
+ClDWCNativeDefaultConfigBifrost::ClDWCNativeDefaultConfigBifrost(GPUTarget gpu)
+    : IClDWCNativeKernelConfig(gpu)
+{
+}
+
+/** Select and run the depthwise-convolution heuristic matching the GPU target and data type.
+ *
+ * Two tables of member functions (F32, F16, 8-bit) are built: one for Mali-G71 and one used
+ * for every other target. The table is chosen from _target and the entry from the data type
+ * of @p src.
+ *
+ * @param[in] src              Source tensor info; its data type selects the heuristic
+ * @param[in] wei              Weights tensor info
+ * @param[in] conv_info        Convolution info
+ * @param[in] dilation         Kernel dilation
+ * @param[in] depth_multiplier Depth multiplier
+ *
+ * @return The kernel configuration produced by the selected heuristic
+ */
+DWCComputeKernelInfo ClDWCNativeDefaultConfigBifrost::configure(const ITensorInfo   *src,
+                                                                const ITensorInfo   *wei,
+                                                                const PadStrideInfo &conv_info,
+                                                                const Size2D        &dilation,
+                                                                unsigned int         depth_multiplier)
+{
+    using ConfigurationFunctionExecutorPtr = DWCComputeKernelInfo (ClDWCNativeDefaultConfigBifrost::*)(
+        const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
+        unsigned int depth_multiplier);
+
+    // G71 has dedicated floating-point heuristics and shares the 8-bit one with the other targets
+    ClDWCNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G71(
+        &ClDWCNativeDefaultConfigBifrost::configure_G71_f32, &ClDWCNativeDefaultConfigBifrost::configure_G71_f16,
+        &ClDWCNativeDefaultConfigBifrost::configure_G7x_u8);
+
+    ClDWCNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G7x(
+        &ClDWCNativeDefaultConfigBifrost::configure_G7x_f32, &ClDWCNativeDefaultConfigBifrost::configure_G7x_f16,
+        &ClDWCNativeDefaultConfigBifrost::configure_G7x_u8);
+
+    ConfigurationFunctionExecutorPtr func = nullptr;
+    switch (_target)
+    {
+        case GPUTarget::G71:
+            func = configs_G71.get_function(src->data_type());
+            break;
+        default:
+            func = configs_G7x.get_function(src->data_type());
+            break;
+    }
+
+    // Calling through a null pointer-to-member is undefined behaviour, so reject it
+    // unconditionally rather than through an assertion-style macro.
+    // NOTE(review): assumes ARM_COMPUTE_ERROR_ON_MSG can be compiled out when assertions are
+    // disabled while ARM_COMPUTE_ERROR is always active - confirm against the error macros header.
+    if (func == nullptr)
+    {
+        ARM_COMPUTE_ERROR("Data type not supported for depthwise convolution");
+    }
+
+    return (this->*func)(src, wei, conv_info, dilation, depth_multiplier);
+}
+
+// F32 heuristic for Mali-G71: the shared F32 routine is run with its G71 flag set
+DWCComputeKernelInfo ClDWCNativeDefaultConfigBifrost::configure_G71_f32(const ITensorInfo   *src,
+                                                                        const ITensorInfo   *wei,
+                                                                        const PadStrideInfo &conv_info,
+                                                                        const Size2D        &dilation,
+                                                                        unsigned int         depth_multiplier)
+{
+    constexpr bool target_is_g71 = true;
+    return configure_f32(src, wei, conv_info, dilation, depth_multiplier, target_is_g71);
+}
+
+// F16 heuristic for Mali-G71: the shared F16 routine is run with its G71 flag set
+DWCComputeKernelInfo ClDWCNativeDefaultConfigBifrost::configure_G71_f16(const ITensorInfo   *src,
+                                                                        const ITensorInfo   *wei,
+                                                                        const PadStrideInfo &conv_info,
+                                                                        const Size2D        &dilation,
+                                                                        unsigned int         depth_multiplier)
+{
+    constexpr bool target_is_g71 = true;
+    return configure_f16(src, wei, conv_info, dilation, depth_multiplier, target_is_g71);
+}
+
+// F32 heuristic for the non-G71 targets: the shared F32 routine is run with its G71 flag cleared
+DWCComputeKernelInfo ClDWCNativeDefaultConfigBifrost::configure_G7x_f32(const ITensorInfo   *src,
+                                                                        const ITensorInfo   *wei,
+                                                                        const PadStrideInfo &conv_info,
+                                                                        const Size2D        &dilation,
+                                                                        unsigned int         depth_multiplier)
+{
+    constexpr bool target_is_g71 = false;
+    return configure_f32(src, wei, conv_info, dilation, depth_multiplier, target_is_g71);
+}
+
+// F16 heuristic for the non-G71 targets: the shared F16 routine is run with its G71 flag cleared
+DWCComputeKernelInfo ClDWCNativeDefaultConfigBifrost::configure_G7x_f16(const ITensorInfo   *src,
+                                                                        const ITensorInfo   *wei,
+                                                                        const PadStrideInfo &conv_info,
+                                                                        const Size2D        &dilation,
+                                                                        unsigned int         depth_multiplier)
+{
+    constexpr bool target_is_g71 = false;
+    return configure_f16(src, wei, conv_info, dilation, depth_multiplier, target_is_g71);
+}
+
+/** 8-bit depthwise-convolution heuristic used for every Bifrost target.
+ *
+ * Only the NHWC data layout is tuned; for any other layout the default-constructed
+ * descriptor is returned untouched. The weights tensor info is not inspected.
+ */
+DWCComputeKernelInfo ClDWCNativeDefaultConfigBifrost::configure_G7x_u8(const ITensorInfo   *src,
+                                                                       const ITensorInfo   *wei,
+                                                                       const PadStrideInfo &conv_info,
+                                                                       const Size2D        &dilation,
+                                                                       unsigned int         depth_multiplier)
+{
+    ARM_COMPUTE_UNUSED(wei);
+
+    DWCComputeKernelInfo info;
+
+    if (src->data_layout() != DataLayout::NHWC)
+    {
+        return info;
+    }
+
+    const bool no_multiplier = (depth_multiplier == 1);
+    const bool unit_step     = (conv_info.stride().first == 1) && (dilation.x() == 1);
+
+    // Neither tensor is exported to cl_image in the 8-bit path
+    info.export_input_to_cl_image   = false;
+    info.export_weights_to_cl_image = false;
+
+    info.n0 = no_multiplier ? 4 : 1;
+    // Two rows per work-item only for stride_x == 1, dilation_x == 1 and no depth multiplier
+    info.m0 = (unit_step && no_multiplier) ? 2 : 1;
+
+    return info;
+}
+} // namespace cl_dwc
+} // namespace arm_compute
diff --git a/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigBifrost.h b/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigBifrost.h
new file mode 100644
index 0000000000..41d86c9c14
--- /dev/null
+++ b/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigBifrost.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_RUNTIME_HEURISTICS_DWC_NATIVE_CLDWCNATIVEDEFAULTCONFIGBIFROST
+#define SRC_RUNTIME_HEURISTICS_DWC_NATIVE_CLDWCNATIVEDEFAULTCONFIGBIFROST
+
+#include "src/runtime/heuristics/dwc_native/IClDWCNativeKernelConfig.h"
+
+namespace arm_compute
+{
+namespace cl_dwc
+{
+/** Bifrost based OpenCL depthwise convolution configuration */
+class ClDWCNativeDefaultConfigBifrost final : public IClDWCNativeKernelConfig
+{
+public:
+    /** Constructor
+     *
+     * Declared explicit so that a GPUTarget is never silently converted into a configuration object.
+     *
+     * @param[in] gpu GPU target
+     */
+    explicit ClDWCNativeDefaultConfigBifrost(GPUTarget gpu);
+
+    // Inherited overridden method
+    DWCComputeKernelInfo configure(const ITensorInfo   *src,
+                                   const ITensorInfo   *wei,
+                                   const PadStrideInfo &conv_info,
+                                   const Size2D        &dilation,
+                                   unsigned int         depth_multiplier) override;
+
+private:
+    /** F32 heuristic selected by configure() for the G71 target */
+    DWCComputeKernelInfo configure_G71_f32(const ITensorInfo   *src,
+                                           const ITensorInfo   *wei,
+                                           const PadStrideInfo &conv_info,
+                                           const Size2D        &dilation,
+                                           unsigned int         depth_multiplier);
+    /** F16 heuristic selected by configure() for the G71 target */
+    DWCComputeKernelInfo configure_G71_f16(const ITensorInfo   *src,
+                                           const ITensorInfo   *wei,
+                                           const PadStrideInfo &conv_info,
+                                           const Size2D        &dilation,
+                                           unsigned int         depth_multiplier);
+    /** F32 heuristic selected by configure() for every target other than G71 */
+    DWCComputeKernelInfo configure_G7x_f32(const ITensorInfo   *src,
+                                           const ITensorInfo   *wei,
+                                           const PadStrideInfo &conv_info,
+                                           const Size2D        &dilation,
+                                           unsigned int         depth_multiplier);
+    /** F16 heuristic selected by configure() for every target other than G71 */
+    DWCComputeKernelInfo configure_G7x_f16(const ITensorInfo   *src,
+                                           const ITensorInfo   *wei,
+                                           const PadStrideInfo &conv_info,
+                                           const Size2D        &dilation,
+                                           unsigned int         depth_multiplier);
+    /** 8-bit heuristic selected by configure() for all targets, G71 included */
+    DWCComputeKernelInfo configure_G7x_u8(const ITensorInfo   *src,
+                                          const ITensorInfo   *wei,
+                                          const PadStrideInfo &conv_info,
+                                          const Size2D        &dilation,
+                                          unsigned int         depth_multiplier);
+};
+} // namespace cl_dwc
+} // namespace arm_compute
+#endif /* SRC_RUNTIME_HEURISTICS_DWC_NATIVE_CLDWCNATIVEDEFAULTCONFIGBIFROST */
diff --git a/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.cpp b/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.cpp
new file mode 100644
index 0000000000..ef1bb3858c
--- /dev/null
+++ b/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.cpp
@@ -0,0 +1,326 @@
+/*
+ * Copyright (c) 2022-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/GPUTarget.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+
+#include "src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.h"
+
+namespace arm_compute
+{
+namespace cl_dwc
+{
+// The constructor only forwards the GPU target to the IClDWCNativeKernelConfig base
+ClDWCNativeDefaultConfigValhall::ClDWCNativeDefaultConfigValhall(GPUTarget gpu)
+    : IClDWCNativeKernelConfig(gpu)
+{
+}
+
+/** Select and run the depthwise-convolution heuristic matching the GPU target and data type.
+ *
+ * Two tables of member functions (F32, F16, 8-bit) are built: one for Mali-G77 and one for
+ * Mali-G78, the latter also being the fallback for any other target. The G77 table differs
+ * from the G78 one only in its F16 entry.
+ *
+ * @param[in] src              Source tensor info; its data type selects the heuristic
+ * @param[in] wei              Weights tensor info
+ * @param[in] conv_info        Convolution info
+ * @param[in] dilation         Kernel dilation
+ * @param[in] depth_multiplier Depth multiplier
+ *
+ * @return The kernel configuration produced by the selected heuristic
+ */
+DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure(const ITensorInfo   *src,
+                                                                const ITensorInfo   *wei,
+                                                                const PadStrideInfo &conv_info,
+                                                                const Size2D        &dilation,
+                                                                unsigned int         depth_multiplier)
+{
+    using ConfigurationFunctionExecutorPtr = DWCComputeKernelInfo (ClDWCNativeDefaultConfigValhall::*)(
+        const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
+        unsigned int depth_multiplier);
+
+    ClDWCNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G78(
+        &ClDWCNativeDefaultConfigValhall::configure_G78_f32, &ClDWCNativeDefaultConfigValhall::configure_G78_f16,
+        &ClDWCNativeDefaultConfigValhall::configure_G78_u8);
+
+    // G77 reuses the G78 F32 and 8-bit heuristics and only overrides F16
+    ClDWCNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G77(
+        &ClDWCNativeDefaultConfigValhall::configure_G78_f32, &ClDWCNativeDefaultConfigValhall::configure_G77_f16,
+        &ClDWCNativeDefaultConfigValhall::configure_G78_u8);
+
+    ConfigurationFunctionExecutorPtr func = nullptr;
+    switch (_target)
+    {
+        case GPUTarget::G77:
+            func = configs_G77.get_function(src->data_type());
+            break;
+        case GPUTarget::G78:
+        default:
+            func = configs_G78.get_function(src->data_type());
+            break;
+    }
+
+    // Calling through a null pointer-to-member is undefined behaviour, so reject it
+    // unconditionally rather than through an assertion-style macro.
+    // NOTE(review): assumes ARM_COMPUTE_ERROR_ON_MSG can be compiled out when assertions are
+    // disabled while ARM_COMPUTE_ERROR is always active - confirm against the error macros header.
+    if (func == nullptr)
+    {
+        ARM_COMPUTE_ERROR("Data type not supported for depthwise convolution");
+    }
+
+    return (this->*func)(src, wei, conv_info, dilation, depth_multiplier);
+}
+
+/** F32 depthwise-convolution heuristic for Mali-G78 (also placed in the G77 table by configure()).
+ *
+ * Only the NHWC data layout is tuned; for any other layout the default-constructed
+ * descriptor is returned untouched.
+ */
+DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f32(const ITensorInfo   *src,
+                                                                        const ITensorInfo   *wei,
+                                                                        const PadStrideInfo &conv_info,
+                                                                        const Size2D        &dilation,
+                                                                        unsigned int         depth_multiplier)
+{
+    DWCComputeKernelInfo info;
+
+    // Nothing is tuned outside NHWC
+    if (src->data_layout() != DataLayout::NHWC)
+    {
+        return info;
+    }
+
+    const DataLayout  wei_layout    = wei->data_layout();
+    const TensorShape weights_shape = wei->tensor_shape();
+    const size_t      channels =
+        weights_shape[get_data_layout_dimension_index(wei_layout, DataLayoutDimension::CHANNEL)];
+    const size_t filter_w = weights_shape[get_data_layout_dimension_index(wei_layout, DataLayoutDimension::WIDTH)];
+
+    info.export_input_to_cl_image   = false;
+    info.export_weights_to_cl_image = use_cl_image_for_weights(wei, depth_multiplier);
+
+    // Vector size along the channels: the widest of 4/2/1 that divides the depth multiplier
+    // (a depth multiplier of 1 always gets 4)
+    if ((depth_multiplier == 1) || ((depth_multiplier % 4) == 0))
+    {
+        info.n0 = 4;
+    }
+    else if ((depth_multiplier % 2) == 0)
+    {
+        info.n0 = 2;
+    }
+    else
+    {
+        info.n0 = 1;
+    }
+
+    // Note: If we reduce n0, export to cl_image must be false
+    const auto fitted_n0 = adjust_vec_size(info.n0, channels);
+    ARM_COMPUTE_ERROR_ON((fitted_n0 != info.n0) && (info.export_weights_to_cl_image == true));
+    info.n0 = fitted_n0;
+
+    // m0 is raised above 1 only for stride_x == 1, dilation_x == 1 and a kernel width in [2, 8]
+    const bool unit_step      = (conv_info.stride().first == 1) && (dilation.x() == 1);
+    const bool width_excluded = (filter_w >= 9) || (filter_w == 1);
+    info.m0                   = (unit_step && !width_excluded) ? 2 : 1;
+
+    return info;
+}
+
+/** F16 depthwise-convolution heuristic for Mali-G78.
+ *
+ * Only the NHWC data layout is tuned; for any other layout the default-constructed
+ * descriptor is returned untouched.
+ */
+DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f16(const ITensorInfo   *src,
+                                                                        const ITensorInfo   *wei,
+                                                                        const PadStrideInfo &conv_info,
+                                                                        const Size2D        &dilation,
+                                                                        unsigned int         depth_multiplier)
+{
+    DWCComputeKernelInfo info;
+
+    // Nothing is tuned outside NHWC
+    if (src->data_layout() != DataLayout::NHWC)
+    {
+        return info;
+    }
+
+    // Src and weights have the same dimension indices, so the weights layout is used for both
+    const DataLayout  wei_layout    = wei->data_layout();
+    const size_t      channel_dim   = get_data_layout_dimension_index(wei_layout, DataLayoutDimension::CHANNEL);
+    const size_t      width_dim     = get_data_layout_dimension_index(wei_layout, DataLayoutDimension::WIDTH);
+    const TensorShape input_shape   = src->tensor_shape();
+    const TensorShape weights_shape = wei->tensor_shape();
+    const size_t      input_w       = input_shape[width_dim];
+    const size_t      channels      = weights_shape[channel_dim];
+    const size_t      filter_w      = weights_shape[width_dim];
+
+    info.export_input_to_cl_image   = false;
+    info.export_weights_to_cl_image = use_cl_image_for_weights(wei, depth_multiplier);
+
+    // Vector size along the channels
+    if (depth_multiplier == 1)
+    {
+        // 8 elements when the weights stay in a buffer, 4 when they are read through cl_image
+        info.n0 = info.export_weights_to_cl_image ? 4 : 8;
+    }
+    else if ((depth_multiplier % 4) == 0)
+    {
+        info.n0 = 4;
+    }
+    else if ((depth_multiplier % 2) == 0)
+    {
+        info.n0 = 2;
+    }
+    else
+    {
+        info.n0 = 1;
+    }
+
+    // Note: If we reduce n0, export to cl_image must be false
+    const auto fitted_n0 = adjust_vec_size(info.n0, channels);
+    ARM_COMPUTE_ERROR_ON((fitted_n0 != info.n0) && (info.export_weights_to_cl_image == true));
+    info.n0 = fitted_n0;
+
+    // m0 is raised above 1 only for stride_x == 1, dilation_x == 1 and a kernel width in [2, 8]
+    const bool unit_step      = (conv_info.stride().first == 1) && (dilation.x() == 1);
+    const bool width_excluded = (filter_w >= 9) || (filter_w == 1);
+    if (unit_step && !width_excluded)
+    {
+        // Use 5 when it divides the source width exactly, 4 otherwise
+        info.m0 = ((input_w % 5) == 0) ? 5 : 4;
+    }
+    else
+    {
+        info.m0 = 1;
+    }
+
+    return info;
+}
+
+/** 8-bit depthwise-convolution heuristic for Mali-G78 (also placed in the G77 table by configure()).
+ *
+ * Only the NHWC data layout is tuned; for any other layout the default-constructed
+ * descriptor is returned untouched. The weights tensor info is not inspected.
+ */
+DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_u8(const ITensorInfo   *src,
+                                                                       const ITensorInfo   *wei,
+                                                                       const PadStrideInfo &conv_info,
+                                                                       const Size2D        &dilation,
+                                                                       unsigned int         depth_multiplier)
+{
+    ARM_COMPUTE_UNUSED(wei);
+
+    DWCComputeKernelInfo info;
+
+    if (src->data_layout() != DataLayout::NHWC)
+    {
+        return info;
+    }
+
+    const bool no_multiplier = (depth_multiplier == 1);
+    const bool unit_step     = (conv_info.stride().first == 1) && (dilation.x() == 1);
+
+    // Neither tensor is exported to cl_image in the 8-bit path
+    info.export_input_to_cl_image   = false;
+    info.export_weights_to_cl_image = false;
+
+    info.n0 = no_multiplier ? 4 : 1;
+    // Two rows per work-item only for stride_x == 1, dilation_x == 1 and no depth multiplier
+    info.m0 = (unit_step && no_multiplier) ? 2 : 1;
+
+    return info;
+}
+
+/** F16 depthwise-convolution heuristic for Mali-G77.
+ *
+ * Only the NHWC data layout is tuned; for any other layout the default-constructed
+ * descriptor is returned untouched. Unlike the G78 F16 heuristic, m0 never exceeds 2 and
+ * the source width is not consulted.
+ */
+DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G77_f16(const ITensorInfo   *src,
+                                                                        const ITensorInfo   *wei,
+                                                                        const PadStrideInfo &conv_info,
+                                                                        const Size2D        &dilation,
+                                                                        unsigned int         depth_multiplier)
+{
+    DWCComputeKernelInfo info;
+
+    // Nothing is tuned outside NHWC
+    if (src->data_layout() != DataLayout::NHWC)
+    {
+        return info;
+    }
+
+    const DataLayout  wei_layout    = wei->data_layout();
+    const TensorShape weights_shape = wei->tensor_shape();
+    const size_t      channels =
+        weights_shape[get_data_layout_dimension_index(wei_layout, DataLayoutDimension::CHANNEL)];
+    const size_t filter_w = weights_shape[get_data_layout_dimension_index(wei_layout, DataLayoutDimension::WIDTH)];
+
+    info.export_input_to_cl_image   = false;
+    info.export_weights_to_cl_image = use_cl_image_for_weights(wei, depth_multiplier);
+
+    // Vector size along the channels
+    if (depth_multiplier == 1)
+    {
+        // 8 elements when the weights stay in a buffer, 4 when they are read through cl_image
+        info.n0 = info.export_weights_to_cl_image ? 4 : 8;
+    }
+    else if ((depth_multiplier % 4) == 0)
+    {
+        info.n0 = 4;
+    }
+    else if ((depth_multiplier % 2) == 0)
+    {
+        info.n0 = 2;
+    }
+    else
+    {
+        info.n0 = 1;
+    }
+
+    // Note: If we reduce n0, export to cl_image must be false
+    const auto fitted_n0 = adjust_vec_size(info.n0, channels);
+    ARM_COMPUTE_ERROR_ON((fitted_n0 != info.n0) && (info.export_weights_to_cl_image == true));
+    info.n0 = fitted_n0;
+
+    // m0 is raised above 1 only for stride_x == 1, dilation_x == 1 and a kernel width in [2, 8]
+    const bool unit_step      = (conv_info.stride().first == 1) && (dilation.x() == 1);
+    const bool width_excluded = (filter_w >= 9) || (filter_w == 1);
+    info.m0                   = (unit_step && !width_excluded) ? 2 : 1;
+
+    return info;
+}
+} // namespace cl_dwc
+} // namespace arm_compute
diff --git a/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.h b/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.h
new file mode 100644
index 0000000000..fabce77b54
--- /dev/null
+++ b/src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_RUNTIME_HEURISTICS_DWC_NATIVE_CLDWCNATIVEDEFAULTCONFIGVALHALL
+#define SRC_RUNTIME_HEURISTICS_DWC_NATIVE_CLDWCNATIVEDEFAULTCONFIGVALHALL
+
+#include "src/runtime/heuristics/dwc_native/IClDWCNativeKernelConfig.h"
+
+namespace arm_compute
+{
+namespace cl_dwc
+{
+/** Valhall based OpenCL depthwise convolution configuration */
+class ClDWCNativeDefaultConfigValhall final : public IClDWCNativeKernelConfig
+{
+public:
+    /** Constructor
+     *
+     * Declared explicit so that a GPUTarget is never silently converted into a configuration object.
+     *
+     * @param[in] gpu GPU target
+     */
+    explicit ClDWCNativeDefaultConfigValhall(GPUTarget gpu);
+
+    // Inherited overridden method
+    DWCComputeKernelInfo configure(const ITensorInfo   *src,
+                                   const ITensorInfo   *wei,
+                                   const PadStrideInfo &conv_info,
+                                   const Size2D        &dilation,
+                                   unsigned int         depth_multiplier) override;
+
+private:
+    /** F32 heuristic; configure() uses it for both the G78 and the G77 tables */
+    DWCComputeKernelInfo configure_G78_f32(const ITensorInfo   *src,
+                                           const ITensorInfo   *wei,
+                                           const PadStrideInfo &conv_info,
+                                           const Size2D        &dilation,
+                                           unsigned int         depth_multiplier);
+    /** F16 heuristic used by the G78 table */
+    DWCComputeKernelInfo configure_G78_f16(const ITensorInfo   *src,
+                                           const ITensorInfo   *wei,
+                                           const PadStrideInfo &conv_info,
+                                           const Size2D        &dilation,
+                                           unsigned int         depth_multiplier);
+    /** 8-bit heuristic; configure() uses it for both the G78 and the G77 tables */
+    DWCComputeKernelInfo configure_G78_u8(const ITensorInfo   *src,
+                                          const ITensorInfo   *wei,
+                                          const PadStrideInfo &conv_info,
+                                          const Size2D        &dilation,
+                                          unsigned int         depth_multiplier);
+    /** F16 heuristic used by the G77 table */
+    DWCComputeKernelInfo configure_G77_f16(const ITensorInfo   *src,
+                                           const ITensorInfo   *wei,
+                                           const PadStrideInfo &conv_info,
+                                           const Size2D        &dilation,
+                                           unsigned int         depth_multiplier);
+};
+} // namespace cl_dwc
+} // namespace arm_compute
+#endif /* SRC_RUNTIME_HEURISTICS_DWC_NATIVE_CLDWCNATIVEDEFAULTCONFIGVALHALL */
diff --git a/src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.cpp b/src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.cpp
new file mode 100644
index 0000000000..c8b006c546
--- /dev/null
+++ b/src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/TensorShape.h"
+
+namespace arm_compute
+{
+namespace cl_dwc
+{
+// Decide whether the depthwise-convolution weights should be read through cl_image storage
+bool use_cl_image_for_weights(const ITensorInfo *weights, unsigned int depth_multiplier)
+{
+    // The weights must be exportable to cl_image in the first place
+    if (!export_to_cl_image(weights))
+    {
+        return false;
+    }
+
+    const DataLayout layout   = weights->data_layout();
+    const size_t     filter_w = weights->tensor_shape()[get_data_layout_dimension_index(layout, DataLayoutDimension::WIDTH)];
+    const size_t     filter_h = weights->tensor_shape()[get_data_layout_dimension_index(layout, DataLayoutDimension::HEIGHT)];
+
+    // Even when cl_image is available, the cl buffer storage is preferred for performance reasons when:
+    // 1- the kernel size is 1x1
+    // 2- the depth multiplier is greater than 1 and not a multiple of 4
+    const bool kernel_is_1x1        = (filter_w == 1) && (filter_h == 1);
+    const bool unaligned_multiplier = (depth_multiplier > 1) && ((depth_multiplier % 4) != 0);
+
+    return !(kernel_is_1x1 || unaligned_multiplier);
+}
+} // namespace cl_dwc
+} // namespace arm_compute
diff --git a/src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.h b/src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.h
new file mode 100644
index 0000000000..e3484c04ff
--- /dev/null
+++ b/src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_RUNTIME_HEURISTICS_DWC_NATIVE_CLDWCNATIVEHEURISTICSHELPERS
+#define SRC_RUNTIME_HEURISTICS_DWC_NATIVE_CLDWCNATIVEHEURISTICSHELPERS
+
+namespace arm_compute
+{
+// Forward declaration
+class ITensorInfo;
+
+namespace cl_dwc
+{
+/** Utility function to decide whether the weights of a depthwise convolution should use the cl image storage
+ *  to get better performance.
+ *
+ * @param[in] weights          Weights TensorInfo of the depthwise convolution
+ * @param[in] depth_multiplier Depth multiplier
+ *
+ * @return true if the weights of the depthwise convolution can be kept in the cl image storage to improve the
+ *         performance, false if the cl buffer storage should be used instead
+ */
+bool use_cl_image_for_weights(const ITensorInfo *weights, unsigned int depth_multiplier);
+
+} // namespace cl_dwc
+} // namespace arm_compute
+#endif /* SRC_RUNTIME_HEURISTICS_DWC_NATIVE_CLDWCNATIVEHEURISTICSHELPERS */
diff --git a/src/runtime/heuristics/dwc_native/ClDWCNativeKernelConfig.h b/src/runtime/heuristics/dwc_native/ClDWCNativeKernelConfig.h
new file mode 100644
index 0000000000..031cf1859a
--- /dev/null
+++ b/src/runtime/heuristics/dwc_native/ClDWCNativeKernelConfig.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2022-2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_SRC_RUNTIME_HEURISTICS_DWC_NATIVE_CLDWCNATIVEKERNELCONFIG_H
+#define ACL_SRC_RUNTIME_HEURISTICS_DWC_NATIVE_CLDWCNATIVEKERNELCONFIG_H
+
+#include "src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigBifrost.h"
+#include "src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.h"
+#include "src/runtime/heuristics/dwc_native/IClDWCNativeKernelConfig.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+namespace cl_dwc
+{
+/** Factory that builds the depthwise-convolution kernel configuration matching a GPU target */
+class ClDWCNativeKernelConfigurationFactory final
+{
+public:
+    /** Create the ClDWCNative kernel configuration object for the architecture of the given GPU target
+     *
+     * @param[in] gpu GPU target
+     *
+     * @return IClDWCNativeKernelConfig
+     */
+    static std::unique_ptr<IClDWCNativeKernelConfig> create(GPUTarget gpu)
+    {
+        const GPUTarget arch = get_arch_from_target(gpu);
+
+        switch (arch)
+        {
+            case GPUTarget::VALHALL:
+            case GPUTarget::FIFTHGEN:
+                return std::make_unique<ClDWCNativeDefaultConfigValhall>(gpu);
+            case GPUTarget::BIFROST:
+                return std::make_unique<ClDWCNativeDefaultConfigBifrost>(gpu);
+            case GPUTarget::MIDGARD:
+                // The heuristic for Midgard is the same as the one used for Arm Mali-G71
+                return std::make_unique<ClDWCNativeDefaultConfigBifrost>(GPUTarget::G71);
+            default:
+                ARM_COMPUTE_ERROR("Not supported GPU target");
+        }
+    }
+};
+} // namespace cl_dwc
+} // namespace arm_compute
+#endif // ACL_SRC_RUNTIME_HEURISTICS_DWC_NATIVE_CLDWCNATIVEKERNELCONFIG_H
diff --git a/src/runtime/heuristics/dwc_native/IClDWCNativeKernelConfig.h b/src/runtime/heuristics/dwc_native/IClDWCNativeKernelConfig.h
new file mode 100644
index 0000000000..614a6622df
--- /dev/null
+++ b/src/runtime/heuristics/dwc_native/IClDWCNativeKernelConfig.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_RUNTIME_HEURISTICS_DWC_NATIVE_ICLDWCNATIVEKERNELCONFIG
+#define SRC_RUNTIME_HEURISTICS_DWC_NATIVE_ICLDWCNATIVEKERNELCONFIG
+
+#include "arm_compute/core/GPUTarget.h"
+#include "arm_compute/core/KernelDescriptors.h"
+#include "arm_compute/core/Types.h"
+
+#include "src/core/common/Macros.h"
+
+#include <array>
+#include <cstddef>
+
+namespace arm_compute
+{
+namespace cl_dwc
+{
+/** Basic container for the OpenCL depthwise convolution configuration functions
+ *
+ * Holds one configuration function per supported data-type family: F32, F16 and
+ * 8-bit quantized (QASYMM8, QASYMM8_SIGNED, QSYMM8_PER_CHANNEL).
+ *
+ * @tparam T Type of the stored configuration function. It must be constructible from
+ *           nullptr, because get_function() returns nullptr for unsupported data types.
+ */
+template <class T>
+class ClDWCNativeConfigArray
+{
+public:
+    /** Alias for F32 index */
+    static constexpr size_t DT_F32 = 0;
+    /** Alias for F16 index */
+    static constexpr size_t DT_F16 = 1;
+    /** Alias for Int8 index */
+    static constexpr size_t DT_INT8 = 2;
+
+    /** Constructor
+     *
+     * @param[in] func_f32  Function to call for depthwise convolution F32
+     * @param[in] func_f16  Function to call for depthwise convolution F16
+     * @param[in] func_int8 Function to call for depthwise convolution Int8 (QASYMM8, QASYMM8_SIGNED, QSYMM8_PER_CHANNEL)
+     *
+     */
+    ClDWCNativeConfigArray(T func_f32, T func_f16, T func_int8) : _configs{func_f32, func_f16, func_int8}
+    {
+    }
+
+    /** Method to return the depthwise convolution configuration function based on data type
+     *
+     * The lookup does not modify the container, so it is usable on const instances.
+     *
+     * @param[in] data_type Input data type
+     *
+     * @return the function registered for @p data_type, or nullptr if the data type is not supported
+     */
+    T get_function(DataType data_type) const
+    {
+        switch (data_type)
+        {
+            case DataType::F32:
+                return _configs.at(DT_F32);
+            case DataType::F16:
+                return _configs.at(DT_F16);
+            // All 8-bit quantized types share the same configuration function
+            case DataType::QASYMM8:
+            case DataType::QASYMM8_SIGNED:
+            case DataType::QSYMM8_PER_CHANNEL:
+                return _configs.at(DT_INT8);
+            default:
+                return nullptr;
+        }
+    }
+
+private:
+    std::array<T, 3> _configs; /**< Configuration functions indexed by DT_F32, DT_F16 and DT_INT8 */
+};
+
+/** Basic interface for the depthwise convolution kernel configuration */
+class IClDWCNativeKernelConfig
+{
+public:
+    /** Constructor
+     *
+     * Declared explicit so that a GPUTarget value is never implicitly converted
+     * into a configuration object.
+     *
+     * @param[in] arch GPU target
+     */
+    explicit IClDWCNativeKernelConfig(GPUTarget arch) : _target(arch)
+    {
+    }
+    ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(IClDWCNativeKernelConfig);
+    /** Virtual destructor */
+    virtual ~IClDWCNativeKernelConfig() = default;
+    /** This method returns the @ref DWCComputeKernelInfo for the given inputs
+     *
+     * @param[in] src              Source tensor (activation tensor)
+     * @param[in] wei              Weights tensor
+     * @param[in] conv_info        Convolution info
+     * @param[in] dilation         Kernel dilation
+     * @param[in] depth_multiplier Output feature maps multiplier
+     *
+     * @return the @ref DWCComputeKernelInfo for the given inputs
+     */
+    virtual DWCComputeKernelInfo configure(const ITensorInfo   *src,
+                                           const ITensorInfo   *wei,
+                                           const PadStrideInfo &conv_info,
+                                           const Size2D        &dilation,
+                                           unsigned int         depth_multiplier) = 0;
+
+protected:
+    GPUTarget _target; /**< GPU target supplied at construction */
+};
+} // namespace cl_dwc
+} // namespace arm_compute
+#endif /* SRC_RUNTIME_HEURISTICS_DWC_NATIVE_ICLDWCNATIVEKERNELCONFIG */