aboutsummaryrefslogtreecommitdiff
path: root/src/dynamic_fusion
diff options
context:
space:
mode:
authorMohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com>2023-01-03 10:16:16 +0000
committerMohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com>2023-01-20 13:11:59 +0000
commita18d85c6d2c0025938c2dc10e553eb82c01922f2 (patch)
treee62ce3acdbe065bedba7355cdaba6cf32d7ce20e /src/dynamic_fusion
parent11f7d7ed4aa6293c8ad115374b7bad9bbf5a8ae7 (diff)
downloadComputeLibrary-a18d85c6d2c0025938c2dc10e553eb82c01922f2.tar.gz
Dynamic Fusion Pooling Layer 2d
- Adds Dynamic fusion PoolingLayer2D as Unfusable Operator - Indices are not supported - Adds tests for F32/F16 Datatypes Resolves : [COMPMID-5520] Signed-off-by: Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com> Change-Id: I0d112545eb9209c836bf9ea153069f8627531e0a Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8893 Reviewed-by: Gunes Bayir <gunes.bayir@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/dynamic_fusion')
-rw-r--r--src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp90
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp104
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h132
-rw-r--r--src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp193
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp472
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h131
-rw-r--r--src/dynamic_fusion/utils/Utils.h21
7 files changed, 1139 insertions, 4 deletions
diff --git a/src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp b/src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp
new file mode 100644
index 0000000000..c28791f5fe
--- /dev/null
+++ b/src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h"
+#include "arm_compute/core/Size2D.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+PoolingType Pool2dAttributes::pool_type() const
+{
+ return _pool_type;
+}
+
+Pool2dAttributes Pool2dAttributes::pool_type(PoolingType pool_type)
+{
+ _pool_type = pool_type;
+ return *this;
+}
+
+Padding2D Pool2dAttributes::pad() const
+{
+ return _pad;
+}
+
+Pool2dAttributes Pool2dAttributes::pad(const Padding2D &pad)
+{
+ _pad = pad;
+ return *this;
+}
+
+Size2D Pool2dAttributes::pool_size() const
+{
+ return _pool_size;
+}
+
+Pool2dAttributes Pool2dAttributes::pool_size(const Size2D &pool_size)
+{
+ _pool_size = pool_size;
+ return *this;
+}
+
+Size2D Pool2dAttributes::stride() const
+{
+ return _stride;
+}
+
+Pool2dAttributes Pool2dAttributes::stride(const Size2D &stride)
+{
+ _stride = stride;
+ return *this;
+}
+
+bool Pool2dAttributes::exclude_padding() const
+{
+ return _exclude_padding;
+}
+
+Pool2dAttributes Pool2dAttributes::exclude_padding(bool exclude_padding)
+{
+ _exclude_padding = exclude_padding;
+ return *this;
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp
new file mode 100644
index 0000000000..2b01803224
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ClComponentPool2d.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h"
+#include "src/dynamic_fusion/utils/Utils.h"
+#include <memory>
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+Status ClComponentPool2d::validate(
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings)
+{
+ ARM_COMPUTE_UNUSED(properties);
+ const auto src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
+ const auto dst = tensors.get_const_tensor(TensorType::ACL_DST_0);
+
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_ERROR_ON_MSG((attributes.pool_type() != PoolingType::AVG && attributes.pool_type() != PoolingType::MAX), "Unsupported Pooling type");
+
+ // 1. Check validity
+ // Check if pooling is valid
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_pool_region_entirely_outside_input(convert_pool_attr_to_pool_info(attributes, settings.mixed_precision())),
+ "Pooling region that is entirely outside input tensor is unsupported");
+
+ // Matching data type
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
+
+ // Matching data layout
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, dst);
+
+ // All tensor infos are initialized
+ ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0);
+ ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
+
+ // Device requirements are met
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(dst->tensor_shape(),
+ misc::shape_calculator::compute_pool_shape(*src, convert_pool_attr_to_pool_info(attributes, settings.mixed_precision())));
+
+ // 2. Check support level
+ // Data type
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32);
+
+ // Data layout
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC);
+
+ return Status{};
+}
+
+ClComponentPool2d::ClComponentPool2d(
+ ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings)
+ : IGpuKernelComponent{ id, properties, tensors },
+ _component_writer{ std::make_unique<ClTemplatePool2d>(id, tensors, attributes, settings) }
+{
+}
+ClComponentPool2d::~ClComponentPool2d()
+{
+}
+const IGpuTemplateComponentWriter *ClComponentPool2d::template_writer() const
+{
+ return _component_writer.get();
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h
new file mode 100644
index 0000000000..896048e27a
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTPOOL2D
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTPOOL2D
+
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h"
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+
+namespace arm_compute
+{
+/** Forward declaration */
+class ITensorInfo;
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Forward declaration */
+template <typename T>
+class ArgumentPack;
+class Pool2dAttributes;
+
+/** Forward declaration */
+class ClTemplatePool2d;
+
+class ClComponentPool2d final : public IGpuKernelComponent
+{
+public:
+ /** Attributes are a set of backend-agnostic parameters that define what a component does */
+ using Attributes = Pool2dAttributes;
+ /** Settings are a set of backend-specific parameters that influence the implementation of a component */
+ using Settings = GpuPool2dSettings;
+
+public:
+ /** Validate the component
+ *
+ * @param[in] properties Component properties
+ * @param[in,out] tensors Tensor arguments to the component
+ * @param[in] attributes Component attributes
+ * @param[in] settings Component settings
+ *
+ * @return Status Validation results
+ *
+ * Tensor argument names:
+ * - ACL_SRC_0: Input
+ * - ACL_DST_0: Output
+ *
+ * Tensor argument constness:
+ * - ACL_SRC_0: Const
+ * - ACL_DST_0: Const
+ *
+ * Valid data layouts:
+ * - NHWC
+ *
+ * Valid data type configurations:
+ * |ACL_SRC_0 |ACL_DST_0 |
+ * |:--------------|:--------------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ */
+ static Status validate(
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings);
+
+ /** Constructor
+ *
+ * @param[in] id Unique Component Identifier within a workload
+ * @param[in] properties Component properties
+ * @param[in,out] tensors Tensor arguments to the component
+ * @param[in] attributes Component attributes
+ * @param[in] settings Component settings
+ */
+ ClComponentPool2d(
+ ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings);
+
+ /** Destructor */
+ ~ClComponentPool2d() override;
+
+ /** Prevent instances of this class from being copy constructed */
+ ClComponentPool2d(const ClComponentPool2d &component) = delete;
+
+ /** Prevent instances of this class from being copied */
+ ClComponentPool2d &operator=(const ClComponentPool2d &component) = delete;
+
+ /** Allow instances of this class to be move constructed */
+ ClComponentPool2d(ClComponentPool2d &&component) = default;
+
+ /** Allow instances of this class to be moved */
+ ClComponentPool2d &operator=(ClComponentPool2d &&component) = default;
+
+ /** Get template writer for the component */
+ const IGpuTemplateComponentWriter *template_writer() const override;
+
+ /** Get component type */
+ GpuComponentType type() const override
+ {
+ return GpuComponentType::Unfusable;
+ }
+
+private:
+ std::unique_ptr<ClTemplatePool2d> _component_writer;
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTPOOL2D */
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp
new file mode 100644
index 0000000000..a07ad00155
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/CL/CLCompileContext.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/experimental/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h"
+
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h"
+#include "src/common/utils/Log.h"
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h"
+#include "src/dynamic_fusion/utils/Utils.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+namespace
+{
+constexpr GpuOperatorType operator_type = GpuOperatorType::Unfusable;
+} // namespace
+
+GpuPool2dSettings &GpuPool2dSettings::mixed_precision(bool mixed_precision)
+{
+ _mixed_precision = mixed_precision;
+ return *this;
+}
+
+bool GpuPool2dSettings::mixed_precision() const
+{
+ return _mixed_precision;
+}
+
+Status GpuPool2d::validate_op(const GpuWorkloadSketch &sketch,
+ const ITensorInfo *src,
+ const ITensorInfo *dst,
+ const Pool2dAttributes &attributes,
+ const GpuPool2dSettings &settings)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !dst->has_valid_id());
+
+ // Auto initialize dst tensor info
+ TensorInfo dst_info_to_validate = *dst;
+ {
+ auto shape = misc::shape_calculator::compute_pool_shape(*src, convert_pool_attr_to_pool_info(attributes, settings.mixed_precision()));
+ auto_init_if_empty(dst_info_to_validate, src->clone()->set_tensor_shape(shape));
+ }
+
+ // Perform fusion test
+ // Pack tensor infos
+ ArgumentPack<ITensorInfo> tensors;
+ tensors.add_const_tensor(ACL_SRC_0, src);
+ tensors.add_const_tensor(ACL_DST_0, &dst_info_to_validate);
+
+ const auto op = sketch.implementation().operator_group().new_operator(operator_type, tensors);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(!sketch.implementation().operator_group().try_add_operator(op),
+ "Operator fusion test failed. This operator cannot be fused into the workload");
+
+ // Check if configuration is supported
+ return is_supported_op(*sketch.gpu_context(), src, &dst_info_to_validate, attributes, settings);
+}
+
+Status GpuPool2d::is_supported_op(const GpuWorkloadContext &context,
+ const ITensorInfo *src,
+ const ITensorInfo *dst,
+ const Pool2dAttributes &attributes,
+ const GpuPool2dSettings &settings)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+ // Data type
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32);
+ // Data layout
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC);
+ // Check exclude padding is not false
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(!attributes.exclude_padding(), "Exclude padding must be set to true in Attributes!");
+
+ // Auto initialize dst tensor info
+ TensorInfo dst_info_to_validate = *dst;
+ {
+ auto shape = misc::shape_calculator::compute_pool_shape(*src, convert_pool_attr_to_pool_info(attributes, settings.mixed_precision()));
+ auto_init_if_empty(dst_info_to_validate, src->clone()->set_tensor_shape(shape));
+ }
+
+ // Check components
+ if(context.gpu_language() == GpuLanguage::OpenCL)
+ {
+ const auto cl_compile_ctx = context.cl_compile_context();
+ ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr);
+
+ // Validate Component
+ {
+ const KernelProperties properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+
+ ArgumentPack<ITensorInfo> arguments;
+ arguments.add_const_tensor(ACL_SRC_0, src);
+ arguments.add_const_tensor(ACL_DST_0, &dst_info_to_validate);
+ ARM_COMPUTE_RETURN_ON_ERROR(ClComponentPool2d::validate(properties, arguments, attributes, settings));
+ }
+ }
+ else
+ {
+ ARM_COMPUTE_RETURN_ERROR_MSG("Unimplemented Gpu language");
+ }
+ return Status{};
+}
+
+void GpuPool2d::create_op(GpuWorkloadSketch &sketch,
+ ITensorInfo *src,
+ ITensorInfo *dst,
+ const Pool2dAttributes &attributes,
+ const GpuPool2dSettings &settings)
+{
+ // Assert validation
+ ARM_COMPUTE_ERROR_THROW_ON(GpuPool2d::validate_op(sketch, src, dst, attributes, settings));
+ ARM_COMPUTE_LOG_PARAMS(src, dst, attributes, settings);
+
+ // Auto initialize dst tensor
+ {
+ auto shape = misc::shape_calculator::compute_pool_shape(*src, convert_pool_attr_to_pool_info(attributes, settings.mixed_precision())); // use the default DimensionRoundingType
+ auto_init_if_empty(*dst, src->clone()->set_tensor_shape(shape));
+ }
+
+ // Translate into components and add to component graph
+ auto &comp_graph = sketch.implementation().component_graph();
+
+ const auto sketch_ctx = sketch.implementation().context();
+
+ if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ {
+ const auto cl_compile_ctx = sketch_ctx->cl_compile_context();
+ ARM_COMPUTE_ERROR_ON(cl_compile_ctx == nullptr);
+
+ // Add Component
+ {
+ auto properties = IGpuKernelComponent::Properties();
+ properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+
+ ArgumentPack<ITensorInfo> arguments;
+ arguments.add_const_tensor(ACL_SRC_0, src);
+ arguments.add_const_tensor(ACL_DST_0, dst);
+ comp_graph.add_new_component<ClComponentPool2d>(properties, arguments, attributes, settings);
+ }
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Unimplemented Gpu language");
+ }
+
+ // Set up fusion test by adding to the Operator Group
+ // Note this has to be performed after all the components have been successfully added to the component graph
+
+ // Pack tensor infos
+ ArgumentPack<ITensorInfo> tensors;
+ tensors.add_const_tensor(ACL_SRC_0, src);
+ tensors.add_tensor(ACL_DST_0, dst);
+
+ const auto op = sketch.implementation().operator_group().new_operator(operator_type, tensors);
+ sketch.implementation().operator_group().add_operator(op);
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp
new file mode 100644
index 0000000000..5df4438afe
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp
@@ -0,0 +1,472 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ClTemplatePool2d.h"
+
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
+
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/WindowHelpers.h"
+
+#include "support/StringSupport.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+namespace
+{
+// Shape indexes for NHWC Datalayout
+constexpr static int32_t batch_idx = 3;
+constexpr static int32_t height_idx = 2;
+constexpr static int32_t width_idx = 1;
+constexpr static int32_t channel_idx = 0;
+}
+ClTemplatePool2d::ClTemplatePool2d(ComponentId id,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings)
+ : IGpuTemplateComponentWriter{ id, tensors },
+ _src{},
+ _dst{},
+ _attributes{ attributes },
+ _settings{ settings }
+{
+ _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
+ _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
+}
+
+std::string ClTemplatePool2d::get_name() const
+{
+ return "pool2d";
+}
+
+std::string ClTemplatePool2d::get_component_code(const ComponentGroup &comp_group) const
+{
+ ARM_COMPUTE_UNUSED(comp_group);
+
+ // Condition to use 2x2 optimized kernel
+ if(_attributes.pool_size() == Size2D(2, 2))
+ {
+ return get_2x2_kernel_code();
+ }
+ else
+ {
+ return get_MxN_kernel_code();
+ }
+}
+
+std::string ClTemplatePool2d::get_MxN_kernel_code() const
+{
+ const auto pool_type = _attributes.pool_type();
+ const bool fp_mixed_precision = (_src->data_type() == DataType::F16) && _settings.mixed_precision() && pool_type != PoolingType::MAX;
+
+ // Define pool op macro.
+ std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_" : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_";
+
+ // Kernel start
+ // Note: If C is not multiple of N0, we shift back of PARTIAL_N0 elements to compute the leftover elements for get_global_id(0) == 0
+ // Note: If C is less than N0, N0 should be SHRINKED to the closest smaller N0. This operation is performed on the host side
+ std::string code = R"_(
+//------------------ START KERNEL {{meta_kernel_id}} ---------------------
+// IN_0(src) {{src}}
+// OUT(dst, accum) {{dst}}
+
+{
+ const int idx_out_c = g_ind_0;
+ const int idx_out_w = g_ind_1;
+)_";
+
+ // Add macro for POOL_OP
+ code += "\n" + pool_op + "\n";
+
+ code += R"_(
+ const int idx_out_h = g_ind_2 % {{DST_HEIGHT}};
+ const int idx_out_n = g_ind_2 / {{DST_HEIGHT}};
+)_";
+
+ // Define common variables.
+ code += R"_(
+ __global unsigned char *in_base_ptr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_n * {{src}}_stride_w;
+
+ __global unsigned char *out_base_ptr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_w * {{dst}}_stride_y + idx_out_h * {{dst}}_stride_z + idx_out_n * {{dst}}_stride_w;
+
+ VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)
+ res0 = {{INITIAL_VALUE}};
+
+ const int idx_in_w = idx_out_w * {{STRIDE_X}} - {{PAD_X}};
+ const int idx_in_h = idx_out_h * {{STRIDE_Y}} - {{PAD_Y}};
+
+ const int pool_x_s = max((int)0, -idx_in_w);
+ const int pool_x_e = min((int){{POOL_SIZE_X}}, (int){{SRC_WIDTH}} - idx_in_w);
+ const int pool_y_s = max((int)0, -idx_in_h);
+ const int pool_y_e = min((int){{POOL_SIZE_Y}}, (int){{SRC_HEIGHT}} - idx_in_h);
+)_";
+
+ // Determine filter size depending on if padding is excluded or not
+ if(_attributes.exclude_padding())
+ {
+ code += R"_(
+ const int filter_size = (pool_y_e - pool_y_s) * (pool_x_e - pool_x_s);
+)_";
+ }
+ else
+ {
+ code += R"_(
+ const int filter_size = {{POOL_SIZE_X}} * {{POOL_SIZE_Y}};
+)_";
+ }
+
+ // Loop through pool size
+ // if global pooling
+ if(_attributes.pool_size().x() == _src->dimension(width_idx) && _attributes.pool_size().y() == _src->dimension(height_idx))
+ {
+ // Begin loop
+ code += R"_(
+ // Global pooling path
+ for(int y = 0; y < {{POOL_SIZE_Y}}; ++y)
+ {
+ #pragma unroll 8
+ for(int x = 0; x < {{POOL_SIZE_X}}; ++x)
+ {
+ VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)
+ data0;
+)_";
+ }
+ else // if local pooling size
+ {
+ code += R"_(
+ for(int y = pool_y_s; y < pool_y_e; ++y)
+ {
+ #pragma unroll 8
+ for(int x = pool_x_s; x < pool_x_e; ++x)
+ {
+ VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)
+ data0;
+)_";
+ } // end else
+
+ // if condition inside loop - use 32bit acc if mixed_precision.
+ // End loop through pooling section.
+ if(fp_mixed_precision)
+ {
+ // In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE
+ code += R"_(
+ data0 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + (x + idx_in_w) * {{src}}_stride_y + (y + idx_in_h) * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0));
+ res0 = POOL_OP(res0, data0);
+ }
+ }
+)_";
+ }
+ else // load data, compute result and end loop
+ {
+ code += R"_(
+ data0 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + (x + idx_in_w) * {{src}}_stride_y + (y + idx_in_h) * {{src}}_stride_z));
+ res0 = POOL_OP(res0, data0);
+ }
+ }
+)_";
+ }
+
+ // For Pool AVG ONLY, divide pool output by filter size
+ if(pool_type == PoolingType::AVG)
+ {
+ code += R"_(
+ res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))filter_size;
+)_";
+ }
+
+ // If mixed precision convert datatype before storing. Then end kernel.
+ if(fp_mixed_precision)
+ {
+ code += R"_(
+ VEC_DATA_TYPE({{DATA_TYPE}}, N0)
+ res_converted0 = CONVERT(res0, VEC_DATA_TYPE({{DATA_TYPE}}, N0));
+ STORE_VECTOR_SELECT(res_converted, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0);
+)_";
+ }
+ else
+ {
+ // Store data
+ code += R"_(
+ STORE_VECTOR_SELECT(res, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0);
+)_";
+ }
+
+ code += R"_(
+//------------------ END KERNEL {{meta_kernel_id}} ---------------------
+}
+)_";
+
+ return code;
+}
+
+std::string ClTemplatePool2d::get_2x2_kernel_code() const
+{
+ const auto pool_type = _attributes.pool_type();
+ const bool fp_mixed_precision = (_src->data_type() == DataType::F16) && _settings.mixed_precision() && pool_type != PoolingType::MAX;
+ std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_" : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_";
+
+ std::string code = R"_(
+//------------------ START KERNEL {{meta_kernel_id}} ---------------------
+// IN_0(src) {{src}}
+// OUT(dst, accum) {{dst}}
+
+#define SELECT_TYPE SELECT_VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)
+
+{
+ const int idx_out_c = g_ind_0;
+ const int idx_out_w = g_ind_1;
+)_";
+
+ // Add pool op macro
+ code += "\n" + pool_op + "\n";
+
+ // If batch size != 1, the batch size dimension is collapsed over the height dimension
+ code += R"_(
+ const int idx_out_h = g_ind_2 % {{DST_HEIGHT}};
+ const int idx_out_n = g_ind_2 / {{DST_HEIGHT}};
+)_";
+
+ code += R"_(
+ const int idx_in_w = idx_out_w * {{STRIDE_X}} - {{PAD_X}};
+ const int idx_in_h = idx_out_h * {{STRIDE_Y}} - {{PAD_Y}};
+
+ __global unsigned char *in_base_ptr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_n * {{src}}_stride_w;
+ __global unsigned char *out_base_ptr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_w * {{dst}}_stride_y + idx_out_h * {{dst}}_stride_z + idx_out_n *
+ {{dst}}_stride_w;
+ const int pool_x_s = max((int)0, -idx_in_w);
+ const int pool_x_e = min((int)2, (int){{SRC_WIDTH}} - idx_in_w);
+ const int pool_y_s = max((int)0, -idx_in_h);
+ const int pool_y_e = min((int)2, (int){{SRC_HEIGHT}} - idx_in_h);
+
+ const int filter_size = (pool_x_e - pool_x_s) * (pool_y_e - pool_y_s);
+ const int x0 = pool_x_s + idx_in_w;
+ const int y0 = pool_y_s + idx_in_h;
+ const int x1 = pool_x_e - 1 + idx_in_w;
+ const int y1 = pool_y_e - 1 + idx_in_h;
+
+ REPEAT_VAR_INIT_TO_CONST(4, VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0), data, 0);
+)_";
+
+ if(fp_mixed_precision)
+ {
+ // In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE
+ code += R"_(
+ data0 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y0 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0));
+ data1 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y0 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0));
+ data2 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y1 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0));
+ data3 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y1 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0));
+)_";
+ }
+ else
+ {
+ code += R"_(
+ data0 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y0 * {{src}}_stride_z));
+ data1 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y0 * {{src}}_stride_z));
+ data2 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y1 * {{src}}_stride_z));
+ data3 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y1 * {{src}}_stride_z));
+)_";
+ }
+
+ if(pool_type != PoolingType::MAX)
+ {
+ // Make invalid the values loaded if the x or y coordinate was clamped (out-of-bound)
+ code += R"_(
+ if(filter_size != 4)
+ {
+ SELECT_TYPE cond_w_s = (SELECT_TYPE)idx_in_w < (SELECT_TYPE)0;
+ SELECT_TYPE cond_w_e = (SELECT_TYPE)idx_in_w >= (SELECT_TYPE)({{SRC_WIDTH}} - 1);
+ SELECT_TYPE cond_h_s = (SELECT_TYPE)idx_in_h < (SELECT_TYPE)0;
+ SELECT_TYPE cond_h_e = (SELECT_TYPE)idx_in_h >= (SELECT_TYPE)({{SRC_HEIGHT}} - 1);
+
+ data0 = select(data0, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_s | cond_h_s));
+ data1 = select(data1, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_e | cond_h_s));
+ data2 = select(data2, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_s | cond_h_e));
+ data3 = select(data3, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_e | cond_h_e));
+ }
+)_";
+ }
+
+ code += R"_(
+ VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)
+ res0 = data0;
+ res0 = POOL_OP(res0, data1);
+ res0 = POOL_OP(res0, data2);
+ res0 = POOL_OP(res0, data3);
+)_";
+
+ if(pool_type == PoolingType::AVG)
+ {
+ // If avg pooling divide result accordingly.
+ if(_attributes.exclude_padding())
+ {
+ code += R"_(
+ res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))filter_size;
+)_";
+ }
+ else
+ {
+ code += R"_(
+ res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))4;
+)_";
+ }
+ }
+
+ // Store result
+ if(fp_mixed_precision)
+ {
+ code += R"_(
+ VEC_DATA_TYPE({{DATA_TYPE}}, N0)
+ res_converted0 = CONVERT(res0, VEC_DATA_TYPE({{DATA_TYPE}}, N0));
+ STORE_VECTOR_SELECT(res_converted, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0);
+)_";
+ }
+ else
+ {
+ code += R"_(
+ STORE_VECTOR_SELECT(res, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0);
+)_";
+ }
+
+ code += R"_(
+ //------------------ END KERNEL {{meta_kernel_id}} ---------------------
+}
+#undef SELECT_TYPE
+)_";
+
+ return code;
+}
+
+void ClTemplatePool2d::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+{
+ vtable.declare_variable(
+ comp_group,
+ _src,
+ GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "src");
+
+ vtable.declare_variable(
+ comp_group,
+ _dst,
+ GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "dst");
+}
+
+TagLUT ClTemplatePool2d::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+{
+ ARM_COMPUTE_UNUSED(comp_group);
+
+ TagLUT lut{};
+ // Arguments and global shared variables
+ lut["src"] = vtable.get_variable(_src);
+ lut["dst"] = vtable.get_variable(_dst);
+
+ // Local build options
+ lut["meta_kernel_id"] = id();
+
+ // Retrieve relevant data
+ const auto padding = _attributes.pad();
+ const auto stride = _attributes.stride();
+ const auto pool_size = _attributes.pool_size();
+ const auto data_type = _src->data_type();
+ const auto use_fp_mixed_precision = (_src->data_type() == DataType::F16) && _settings.mixed_precision() && _attributes.pool_type() != PoolingType::MAX;
+
+ // pool specific
+ lut["STRIDE_X"] = stride.x();
+ lut["STRIDE_Y"] = stride.y();
+ lut["PAD_X"] = padding.left;
+ lut["PAD_Y"] = padding.top;
+ lut["POOL_SIZE_X"] = pool_size.width;
+ lut["POOL_SIZE_Y"] = pool_size.height;
+
+ // Datatypes and variables
+ lut["ACC_DATA_TYPE"] = get_cl_type_from_data_type((use_fp_mixed_precision) ? (DataType::F32) : (data_type)); // Type of accumulators to use.
+ lut["DATA_TYPE"] = get_cl_type_from_data_type(data_type);
+ lut["SRC_WIDTH"] = _src->dimension(width_idx);
+ lut["SRC_HEIGHT"] = _src->dimension(height_idx);
+ lut["INITIAL_VALUE"] = (_attributes.pool_type() == PoolingType::MAX) ? float_to_string_with_full_precision(std::numeric_limits<float>::lowest()) : std::string("0");
+
+ // Tensor specific data
+ lut["DST_HEIGHT"] = _dst->dimension(height_idx);
+
+ return lut;
+}
+
+CLBuildOptions ClTemplatePool2d::get_build_options(const ComponentGroup &comp_group) const
+{
+ const auto root_window = comp_group.get_root_component()->template_writer()->get_window();
+ const unsigned int n0 = root_window.x().step();
+ const unsigned int partial_store_n0 = _dst->dimension(0) % n0;
+
+ CLBuildOptions build_opts{};
+ build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
+ build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
+
+ return build_opts;
+}
+
+std::string ClTemplatePool2d::get_config_id() const
+{
+ const DataType data_type = _src->data_type();
+ const DataLayout data_layout = _src->data_layout();
+
+ std::string config_id{};
+ config_id += "pooling_layer_2d_";
+ config_id += lower_string(string_from_data_type(data_type));
+ config_id += "_";
+ config_id += lower_string(string_from_data_layout(data_layout));
+ config_id += "_";
+ config_id += support::cpp11::to_string(_dst->dimension(width_idx));
+ config_id += "_";
+ config_id += support::cpp11::to_string(_dst->dimension(height_idx));
+ config_id += "_";
+ config_id += support::cpp11::to_string(_dst->dimension(channel_idx));
+
+ return config_id;
+}
+
+std::set<std::string> ClTemplatePool2d::get_headers_list() const
+{
+ return std::set<std::string>{ "helpers.h", "tile_helpers.h", "repeat.h" };
+}
+
+Window ClTemplatePool2d::get_window() const
+{
+ ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
+ const auto output_shape = _dst->tensor_shape();
+ const unsigned int vec_size = adjust_vec_size(((_dst->data_type() == DataType::F32) ? 2 : 4), _dst->dimension(0));
+
+ // Create and configure kernel window
+ auto win = calculate_max_window(output_shape, Steps(vec_size));
+ win = win.collapse_if_possible(win, Window::DimZ); // collapse window on batch size.
+ return win;
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h
new file mode 100644
index 0000000000..ef1c100f44
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEPOOL2D
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEPOOL2D
+
+#include "arm_compute/core/experimental/Types.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+class ClTemplatePool2d final : public IGpuTemplateComponentWriter
+{
+public:
+ using Attributes = ClComponentPool2d::Attributes;
+ using Settings = ClComponentPool2d::Settings;
+ /** Constructor
+ *
+ * @param[in] id Component id
+ * @param[in] tensors Tensor arguments to the components
+ * @param[in] attributes Component attributes
+ * @param[in] settings Component settings
+ */
+ ClTemplatePool2d(ComponentId id,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings);
+
+ /** Prevent instances of this class from being copy constructed */
+ ClTemplatePool2d(const ClTemplatePool2d &direct_conv2d) = delete;
+
+ /** Prevent instances of this class from being copied */
+ ClTemplatePool2d &operator=(const ClTemplatePool2d &direct_conv2d) = delete;
+
+ /** Allow instances of this class to be move constructed */
+ ClTemplatePool2d(ClTemplatePool2d &&direct_conv2d) = default;
+
+ /** Allow instances of this class to be moved */
+ ClTemplatePool2d &operator=(ClTemplatePool2d &&direct_conv2d) = default;
+
+ /** Generate kernel component name */
+ std::string get_name() const override;
+
+ /** Generate kernel component code template
+ *
+ * @param[in] comp_group Component group of which the component is a part of
+ *
+ * @return std::string Component code
+ */
+ std::string get_component_code(const ComponentGroup &comp_group) const override;
+ /** Declare all variables used by the component in the @p vtable
+ *
+ * @param[out] vtable Variable table
+ * @param[in] comp_group Component group of which the component is a part of
+ */
+ void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
+ /** Generate the tag look-up table used to instantiate the component code.
+ *
+ * @param[in] vtable Variable table
+ * @param[in] comp_group Component group of which the component is a part of
+ *
+ * @return TagLUT Tag lookup table
+ */
+ TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
+ /** Generate the build options used in the component
+ *
+ * @param[in] comp_group Component group of which the component is a part of
+ *
+ * @return CLBuildOptions Build options
+ */
+ CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override;
+
+ /** Generate the component config id string used for tuning */
+ std::string get_config_id() const override;
+
+ /** Generate the header list used in the component */
+ std::set<std::string> get_headers_list() const override;
+
+ /** Generate the execution window for the component */
+ Window get_window() const override;
+
+private:
+ /** Generate pooling kernel template code optimized for 2x2 pooling
+ *
+ * @return std::String Component code
+ */
+ std::string get_2x2_kernel_code() const;
+
+ /** Generate generalised pooling kernel template code for MxN pooling
+ *
+ * @return std::String Component code
+ */
+ std::string get_MxN_kernel_code() const;
+
+ const ITensorInfo *_src;
+ const ITensorInfo *_dst;
+ Attributes _attributes;
+ Settings _settings;
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEPOOL2D */
diff --git a/src/dynamic_fusion/utils/Utils.h b/src/dynamic_fusion/utils/Utils.h
index 063dbdc44e..d317ec7fd6 100644
--- a/src/dynamic_fusion/utils/Utils.h
+++ b/src/dynamic_fusion/utils/Utils.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_UTILS_UTILS
#include "arm_compute/core/ITensorInfo.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h"
namespace arm_compute
{
@@ -32,25 +33,37 @@ namespace experimental
{
namespace dynamic_fusion
{
-bool is_user_tensor(const ITensorInfo *tensor_info)
+inline bool is_user_tensor(const ITensorInfo *tensor_info)
{
return tensor_info->id() > ITensorInfo::invalid_tensor_id;
}
-bool is_intermediate_tensor(const ITensorInfo *tensor_info)
+inline bool is_intermediate_tensor(const ITensorInfo *tensor_info)
{
return tensor_info->id() < ITensorInfo::invalid_tensor_id;
}
-bool is_valid_tensor(const ITensorInfo *tensor_info)
+inline bool is_valid_tensor(const ITensorInfo *tensor_info)
{
return tensor_info->has_valid_id();
}
-bool is_invalid_tensor(const ITensorInfo *tensor_info)
+inline bool is_invalid_tensor(const ITensorInfo *tensor_info)
{
return !is_valid_tensor(tensor_info);
}
+
+/** Inline function to convert @ref Pool2dAttributes to PoolingLayerInfo
+*/
+inline PoolingLayerInfo convert_pool_attr_to_pool_info(const Pool2dAttributes &pool_attr, bool mixed_precision = false, DataLayout data_layout = DataLayout::NHWC)
+{
+ // Create PadStrideInfo
+ const Size2D stride = pool_attr.stride();
+ const Padding2D padding = pool_attr.pad();
+ const PadStrideInfo pad_stride(stride.x(), stride.y(), padding.left, padding.top, arm_compute::DimensionRoundingType::FLOOR);
+
+ return PoolingLayerInfo(pool_attr.pool_type(), pool_attr.pool_size(), data_layout, pad_stride, pool_attr.exclude_padding(), mixed_precision);
+}
}
}
}