From a18d85c6d2c0025938c2dc10e553eb82c01922f2 Mon Sep 17 00:00:00 2001 From: Mohammed Suhail Munshi Date: Tue, 3 Jan 2023 10:16:16 +0000 Subject: Dynamic Fusion Pooling Layer 2d - Adds Dynamic fusion PoolingLayer2D as Unfusable Operator - Indices are not supported - Adds tests for F32/F16 Datatypes Resolves : [COMPMID-5520] Signed-off-by: Mohammed Suhail Munshi Change-Id: I0d112545eb9209c836bf9ea153069f8627531e0a Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8893 Reviewed-by: Gunes Bayir Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Benchmark: Arm Jenkins --- Android.bp | 4 + .../sketch/attributes/Pool2dAttributes.h | 82 ++++ .../sketch/gpu/operators/GpuPool2d.h | 114 +++++ filelist.json | 4 + .../sketch/attributes/Pool2dAttributes.cpp | 90 ++++ .../sketch/gpu/components/cl/ClComponentPool2d.cpp | 104 +++++ .../sketch/gpu/components/cl/ClComponentPool2d.h | 132 ++++++ .../sketch/gpu/operators/GpuPool2d.cpp | 193 +++++++++ .../gpu/template_writer/cl/ClTemplatePool2d.cpp | 472 +++++++++++++++++++++ .../gpu/template_writer/cl/ClTemplatePool2d.h | 131 ++++++ src/dynamic_fusion/utils/Utils.h | 21 +- .../datasets/dynamic_fusion/PoolingLayerDataset.h | 122 ++++++ tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp | 233 ++++++++++ .../fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h | 190 +++++++++ tests/validation/reference/PoolingLayer.cpp | 3 +- utils/TypePrinter.h | 62 +++ 16 files changed, 1951 insertions(+), 6 deletions(-) create mode 100644 arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h create mode 100644 arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h create mode 100644 src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp create mode 100644 src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp create mode 100644 src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h create mode 100644 src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp create mode 100644 src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp create mode 100644 src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h create mode 100644 tests/datasets/dynamic_fusion/PoolingLayerDataset.h create mode 100644 tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp create mode 100644 tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h diff --git a/Android.bp b/Android.bp index 8867d5b943..b02d1048b3 100644 --- a/Android.bp +++ b/Android.bp @@ -591,6 +591,7 @@ cc_library_static { "src/dynamic_fusion/sketch/attributes/CastAttributes.cpp", "src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp", "src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp", + "src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp", "src/dynamic_fusion/sketch/attributes/ReshapeAttributes.cpp", "src/dynamic_fusion/sketch/attributes/ResizeAttributes.cpp", "src/dynamic_fusion/sketch/attributes/SoftmaxAttributes.cpp", @@ -608,6 +609,7 @@ cc_library_static { "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp", "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp", "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp", + "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp", "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp", "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp", "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp", @@ -617,6 +619,7 @@ cc_library_static { "src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp", "src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp", "src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp", "src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp", "src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp", "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp", @@ -627,6 +630,7 @@ cc_library_static { "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp", "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp", "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp", + "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp", "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp", "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp", "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp", diff --git a/arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h b/arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h new file mode 100644 index 0000000000..be30781d86 --- /dev/null +++ b/arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_ATTRIBUTES_POOL2DATTRIBUTES +#define ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_ATTRIBUTES_POOL2DATTRIBUTES + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +/** Attributes are backend-agnostic parameters (in addition to the input/output tensors) of an operator. + */ + +class Pool2dAttributes +{ +public: + /* Get Pooling Type */ + PoolingType pool_type() const; + + /* Set Pooling Type */ + Pool2dAttributes pool_type(PoolingType pool_type); + + /* Get 2D Pool Size */ + Size2D pool_size() const; + + /* Set 2D Pool size */ + Pool2dAttributes pool_size(const Size2D &pool_size); + + /* Get Padding */ + Padding2D pad() const; + + /* Set Padding */ + Pool2dAttributes pad(const Padding2D &padding); + + /* Get Stride */ + Size2D stride() const; + + /* Set Stride */ + Pool2dAttributes stride(const Size2D &stride); + + /* Get exclude padding */ + bool exclude_padding() const; + + /* Set exclude padding */ + Pool2dAttributes exclude_padding(bool exclude_padding); + +private: + PoolingType _pool_type{}; + Padding2D _pad{}; + Size2D _pool_size{}; + Size2D _stride{ 1U, 1U }; + bool _exclude_padding{ true }; +}; + +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute +#endif /* ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_ATTRIBUTES_POOL2DATTRIBUTES */ diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h new file mode 100644 index 0000000000..16d88af570 --- /dev/null +++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUPOOL2D +#define ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUPOOL2D + +#include "arm_compute/core/Error.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +/** Forward declaration */ +class GpuWorkloadSketch; +class GpuWorkloadContext; + +/** Operator backend specific settings +*/ +class GpuPool2dSettings +{ +public: + /* Get mixed_precision*/ + bool mixed_precision() const; + + /* Set mixed_precision */ + GpuPool2dSettings &mixed_precision(bool mixed_precision); + +private: + bool _mixed_precision{ false }; +}; + +/** Operator interface. */ +class GpuPool2d final +{ +public: + /** Attributes are a set of backend-agnostic parameters that define what an operator does */ + using Attributes = Pool2dAttributes; + /** Settings are a set of backend-specific parameters that influence the implementation of a operator */ + using Settings = GpuPool2dSettings; + + /** Create an operator and fuse it into the workload sketch. + * @note If @ref validate_op() fails, the creation also fails and may throw an error. + * @note If @ref validate_op() fails, @p sketch remains unchanged and valid. + * + * Valid data type configurations: + * |src |dst | + * |:--------------|:--------------| + * |F16 |F16 | + * |F32 |F32 | + * + * Valid data layouts: + * - NHWC + * + * @param[in,out] sketch Workload sketch into which the operator will be fused + * @param[in] src Source tensor + * @param[out] dst Destination tensor + * @param[in] attributes Operator attributes + * @param[in] settings Operator settings + */ + static void create_op(GpuWorkloadSketch &sketch, + ITensorInfo *src, + ITensorInfo *dst, + const Attributes &attributes, + const Settings &settings); + /** Check if the operator configuration is supported, irrespective of fusion + * + * @param[in] context Workload context within which the operator is running + * @param[in] src Left hand side tensor info. Data types supported: F16/F32. + * @param[out] dst Destination tensor info. Data types supported: F16/F32. + * If an uninitialized ITensorInfo is passed in, it will be auto-initialized + * @param[in] attributes Operator attributes + * @param[in] settings Operator settings + */ + static Status is_supported_op(const GpuWorkloadContext &context, + const ITensorInfo *src, + const ITensorInfo *dst, + const Attributes &attributes, + const Settings &settings); + /** Validate the operator and check if it can be fused into the workload sketch. + * Similar to @ref GpuPool2d::create_op() + */ + static Status validate_op(const GpuWorkloadSketch &sketch, + const ITensorInfo *src, + const ITensorInfo *dst, + const Attributes &attributes, + const Settings &settings); +}; +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute +#endif /* ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUPOOL2D */ diff --git a/filelist.json b/filelist.json index ee6f7e364f..8be25712b7 100644 --- a/filelist.json +++ b/filelist.json @@ -2206,6 +2206,7 @@ "src/dynamic_fusion/sketch/attributes/CastAttributes.cpp", "src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp", "src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp", + "src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp", "src/dynamic_fusion/sketch/attributes/ResizeAttributes.cpp", "src/dynamic_fusion/sketch/attributes/SoftmaxAttributes.cpp", "src/dynamic_fusion/sketch/attributes/ReshapeAttributes.cpp", @@ -2222,6 +2223,7 @@ "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp", "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp", "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp", + "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp", "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp", "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp", "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp", @@ -2232,6 +2234,7 @@ "src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp", "src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp", "src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp", "src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp", "src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp", "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp", @@ -2240,6 +2243,7 @@ "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp", "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp", "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp", + "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp", "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp", "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp", "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp", diff --git a/src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp b/src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp new file mode 100644 index 0000000000..c28791f5fe --- /dev/null +++ b/src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h" +#include "arm_compute/core/Size2D.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +PoolingType Pool2dAttributes::pool_type() const +{ + return _pool_type; +} + +Pool2dAttributes Pool2dAttributes::pool_type(PoolingType pool_type) +{ + _pool_type = pool_type; + return *this; +} + +Padding2D Pool2dAttributes::pad() const +{ + return _pad; +} + +Pool2dAttributes Pool2dAttributes::pad(const Padding2D &pad) +{ + _pad = pad; + return *this; +} + +Size2D Pool2dAttributes::pool_size() const +{ + return _pool_size; +} + +Pool2dAttributes Pool2dAttributes::pool_size(const Size2D &pool_size) +{ + _pool_size = pool_size; + return *this; +} + +Size2D Pool2dAttributes::stride() const +{ + return _stride; +} + +Pool2dAttributes Pool2dAttributes::stride(const Size2D &stride) +{ + _stride = stride; + return *this; +} + +bool Pool2dAttributes::exclude_padding() const +{ + return _exclude_padding; +} + +Pool2dAttributes Pool2dAttributes::exclude_padding(bool exclude_padding) +{ + _exclude_padding = exclude_padding; + return *this; +} +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp new file mode 100644 index 0000000000..2b01803224 --- /dev/null +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "ClComponentPool2d.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h" +#include "src/core/CL/CLValidate.h" +#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h" +#include "src/dynamic_fusion/utils/Utils.h" +#include + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +Status ClComponentPool2d::validate( + const Properties &properties, + const ArgumentPack &tensors, + const Attributes &attributes, + const Settings &settings) +{ + ARM_COMPUTE_UNUSED(properties); + const auto src = tensors.get_const_tensor(TensorType::ACL_SRC_0); + const auto dst = tensors.get_const_tensor(TensorType::ACL_DST_0); + + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_ERROR_ON_MSG((attributes.pool_type() != PoolingType::AVG && attributes.pool_type() != PoolingType::MAX), "Unsupported Pooling type"); + + // 1. Check validity + // Check if pooling is valid + ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_pool_region_entirely_outside_input(convert_pool_attr_to_pool_info(attributes, settings.mixed_precision())), + "Pooling region that is entirely outside input tensor is unsupported"); + + // Matching data type + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst); + + // Matching data layout + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, dst); + + // All tensor infos are initialized + ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0); + ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0); + + // Device requirements are met + ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src); + + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(dst->tensor_shape(), + misc::shape_calculator::compute_pool_shape(*src, convert_pool_attr_to_pool_info(attributes, settings.mixed_precision()))); + + // 2. Check support level + // Data type + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32); + + // Data layout + ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC); + + return Status{}; +} + +ClComponentPool2d::ClComponentPool2d( + ComponentId id, + const Properties &properties, + const ArgumentPack &tensors, + const Attributes &attributes, + const Settings &settings) + : IGpuKernelComponent{ id, properties, tensors }, + _component_writer{ std::make_unique(id, tensors, attributes, settings) } +{ +} +ClComponentPool2d::~ClComponentPool2d() +{ +} +const IGpuTemplateComponentWriter *ClComponentPool2d::template_writer() const +{ + return _component_writer.get(); +} +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h new file mode 100644 index 0000000000..896048e27a --- /dev/null +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTPOOL2D +#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTPOOL2D + +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h" +#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h" + +namespace arm_compute +{ +/** Forward declaration */ +class ITensorInfo; +namespace experimental +{ +namespace dynamic_fusion +{ +/** Forward declaration */ +template +class ArgumentPack; +class Pool2dAttributes; + +/** Forward declaration */ +class ClTemplatePool2d; + +class ClComponentPool2d final : public IGpuKernelComponent +{ +public: + /** Attributes are a set of backend-agnostic parameters that define what a component does */ + using Attributes = Pool2dAttributes; + /** Settings are a set of backend-specific parameters that influence the implementation of a component */ + using Settings = GpuPool2dSettings; + +public: + /** Validate the component + * + * @param[in] properties Component properties + * @param[in,out] tensors Tensor arguments to the component + * @param[in] attributes Component attributes + * @param[in] settings Component settings + * + * @return Status Validation results + * + * Tensor argument names: + * - ACL_SRC_0: Input + * - ACL_DST_0: Output + * + * Tensor argument constness: + * - ACL_SRC_0: Const + * - ACL_DST_0: Const + * + * Valid data layouts: + * - NHWC + * + * Valid data type configurations: + * |ACL_SRC_0 |ACL_DST_0 | + * |:--------------|:--------------| + * |F16 |F16 | + * |F32 |F32 | + */ + static Status validate( + const Properties &properties, + const ArgumentPack &tensors, + const Attributes &attributes, + const Settings &settings); + + /** Constructor + * + * @param[in] id Unique Component Identifier within a workload + * @param[in] properties Component properties + * @param[in,out] tensors Tensor arguments to the component + * @param[in] attributes Component attributes + * @param[in] settings Component settings + */ + ClComponentPool2d( + ComponentId id, + const Properties &properties, + const ArgumentPack &tensors, + const Attributes &attributes, + const Settings &settings); + + /** Destructor */ + ~ClComponentPool2d() override; + + /** Prevent instances of this class from being copy constructed */ + ClComponentPool2d(const ClComponentPool2d &component) = delete; + + /** Prevent instances of this class from being copied */ + ClComponentPool2d &operator=(const ClComponentPool2d &component) = delete; + + /** Allow instances of this class to be move constructed */ + ClComponentPool2d(ClComponentPool2d &&component) = default; + + /** Allow instances of this class to be moved */ + ClComponentPool2d &operator=(ClComponentPool2d &&component) = default; + + /** Get template writer for the component */ + const IGpuTemplateComponentWriter *template_writer() const override; + + /** Get component type */ + GpuComponentType type() const override + { + return GpuComponentType::Unfusable; + } + +private: + std::unique_ptr _component_writer; +}; +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute +#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTPOOL2D */ diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp new file mode 100644 index 0000000000..a07ad00155 --- /dev/null +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/core/CL/CLCompileContext.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/experimental/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h" + +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h" +#include "src/common/utils/Log.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/dynamic_fusion/sketch/ArgumentPack.h" +#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" +#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h" +#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h" +#include "src/dynamic_fusion/utils/Utils.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +namespace +{ +constexpr GpuOperatorType operator_type = GpuOperatorType::Unfusable; +} // namespace + +GpuPool2dSettings &GpuPool2dSettings::mixed_precision(bool mixed_precision) +{ + _mixed_precision = mixed_precision; + return *this; +} + +bool GpuPool2dSettings::mixed_precision() const +{ + return _mixed_precision; +} + +Status GpuPool2d::validate_op(const GpuWorkloadSketch &sketch, + const ITensorInfo *src, + const ITensorInfo *dst, + const Pool2dAttributes &attributes, + const GpuPool2dSettings &settings) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !dst->has_valid_id()); + + // Auto initialize dst tensor info + TensorInfo dst_info_to_validate = *dst; + { + auto shape = misc::shape_calculator::compute_pool_shape(*src, convert_pool_attr_to_pool_info(attributes, settings.mixed_precision())); + auto_init_if_empty(dst_info_to_validate, src->clone()->set_tensor_shape(shape)); + } + + // Perform fusion test + // Pack tensor infos + ArgumentPack tensors; + tensors.add_const_tensor(ACL_SRC_0, src); + tensors.add_const_tensor(ACL_DST_0, &dst_info_to_validate); + + const auto op = sketch.implementation().operator_group().new_operator(operator_type, tensors); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(!sketch.implementation().operator_group().try_add_operator(op), + "Operator fusion test failed. This operator cannot be fused into the workload"); + + // Check if configuration is supported + return is_supported_op(*sketch.gpu_context(), src, &dst_info_to_validate, attributes, settings); +} + +Status GpuPool2d::is_supported_op(const GpuWorkloadContext &context, + const ITensorInfo *src, + const ITensorInfo *dst, + const Pool2dAttributes &attributes, + const GpuPool2dSettings &settings) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); + // Data type + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32); + // Data layout + ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC); + // Check exclude padding is not false + ARM_COMPUTE_RETURN_ERROR_ON_MSG(!attributes.exclude_padding(), "Exclude padding must be set to true in Attributes!"); + + // Auto initialize dst tensor info + TensorInfo dst_info_to_validate = *dst; + { + auto shape = misc::shape_calculator::compute_pool_shape(*src, convert_pool_attr_to_pool_info(attributes, settings.mixed_precision())); + auto_init_if_empty(dst_info_to_validate, src->clone()->set_tensor_shape(shape)); + } + + // Check components + if(context.gpu_language() == GpuLanguage::OpenCL) + { + const auto cl_compile_ctx = context.cl_compile_context(); + ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr); + + // Validate Component + { + const KernelProperties properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }); + + ArgumentPack arguments; + arguments.add_const_tensor(ACL_SRC_0, src); + arguments.add_const_tensor(ACL_DST_0, &dst_info_to_validate); + ARM_COMPUTE_RETURN_ON_ERROR(ClComponentPool2d::validate(properties, arguments, attributes, settings)); + } + } + else + { + ARM_COMPUTE_RETURN_ERROR_MSG("Unimplemented Gpu language"); + } + return Status{}; +} + +void GpuPool2d::create_op(GpuWorkloadSketch &sketch, + ITensorInfo *src, + ITensorInfo *dst, + const Pool2dAttributes &attributes, + const GpuPool2dSettings &settings) +{ + // Assert validation + ARM_COMPUTE_ERROR_THROW_ON(GpuPool2d::validate_op(sketch, src, dst, attributes, settings)); + ARM_COMPUTE_LOG_PARAMS(src, dst, attributes, settings); + + // Auto initialize dst tensor + { + auto shape = misc::shape_calculator::compute_pool_shape(*src, convert_pool_attr_to_pool_info(attributes, settings.mixed_precision())); // use the default DimensionRoundingType + auto_init_if_empty(*dst, src->clone()->set_tensor_shape(shape)); + } + + // Translate into components and add to component graph + auto &comp_graph = sketch.implementation().component_graph(); + + const auto sketch_ctx = sketch.implementation().context(); + + if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL) + { + const auto cl_compile_ctx = sketch_ctx->cl_compile_context(); + ARM_COMPUTE_ERROR_ON(cl_compile_ctx == nullptr); + + // Add Component + { + auto properties = IGpuKernelComponent::Properties(); + properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }); + + ArgumentPack arguments; + arguments.add_const_tensor(ACL_SRC_0, src); + arguments.add_const_tensor(ACL_DST_0, dst); + comp_graph.add_new_component(properties, arguments, attributes, settings); + } + } + else + { + ARM_COMPUTE_ERROR("Unimplemented Gpu language"); + } + + // Set up fusion test by adding to the Operator Group + // Note this has to be performed after all the components have been successfully added to the component graph + + // Pack tensor infos + ArgumentPack tensors; + tensors.add_const_tensor(ACL_SRC_0, src); + tensors.add_tensor(ACL_DST_0, dst); + + const auto op = sketch.implementation().operator_group().new_operator(operator_type, tensors); + sketch.implementation().operator_group().add_operator(op); +} + +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp new file mode 100644 index 0000000000..5df4438afe --- /dev/null +++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp @@ -0,0 +1,472 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "ClTemplatePool2d.h" + +#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" +#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h" + +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/WindowHelpers.h" + +#include "support/StringSupport.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +namespace +{ +// Shape indexes for NHWC Datalayout +constexpr static int32_t batch_idx = 3; +constexpr static int32_t height_idx = 2; +constexpr static int32_t width_idx = 1; +constexpr static int32_t channel_idx = 0; +} +ClTemplatePool2d::ClTemplatePool2d(ComponentId id, + const ArgumentPack &tensors, + const Attributes &attributes, + const Settings &settings) + : IGpuTemplateComponentWriter{ id, tensors }, + _src{}, + _dst{}, + _attributes{ attributes }, + _settings{ settings } +{ + _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); + _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); + ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst); +} + +std::string ClTemplatePool2d::get_name() const +{ + return "pool2d"; +} + +std::string ClTemplatePool2d::get_component_code(const ComponentGroup &comp_group) const +{ + ARM_COMPUTE_UNUSED(comp_group); + + // Condition to use 2x2 optimized kernel + if(_attributes.pool_size() == Size2D(2, 2)) + { + return get_2x2_kernel_code(); + } + else + { + return get_MxN_kernel_code(); + } +} + +std::string ClTemplatePool2d::get_MxN_kernel_code() const +{ + const auto pool_type = _attributes.pool_type(); + const bool fp_mixed_precision = (_src->data_type() == DataType::F16) && _settings.mixed_precision() && pool_type != PoolingType::MAX; + + // Define pool op macro. + std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_" : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_"; + + // Kernel start + // Note: If C is not multiple of N0, we shift back of PARTIAL_N0 elements to compute the leftover elements for get_global_id(0) == 0 + // Note: If C is less than N0, N0 should be SHRINKED to the closest smaller N0. This operation is performed on the host side + std::string code = R"_( +//------------------ START KERNEL {{meta_kernel_id}} --------------------- +// IN_0(src) {{src}} +// OUT(dst, accum) {{dst}} + +{ + const int idx_out_c = g_ind_0; + const int idx_out_w = g_ind_1; +)_"; + + // Add macro for POOL_OP + code += "\n" + pool_op + "\n"; + + code += R"_( + const int idx_out_h = g_ind_2 % {{DST_HEIGHT}}; + const int idx_out_n = g_ind_2 / {{DST_HEIGHT}}; +)_"; + + // Define common variables. + code += R"_( + __global unsigned char *in_base_ptr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_n * {{src}}_stride_w; + + __global unsigned char *out_base_ptr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_w * {{dst}}_stride_y + idx_out_h * {{dst}}_stride_z + idx_out_n * {{dst}}_stride_w; + + VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0) + res0 = {{INITIAL_VALUE}}; + + const int idx_in_w = idx_out_w * {{STRIDE_X}} - {{PAD_X}}; + const int idx_in_h = idx_out_h * {{STRIDE_Y}} - {{PAD_Y}}; + + const int pool_x_s = max((int)0, -idx_in_w); + const int pool_x_e = min((int){{POOL_SIZE_X}}, (int){{SRC_WIDTH}} - idx_in_w); + const int pool_y_s = max((int)0, -idx_in_h); + const int pool_y_e = min((int){{POOL_SIZE_Y}}, (int){{SRC_HEIGHT}} - idx_in_h); +)_"; + + // Determine filter size depending on if padding is excluded or not + if(_attributes.exclude_padding()) + { + code += R"_( + const int filter_size = (pool_y_e - pool_y_s) * (pool_x_e - pool_x_s); +)_"; + } + else + { + code += R"_( + const int filter_size = {{POOL_SIZE_X}} * {{POOL_SIZE_Y}}; +)_"; + } + + // Loop through pool size + // if global pooling + if(_attributes.pool_size().x() == _src->dimension(width_idx) && _attributes.pool_size().y() == _src->dimension(height_idx)) + { + // Begin loop + code += R"_( + // Global pooling path + for(int y = 0; y < {{POOL_SIZE_Y}}; ++y) + { + #pragma unroll 8 + for(int x = 0; x < {{POOL_SIZE_X}}; ++x) + { + VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0) + data0; +)_"; + } + else // if local pooling size + { + code += R"_( + for(int y = pool_y_s; y < pool_y_e; ++y) + { + #pragma unroll 8 + for(int x = pool_x_s; x < pool_x_e; ++x) + { + VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0) + data0; +)_"; + } // end else + + // if condition inside loop - use 32bit acc if mixed_precision. + // End loop through pooling section. + if(fp_mixed_precision) + { + // In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE + code += R"_( + data0 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + (x + idx_in_w) * {{src}}_stride_y + (y + idx_in_h) * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)); + res0 = POOL_OP(res0, data0); + } + } +)_"; + } + else // load data, compute result and end loop + { + code += R"_( + data0 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + (x + idx_in_w) * {{src}}_stride_y + (y + idx_in_h) * {{src}}_stride_z)); + res0 = POOL_OP(res0, data0); + } + } +)_"; + } + + // For Pool AVG ONLY, divide pool output by filter size + if(pool_type == PoolingType::AVG) + { + code += R"_( + res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))filter_size; +)_"; + } + + // If mixed precision convert datatype before storing. Then end kernel. + if(fp_mixed_precision) + { + code += R"_( + VEC_DATA_TYPE({{DATA_TYPE}}, N0) + res_converted0 = CONVERT(res0, VEC_DATA_TYPE({{DATA_TYPE}}, N0)); + STORE_VECTOR_SELECT(res_converted, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0); +)_"; + } + else + { + // Store data + code += R"_( + STORE_VECTOR_SELECT(res, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0); +)_"; + } + + code += R"_( +//------------------ END KERNEL {{meta_kernel_id}} --------------------- +} +)_"; + + return code; +} + +std::string ClTemplatePool2d::get_2x2_kernel_code() const +{ + const auto pool_type = _attributes.pool_type(); + const bool fp_mixed_precision = (_src->data_type() == DataType::F16) && _settings.mixed_precision() && pool_type != PoolingType::MAX; + std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_" : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_"; + + std::string code = R"_( +//------------------ START KERNEL {{meta_kernel_id}} --------------------- +// IN_0(src) {{src}} +// OUT(dst, accum) {{dst}} + +#define SELECT_TYPE SELECT_VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0) + +{ + const int idx_out_c = g_ind_0; + const int idx_out_w = g_ind_1; +)_"; + + // Add pool op macro + code += "\n" + pool_op + "\n"; + + // If batch size != 1, the batch size dimension is collapsed over the height dimension + code += R"_( + const int idx_out_h = g_ind_2 % {{DST_HEIGHT}}; + const int idx_out_n = g_ind_2 / {{DST_HEIGHT}}; +)_"; + + code += R"_( + const int idx_in_w = idx_out_w * {{STRIDE_X}} - {{PAD_X}}; + const int idx_in_h = idx_out_h * {{STRIDE_Y}} - {{PAD_Y}}; + + __global unsigned char *in_base_ptr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_n * {{src}}_stride_w; + __global unsigned char *out_base_ptr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_w * {{dst}}_stride_y + idx_out_h * {{dst}}_stride_z + idx_out_n * + {{dst}}_stride_w; + const int pool_x_s = max((int)0, -idx_in_w); + const int pool_x_e = min((int)2, (int){{SRC_WIDTH}} - idx_in_w); + const int pool_y_s = max((int)0, -idx_in_h); + const int pool_y_e = min((int)2, (int){{SRC_HEIGHT}} - idx_in_h); + + const int filter_size = (pool_x_e - pool_x_s) * (pool_y_e - pool_y_s); + const int x0 = pool_x_s + idx_in_w; + const int y0 = pool_y_s + idx_in_h; + const int x1 = pool_x_e - 1 + idx_in_w; + const int y1 = pool_y_e - 1 + idx_in_h; + + REPEAT_VAR_INIT_TO_CONST(4, VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0), data, 0); +)_"; + + if(fp_mixed_precision) + { + // In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE + code += R"_( + data0 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y0 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)); + data1 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y0 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)); + data2 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y1 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)); + data3 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y1 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)); +)_"; + } + else + { + code += R"_( + data0 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y0 * {{src}}_stride_z)); + data1 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y0 * {{src}}_stride_z)); + data2 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y1 * {{src}}_stride_z)); + data3 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y1 * {{src}}_stride_z)); +)_"; + } + + if(pool_type != PoolingType::MAX) + { + // Make invalid the values loaded if the x or y coordinate was clamped (out-of-bound) + code += R"_( + if(filter_size != 4) + { + SELECT_TYPE cond_w_s = (SELECT_TYPE)idx_in_w < (SELECT_TYPE)0; + SELECT_TYPE cond_w_e = (SELECT_TYPE)idx_in_w >= (SELECT_TYPE)({{SRC_WIDTH}} - 1); + SELECT_TYPE cond_h_s = (SELECT_TYPE)idx_in_h < (SELECT_TYPE)0; + SELECT_TYPE cond_h_e = (SELECT_TYPE)idx_in_h >= (SELECT_TYPE)({{SRC_HEIGHT}} - 1); + + data0 = select(data0, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_s | cond_h_s)); + data1 = select(data1, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_e | cond_h_s)); + data2 = select(data2, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_s | cond_h_e)); + data3 = select(data3, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_e | cond_h_e)); + } +)_"; + } + + code += R"_( + VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0) + res0 = data0; + res0 = POOL_OP(res0, data1); + res0 = POOL_OP(res0, data2); + res0 = POOL_OP(res0, data3); +)_"; + + if(pool_type == PoolingType::AVG) + { + // If avg pooling divide result accordingly. + if(_attributes.exclude_padding()) + { + code += R"_( + res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))filter_size; +)_"; + } + else + { + code += R"_( + res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))4; +)_"; + } + } + + // Store result + if(fp_mixed_precision) + { + code += R"_( + VEC_DATA_TYPE({{DATA_TYPE}}, N0) + res_converted0 = CONVERT(res0, VEC_DATA_TYPE({{DATA_TYPE}}, N0)); + STORE_VECTOR_SELECT(res_converted, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0); +)_"; + } + else + { + code += R"_( + STORE_VECTOR_SELECT(res, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0); +)_"; + } + + code += R"_( + //------------------ END KERNEL {{meta_kernel_id}} --------------------- +} +#undef SELECT_TYPE +)_"; + + return code; +} + +void ClTemplatePool2d::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const +{ + vtable.declare_variable( + comp_group, + _src, + GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), + "src"); + + vtable.declare_variable( + comp_group, + _dst, + GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), + "dst"); +} + +TagLUT ClTemplatePool2d::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const +{ + ARM_COMPUTE_UNUSED(comp_group); + + TagLUT lut{}; + // Arguments and global shared variables + lut["src"] = vtable.get_variable(_src); + lut["dst"] = vtable.get_variable(_dst); + + // Local build options + lut["meta_kernel_id"] = id(); + + // Retrieve relevant data + const auto padding = _attributes.pad(); + const auto stride = _attributes.stride(); + const auto pool_size = _attributes.pool_size(); + const auto data_type = _src->data_type(); + const auto use_fp_mixed_precision = (_src->data_type() == DataType::F16) && _settings.mixed_precision() && _attributes.pool_type() != PoolingType::MAX; + + // pool specific + lut["STRIDE_X"] = stride.x(); + lut["STRIDE_Y"] = stride.y(); + lut["PAD_X"] = padding.left; + lut["PAD_Y"] = padding.top; + lut["POOL_SIZE_X"] = pool_size.width; + lut["POOL_SIZE_Y"] = pool_size.height; + + // Datatypes and variables + lut["ACC_DATA_TYPE"] = get_cl_type_from_data_type((use_fp_mixed_precision) ? (DataType::F32) : (data_type)); // Type of accumulators to use. + lut["DATA_TYPE"] = get_cl_type_from_data_type(data_type); + lut["SRC_WIDTH"] = _src->dimension(width_idx); + lut["SRC_HEIGHT"] = _src->dimension(height_idx); + lut["INITIAL_VALUE"] = (_attributes.pool_type() == PoolingType::MAX) ? float_to_string_with_full_precision(std::numeric_limits::lowest()) : std::string("0"); + + // Tensor specific data + lut["DST_HEIGHT"] = _dst->dimension(height_idx); + + return lut; +} + +CLBuildOptions ClTemplatePool2d::get_build_options(const ComponentGroup &comp_group) const +{ + const auto root_window = comp_group.get_root_component()->template_writer()->get_window(); + const unsigned int n0 = root_window.x().step(); + const unsigned int partial_store_n0 = _dst->dimension(0) % n0; + + CLBuildOptions build_opts{}; + build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); + build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); + + return build_opts; +} + +std::string ClTemplatePool2d::get_config_id() const +{ + const DataType data_type = _src->data_type(); + const DataLayout data_layout = _src->data_layout(); + + std::string config_id{}; + config_id += "pooling_layer_2d_"; + config_id += lower_string(string_from_data_type(data_type)); + config_id += "_"; + config_id += lower_string(string_from_data_layout(data_layout)); + config_id += "_"; + config_id += support::cpp11::to_string(_dst->dimension(width_idx)); + config_id += "_"; + config_id += support::cpp11::to_string(_dst->dimension(height_idx)); + config_id += "_"; + config_id += support::cpp11::to_string(_dst->dimension(channel_idx)); + + return config_id; +} + +std::set ClTemplatePool2d::get_headers_list() const +{ + return std::set{ "helpers.h", "tile_helpers.h", "repeat.h" }; +} + +Window ClTemplatePool2d::get_window() const +{ + ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); + const auto output_shape = _dst->tensor_shape(); + const unsigned int vec_size = adjust_vec_size(((_dst->data_type() == DataType::F32) ? 2 : 4), _dst->dimension(0)); + + // Create and configure kernel window + auto win = calculate_max_window(output_shape, Steps(vec_size)); + win = win.collapse_if_possible(win, Window::DimZ); // collapse window on batch size. + return win; +} + +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h new file mode 100644 index 0000000000..ef1c100f44 --- /dev/null +++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEPOOL2D +#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEPOOL2D + +#include "arm_compute/core/experimental/Types.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h" +#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h" +#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" +#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +class ClTemplatePool2d final : public IGpuTemplateComponentWriter +{ +public: + using Attributes = ClComponentPool2d::Attributes; + using Settings = ClComponentPool2d::Settings; + /** Constructor + * + * @param[in] id Component id + * @param[in] tensors Tensor arguments to the components + * @param[in] attributes Component attributes + * @param[in] settings Component settings + */ + ClTemplatePool2d(ComponentId id, + const ArgumentPack &tensors, + const Attributes &attributes, + const Settings &settings); + + /** Prevent instances of this class from being copy constructed */ + ClTemplatePool2d(const ClTemplatePool2d &direct_conv2d) = delete; + + /** Prevent instances of this class from being copied */ + ClTemplatePool2d &operator=(const ClTemplatePool2d &direct_conv2d) = delete; + + /** Allow instances of this class to be move constructed */ + ClTemplatePool2d(ClTemplatePool2d &&direct_conv2d) = default; + + /** Allow instances of this class to be moved */ + ClTemplatePool2d &operator=(ClTemplatePool2d &&direct_conv2d) = default; + + /** Generate kernel component name */ + std::string get_name() const override; + + /** Generate kernel component code template + * + * @param[in] comp_group Component group of which the component is a part of + * + * @return std::string Component code + */ + std::string get_component_code(const ComponentGroup &comp_group) const override; + /** Declare all variables used by the component in the @p vtable + * + * @param[out] vtable Variable table + * @param[in] comp_group Component group of which the component is a part of + */ + void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; + /** Generate the tag look-up table used to instantiate the component code. + * + * @param[in] vtable Variable table + * @param[in] comp_group Component group of which the component is a part of + * + * @return TagLUT Tag lookup table + */ + TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; + /** Generate the build options used in the component + * + * @param[in] comp_group Component group of which the component is a part of + * + * @return CLBuildOptions Build options + */ + CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; + + /** Generate the component config id string used for tuning */ + std::string get_config_id() const override; + + /** Generate the header list used in the component */ + std::set get_headers_list() const override; + + /** Generate the execution window for the component */ + Window get_window() const override; + +private: + /** Generate pooling kernel template code optimized for 2x2 pooling + * + * @return std::String Component code + */ + std::string get_2x2_kernel_code() const; + + /** Generate generalised pooling kernel template code for MxN pooling + * + * @return std::String Component code + */ + std::string get_MxN_kernel_code() const; + + const ITensorInfo *_src; + const ITensorInfo *_dst; + Attributes _attributes; + Settings _settings; +}; +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute +#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEPOOL2D */ diff --git a/src/dynamic_fusion/utils/Utils.h b/src/dynamic_fusion/utils/Utils.h index 063dbdc44e..d317ec7fd6 100644 --- a/src/dynamic_fusion/utils/Utils.h +++ b/src/dynamic_fusion/utils/Utils.h @@ -25,6 +25,7 @@ #define SRC_DYNAMIC_FUSION_UTILS_UTILS #include "arm_compute/core/ITensorInfo.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h" namespace arm_compute { @@ -32,25 +33,37 @@ namespace experimental { namespace dynamic_fusion { -bool is_user_tensor(const ITensorInfo *tensor_info) +inline bool is_user_tensor(const ITensorInfo *tensor_info) { return tensor_info->id() > ITensorInfo::invalid_tensor_id; } -bool is_intermediate_tensor(const ITensorInfo *tensor_info) +inline bool is_intermediate_tensor(const ITensorInfo *tensor_info) { return tensor_info->id() < ITensorInfo::invalid_tensor_id; } -bool is_valid_tensor(const ITensorInfo *tensor_info) +inline bool is_valid_tensor(const ITensorInfo *tensor_info) { return tensor_info->has_valid_id(); } -bool is_invalid_tensor(const ITensorInfo *tensor_info) +inline bool is_invalid_tensor(const ITensorInfo *tensor_info) { return !is_valid_tensor(tensor_info); } + +/** Inline function to convert @ref Pool2dAttributes to PoolingLayerInfo +*/ +inline PoolingLayerInfo convert_pool_attr_to_pool_info(const Pool2dAttributes &pool_attr, bool mixed_precision = false, DataLayout data_layout = DataLayout::NHWC) +{ + // Create PadStrideInfo + const Size2D stride = pool_attr.stride(); + const Padding2D padding = pool_attr.pad(); + const PadStrideInfo pad_stride(stride.x(), stride.y(), padding.left, padding.top, arm_compute::DimensionRoundingType::FLOOR); + + return PoolingLayerInfo(pool_attr.pool_type(), pool_attr.pool_size(), data_layout, pad_stride, pool_attr.exclude_padding(), mixed_precision); +} } } } diff --git a/tests/datasets/dynamic_fusion/PoolingLayerDataset.h b/tests/datasets/dynamic_fusion/PoolingLayerDataset.h new file mode 100644 index 0000000000..c4911f4940 --- /dev/null +++ b/tests/datasets/dynamic_fusion/PoolingLayerDataset.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "utils/TypePrinter.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h" + + +using Pool2dAttributes = arm_compute::experimental::dynamic_fusion::Pool2dAttributes; + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ + +class DynamicFusionPoolingLayerDataset +{ +public: + using type = std::tuple; + + struct iterator + { + iterator(std::vector::const_iterator src_it, + std::vector::const_iterator infos_it) + : _src_it{ std::move(src_it) }, + _infos_it{ std::move(infos_it) } + { + } + + std::string description() const + { + std::stringstream description; + description << "In=" << *_src_it << ":"; + description << "Info=" << *_infos_it << ":"; + return description.str(); + } + + DynamicFusionPoolingLayerDataset::type operator*() const + { + return std::make_tuple(*_src_it, *_infos_it); + } + + iterator &operator++() + { + ++_src_it; + ++_infos_it; + + return *this; + } + + private: + std::vector::const_iterator _src_it; + std::vector::const_iterator _infos_it; + }; + + iterator begin() const + { + return iterator(_src_shapes.begin(), _infos.begin()); + } + + int size() const + { + return std::min(_src_shapes.size(), _infos.size()); + } + + void add_config(TensorShape src, Pool2dAttributes info) + { + _src_shapes.emplace_back(std::move(src)); + _infos.emplace_back(std::move(info)); + } + +protected: + DynamicFusionPoolingLayerDataset() = default; + DynamicFusionPoolingLayerDataset(DynamicFusionPoolingLayerDataset &&) = default; + +private: + std::vector _src_shapes{}; + std::vector _infos{}; +}; + +// Special pooling dataset +class PoolingLayerDatasetSpecialDynamicFusion final : public DynamicFusionPoolingLayerDataset +{ +public: + PoolingLayerDatasetSpecialDynamicFusion() + { + // NCHW DataLayout + // Special cases + add_config(TensorShape(2U, 3U, 4U, 1U), Pool2dAttributes().pool_type(PoolingType::AVG).pool_size(Size2D(2,2)).stride(Size2D(3,3))); + add_config(TensorShape(60U, 52U, 3U, 2U), Pool2dAttributes().pool_type(PoolingType::AVG).pool_size(Size2D(100,100)).stride(Size2D(5,5)).pad(Padding2D(50,50,50,50))); + // Asymmetric padding + add_config(TensorShape(112U, 112U, 32U), Pool2dAttributes().pool_type(PoolingType::MAX).pool_size(Size2D(3,3)).pad(Padding2D(0,1,0,1)).stride(Size2D(2,2))); + add_config(TensorShape(14U, 14U, 832U), Pool2dAttributes().pool_type(PoolingType::MAX).pool_size(Size2D(2,2)).stride(Size2D(1,1)).pad(Padding2D(0,0,0,0))); + + } +}; +} // namespace datasets +} // namespace test +} // namespace arm_compute \ No newline at end of file diff --git a/tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp b/tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp new file mode 100644 index 0000000000..a7772aef4d --- /dev/null +++ b/tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp @@ -0,0 +1,233 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/datasets/dynamic_fusion/PoolingLayerDataset.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +TEST_SUITE(CL) +TEST_SUITE(DYNAMIC_FUSION) +TEST_SUITE(POOL2D) + +constexpr AbsoluteTolerance tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for 32-bit floating-point type */ +constexpr AbsoluteTolerance tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */ + +const auto PoolingLayerDatasetFP = combine(combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }), framework::dataset::make("PoolingSize", { Size2D(2, 2), Size2D(3, 3) })), + framework::dataset::make("Pad", { Padding2D() })), + framework::dataset::make("Stride", { Size2D(1, 1), Size2D(2, 1), Size2D(5, 7) })), + framework::dataset::make("ExcludePadding", { true })); + +const auto pool_fp_mixed_precision_dataset = framework::dataset::make("FpMixedPrecision", { true, false }); + +template +using DynamicFusionGpuPool2dFixture = DynamicFusionGpuPool2dValidationFixture; + +template +using DFSpecialGpuPool2dFixture = DynamicFusionGpuPool2dSpecialValidationFixture; + +template +using DFPoolMixedPrecisionFixture = DynamicFusionGpuPool2dMixedPrecisionValidationFixture; +// *INDENT-OFF* +// clang-format off + +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Mismatching data type + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Invalid pad/size combination + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Invalid pad/size combination + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::QASYMM8, DataLayout::NHWC), // Invalid parameters, unsupported pooling + TensorInfo(TensorShape(5U, 15U, 13U), 1, DataType::F32, DataLayout::NHWC), // Valid Non-rectangular Global Pooling + TensorInfo(TensorShape(5U, 13U, 13U), 1, DataType::F32, DataLayout::NHWC), // Invalid output Global Pooling + TensorInfo(TensorShape(5U, 13U, 13U), 1, DataType::QASYMM8, DataLayout::NHWC), // Invalid - Quantized not supported. + TensorInfo(TensorShape(5U, 13U, 13U), 1, DataType::F32, DataLayout::NHWC), // Valid global pooling + TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32, DataLayout::NCHW), // Unsupported data layout + }), + framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(2U, 25U, 11U), 1, DataType::F16, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 30U, 11U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 25U, 16U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::QASYMM8, DataLayout::NHWC), + TensorInfo(TensorShape(5U, 1U, 1U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(5U, 2U, 2U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(5U, 12U, 12U), 1, DataType::QASYMM8, DataLayout::NHWC), + TensorInfo(TensorShape(5U, 1U, 1U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(1U, 1U, 5U), 1, DataType::F32, DataLayout::NHWC), + })), + framework::dataset::make("Pool2dAttributes", { + Pool2dAttributes().pool_type(PoolingType::AVG).pool_size(Size2D(3,3)).pad(Padding2D(0,0,0,0)).stride(Size2D(1,1)), + Pool2dAttributes().pool_type(PoolingType::AVG).pool_size(Size2D(2,2)).pad(Padding2D(2,2,0,0)).stride(Size2D(1,1)), + Pool2dAttributes().pool_type(PoolingType::AVG).pool_size(Size2D(2,2)).pad(Padding2D(0,0,2,2)).stride(Size2D(1,1)), + Pool2dAttributes().pool_type(PoolingType::L2).pool_size(Size2D(3,3)).pad(Padding2D(0,0,0,0)).stride(Size2D(1,1)), + Pool2dAttributes().pool_type(PoolingType::AVG).pool_size(Size2D(15U, 13U)), + Pool2dAttributes().pool_type(PoolingType::MAX).pool_size(Size2D(13U, 13U)), + Pool2dAttributes().pool_type(PoolingType::AVG).pool_size(Size2D(2,2)).pad(Padding2D()).stride(Size2D(1,1)), + Pool2dAttributes().pool_type(PoolingType::AVG).pool_size(Size2D(13U,13U)), + Pool2dAttributes().pool_type(PoolingType::AVG).pool_size(Size2D(13U,13U)), + })), + framework::dataset::make("Expected", { false, false, false, false, true, false, false, true, false })), + input_info, output_info, pool2d_attr, expected) +{ + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &gpu_ctx }; + + // Declare GpuPool2d settings + const GpuPool2dSettings &settings = GpuPool2dSettings().mixed_precision(false); + + // Validate Pool2d Configuration + auto src_info = sketch.create_tensor_info(input_info); + auto dst_info = sketch.create_tensor_info(output_info); + bool res = bool(GpuPool2d::validate_op(sketch, &src_info, &dst_info, pool2d_attr, settings)); + ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS); +} + +// clang-format on +// *INDENT-ON* + +TEST_SUITE(Float) +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuPool2dFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(), PoolingLayerDatasetFP), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionGpuPool2dFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), PoolingLayerDatasetFP), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunSpecial, DFSpecialGpuPool2dFixture, framework::DatasetMode::ALL, combine(datasets::PoolingLayerDatasetSpecialDynamicFusion(), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +TEST_SUITE(GlobalPooling) +FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuPool2dFixture, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine( + framework::dataset::make("InputShape", { TensorShape(27U, 13U, 2U), + TensorShape(27U, 13U, 2U, 4U) + }), + framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::MAX })), + framework::dataset::make("PoolingSize", { Size2D(27, 13) })), + framework::dataset::make("Pad", { Padding2D() })), + framework::dataset::make("Stride", { Size2D(1, 1) })), + framework::dataset::make("ExcludePadding", true)), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionGpuPool2dFixture, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine( + framework::dataset::make("InputShape", { TensorShape(79U, 37U, 11U), + TensorShape(79U, 37U, 11U, 4U) + }), + framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::MAX })), + framework::dataset::make("PoolingSize", { Size2D(79, 37) })), + framework::dataset::make("Pad", { Padding2D() })), + framework::dataset::make("Stride", { Size2D(1, 1) })), + framework::dataset::make("ExcludePadding", true)), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() // GlobalPooling +TEST_SUITE_END() // FP32 + +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, DFPoolMixedPrecisionFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallNoneUnitShapes(), PoolingLayerDatasetFP), + framework::dataset::make("DataType", DataType::F16)), + pool_fp_mixed_precision_dataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunLarge, DFPoolMixedPrecisionFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), PoolingLayerDatasetFP), + framework::dataset::make("DataType", DataType::F16)), + pool_fp_mixed_precision_dataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} + +TEST_SUITE(GlobalPooling) +FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuPool2dFixture, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine( + framework::dataset::make("InputShape", { TensorShape(27U, 13U, 2U), + TensorShape(27U, 13U, 2U, 4U) + }), + framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::MAX })), + framework::dataset::make("PoolingSize", { Size2D(27, 13) })), + framework::dataset::make("Pad", { Padding2D() })), + framework::dataset::make("Stride", { Size2D(1, 1) })), + framework::dataset::make("ExcludePadding", true)), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionGpuPool2dFixture, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine( + framework::dataset::make("InputShape", { TensorShape(79U, 37U, 11U), + TensorShape(79U, 37U, 11U, 4U) + }), + framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::MAX })), + framework::dataset::make("PoolingSize", { Size2D(79, 37) })), + framework::dataset::make("Pad", { Padding2D() })), + framework::dataset::make("Stride", { Size2D(1, 1) })), + framework::dataset::make("ExcludePadding", true)), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} +TEST_SUITE_END() // GlobalPooling +TEST_SUITE_END() // FP16 +TEST_SUITE_END() // FLOAT + +TEST_SUITE_END() // POOL2D +TEST_SUITE_END() // DYNAMIC_FUSION +TEST_SUITE_END() // CL +} +} +} diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h new file mode 100644 index 0000000000..efb67f8b11 --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_POOL2DFIXTURE +#define TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_POOL2DFIXTURE + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" + +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h" +#include "src/dynamic_fusion/utils/Utils.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/framework/Fixture.h" +#include "tests/validation/reference/PoolingLayer.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template +class DynamicFusionGpuPool2dValidationGenericFixture : public framework::Fixture +{ +public: + template + void setup(TensorShape input_shape, const Pool2dAttributes &pool_attr, DataType data_type, bool mixed_precision) + { + _target = compute_target(input_shape, pool_attr, data_type, mixed_precision); + _reference = compute_reference(input_shape, convert_pool_attr_to_pool_info(pool_attr, mixed_precision), data_type); + } + +protected: + template + void fill(U &&tensor, int i) + { + switch(tensor.data_type()) + { + case DataType::F16: + { + arm_compute::utils::uniform_real_distribution_16bit distribution{ -1.0f, 1.0f }; + library->fill(tensor, distribution, i); + break; + } + case DataType::F32: + { + std::uniform_real_distribution distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + break; + } + default: + library->fill_tensor_uniform(tensor, i); + } + } + + // Given input is in nchw format + TensorType compute_target(TensorShape input_shape, const Pool2dAttributes &pool_attr, const DataType data_type, bool mixed_precision) + { + CLScheduler::get().default_reinit(); + + // Change shape due to NHWC data layout, test shapes are NCHW + permute(input_shape, PermutationVector(2U, 0U, 1U)); + + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &gpu_ctx }; + + // Create sketch tensors + auto input_info = sketch.create_tensor_info(TensorInfo(input_shape, 1, data_type, DataLayout::NHWC)); + auto dst_info = sketch.create_tensor_info(); + + // Create Pool2dSettings + GpuPool2dSettings pool_settings = GpuPool2dSettings().mixed_precision(mixed_precision); + + FunctionType::create_op(sketch, &input_info, &dst_info, pool_attr, pool_settings); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + // (Important) Allocate auxiliary tensor memory if there are any + for(auto &data : runtime.get_auxiliary_tensors()) + { + auto tensor = data.first; + const auto aux_mem_req = data.second; + tensor->allocator()->init(*data.first->info(), aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + // Construct user tensors + TensorType t_input{}; + TensorType t_dst{}; + + // Initialize user tensors + t_input.allocator()->init(input_info); + t_dst.allocator()->init(dst_info); + + // Allocate and fill user tensors + t_input.allocator()->allocate(); + t_dst.allocator()->allocate(); + + fill(AccessorType(t_input), 0); + + // Run runtime + runtime.run({ &t_input, &t_dst }); + return t_dst; + } + + SimpleTensor compute_reference(TensorShape shape, PoolingLayerInfo pool_info, DataType data_type) + { + // Create reference + SimpleTensor src(shape, data_type, 1, QuantizationInfo()); + // Fill reference + fill(src, 0); + return reference::pooling_layer(src, pool_info, QuantizationInfo(), nullptr, DataLayout::NCHW); + } + + TensorType _target{}; + SimpleTensor _reference{}; +}; + +template +class DynamicFusionGpuPool2dValidationFixture : public DynamicFusionGpuPool2dValidationGenericFixture +{ +public: + template + void setup(TensorShape input_shape, PoolingType pool_type, Size2D pool_size, Padding2D pad, Size2D stride, bool exclude_padding, DataType data_type) + { + DynamicFusionGpuPool2dValidationGenericFixture::setup(input_shape, + Pool2dAttributes().pool_type(pool_type).pool_size(pool_size).pad(pad).stride(stride).exclude_padding(exclude_padding), + data_type, false); + } +}; + +template +class DynamicFusionGpuPool2dMixedPrecisionValidationFixture : public DynamicFusionGpuPool2dValidationGenericFixture +{ +public: + template + void setup(TensorShape input_shape, PoolingType pool_type, Size2D pool_size, Padding2D pad, Size2D stride, bool exclude_padding, DataType data_type, bool mixed_precision) + { + DynamicFusionGpuPool2dValidationGenericFixture::setup(input_shape, + Pool2dAttributes().pool_type(pool_type).pool_size(pool_size).pad(pad).stride(stride).exclude_padding(exclude_padding), + data_type, mixed_precision); + } +}; + +template +class DynamicFusionGpuPool2dSpecialValidationFixture : public DynamicFusionGpuPool2dValidationGenericFixture +{ +public: + template + void setup(TensorShape input_shape, Pool2dAttributes pool_attr, DataType data_type) + { + DynamicFusionGpuPool2dValidationGenericFixture::setup(input_shape, pool_attr, data_type, false); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute + +#endif /* TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_POOL2DFIXTURE */ diff --git a/tests/validation/reference/PoolingLayer.cpp b/tests/validation/reference/PoolingLayer.cpp index 9e671e3173..378d91d829 100644 --- a/tests/validation/reference/PoolingLayer.cpp +++ b/tests/validation/reference/PoolingLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -40,7 +40,6 @@ using namespace arm_compute::misc::shape_calculator; template ::value, int>::type> SimpleTensor pooling_layer_internal(const SimpleTensor &src, const PoolingLayerInfo &info, SimpleTensor *indices, DataLayout data_layout) { - ARM_COMPUTE_ERROR_ON(info.is_global_pooling && (src.shape().x() != src.shape().y())); // Create reference SimpleTensor dst{ compute_pool_shape(TensorInfo(src.shape(), 1, src.data_type()), info), src.data_type(), 1 }; auto pooled_shape = compute_pool_shape(TensorInfo(src.shape(), 1, src.data_type()), info); diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h index 8f9c24955e..2978a238e5 100644 --- a/utils/TypePrinter.h +++ b/utils/TypePrinter.h @@ -42,8 +42,10 @@ #include "arm_compute/dynamic_fusion/sketch/attributes/CastAttributes.h" #include "arm_compute/dynamic_fusion/sketch/attributes/ClampAttributes.h" #include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h" #include "arm_compute/dynamic_fusion/sketch/attributes/ResizeAttributes.h" #include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h" #include "arm_compute/runtime/CL/CLTunerTypes.h" #include "arm_compute/runtime/CL/CLTypes.h" #include "arm_compute/runtime/FunctionDescriptors.h" @@ -3407,6 +3409,65 @@ inline std::string to_string(const Padding2D &padding2d) return str.str(); } +/** Formatted output of the arm_compute::experimental::dynamic_fusion::Pool2dAttributes type. + * + * @param[out] os Output stream. + * @param[in] pool2d_attr arm_compute::experimental::dynamic_fusion::Pool2dAttributes type to output. + * + * @return Modified output stream. + */ +inline ::std::ostream &operator<<(::std::ostream &os, const experimental::dynamic_fusion::Pool2dAttributes &pool2d_attr) +{ + os << "Pool2dAttributes=" + << "[" + << "PoolingType=" << pool2d_attr.pool_type() << "," + << "PoolSize=" << pool2d_attr.pool_size() << "," + << "Padding=" << pool2d_attr.pad() << "," + << "Stride=" << pool2d_attr.stride() << "," + << "ExcludePadding" << pool2d_attr.exclude_padding() << "]"; + + return os; +} + +/** Formatted output of the arm_compute::experimental::dynamic_fusion::Pool2dAttributes type. + * + * @param[in] pool2d_attr arm_compute::experimental::dynamic_fusion::Pool2dAttributes type to output. + * + * @return Formatted string. + */ +inline std::string to_string(const experimental::dynamic_fusion::Pool2dAttributes &pool2d_attr) +{ + std::stringstream str; + str << pool2d_attr; + return str.str(); +} + +/** Formatted output of the arm_compute::experimental::dynamic_fusion::GpuPool2dSettings type + * + * @param[out] os Output stream + * @param[in] settings arm_compute::dynamic_fusion::GpuPool2dSettings type to output + */ +inline ::std::ostream &operator<<(::std::ostream &os, const experimental::dynamic_fusion::GpuPool2dSettings &settings) +{ + os << "Settings=" + << "[" + << "FPMixedPrecision=" << settings.mixed_precision() << "]"; + return os; +} + +/** Formatted output of the arm_compute::experimental::dynamic_fusion::GpuPool2dSettings type. + * + * @param[in] settings arm_compute::experimental::dynamic_fusion::GpuPool2dSettings type to output. + * + * @return Formatted string. + */ +inline std::string to_string(const experimental::dynamic_fusion::GpuPool2dSettings &settings) +{ + std::stringstream str; + str << settings; + return str.str(); +} + /** Formatted output of the arm_compute::experimental::dynamic_fusion::Conv2dAttributes type. * * @param[out] os Output stream. @@ -3424,6 +3485,7 @@ inline ::std::ostream &operator<<(::std::ostream &os, const experimental::dynami return os; } + /** Formatted output of the arm_compute::experimental::dynamic_fusion::Conv2dAttributes type. * * @param[in] conv2d_attr arm_compute::experimental::dynamic_fusion::Conv2dAttributes type to output. -- cgit v1.2.1