From c2a51bd2cc7c4148d9444e7377af44b2f6c264ba Mon Sep 17 00:00:00 2001
From: Gunes Bayir
Date: Thu, 28 Sep 2023 10:30:18 +0100
Subject: Optimize CL and Neon Winograd tests

Several test optimizations have been introduced into the Winograd tests
for the Gpu and Cpu backends. The testing strategy is detailed in a
comment header in the test design files. In summary:

- Very large shapes in the nightly suite have been made smaller.
- If the underlying kernel is the same for different data types, we
  only need to stress some key aspects of the kernels (e.g. read/write
  lengths in the case of fp32/fp16).
- Where the underlying kernel is the same (OpenCL), Fp16 is tested on
  a subset of the shapes.
- On Cpu, there is no need to test every combination for both NCHW and
  NHWC, as we just permute the inputs and use the NHWC kernels anyway.
- Not all activations need to be tested for each and every shape.

Resolves: COMPMID-6464
Change-Id: Ie25fded85c65b9c7386dc21b23f9b695b1e77b07
Signed-off-by: Gunes Bayir
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10393
Reviewed-by: Jakub Sujak
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Benchmark: Arm Jenkins
---
 .../NEON/functions/NEWinogradConvolutionLayer.h |   9 +-
 src/cpu/operators/CpuWinogradConv2d.h           |  20 +-
 tests/datasets/LargeConvolutionLayerDataset.h   | 211 +++++---
 tests/validation/CL/Winograd.cpp                | 599 +++++++++++++++------
 tests/validation/NEON/ConvolutionLayer.cpp      | 451 +++++++++++-----
 5 files changed, 913 insertions(+), 377 deletions(-)

diff --git a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
index 7f4e354362..6caa2aeb59 100644
--- a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
@@ -21,8 +21,8 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
-#ifndef ARM_COMPUTE_NEWINOGRADCONVOLUTIONLAYER_H
-#define ARM_COMPUTE_NEWINOGRADCONVOLUTIONLAYER_H
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEWINOGRADCONVOLUTIONLAYER_H
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEWINOGRADCONVOLUTIONLAYER_H

 #include "arm_compute/core/Types.h"
 #include "arm_compute/function_info/ActivationLayerInfo.h"
@@ -77,7 +77,8 @@ public:
     * while every optional dimension from 4 and above represent a batch of inputs.
     * Data types supported: F16/F32.
     * @param[in]  weights   Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
-    *                       Currently only 3x3 and 5x5 kernels are supported.
+    *                       Supported kernel sizes: (height, width) -> 3x3, 1x3, 3x1, 5x5, 1x5, 5x1 for Fp32
+    *                                                              -> 3x3 for Fp16
     * @param[in]  biases    Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
     * @param[out] output    Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
     *                       Data types supported: Same as @p input. 
@@ -117,4 +118,4 @@ private: std::unique_ptr _impl; }; } // namespace arm_compute -#endif /* ARM_COMPUTE_NEWINOGRADCONVOLUTIONLAYER_H */ +#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEWINOGRADCONVOLUTIONLAYER_H diff --git a/src/cpu/operators/CpuWinogradConv2d.h b/src/cpu/operators/CpuWinogradConv2d.h index 7e1d952462..ba9b879431 100644 --- a/src/cpu/operators/CpuWinogradConv2d.h +++ b/src/cpu/operators/CpuWinogradConv2d.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 Arm Limited. + * Copyright (c) 2021-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,16 +21,16 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CPU_WINOGRAD_CONV2D_KERNEL_H -#define ARM_COMPUTE_CPU_WINOGRAD_CONV2D_KERNEL_H +#ifndef ACL_SRC_CPU_OPERATORS_CPUWINOGRADCONV2D_H +#define ACL_SRC_CPU_OPERATORS_CPUWINOGRADCONV2D_H #include "arm_compute/core/TensorInfo.h" #include "arm_compute/runtime/FunctionDescriptors.h" #include "src/core/common/Macros.h" #include "src/cpu/ICpuOperator.h" -#include "src/cpu/kernels/assembly/gemm_common.hpp" #include "src/cpu/kernels/CpuWinogradConv2dKernel.h" +#include "src/cpu/kernels/assembly/gemm_common.hpp" #include "src/cpu/operators/CpuActivation.h" #include "src/cpu/operators/CpuGemm.h" #include "src/cpu/operators/CpuPermute.h" @@ -65,7 +65,7 @@ public: * while every optional dimension from 4 and above represent a batch of inputs. * Data types supported: F16/F32. * @param[in] weights Weights tensor Info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input. - * Currently only 3x3 and 5x5 kernels are supported. + * For supported kernel sizes, see @ref arm_compute::NEWinogradConvolutionLayer * @param[in] biases Biases tensor Info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights. * @param[out] dst Destination tensor Info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. * Data types supported: Same as @p input. @@ -96,8 +96,8 @@ public: bool enable_fast_math = false); // Inherited methods overridden: - void run(ITensorPack &tensors) override; - void prepare(ITensorPack &constants) override; + void run(ITensorPack &tensors) override; + void prepare(ITensorPack &constants) override; experimental::MemoryRequirements workspace() const override; private: @@ -124,9 +124,9 @@ private: std::unique_ptr _permute_input; std::unique_ptr _permute_output; std::unique_ptr _permute_weights; - experimental::MemoryRequirements _aux_mem{Count}; + experimental::MemoryRequirements _aux_mem{ Count }; std::unique_ptr - _conv_args; // Make it unique ptr because this type does not have a default constructor + _conv_args; // Make it unique ptr because this type does not have a default constructor arm_conv::winograd::WinogradImpl _winograd_impl; DataLayout _data_layout; TensorInfo _winograd_transformed_input; @@ -143,4 +143,4 @@ private: } // namespace cpu } // namespace arm_compute -#endif /* ARM_COMPUTE_CPU_WINOGRAD_CONV2D_KERNEL_H */ +#endif // ACL_SRC_CPU_OPERATORS_CPUWINOGRADCONV2D_H diff --git a/tests/datasets/LargeConvolutionLayerDataset.h b/tests/datasets/LargeConvolutionLayerDataset.h index 1cffc9a221..72f73ba6d9 100644 --- a/tests/datasets/LargeConvolutionLayerDataset.h +++ b/tests/datasets/LargeConvolutionLayerDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_LARGE_CONVOLUTION_LAYER_DATASET -#define ARM_COMPUTE_TEST_LARGE_CONVOLUTION_LAYER_DATASET +#ifndef ACL_TESTS_DATASETS_LARGECONVOLUTIONLAYERDATASET_H +#define ACL_TESTS_DATASETS_LARGECONVOLUTIONLAYERDATASET_H #include "tests/datasets/ConvolutionLayerDataset.h" @@ -44,18 +44,31 @@ public: { // Kernel size 3 // Batch size 1 - add_config(TensorShape(224U, 222U, 64U), TensorShape(3U, 3U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(112U, 113U, 64U), TensorShape(3U, 3U, 64U, 128U), TensorShape(128U), TensorShape(112U, 113U, 128U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(112U, 112U, 128U), TensorShape(3U, 3U, 128U, 129U), TensorShape(129U), TensorShape(112U, 112U, 129U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(53U, 56U, 125U), TensorShape(3U, 3U, 125U, 256U), TensorShape(256U), TensorShape(51U, 54U, 256U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(56U, 56U, 256U), TensorShape(3U, 3U, 256U, 256U), TensorShape(256U), TensorShape(54U, 54U, 256U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(28U, 28U, 257U), TensorShape(3U, 3U, 257U, 512U), TensorShape(512U), TensorShape(28U, 28U, 512U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(28U, 28U, 512U), TensorShape(3U, 3U, 512U, 512U), TensorShape(512U), TensorShape(28U, 28U, 512U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 3U, 512U, 512U), TensorShape(512U), TensorShape(12U, 12U, 512U), PadStrideInfo(1, 1, 0, 0)); - // Batch size 3, 2 and 4 - add_config(TensorShape(224U, 222U, 64U, 3U), TensorShape(3U, 3U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U, 3U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(112U, 113U, 64U, 2U), TensorShape(3U, 3U, 64U, 128U), TensorShape(128U), TensorShape(110U, 111U, 128U, 2U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(3U, 3U, 127U, 128U), TensorShape(128U), TensorShape(111U, 112U, 128U, 4U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(224U, 222U, 32U), TensorShape(3U, 3U, 32U, 32U), TensorShape(32U), TensorShape(224U, 222U, 32U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(112U, 113U, 32U), TensorShape(3U, 3U, 32U, 64U), TensorShape(64U), TensorShape(112U, 113U, 64U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(112U, 112U, 64U), TensorShape(3U, 3U, 64U, 129U), TensorShape(129U), TensorShape(112U, 112U, 129U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(53U, 56U, 125U), TensorShape(3U, 3U, 125U, 128U), TensorShape(128U), TensorShape(51U, 54U, 128U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 3U, 128U, 128U), TensorShape(128U), TensorShape(54U, 54U, 128U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(28U, 28U, 257U), TensorShape(3U, 3U, 257U, 128U), TensorShape(128U), TensorShape(28U, 28U, 128U), PadStrideInfo(1, 1, 1, 1)); + + // Batch > 1 + add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(3U, 3U, 127U, 64U), TensorShape(64U), TensorShape(111U, 112U, 64U, 4U), PadStrideInfo(1, 1, 1, 1)); + } +}; + +class LargeWinogradConvolutionLayer3x3DatasetFp16Subset final : public ConvolutionLayerDataset +{ +public: + LargeWinogradConvolutionLayer3x3DatasetFp16Subset() + { + // Kernel size 3 + // Batch size 1 + add_config(TensorShape(224U, 222U, 
32U), TensorShape(3U, 3U, 32U, 32U), TensorShape(32U), TensorShape(224U, 222U, 32U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(112U, 112U, 64U), TensorShape(3U, 3U, 64U, 129U), TensorShape(129U), TensorShape(112U, 112U, 129U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 3U, 128U, 128U), TensorShape(128U), TensorShape(54U, 54U, 128U), PadStrideInfo(1, 1, 0, 0)); + + // Batch > 1 + add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(3U, 3U, 127U, 64U), TensorShape(64U), TensorShape(111U, 112U, 64U, 4U), PadStrideInfo(1, 1, 1, 1)); } }; @@ -66,18 +79,31 @@ public: { // Kernel size 3 // Batch size 1 - add_config(TensorShape(224U, 222U, 64U), TensorShape(3U, 1U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U), PadStrideInfo(1, 1, 1, 0)); - add_config(TensorShape(112U, 113U, 64U), TensorShape(3U, 1U, 64U, 128U), TensorShape(128U), TensorShape(112U, 113U, 128U), PadStrideInfo(1, 1, 1, 0)); - add_config(TensorShape(112U, 112U, 128U), TensorShape(3U, 1U, 128U, 129U), TensorShape(129U), TensorShape(112U, 112U, 129U), PadStrideInfo(1, 1, 1, 0)); - add_config(TensorShape(53U, 56U, 125U), TensorShape(3U, 1U, 125U, 256U), TensorShape(256U), TensorShape(51U, 56U, 256U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(56U, 56U, 256U), TensorShape(3U, 1U, 256U, 256U), TensorShape(256U), TensorShape(56U, 56U, 256U), PadStrideInfo(1, 1, 1, 0)); - add_config(TensorShape(28U, 28U, 257U), TensorShape(3U, 1U, 257U, 512U), TensorShape(512U), TensorShape(26U, 28U, 512U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(28U, 28U, 512U), TensorShape(3U, 1U, 512U, 512U), TensorShape(512U), TensorShape(28U, 28U, 512U), PadStrideInfo(1, 1, 1, 0)); - add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 1U, 512U, 512U), TensorShape(512U), TensorShape(12U, 14U, 512U), PadStrideInfo(1, 1, 0, 0)); - // Batch size 3, 2 and 4 - add_config(TensorShape(224U, 222U, 64U, 3U), TensorShape(3U, 1U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U, 3U), PadStrideInfo(1, 1, 1, 0)); - add_config(TensorShape(112U, 113U, 64U, 2U), TensorShape(3U, 1U, 64U, 128U), TensorShape(128U), TensorShape(110U, 113U, 128U, 2U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(3U, 1U, 127U, 128U), TensorShape(128U), TensorShape(111U, 112U, 128U, 4U), PadStrideInfo(1, 1, 1, 0)); + add_config(TensorShape(224U, 222U, 32U), TensorShape(3U, 1U, 32U, 32U), TensorShape(32U), TensorShape(224U, 222U, 32U), PadStrideInfo(1, 1, 1, 0)); + add_config(TensorShape(112U, 113U, 32U), TensorShape(3U, 1U, 32U, 64U), TensorShape(64U), TensorShape(112U, 113U, 64U), PadStrideInfo(1, 1, 1, 0)); + add_config(TensorShape(112U, 112U, 64U), TensorShape(3U, 1U, 64U, 129U), TensorShape(129U), TensorShape(112U, 112U, 129U), PadStrideInfo(1, 1, 1, 0)); + add_config(TensorShape(53U, 56U, 125U), TensorShape(3U, 1U, 125U, 128U), TensorShape(128U), TensorShape(51U, 56U, 128U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 1U, 128U, 128U), TensorShape(128U), TensorShape(56U, 56U, 128U), PadStrideInfo(1, 1, 1, 0)); + add_config(TensorShape(28U, 28U, 257U), TensorShape(3U, 1U, 257U, 128U), TensorShape(128U), TensorShape(26U, 28U, 128U), PadStrideInfo(1, 1, 0, 0)); + + // Batch > 1 + add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(3U, 1U, 127U, 64U), TensorShape(64U), TensorShape(111U, 112U, 64U, 4U), PadStrideInfo(1, 1, 1, 0)); + } +}; + +class LargeWinogradConvolutionLayer3x1DatasetFp16Subset final : public 
ConvolutionLayerDataset +{ +public: + LargeWinogradConvolutionLayer3x1DatasetFp16Subset() + { + // Kernel size 3 + // Batch size 1 + add_config(TensorShape(112U, 113U, 32U), TensorShape(3U, 1U, 32U, 64U), TensorShape(64U), TensorShape(112U, 113U, 64U), PadStrideInfo(1, 1, 1, 0)); + add_config(TensorShape(53U, 56U, 125U), TensorShape(3U, 1U, 125U, 128U), TensorShape(128U), TensorShape(51U, 56U, 128U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(28U, 28U, 257U), TensorShape(3U, 1U, 257U, 128U), TensorShape(128U), TensorShape(26U, 28U, 128U), PadStrideInfo(1, 1, 0, 0)); + + // Batch > 1 + add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(3U, 1U, 127U, 64U), TensorShape(64U), TensorShape(111U, 112U, 64U, 4U), PadStrideInfo(1, 1, 1, 0)); } }; @@ -88,18 +114,31 @@ public: { // Kernel size 3 // Batch size 1 - add_config(TensorShape(224U, 222U, 64U), TensorShape(1U, 3U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U), PadStrideInfo(1, 1, 0, 1)); - add_config(TensorShape(112U, 113U, 64U), TensorShape(1U, 3U, 64U, 128U), TensorShape(128U), TensorShape(112U, 113U, 128U), PadStrideInfo(1, 1, 0, 1)); - add_config(TensorShape(112U, 112U, 128U), TensorShape(1U, 3U, 128U, 129U), TensorShape(129U), TensorShape(112U, 110U, 129U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(53U, 56U, 125U), TensorShape(1U, 3U, 125U, 256U), TensorShape(256U), TensorShape(53U, 56U, 256U), PadStrideInfo(1, 1, 0, 1)); - add_config(TensorShape(56U, 56U, 256U), TensorShape(1U, 3U, 256U, 256U), TensorShape(256U), TensorShape(56U, 54U, 256U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(28U, 28U, 257U), TensorShape(1U, 3U, 257U, 512U), TensorShape(512U), TensorShape(28U, 28U, 512U), PadStrideInfo(1, 1, 0, 1)); - add_config(TensorShape(28U, 28U, 512U), TensorShape(1U, 3U, 512U, 512U), TensorShape(512U), TensorShape(28U, 28U, 512U), PadStrideInfo(1, 1, 0, 1)); - add_config(TensorShape(14U, 14U, 512U), TensorShape(1U, 3U, 512U, 512U), TensorShape(512U), TensorShape(14U, 12U, 512U), PadStrideInfo(1, 1, 0, 0)); - // Batch size 3, 2 and 4 - add_config(TensorShape(224U, 222U, 64U, 3U), TensorShape(1U, 3U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U, 3U), PadStrideInfo(1, 1, 0, 1)); - add_config(TensorShape(112U, 113U, 64U, 2U), TensorShape(1U, 3U, 64U, 128U), TensorShape(128U), TensorShape(112U, 113U, 128U, 2U), PadStrideInfo(1, 1, 0, 1)); - add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(1U, 3U, 127U, 128U), TensorShape(128U), TensorShape(111U, 112U, 128U, 4U), PadStrideInfo(1, 1, 0, 1)); + add_config(TensorShape(224U, 222U, 32U), TensorShape(1U, 3U, 32U, 32U), TensorShape(32U), TensorShape(224U, 222U, 32U), PadStrideInfo(1, 1, 0, 1)); + add_config(TensorShape(112U, 113U, 32U), TensorShape(1U, 3U, 32U, 64U), TensorShape(64U), TensorShape(112U, 113U, 64U), PadStrideInfo(1, 1, 0, 1)); + add_config(TensorShape(112U, 112U, 64U), TensorShape(1U, 3U, 64U, 129U), TensorShape(129U), TensorShape(112U, 110U, 129U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(53U, 56U, 125U), TensorShape(1U, 3U, 125U, 128U), TensorShape(128U), TensorShape(53U, 56U, 128U), PadStrideInfo(1, 1, 0, 1)); + add_config(TensorShape(56U, 56U, 128U), TensorShape(1U, 3U, 128U, 128U), TensorShape(128U), TensorShape(56U, 54U, 128U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(28U, 28U, 257U), TensorShape(1U, 3U, 257U, 128U), TensorShape(128U), TensorShape(28U, 28U, 128U), PadStrideInfo(1, 1, 0, 1)); + + // Batch > 1 + add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(1U, 3U, 127U, 64U), 
TensorShape(64U), TensorShape(111U, 112U, 64U, 4U), PadStrideInfo(1, 1, 0, 1)); + } +}; + +class LargeWinogradConvolutionLayer1x3DatasetFp16Subset final : public ConvolutionLayerDataset +{ +public: + LargeWinogradConvolutionLayer1x3DatasetFp16Subset() + { + // Kernel size 3 + // Batch size 1 + add_config(TensorShape(112U, 112U, 64U), TensorShape(1U, 3U, 64U, 129U), TensorShape(129U), TensorShape(112U, 110U, 129U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(53U, 56U, 125U), TensorShape(1U, 3U, 125U, 128U), TensorShape(128U), TensorShape(53U, 56U, 128U), PadStrideInfo(1, 1, 0, 1)); + add_config(TensorShape(28U, 28U, 257U), TensorShape(1U, 3U, 257U, 128U), TensorShape(128U), TensorShape(28U, 28U, 128U), PadStrideInfo(1, 1, 0, 1)); + + // Batch > 1 + add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(1U, 3U, 127U, 64U), TensorShape(64U), TensorShape(111U, 112U, 64U, 4U), PadStrideInfo(1, 1, 0, 1)); } }; @@ -110,15 +149,27 @@ public: { // Kernel size 5 // Batch size 1 - add_config(TensorShape(224U, 224U, 3U), TensorShape(5U, 5U, 3U, 64U), TensorShape(64U), TensorShape(220U, 220U, 64U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(123U, 134U, 16U), TensorShape(5U, 5U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U), PadStrideInfo(1, 1, 2, 2)); + add_config(TensorShape(224U, 224U, 3U), TensorShape(5U, 5U, 3U, 32U), TensorShape(32U), TensorShape(220U, 220U, 32U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(181U, 152U, 42U), TensorShape(5U, 5U, 42U, 100U), TensorShape(100U), TensorShape(177U, 148U, 100U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(200U, 201U, 24U), TensorShape(5U, 5U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 2, 2)); - // Batch size 2, 3 and 4 - add_config(TensorShape(224U, 224U, 3U, 2U), TensorShape(5U, 5U, 3U, 64U), TensorShape(64U), TensorShape(220U, 220U, 64U, 2U), PadStrideInfo(1, 1, 0, 0)); + // Batch > 1 + add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(5U, 5U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U, 3U), PadStrideInfo(1, 1, 2, 2)); + } +}; + +class LargeWinogradConvolutionLayer5x5DatasetFp16Subset final : public ConvolutionLayerDataset +{ +public: + LargeWinogradConvolutionLayer5x5DatasetFp16Subset() + { + // Kernel size 5 + // Batch size 1 + add_config(TensorShape(181U, 152U, 42U), TensorShape(5U, 5U, 42U, 100U), TensorShape(100U), TensorShape(177U, 148U, 100U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(200U, 201U, 24U), TensorShape(5U, 5U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 2, 2)); + + // Batch > 1 add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(5U, 5U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U, 3U), PadStrideInfo(1, 1, 2, 2)); - add_config(TensorShape(181U, 152U, 42U, 4U), TensorShape(5U, 5U, 42U, 100U), TensorShape(100U), TensorShape(177U, 148U, 100U, 4U), PadStrideInfo(1, 1, 0, 0)); } }; @@ -128,15 +179,26 @@ public: LargeWinogradConvolutionLayer5x1Dataset() { // Batch size 1 - add_config(TensorShape(224U, 224U, 3U), TensorShape(5U, 1U, 3U, 64U), TensorShape(64U), TensorShape(224U, 224U, 64U), PadStrideInfo(1, 1, 2, 0)); - add_config(TensorShape(123U, 134U, 16U), TensorShape(5U, 1U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U), PadStrideInfo(1, 1, 2, 0)); + add_config(TensorShape(224U, 224U, 3U), TensorShape(5U, 1U, 3U, 32U), TensorShape(32U), TensorShape(224U, 224U, 32U), PadStrideInfo(1, 1, 2, 0)); add_config(TensorShape(181U, 152U, 42U), TensorShape(5U, 1U, 42U, 100U), 
TensorShape(100U), TensorShape(177U, 152U, 100U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(200U, 201U, 24U), TensorShape(5U, 1U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 2, 0)); - // Batch size 2, 3 and 4 - add_config(TensorShape(224U, 224U, 3U, 2U), TensorShape(5U, 1U, 3U, 64U), TensorShape(64U), TensorShape(224U, 224U, 64U, 2U), PadStrideInfo(1, 1, 2, 0)); + // Batch > 1 + add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(5U, 1U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U, 3U), PadStrideInfo(1, 1, 2, 0)); + } +}; + +class LargeWinogradConvolutionLayer5x1DatasetFp16Subset final : public ConvolutionLayerDataset +{ +public: + LargeWinogradConvolutionLayer5x1DatasetFp16Subset() + { + // Batch size 1 + add_config(TensorShape(224U, 224U, 3U), TensorShape(5U, 1U, 3U, 32U), TensorShape(32U), TensorShape(224U, 224U, 32U), PadStrideInfo(1, 1, 2, 0)); + add_config(TensorShape(200U, 201U, 24U), TensorShape(5U, 1U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 2, 0)); + + // Batch > 1 add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(5U, 1U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U, 3U), PadStrideInfo(1, 1, 2, 0)); - add_config(TensorShape(181U, 152U, 42U, 4U), TensorShape(5U, 1U, 42U, 100U), TensorShape(100U), TensorShape(177U, 152U, 100U, 4U), PadStrideInfo(1, 1, 0, 0)); } }; @@ -146,15 +208,12 @@ public: LargeWinogradConvolutionLayer7x1Dataset() { // Batch size 1 - add_config(TensorShape(224U, 224U, 3U), TensorShape(7U, 1U, 3U, 64U), TensorShape(64U), TensorShape(218U, 224U, 64U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(123U, 134U, 16U), TensorShape(7U, 1U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U), PadStrideInfo(1, 1, 3, 0)); + add_config(TensorShape(224U, 224U, 3U), TensorShape(7U, 1U, 3U, 32U), TensorShape(32U), TensorShape(218U, 224U, 32U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(181U, 152U, 42U), TensorShape(7U, 1U, 42U, 100U), TensorShape(100U), TensorShape(175U, 152U, 100U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(200U, 201U, 24U), TensorShape(7U, 1U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 3, 0)); - // Batch size 2, 3 and 4 - add_config(TensorShape(224U, 224U, 3U, 2U), TensorShape(7U, 1U, 3U, 64U), TensorShape(64U), TensorShape(224U, 224U, 64U, 2U), PadStrideInfo(1, 1, 3, 0)); + // Batch > 1 add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(7U, 1U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U, 3U), PadStrideInfo(1, 1, 3, 0)); - add_config(TensorShape(181U, 152U, 42U, 4U), TensorShape(7U, 1U, 42U, 100U), TensorShape(100U), TensorShape(175U, 152U, 100U, 4U), PadStrideInfo(1, 1, 0, 0)); } }; @@ -164,15 +223,26 @@ public: LargeWinogradConvolutionLayer1x7Dataset() { // Batch size 1 - add_config(TensorShape(224U, 224U, 3U), TensorShape(1U, 7U, 3U, 64U), TensorShape(64U), TensorShape(224U, 218U, 64U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(123U, 134U, 16U), TensorShape(1U, 7U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U), PadStrideInfo(1, 1, 0, 3)); + add_config(TensorShape(224U, 224U, 3U), TensorShape(1U, 7U, 3U, 32U), TensorShape(32U), TensorShape(224U, 218U, 32U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(181U, 152U, 42U), TensorShape(1U, 7U, 42U, 100U), TensorShape(100U), TensorShape(181U, 146U, 100U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(200U, 201U, 24U), TensorShape(1U, 7U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 
61), PadStrideInfo(1, 1, 0, 3)); + + // Batch > 1 + add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(1U, 7U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U, 3U), PadStrideInfo(1, 1, 0, 3)); + } +}; + +class LargeWinogradConvolutionLayer1x7DatasetFp16Subset final : public ConvolutionLayerDataset +{ +public: + LargeWinogradConvolutionLayer1x7DatasetFp16Subset() + { + // Batch size 1 add_config(TensorShape(181U, 152U, 42U), TensorShape(1U, 7U, 42U, 100U), TensorShape(100U), TensorShape(181U, 146U, 100U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(200U, 201U, 24U), TensorShape(1U, 7U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 0, 3)); - // Batch size 2, 3 and 4 - add_config(TensorShape(224U, 224U, 3U, 2U), TensorShape(1U, 7U, 3U, 64U), TensorShape(64U), TensorShape(224U, 224U, 64U, 2U), PadStrideInfo(1, 1, 0, 3)); + // Batch > 1 add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(1U, 7U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U, 3U), PadStrideInfo(1, 1, 0, 3)); - add_config(TensorShape(181U, 152U, 42U, 4U), TensorShape(1U, 7U, 42U, 100U), TensorShape(100U), TensorShape(181U, 146U, 100U, 4U), PadStrideInfo(1, 1, 0, 0)); } }; @@ -182,15 +252,26 @@ public: LargeWinogradConvolutionLayer1x5Dataset() { // Batch size 1 - add_config(TensorShape(224U, 224U, 3U), TensorShape(1U, 5U, 3U, 64U), TensorShape(64U), TensorShape(224U, 224U, 64U), PadStrideInfo(1, 1, 0, 2)); - add_config(TensorShape(123U, 134U, 16U), TensorShape(1U, 5U, 16U, 7U), TensorShape(7U), TensorShape(123U, 130U, 7U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(224U, 224U, 3U), TensorShape(1U, 5U, 3U, 32U), TensorShape(32U), TensorShape(224U, 224U, 32U), PadStrideInfo(1, 1, 0, 2)); add_config(TensorShape(181U, 152U, 42U), TensorShape(1U, 5U, 42U, 100U), TensorShape(100U), TensorShape(181U, 148U, 100U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(200U, 201U, 24U), TensorShape(1U, 5U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 0, 2)); - // Batch size 2, 3 and 4 - add_config(TensorShape(224U, 224U, 3U, 2U), TensorShape(1U, 5U, 3U, 64U), TensorShape(64U), TensorShape(224U, 224U, 64U, 2U), PadStrideInfo(1, 1, 0, 2)); + // Batch size > 1 + add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(1U, 5U, 16U, 7U), TensorShape(7U), TensorShape(123U, 130U, 7U, 3U), PadStrideInfo(1, 1, 0, 0)); + } +}; + +class LargeWinogradConvolutionLayer1x5DatasetFp16Subset final : public ConvolutionLayerDataset +{ +public: + LargeWinogradConvolutionLayer1x5DatasetFp16Subset() + { + // Batch size 1 + add_config(TensorShape(224U, 224U, 3U), TensorShape(1U, 5U, 3U, 32U), TensorShape(32U), TensorShape(224U, 224U, 32U), PadStrideInfo(1, 1, 0, 2)); + add_config(TensorShape(181U, 152U, 42U), TensorShape(1U, 5U, 42U, 100U), TensorShape(100U), TensorShape(181U, 148U, 100U), PadStrideInfo(1, 1, 0, 0)); + + // Batch size > 1 add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(1U, 5U, 16U, 7U), TensorShape(7U), TensorShape(123U, 130U, 7U, 3U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(181U, 152U, 42U, 4U), TensorShape(1U, 5U, 42U, 100U), TensorShape(100U), TensorShape(181U, 148U, 100U, 4U), PadStrideInfo(1, 1, 0, 0)); } }; @@ -233,4 +314,4 @@ public: } // namespace datasets } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_LARGE_CONVOLUTION_LAYER_DATASET */ +#endif // ACL_TESTS_DATASETS_LARGECONVOLUTIONLAYERDATASET_H diff --git a/tests/validation/CL/Winograd.cpp b/tests/validation/CL/Winograd.cpp index 
6ac37d1475..196e7edb8c 100644
--- a/tests/validation/CL/Winograd.cpp
+++ b/tests/validation/CL/Winograd.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -30,6 +30,7 @@
 #include "tests/CL/CLAccessor.h"
 #include "tests/CL/Helper.h"
 #include "tests/PaddingCalculator.h"
+#include "tests/datasets/ActivationFunctionsDataset.h"
 #include "tests/datasets/LargeConvolutionLayerDataset.h"
 #include "tests/datasets/ShapeDatasets.h"
 #include "tests/datasets/SmallConvolutionLayerDataset.h"
@@ -47,6 +48,7 @@ namespace test
 {
 namespace validation
 {
+using framework::dataset::make;
 namespace
 {
 // *INDENT-OFF*
@@ -57,108 +59,232 @@ const AbsoluteTolerance<half> tolerance_convolution_layer_f16(half(0.4f));
 RelativeTolerance<half> rel_tolerance_f16(half(0.2)); /**< Tolerance value for comparing reference's output against implementation's output for FP16 data types */
 constexpr float         tolerance_num = 0.05f;                      /**< Tolerance number */
 constexpr float         abs_tolerance_convolution_layer_f16 = 2.5f; /**< Tolerance number */
-constexpr float tolerance_num_f16 = 0.15f; /**< Tolerance number */
+constexpr float         tolerance_num_f16 = 0.15f;                  /**< Tolerance number */
-//Activation Functions
-const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
+const auto ActivationFunctionsDataset = make("ActivationInfo",
 {
-    ActivationLayerInfo(),
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU),
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU),
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU)
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.8f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SOFT_RELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ABS),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SQUARE),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::HARD_SWISH),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 2.f, 1.f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::GELU)
 });
-const auto ActivationFunctionsSmallDataset = framework::dataset::make("ActivationInfo",
+
+const auto ActivationFunctionsSmallDataset = make("ActivationInfo",
 {
     ActivationLayerInfo(),
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU),
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU),
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SOFT_RELU)
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.8f, -0.5f)
 });
 } // namespace
 using namespace arm_compute::misc::shape_calculator;
+/*
+    Testing Strategy of CL Winograd:
+    - For nchw and nhwc, and for each kernel size, we have a dedicated OpenCL kernel
+      (except that 1xN and Nx1 use NxN under the hood). Therefore, test cases should be
+      stressed for each of these configurations.
+    - Fp32 and Fp16 kernels are the same. Only the DATA_TYPE build option changes
+      between these two. Because the same kernel is stressed thoroughly for both
+      small and large shapes for the Fp32 data type, Fp16 kernels are run on a subset
+      of the shapes, because we get diminishing returns by exhaustively testing the
+      same kernel.
+    - Activations only affect the output stage, which is calculated on the output tile.
+      Exhaustively testing all activations with all the shapes does not provide much
+      value but increases the testing time quite significantly. Therefore, all activations
+      are tested on a subset of the shapes, and for all MxM kernels and data layouts as
+      they represent different OpenCL kernels (1xM and Mx1 kernels use MxM under the hood).
+*/
 TEST_SUITE(CL)
 TEST_SUITE(Winograd)
 TEST_SUITE(ConvolutionLayer)
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
-    framework::dataset::make("InputInfo", {
-        TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F16),     // Insufficient padding
-        TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32),     // Datatype mismatch
-        TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F32), // Stride y not supported
-        TensorInfo(TensorShape(16U, 16U, 8U), 1, DataType::F32),     // Padding needed
-        TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32)  // Kernel size not supported
-    }),
-    framework::dataset::make("WeightsInfo", {
-        TensorInfo(TensorShape(3U, 3U, 2U, 19U), 1, DataType::F16),
-        TensorInfo(TensorShape(3U, 3U, 2U, 19U), 1, DataType::QASYMM8),
-        TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32),
-        TensorInfo(TensorShape(3U, 3U, 8U, 16U), 1, DataType::F32),
-        TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16)
-    })),
-    framework::dataset::make("BiasesInfo", {
-        TensorInfo(TensorShape(19U), 1, DataType::F16),
-        TensorInfo(TensorShape(19U), 1, DataType::F32),
-        TensorInfo(TensorShape(21U), 1, DataType::F32),
-        TensorInfo(TensorShape(16U), 1, DataType::F32),
-        TensorInfo(TensorShape(16U), 1, DataType::F32)
-    })),
-    framework::dataset::make("OutputInfo", {
-        TensorInfo(TensorShape(17U, 31U, 19U), 1, DataType::F16),
-        TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32),
-        TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32),
-        TensorInfo(TensorShape(16U, 16U, 16U), 1, DataType::F32),
-        TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32)
-    })),
-    framework::dataset::make("ConvInfo", {
-        PadStrideInfo(1, 1, 1, 1),
-        PadStrideInfo(1, 1, 1, 1),
-        PadStrideInfo(1, 2, 0, 0),
-        PadStrideInfo(1, 1, 1, 1),
-        PadStrideInfo(1, 1, 1, 0)
-    })),
-    framework::dataset::make("Expected", { false, false, false, false, false })),
-    input_info, weights_info, bias_info, output_info, conv_info, expected)
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(
+    make("InputInfo", {
+        TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F16),     // Insufficient padding
+        TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32),     // Datatype mismatch
+        TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F32), // Stride y not supported
+        TensorInfo(TensorShape(16U, 16U, 8U), 1, DataType::F32),     // Padding needed
+        TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32)  // Kernel size not supported
+    }),
+    make("WeightsInfo", {
+        TensorInfo(TensorShape(3U, 3U, 2U, 19U), 1, DataType::F16),
+        TensorInfo(TensorShape(3U, 3U, 2U, 19U), 1, DataType::QASYMM8),
+        TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32),
+        TensorInfo(TensorShape(3U, 3U, 8U, 16U), 1, DataType::F32),
+        TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16)
+    }),
+    make("BiasesInfo", {
+        TensorInfo(TensorShape(19U), 1, 
DataType::F16), + TensorInfo(TensorShape(19U), 1, DataType::F32), + TensorInfo(TensorShape(21U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32) + }), + make("OutputInfo", { + TensorInfo(TensorShape(17U, 31U, 19U), 1, DataType::F16), + TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32), + TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32) + }), + make("ConvInfo", { + PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 2, 0, 0), + PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 1, 1, 0) + }), + make("Expected", { false, false, false, false, false })), + input_info, weights_info, bias_info, output_info, conv_info, expected) { ARM_COMPUTE_EXPECT(bool(CLWinogradConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info)) == expected, framework::LogLevel::ERRORS); } +DATA_TEST_CASE(SupportedKernels, framework::DatasetMode::ALL, zip( + make("WeightsInfo", { + // Shapes are always in NCHW format. When layout is NHWC, the shape is permuted + + // Fp32/16, NCHW + // 3x1, 1x3, 3x3 --> all TRUE + TensorInfo(TensorShape(3U, 3U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(1U, 3U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(3U, 1U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW), + + // 5x1, 1x5, 5x5 --> all TRUE + TensorInfo(TensorShape(5U, 5U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(1U, 5U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW), + TensorInfo(TensorShape(5U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW), + + // 7x1, 1x7, 7x7 + // nchw does not support kernels with size 7 --> all FALSE + TensorInfo(TensorShape(7U, 7U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(1U, 7U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(7U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW), + + // unsupported kernel sizes + TensorInfo(TensorShape(2U, 2U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(5U, 2U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(3U, 6U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW), + + // Fp32/16, NHWC + // 7x1, 1x7, 7x7 --> all TRUE + TensorInfo(TensorShape(7U, 7U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(1U, 7U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC), + TensorInfo(TensorShape(7U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC), + + // 3x1, 1x3, 3x3 --> all TRUE + TensorInfo(TensorShape(3U, 3U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC), + TensorInfo(TensorShape(1U, 3U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(3U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC), + + // 5x1, 1x5, 5x5 --> all TRUE + TensorInfo(TensorShape(5U, 5U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(1U, 5U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(5U, 1U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC), + + // unsupported kernel sizes + TensorInfo(TensorShape(2U, 2U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(5U, 2U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(3U, 6U, 2U, 8U), 
1, DataType::F32, DataLayout::NHWC),
+
+    }),
+    make("Expected", {
+        true, true, true,     // nchw, 3x3, 1x3, 3x1
+        true, true, true,     // nchw, 5x5, 1x5, 5x1
+        false, false, false,  // nchw, 7x7, 1x7, 7x1
+        false, false, false,  // nchw, random unsupported kernels
+        true, true, true,     // nhwc, 7x7, 1x7, 7x1
+        true, true, true,     // nhwc, 3x3, 1x3, 3x1
+        true, true, true,     // nhwc, 5x5, 1x5, 5x1
+        false, false, false,  // nhwc, random unsupported kernels
+    })),
+    weights_info_const, expected)
+{
+    DataType   data_type   = weights_info_const.data_type();
+    DataLayout data_layout = weights_info_const.data_layout();
+
+    TensorInfo input_info   = TensorInfo(TensorShape(17U, 31U, 2U), 1, data_type);
+    TensorInfo bias_info    = TensorInfo(TensorShape(8U), 1, data_type);
+    TensorInfo weights_info = weights_info_const;
+
+    if(data_layout == DataLayout::NHWC)
+    {
+        // Convert to NHWC
+        PermutationVector perm = PermutationVector(2U, 0U, 1U);
+
+        TensorShape input_shape   = input_info.tensor_shape();
+        TensorShape weights_shape = weights_info.tensor_shape();
+        permute(input_shape, perm);
+        permute(weights_shape, perm);
+
+        input_info.set_tensor_shape(input_shape);
+        weights_info.set_tensor_shape(weights_shape);
+
+        input_info.set_data_layout(data_layout);
+        weights_info.set_data_layout(data_layout);
+        bias_info.set_data_layout(data_layout);
+    }
+
+    PadStrideInfo conv_info(1, 1, 0, 0);
+
+    TensorShape output_shape = compute_deep_convolution_shape(input_info, weights_info, conv_info);
+    TensorInfo  output_info  = TensorInfo(output_shape, 1, data_type, data_layout);
+
+    Status status = CLWinogradConvolutionLayer::validate(
+        &input_info,
+        &weights_info,
+        &bias_info,
+        &output_info,
+        conv_info,
+        ActivationLayerInfo(),
+        true /* fast math */);
+
+    ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+}
+
 TEST_SUITE(FP32)
 using CLWinogradConvolutionLayerFastMathFixture = WinogradConvolutionLayerFastMathValidationFixture;
 using CLWinogradConvolutionLayerFastMathMixedDataLayoutFixture = WinogradConvolutionLayerFastMathValidationFixture;
 TEST_SUITE(Conv3x3)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F32 })),
-                                       ActivationFunctionsSmallDataset),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
+                               make("DataType", { DataType::F32 }),
+                               ActivationFunctionsSmallDataset,
+                               make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLWinogradConvolutionLayerFastMathMixedDataLayoutFixture, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(combine(combine(combine(combine(combine(
-                                                                   framework::dataset::make("Input", TensorShape(8U, 8U, 32U)),
-                                                                   framework::dataset::make("Weight", TensorShape(1U, 3U, 32U, 1U))),
-                                                               framework::dataset::make("Bias", TensorShape(1U))),
-                                                       framework::dataset::make("Output", TensorShape(8U, 6U, 1U))),
-                                               framework::dataset::make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0))),
-                                       framework::dataset::make("Dilation", Size2D(1U, 1U))),
-                               framework::dataset::make("DataType", { DataType::F32 })),
-                       ActivationFunctionsSmallDataset),
-                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+FIXTURE_DATA_TEST_CASE(RunLarge, 
CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY, + combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(), + make("DataType", { DataType::F32 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + +FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY, + combine( + make("Input", TensorShape(8U, 8U, 32U)), + make("Weight", TensorShape(3U, 3U, 32U, 4U)), + make("Bias", TensorShape(4U)), + make("Output", TensorShape(6U, 6U, 4U)), + make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)), + make("Dilation", Size2D(1U, 1U)), + make("DataType", { DataType::F32 }), + ActivationFunctionsDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); @@ -167,20 +293,20 @@ TEST_SUITE_END() // Conv3x3 TEST_SUITE(Conv3x1) FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsSmallDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(), + make("DataType", { DataType::F32 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(), + make("DataType", { DataType::F32 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); @@ -189,20 +315,36 @@ TEST_SUITE_END() // Conv3x1 TEST_SUITE(Conv1x3) FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsSmallDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(), + make("DataType", { DataType::F32 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); +} + 
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLWinogradConvolutionLayerFastMathMixedDataLayoutFixture, framework::DatasetMode::PRECOMMIT, + combine( + make("Input", TensorShape(8U, 8U, 32U)), + make("Weight", TensorShape(1U, 3U, 32U, 1U)), + make("Bias", TensorShape(1U)), + make("Output", TensorShape(8U, 6U, 1U)), + make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)), + make("Dilation", Size2D(1U, 1U)), + make("DataType", { DataType::F32 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(), + make("DataType", { DataType::F32 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); @@ -211,10 +353,10 @@ TEST_SUITE_END() // Conv1x3 TEST_SUITE(Conv5x5) FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsSmallDataset ), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(), + make("DataType", { DataType::F32 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output @@ -222,11 +364,27 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, fram } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset ), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(), + make("DataType", { DataType::F32 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); +} +FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY, + combine( + make("Input", TensorShape(13U, 13U, 32U)), + make("Weight", TensorShape(5U, 5U, 32U, 4U)), + make("Bias", TensorShape(4U)), + make("Output", TensorShape(9U, 9U, 4U)), + make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)), + make("Dilation", Size2D(1U, 1U)), + make("DataType", { DataType::F32 }), + ActivationFunctionsDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); @@ -235,10 +393,10 @@ TEST_SUITE_END() // Conv5x5 TEST_SUITE(Conv5x1) FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, 
framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsSmallDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(), + make("DataType", { DataType::F32 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output @@ -246,10 +404,10 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, fram } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(), + make("DataType", { DataType::F32 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output @@ -259,10 +417,10 @@ TEST_SUITE_END() // Conv5x1 TEST_SUITE(Conv1x5) FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsSmallDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(), + make("DataType", { DataType::F32 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output @@ -270,16 +428,63 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, fram } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(), + make("DataType", { DataType::F32 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); } TEST_SUITE_END() // Conv1x5 + +TEST_SUITE(Conv1x7) +FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(), + make("DataType", { DataType::F32 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NHWC }))) + +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); +} + +FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY, + combine( + make("Input", TensorShape(13U, 13U, 32U)), + make("Weight", TensorShape(1U, 7U, 32U, 4U)), + make("Bias", TensorShape(4U)), + make("Output", TensorShape(13U, 11U, 4U)), + make("PadStrideInfo", PadStrideInfo(1, 1, 0, 2)), + make("Dilation", Size2D(1U, 1U)), + make("DataType", { DataType::F32 }), + 
ActivationFunctionsDataset,
+                               make("DataLayout", { DataLayout::NHWC })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
+}
+TEST_SUITE_END() // Conv1x7
+
+TEST_SUITE(Conv7x1)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
+                       combine(datasets::SmallWinogradConvolutionLayer7x1Dataset(),
+                               make("DataType", { DataType::F32 }),
+                               ActivationFunctionsSmallDataset,
+                               make("DataLayout", { DataLayout::NHWC })))
+
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
+}
+TEST_SUITE_END() // Conv7x1
+
+/** @note: Although 7x7 is among the kernels, the reference implementation
+ *  does not support it, so it remains a "test gap".
+ */
+
 TEST_SUITE_END() // FP32

@@ -288,20 +493,36 @@ TEST_SUITE(FP16)
 using CLWinogradConvolutionLayerFastMathFixture16 = WinogradConvolutionLayerFastMathValidationFixture;
 TEST_SUITE(Conv3x3)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F16 })),
-                                       ActivationFunctionsSmallDataset),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
+                               make("DataType", { DataType::F16 }),
+                               ActivationFunctionsSmallDataset,
+                               make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F16 })),
-                                       ActivationFunctionsDataset),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::LargeWinogradConvolutionLayer3x3DatasetFp16Subset(),
+                               make("DataType", { DataType::F16 }),
+                               make("ActivationInfo", { ActivationLayerInfo() }),
+                               make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
+                       combine(
+                           make("Input", TensorShape(8U, 8U, 32U)),
+                           make("Weight", TensorShape(3U, 3U, 32U, 6U)),
+                           make("Bias", TensorShape(6U)),
+                           make("Output", TensorShape(6U, 6U, 6U)),
+                           make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)),
+                           make("Dilation", Size2D(1U, 1U)),
+                           make("DataType", { DataType::F16 }),
+                           ActivationFunctionsDataset,
+                           make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
@@ -310,20 +531,20 @@ TEST_SUITE_END() // Conv3x3

 TEST_SUITE(Conv3x1)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F16 })),
-                                       ActivationFunctionsSmallDataset),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       
combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(), + make("DataType", { DataType::F16 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16); } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(), - framework::dataset::make("DataType", { DataType::F16 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer3x1DatasetFp16Subset(), + make("DataType", { DataType::F16 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16); @@ -332,20 +553,20 @@ TEST_SUITE_END() // Conv3x1 TEST_SUITE(Conv1x3) FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(), - framework::dataset::make("DataType", { DataType::F16 })), - ActivationFunctionsSmallDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(), + make("DataType", { DataType::F16 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16); } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(), - framework::dataset::make("DataType", { DataType::F16 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer1x3DatasetFp16Subset(), + make("DataType", { DataType::F16 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16); @@ -354,10 +575,10 @@ TEST_SUITE_END() // Conv1x3 TEST_SUITE(Conv5x5) FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(), - framework::dataset::make("DataType", { DataType::F16 })), - ActivationFunctionsSmallDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(), + make("DataType", { DataType::F16 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output @@ -365,23 +586,39 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(), - framework::dataset::make("DataType", { DataType::F16 })), - 
-                                       ActivationFunctionsDataset),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::LargeWinogradConvolutionLayer5x5DatasetFp16Subset(),
+                               make("DataType", { DataType::F16 }),
+                               make("ActivationInfo", { ActivationLayerInfo() }),
+                               make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
 }
+
+FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
+                       combine(
+                           make("Input", TensorShape(13U, 13U, 32U)),
+                           make("Weight", TensorShape(5U, 5U, 32U, 6U)),
+                           make("Bias", TensorShape(6U)),
+                           make("Output", TensorShape(9U, 9U, 6U)),
+                           make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)),
+                           make("Dilation", Size2D(1U, 1U)),
+                           make("DataType", { DataType::F16 }),
+                           ActivationFunctionsDataset,
+                           make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
+}
 TEST_SUITE_END() // Conv5x5

 TEST_SUITE(Conv5x1)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F16 })),
-                                       ActivationFunctionsSmallDataset),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(),
+                               make("DataType", { DataType::F16 }),
+                               ActivationFunctionsSmallDataset,
+                               make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
@@ -389,10 +626,10 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F16 })),
-                                       ActivationFunctionsDataset),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::LargeWinogradConvolutionLayer5x1DatasetFp16Subset(),
+                               make("DataType", { DataType::F16 }),
+                               make("ActivationInfo", { ActivationLayerInfo() }),
+                               make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
@@ -402,10 +639,10 @@ TEST_SUITE_END() // Conv5x1

 TEST_SUITE(Conv1x5)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F16 })),
-                                       ActivationFunctionsSmallDataset),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(),
+                               make("DataType", { DataType::F16 }),
+                               ActivationFunctionsSmallDataset,
+                               make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
@@ -413,10 +650,10 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F16 })),
-                                       ActivationFunctionsDataset),
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer1x5DatasetFp16Subset(), + make("DataType", { DataType::F16 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output @@ -426,10 +663,10 @@ TEST_SUITE_END() // Conv1x5 TEST_SUITE(Conv1x7) FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(), - framework::dataset::make("DataType", { DataType::F16 })), - ActivationFunctionsSmallDataset), - framework::dataset::make("DataLayout", { DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(), + make("DataType", { DataType::F16 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NHWC }))) { // Validate output @@ -437,16 +674,46 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x7Dataset(), - framework::dataset::make("DataType", { DataType::F16 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer1x7DatasetFp16Subset(), + make("DataType", { DataType::F16 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NHWC }))) + +{ + // Validate output + validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16); +} +FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, + combine( + make("Input", TensorShape(13U, 13U, 32U)), + make("Weight", TensorShape(1U, 7U, 32U, 6U)), + make("Bias", TensorShape(6U)), + make("Output", TensorShape(13U, 7U, 6U)), + make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)), + make("Dilation", Size2D(1U, 1U)), + make("DataType", { DataType::F16 }), + ActivationFunctionsDataset, + make("DataLayout", { DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16); } TEST_SUITE_END() // Conv1x7 + +TEST_SUITE(Conv7x1) +FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallWinogradConvolutionLayer7x1Dataset(), + make("DataType", { DataType::F16 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NHWC }))) + +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16); +} +TEST_SUITE_END() // Conv7x1 + TEST_SUITE_END() // FP16 TEST_SUITE_END() // ConvolutionLayer TEST_SUITE_END() // Winograd diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp index 06fe9f7803..2f66100fb6 100644 --- a/tests/validation/NEON/ConvolutionLayer.cpp +++ b/tests/validation/NEON/ConvolutionLayer.cpp @@ -28,15 +28,16 @@ #include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" + +#include "src/core/CPP/Validate.h" #include "src/core/helpers/MemoryHelpers.h" #include "src/cpu/operators/CpuGemmConv2d.h" #include 
"src/cpu/operators/CpuGemmDirectConv2d.h" #include "src/cpu/operators/CpuWinogradConv2d.h" + #include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" #include "tests/datasets/LargeConvolutionLayerDataset.h" #include "tests/datasets/SmallConvolutionLayerDataset.h" -#include "tests/datasets/TinyConvolutionLayerDataset.h" #include "tests/framework/Asserts.h" #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" @@ -50,6 +51,8 @@ namespace test { namespace validation { +using framework::dataset::make; + namespace detail { template <> @@ -85,13 +88,13 @@ constexpr float tolerance_num = 0.07f; #ifdef ARM_COMPUTE_ENABLE_SME // TODO(COMPMID-6011): SME kernels and the reference model use different rounding mode. // Temporarily increase the tolerance for quantized data. -constexpr AbsoluteTolerance tolerance_qasymm8(1.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ -#else // ARM_COMPUTE_ENABLE_SME -constexpr AbsoluteTolerance tolerance_qasymm8(0.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ -#endif // ARM_COMPUTE_ENABLE_SME +constexpr AbsoluteTolerance tolerance_qasymm8(1.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ +#else // ARM_COMPUTE_ENABLE_SME +constexpr AbsoluteTolerance tolerance_qasymm8(0.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ +#endif // ARM_COMPUTE_ENABLE_SME /** CNN data types */ -const auto CNNDataTypes = framework::dataset::make("DataType", +const auto CNNDataTypes = make("DataType", { #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC DataType::F16, @@ -99,14 +102,36 @@ const auto CNNDataTypes = framework::dataset::make("DataType", DataType::F32, DataType::QASYMM8, }); -const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo", +const auto ActivationFunctionsDataset = make("ActivationInfo", { ActivationLayerInfo(), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f) }); -const auto QuantizationData = framework::dataset::make("QuantizationInfo", +const auto ActivationFunctionsDatasetNightly = make("ActivationInfo", +{ + ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f), + + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f, -0.5f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SOFT_RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ABS), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SQUARE), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SWISH), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::HARD_SWISH), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 2.f, 1.f), +#ifdef __aarch64__ + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::GELU), +#endif // __aarch64__ +}); + +const auto 
QuantizationData = make("QuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(0.3f, 3), @@ -121,32 +146,32 @@ TEST_SUITE(ConvolutionLayer) // *INDENT-OFF* // clang-format off DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F32), + make("InputInfo", { TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F32), TensorInfo(TensorShape(23U, 27U, 32U, 4U), 1, DataType::F32), TensorInfo(TensorShape(3U, 3U, 2U, 1U), 1, DataType::F32), TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32) }), - framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F32), + make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F32), TensorInfo(TensorShape(5U, 5U, 32U, 21U), 1, DataType::F32), TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32), TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16) })), - framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F32), + make("OutputInfo", { TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F32), TensorInfo(TensorShape(19U, 23U, 21U, 4U), 1, DataType::F32), TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32), TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32) })), - framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0), + make("ConvInfo", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(3, 2, 1, 0) })), - framework::dataset::make("FastMath", { true, + make("FastMath", { true, true, false, false })), - framework::dataset::make("Expected", { ConvolutionMethod::WINOGRAD, ConvolutionMethod::WINOGRAD, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM })), + make("Expected", { ConvolutionMethod::WINOGRAD, ConvolutionMethod::WINOGRAD, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM })), input_info, weights_info, output_info, conv_info, fast_math, expected) { ConvolutionMethod is_valid = NEConvolutionLayer::get_convolution_method(&input_info.clone()->set_is_resizable(true), @@ -158,6 +183,14 @@ DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(z // *INDENT-ON* TEST_SUITE_END() // ConvolutionLayer +/* + Testing Strategy of Neon Winograd: + - There is no need to thoroughly test nchw cases because winograd kernels accept + nhwc and the tensors are permuted before and after if they're nchw. + - Except relu and bounded relu, testing activations for a single input + combination is enough because activation is not fused into winograd and called + separately. +*/ TEST_SUITE(WinogradLayer) template using NEWinogradConvolutionLayerFixture = WinogradConvolutionLayerFastMathValidationFixture; @@ -269,38 +302,148 @@ TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL) } } +DATA_TEST_CASE(SupportedKernels, framework::DatasetMode::ALL, zip( + make("WeightsInfo", +{ + // Shapes are always in NCHW format. 
+    // Shapes are always in NCHW format. When layout is NHWC, the shape is permuted
+
+    // Fp32, NCHW/NHWC (layout does not matter as it's permuted to NHWC anyway)
+    // 3x1, 1x3, 3x3 --> all TRUE
+    TensorInfo(TensorShape(3U, 3U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+    TensorInfo(TensorShape(1U, 3U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+    TensorInfo(TensorShape(3U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+
+    // 5x1, 1x5, 5x5 --> all TRUE
+    TensorInfo(TensorShape(5U, 5U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+    TensorInfo(TensorShape(1U, 5U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+    TensorInfo(TensorShape(5U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+
+    // 7x1, 1x7, 7x7
+    // --> only 7x7 FALSE (1x7 and 7x1 are supported for Fp32)
+    TensorInfo(TensorShape(7U, 7U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+    TensorInfo(TensorShape(1U, 7U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+    TensorInfo(TensorShape(7U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+
+    // unsupported kernel sizes
+    TensorInfo(TensorShape(2U, 2U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+    TensorInfo(TensorShape(5U, 2U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+    TensorInfo(TensorShape(3U, 6U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+
+    // Fp16
+    // 3x1, 1x3, 3x3 --> only 3x3 TRUE
+    TensorInfo(TensorShape(3U, 3U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+    TensorInfo(TensorShape(1U, 3U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+    TensorInfo(TensorShape(3U, 1U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW),
+
+    // 5x1, 1x5, 5x5 --> all FALSE
+    TensorInfo(TensorShape(5U, 5U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW),
+    TensorInfo(TensorShape(1U, 5U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+    TensorInfo(TensorShape(5U, 1U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW),
+
+    // 7x1, 1x7, 7x7
+    // --> all FALSE
+    TensorInfo(TensorShape(7U, 7U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW),
+    TensorInfo(TensorShape(1U, 7U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+    TensorInfo(TensorShape(7U, 1U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+
+    // unsupported kernel sizes
+    TensorInfo(TensorShape(2U, 2U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+    TensorInfo(TensorShape(5U, 2U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+    TensorInfo(TensorShape(3U, 6U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW),
+
+}),
+make("Expected",
+{
+    // fp32
+    true, true, true,    // 3x3, 1x3, 3x1
+    true, true, true,    // 5x5, 1x5, 5x1
+    false, true, true,   // 7x7, 1x7, 7x1
+    false, false, false, // random unsupported kernels
+
+    // fp16
+    true, false, false,  // 3x3, 1x3, 3x1
+    false, false, false, // 5x5, 1x5, 5x1
+    false, false, false, // 7x7, 1x7, 7x1
+    false, false, false, // random unsupported kernels
+})),
+weights_info_const, expected_const)
+{
+    DataType   data_type   = weights_info_const.data_type();
+    DataLayout data_layout = weights_info_const.data_layout();
+
+    TensorInfo input_info   = TensorInfo(TensorShape(17U, 31U, 2U), 1, data_type);
+    TensorInfo bias_info    = TensorInfo(TensorShape(8U), 1, data_type);
+    TensorInfo weights_info = weights_info_const;
+
+    if(data_layout == DataLayout::NHWC)
+    {
+        // Convert to NHWC
+        PermutationVector perm = PermutationVector(2U, 0U, 1U);
+
+        TensorShape input_shape   = input_info.tensor_shape();
+        TensorShape weights_shape = weights_info.tensor_shape();
+        permute(input_shape, perm);
+        permute(weights_shape, perm);
+
+        input_info.set_tensor_shape(input_shape);
+        weights_info.set_tensor_shape(weights_shape);
+
+        input_info.set_data_layout(data_layout);
+        weights_info.set_data_layout(data_layout);
+        bias_info.set_data_layout(data_layout);
+    }
+
+    PadStrideInfo conv_info(1, 1, 0, 0);
+
+    TensorShape output_shape = compute_deep_convolution_shape(input_info, weights_info, conv_info);
+    TensorInfo  output_info  = TensorInfo(output_shape, 1, data_type, data_layout);
+
+    Status status = NEWinogradConvolutionLayer::validate(
+                        &input_info,
+                        &weights_info,
+                        &bias_info,
+                        &output_info,
+                        conv_info,
+                        ActivationLayerInfo(),
+                        true /* fast math */);
+
+    Status fp16_supported = ::arm_compute::error_on_unsupported_cpu_fp16("N/A", "N/A", 0, &input_info);
+    bool   expected       = expected_const && static_cast<bool>(fp16_supported);
+
+    ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+}
+
 TEST_SUITE(FP32)
 TEST_SUITE(Conv1x3)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F32 })),
-                                       ActivationFunctionsDataset),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(),
+                               make("DataType", { DataType::F32 }),
+                               ActivationFunctionsDataset,
+                               make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
     validate(Accessor(_target), _reference, abs_tolerance_f32);
 }
 FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEWinogradConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(combine(combine(combine(combine(combine(
-                                   framework::dataset::make("Input", TensorShape(8U, 8U, 32U)),
-                                   framework::dataset::make("Weight", TensorShape(1U, 3U, 32U, 1U))),
-                                   framework::dataset::make("Bias", TensorShape(1U))),
-                                   framework::dataset::make("Output", TensorShape(8U, 6U, 1U))),
-                                   framework::dataset::make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0))),
-                                   framework::dataset::make("Dilation", Size2D(1U, 1U))),
-                                   framework::dataset::make("DataType", { DataType::F32 })),
-                                   ActivationFunctionsDataset),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(
+                           make("Input", TensorShape(8U, 8U, 32U)),
+                           make("Weight", TensorShape(1U, 3U, 32U, 1U)),
+                           make("Bias", TensorShape(1U)),
+                           make("Output", TensorShape(8U, 6U, 1U)),
+                           make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)),
+                           make("Dilation", Size2D(1U, 1U)),
+                           make("DataType", { DataType::F32 }),
+                           ActivationFunctionsDataset,
+                           make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
     validate(Accessor(_target), _reference, abs_tolerance_f32);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F32 })),
-                                       ActivationFunctionsDataset),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(),
+                               make("DataType", { DataType::F32 }),
+                               make("ActivationInfo", { ActivationLayerInfo() }),
+                               make("DataLayout", { DataLayout::NHWC })))
 {
     // Validate output
     validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
@@ -310,19 +453,19 @@ TEST_SUITE_END() // Conv1x3

 TEST_SUITE(Conv3x1)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F32 })),
-                                       ActivationFunctionsDataset),
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(), + make("DataType", { DataType::F32 }), + ActivationFunctionsDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(Accessor(_target), _reference, abs_tolerance_f32); } FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(), + make("DataType", { DataType::F32 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NHWC }))) { // Validate output validate(Accessor(_target), _reference, abs_tolerance_1xN_f32); @@ -332,19 +475,19 @@ TEST_SUITE_END() // Conv3x1 TEST_SUITE(Conv1x5) FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(), + make("DataType", { DataType::F32 }), + ActivationFunctionsDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(Accessor(_target), _reference, abs_tolerance_f32); } FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(), + make("DataType", { DataType::F32 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NHWC }))) { // Validate output validate(Accessor(_target), _reference, abs_tolerance_1xN_f32); @@ -354,19 +497,19 @@ TEST_SUITE_END() // Conv1x5 TEST_SUITE(Conv5x1) FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(), + make("DataType", { DataType::F32 }), + ActivationFunctionsDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(Accessor(_target), _reference, abs_tolerance_f32); } FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(), + make("DataType", { DataType::F32 }), + make("ActivationInfo", { ActivationLayerInfo() }), 
+ make("DataLayout", { DataLayout::NHWC }))) { // Validate output validate(Accessor(_target), _reference, abs_tolerance_1xN_f32); @@ -376,10 +519,10 @@ TEST_SUITE_END() // Conv5x1 TEST_SUITE(Conv7x1) FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer7x1Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer7x1Dataset(), + make("DataType", { DataType::F32 }), + ActivationFunctionsDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(Accessor(_target), _reference, abs_tolerance_f32); @@ -387,9 +530,9 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture, frame FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeWinogradConvolutionLayer7x1Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + make("DataType", { DataType::F32 })), + make("ActivationInfo", { ActivationLayerInfo() })), + make("DataLayout", { DataLayout::NHWC }))) { // Validate output validate(Accessor(_target), _reference, abs_tolerance_1xN_f32); @@ -398,20 +541,20 @@ TEST_SUITE_END() // Conv7x1 TEST_SUITE(Conv1x7) FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(), + make("DataType", { DataType::F32 }), + ActivationFunctionsDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(Accessor(_target), _reference, abs_tolerance_f32); } FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer7x1Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer7x1Dataset(), + make("DataType", { DataType::F32 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NHWC }))) { // Validate output validate(Accessor(_target), _reference, abs_tolerance_1xN_f32); @@ -420,20 +563,40 @@ TEST_SUITE_END() // Conv1x7 TEST_SUITE(Conv3x3) FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(), + make("DataType", { DataType::F32 }), + ActivationFunctionsDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + +{ + // Validate output + validate(Accessor(_target), _reference, abs_tolerance_f32); 
+}
+/// It's enough to run the activations for a single weight/input combination and data type, because
+/// the activation function is applied on top of the Winograd output as a separate operator
+/// TODO: Enable after COMPMID-6573 is resolved
+FIXTURE_DATA_TEST_CASE(RunActivations, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::DISABLED,
+                       combine(
+                           make("Input", TensorShape(3U, 3U, 32U)),
+                           make("Weight", TensorShape(3U, 3U, 32U, 4U)),
+                           make("Bias", TensorShape(4U)),
+                           make("Output", TensorShape(1U, 1U, 4U)),
+                           make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)),
+                           make("Dilation", Size2D(1U, 1U)),
+                           make("DataType", { DataType::F32 }),
+                           ActivationFunctionsDatasetNightly,
+                           make("DataLayout", { DataLayout::NHWC })))
 {
     // Validate output
     validate(Accessor(_target), _reference, abs_tolerance_f32);
 }
+
 FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F32 })),
-                                       ActivationFunctionsDataset),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
+                               make("DataType", { DataType::F32 }),
+                               make("ActivationInfo", { ActivationLayerInfo() }),
+                               make("DataLayout", { DataLayout::NHWC })))
 {
     // Validate output
@@ -444,20 +607,20 @@ TEST_SUITE_END() // Conv3x3

 TEST_SUITE(Conv5x5)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F32 })),
-                                       ActivationFunctionsDataset),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(),
+                               make("DataType", { DataType::F32 }),
+                               ActivationFunctionsDataset,
+                               make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
     validate(Accessor(_target), _reference, abs_tolerance_f32);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F32 })),
-                                       ActivationFunctionsDataset),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(),
+                               make("DataType", { DataType::F32 }),
+                               make("ActivationInfo", { ActivationLayerInfo() }),
+                               make("DataLayout", { DataLayout::NHWC })))
 {
     // Validate output
@@ -467,12 +630,12 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, frame
 }
 TEST_SUITE_END() // Conv5x5

 FIXTURE_DATA_TEST_CASE(RunSmallNoBias, NEWinogradConvolutionLayerNoBiasFixture<float>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(framework::dataset::concat(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
-                                                                          datasets::SmallWinogradConvolutionLayer5x5Dataset()),
-                                               framework::dataset::make("DataType", { DataType::F32 })),
-                                       ActivationFunctionsDataset),
-
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(framework::dataset::concat(
+                                   datasets::SmallWinogradConvolutionLayer3x3Dataset(),
+                                   datasets::SmallWinogradConvolutionLayer5x5Dataset()),
+                               make("DataType", { DataType::F32 }),
+                               ActivationFunctionsDataset,
+                               make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
     validate(Accessor(_target), _reference, abs_tolerance_f32);
@@ -484,24 +647,26 @@ TEST_SUITE_END() // FP32

 TEST_SUITE(FP16)
 using CLWinogradConvolutionLayerFastMathFixture16 = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, half, float>;

-DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
-                                          framework::dataset::make("InputInfo", { TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F16),
-                                                                                  TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F16)
-                                          }),
-                                          framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F16),
-                                                                                    TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F16)
-                                          })),
-                                          framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F32),
-                                                                                   TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F16)
-                                          })),
-                                          framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
-                                                                                 PadStrideInfo(1, 1, 0, 0)
-                                          })),
-                                          framework::dataset::make("FastMath", { false, // case fp16 and fast_math False then disable Winograd
-                                                                                 true   // case fp16 and fast_math True then enable Winograd
-                                          })),
-                                          framework::dataset::make("Expected", { ConvolutionMethod::GEMM, ConvolutionMethod::WINOGRAD })),
-                                          input_info, weights_info, output_info, conv_info, fast_math, expected)
+DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(
+                   make("InputInfo", { TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F16),
+                                       TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F16)
+                                     }),
+                   make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F16),
+                                         TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F16)
+                                       }),
+                   make("OutputInfo", { TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F32),
+                                        TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F16)
+                                      }),
+                   make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
+                                      PadStrideInfo(1, 1, 0, 0)
+                                    }),
+                   make("FastMath",
+{
+    false, // case fp16 and fast_math False then disable Winograd
+    true   // case fp16 and fast_math True then enable Winograd
+}),
+make("Expected", { ConvolutionMethod::GEMM, ConvolutionMethod::WINOGRAD })),
+input_info, weights_info, output_info, conv_info, fast_math, expected)
 {
     ConvolutionMethod is_valid = NEConvolutionLayer::get_convolution_method(&input_info.clone()->set_is_resizable(true),
                                                                             &weights_info.clone()->set_is_resizable(true),
@@ -511,10 +676,10 @@ DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(z
 }
 TEST_SUITE(Conv3x3)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F16 })),
-                                       ActivationFunctionsDataset),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
+                               make("DataType", { DataType::F16 }),
+                               ActivationFunctionsDataset,
+                               make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
@@ -522,10 +687,10 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F16 })),
-                                       ActivationFunctionsDataset),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
+                               make("DataType", { DataType::F16 }),
+                               make("ActivationInfo", { ActivationLayerInfo() }),
+                               make("DataLayout", { DataLayout::NHWC })))
 {
     // Validate output
@@ -968,7 +1133,9 @@ TEST_SUITE(Float)
 #if defined(ARM_COMPUTE_ENABLE_BF16)
 TEST_SUITE(BFLOAT16)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
-                       framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::BFLOAT16)), framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+                       framework::dataset::make("ReshapeWeights", { true })),
+                       framework::dataset::make("DataType", DataType::BFLOAT16)),
+                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
                        ActivationFunctionsDataset))
 {
     // Validate output
@@ -980,7 +1147,10 @@ TEST_SUITE_END() // BFLOAT16

 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 TEST_SUITE(FP16)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
-                       framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NCHW })), ActivationFunctionsDataset))
+                       framework::dataset::make("ReshapeWeights", { true })),
+                       framework::dataset::make("DataType", DataType::F16)),
+                       framework::dataset::make("DataLayout", { DataLayout::NCHW })),
+                       ActivationFunctionsDataset))
 {
     // Validate output
     validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16);
@@ -990,7 +1160,9 @@ TEST_SUITE_END() // FP16

 TEST_SUITE(FP32)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
-                       framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                       framework::dataset::make("ReshapeWeights", { true })),
+                       framework::dataset::make("DataType", DataType::F32)),
+                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
                        ActivationFunctionsDataset))
 {
     // Validate output
@@ -1032,8 +1204,11 @@ const auto QuantizedActivationFunctionsDataset = framework::dataset::make("Activ
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
-                       framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), QuantizedActivationFunctionsDataset))
+                       framework::dataset::make("ReshapeWeights", { true })),
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+                       QuantizedActivationFunctionsDataset))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_qasymm8);
@@ -1059,8 +1234,11 @@ TEST_SUITE_END() // QASYMM8

 TEST_SUITE(QASYMM8_SIGNED)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
-                       framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.01f, -10) })), QuantizedActivationFunctionsDataset))
+                       framework::dataset::make("ReshapeWeights", { true })),
+                       framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.01f, -10) })),
+                       QuantizedActivationFunctionsDataset))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_qasymm8);
@@ -1214,7 +1392,10 @@ TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)

 TEST_SUITE(Float)
 TEST_SUITE(FP32)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
-                       framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NHWC })), ActivationFunctionsDataset))
+                       framework::dataset::make("ReshapeWeights", { true })),
+                       framework::dataset::make("DataType", DataType::F32)),
+                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+                       ActivationFunctionsDataset))
 {
     // Validate output
     validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
@@ -1238,8 +1419,11 @@ const auto QuantizedActivationFunctionsDataset = framework::dataset::make("Activ
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
-                       framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("DataLayout", { DataLayout::NHWC })),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), QuantizedActivationFunctionsDataset))
+                       framework::dataset::make("ReshapeWeights", { true })),
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+                       QuantizedActivationFunctionsDataset))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_qasymm8);
@@ -1248,8 +1432,11 @@ TEST_SUITE_END() // QASYMM8

 TEST_SUITE(QASYMM8_SIGNED)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
-                       framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("DataLayout", { DataLayout::NHWC })),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.01f, -10) })), QuantizedActivationFunctionsDataset))
+                       framework::dataset::make("ReshapeWeights", { true })),
+                       framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.01f, -10) })),
+                       QuantizedActivationFunctionsDataset))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_qasymm8);
--
cgit v1.2.1
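Editor's note (illustration, not part of the patch): the new SupportedKernels
case in the Neon tests above boils down to probing
NEWinogradConvolutionLayer::validate() with a candidate weight shape before any
fixture is run. Below is a minimal standalone sketch of that probe. The shapes
and the helper name are hypothetical; only the validate() signature and the
TensorInfo/PadStrideInfo constructors already used in the test are assumed.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h"

    // Hypothetical helper: returns true if the Neon Winograd path accepts a
    // 3x3 Fp32 NHWC convolution (fast math enabled, no fused activation).
    bool winograd_3x3_fp32_supported()
    {
        using namespace arm_compute;
        // NHWC shapes are ordered [C, W, H(, N)] in ACL's TensorShape: C=2, W=17, H=31
        const TensorInfo input(TensorShape(2U, 17U, 31U), 1, DataType::F32, DataLayout::NHWC);
        // Weights: IFM=2, 3x3 kernel, OFM=8
        const TensorInfo weights(TensorShape(2U, 3U, 3U, 8U), 1, DataType::F32, DataLayout::NHWC);
        const TensorInfo biases(TensorShape(8U), 1, DataType::F32);
        // Stride 1, no padding: 17-3+1 = 15 and 31-3+1 = 29
        const TensorInfo output(TensorShape(8U, 15U, 29U), 1, DataType::F32, DataLayout::NHWC);
        const PadStrideInfo conv_info(1, 1, 0, 0);

        const Status status = NEWinogradConvolutionLayer::validate(&input, &weights, &biases, &output,
                                                                   conv_info, ActivationLayerInfo(),
                                                                   true /* enable_fast_math */);
        return bool(status);
    }

Consistent with the Expected values in the test, such a probe succeeds for the
3x3 and 5x5 kernel families plus 1x7/7x1 in Fp32, and only for 3x3 in Fp16
(and, for Fp16, only when the CPU actually supports fp16 arithmetic).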