diff options
Diffstat (limited to 'tests')
476 files changed, 35603 insertions, 15291 deletions
diff --git a/tests/AssetsLibrary.cpp b/tests/AssetsLibrary.cpp index 62de78cf26..571b55125b 100644 --- a/tests/AssetsLibrary.cpp +++ b/tests/AssetsLibrary.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -524,13 +524,14 @@ inline void validate_npy_header(std::ifstream &stream, const std::string &expect ARM_COMPUTE_UNUSED(expect_typestr); ARM_COMPUTE_UNUSED(expect_shape); - std::string header = npy::read_header(stream); + std::string header_s = npy::read_header(stream); // Parse header - std::vector<unsigned long> shape; - bool fortran_order = false; - std::string typestr; - npy::parse_header(header, typestr, fortran_order, shape); + npy::header_t header = npy::parse_header(header_s); + + std::vector<unsigned long> shape = header.shape; + bool fortran_order = header.fortran_order; + std::string typestr = header.dtype.str(); // Check if the typestring matches the given one ARM_COMPUTE_ERROR_ON_MSG(typestr != expect_typestr, "Typestrings mismatch"); diff --git a/tests/AssetsLibrary.h b/tests/AssetsLibrary.h index f465577372..bd97cb7bd4 100644 --- a/tests/AssetsLibrary.h +++ b/tests/AssetsLibrary.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -406,6 +406,17 @@ public: template <typename T, typename DataType> void fill_static_values(T &&tensor, const std::vector<DataType> &values) const; + // Function type to generate a number to fill tensors. + template <typename ResultType> + using GeneratorFunctionType = std::function<ResultType(void)>; + /** Fill a tensor with a value generator function. + * + * @param[in, out] tensor To be filled tensor. + * @param[in] generate_value A function that generates values. + */ + template <typename T, typename ResultType> + void fill_with_generator(T &&tensor, const GeneratorFunctionType<ResultType> &generate_value) const; + private: // Function prototype to convert between image formats. using Converter = void (*)(const RawTensor &src, RawTensor &dst); @@ -413,9 +424,6 @@ private: using Extractor = void (*)(const RawTensor &src, RawTensor &dst); // Function prototype to load an image file. using Loader = RawTensor (*)(const std::string &path); - // Function type to generate a number to fill tensors. - template <typename ResultType> - using GeneratorFunctionType = std::function<ResultType(void)>; const Converter &get_converter(Format src, Format dst) const; const Converter &get_converter(DataType src, Format dst) const; @@ -460,14 +468,6 @@ private: */ const RawTensor &find_or_create_raw_tensor(const std::string &name, Format format, Channel channel) const; - /** Fill a tensor with a value generator function. - * - * @param[in, out] tensor To be filled tensor. - * @param[in] generate_value A function that generates values. - */ - template <typename T, typename ResultType> - void fill_with_generator(T &&tensor, const GeneratorFunctionType<ResultType> &generate_value) const; - mutable TensorCache _cache{}; mutable arm_compute::Mutex _format_lock{}; mutable arm_compute::Mutex _channel_lock{}; @@ -725,7 +725,7 @@ void AssetsLibrary::fill_tensor_uniform(T &&tensor, std::random_device::result_t case DataType::U8: case DataType::QASYMM8: { - std::uniform_int_distribution<uint8_t> distribution_u8(std::numeric_limits<uint8_t>::lowest(), std::numeric_limits<uint8_t>::max()); + std::uniform_int_distribution<unsigned int> distribution_u8(std::numeric_limits<uint8_t>::lowest(), std::numeric_limits<uint8_t>::max()); fill(tensor, distribution_u8, seed_offset); break; } @@ -734,7 +734,7 @@ void AssetsLibrary::fill_tensor_uniform(T &&tensor, std::random_device::result_t case DataType::QSYMM8_PER_CHANNEL: case DataType::QASYMM8_SIGNED: { - std::uniform_int_distribution<int8_t> distribution_s8(std::numeric_limits<int8_t>::lowest(), std::numeric_limits<int8_t>::max()); + std::uniform_int_distribution<int> distribution_s8(std::numeric_limits<int8_t>::lowest(), std::numeric_limits<int8_t>::max()); fill(tensor, distribution_s8, seed_offset); break; } @@ -826,20 +826,20 @@ void AssetsLibrary::fill_tensor_uniform_ranged(T case DataType::U8: case DataType::QASYMM8: { - const auto converted_pairs = detail::convert_range_pair<uint8_t>(excluded_range_pairs); - RangedUniformDistribution<uint8_t> distribution_u8(std::numeric_limits<uint8_t>::lowest(), - std::numeric_limits<uint8_t>::max(), - converted_pairs); + const auto converted_pairs = detail::convert_range_pair<uint32_t>(excluded_range_pairs); + RangedUniformDistribution<uint32_t> distribution_u8(std::numeric_limits<uint8_t>::lowest(), + std::numeric_limits<uint8_t>::max(), + converted_pairs); fill(tensor, distribution_u8, seed_offset); break; } case DataType::S8: case DataType::QSYMM8: { - const auto converted_pairs = detail::convert_range_pair<int8_t>(excluded_range_pairs); - RangedUniformDistribution<int8_t> distribution_s8(std::numeric_limits<int8_t>::lowest(), - std::numeric_limits<int8_t>::max(), - converted_pairs); + const auto converted_pairs = detail::convert_range_pair<int32_t>(excluded_range_pairs); + RangedUniformDistribution<int32_t> distribution_s8(std::numeric_limits<int8_t>::lowest(), + std::numeric_limits<int8_t>::max(), + converted_pairs); fill(tensor, distribution_s8, seed_offset); break; } @@ -918,7 +918,7 @@ void AssetsLibrary::fill_tensor_uniform(T &&tensor, std::random_device::result_t case DataType::QASYMM8: { ARM_COMPUTE_ERROR_ON(!(std::is_same<uint8_t, D>::value)); - std::uniform_int_distribution<uint8_t> distribution_u8(low, high); + std::uniform_int_distribution<uint32_t> distribution_u8(low, high); fill(tensor, distribution_u8, seed_offset); break; } @@ -927,7 +927,7 @@ void AssetsLibrary::fill_tensor_uniform(T &&tensor, std::random_device::result_t case DataType::QASYMM8_SIGNED: { ARM_COMPUTE_ERROR_ON(!(std::is_same<int8_t, D>::value)); - std::uniform_int_distribution<int8_t> distribution_s8(low, high); + std::uniform_int_distribution<int32_t> distribution_s8(low, high); fill(tensor, distribution_s8, seed_offset); break; } diff --git a/tests/BUILD.bazel b/tests/BUILD.bazel new file mode 100644 index 0000000000..5763938d3c --- /dev/null +++ b/tests/BUILD.bazel @@ -0,0 +1,158 @@ +# Copyright (c) 2023 Arm Limited. +# +# SPDX-License-Identifier: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +#--------------------------------------------------------------------- +# Validation Framework Library + +cc_library( + name = "validation_framework", + srcs = glob([ + "validation/reference/*.cpp", + "validation/*.cpp", + "*.h", + ]), + hdrs = glob([ + "validation/reference/*.h", + "validation/**/*.h", + ]), + copts = [] + select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }) + select({ + "//:debug_flag": [ + "-O0", + "-g", + "-gdwarf-2", + ], + "//conditions:default": ["-O3"], + }) + + select({ + "//:openmp_flag": ["-fopenmp"], + "//conditions:default": [], + }) + + select({ + "//:Werror_flag": ["-Werror"], + "//conditions:default": [], + }), + linkstatic = True, + deps = [ + "//:arm_compute", + "//:common_defines", + "//tests/framework", + ], +) + +#--------------------------------------------------------------------- +# Validation Binary +cc_binary( + name = "arm_compute_validation", + srcs = glob([ + "validation/UNIT/**/*.cpp", + "validation/CPP/**/*.cpp", + "NEON/*.h", + "validation/NEON/**/*.cpp", + "validation/NEON/**/*.h", + "*.cpp", + "datasets/*.h", + "instruments/*.h", + ]), + copts = [] + select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }) + select({ + "//:debug_flag": [ + "-O0", + "-g", + "-gdwarf-2", + ], + "//conditions:default": ["-O3"], + }) + + select({ + "//:openmp_flag": ["-fopenmp"], + "//conditions:default": [], + }) + + select({ + "//:Werror_flag": ["-Werror"], + "//conditions:default": [], + }), + linkstatic = True, + deps = [ + ":validation_framework", + "//:arm_compute", + "//:arm_compute_graph", + "//:common_defines", + "//tests/framework", + ], + local_defines = [] + + select({ + "//:bf16_validation_flag": [ + "ARM_COMPUTE_ENABLE_BF16", + ], + "//conditions:default": [], + }) + + select({ + "//:sve_validation_flag": [ + "ENABLE_SVE", + "ARM_COMPUTE_ENABLE_SVE", + ], + "//conditions:default": [], + }) +) + +#--------------------------------------------------------------------- +# Benchmark Binary +cc_binary( + name = "arm_benchmark", + srcs = glob([ + "benchmark/fixtures/*.h", + "benchmark/NEON/*.cpp", + "*.cpp", + ]), + copts = [] + select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }) + select({ + "//:debug_flag": [ + "-O0", + "-g", + "-gdwarf-2", + ], + "//conditions:default": ["-O3"], + }) + + select({ + "//:openmp_flag": ["-fopenmp"], + "//conditions:default": [], + }) + + select({ + "//:Werror_flag": ["-Werror"], + "//conditions:default": [], + }), + linkstatic = True, + deps = [ + ":arm_compute_validation", + ":validation_framework", + "//:arm_compute", + ], +) diff --git a/tests/CL/CLHOGAccessor.h b/tests/CL/CLHOGAccessor.h deleted file mode 100644 index 2b594955f6..0000000000 --- a/tests/CL/CLHOGAccessor.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_CLHOGACCESSOR_H -#define ARM_COMPUTE_TEST_CLHOGACCESSOR_H - -#include "arm_compute/runtime/CL/CLHOG.h" -#include "tests/IHOGAccessor.h" - -namespace arm_compute -{ -namespace test -{ -/** Accessor implementation for @ref CLHOG objects. */ -class CLHOGAccessor : public IHOGAccessor -{ -public: - /** Create an accessor for the given @p CLHOG. */ - CLHOGAccessor(CLHOG &hog) - : _hog{ hog } - { - _hog.map(); - } - - /** Destructor that unmaps the CL memory. */ - ~CLHOGAccessor() - { - _hog.unmap(); - } - - /** Prevent instances of this class from being copied (As this class contains references). */ - CLHOGAccessor(const CLHOGAccessor &) = delete; - /** Prevent instances of this class from being copied (As this class contains references). */ - CLHOGAccessor &operator=(const CLHOGAccessor &) = delete; - - /** Pointer to the first element of the array which stores the linear SVM coefficients of HOG descriptor - * - * @return A pointer to the first element of the array which stores the linear SVM coefficients of HOG descriptor - */ - float *descriptor() const override - { - return _hog.descriptor(); - } - -private: - CLHOG &_hog; -}; -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_CLHOGACCESSOR_H */ diff --git a/tests/CL/CLLutAccessor.h b/tests/CL/CLLutAccessor.h deleted file mode 100644 index 78cd85d405..0000000000 --- a/tests/CL/CLLutAccessor.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_CL_CLLUTACCESSOR_H -#define ARM_COMPUTE_TEST_CL_CLLUTACCESSOR_H - -#include "tests/ILutAccessor.h" - -#include "arm_compute/runtime/CL/CLLut.h" - -namespace arm_compute -{ -namespace test -{ -/** Accessor implementation for @ref CLLut objects. */ -template <typename T> -class CLLutAccessor : public ILutAccessor<T> -{ -public: - /** Create an accessor for the given @p CLLut. - */ - CLLutAccessor(CLLut &lut) - : _lut{ lut } - { - _lut.map(true); - } - /** Default destructor */ - ~CLLutAccessor() - { - _lut.unmap(); - } - - /** Prevent instances of this class from being copy constructed */ - CLLutAccessor(const CLLutAccessor &) = delete; - /** Prevent instances of this class from being copied */ - CLLutAccessor &operator=(const CLLutAccessor &) = delete; - /** Allow instances of this class to be move constructed */ - CLLutAccessor(CLLutAccessor &&) = default; - /** Allow instance of this class to be moved */ - CLLutAccessor &operator=(CLLutAccessor &&) = default; - - int num_elements() const override - { - return _lut.num_elements(); - } - - const T &operator[](T input_value) const override - { - auto lut = reinterpret_cast<T *>(_lut.buffer()); - int32_t real_index = _lut.index_offset() + static_cast<int32_t>(input_value); - - if(0 <= real_index && real_index < num_elements()) - { - return lut[real_index]; - } - ARM_COMPUTE_ERROR("Error index not in range."); - } - - T &operator[](T input_value) override - { - auto lut = reinterpret_cast<T *>(_lut.buffer()); - int32_t real_index = _lut.index_offset() + static_cast<int32_t>(input_value); - - if(0 <= real_index && real_index < num_elements()) - { - return lut[real_index]; - } - ARM_COMPUTE_ERROR("Error index not in range."); - } - -private: - CLLut &_lut; -}; -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_CL_CLLUTACCESSOR_H */ diff --git a/tests/CL/Helper.h b/tests/CL/Helper.h index 5153e98add..dd5e8647b0 100644 --- a/tests/CL/Helper.h +++ b/tests/CL/Helper.h @@ -29,8 +29,11 @@ #include "arm_compute/runtime/CL/functions/CLFill.h" #include "arm_compute/runtime/IFunction.h" #include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/gpu/cl/IClOperator.h" +#include "src/gpu/cl/operators/ClFill.h" #include "src/core/CL/ICLKernel.h" +#include "support/Cast.h" #include <memory> @@ -38,6 +41,86 @@ namespace arm_compute { namespace test { +/** This template synthetizes a simple IOperator which runs the given kernel K */ +template <typename K> +class CLSynthetizeOperator : public opencl::IClOperator +{ +public: + /** Configure the kernel. + * + * @param[in] args Configuration arguments. + */ + template <typename... Args> + void configure(Args &&... args) + { + auto k = std::make_unique<K>(); + k->configure(CLKernelLibrary::get().get_compile_context(), std::forward<Args>(args)...); + _kernel = std::move(k); + } + /** Configure the kernel setting the GPU target as well + * + * @param[in] gpu_target GPUTarget to set + * @param[in] args Configuration arguments. + */ + template <typename... Args> + void configure(GPUTarget gpu_target, Args &&... args) + { + auto k = std::make_unique<K>(); + k->set_target(gpu_target); + k->configure(CLKernelLibrary::get().get_compile_context(), std::forward<Args>(args)...); + _kernel = std::move(k); + } + /** Validate input arguments + * + * @param[in] args Configuration arguments. + */ + template <typename... Args> + static Status validate(Args &&... args) + { + return K::validate(std::forward<Args>(args)...); + } +}; + +/** As above but this also initializes to zero the input tensor */ +template <typename K, int bordersize> +class CLSynthetizeOperatorInitOutputWithZeroAndWithZeroConstantBorder : public opencl::IClOperator +{ +public: + /** Configure the kernel. + * + * @param[in] first First input argument. + * @param[in] second Second input argument. + * @param[in] args Rest of the configuration arguments. + */ + template <typename T, typename... Args> + void configure(T first, T second, Args &&... args) + { + auto cctx = CLKernelLibrary::get().get_compile_context(); + auto k = std::make_unique<K>(); + k->set_target(CLScheduler::get().target()); + k->configure(cctx, first, second, std::forward<Args>(args)...); + _kernel = std::move(k); + _border_handler.configure(cctx, first, BorderSize(bordersize), BorderMode::CONSTANT, PixelValue()); + _fill.configure(cctx, second, PixelValue()); + } + + // Inherited method overridden: + void run(ITensorPack &tensors) override final + { + ARM_COMPUTE_ERROR_ON_MSG(!_kernel, "The CL kernel or function isn't configured"); + + ITensorPack fill_pack = { { ACL_SRC, tensors.get_tensor(TensorType::ACL_DST) } }; + _fill.run(fill_pack); + CLScheduler::get().enqueue_op(_border_handler, tensors); + CLScheduler::get().enqueue_op(*_kernel, tensors); + } + +private: + opencl::ClFill _fill{}; /**< Kernel to initialize the tensor */ + CLFillBorderKernel _border_handler{}; /**< Kernel to handle borders */ + std::unique_ptr<ICLKernel> _kernel{}; /**< Kernel to run */ +}; + /** This template synthetizes an ICLSimpleFunction which runs the given kernel K */ template <typename K> class CLSynthetizeFunction : public ICLSimpleFunction @@ -135,6 +218,39 @@ private: CLFillBorderKernel _border_handler{}; /**< Kernel to handle borders */ std::unique_ptr<ICLKernel> _kernel{}; /**< Kernel to run */ }; + +/** As above but this also setups a Zero border on the input tensor of the kernel's bordersize */ +template <typename K> +class ClSynthetizeOperatorWithBorder : public opencl::IClOperator +{ +public: + /** Configure the kernel. + * + * @param[in] first First configuration argument. + * @param[in] args Rest of the configuration arguments. + */ + template <typename T, typename... Args> + void configure(T first, Args &&... args) + { + auto k = std::make_unique<K>(); + k->configure(CLKernelLibrary::get().get_compile_context(), first, std::forward<Args>(args)...); + _kernel = std::move(k); + + auto b = std::make_unique<CLFillBorderKernel>(); + b->configure(CLKernelLibrary::get().get_compile_context(), first, BorderSize(_kernel->border_size()), BorderMode::CONSTANT, PixelValue()); + _border_handler = std::move(b); + } + + void run(ITensorPack &tensors) override + { + CLScheduler::get().enqueue(*_border_handler); + CLScheduler::get().enqueue_op(*_kernel, tensors); + } + +private: + std::unique_ptr<ICLKernel> _border_handler{ nullptr }; /**< Kernel to handle borders */ + std::unique_ptr<ICLKernel> _kernel{}; /**< Kernel to run */ +}; } // namespace test } // namespace arm_compute #endif /* ARM_COMPUTE_TEST_CL_HELPER_H */ diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 0000000000..20a010f38c --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,131 @@ +# Copyright (c) 2023-2024 Arm Limited. +# +# SPDX-License-Identifier: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +target_sources( + arm_compute_validation_framework + PRIVATE validation/Validation.cpp + validation/Helpers.cpp + validation/reference/BoundingBoxTransform.cpp + validation/reference/GEMMReshapeRHSMatrix.cpp + validation/reference/ChannelShuffle.cpp + validation/reference/Logical.cpp + validation/reference/PoolingLayer.cpp + validation/reference/BitwiseNot.cpp + validation/reference/Conv3D.cpp + validation/reference/GEMMReshapeLHSMatrix.cpp + validation/reference/ComputeAllAnchors.cpp + validation/reference/DepthConcatenateLayer.cpp + validation/reference/TableLookup.cpp + validation/reference/ROIPoolingLayer.cpp + validation/reference/SliceOperations.cpp + validation/reference/GEMMLowp.cpp + validation/reference/Unstack.cpp + validation/reference/Pooling3dLayer.cpp + validation/reference/BitwiseOr.cpp + validation/reference/ReshapeLayer.cpp + validation/reference/SoftmaxLayer.cpp + validation/reference/Gather.cpp + validation/reference/Utils.cpp + validation/reference/Accumulate.cpp + validation/reference/CropResize.cpp + validation/reference/ReductionOperation.cpp + validation/reference/ConcatenateLayer.cpp + validation/reference/PixelWiseMultiplication.cpp + validation/reference/DepthConvertLayer.cpp + validation/reference/Erode.cpp + validation/reference/DepthToSpaceLayer.cpp + validation/reference/PadLayer.cpp + validation/reference/MeanStdDevNormalizationLayer.cpp + validation/reference/BitwiseXor.cpp + validation/reference/GEMM.cpp + validation/reference/NormalizePlanarYUVLayer.cpp + validation/reference/FuseBatchNormalization.cpp + validation/reference/BitwiseAnd.cpp + validation/reference/SpaceToDepth.cpp + validation/reference/NonMaximaSuppression.cpp + validation/reference/Reverse.cpp + validation/reference/DFT.cpp + validation/reference/L2NormalizeLayer.cpp + validation/reference/ActivationLayer.cpp + validation/reference/SpaceToBatch.cpp + validation/reference/Im2Col.cpp + validation/reference/DequantizationLayer.cpp + validation/reference/DeconvolutionLayer.cpp + validation/reference/MinMaxLocation.cpp + validation/reference/Select.cpp + validation/reference/BatchNormalizationLayer.cpp + validation/reference/InstanceNormalizationLayer.cpp + validation/reference/ROIAlignLayer.cpp + validation/reference/ElementwiseUnary.cpp + validation/reference/MeanStdDev.cpp + validation/reference/QLSTMLayerNormalization.cpp + validation/reference/Col2Im.cpp + validation/reference/FlattenLayer.cpp + validation/reference/AbsoluteDifference.cpp + validation/reference/Transpose.cpp + validation/reference/StackLayer.cpp + validation/reference/NormalizationLayer.cpp + validation/reference/Copy.cpp + validation/reference/MaxUnpoolingLayer.cpp + validation/reference/Winograd.cpp + validation/reference/Permute.cpp + validation/reference/Comparisons.cpp + validation/reference/Tile.cpp + validation/reference/BatchToSpaceLayer.cpp + validation/reference/ElementwiseOperations.cpp + validation/reference/QuantizationLayer.cpp + validation/reference/NonMaxSuppression.cpp + validation/reference/WeightsReshape.cpp + validation/reference/ArithmeticOperations.cpp + validation/reference/ConvertFullyConnectedWeights.cpp + validation/reference/Floor.cpp + validation/reference/PriorBoxLayer.cpp + validation/reference/Scale.cpp + validation/reference/ScatterLayer.cpp + validation/reference/ReorgLayer.cpp + validation/reference/Range.cpp + validation/reference/ArithmeticDivision.cpp + validation/reference/DepthwiseConvolutionLayer.cpp + validation/reference/FullyConnectedLayer.cpp + validation/reference/ConvolutionLayer.cpp + validation/reference/Reorder.cpp + framework/Framework.cpp + framework/Utils.cpp + framework/Exceptions.cpp + framework/DatasetModes.cpp + framework/TestFilter.cpp + framework/Profiler.cpp + framework/ParametersLibrary.cpp + framework/command_line/CommonOptions.cpp + framework/instruments/WallClockTimer.cpp + framework/instruments/InstrumentsStats.cpp + framework/instruments/Instruments.cpp + framework/instruments/SchedulerTimer.cpp + framework/instruments/hwc_names.hpp + framework/instruments/hwc.hpp + framework/printers/PrettyPrinter.cpp + framework/printers/Printer.cpp + framework/printers/JSONPrinter.cpp + framework/printers/Printers.cpp + AssetsLibrary.cpp + RawTensor.cpp + main.cpp) diff --git a/tests/IAccessor.h b/tests/IAccessor.h index c54c00e99e..75faee19ce 100644 --- a/tests/IAccessor.h +++ b/tests/IAccessor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2019, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,6 +26,7 @@ #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Types.h" +#include "arm_compute/core/QuantizationInfo.h" namespace arm_compute { diff --git a/tests/IHOGAccessor.h b/tests/IHOGAccessor.h deleted file mode 100644 index f1c137c2ed..0000000000 --- a/tests/IHOGAccessor.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_IHOGACCESSOR_H -#define ARM_COMPUTE_TEST_IHOGACCESSOR_H - -namespace arm_compute -{ -namespace test -{ -/** Common interface to access HOG structure */ -class IHOGAccessor -{ -public: - /** Virtual destructor. */ - virtual ~IHOGAccessor() = default; - - /** Pointer to the first element of the array which stores the linear SVM coefficients of HOG descriptor - * - * @note Other elements of the array can be accessed using descriptor()[idx] for idx=[0, descriptor_size() - 1] - * - * @return A pointer to the first element of the array which stores the linear SVM coefficients of HOG descriptor - */ - virtual float *descriptor() const = 0; -}; -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_IHOGACCESSOR_H */ diff --git a/tests/NEON/HOGAccessor.h b/tests/NEON/HOGAccessor.h deleted file mode 100644 index 735abb08d3..0000000000 --- a/tests/NEON/HOGAccessor.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_HOGACCESSOR_H -#define ARM_COMPUTE_TEST_HOGACCESSOR_H - -#include "arm_compute/runtime/HOG.h" -#include "tests/IHOGAccessor.h" - -namespace arm_compute -{ -namespace test -{ -/** Accessor implementation for @ref HOG objects. */ -class HOGAccessor : public IHOGAccessor -{ -public: - /** Create an accessor for the given @p HOG. - */ - HOGAccessor(HOG &hog) - : _hog{ hog } - { - } - - /** Prevent instances of this class from being copied (As this class contains references). */ - HOGAccessor(const HOGAccessor &) = delete; - /** Prevent instances of this class from being copied (As this class contains references). */ - HOGAccessor &operator=(const HOGAccessor &) = delete; - - /** Pointer to the first element of the array which stores the linear SVM coefficients of HOG descriptor - * - * @return A pointer to the first element of the array which stores the linear SVM coefficients of HOG descriptor - */ - float *descriptor() const override - { - return _hog.descriptor(); - } - -private: - HOG &_hog; -}; -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_HOGACCESSOR_H */ diff --git a/tests/NEON/Helper.h b/tests/NEON/Helper.h index 714152ebcd..fb0231b62a 100644 --- a/tests/NEON/Helper.h +++ b/tests/NEON/Helper.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -27,7 +27,9 @@ #include "arm_compute/runtime/Array.h" #include "arm_compute/runtime/NEON/INESimpleFunction.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" #include "src/core/NEON/kernels/NEFillBorderKernel.h" +#include "src/cpu/ICpuOperator.h" #include "tests/Globals.h" #include <algorithm> @@ -104,7 +106,7 @@ public: /** As above but this also setups a Zero border on the input tensor of the kernel's bordersize */ template <typename K> -class NESynthetizeFunctionWithZeroConstantKernelBorder : public INESimpleFunction +class NESynthetizeFunctionWithZeroConstantKernelBorder : public cpu::ICpuOperator { public: /** Configure the kernel. @@ -123,6 +125,15 @@ public: b->configure(first, BorderSize(_kernel->border_size()), BorderMode::CONSTANT, PixelValue()); _border_handler = std::move(b); } + + void run(ITensorPack &tensors) + { + NEScheduler::get().schedule(_border_handler.get(), Window::DimZ); + NEScheduler::get().schedule_op(_kernel.get(), Window::DimY, _kernel->window(), tensors); + } + +private: + std::unique_ptr<INEKernel> _border_handler{ nullptr }; }; } // namespace test diff --git a/tests/NEON/LutAccessor.h b/tests/NEON/LutAccessor.h deleted file mode 100644 index 5204d0640c..0000000000 --- a/tests/NEON/LutAccessor.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_NEON_LUTACCESSOR_H -#define ARM_COMPUTE_TEST_NEON_LUTACCESSOR_H - -#include "tests/ILutAccessor.h" - -#include "arm_compute/runtime/Lut.h" - -namespace arm_compute -{ -namespace test -{ -/** Accessor implementation for @ref Lut objects. */ -template <typename T> -class LutAccessor : public ILutAccessor<T> -{ -public: - /** Create an accessor for the given @p Lut. - */ - LutAccessor(Lut &lut) - : _lut{ lut } - { - } - - /** Prevent instances of this class from being copy constructed */ - LutAccessor(const LutAccessor &) = delete; - /** Prevent instances of this class from being copied */ - LutAccessor &operator=(const LutAccessor &) = delete; - /** Allow instances of this class to be move constructed */ - LutAccessor(LutAccessor &&) = default; - /** Allow instances of this class to be moved */ - LutAccessor &operator=(LutAccessor &&) = default; - - int num_elements() const override - { - return _lut.num_elements(); - } - - const T &operator[](T input_value) const override - { - auto lut = reinterpret_cast<T *>(_lut.buffer()); - int32_t real_index = _lut.index_offset() + static_cast<int32_t>(input_value); - - if(0 <= real_index && real_index < num_elements()) - { - return lut[real_index]; - } - ARM_COMPUTE_ERROR("Error index not in range."); - } - - T &operator[](T input_value) override - { - auto lut = reinterpret_cast<T *>(_lut.buffer()); - int32_t real_index = _lut.index_offset() + static_cast<int32_t>(input_value); - - if(0 <= real_index && real_index < num_elements()) - { - return lut[real_index]; - } - ARM_COMPUTE_ERROR("Error index not in range."); - } - -private: - ILut &_lut; -}; -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_NEON_LUTACCESSOR_H */ diff --git a/tests/RawLutAccessor.h b/tests/RawLutAccessor.h deleted file mode 100644 index 4318fb2dcc..0000000000 --- a/tests/RawLutAccessor.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_RAWLUTACCESSOR_H -#define ARM_COMPUTE_TEST_RAWLUTACCESSOR_H - -#include "ILutAccessor.h" - -#include <map> - -namespace arm_compute -{ -namespace test -{ -/** Accessor implementation for std::map-lut objects. */ -template <typename T> -class RawLutAccessor : public ILutAccessor<T> -{ -public: - /** Create an accessor for the given @p std::map. - */ - RawLutAccessor(std::map<T, T> &lut) - : _lut{ lut } - { - } - - /** Prevent instances of this class from being copy constructed */ - RawLutAccessor(const RawLutAccessor &) = delete; - /** Prevent instances of this class from being copied */ - RawLutAccessor &operator=(const RawLutAccessor &) = delete; - /** Allow instances of this class to be move constructed */ - RawLutAccessor(RawLutAccessor &&) = default; - /** Allow instances of this class to be moved */ - RawLutAccessor &operator=(RawLutAccessor &&) = default; - - int num_elements() const override - { - return _lut.size(); - } - - const T &operator[](T input_value) const override - { - return _lut[input_value]; - } - - T &operator[](T input_value) override - { - return _lut[input_value]; - } - -private: - std::map<T, T> &_lut; -}; - -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_RAWLUTACCESSOR_H */ diff --git a/tests/SConscript b/tests/SConscript index fea68e0fe9..0907c5713b 100644 --- a/tests/SConscript +++ b/tests/SConscript @@ -1,4 +1,7 @@ -# Copyright (c) 2017-2019 Arm Limited. +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright (c) 2017-2023,2024 Arm Limited. # # SPDX-License-Identifier: MIT # @@ -19,7 +22,6 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -import SCons import os.path Import('env') @@ -28,12 +30,11 @@ Import('install_bin') # vars is imported from arm_compute: variables = [ - BoolVariable("benchmark_examples", "Build benchmark examples programs", True), - BoolVariable("validate_examples", "Build validate examples programs", True), + BoolVariable("benchmark_examples", "Build benchmark examples programs", False), + BoolVariable("validate_examples", "Build validate examples programs", False), BoolVariable("reference_openmp", "Build reference validation with openmp", True), - #FIXME Switch the following two options to False before releasing - BoolVariable("validation_tests", "Build validation test programs", True), - BoolVariable("benchmark_tests", "Build benchmark test programs", True), + BoolVariable("validation_tests", "Build validation test programs", False), + BoolVariable("benchmark_tests", "Build benchmark test programs", False), ("test_filter", "Pattern to specify the tests' filenames to be compiled", "*.cpp") ] @@ -67,7 +68,8 @@ Import("arm_compute_test_framework") test_env.Append(LIBS = arm_compute_test_framework) # Disable floating-point expression contraction (e.g. fused multiply-add operations) -test_env.Append(CXXFLAGS = ['-ffp-contract=off']) +if not 'windows' in env['os']: + test_env.Append(CXXFLAGS = ['-ffp-contract=off']) # Remove -Wnoexcept from tests if 'g++' in test_env['CXX'] and '-Wnoexcept' in test_env['CXXFLAGS']: @@ -77,18 +79,19 @@ load_whole_archive = '-Wl,--whole-archive' noload_whole_archive = '-Wl,--no-whole-archive' if 'macos' in test_env['os']: load_whole_archive = '-Wl,-force_load' - noload_whole_archive = '-Wl,-noall_load' + noload_whole_archive = '' + +if (env['multi_isa']): + test_env.Append(CPPDEFINES=['ARM_COMPUTE_ENABLE_BF16']) if env['os'] in ['android', 'macos', 'bare_metal'] or env['standalone']: Import("arm_compute_a") - Import("arm_compute_core_a") Import("arm_compute_graph_a") - test_env.Append(LIBS = [arm_compute_graph_a, arm_compute_a, arm_compute_core_a]) + test_env.Append(LIBS = [arm_compute_graph_a, arm_compute_a]) arm_compute_lib = arm_compute_graph_a else: Import("arm_compute_graph_so") - Import("arm_compute_core_a") - test_env.Append(LIBS = ["arm_compute_graph", "arm_compute", "arm_compute_core"]) + test_env.Append(LIBS = ["arm_compute_graph", "arm_compute"]) arm_compute_lib = arm_compute_graph_so if env['os'] in ['bare_metal']: @@ -114,6 +117,10 @@ filter_pattern = test_env['test_filter'] files_validation += Glob('validation/CPP/' + filter_pattern) if env['opencl']: + if env['experimental_dynamic_fusion']: + files_validation += Glob('validation/dynamic_fusion/gpu/' + filter_pattern) + files_validation += Glob('validation/dynamic_fusion/gpu/cl/' + filter_pattern) + filter_pattern = test_env['test_filter'] test_env.Append(CPPDEFINES=['ARM_COMPUTE_CL']) @@ -125,6 +132,7 @@ if env['opencl']: files_validation += Glob('validation/CL/*/' + filter_pattern) files_validation += Glob('validation/CL/' + filter_pattern) + if env['external_tests_dir']: files_validation += Glob(env['external_tests_dir'] + '/tests/validation/CL/' + filter_pattern) files_validation += Glob('validation/gpu/unit/*.cpp') @@ -133,6 +141,7 @@ if env['neon']: filter_pattern = test_env['test_filter'] files_benchmark += Glob('benchmark/NEON/*/' + filter_pattern) files_benchmark += Glob('benchmark/NEON/' + filter_pattern) + test_env.Append(CPPPATH = ["#/src/cpu/kernels/assembly/"]) if env['external_tests_dir']: files_benchmark += Glob(env['external_tests_dir'] + '/tests/benchmark/NEON/' + filter_pattern) @@ -150,7 +159,7 @@ if env['neon']: extra_link_flags = [] if env['os'] == 'android': test_env.Append(LIBS = ["log"]) -elif env['os'] not in ['bare_metal', 'macos']: +elif env['os'] not in ['windows','bare_metal', 'macos']: test_env.Append(LIBS = ["rt"]) extra_link_flags += ['-fstack-protector-strong'] @@ -166,14 +175,20 @@ bm_link_flags = [] if test_env['linker_script']: bm_link_flags += ['-Wl,--build-id=none', '-T', env['linker_script']] -if test_env['reference_openmp'] and env['os'] not in ['bare_metal', 'macos']: - test_env['CXXFLAGS'].append('-fopenmp') - test_env['LINKFLAGS'].append('-fopenmp') +if test_env['reference_openmp'] and env['os'] not in ['bare_metal', 'macos','windows']: + test_env['CXXFLAGS'].append('-fopenmp') + test_env['LINKFLAGS'].append('-fopenmp') + + if 'ndk_above_r21' in env: + test_env['LINKFLAGS'].append('-static-openmp') + +# Testing for fixed format GEMM kernels. +if env['fixed_format_kernels'] and test_env['validation_tests']: + test_env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS']) if test_env['validation_tests']: - arm_compute_validation_framework = env.StaticLibrary('arm_compute_validation_framework', Glob('validation/reference/*.cpp') + Glob('validation/*.cpp'), LINKFLAGS=test_env['LINKFLAGS'], CXXFLAGS=test_env['CXXFLAGS'], LIBS= [ arm_compute_test_framework, arm_compute_core_a]) + arm_compute_validation_framework = env.StaticLibrary('arm_compute_validation_framework', Glob('validation/reference/*.cpp') + Glob('validation/*.cpp'), LINKFLAGS=test_env['LINKFLAGS'], CXXFLAGS=test_env['CXXFLAGS'], LIBS= [ arm_compute_test_framework ]) Depends(arm_compute_validation_framework , arm_compute_test_framework) - Depends(arm_compute_validation_framework , arm_compute_core_a) program_objects = files_validation + common_objects if test_env['os'] == 'bare_metal': @@ -272,8 +287,9 @@ if test_env['benchmark_examples']: #-Wl,--allow-shlib-undefined: Ignore dependencies of dependencies prog = test_env.Program(example, [ test_env.Object(source=file, target=example), graph_utils, graph_params]+ files_benchmark_examples, LIBS = test_env["LIBS"] + ["arm_compute_graph"], LINKFLAGS=test_env["LINKFLAGS"]+['-Wl,--allow-shlib-undefined']) arm_compute_benchmark_examples += [ prog ] + arm_compute_benchmark_examples = install_bin(arm_compute_benchmark_examples) Depends(arm_compute_benchmark_examples, arm_compute_test_framework) Depends(arm_compute_benchmark_examples, arm_compute_lib) Default(arm_compute_benchmark_examples) - Export('arm_compute_benchmark_examples')
\ No newline at end of file + Export('arm_compute_benchmark_examples') diff --git a/tests/SimpleTensor.h b/tests/SimpleTensor.h index c1bd7f87b5..419621e808 100644 --- a/tests/SimpleTensor.h +++ b/tests/SimpleTensor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -173,6 +173,15 @@ public: */ QuantizationInfo quantization_info() const override; + /** Set the quantization information of the tensor. + * + * This function does not have any effect on the raw quantized data of the tensor. + * It simply changes the quantization information, hence changes the dequantized values. + * + * @return A reference to the current object. + */ + SimpleTensor<T> &quantization_info(const QuantizationInfo &qinfo); + /** Constant pointer to the underlying buffer. * * @return a constant pointer to the data. @@ -335,6 +344,13 @@ QuantizationInfo SimpleTensor<T>::quantization_info() const } template <typename T> +SimpleTensor<T> &SimpleTensor<T>::quantization_info(const QuantizationInfo &qinfo) +{ + _quantization_info = qinfo; + return *this; +} + +template <typename T> size_t SimpleTensor<T>::size() const { const size_t size = std::accumulate(_shape.cbegin(), _shape.cend(), 1, std::multiplies<size_t>()); @@ -376,6 +392,8 @@ int SimpleTensor<T>::num_channels() const case Format::S16: case Format::U32: case Format::S32: + case Format::U64: + case Format::S64: case Format::F16: case Format::F32: return 1; diff --git a/tests/SimpleTensorPrinter.h b/tests/SimpleTensorPrinter.h index 6c1506b40d..e4ca66bb36 100644 --- a/tests/SimpleTensorPrinter.h +++ b/tests/SimpleTensorPrinter.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2018, 2021-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,9 @@ * SOFTWARE. */ +#ifndef ARM_COMPUTE_TEST_SIMPLE_TENSOR_PRINTER +#define ARM_COMPUTE_TEST_SIMPLE_TENSOR_PRINTER + #include "arm_compute/core/Error.h" #include "tests/RawTensor.h" @@ -34,8 +37,6 @@ namespace arm_compute { namespace test { -namespace -{ template <typename T> inline std::string prettify_tensor(const SimpleTensor<T> &input, const IOFormatInfo &io_fmt = IOFormatInfo{ IOFormatInfo::PrintRegion::NoPadding }) { @@ -152,6 +153,6 @@ void print_simpletensor(const SimpleTensor<T> &tensor, const std::string &title, } } #endif // PRINT_TENSOR_LIMIT -} } // namespace test } // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_SIMPLE_TENSOR_PRINTER */ diff --git a/tests/TypePrinter.h b/tests/TypePrinter.h deleted file mode 100644 index 612360d4f0..0000000000 --- a/tests/TypePrinter.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_TYPE_PRINTER_H -#define ARM_COMPUTE_TEST_TYPE_PRINTER_H - -#include "tests/Types.h" - -namespace arm_compute -{ -/** Formatted output of the GradientDimension type. - * - * @param[out] os Output stream - * @param[in] dim Type to output - * - * @return Modified output stream. - */ -inline ::std::ostream &operator<<(::std::ostream &os, const GradientDimension &dim) -{ - switch(dim) - { - case GradientDimension::GRAD_X: - os << "GRAD_X"; - break; - case GradientDimension::GRAD_Y: - os << "GRAD_Y"; - break; - case GradientDimension::GRAD_XY: - os << "GRAD_XY"; - break; - default: - ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); - } - - return os; -} - -/** Formatted output of the GradientDimension type. - * - * @param[in] type Type to output - * - * @return Formatted string. - */ -inline std::string to_string(const arm_compute::GradientDimension &type) -{ - std::stringstream str; - str << type; - return str.str(); -} - -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_TYPE_PRINTER_H */ diff --git a/tests/Types.h b/tests/Types.h index c8e9a755a7..5d5a8207d5 100644 --- a/tests/Types.h +++ b/tests/Types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,14 +30,6 @@ namespace arm_compute { -/** Gradient dimension type. */ -enum class GradientDimension -{ - GRAD_X, /**< x gradient dimension */ - GRAD_Y, /**< y gradient dimension */ - GRAD_XY, /**< x and y gradient dimension */ -}; - /** Min and max values and locations */ template <typename MinMaxType> struct MinMaxLocationValues @@ -47,37 +39,5 @@ struct MinMaxLocationValues std::vector<Coordinates2D> min_loc{}; /**< Min value location */ std::vector<Coordinates2D> max_loc{}; /**< Max value location */ }; - -/** Parameters of Optical Flow algorithm. */ -struct OpticalFlowParameters -{ - OpticalFlowParameters(Termination termination, - float epsilon, - size_t num_iterations, - size_t window_dimension, - bool use_initial_estimate) - : termination{ std::move(termination) }, - epsilon{ std::move(epsilon) }, - num_iterations{ std::move(num_iterations) }, - window_dimension{ std::move(window_dimension) }, - use_initial_estimate{ std::move(use_initial_estimate) } - { - } - - Termination termination; - float epsilon; - size_t num_iterations; - size_t window_dimension; - bool use_initial_estimate; -}; - -/** Internal keypoint class for Lucas-Kanade Optical Flow */ -struct InternalKeyPoint -{ - float x{ 0.f }; /**< x coordinate of the keypoint */ - float y{ 0.f }; /**< y coordinate of the keypoint */ - bool tracking_status{ false }; /**< the tracking status of the keypoint */ -}; - } // namespace arm_compute #endif /* ARM_COMPUTE_TEST_TYPES_H */ diff --git a/tests/Utils.h b/tests/Utils.h index 2569c41a9e..7b831468d3 100644 --- a/tests/Utils.h +++ b/tests/Utils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,8 +26,6 @@ #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Error.h" -#include "arm_compute/core/HOGInfo.h" -#include "arm_compute/core/PyramidInfo.h" #include "arm_compute/core/Size2D.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/TensorShape.h" @@ -50,6 +48,7 @@ #include <type_traits> #include <vector> +#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h" #include "arm_compute/runtime/CPP/CPPScheduler.h" #include "arm_compute/runtime/RuntimeContext.h" @@ -135,7 +134,7 @@ using make_unsigned_conditional_t = typename std::conditional<std::is_integral<T // clang-format on // *INDENT-ON* -} +} // namespace traits /** Look up the format corresponding to a channel. * @@ -241,101 +240,6 @@ inline ValidRegion shape_to_valid_region(const TensorShape &a_shape, bool border return valid_region; } -/** Create a valid region for Gaussian Pyramid Half based on tensor shape and valid region at level "i - 1" and border mode - * - * @note The border size is 2 in case of Gaussian Pyramid Half - * - * @param[in] a_shape Shape used at level "i - 1" of Gaussian Pyramid Half - * @param[in] a_valid_region Valid region used at level "i - 1" of Gaussian Pyramid Half - * @param[in] border_undefined (Optional) Boolean indicating if the border mode is undefined. - * - * return The valid region for the level "i" of Gaussian Pyramid Half - */ -inline ValidRegion shape_to_valid_region_gaussian_pyramid_half(const TensorShape &a_shape, const ValidRegion &a_valid_region, bool border_undefined = false) -{ - constexpr int border_size = 2; - - ValidRegion valid_region{ Coordinates(), a_shape }; - - Coordinates &anchor = valid_region.anchor; - TensorShape &shape = valid_region.shape; - - // Compute tensor shape for level "i" of Gaussian Pyramid Half - // dst_width = (src_width + 1) * 0.5f - // dst_height = (src_height + 1) * 0.5f - shape.set(0, (a_shape[0] + 1) * 0.5f); - shape.set(1, (a_shape[1] + 1) * 0.5f); - - if(border_undefined) - { - ARM_COMPUTE_ERROR_ON(shape.num_dimensions() < 2); - - // Compute the left and top invalid borders - float invalid_border_left = static_cast<float>(a_valid_region.anchor.x() + border_size) / 2.0f; - float invalid_border_top = static_cast<float>(a_valid_region.anchor.y() + border_size) / 2.0f; - - // For the new anchor point we can have 2 cases: - // 1) If the width/height of the tensor shape is odd, we have to take the ceil value of (a_valid_region.anchor.x() + border_size) / 2.0f or (a_valid_region.anchor.y() + border_size / 2.0f - // 2) If the width/height of the tensor shape is even, we have to take the floor value of (a_valid_region.anchor.x() + border_size) / 2.0f or (a_valid_region.anchor.y() + border_size) / 2.0f - // In this manner we should be able to propagate correctly the valid region along all levels of the pyramid - invalid_border_left = (a_shape[0] % 2) ? std::ceil(invalid_border_left) : std::floor(invalid_border_left); - invalid_border_top = (a_shape[1] % 2) ? std::ceil(invalid_border_top) : std::floor(invalid_border_top); - - // Set the anchor point - anchor.set(0, static_cast<int>(invalid_border_left)); - anchor.set(1, static_cast<int>(invalid_border_top)); - - // Compute shape - // Calculate the right and bottom invalid borders at the previous level of the pyramid - const float prev_invalid_border_right = static_cast<float>(a_shape[0] - (a_valid_region.anchor.x() + a_valid_region.shape[0])); - const float prev_invalid_border_bottom = static_cast<float>(a_shape[1] - (a_valid_region.anchor.y() + a_valid_region.shape[1])); - - // Calculate the right and bottom invalid borders at the current level of the pyramid - const float invalid_border_right = std::ceil((prev_invalid_border_right + static_cast<float>(border_size)) / 2.0f); - const float invalid_border_bottom = std::ceil((prev_invalid_border_bottom + static_cast<float>(border_size)) / 2.0f); - - const int valid_shape_x = std::max(0, static_cast<int>(shape.x()) - static_cast<int>(invalid_border_left) - static_cast<int>(invalid_border_right)); - const int valid_shape_y = std::max(0, static_cast<int>(shape.y()) - static_cast<int>(invalid_border_top) - static_cast<int>(invalid_border_bottom)); - - shape.set(0, valid_shape_x); - shape.set(1, valid_shape_y); - } - - return valid_region; -} - -/** Create a valid region for Laplacian Pyramid based on tensor shape and valid region at level "i - 1" and border mode - * - * @note The border size is 2 in case of Laplacian Pyramid - * - * @param[in] a_shape Shape used at level "i - 1" of Laplacian Pyramid - * @param[in] a_valid_region Valid region used at level "i - 1" of Laplacian Pyramid - * @param[in] border_undefined (Optional) Boolean indicating if the border mode is undefined. - * - * return The valid region for the level "i" of Laplacian Pyramid - */ -inline ValidRegion shape_to_valid_region_laplacian_pyramid(const TensorShape &a_shape, const ValidRegion &a_valid_region, bool border_undefined = false) -{ - ValidRegion valid_region = shape_to_valid_region_gaussian_pyramid_half(a_shape, a_valid_region, border_undefined); - - if(border_undefined) - { - const BorderSize gaussian5x5_border(2); - - auto border_left = static_cast<int>(gaussian5x5_border.left); - auto border_right = static_cast<int>(gaussian5x5_border.right); - auto border_top = static_cast<int>(gaussian5x5_border.top); - auto border_bottom = static_cast<int>(gaussian5x5_border.bottom); - - valid_region.anchor.set(0, valid_region.anchor[0] + border_left); - valid_region.anchor.set(1, valid_region.anchor[1] + border_top); - valid_region.shape.set(0, std::max(0, static_cast<int>(valid_region.shape[0]) - border_right - border_left)); - valid_region.shape.set(1, std::max(0, static_cast<int>(valid_region.shape[1]) - border_top - border_bottom)); - } - - return valid_region; -} - /** Write the value after casting the pointer according to @p data_type. * * @warning The type of the value must match the specified data type. @@ -566,110 +470,6 @@ inline T create_tensor(const TensorShape &shape, Format format, IRuntimeContext return create_tensor<T>(info, ctx); } -/** Create and initialize a multi-image of the given type. - * - * @param[in] shape Tensor shape. - * @param[in] format Format type. - * - * @return Initialized tensor of given type. - */ -template <typename T> -inline T create_multi_image(const TensorShape &shape, Format format) -{ - T multi_image; - multi_image.init(shape.x(), shape.y(), format); - - return multi_image; -} - -/** Create and initialize a HOG (Histogram of Oriented Gradients) of the given type. - * - * @param[in] hog_info HOGInfo object - * - * @return Initialized HOG of given type. - */ -template <typename T> -inline T create_HOG(const HOGInfo &hog_info) -{ - T hog; - hog.init(hog_info); - - return hog; -} - -/** Create and initialize a Pyramid of the given type. - * - * @param[in] pyramid_info The PyramidInfo object. - * - * @return Initialized Pyramid of given type. - */ -template <typename T> -inline T create_pyramid(const PyramidInfo &pyramid_info) -{ - T pyramid; - pyramid.init_auto_padding(pyramid_info); - - return pyramid; -} - -/** Initialize a convolution matrix. - * - * @param[in, out] conv The input convolution matrix. - * @param[in] width The width of the convolution matrix. - * @param[in] height The height of the convolution matrix. - * @param[in] seed The random seed to be used. - */ -inline void init_conv(int16_t *conv, unsigned int width, unsigned int height, std::random_device::result_type seed) -{ - std::mt19937 gen(seed); - std::uniform_int_distribution<int16_t> distribution_int16(-32768, 32767); - - for(unsigned int i = 0; i < width * height; ++i) - { - conv[i] = distribution_int16(gen); - } -} - -/** Initialize a separable convolution matrix. - * - * @param[in, out] conv The input convolution matrix. - * @param[in] width The width of the convolution matrix. - * @param[in] height The height of the convolution matrix. - * @param[in] seed The random seed to be used. - */ -inline void init_separable_conv(int16_t *conv, unsigned int width, unsigned int height, std::random_device::result_type seed) -{ - std::mt19937 gen(seed); - // Set it between -128 and 127 to ensure the matrix does not overflow - std::uniform_int_distribution<int16_t> distribution_int16(-128, 127); - - int16_t *conv_row = new int16_t[width]; - int16_t *conv_col = new int16_t[height]; - - conv_row[0] = conv_col[0] = 1; - for(unsigned int i = 1; i < width; ++i) - { - conv_row[i] = distribution_int16(gen); - } - - for(unsigned int i = 1; i < height; ++i) - { - conv_col[i] = distribution_int16(gen); - } - - // Multiply two matrices - for(unsigned int i = 0; i < width; ++i) - { - for(unsigned int j = 0; j < height; ++j) - { - conv[i * width + j] = conv_col[i] * conv_row[j]; - } - } - - delete[] conv_row; - delete[] conv_col; -} - /** Create a vector with a uniform distribution of floating point values across the specified range. * * @param[in] num_values The number of values to be created. @@ -694,44 +494,6 @@ inline std::vector<T> generate_random_real(unsigned int num_values, T min, T max return v; } -/** Create a vector of random keypoints for pyramid representation. - * - * @param[in] shape The shape of the input tensor. - * @param[in] num_keypoints The number of keypoints to be created. - * @param[in] seed The random seed to be used. - * @param[in] num_levels The number of pyramid levels. - * - * @return A vector that contains the requested number of random keypoints - */ -inline std::vector<KeyPoint> generate_random_keypoints(const TensorShape &shape, size_t num_keypoints, std::random_device::result_type seed, size_t num_levels = 1) -{ - std::vector<KeyPoint> keypoints; - std::mt19937 gen(seed); - - // Calculate distribution bounds - const auto min = static_cast<int>(std::pow(2, num_levels)); - const auto max_width = static_cast<int>(shape.x()); - const auto max_height = static_cast<int>(shape.y()); - - ARM_COMPUTE_ERROR_ON(min > max_width || min > max_height); - - // Create distributions - std::uniform_int_distribution<> dist_w(min, max_width); - std::uniform_int_distribution<> dist_h(min, max_height); - - for(unsigned int i = 0; i < num_keypoints; i++) - { - KeyPoint keypoint; - keypoint.x = dist_w(gen); - keypoint.y = dist_h(gen); - keypoint.tracking_status = 1; - - keypoints.push_back(keypoint); - } - - return keypoints; -} - template <typename T, typename ArrayAccessor_T> inline void fill_array(ArrayAccessor_T &&array, const std::vector<T> &v) { @@ -814,6 +576,67 @@ inline void sync_tensor_if_necessary(TensorType &tensor) { ARM_COMPUTE_UNUSED(tensor); } + +/** Construct and return object for dimensions' state filled with the given value + * + * @param[in] value The value to fill + * + * @return Constructed class + */ +inline ITensorInfo::TensorDimsState construct_dims_state(int32_t value) +{ + auto states = ITensorInfo::TensorDimsState{}; + std::fill(states.begin(), states.end(), value); + return states; +} + +/** Construct and return object for dimensions' state filled with the value for dynamic state + * + * @return Constructed class filled with the value for dynamic state + */ +inline ITensorInfo::TensorDimsState construct_dynamic_dims_state() +{ + return construct_dims_state(ITensorInfo::get_dynamic_state_value()); +} + +/** Construct and return object for dimensions' state filled with the value for non-dynamic state + * + * @return Constructed class filled with the value for non-dynamic state + */ +inline ITensorInfo::TensorDimsState construct_static_dims_state() +{ + return construct_dims_state(ITensorInfo::get_static_state_value()); +} + +/** Set the dimension states of the given tensor to dynamic + * + * @param[in] t The tensor to set to dynamic state + * + */ +template <typename TensorType> +void set_tensor_dynamic(TensorType &t) +{ + t.info()->set_tensor_dims_state(construct_dynamic_dims_state()); +} + +/** Set the dimension states of the given tensor to state + * + * @param[in] t The tensor to set to static state + * + */ +template <typename TensorType> +void set_tensor_static(TensorType &t) +{ + t.info()->set_tensor_dims_state(construct_static_dims_state()); +} + +inline experimental::dynamic_fusion::Conv2dAttributes convert_pad_stride_info_to_conv_attr(const PadStrideInfo &info, const Size2D &dialation) +{ + const Padding2D info_pad(info.pad_left(), info.pad_right(), info.pad_top(), info.pad_bottom()); + const Size2D info_stride(info.stride().first, info.stride().second); + return arm_compute::experimental::dynamic_fusion::Conv2dAttributes().pad(info_pad).stride(info_stride).dilation(dialation); +} + } // namespace test } // namespace arm_compute #endif /* ARM_COMPUTE_TEST_UTILS_H */ diff --git a/tests/benchmark/CMakeLists.txt b/tests/benchmark/CMakeLists.txt new file mode 100644 index 0000000000..a3a352681e --- /dev/null +++ b/tests/benchmark/CMakeLists.txt @@ -0,0 +1,23 @@ +# Copyright (c) 2023 Arm Limited. +# +# SPDX-License-Identifier: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +target_sources(arm_compute_benchmark PRIVATE NEON/Scale.cpp) diff --git a/tests/benchmark/fixtures/ScaleFixture.h b/tests/benchmark/fixtures/ScaleFixture.h index 953872ea64..81f34bd538 100644 --- a/tests/benchmark/fixtures/ScaleFixture.h +++ b/tests/benchmark/fixtures/ScaleFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -40,7 +40,6 @@ template <typename TensorType, typename Function, typename Accessor> class ScaleFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, DataType data_type, DataLayout data_layout, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy) { constexpr float max_width = 8192.0f; diff --git a/tests/benchmark/fixtures/ScaleLayerFixture.h b/tests/benchmark/fixtures/ScaleLayerFixture.h index a6a798f5b1..59cc12c77d 100644 --- a/tests/benchmark/fixtures/ScaleLayerFixture.h +++ b/tests/benchmark/fixtures/ScaleLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -40,7 +40,6 @@ template <typename TensorType, typename Function, typename Accessor, typename T> class ScaleLayerFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy, float sx, float sy, DataType data_type) { constexpr float max_width = 8192.0f; @@ -63,8 +62,8 @@ public: scale_layer.configure(&src, &dst, ScaleKernelInfo{ policy, border_mode, constant_border_value, sampling_policy }); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); diff --git a/tests/benchmark_examples/RunExample.cpp b/tests/benchmark_examples/RunExample.cpp index e5dfe74d0e..3e56ea2e64 100644 --- a/tests/benchmark_examples/RunExample.cpp +++ b/tests/benchmark_examples/RunExample.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021,2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,6 +26,7 @@ #define BENCHMARK_EXAMPLES #include "utils/Utils.cpp" +#include "arm_compute/core/Version.h" #include "arm_compute/runtime/Scheduler.h" #include "tests/framework/Framework.h" #include "tests/framework/Macros.h" @@ -128,7 +129,17 @@ int run_example(int argc, char **argv, std::unique_ptr<Example> example) CLGEMMHeuristicsHandle gemm_h; if(opencl_is_available()) { - auto ctx_dev_err = create_opencl_context_and_device(); + CLBackendType backend_type = CLBackendType::Native; + for(auto &arg : example_args->value()) + { + if(arg.find("--target=clvk") != std::string::npos) + { + backend_type = CLBackendType::Clvk; + break; + } + } + + auto ctx_dev_err = create_opencl_context_and_device(backend_type); ARM_COMPUTE_ERROR_ON_MSG(std::get<2>(ctx_dev_err) != CL_SUCCESS, "Failed to create OpenCL context"); CLScheduler::get() .default_init_with_context(std::get<1>(ctx_dev_err), std::get<0>(ctx_dev_err), nullptr, &gemm_h); diff --git a/tests/datasets/ActivationFunctionsDataset.h b/tests/datasets/ActivationFunctionsDataset.h index 1f3313c476..9b0d775376 100644 --- a/tests/datasets/ActivationFunctionsDataset.h +++ b/tests/datasets/ActivationFunctionsDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2019, 2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -53,7 +53,10 @@ public: ActivationLayerInfo::ActivationFunction::SQRT, ActivationLayerInfo::ActivationFunction::SQUARE, ActivationLayerInfo::ActivationFunction::TANH, - ActivationLayerInfo::ActivationFunction::IDENTITY + ActivationLayerInfo::ActivationFunction::IDENTITY, +#ifdef __aarch64__ + ActivationLayerInfo::ActivationFunction::GELU, +#endif /* __aarch64__ */ }) { } diff --git a/tests/datasets/BatchToSpaceDataset.h b/tests/datasets/BatchToSpaceDataset.h index 1edd457aad..2670af50df 100644 --- a/tests/datasets/BatchToSpaceDataset.h +++ b/tests/datasets/BatchToSpaceDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2019, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -38,15 +38,17 @@ namespace datasets class BatchToSpaceLayerDataset { public: - using type = std::tuple<TensorShape, TensorShape, TensorShape>; + using type = std::tuple<TensorShape, std::vector<int32_t>, CropInfo, TensorShape>; struct iterator { - iterator(std::vector<TensorShape>::const_iterator src_it, - std::vector<TensorShape>::const_iterator block_shape_it, - std::vector<TensorShape>::const_iterator dst_it) + iterator(std::vector<TensorShape>::const_iterator src_it, + std::vector<std::vector<int32_t>>::const_iterator block_shape_it, + std::vector<CropInfo>::const_iterator crop_info_it, + std::vector<TensorShape>::const_iterator dst_it) : _src_it{ std::move(src_it) }, _block_shape_it{ std::move(block_shape_it) }, + _crop_info_it{ std::move(crop_info_it) }, _dst_it{ std::move(dst_it) } { } @@ -56,44 +58,48 @@ public: std::stringstream description; description << "In=" << *_src_it << ":"; description << "BlockShape=" << *_block_shape_it << ":"; + description << "CropInfo=" << *_crop_info_it << ":"; description << "Out=" << *_dst_it; return description.str(); } BatchToSpaceLayerDataset::type operator*() const { - return std::make_tuple(*_src_it, *_block_shape_it, *_dst_it); + return std::make_tuple(*_src_it, *_block_shape_it, *_crop_info_it, *_dst_it); } iterator &operator++() { ++_src_it; ++_block_shape_it; + ++_crop_info_it; ++_dst_it; return *this; } private: - std::vector<TensorShape>::const_iterator _src_it; - std::vector<TensorShape>::const_iterator _block_shape_it; - std::vector<TensorShape>::const_iterator _dst_it; + std::vector<TensorShape>::const_iterator _src_it; + std::vector<std::vector<int32_t>>::const_iterator _block_shape_it; + std::vector<CropInfo>::const_iterator _crop_info_it; + std::vector<TensorShape>::const_iterator _dst_it; }; iterator begin() const { - return iterator(_src_shapes.begin(), _block_shape_shapes.begin(), _dst_shapes.begin()); + return iterator(_src_shapes.begin(), _block_shapes.begin(), _crop_infos.begin(), _dst_shapes.begin()); } int size() const { - return std::min(_src_shapes.size(), std::min(_block_shape_shapes.size(), _dst_shapes.size())); + return std::min(std::min(std::min(_src_shapes.size(), _block_shapes.size()), _crop_infos.size()), _dst_shapes.size()); } - void add_config(TensorShape src, TensorShape block_shape, TensorShape dst) + void add_config(const TensorShape &src, const std::vector<int32_t> &block_shape, const CropInfo &crop_info, const TensorShape &dst) { _src_shapes.emplace_back(std::move(src)); - _block_shape_shapes.emplace_back(std::move(block_shape)); + _block_shapes.emplace_back(std::move(block_shape)); + _crop_infos.emplace_back(std::move(crop_info)); _dst_shapes.emplace_back(std::move(dst)); } @@ -102,35 +108,60 @@ protected: BatchToSpaceLayerDataset(BatchToSpaceLayerDataset &&) = default; private: - std::vector<TensorShape> _src_shapes{}; - std::vector<TensorShape> _block_shape_shapes{}; - std::vector<TensorShape> _dst_shapes{}; + std::vector<TensorShape> _src_shapes{}; + std::vector<std::vector<int32_t>> _block_shapes{}; + std::vector<CropInfo> _crop_infos{}; + std::vector<TensorShape> _dst_shapes{}; }; +/** Follow NCHW data layout across all datasets. I.e. + * TensorShape(Width(X), Height(Y), Channel(Z), Batch(W)) + */ + class SmallBatchToSpaceLayerDataset final : public BatchToSpaceLayerDataset { public: SmallBatchToSpaceLayerDataset() { - add_config(TensorShape(1U, 1U, 1U, 4U), TensorShape(2U), TensorShape(2U, 2U, 1U, 1U)); - add_config(TensorShape(3U, 1U, 1U, 4U), TensorShape(2U), TensorShape(6U, 2U, 1U, 1U)); - add_config(TensorShape(1U, 2U, 2U, 4U), TensorShape(2U), TensorShape(2U, 4U, 2U, 1U)); - add_config(TensorShape(1U, 3U, 1U, 8U), TensorShape(2U), TensorShape(2U, 6U, 1U, 2U)); - add_config(TensorShape(3U, 4U, 1U, 4U), TensorShape(2U), TensorShape(6U, 8U, 1U, 1U)); - add_config(TensorShape(1U, 1U, 1U, 8U), TensorShape(4U, 2U), TensorShape(4U, 2U, 1U, 1U)); - add_config(TensorShape(3U, 1U, 1U, 8U), TensorShape(2U, 4U), TensorShape(6U, 4U, 1U, 1U)); + // Block size = 1 (effectively no batch to space) + add_config(TensorShape(1U, 1U, 1U, 4U), { 1U, 1U }, CropInfo(), TensorShape(1U, 1U, 1U, 4U)); + add_config(TensorShape(8U, 2U, 4U, 3U), { 1U, 1U }, CropInfo(), TensorShape(8U, 2U, 4U, 3U)); + // Same block size in both x and y + add_config(TensorShape(3U, 2U, 1U, 4U), { 2U, 2U }, CropInfo(), TensorShape(6U, 4U, 1U, 1U)); + add_config(TensorShape(1U, 3U, 2U, 9U), { 3U, 3U }, CropInfo(), TensorShape(3U, 9U, 2U, 1U)); + // Different block size in x and y + add_config(TensorShape(5U, 7U, 7U, 4U), { 2U, 1U }, CropInfo(), TensorShape(10U, 7U, 7U, 2U)); + add_config(TensorShape(3U, 3U, 1U, 8U), { 1U, 2U }, CropInfo(), TensorShape(3U, 6U, 1U, 4U)); + add_config(TensorShape(5U, 2U, 2U, 6U), { 3U, 2U }, CropInfo(), TensorShape(15U, 4U, 2U, 1U)); } }; +/** Relative small shapes that are still large enough to leave room for testing cropping of the output shape + */ +class SmallBatchToSpaceLayerWithCroppingDataset final : public BatchToSpaceLayerDataset +{ +public: + SmallBatchToSpaceLayerWithCroppingDataset() + { + // Crop in both dims + add_config(TensorShape(5U, 3U, 2U, 8U), { 2U, 2U }, CropInfo(1U, 1U, 2U, 1U), TensorShape(8U, 3U, 2U, 2U)); + // Left crop in x dim + add_config(TensorShape(1U, 1U, 1U, 20U), { 4U, 5U }, CropInfo(2U, 1U, 0U, 2U), TensorShape(1U, 3U, 1U, 1U)); + // Left crop in y dim + add_config(TensorShape(3U, 1U, 1U, 8U), { 2U, 4U }, CropInfo(0U, 0U, 2U, 1U), TensorShape(6U, 1U, 1U, 1U)); + } +}; class LargeBatchToSpaceLayerDataset final : public BatchToSpaceLayerDataset { public: LargeBatchToSpaceLayerDataset() { - add_config(TensorShape(64U, 32U, 2U, 4U), TensorShape(2U), TensorShape(128U, 64U, 2U, 1U)); - add_config(TensorShape(128U, 16U, 2U, 16U), TensorShape(2U), TensorShape(512U, 64U, 2U, 1U)); - add_config(TensorShape(16U, 8U, 2U, 8U), TensorShape(4U, 2U), TensorShape(64U, 16U, 2U, 1U)); - add_config(TensorShape(8U, 16U, 2U, 8U), TensorShape(2U, 4U), TensorShape(16U, 64U, 2U, 1U)); + // Same block size in both x and y + add_config(TensorShape(64U, 32U, 2U, 4U), { 2U, 2U }, CropInfo(), TensorShape(128U, 64U, 2U, 1U)); + add_config(TensorShape(128U, 16U, 2U, 18U), { 3U, 3U }, CropInfo(), TensorShape(384U, 48U, 2U, 2U)); + // Different block size in x and y + add_config(TensorShape(16U, 8U, 2U, 8U), { 4U, 1U }, CropInfo(), TensorShape(64U, 8U, 2U, 2U)); + add_config(TensorShape(8U, 16U, 2U, 8U), { 2U, 4U }, CropInfo(), TensorShape(16U, 64U, 2U, 1U)); } }; } // namespace datasets diff --git a/tests/datasets/ChannelShuffleLayerDataset.h b/tests/datasets/ChannelShuffleLayerDataset.h index afab893234..a851480fa1 100644 --- a/tests/datasets/ChannelShuffleLayerDataset.h +++ b/tests/datasets/ChannelShuffleLayerDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018, 2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -105,6 +105,7 @@ class SmallRandomChannelShuffleLayerDataset final : public ChannelShuffleLayerDa public: SmallRandomChannelShuffleLayerDataset() { + add_config(TensorShape(1U, 1U, 605U, 16U), 5); add_config(TensorShape(15U, 16U, 4U, 12U), 2); add_config(TensorShape(21U, 11U, 12U, 7U), 4); add_config(TensorShape(21U, 11U, 12U, 7U), 6); diff --git a/tests/datasets/DepthwiseConvolutionLayerDataset.h b/tests/datasets/DepthwiseConvolutionLayerDataset.h index 86804fb4c6..17e03368ac 100644 --- a/tests/datasets/DepthwiseConvolutionLayerDataset.h +++ b/tests/datasets/DepthwiseConvolutionLayerDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_DATASET -#define ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_DATASET +#ifndef ACL_TESTS_DATASETS_DEPTHWISECONVOLUTIONLAYERDATASET_H +#define ACL_TESTS_DATASETS_DEPTHWISECONVOLUTIONLAYERDATASET_H #include "utils/TypePrinter.h" @@ -121,13 +121,13 @@ public: SmallDepthwiseConvolutionLayerDataset() { add_config(TensorShape(7U, 7U, 1U), Size2D(1U, 1U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(23U, 27U, 5U), Size2D(3U, 5U), PadStrideInfo(2, 1, 0, 0)); + add_config(TensorShape(3U, 3U, 2U), Size2D(2U, 2U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(33U, 27U, 7U), Size2D(7U, 3U), PadStrideInfo(3, 2, 1, 0)); // Asymmetric padding add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 1, 1, 2, 0, DimensionRoundingType::FLOOR)); add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 1, 1, 0, 2, DimensionRoundingType::FLOOR)); // Ceil rounding - add_config(TensorShape(7U, 8U, 5U, 9U), Size2D(8U, 6U), PadStrideInfo(2, 3, 1, 1, 1, 3, DimensionRoundingType::CEIL), Size2D(1U, 2U)); + add_config(TensorShape(7U, 8U, 5U, 9U), Size2D(8U, 6U), PadStrideInfo(2, 3, 1, 1, 1, 3, DimensionRoundingType::CEIL)); } }; @@ -138,20 +138,50 @@ public: LargeDepthwiseConvolutionLayerDataset() { add_config(TensorShape(33U, 27U, 11U), Size2D(3U, 4U), PadStrideInfo(1, 2, 0, 1)); - add_config(TensorShape(17U, 31U, 2U), Size2D(5U, 9U), PadStrideInfo(1, 2, 1, 1)); + add_config(TensorShape(17U, 31U, 2U), Size2D(13U, 9U), PadStrideInfo(1, 2, 1, 1)); add_config(TensorShape(23U, 27U, 5U), Size2D(11U, 3U), PadStrideInfo(1, 2, 0, 0)); add_config(TensorShape(17U, 31U, 2U, 3U), Size2D(5U, 9U), PadStrideInfo(1, 2, 1, 1)); - add_config(TensorShape(233U, 277U, 55U), Size2D(1U, 1U), PadStrideInfo(2, 1, 0, 0)); - add_config(TensorShape(333U, 277U, 77U), Size2D(1U, 1U), PadStrideInfo(3, 2, 1, 0)); - add_config(TensorShape(177U, 311U, 22U), Size2D(3U, 4U), PadStrideInfo(1, 2, 1, 1)); - add_config(TensorShape(233U, 277U, 55U), Size2D(3U, 4U), PadStrideInfo(1, 2, 0, 0)); - add_config(TensorShape(333U, 277U, 77U), Size2D(3U, 4U), PadStrideInfo(2, 3, 0, 1)); - add_config(TensorShape(177U, 311U, 22U), Size2D(3U, 4U), PadStrideInfo(2, 1, 1, 1)); + add_config(TensorShape(133U, 127U, 55U), Size2D(1U, 1U), PadStrideInfo(2, 1, 0, 0)); + add_config(TensorShape(233U, 109U, 77U), Size2D(1U, 1U), PadStrideInfo(3, 2, 1, 0)); + add_config(TensorShape(177U, 111U, 22U), Size2D(3U, 4U), PadStrideInfo(1, 2, 1, 1)); + add_config(TensorShape(233U, 87U, 55U), Size2D(3U, 4U), PadStrideInfo(1, 2, 0, 0)); + add_config(TensorShape(333U, 79U, 77U), Size2D(3U, 4U), PadStrideInfo(2, 3, 0, 1)); + add_config(TensorShape(67U, 211U, 22U), Size2D(3U, 4U), PadStrideInfo(2, 1, 1, 1)); // Asymmetric padding add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 2, 1, 2, 0, DimensionRoundingType::FLOOR)); add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 1, 3, 0, 2, DimensionRoundingType::FLOOR)); add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 1, 0, 1, 0, DimensionRoundingType::FLOOR)); add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); + // Padding greater than kernel size. + add_config(TensorShape(128, 56, 56), Size2D(4, 4), PadStrideInfo(2, 2, 0, 10, 0, 10, DimensionRoundingType::FLOOR)); + } +}; + +class LargeDepthwiseConvolutionLayerDatasetFp16Subset final : public DepthwiseConvolutionLayerDataset +{ +public: + LargeDepthwiseConvolutionLayerDatasetFp16Subset() + { + add_config(TensorShape(33U, 27U, 11U), Size2D(3U, 4U), PadStrideInfo(1, 2, 0, 1)); + add_config(TensorShape(17U, 31U, 2U, 3U), Size2D(5U, 9U), PadStrideInfo(1, 2, 1, 1)); + add_config(TensorShape(233U, 109U, 77U), Size2D(1U, 1U), PadStrideInfo(3, 2, 1, 0)); + add_config(TensorShape(177U, 111U, 22U), Size2D(3U, 4U), PadStrideInfo(1, 2, 1, 1)); + add_config(TensorShape(67U, 211U, 22U), Size2D(3U, 4U), PadStrideInfo(2, 1, 1, 1)); + // Asymmetric padding + add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 1, 3, 0, 2, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 1, 0, 1, 0, DimensionRoundingType::FLOOR)); + // Padding greater than kernel size. + add_config(TensorShape(128, 56, 56), Size2D(4, 4), PadStrideInfo(2, 2, 0, 10, 0, 10, DimensionRoundingType::FLOOR)); + } +}; + +/** Dataset containing large kernel size for generic depthwise convolution. */ +class LargeKernelSizeDepthwiseConvolutionLayerNHWCDataset final : public DepthwiseConvolutionLayerDataset +{ +public: + LargeKernelSizeDepthwiseConvolutionLayerNHWCDataset() + { + add_config(TensorShape(6U, 210U, 8U), Size2D(4U, 194U), PadStrideInfo(1, 1, 0, 0)); } }; @@ -186,22 +216,39 @@ class LargeDepthwiseConvolutionLayerDataset3x3 final : public DepthwiseConvoluti public: LargeDepthwiseConvolutionLayerDataset3x3() { - add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 1, 1, 1)); add_config(TensorShape(21U, 31U, 9U, 4U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 0)); - add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 1)); - add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 1)); add_config(TensorShape(21U, 31U, 9U, 4U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 0)); + add_config(TensorShape(21U, 31U, 9U, 4U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 2)); + + add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 2)); add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(2, 1, 0, 1)); - add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 1)); - add_config(TensorShape(21U, 31U, 9U, 4U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 0)); - add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 1)); - add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 1)); - add_config(TensorShape(233U, 277U, 55U, 3U), Size2D(3U, 3U), PadStrideInfo(2, 1, 0, 0)); - add_config(TensorShape(177U, 311U, 22U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 1)); - add_config(TensorShape(233U, 277U, 55U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 0)); - add_config(TensorShape(333U, 277U, 77U, 5U), Size2D(3U, 3U), PadStrideInfo(2, 3, 0, 1)); - add_config(TensorShape(177U, 311U, 22U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 1)); - // Width and height are a multipile of the processing tile size + add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(2, 2, 2, 1)); + + add_config(TensorShape(77U, 209U, 22U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 1)); + add_config(TensorShape(123U, 76U, 55U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 0)); + add_config(TensorShape(133U, 277U, 77U), Size2D(3U, 3U), PadStrideInfo(2, 3, 0, 0)); + add_config(TensorShape(77U, 95U, 22U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 1)); + + // Width and height are a multiple of the processing tile size + add_config(TensorShape(32U, 21U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 1, 0, 1)); + } +}; + +class LargeDepthwiseConvolutionLayerDataset3x3Fp16Subset final : public DepthwiseConvolutionLayerDataset +{ +public: + LargeDepthwiseConvolutionLayerDataset3x3Fp16Subset() + { + add_config(TensorShape(21U, 31U, 9U, 4U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 2)); + + add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 2)); + add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(2, 1, 0, 1)); + + add_config(TensorShape(123U, 76U, 55U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 0)); + add_config(TensorShape(77U, 95U, 22U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 1)); + + // Width and height are a multiple of the processing tile size add_config(TensorShape(32U, 21U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 1, 0, 1)); } }; @@ -221,8 +268,6 @@ public: add_config(TensorShape(9U, 9U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)); add_config(TensorShape(9U, 9U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), Size2D(2U, 2U)); add_config(TensorShape(9U, 9U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 1, DimensionRoundingType::CEIL)); - // TODO(COMPMID-2464): Enable once dilated conv with stride 2 is supported - // add_config(TensorShape(9U, 9U, 1U), Size2D(3U, 3U), PadStrideInfo(2, 2, 2, 2, DimensionRoundingType::CEIL), Size2D(2U, 2U)); } }; /** Dataset containing optimized, 3x3 depthwise convolution shapes. */ @@ -232,14 +277,14 @@ public: LargeOptimizedDepthwiseConvolutionLayerDataset3x3() { // Stride 1 - add_config(TensorShape(233U, 277U, 16U), Size2D(3U, 3U), PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL)); - add_config(TensorShape(233U, 7U, 16U), Size2D(3U, 3U), PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL)); + add_config(TensorShape(233U, 173U, 16U), Size2D(3U, 3U), PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL)); + add_config(TensorShape(133U, 7U, 16U), Size2D(3U, 3U), PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL)); add_config(TensorShape(7U, 7U, 21U), Size2D(3U, 3U), PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL)); add_config(TensorShape(28U, 28U, 16U), Size2D(3U, 3U), PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL)); add_config(TensorShape(28U, 28U, 16U), Size2D(3U, 3U), PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL)); // Stride 2 - add_config(TensorShape(233U, 277U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)); - add_config(TensorShape(233U, 277U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 1, 1, 1, DimensionRoundingType::CEIL)); + add_config(TensorShape(133U, 97U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)); + add_config(TensorShape(153U, 77U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 1, 1, 1, DimensionRoundingType::CEIL)); add_config(TensorShape(8U, 8U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::FLOOR)); add_config(TensorShape(8U, 8U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL)); add_config(TensorShape(8U, 8U, 33U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL)); @@ -260,14 +305,31 @@ public: add_config(TensorShape(7U, 7U, 16U), Size2D(5U, 5U), PadStrideInfo(1, 1, 4, 4, DimensionRoundingType::CEIL), Size2D(2U, 2U)); // Stride 2 add_config(TensorShape(9U, 9U, 32U), Size2D(5U, 5U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)); - // TODO(COMPMID-2464): Enable once dilated conv with stride 2 is supported - // add_config(TensorShape(9U, 9U, 32U), Size2D(5U, 5U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), Size2D(2U, 2U)); + add_config(TensorShape(9U, 9U, 32U), Size2D(5U, 5U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), Size2D(2U, 2U)); add_config(TensorShape(9U, 9U, 32U), Size2D(5U, 5U), PadStrideInfo(2, 2, 2, 2, 2, 2, DimensionRoundingType::CEIL)); - // TODO(COMPMID-2464): Enable once dilated conv with stride 2 is supported - // add_config(TensorShape(9U, 9U, 32U), Size2D(5U, 5U), PadStrideInfo(2, 2, 4, 4, 4, 4, DimensionRoundingType::CEIL), Size2D(2U, 2U)); + add_config(TensorShape(9U, 9U, 32U), Size2D(5U, 5U), PadStrideInfo(2, 2, 4, 4, 4, 4, DimensionRoundingType::CEIL), Size2D(2U, 2U)); + } +}; + +/** Dataset containing in-place 1x1 depthwise convolution shapes. + * + * For a depthwise convolution op to be in-place: + * * Output has the same shape as the input; + * * 1x1 filter + * * stride == 1 + * * dilations == 1 + * * No paddings +*/ +class SmallInPlaceDepthwiseConvolutionLayerDataset final : public DepthwiseConvolutionLayerDataset +{ +public: + SmallInPlaceDepthwiseConvolutionLayerDataset() + { + add_config(TensorShape(7U, 7U, 1U), Size2D(1U, 1U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(11U, 13U, 16U), Size2D(1U, 1U), PadStrideInfo(1, 1, 0, 0)); } }; } // namespace datasets } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_DATASET */
\ No newline at end of file +#endif // ACL_TESTS_DATASETS_DEPTHWISECONVOLUTIONLAYERDATASET_H diff --git a/tests/datasets/DilatedDepthwiseConvolutionLayerDataset.h b/tests/datasets/DilatedDepthwiseConvolutionLayerDataset.h index 9e2a3cf548..a58650a5e4 100644 --- a/tests/datasets/DilatedDepthwiseConvolutionLayerDataset.h +++ b/tests/datasets/DilatedDepthwiseConvolutionLayerDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_DILATED_CONVOLUTION_LAYER_DATASET -#define ARM_COMPUTE_TEST_DILATED_CONVOLUTION_LAYER_DATASET +#ifndef ACL_TESTS_DATASETS_DILATEDDEPTHWISECONVOLUTIONLAYERDATASET_H +#define ACL_TESTS_DATASETS_DILATEDDEPTHWISECONVOLUTIONLAYERDATASET_H #include "utils/TypePrinter.h" @@ -48,6 +48,7 @@ public: add_config(TensorShape(7U, 7U, 1U), Size2D(3U, 2U), PadStrideInfo(1, 1, 0, 0), Size2D(2U, 1U)); add_config(TensorShape(7U, 7U, 1U), Size2D(3U, 2U), PadStrideInfo(2, 1, 0, 0), Size2D(2U, 2U)); add_config(TensorShape(7U, 7U, 1U), Size2D(3U, 2U), PadStrideInfo(2, 2, 0, 0), Size2D(1U, 2U)); + add_config(TensorShape(7U, 8U, 5U, 9U), Size2D(8U, 6U), PadStrideInfo(2, 3, 1, 1, 1, 3, DimensionRoundingType::CEIL), Size2D(1U, 2U)); add_config(TensorShape(7U, 8U, 1U), Size2D(2U, 3U), PadStrideInfo(1, 2, 0, 0), Size2D(2U, 2U)); add_config(TensorShape(23U, 27U, 5U), Size2D(3U, 5U), PadStrideInfo(2, 1, 0, 0), Size2D(2U, 1U)); @@ -96,15 +97,16 @@ public: LargeDepthwiseDilatedConvolutionLayerDataset() { add_config(TensorShape(33U, 27U, 11U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 1), Size2D(2U, 1U)); - add_config(TensorShape(17U, 31U, 2U), Size2D(5U, 9U), PadStrideInfo(1, 2, 1, 1), Size2D(1U, 2U)); add_config(TensorShape(23U, 27U, 5U), Size2D(11U, 3U), PadStrideInfo(1, 2, 0, 0), Size2D(1U, 3U)); + add_config(TensorShape(17U, 31U, 2U), Size2D(5U, 9U), PadStrideInfo(1, 2, 1, 1), Size2D(1U, 2U)); add_config(TensorShape(17U, 31U, 2U, 3U), Size2D(5U, 9U), PadStrideInfo(1, 2, 1, 1), Size2D(2U, 2U)); - add_config(TensorShape(233U, 277U, 55U), Size2D(3U, 3U), PadStrideInfo(2, 1, 0, 0), Size2D(2U, 2U)); - add_config(TensorShape(333U, 277U, 77U), Size2D(3U, 3U), PadStrideInfo(3, 2, 1, 0), Size2D(3U, 2U)); - add_config(TensorShape(177U, 311U, 22U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 1), Size2D(2U, 2U)); - add_config(TensorShape(233U, 277U, 55U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 0), Size2D(5U, 2U)); - add_config(TensorShape(333U, 277U, 77U), Size2D(3U, 3U), PadStrideInfo(2, 3, 0, 1), Size2D(2U, 2U)); - add_config(TensorShape(177U, 311U, 22U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 1), Size2D(2U, 5U)); + + add_config(TensorShape(133U, 177U, 55U), Size2D(3U, 3U), PadStrideInfo(2, 1, 0, 0), Size2D(2U, 2U)); + add_config(TensorShape(233U, 177U, 77U), Size2D(3U, 3U), PadStrideInfo(3, 2, 1, 0), Size2D(3U, 2U)); + add_config(TensorShape(77U, 211U, 22U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 1), Size2D(2U, 2U)); + add_config(TensorShape(133U, 177U, 55U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 0), Size2D(5U, 2U)); + add_config(TensorShape(233U, 177U, 77U), Size2D(3U, 3U), PadStrideInfo(2, 3, 0, 1), Size2D(2U, 2U)); + add_config(TensorShape(177U, 211U, 22U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 1), Size2D(2U, 5U)); // Asymmetric padding add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 2, 1, 2, 0, DimensionRoundingType::FLOOR), Size2D(3U, 2U)); add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 1, 3, 0, 2, DimensionRoundingType::FLOOR), Size2D(4U, 4U)); @@ -113,6 +115,24 @@ public: } }; +class LargeDepthwiseDilatedConvolutionLayerDatasetFp16Subset final : public DepthwiseConvolutionLayerDataset +{ +public: + LargeDepthwiseDilatedConvolutionLayerDatasetFp16Subset() + { + add_config(TensorShape(33U, 27U, 11U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 1), Size2D(2U, 1U)); + add_config(TensorShape(23U, 27U, 5U), Size2D(11U, 3U), PadStrideInfo(1, 2, 0, 0), Size2D(1U, 3U)); + add_config(TensorShape(17U, 31U, 2U, 3U), Size2D(5U, 9U), PadStrideInfo(1, 2, 1, 1), Size2D(2U, 2U)); + + add_config(TensorShape(77U, 211U, 22U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 1), Size2D(2U, 2U)); + add_config(TensorShape(133U, 177U, 55U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 0), Size2D(5U, 2U)); + add_config(TensorShape(177U, 211U, 22U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 1), Size2D(2U, 5U)); + // Asymmetric padding + add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 1, 3, 0, 2, DimensionRoundingType::FLOOR), Size2D(4U, 4U)); + add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 1, 0, 1, 0, DimensionRoundingType::FLOOR), Size2D(2U, 2U)); + } +}; + /** Dataset containing large, 3x3 depthwise convolution shapes with dilation. */ class LargeDepthwiseDilatedConvolutionLayerDataset3x3 final : public DepthwiseConvolutionLayerDataset { @@ -120,24 +140,44 @@ public: LargeDepthwiseDilatedConvolutionLayerDataset3x3() { add_config(TensorShape(32U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 1, 0, 1), Size2D(2U, 1U)); + add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 1, 1, 1), Size2D(2U, 2U)); - add_config(TensorShape(21U, 31U, 9U, 4U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 0), Size2D(2U, 2U)); add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 1), Size2D(2U, 1U)); add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 1), Size2D(2U, 3U)); - add_config(TensorShape(21U, 31U, 9U, 4U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 0), Size2D(2U, 1U)); add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(2, 1, 0, 1), Size2D(3U, 3U)); add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 1), Size2D(2U, 2U)); + add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 1), Size2D(4U, 4U)); + add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 1), Size2D(2U, 5U)); + + add_config(TensorShape(21U, 31U, 9U, 4U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 0), Size2D(2U, 2U)); + add_config(TensorShape(21U, 31U, 9U, 4U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 0), Size2D(2U, 1U)); add_config(TensorShape(21U, 31U, 9U, 4U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 0), Size2D(2U, 2U)); + + add_config(TensorShape(133U, 177U, 55U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 0), Size2D(5U, 5U)); + add_config(TensorShape(233U, 77U, 77U), Size2D(3U, 3U), PadStrideInfo(2, 3, 0, 0), Size2D(4U, 4U)); + add_config(TensorShape(77U, 211U, 22U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 1), Size2D(4U, 4U)); + add_config(TensorShape(77U, 111U, 22U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 1), Size2D(3U, 3U)); + } +}; + +class LargeDepthwiseDilatedConvolutionLayerDataset3x3Fp16Subset final : public DepthwiseConvolutionLayerDataset +{ +public: + LargeDepthwiseDilatedConvolutionLayerDataset3x3Fp16Subset() + { + add_config(TensorShape(32U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 1, 0, 1), Size2D(2U, 1U)); + + add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 1), Size2D(2U, 1U)); + add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(2, 1, 0, 1), Size2D(3U, 3U)); add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 1), Size2D(4U, 4U)); add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 1), Size2D(2U, 5U)); - add_config(TensorShape(233U, 277U, 55U, 3U), Size2D(3U, 3U), PadStrideInfo(2, 1, 0, 0), Size2D(3U, 3U)); - add_config(TensorShape(177U, 311U, 22U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 1), Size2D(4U, 4U)); - add_config(TensorShape(233U, 277U, 55U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 0), Size2D(5U, 5U)); - add_config(TensorShape(333U, 277U, 77U, 5U), Size2D(3U, 3U), PadStrideInfo(2, 3, 0, 1), Size2D(4U, 4U)); - add_config(TensorShape(177U, 311U, 22U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 1), Size2D(3U, 3U)); + + add_config(TensorShape(21U, 31U, 9U, 10U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 0), Size2D(2U, 2U)); + + add_config(TensorShape(77U, 111U, 22U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 1), Size2D(3U, 3U)); } }; } // namespace datasets } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_DILATED_CONVOLUTION_LAYER_DATASET */
\ No newline at end of file +#endif // ACL_TESTS_DATASETS_DILATEDDEPTHWISECONVOLUTIONLAYERDATASET_H diff --git a/tests/datasets/DynamicFusionDataset.h b/tests/datasets/DynamicFusionDataset.h new file mode 100644 index 0000000000..5a1453b9ab --- /dev/null +++ b/tests/datasets/DynamicFusionDataset.h @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef TESTS_DATASETS_DYNAMICFUSIONDATASET +#define TESTS_DATASETS_DYNAMICFUSIONDATASET + +#include "utils/TypePrinter.h" + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ +class DynamicFusionThreeInputs +{ +public: + using type = std::tuple<TensorShape, TensorShape, TensorShape>; + + struct iterator + { + iterator(std::vector<TensorShape>::const_iterator shape0_it, + std::vector<TensorShape>::const_iterator shape1_it, + std::vector<TensorShape>::const_iterator shape2_it) + : _shape0_it{ std::move(shape0_it) }, + _shape1_it{ std::move(shape1_it) }, + _shape2_it{ std::move(shape2_it) } + { + } + + std::string description() const + { + std::stringstream description; + description << "shape0=" << *_shape0_it << ":"; + description << "shape1=" << *_shape1_it << ":"; + description << "shape2=" << *_shape2_it << ":"; + + return description.str(); + } + + DynamicFusionThreeInputs::type operator*() const + { + return std::make_tuple(*_shape0_it, *_shape1_it, *_shape2_it); + } + + iterator &operator++() + { + ++_shape0_it; + ++_shape1_it; + ++_shape2_it; + + return *this; + } + + private: + std::vector<TensorShape>::const_iterator _shape0_it; + std::vector<TensorShape>::const_iterator _shape1_it; + std::vector<TensorShape>::const_iterator _shape2_it; + }; + + iterator begin() const + { + return iterator(_shape0_shapes.begin(), _shape1_shapes.begin(), _shape2_shapes.begin()); + } + + int size() const + { + return std::min(_shape0_shapes.size(), std::min(_shape1_shapes.size(), _shape2_shapes.size())); + } + + void add_config(TensorShape shape0, TensorShape shape1, TensorShape shape2) + { + _shape0_shapes.emplace_back(std::move(shape0)); + _shape1_shapes.emplace_back(std::move(shape1)); + _shape2_shapes.emplace_back(std::move(shape2)); + } + +protected: + DynamicFusionThreeInputs() = default; + DynamicFusionThreeInputs(DynamicFusionThreeInputs &&) = default; + +private: + std::vector<TensorShape> _shape0_shapes{}; + std::vector<TensorShape> _shape1_shapes{}; + std::vector<TensorShape> _shape2_shapes{}; +}; + +class DynamicFusionElementwiseBinaryTwoOpsSmallShapes final : public DynamicFusionThreeInputs +{ +public: + DynamicFusionElementwiseBinaryTwoOpsSmallShapes() + { + add_config(TensorShape{ 9U, 9U, 5U }, TensorShape{ 9U, 9U, 5U }, TensorShape{ 9U, 9U, 5U }); + add_config(TensorShape{ 9U, 9U, 5U }, TensorShape{ 1U, 1U, 1U } /* Broadcast in X, Y, Z*/, TensorShape{ 9U, 9U, 5U }); + add_config(TensorShape{ 27U, 13U, 2U }, TensorShape{ 27U, 1U, 1U } /* Broadcast in Y and Z*/, TensorShape{ 27U, 13U, 2U }); + add_config(TensorShape{ 27U, 13U, 2U }, TensorShape{ 27U, 13U, 2U }, TensorShape{ 27U, 1U, 1U } /* Broadcast in Y and Z*/); + } +}; + +} // namespace datasets +} // namespace test +} // namespace arm_compute +#endif /* TESTS_DATASETS_DYNAMICFUSIONDATASET */ diff --git a/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h b/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h index 7ab068c211..b0ad4879ba 100644 --- a/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h +++ b/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_GEMMLOWPOUTPUT_DATASET -#define ARM_COMPUTE_TEST_GEMMLOWPOUTPUT_DATASET +#ifndef ACL_TESTS_DATASETS_GEMMLOWPFUSEDOFFSETOUTPUTDATASET_H +#define ACL_TESTS_DATASETS_GEMMLOWPFUSEDOFFSETOUTPUTDATASET_H #include "utils/TypePrinter.h" @@ -40,21 +40,17 @@ namespace datasets class GEMMLowpFusedOffsetOutputDataset { public: - using type = std::tuple<TensorShape, TensorShape, TensorShape, int32_t, int32_t, GEMMLowpOutputStageInfo>; + using type = std::tuple<TensorShape, TensorShape, TensorShape, GEMMLowpOutputStageType>; struct iterator { iterator(std::vector<TensorShape>::const_iterator a_it, std::vector<TensorShape>::const_iterator b_it, std::vector<TensorShape>::const_iterator c_it, - std::vector<int32_t>::const_iterator a_offset_it, - std::vector<int32_t>::const_iterator b_offset_it, - std::vector<GEMMLowpOutputStageInfo>::const_iterator output_stage_it) + std::vector<GEMMLowpOutputStageType>::const_iterator output_stage_it) : _a_it{ std::move(a_it) }, _b_it{ std::move(b_it) }, _c_it{ std::move(c_it) }, - _a_offset_it{ std::move(a_offset_it) }, - _b_offset_it{ std::move(b_offset_it) }, _output_stage_it{ std::move(output_stage_it) } { } @@ -65,33 +61,14 @@ public: description << "A=" << *_a_it << ":"; description << "B=" << *_b_it << ":"; description << "C=" << *_c_it << ":"; - description << "a_offset=" << *_a_offset_it << ":"; - description << "b_offset=" << *_b_offset_it << ":"; - description << "output_type=" << string_from_gemmlowp_output_stage((*_output_stage_it).type) << ":"; - description << "output_offset=" << (*_output_stage_it).gemmlowp_offset << ":"; - description << "output_multiplier={"; - for(auto it = (*_output_stage_it).gemmlowp_multipliers.begin(); it != (*_output_stage_it).gemmlowp_multipliers.end(); ++it) - { - description << (*it) << ", "; - } - description << "}:"; - description << "output_shift={"; - - for(auto it = (*_output_stage_it).gemmlowp_shifts.begin(); it != (*_output_stage_it).gemmlowp_shifts.end(); ++it) - { - description << (*it) << ", "; - } - description << "}:"; - description << "output_min=" << (*_output_stage_it).gemmlowp_min_bound << ":"; - description << "output_max=" << (*_output_stage_it).gemmlowp_max_bound << ":"; - description << "is_quantized_per_channel=" << (*_output_stage_it).is_quantized_per_channel << ":"; + description << "output_type=" << string_from_gemmlowp_output_stage(*_output_stage_it) << ":"; return description.str(); } GEMMLowpFusedOffsetOutputDataset::type operator*() const { - return std::make_tuple(*_a_it, *_b_it, *_c_it, *_a_offset_it, *_b_offset_it, *_output_stage_it); + return std::make_tuple(*_a_it, *_b_it, *_c_it, *_output_stage_it); } iterator &operator++() @@ -99,8 +76,6 @@ public: ++_a_it; ++_b_it; ++_c_it; - ++_a_offset_it; - ++_b_offset_it; ++_output_stage_it; return *this; @@ -110,45 +85,27 @@ public: std::vector<TensorShape>::const_iterator _a_it; std::vector<TensorShape>::const_iterator _b_it; std::vector<TensorShape>::const_iterator _c_it; - std::vector<int32_t>::const_iterator _a_offset_it; - std::vector<int32_t>::const_iterator _b_offset_it; - std::vector<GEMMLowpOutputStageInfo>::const_iterator _output_stage_it; + std::vector<GEMMLowpOutputStageType>::const_iterator _output_stage_it; }; iterator begin() const { - return iterator(_a_shapes.begin(), _b_shapes.begin(), _c_shapes.begin(), _a_offset.begin(), _b_offset.begin(), _output_stage.begin()); + return iterator(_a_shapes.begin(), _b_shapes.begin(), _c_shapes.begin(), _output_stage.begin()); } int size() const { - return std::min(_a_shapes.size(), std::min(_b_shapes.size(), std::min(_c_shapes.size(), std::min(_a_offset.size(), std::min(_b_offset.size(), _output_stage.size()))))); + return std::min(_a_shapes.size(), std::min(_b_shapes.size(), std::min(_c_shapes.size(), _output_stage.size()))); } - void add_config(TensorShape a, TensorShape b, TensorShape c, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage) + void add_config(TensorShape a, TensorShape b, TensorShape c, GEMMLowpOutputStageType output_stage) { _a_shapes.emplace_back(std::move(a)); _b_shapes.emplace_back(std::move(b)); _c_shapes.emplace_back(std::move(c)); - _a_offset.emplace_back(std::move(a_offset)); - _b_offset.emplace_back(std::move(b_offset)); _output_stage.emplace_back(std::move(output_stage)); } - GEMMLowpOutputStageInfo OutputStageInfo(GEMMLowpOutputStageType type, int32_t offset, int32_t multiplier, int32_t shift, int32_t min, int32_t max) - { - GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo(); - output_stage.type = type; - output_stage.gemmlowp_offset = offset; - output_stage.gemmlowp_multiplier = multiplier; - output_stage.gemmlowp_shift = shift; - output_stage.gemmlowp_min_bound = min; - output_stage.gemmlowp_max_bound = max; - output_stage.gemmlowp_multipliers.push_back(multiplier); - output_stage.gemmlowp_shifts.push_back(shift); - return output_stage; - } - protected: GEMMLowpFusedOffsetOutputDataset() = default; GEMMLowpFusedOffsetOutputDataset(GEMMLowpFusedOffsetOutputDataset &&) = default; @@ -157,9 +114,7 @@ private: std::vector<TensorShape> _a_shapes{}; std::vector<TensorShape> _b_shapes{}; std::vector<TensorShape> _c_shapes{}; - std::vector<int32_t> _a_offset{}; - std::vector<int32_t> _b_offset{}; - std::vector<GEMMLowpOutputStageInfo> _output_stage{}; + std::vector<GEMMLowpOutputStageType> _output_stage{}; }; class SmallGEMMLowpFusedOffsetOutputUint8Dataset final : public GEMMLowpFusedOffsetOutputDataset @@ -167,45 +122,72 @@ class SmallGEMMLowpFusedOffsetOutputUint8Dataset final : public GEMMLowpFusedOff public: SmallGEMMLowpFusedOffsetOutputUint8Dataset() { - add_config(TensorShape(21U, 13U), TensorShape(1U, 21U), TensorShape(1U, 13U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -100, 2, 13, 10, 210)); - add_config(TensorShape(52U, 13U), TensorShape(33U, 52U), TensorShape(33U, 13U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 100, 2, 13, 10, 210)); - add_config(TensorShape(31U, 27U), TensorShape(23U, 31U), TensorShape(23U, 27U), 18, 23, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 200, 2, 13, 10, 210)); - add_config(TensorShape(32U, 72U), TensorShape(16U, 32U), TensorShape(16U, 72U), -9, 1, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -100, 2, 13, 10, 210)); - - add_config(TensorShape(21U, 1U), TensorShape(43U, 21U), TensorShape(43U, 1U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601600, 10, 10, 210)); - add_config(TensorShape(31U, 3U), TensorShape(72U, 31U), TensorShape(72U, 3U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 0, 254601600, 10, 10, 210)); - add_config(TensorShape(31U, 27U), TensorShape(23U, 31U), TensorShape(23U, 27U), 5, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 2, 254601602, 10, 10, 210)); - add_config(TensorShape(32U, 72U), TensorShape(17U, 32U), TensorShape(17U, 72U), -9, 1, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -1, 254601602, 10, 10, 210)); + add_config(TensorShape(21U, 13U), TensorShape(1U, 21U), TensorShape(1U, 13U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(52U, 13U), TensorShape(33U, 52U), TensorShape(33U, 13U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(31U, 27U), TensorShape(23U, 31U), TensorShape(23U, 27U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(32U, 72U), TensorShape(16U, 32U), TensorShape(16U, 72U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(21U, 1U), TensorShape(43U, 21U), TensorShape(43U, 1U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(31U, 3U), TensorShape(72U, 31U), TensorShape(72U, 3U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(32U, 72U), TensorShape(17U, 32U), TensorShape(17U, 72U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); } }; -class SmallGEMMLowpFusedOffsetOutputInt8Dataset final : public GEMMLowpFusedOffsetOutputDataset +class SmallGEMMLowpFusedBatchedMatMulDataset final : public GEMMLowpFusedOffsetOutputDataset { public: - SmallGEMMLowpFusedOffsetOutputInt8Dataset() + SmallGEMMLowpFusedBatchedMatMulDataset() + { + add_config(TensorShape(4U, 3U), TensorShape(2U, 4U), TensorShape(2U, 3U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(12U, 15U), TensorShape(7U, 12U), TensorShape(7U, 15U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(59U, 17U), TensorShape(36U, 59U), TensorShape(36U, 17U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(2U, 4U, 3U), TensorShape(5U, 2U, 3U), TensorShape(5U, 4U, 3U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(15U, 7U, 3U), TensorShape(29U, 15U, 3U), TensorShape(29U, 7U, 3U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(56U, 17U, 32U), TensorShape(5U, 56U, 32U), TensorShape(5U, 17U, 32U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(13U, 256U, 32U), TensorShape(19U, 13U, 32U), TensorShape(19U, 256U, 32U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + } +}; + +class SmallGEMMLowpFusedOffsetOutputOutput3DUint8Dataset final : public GEMMLowpFusedOffsetOutputDataset +{ +public: + SmallGEMMLowpFusedOffsetOutputOutput3DUint8Dataset() + { + add_config(TensorShape(21U, 1421U, 33U), TensorShape(34U, 21U), TensorShape(34U, 7U, 203U, 33U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(31U, 102U, 55U), TensorShape(23U, 31U), TensorShape(23U, 1U, 102U, 55U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(38U, 1200U, 77U), TensorShape(21U, 38U), TensorShape(21U, 4U, 300U, 77U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(32U, 103U, 99U), TensorShape(17U, 32U), TensorShape(17U, 1U, 103U, 99U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(16U, 1600U, 111U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 111U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(16U, 1600U, 113U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 113U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + } +}; + +class SmallGEMMLowpFusedOffsetOutputInputOutput3DUint8Dataset final : public GEMMLowpFusedOffsetOutputDataset +{ +public: + SmallGEMMLowpFusedOffsetOutputInputOutput3DUint8Dataset() { - add_config(TensorShape(21U, 1U), TensorShape(1U, 21U), TensorShape(1U, 1U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -50, 2, 13, -10, 110)); - add_config(TensorShape(31U, 3U), TensorShape(72U, 31U), TensorShape(72U, 3U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 13, -10, 110)); - add_config(TensorShape(52U, 26U), TensorShape(33U, 52U), TensorShape(33U, 26U), -2, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 13, -10, 110)); - add_config(TensorShape(38U, 43U), TensorShape(21U, 38U), TensorShape(21U, 43U), -3, -2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -40, 2, 13, -10, 110)); - - add_config(TensorShape(21U, 13U), TensorShape(33U, 21U), TensorShape(33U, 13U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -1, 254601600, 10, -10, 110)); - add_config(TensorShape(52U, 26U), TensorShape(33U, 52U), TensorShape(33U, 26U), -2, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 1, 254601600, 10, -10, 110)); - add_config(TensorShape(38U, 43U), TensorShape(21U, 38U), TensorShape(21U, 43U), -3, -2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601602, 10, -10, 110)); - add_config(TensorShape(32U, 72U), TensorShape(17U, 32U), TensorShape(17U, 72U), -9, 1, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -1, 254601602, 10, -10, 110)); + add_config(TensorShape(21U, 7U, 203U, 33U), TensorShape(34U, 21U), TensorShape(34U, 7U, 203U, 33U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(31U, 1U, 102U, 55U), TensorShape(23U, 31U), TensorShape(23U, 1U, 102U, 55U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(38U, 4U, 300U, 77U), TensorShape(21U, 38U), TensorShape(21U, 4U, 300U, 77U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(32U, 1U, 103U, 99U), TensorShape(17U, 32U), TensorShape(17U, 1U, 103U, 99U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(16U, 8U, 200U, 111U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 111U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(16U, 8U, 200U, 113U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 113U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); } }; -class SmallGEMMLowpFusedOffsetOutputPerChannelDataset final : public GEMMLowpFusedOffsetOutputDataset +class SmallGEMMLowpFusedOffsetOutputInt8Dataset final : public GEMMLowpFusedOffsetOutputDataset { public: - SmallGEMMLowpFusedOffsetOutputPerChannelDataset() + SmallGEMMLowpFusedOffsetOutputInt8Dataset() { - add_config(TensorShape(21U, 1U, 6U), TensorShape(43U, 21U, 6U), TensorShape(43U, 1U, 6U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -200, 2, 13, 10, 210)); - add_config(TensorShape(21U, 13U, 3U), TensorShape(33U, 21U, 3U), TensorShape(33U, 13U, 3U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -100, 2, 13, 10, 210)); - add_config(TensorShape(31U, 3U, 2U), TensorShape(72U, 31U, 2U), TensorShape(72U, 3U, 2U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 13, 10, 210)); - add_config(TensorShape(52U, 13U, 7U), TensorShape(33U, 52U, 7U), TensorShape(33U, 13U, 7U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 100, 2, 13, 10, 210)); - add_config(TensorShape(52U, 26U, 8U), TensorShape(33U, 52U, 8U), TensorShape(33U, 26U, 8U), -2, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 13, 10, 210)); + add_config(TensorShape(21U, 1U), TensorShape(1U, 21U), TensorShape(1U, 1U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(31U, 3U), TensorShape(72U, 31U), TensorShape(72U, 3U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(52U, 26U), TensorShape(33U, 52U), TensorShape(33U, 26U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(38U, 43U), TensorShape(21U, 38U), TensorShape(21U, 43U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(21U, 13U), TensorShape(33U, 21U), TensorShape(33U, 13U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(52U, 26U), TensorShape(33U, 52U), TensorShape(33U, 26U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(38U, 43U), TensorShape(21U, 38U), TensorShape(21U, 43U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(32U, 72U), TensorShape(17U, 32U), TensorShape(17U, 72U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); } }; @@ -214,15 +196,12 @@ class LargeGEMMLowpFusedOffsetOutputUint8Dataset final : public GEMMLowpFusedOff public: LargeGEMMLowpFusedOffsetOutputUint8Dataset() { - add_config(TensorShape(923U, 429U), TensorShape(871U, 923U), TensorShape(871U, 429U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -100, 2, 18, 10, 210)); - add_config(TensorShape(873U, 513U), TensorShape(784U, 873U), TensorShape(784U, 513U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 100, 2, 18, 10, 210)); - add_config(TensorShape(1021U, 973U), TensorShape(783U, 1021U), TensorShape(783U, 973U), 5, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 200, 2, 18, 10, 210)); - add_config(TensorShape(941U, 1011U), TensorShape(623U, 941U), TensorShape(623U, 1011U), -9, 1, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -100, 2, 18, 10, 210)); - - add_config(TensorShape(923U, 429U), TensorShape(871U, 923U), TensorShape(871U, 429U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -1, 254601600, 15, 10, 210)); - add_config(TensorShape(873U, 513U), TensorShape(784U, 873U), TensorShape(784U, 513U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 1, 254601600, 15, 10, 210)); - add_config(TensorShape(1021U, 973U), TensorShape(783U, 1021U), TensorShape(783U, 973U), 5, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601602, 15, 10, 210)); - add_config(TensorShape(681U, 1023U), TensorShape(213U, 681U), TensorShape(213U, 1023U), -3, -2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -1, 254601602, 15, 10, 210)); + add_config(TensorShape(923U, 429U), TensorShape(871U, 923U), TensorShape(871U, 429U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(873U, 513U), TensorShape(784U, 873U), TensorShape(784U, 513U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(1021U, 973U), TensorShape(783U, 1021U), TensorShape(783U, 973U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(941U, 1011U), TensorShape(623U, 941U), TensorShape(623U, 1011U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(681U, 1023U), TensorShape(213U, 681U), TensorShape(213U, 1023U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + } }; @@ -231,18 +210,17 @@ class LargeGEMMLowpFusedOffsetOutputInt8Dataset final : public GEMMLowpFusedOffs public: LargeGEMMLowpFusedOffsetOutputInt8Dataset() { - add_config(TensorShape(923U, 1U, 15U), TensorShape(871U, 923U, 15U), TensorShape(871U, 1U, 15U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -50, 2, 18, -10, 110)); - add_config(TensorShape(873U, 7U), TensorShape(784U, 873U), TensorShape(784U, 7U), -1, 3, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 18, -10, 110)); - add_config(TensorShape(697U, 872U), TensorShape(563U, 697U), TensorShape(563U, 872U), -2, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 18, -10, 110)); - add_config(TensorShape(681U, 1023U), TensorShape(213U, 681U), TensorShape(213U, 1023U), -3, -2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -50, 2, 18, -10, 110)); - - add_config(TensorShape(923U, 1U), TensorShape(871U, 923U), TensorShape(871U, 1U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601600, 15, -10, 110)); - add_config(TensorShape(873U, 7U), TensorShape(784U, 873U), TensorShape(784U, 7U), -1, 3, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 0, 254601600, 15, -10, 110)); - add_config(TensorShape(697U, 872U), TensorShape(563U, 697U), TensorShape(563U, 872U), -2, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 2, 254601602, 15, -10, 110)); - add_config(TensorShape(1021U, 973U), TensorShape(783U, 1021U), TensorShape(783U, 973U), 5, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601602, 15, -10, 110)); + add_config(TensorShape(923U, 1U, 15U), TensorShape(871U, 923U, 15U), TensorShape(871U, 1U, 15U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(873U, 7U), TensorShape(784U, 873U), TensorShape(784U, 7U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(697U, 872U), TensorShape(563U, 697U), TensorShape(563U, 872U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(681U, 1023U), TensorShape(213U, 681U), TensorShape(213U, 1023U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(923U, 1U), TensorShape(871U, 923U), TensorShape(871U, 1U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(873U, 7U), TensorShape(784U, 873U), TensorShape(784U, 7U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(697U, 872U), TensorShape(563U, 697U), TensorShape(563U, 872U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(1021U, 973U), TensorShape(783U, 1021U), TensorShape(783U, 973U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); } }; } // namespace datasets } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_GEMMLOWPOUTPUT_DATASET */ +#endif // ACL_TESTS_DATASETS_GEMMLOWPFUSEDOFFSETOUTPUTDATASET_H diff --git a/tests/datasets/GatherDataset.h b/tests/datasets/GatherDataset.h index 29a99d5239..74ea3b4a06 100644 --- a/tests/datasets/GatherDataset.h +++ b/tests/datasets/GatherDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2019, 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -106,6 +106,64 @@ private: std::vector<int> _axis{}; }; +class SmallGatherMultiDimIndicesDataset final : public GatherDataset +{ +public: + SmallGatherMultiDimIndicesDataset() + { + add_config(TensorShape(2U, 6U), TensorShape(4U, 9U), 1); + add_config(TensorShape(15U, 15U), TensorShape(3U, 2U, 2U), 1); + add_config(TensorShape(15U, 15U), TensorShape(2U, 11U), 1); + add_config(TensorShape(5U, 3U, 4U), TensorShape(2U, 7U), 1); + add_config(TensorShape(1U, 5U, 3U), TensorShape(1U, 7U, 3U), 1); + + add_config(TensorShape(3U, 5U), TensorShape(2U, 3U), 0); + add_config(TensorShape(9U), TensorShape(3U, 2U, 4U), 0); + add_config(TensorShape(5U, 3U, 4U), TensorShape(5U, 6U), 0); + + add_config(TensorShape(7U, 4U, 5U), TensorShape(2U, 3U), 2); + add_config(TensorShape(8U, 2U, 3U), TensorShape(4U, 2U, 5U), 2); + } +}; + +class CLSmallGatherMultiDimIndicesDataset final : public GatherDataset +{ +public: + CLSmallGatherMultiDimIndicesDataset() + { + add_config(TensorShape(2U, 6U), TensorShape(4U, 9U), 0); + add_config(TensorShape(15U, 15U), TensorShape(3U, 2U, 2U), 0); + add_config(TensorShape(15U, 15U), TensorShape(2U, 11U), 0); + add_config(TensorShape(5U, 3U, 4U), TensorShape(2U, 7U), 0); + + add_config(TensorShape(3U, 5U), TensorShape(2U, 3U), 0); + add_config(TensorShape(9U), TensorShape(3U, 2U, 4U), 0); + add_config(TensorShape(5U, 3U, 4U), TensorShape(5U, 6U), 0); + + add_config(TensorShape(7U, 4U, 5U), TensorShape(2U, 3U),0); + + add_config(TensorShape(2U, 6U), TensorShape(4U, 9U), 1); + add_config(TensorShape(15U, 15U), TensorShape(3U, 2U, 2U), 1); + add_config(TensorShape(15U, 15U), TensorShape(2U, 11U), 1); + add_config(TensorShape(5U, 3U, 4U), TensorShape(2U, 7U), 1); + + add_config(TensorShape(3U, 5U), TensorShape(2U, 3U), 1); + add_config(TensorShape(9U), TensorShape(3U, 2U, 4U), 1); + add_config(TensorShape(5U, 3U, 4U), TensorShape(5U, 6U), 1); + + add_config(TensorShape(7U, 4U, 5U), TensorShape(2U, 3U),1); + + add_config(TensorShape(2U, 6U), TensorShape(4U, 9U), 2); + add_config(TensorShape(15U, 15U), TensorShape(2U, 11U), 2); + add_config(TensorShape(5U, 3U, 4U), TensorShape(2U, 7U), 2); + + add_config(TensorShape(3U, 5U), TensorShape(2U, 3U), 2); + add_config(TensorShape(5U, 3U, 4U), TensorShape(5U, 6U), 2); + + add_config(TensorShape(7U, 4U, 5U), TensorShape(2U, 3U),2); + } +}; + class SmallGatherDataset final : public GatherDataset { public: diff --git a/tests/datasets/LargeConvolutionLayerDataset.h b/tests/datasets/LargeConvolutionLayerDataset.h index 1cffc9a221..c299f2460b 100644 --- a/tests/datasets/LargeConvolutionLayerDataset.h +++ b/tests/datasets/LargeConvolutionLayerDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_LARGE_CONVOLUTION_LAYER_DATASET -#define ARM_COMPUTE_TEST_LARGE_CONVOLUTION_LAYER_DATASET +#ifndef ACL_TESTS_DATASETS_LARGECONVOLUTIONLAYERDATASET_H +#define ACL_TESTS_DATASETS_LARGECONVOLUTIONLAYERDATASET_H #include "tests/datasets/ConvolutionLayerDataset.h" @@ -44,18 +44,31 @@ public: { // Kernel size 3 // Batch size 1 - add_config(TensorShape(224U, 222U, 64U), TensorShape(3U, 3U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(112U, 113U, 64U), TensorShape(3U, 3U, 64U, 128U), TensorShape(128U), TensorShape(112U, 113U, 128U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(112U, 112U, 128U), TensorShape(3U, 3U, 128U, 129U), TensorShape(129U), TensorShape(112U, 112U, 129U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(53U, 56U, 125U), TensorShape(3U, 3U, 125U, 256U), TensorShape(256U), TensorShape(51U, 54U, 256U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(56U, 56U, 256U), TensorShape(3U, 3U, 256U, 256U), TensorShape(256U), TensorShape(54U, 54U, 256U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(28U, 28U, 257U), TensorShape(3U, 3U, 257U, 512U), TensorShape(512U), TensorShape(28U, 28U, 512U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(28U, 28U, 512U), TensorShape(3U, 3U, 512U, 512U), TensorShape(512U), TensorShape(28U, 28U, 512U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 3U, 512U, 512U), TensorShape(512U), TensorShape(12U, 12U, 512U), PadStrideInfo(1, 1, 0, 0)); - // Batch size 3, 2 and 4 - add_config(TensorShape(224U, 222U, 64U, 3U), TensorShape(3U, 3U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U, 3U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(112U, 113U, 64U, 2U), TensorShape(3U, 3U, 64U, 128U), TensorShape(128U), TensorShape(110U, 111U, 128U, 2U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(3U, 3U, 127U, 128U), TensorShape(128U), TensorShape(111U, 112U, 128U, 4U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(224U, 222U, 32U), TensorShape(3U, 3U, 32U, 32U), TensorShape(32U), TensorShape(224U, 222U, 32U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(112U, 113U, 32U), TensorShape(3U, 3U, 32U, 64U), TensorShape(64U), TensorShape(112U, 113U, 64U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(112U, 112U, 64U), TensorShape(3U, 3U, 64U, 129U), TensorShape(129U), TensorShape(112U, 112U, 129U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(53U, 56U, 125U), TensorShape(3U, 3U, 125U, 128U), TensorShape(128U), TensorShape(51U, 54U, 128U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 3U, 128U, 128U), TensorShape(128U), TensorShape(54U, 54U, 128U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(28U, 28U, 257U), TensorShape(3U, 3U, 257U, 128U), TensorShape(128U), TensorShape(28U, 28U, 128U), PadStrideInfo(1, 1, 1, 1)); + + // Batch > 1 + add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(3U, 3U, 127U, 64U), TensorShape(64U), TensorShape(111U, 112U, 64U, 4U), PadStrideInfo(1, 1, 1, 1)); + } +}; + +class LargeWinogradConvolutionLayer3x3DatasetFp16Subset final : public ConvolutionLayerDataset +{ +public: + LargeWinogradConvolutionLayer3x3DatasetFp16Subset() + { + // Kernel size 3 + // Batch size 1 + add_config(TensorShape(224U, 222U, 32U), TensorShape(3U, 3U, 32U, 32U), TensorShape(32U), TensorShape(224U, 222U, 32U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(112U, 112U, 64U), TensorShape(3U, 3U, 64U, 129U), TensorShape(129U), TensorShape(112U, 112U, 129U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 3U, 128U, 128U), TensorShape(128U), TensorShape(54U, 54U, 128U), PadStrideInfo(1, 1, 0, 0)); + + // Batch > 1 + add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(3U, 3U, 127U, 64U), TensorShape(64U), TensorShape(111U, 112U, 64U, 4U), PadStrideInfo(1, 1, 1, 1)); } }; @@ -66,18 +79,31 @@ public: { // Kernel size 3 // Batch size 1 - add_config(TensorShape(224U, 222U, 64U), TensorShape(3U, 1U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U), PadStrideInfo(1, 1, 1, 0)); - add_config(TensorShape(112U, 113U, 64U), TensorShape(3U, 1U, 64U, 128U), TensorShape(128U), TensorShape(112U, 113U, 128U), PadStrideInfo(1, 1, 1, 0)); - add_config(TensorShape(112U, 112U, 128U), TensorShape(3U, 1U, 128U, 129U), TensorShape(129U), TensorShape(112U, 112U, 129U), PadStrideInfo(1, 1, 1, 0)); - add_config(TensorShape(53U, 56U, 125U), TensorShape(3U, 1U, 125U, 256U), TensorShape(256U), TensorShape(51U, 56U, 256U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(56U, 56U, 256U), TensorShape(3U, 1U, 256U, 256U), TensorShape(256U), TensorShape(56U, 56U, 256U), PadStrideInfo(1, 1, 1, 0)); - add_config(TensorShape(28U, 28U, 257U), TensorShape(3U, 1U, 257U, 512U), TensorShape(512U), TensorShape(26U, 28U, 512U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(28U, 28U, 512U), TensorShape(3U, 1U, 512U, 512U), TensorShape(512U), TensorShape(28U, 28U, 512U), PadStrideInfo(1, 1, 1, 0)); - add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 1U, 512U, 512U), TensorShape(512U), TensorShape(12U, 14U, 512U), PadStrideInfo(1, 1, 0, 0)); - // Batch size 3, 2 and 4 - add_config(TensorShape(224U, 222U, 64U, 3U), TensorShape(3U, 1U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U, 3U), PadStrideInfo(1, 1, 1, 0)); - add_config(TensorShape(112U, 113U, 64U, 2U), TensorShape(3U, 1U, 64U, 128U), TensorShape(128U), TensorShape(110U, 113U, 128U, 2U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(3U, 1U, 127U, 128U), TensorShape(128U), TensorShape(111U, 112U, 128U, 4U), PadStrideInfo(1, 1, 1, 0)); + add_config(TensorShape(224U, 222U, 32U), TensorShape(3U, 1U, 32U, 32U), TensorShape(32U), TensorShape(224U, 222U, 32U), PadStrideInfo(1, 1, 1, 0)); + add_config(TensorShape(112U, 113U, 32U), TensorShape(3U, 1U, 32U, 64U), TensorShape(64U), TensorShape(112U, 113U, 64U), PadStrideInfo(1, 1, 1, 0)); + add_config(TensorShape(112U, 112U, 64U), TensorShape(3U, 1U, 64U, 129U), TensorShape(129U), TensorShape(112U, 112U, 129U), PadStrideInfo(1, 1, 1, 0)); + add_config(TensorShape(53U, 56U, 125U), TensorShape(3U, 1U, 125U, 128U), TensorShape(128U), TensorShape(51U, 56U, 128U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 1U, 128U, 128U), TensorShape(128U), TensorShape(56U, 56U, 128U), PadStrideInfo(1, 1, 1, 0)); + add_config(TensorShape(28U, 28U, 257U), TensorShape(3U, 1U, 257U, 128U), TensorShape(128U), TensorShape(26U, 28U, 128U), PadStrideInfo(1, 1, 0, 0)); + + // Batch > 1 + add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(3U, 1U, 127U, 64U), TensorShape(64U), TensorShape(111U, 112U, 64U, 4U), PadStrideInfo(1, 1, 1, 0)); + } +}; + +class LargeWinogradConvolutionLayer3x1DatasetFp16Subset final : public ConvolutionLayerDataset +{ +public: + LargeWinogradConvolutionLayer3x1DatasetFp16Subset() + { + // Kernel size 3 + // Batch size 1 + add_config(TensorShape(112U, 113U, 32U), TensorShape(3U, 1U, 32U, 64U), TensorShape(64U), TensorShape(112U, 113U, 64U), PadStrideInfo(1, 1, 1, 0)); + add_config(TensorShape(53U, 56U, 125U), TensorShape(3U, 1U, 125U, 128U), TensorShape(128U), TensorShape(51U, 56U, 128U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(28U, 28U, 257U), TensorShape(3U, 1U, 257U, 128U), TensorShape(128U), TensorShape(26U, 28U, 128U), PadStrideInfo(1, 1, 0, 0)); + + // Batch > 1 + add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(3U, 1U, 127U, 64U), TensorShape(64U), TensorShape(111U, 112U, 64U, 4U), PadStrideInfo(1, 1, 1, 0)); } }; @@ -88,18 +114,31 @@ public: { // Kernel size 3 // Batch size 1 - add_config(TensorShape(224U, 222U, 64U), TensorShape(1U, 3U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U), PadStrideInfo(1, 1, 0, 1)); - add_config(TensorShape(112U, 113U, 64U), TensorShape(1U, 3U, 64U, 128U), TensorShape(128U), TensorShape(112U, 113U, 128U), PadStrideInfo(1, 1, 0, 1)); - add_config(TensorShape(112U, 112U, 128U), TensorShape(1U, 3U, 128U, 129U), TensorShape(129U), TensorShape(112U, 110U, 129U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(53U, 56U, 125U), TensorShape(1U, 3U, 125U, 256U), TensorShape(256U), TensorShape(53U, 56U, 256U), PadStrideInfo(1, 1, 0, 1)); - add_config(TensorShape(56U, 56U, 256U), TensorShape(1U, 3U, 256U, 256U), TensorShape(256U), TensorShape(56U, 54U, 256U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(28U, 28U, 257U), TensorShape(1U, 3U, 257U, 512U), TensorShape(512U), TensorShape(28U, 28U, 512U), PadStrideInfo(1, 1, 0, 1)); - add_config(TensorShape(28U, 28U, 512U), TensorShape(1U, 3U, 512U, 512U), TensorShape(512U), TensorShape(28U, 28U, 512U), PadStrideInfo(1, 1, 0, 1)); - add_config(TensorShape(14U, 14U, 512U), TensorShape(1U, 3U, 512U, 512U), TensorShape(512U), TensorShape(14U, 12U, 512U), PadStrideInfo(1, 1, 0, 0)); - // Batch size 3, 2 and 4 - add_config(TensorShape(224U, 222U, 64U, 3U), TensorShape(1U, 3U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U, 3U), PadStrideInfo(1, 1, 0, 1)); - add_config(TensorShape(112U, 113U, 64U, 2U), TensorShape(1U, 3U, 64U, 128U), TensorShape(128U), TensorShape(112U, 113U, 128U, 2U), PadStrideInfo(1, 1, 0, 1)); - add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(1U, 3U, 127U, 128U), TensorShape(128U), TensorShape(111U, 112U, 128U, 4U), PadStrideInfo(1, 1, 0, 1)); + add_config(TensorShape(224U, 222U, 32U), TensorShape(1U, 3U, 32U, 32U), TensorShape(32U), TensorShape(224U, 222U, 32U), PadStrideInfo(1, 1, 0, 1)); + add_config(TensorShape(112U, 113U, 32U), TensorShape(1U, 3U, 32U, 64U), TensorShape(64U), TensorShape(112U, 113U, 64U), PadStrideInfo(1, 1, 0, 1)); + add_config(TensorShape(112U, 112U, 64U), TensorShape(1U, 3U, 64U, 129U), TensorShape(129U), TensorShape(112U, 110U, 129U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(53U, 56U, 125U), TensorShape(1U, 3U, 125U, 128U), TensorShape(128U), TensorShape(53U, 56U, 128U), PadStrideInfo(1, 1, 0, 1)); + add_config(TensorShape(56U, 56U, 128U), TensorShape(1U, 3U, 128U, 128U), TensorShape(128U), TensorShape(56U, 54U, 128U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(28U, 28U, 257U), TensorShape(1U, 3U, 257U, 128U), TensorShape(128U), TensorShape(28U, 28U, 128U), PadStrideInfo(1, 1, 0, 1)); + + // Batch > 1 + add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(1U, 3U, 127U, 64U), TensorShape(64U), TensorShape(111U, 112U, 64U, 4U), PadStrideInfo(1, 1, 0, 1)); + } +}; + +class LargeWinogradConvolutionLayer1x3DatasetFp16Subset final : public ConvolutionLayerDataset +{ +public: + LargeWinogradConvolutionLayer1x3DatasetFp16Subset() + { + // Kernel size 3 + // Batch size 1 + add_config(TensorShape(112U, 112U, 64U), TensorShape(1U, 3U, 64U, 129U), TensorShape(129U), TensorShape(112U, 110U, 129U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(53U, 56U, 125U), TensorShape(1U, 3U, 125U, 128U), TensorShape(128U), TensorShape(53U, 56U, 128U), PadStrideInfo(1, 1, 0, 1)); + add_config(TensorShape(28U, 28U, 257U), TensorShape(1U, 3U, 257U, 128U), TensorShape(128U), TensorShape(28U, 28U, 128U), PadStrideInfo(1, 1, 0, 1)); + + // Batch > 1 + add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(1U, 3U, 127U, 64U), TensorShape(64U), TensorShape(111U, 112U, 64U, 4U), PadStrideInfo(1, 1, 0, 1)); } }; @@ -110,15 +149,27 @@ public: { // Kernel size 5 // Batch size 1 - add_config(TensorShape(224U, 224U, 3U), TensorShape(5U, 5U, 3U, 64U), TensorShape(64U), TensorShape(220U, 220U, 64U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(123U, 134U, 16U), TensorShape(5U, 5U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U), PadStrideInfo(1, 1, 2, 2)); + add_config(TensorShape(224U, 224U, 3U), TensorShape(5U, 5U, 3U, 32U), TensorShape(32U), TensorShape(220U, 220U, 32U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(181U, 152U, 42U), TensorShape(5U, 5U, 42U, 100U), TensorShape(100U), TensorShape(177U, 148U, 100U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(200U, 201U, 24U), TensorShape(5U, 5U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 2, 2)); - // Batch size 2, 3 and 4 - add_config(TensorShape(224U, 224U, 3U, 2U), TensorShape(5U, 5U, 3U, 64U), TensorShape(64U), TensorShape(220U, 220U, 64U, 2U), PadStrideInfo(1, 1, 0, 0)); + // Batch > 1 + add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(5U, 5U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U, 3U), PadStrideInfo(1, 1, 2, 2)); + } +}; + +class LargeWinogradConvolutionLayer5x5DatasetFp16Subset final : public ConvolutionLayerDataset +{ +public: + LargeWinogradConvolutionLayer5x5DatasetFp16Subset() + { + // Kernel size 5 + // Batch size 1 + add_config(TensorShape(181U, 152U, 42U), TensorShape(5U, 5U, 42U, 100U), TensorShape(100U), TensorShape(177U, 148U, 100U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(200U, 201U, 24U), TensorShape(5U, 5U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 2, 2)); + + // Batch > 1 add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(5U, 5U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U, 3U), PadStrideInfo(1, 1, 2, 2)); - add_config(TensorShape(181U, 152U, 42U, 4U), TensorShape(5U, 5U, 42U, 100U), TensorShape(100U), TensorShape(177U, 148U, 100U, 4U), PadStrideInfo(1, 1, 0, 0)); } }; @@ -128,15 +179,26 @@ public: LargeWinogradConvolutionLayer5x1Dataset() { // Batch size 1 - add_config(TensorShape(224U, 224U, 3U), TensorShape(5U, 1U, 3U, 64U), TensorShape(64U), TensorShape(224U, 224U, 64U), PadStrideInfo(1, 1, 2, 0)); - add_config(TensorShape(123U, 134U, 16U), TensorShape(5U, 1U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U), PadStrideInfo(1, 1, 2, 0)); + add_config(TensorShape(224U, 224U, 3U), TensorShape(5U, 1U, 3U, 32U), TensorShape(32U), TensorShape(224U, 224U, 32U), PadStrideInfo(1, 1, 2, 0)); add_config(TensorShape(181U, 152U, 42U), TensorShape(5U, 1U, 42U, 100U), TensorShape(100U), TensorShape(177U, 152U, 100U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(200U, 201U, 24U), TensorShape(5U, 1U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 2, 0)); - // Batch size 2, 3 and 4 - add_config(TensorShape(224U, 224U, 3U, 2U), TensorShape(5U, 1U, 3U, 64U), TensorShape(64U), TensorShape(224U, 224U, 64U, 2U), PadStrideInfo(1, 1, 2, 0)); + // Batch > 1 + add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(5U, 1U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U, 3U), PadStrideInfo(1, 1, 2, 0)); + } +}; + +class LargeWinogradConvolutionLayer5x1DatasetFp16Subset final : public ConvolutionLayerDataset +{ +public: + LargeWinogradConvolutionLayer5x1DatasetFp16Subset() + { + // Batch size 1 + add_config(TensorShape(224U, 224U, 3U), TensorShape(5U, 1U, 3U, 32U), TensorShape(32U), TensorShape(224U, 224U, 32U), PadStrideInfo(1, 1, 2, 0)); + add_config(TensorShape(200U, 201U, 24U), TensorShape(5U, 1U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 2, 0)); + + // Batch > 1 add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(5U, 1U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U, 3U), PadStrideInfo(1, 1, 2, 0)); - add_config(TensorShape(181U, 152U, 42U, 4U), TensorShape(5U, 1U, 42U, 100U), TensorShape(100U), TensorShape(177U, 152U, 100U, 4U), PadStrideInfo(1, 1, 0, 0)); } }; @@ -146,15 +208,12 @@ public: LargeWinogradConvolutionLayer7x1Dataset() { // Batch size 1 - add_config(TensorShape(224U, 224U, 3U), TensorShape(7U, 1U, 3U, 64U), TensorShape(64U), TensorShape(218U, 224U, 64U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(123U, 134U, 16U), TensorShape(7U, 1U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U), PadStrideInfo(1, 1, 3, 0)); + add_config(TensorShape(224U, 224U, 3U), TensorShape(7U, 1U, 3U, 32U), TensorShape(32U), TensorShape(218U, 224U, 32U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(181U, 152U, 42U), TensorShape(7U, 1U, 42U, 100U), TensorShape(100U), TensorShape(175U, 152U, 100U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(200U, 201U, 24U), TensorShape(7U, 1U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 3, 0)); - // Batch size 2, 3 and 4 - add_config(TensorShape(224U, 224U, 3U, 2U), TensorShape(7U, 1U, 3U, 64U), TensorShape(64U), TensorShape(224U, 224U, 64U, 2U), PadStrideInfo(1, 1, 3, 0)); + // Batch > 1 add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(7U, 1U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U, 3U), PadStrideInfo(1, 1, 3, 0)); - add_config(TensorShape(181U, 152U, 42U, 4U), TensorShape(7U, 1U, 42U, 100U), TensorShape(100U), TensorShape(175U, 152U, 100U, 4U), PadStrideInfo(1, 1, 0, 0)); } }; @@ -164,15 +223,26 @@ public: LargeWinogradConvolutionLayer1x7Dataset() { // Batch size 1 - add_config(TensorShape(224U, 224U, 3U), TensorShape(1U, 7U, 3U, 64U), TensorShape(64U), TensorShape(224U, 218U, 64U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(123U, 134U, 16U), TensorShape(1U, 7U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U), PadStrideInfo(1, 1, 0, 3)); + add_config(TensorShape(224U, 224U, 3U), TensorShape(1U, 7U, 3U, 32U), TensorShape(32U), TensorShape(224U, 218U, 32U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(181U, 152U, 42U), TensorShape(1U, 7U, 42U, 100U), TensorShape(100U), TensorShape(181U, 146U, 100U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(200U, 201U, 24U), TensorShape(1U, 7U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 0, 3)); - // Batch size 2, 3 and 4 - add_config(TensorShape(224U, 224U, 3U, 2U), TensorShape(1U, 7U, 3U, 64U), TensorShape(64U), TensorShape(224U, 224U, 64U, 2U), PadStrideInfo(1, 1, 0, 3)); + // Batch > 1 + add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(1U, 7U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U, 3U), PadStrideInfo(1, 1, 0, 3)); + } +}; + +class LargeWinogradConvolutionLayer1x7DatasetFp16Subset final : public ConvolutionLayerDataset +{ +public: + LargeWinogradConvolutionLayer1x7DatasetFp16Subset() + { + // Batch size 1 + add_config(TensorShape(181U, 152U, 42U), TensorShape(1U, 7U, 42U, 100U), TensorShape(100U), TensorShape(181U, 146U, 100U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(200U, 201U, 24U), TensorShape(1U, 7U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 0, 3)); + + // Batch > 1 add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(1U, 7U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U, 3U), PadStrideInfo(1, 1, 0, 3)); - add_config(TensorShape(181U, 152U, 42U, 4U), TensorShape(1U, 7U, 42U, 100U), TensorShape(100U), TensorShape(181U, 146U, 100U, 4U), PadStrideInfo(1, 1, 0, 0)); } }; @@ -182,15 +252,26 @@ public: LargeWinogradConvolutionLayer1x5Dataset() { // Batch size 1 - add_config(TensorShape(224U, 224U, 3U), TensorShape(1U, 5U, 3U, 64U), TensorShape(64U), TensorShape(224U, 224U, 64U), PadStrideInfo(1, 1, 0, 2)); - add_config(TensorShape(123U, 134U, 16U), TensorShape(1U, 5U, 16U, 7U), TensorShape(7U), TensorShape(123U, 130U, 7U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(224U, 224U, 3U), TensorShape(1U, 5U, 3U, 32U), TensorShape(32U), TensorShape(224U, 224U, 32U), PadStrideInfo(1, 1, 0, 2)); add_config(TensorShape(181U, 152U, 42U), TensorShape(1U, 5U, 42U, 100U), TensorShape(100U), TensorShape(181U, 148U, 100U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(200U, 201U, 24U), TensorShape(1U, 5U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 0, 2)); - // Batch size 2, 3 and 4 - add_config(TensorShape(224U, 224U, 3U, 2U), TensorShape(1U, 5U, 3U, 64U), TensorShape(64U), TensorShape(224U, 224U, 64U, 2U), PadStrideInfo(1, 1, 0, 2)); + // Batch size > 1 + add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(1U, 5U, 16U, 7U), TensorShape(7U), TensorShape(123U, 130U, 7U, 3U), PadStrideInfo(1, 1, 0, 0)); + } +}; + +class LargeWinogradConvolutionLayer1x5DatasetFp16Subset final : public ConvolutionLayerDataset +{ +public: + LargeWinogradConvolutionLayer1x5DatasetFp16Subset() + { + // Batch size 1 + add_config(TensorShape(224U, 224U, 3U), TensorShape(1U, 5U, 3U, 32U), TensorShape(32U), TensorShape(224U, 224U, 32U), PadStrideInfo(1, 1, 0, 2)); + add_config(TensorShape(181U, 152U, 42U), TensorShape(1U, 5U, 42U, 100U), TensorShape(100U), TensorShape(181U, 148U, 100U), PadStrideInfo(1, 1, 0, 0)); + + // Batch size > 1 add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(1U, 5U, 16U, 7U), TensorShape(7U), TensorShape(123U, 130U, 7U, 3U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(181U, 152U, 42U, 4U), TensorShape(1U, 5U, 42U, 100U), TensorShape(100U), TensorShape(181U, 148U, 100U, 4U), PadStrideInfo(1, 1, 0, 0)); } }; @@ -213,6 +294,16 @@ public: } }; +class VeryLargeConvolutionLayerDataset final : public ConvolutionLayerDataset +{ +public: + VeryLargeConvolutionLayerDataset() + { + // Tensor size > 1e7 bytes && weight dimensions > 7 + add_config(TensorShape(336U, 336U, 32U), TensorShape(9U, 9U, 32U, 64U), TensorShape(64U), TensorShape(168U, 168U, 64U), PadStrideInfo(2, 2, 4, 4)); + } +}; + class LargeGroupedConvolutionLayerDataset final : public ConvolutionLayerDataset { public: @@ -233,4 +324,4 @@ public: } // namespace datasets } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_LARGE_CONVOLUTION_LAYER_DATASET */ +#endif // ACL_TESTS_DATASETS_LARGECONVOLUTIONLAYERDATASET_H diff --git a/tests/datasets/LargeGEMMDataset.h b/tests/datasets/LargeGEMMDataset.h index 6cdff7f559..e45319ef57 100644 --- a/tests/datasets/LargeGEMMDataset.h +++ b/tests/datasets/LargeGEMMDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2019, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_LARGE_GEMM_DATASET -#define ARM_COMPUTE_TEST_LARGE_GEMM_DATASET +#ifndef ACL_TESTS_DATASETS_LARGEGEMMDATASET_H +#define ACL_TESTS_DATASETS_LARGEGEMMDATASET_H #include "tests/datasets/GEMMDataset.h" @@ -79,7 +79,20 @@ public: add_config(TensorShape(1729U, 17U, 10U, 3U), TensorShape(128U, 1729U), TensorShape(128U), TensorShape(128U, 17U, 10U, 3U), 1.0f, 0.3f); } }; + +class LargeAccumulateGEMMDataset final : public GEMMDataset +{ +public: + LargeAccumulateGEMMDataset() + { + add_config(TensorShape(923U, 429U), TensorShape(871U, 923U), TensorShape(871U, 429U), TensorShape(871U, 429U), 1.0f, 0.0f); + add_config(TensorShape(1021U, 1U), TensorShape(783U, 1021U), TensorShape(783U, 1U), TensorShape(783U, 1U), 1.0f, 0.0f); + add_config(TensorShape(1021U, 1U), TensorShape(783U, 1021U), TensorShape(783U, 1U), TensorShape(783U, 1U), 1.0f, 0.0f); + add_config(TensorShape(941U, 1U), TensorShape(623U, 941U), TensorShape(623U, 1U), TensorShape(623U, 1U), 1.0f, 0.0f); + } +}; + } // namespace datasets } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_LARGE_GEMM_DATASET */ +#endif // ACL_TESTS_DATASETS_LARGEGEMMDATASET_H diff --git a/tests/datasets/LargeMatMulDataset.h b/tests/datasets/LargeMatMulDataset.h new file mode 100644 index 0000000000..8f6c000d37 --- /dev/null +++ b/tests/datasets/LargeMatMulDataset.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_DATASETS_LARGEMATMULDATASET +#define ACL_TESTS_DATASETS_LARGEMATMULDATASET + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "tests/datasets/MatMulDataset.h" + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ +class LargeMatMulDataset final : public MatMulDataset +{ +public: + LargeMatMulDataset() + { + add_config(TensorShape(21U, 13U, 3U, 2U), TensorShape(33U, 21U, 3U, 2U), TensorShape(33U, 13U, 3U, 2U)); + add_config(TensorShape(38U, 12U, 1U, 5U), TensorShape(21U, 38U, 1U, 5U), TensorShape(21U, 12U, 1U, 5U)); + add_config(TensorShape(45U, 38U, 3U, 2U), TensorShape(21U, 45U, 3U, 2U), TensorShape(21U, 38U, 3U, 2U)); + } +}; + +class HighDimensionalMatMulDataset final : public MatMulDataset +{ +public: + HighDimensionalMatMulDataset() + { + add_config(TensorShape(5U, 5U, 2U, 2U, 2U, 2U), TensorShape(5U, 5U, 2U, 2U, 2U, 2U), TensorShape(5U, 5U, 2U, 2U, 2U, 2U)); // 6D tensor + } +}; + +class LargeMatMulDatasetRhsExportToCLImageRhsNT final : public MatMulDataset +{ +public: + // For shape choices, please refer to the explanations given in SmallMatMulDatasetRhsExportToCLImageRhsNT + LargeMatMulDatasetRhsExportToCLImageRhsNT() + { + add_config(TensorShape(21U, 13U, 3U, 2U), TensorShape(32U, 21U, 3U, 2U), TensorShape(32U, 13U, 3U, 2U)); + add_config(TensorShape(38U, 12U, 1U, 5U, 2U), TensorShape(20U, 38U, 1U, 5U, 2U), TensorShape(20U, 12U, 1U, 5U, 2U)); + add_config(TensorShape(45U, 38U, 3U, 2U, 3U), TensorShape(20U, 45U, 3U, 2U, 3U), TensorShape(20U, 38U, 3U, 2U, 3U)); + } +}; +class LargeMatMulDatasetRhsExportToCLImageRhsT final : public MatMulDataset +{ +public: + // For shape choices, please refer to the explanations given in SmallMatMulDatasetRhsExportToCLImageRhsT + LargeMatMulDatasetRhsExportToCLImageRhsT() + { + add_config(TensorShape(28U, 13U, 3U, 2U), TensorShape(32U, 28U, 3U, 2U), TensorShape(32U, 13U, 3U, 2U)); + add_config(TensorShape(40U, 12U, 1U, 5U, 2U), TensorShape(20U, 40U, 1U, 5U, 2U), TensorShape(20U, 12U, 1U, 5U, 2U)); + add_config(TensorShape(44U, 38U, 3U, 2U, 3U), TensorShape(20U, 44U, 3U, 2U, 3U), TensorShape(20U, 38U, 3U, 2U, 3U)); + } +}; +} // namespace datasets +} // namespace test +} // namespace arm_compute +#endif /* ACL_TESTS_DATASETS_LARGEMATMULDATASET */ diff --git a/tests/datasets/system_tests/yolo/v2/YOLOV2PoolingLayerDataset.h b/tests/datasets/LargeMatMulMMULDataset.h index 88cac5e449..23e0b3e5c8 100644 --- a/tests/datasets/system_tests/yolo/v2/YOLOV2PoolingLayerDataset.h +++ b/tests/datasets/LargeMatMulMMULDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,15 +21,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_YOLOV2_POOLING_LAYER_DATASET -#define ARM_COMPUTE_TEST_YOLOV2_POOLING_LAYER_DATASET -#include "tests/datasets/PoolingLayerDataset.h" - -#include "utils/TypePrinter.h" +#ifndef ACL_TESTS_DATASETS_LARGEMATMULMMULDATASET +#define ACL_TESTS_DATASETS_LARGEMATMULMMULDATASET #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" +#include "tests/datasets/MatMulDataset.h" namespace arm_compute { @@ -37,24 +35,30 @@ namespace test { namespace datasets { -class YOLOV2PoolingLayerDataset final : public PoolingLayerDataset +/** MatMul MMUL shapes are similar to MatMul shapes except that K has to be a multiple of MMUL_K0 which is 4 (e.g. see src/gpu/cl/kernels/ClMatMulNativeMMULKernel.cpp for the definition) + */ +class LargeMatMulMMULDataset final : public MatMulDataset +{ +public: + LargeMatMulMMULDataset() + { + add_config(TensorShape(24U, 13U, 3U, 2U), TensorShape(33U, 24U, 3U, 2U), TensorShape(33U, 13U, 3U, 2U)); + add_config(TensorShape(36U, 12U, 1U, 5U), TensorShape(21U, 36U, 1U, 5U), TensorShape(21U, 12U, 1U, 5U)); + add_config(TensorShape(44U, 38U, 3U, 2U), TensorShape(21U, 44U, 3U, 2U), TensorShape(21U, 38U, 3U, 2U)); + } +}; + +class HighDimensionalMatMulMMULDataset final : public MatMulDataset { public: - YOLOV2PoolingLayerDataset() + HighDimensionalMatMulMMULDataset() { - // pool1 - add_config(TensorShape(416U, 416U, 32U), PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))); - // pool2 - add_config(TensorShape(208U, 208U, 64U), PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))); - // pool5 - add_config(TensorShape(104U, 104U, 128U), PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))); - // pool8 - add_config(TensorShape(52U, 52U, 256U), PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))); - // pool13 - add_config(TensorShape(26U, 26U, 512U), PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))); + add_config(TensorShape(4U, 5U, 2U, 2U, 2U, 2U), TensorShape(5U, 4U, 2U, 2U, 2U, 2U), TensorShape(5U, 5U, 2U, 2U, 2U, 2U)); // 6D tensor } }; + } // namespace datasets } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_YOLOV2_POOLING_LAYER_DATASET */ + +#endif /* ACL_TESTS_DATASETS_LARGEMATMULMMULDATASET */ diff --git a/tests/datasets/MatMulDataset.h b/tests/datasets/MatMulDataset.h new file mode 100644 index 0000000000..9c1c5fb05d --- /dev/null +++ b/tests/datasets/MatMulDataset.h @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_DATASETS_MATMULDATASET +#define ACL_TESTS_DATASETS_MATMULDATASET + +#include "arm_compute/core/TensorShape.h" +#include "utils/TypePrinter.h" + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ +class MatMulDataset +{ +public: + using type = std::tuple<TensorShape, TensorShape, TensorShape>; + + struct iterator + { + iterator(std::vector<TensorShape>::const_iterator a_it, + std::vector<TensorShape>::const_iterator b_it, + std::vector<TensorShape>::const_iterator dst_it) + : _a_it{ std::move(a_it) }, + _b_it{ std::move(b_it) }, + _dst_it{ std::move(dst_it) } + { + } + + std::string description() const + { + std::stringstream description; + description << "A=" << *_a_it << ":"; + description << "B=" << *_b_it << ":"; + description << "Out=" << *_dst_it << ":"; + return description.str(); + } + + MatMulDataset::type operator*() const + { + return std::make_tuple(*_a_it, *_b_it, *_dst_it); + } + + iterator &operator++() + { + ++_a_it; + ++_b_it; + ++_dst_it; + + return *this; + } + + private: + std::vector<TensorShape>::const_iterator _a_it; + std::vector<TensorShape>::const_iterator _b_it; + std::vector<TensorShape>::const_iterator _dst_it; + }; + + iterator begin() const + { + return iterator(_a_shapes.begin(), _b_shapes.begin(), _dst_shapes.begin()); + } + + int size() const + { + return std::min(_a_shapes.size(), std::min(_b_shapes.size(), _dst_shapes.size())); + } + + void add_config(TensorShape a, TensorShape b, TensorShape dst) + { + _a_shapes.emplace_back(std::move(a)); + _b_shapes.emplace_back(std::move(b)); + _dst_shapes.emplace_back(std::move(dst)); + } + +protected: + MatMulDataset() = default; + MatMulDataset(MatMulDataset &&) = default; + +private: + std::vector<TensorShape> _a_shapes{}; + std::vector<TensorShape> _b_shapes{}; + std::vector<TensorShape> _dst_shapes{}; +}; +} // namespace datasets +} // namespace test +} // namespace arm_compute +#endif /* ACL_TESTS_DATASETS_MATMULDATASET */ diff --git a/tests/datasets/MatMulLowpMMULDataset.h b/tests/datasets/MatMulLowpMMULDataset.h new file mode 100644 index 0000000000..1b22e1061f --- /dev/null +++ b/tests/datasets/MatMulLowpMMULDataset.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ACL_TESTS_DATASETS_MATMULLOWPMMULDATASET_H +#define ACL_TESTS_DATASETS_MATMULLOWPMMULDATASET_H + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "tests/datasets/MatMulDataset.h" + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ +/** MatMulLowp MMUL shapes are similar to MatMul MMUL shapes except that K has to be a + * multiple of MMUL_K0 which is 16 (e.g. see src/gpu/cl/kernels/ClMatMulLowpNativeMMULKernel.cpp for the definition) + */ +class SmallMatMulLowpMMULDataset final : public MatMulDataset +{ +public: + SmallMatMulLowpMMULDataset() + { + add_config(TensorShape(16U, 4U), TensorShape(4U, 16U), TensorShape(4U, 4U)); // same as mmul block + add_config(TensorShape(96U, 1U), TensorShape(1U, 96U), TensorShape(1U, 1U)); // vector x vector + add_config(TensorShape(32U, 4U, 2U), TensorShape(16U, 32U, 2U), TensorShape(16U, 4U, 2U)); + add_config(TensorShape(48U, 2U), TensorShape(17U, 48U), TensorShape(17U, 2U)); + add_config(TensorShape(32U, 6U), TensorShape(7U, 32U), TensorShape(7U, 6U)); + } +}; + +// This dataset is for smaller number of tests that will still use small shapes +// e.g. not repeating everything for QASYMM8 while we're already testing for QASYMM8_SIGNED +class SmallMatMulLowpMMULDatasetSubset final : public MatMulDataset +{ +public: + SmallMatMulLowpMMULDatasetSubset() + { + add_config(TensorShape(32U, 4U, 2U), TensorShape(16U, 32U, 2U), TensorShape(16U, 4U, 2U)); + add_config(TensorShape(32U, 6U), TensorShape(7U, 32U), TensorShape(7U, 6U)); + } +}; + +class SmallMatMulLowpMMULWithBiasDataset final : public MatMulDataset +{ +public: + SmallMatMulLowpMMULWithBiasDataset() + { + add_config(TensorShape(32U, 4U, 2U, 2U), TensorShape(16U, 32U, 2U, 2U), TensorShape(16U, 4U, 2U, 2U)); + } +}; + +class LargeMatMulLowpMMULDataset final : public MatMulDataset +{ +public: + LargeMatMulLowpMMULDataset() + { + add_config(TensorShape(192U, 38U, 3U, 2U), TensorShape(21U, 192U, 3U, 2U), TensorShape(21U, 38U, 3U, 2U)); + } +}; + +class HighDimensionalMatMulLowpMMULDataset final : public MatMulDataset +{ +public: + HighDimensionalMatMulLowpMMULDataset() + { + add_config(TensorShape(16U, 5U, 2U, 2U, 2U, 2U), TensorShape(5U, 16U, 2U, 2U, 2U, 2U), TensorShape(5U, 5U, 2U, 2U, 2U, 2U)); // 6D tensor + } +}; + +} // namespace datasets +} // namespace test +} // namespace arm_compute + +#endif // ACL_TESTS_DATASETS_MATMULLOWPMMULDATASET_H diff --git a/tests/datasets/Pooling3dLayerDataset.h b/tests/datasets/Pooling3dLayerDataset.h new file mode 100644 index 0000000000..cfe970e8be --- /dev/null +++ b/tests/datasets/Pooling3dLayerDataset.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_TEST_POOLING_3D_LAYER_DATASET +#define ARM_COMPUTE_TEST_POOLING_3D_LAYER_DATASET + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "utils/TypePrinter.h" + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ +class Pooling3dLayerDataset +{ +public: + using type = std::tuple<TensorShape, Pooling3dLayerInfo>; + + struct iterator + { + iterator(std::vector<TensorShape>::const_iterator src_it, + std::vector<Pooling3dLayerInfo>::const_iterator infos_it) + : _src_it{ std::move(src_it) }, + _infos_it{ std::move(infos_it) } + { + } + + std::string description() const + { + std::stringstream description; + description << "In=" << *_src_it << ":"; + description << "Info=" << *_infos_it << ":"; + return description.str(); + } + + Pooling3dLayerDataset::type operator*() const + { + return std::make_tuple(*_src_it, *_infos_it); + } + + iterator &operator++() + { + ++_src_it; + ++_infos_it; + + return *this; + } + + private: + std::vector<TensorShape>::const_iterator _src_it; + std::vector<Pooling3dLayerInfo>::const_iterator _infos_it; + }; + + iterator begin() const + { + return iterator(_src_shapes.begin(), _infos.begin()); + } + + int size() const + { + return std::min(_src_shapes.size(), _infos.size()); + } + + void add_config(TensorShape src, Pooling3dLayerInfo info) + { + _src_shapes.emplace_back(std::move(src)); + _infos.emplace_back(std::move(info)); + } + +protected: + Pooling3dLayerDataset() = default; + Pooling3dLayerDataset(Pooling3dLayerDataset &&) = default; + +private: + std::vector<TensorShape> _src_shapes{}; + std::vector<Pooling3dLayerInfo> _infos{}; +}; + +// Special pooling dataset +class Pooling3dLayerDatasetSpecial final : public Pooling3dLayerDataset +{ +public: + Pooling3dLayerDatasetSpecial() + { + // Special cases + add_config(TensorShape(2U, 3U, 4U, 2U, 4U), Pooling3dLayerInfo(PoolingType::AVG, /*pool size*/ Size3D(2, 2, 1), /*pool strides*/ Size3D(3, 3, 1), /*pool padding*/ Padding3D(0, 0, 0), true)); + add_config(TensorShape(20U, 22U, 10U, 2U), Pooling3dLayerInfo(PoolingType::AVG, Size3D(100, 100, 100), Size3D(5, 5, 5), Padding3D(50, 50, 50), true)); + add_config(TensorShape(10U, 20U, 32U, 3U, 2U), Pooling3dLayerInfo(PoolingType::MAX, /*pool size*/ 3, /*pool strides*/ Size3D(2, 2, 2), Padding3D(1, 1, 1, 1, 1, 1), false, false, + DimensionRoundingType::FLOOR)); + add_config(TensorShape(14U, 10U, 10U, 3U, 5U), Pooling3dLayerInfo(PoolingType::AVG, Size3D(3, 3, 3), /*pool strides*/ Size3D(3, 3, 3), Padding3D(2, 1, 2), true, false, DimensionRoundingType::CEIL)); + add_config(TensorShape(14U, 10U, 10U, 2U, 4U), Pooling3dLayerInfo(PoolingType::AVG, Size3D(3, 3, 3), /*pool strides*/ Size3D(3, 3, 3), Padding3D(2, 1, 2), false, false, DimensionRoundingType::CEIL)); + add_config(TensorShape(15U, 13U, 13U, 3U, 5U), Pooling3dLayerInfo(PoolingType::AVG, Size3D(4, 4, 4), /*pool strides*/ Size3D(2, 2, 2), Padding3D(2, 2, 2), true, false, DimensionRoundingType::CEIL)); + } +}; +} // namespace datasets +} // namespace test +} // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_POOLING_3D_LAYER_DATASET */ diff --git a/tests/datasets/PoolingLayerDataset.h b/tests/datasets/PoolingLayerDataset.h index 01b2491eb2..1557240fd2 100644 --- a/tests/datasets/PoolingLayerDataset.h +++ b/tests/datasets/PoolingLayerDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -106,7 +106,7 @@ public: PoolingLayerDatasetSpecial() { // Special cases - add_config(TensorShape(2U, 3U, 4U, 1U), PoolingLayerInfo(PoolingType::AVG, Size2D(3, 3), DataLayout::NCHW, PadStrideInfo(3, 3, 0, 0), true)); + add_config(TensorShape(2U, 3U, 4U, 1U), PoolingLayerInfo(PoolingType::AVG, Size2D(2, 2), DataLayout::NCHW, PadStrideInfo(3, 3, 0, 0), true)); add_config(TensorShape(60U, 52U, 3U, 2U), PoolingLayerInfo(PoolingType::AVG, Size2D(100, 100), DataLayout::NCHW, PadStrideInfo(5, 5, 50, 50), true)); // Asymmetric padding add_config(TensorShape(112U, 112U, 32U), PoolingLayerInfo(PoolingType::MAX, 3, DataLayout::NCHW, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR))); diff --git a/tests/datasets/RandomBatchNormalizationLayerDataset.h b/tests/datasets/RandomBatchNormalizationLayerDataset.h index 5a49dd702b..4ccb2eaaba 100644 --- a/tests/datasets/RandomBatchNormalizationLayerDataset.h +++ b/tests/datasets/RandomBatchNormalizationLayerDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -42,9 +42,9 @@ class SmallRandomBatchNormalizationLayerDataset final : public BatchNormalizatio public: SmallRandomBatchNormalizationLayerDataset() { - add_config(TensorShape(15U, 16U, 2U, 12U), TensorShape(2U), 0.1f); + add_config(TensorShape(1U, 16U, 2U, 12U), TensorShape(2U), 0.1f); add_config(TensorShape(21U, 11U, 12U, 7U), TensorShape(12U), 0.1f); - add_config(TensorShape(7U, 3U, 6U, 11U), TensorShape(6U), 0.1f); + add_config(TensorShape(32U, 3U, 6U, 11U), TensorShape(6U), 0.1f); } }; class LargeRandomBatchNormalizationLayerDataset final : public BatchNormalizationLayerDataset diff --git a/tests/datasets/ReorderLayerDataset.h b/tests/datasets/ReorderLayerDataset.h new file mode 100644 index 0000000000..8e1a8422b2 --- /dev/null +++ b/tests/datasets/ReorderLayerDataset.h @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_DATASETS_REORDERLAYERDATASET +#define ACL_TESTS_DATASETS_REORDERLAYERDATASET + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "utils/TypePrinter.h" + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ +/** [ReorderLayer datasets] **/ +class ReorderLayerDataset +{ +public: + using type = std::tuple<TensorShape, TensorShape, WeightFormat, WeightFormat>; + + struct iterator + { + iterator(std::vector<TensorShape>::const_iterator in_it, + std::vector<TensorShape>::const_iterator out_it, + std::vector<WeightFormat>::const_iterator _wf_in_it, + std::vector<WeightFormat>::const_iterator _wf_out_it) + : _in_it{ std::move(in_it) }, + _out_it{ std::move(out_it) }, + _wf_in_it{ std::move(_wf_in_it) }, + _wf_out_it{ std::move(_wf_out_it) } + { + } + + std::string description() const + { + std::stringstream description; + description << "In=" << *_in_it << ":"; + description << "Out=" << *_out_it << ":"; + description << "Wf_In=" << *_wf_in_it << ":"; + description << "Wf_Out=" << *_wf_out_it; + return description.str(); + } + + ReorderLayerDataset::type operator*() const + { + return std::make_tuple(*_in_it, *_out_it, *_wf_in_it, *_wf_out_it); + } + + iterator &operator++() + { + ++_in_it; + ++_out_it; + ++_wf_in_it; + ++_wf_out_it; + + return *this; + } + + private: + std::vector<TensorShape>::const_iterator _in_it; + std::vector<TensorShape>::const_iterator _out_it; + std::vector<WeightFormat>::const_iterator _wf_in_it; + std::vector<WeightFormat>::const_iterator _wf_out_it; + }; + + iterator begin() const + { + return iterator(_in_shapes.begin(), _out_shapes.begin(), _in_wfs.begin(), _out_wfs.begin()); + } + + int size() const + { + return std::min(_in_shapes.size(), std::min(_out_shapes.size(), std::min(_in_wfs.size(), _out_wfs.size()))); + } + + void add_config(TensorShape in, TensorShape out, WeightFormat in_wf, WeightFormat out_wf) + { + _in_shapes.emplace_back(std::move(in)); + _out_shapes.emplace_back(std::move(out)); + _in_wfs.emplace_back(std::move(in_wf)); + _out_wfs.emplace_back(std::move(out_wf)); + } + + // protected: + ReorderLayerDataset() = default; + ReorderLayerDataset(ReorderLayerDataset &&) = default; + + private: + std::vector<TensorShape> _in_shapes{}; + std::vector<TensorShape> _out_shapes{}; + std::vector<WeightFormat> _in_wfs{}; + std::vector<WeightFormat> _out_wfs{}; +}; + +/** [ReorderLayer datasets] **/ + +class ReorderLayerDatasetBlock4 final : public ReorderLayerDataset +{ + public: + ReorderLayerDatasetBlock4() + { + add_config(TensorShape(10U, 9U), TensorShape(10U, 12U), WeightFormat::OHWI, WeightFormat::OHWIo4); + add_config(TensorShape(16U, 16U), TensorShape(16U, 16U), WeightFormat::OHWI, WeightFormat::OHWIo4); + add_config(TensorShape(10U, 511U), TensorShape(10U, 512U), WeightFormat::OHWI, WeightFormat::OHWIo4); + add_config(TensorShape(234U, 301U), TensorShape(234U, 304U), WeightFormat::OHWI, WeightFormat::OHWIo4); + add_config(TensorShape(1024U, 1024U), TensorShape(1024U, 1024U), WeightFormat::OHWI, WeightFormat::OHWIo4); + add_config(TensorShape(10U, 9U, 1U, 1U), TensorShape(10U, 12U, 1U, 1U), WeightFormat::OHWI, WeightFormat::OHWIo4); + add_config(TensorShape(16U, 16U, 1U, 1U), TensorShape(16U, 16U, 1U, 1U), WeightFormat::OHWI, WeightFormat::OHWIo4); + add_config(TensorShape(10U, 511U, 1U, 1U), TensorShape(10U, 512U, 1U, 1U), WeightFormat::OHWI, WeightFormat::OHWIo4); + add_config(TensorShape(234U, 301U, 1U, 1U), TensorShape(234U, 304U, 1U, 1U), WeightFormat::OHWI, WeightFormat::OHWIo4); + add_config(TensorShape(1024U, 1024U, 1U, 1U), TensorShape(1024U, 1024U, 1U, 1U), WeightFormat::OHWI, WeightFormat::OHWIo4); + } +}; + +class ReorderLayerDatasetBlock8 final : public ReorderLayerDataset +{ + public: + ReorderLayerDatasetBlock8() + { + add_config(TensorShape(10U, 9U), TensorShape(10U, 16U), WeightFormat::OHWI, WeightFormat::OHWIo8); + add_config(TensorShape(16U, 16U), TensorShape(16U, 16U), WeightFormat::OHWI, WeightFormat::OHWIo8); + add_config(TensorShape(10U, 511U), TensorShape(10U, 512U), WeightFormat::OHWI, WeightFormat::OHWIo8); + add_config(TensorShape(234U, 301U), TensorShape(234U, 304U), WeightFormat::OHWI, WeightFormat::OHWIo8); + add_config(TensorShape(1024U, 1024U), TensorShape(1024U, 1024U), WeightFormat::OHWI, WeightFormat::OHWIo8); + add_config(TensorShape(10U, 9U, 1U, 1U), TensorShape(10U, 16U, 1U, 1U), WeightFormat::OHWI, WeightFormat::OHWIo8); + add_config(TensorShape(16U, 16U, 1U, 1U), TensorShape(16U, 16U, 1U, 1U), WeightFormat::OHWI, WeightFormat::OHWIo8); + add_config(TensorShape(10U, 511U, 1U, 1U), TensorShape(10U, 512U, 1U, 1U), WeightFormat::OHWI, WeightFormat::OHWIo8); + add_config(TensorShape(234U, 301U, 1U, 1U), TensorShape(234U, 304U, 1U, 1U), WeightFormat::OHWI, WeightFormat::OHWIo8); + add_config(TensorShape(1024U, 1024U, 1U, 1U), TensorShape(1024U, 1024U, 1U, 1U), WeightFormat::OHWI, WeightFormat::OHWIo8); + } +}; + +} // namespace datasets +} // namespace test +} // namespace arm_compute +#endif /* ACL_TESTS_DATASETS_REORDERLAYERDATASET */ diff --git a/tests/datasets/ReshapeLayerDataset.h b/tests/datasets/ReshapeLayerDataset.h index d1a1667683..015f9157aa 100644 --- a/tests/datasets/ReshapeLayerDataset.h +++ b/tests/datasets/ReshapeLayerDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2018, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_RESHAPE_LAYER_DATASET -#define ARM_COMPUTE_TEST_RESHAPE_LAYER_DATASET +#ifndef ACL_TESTS_DATASETS_RESHAPELAYERDATASET_H +#define ACL_TESTS_DATASETS_RESHAPELAYERDATASET_H #include "utils/TypePrinter.h" @@ -111,9 +111,10 @@ public: add_config(TensorShape(17U, 3U, 12U), TensorShape(1U, 1U, 612U)); add_config(TensorShape(26U, 26U, 32U), TensorShape(13U, 13U, 128U)); add_config(TensorShape(31U, 23U, 4U, 7U), TensorShape(2U, 14U, 713U)); + add_config(TensorShape(8U, 8U, 8U), TensorShape(8U, 64U)); } }; } // namespace datasets } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_RESHAPE_LAYER_DATASET */ +#endif // ACL_TESTS_DATASETS_RESHAPELAYERDATASET_H diff --git a/tests/datasets/ScaleValidationDataset.h b/tests/datasets/ScaleValidationDataset.h index 25112f155f..8987c3a1c1 100644 --- a/tests/datasets/ScaleValidationDataset.h +++ b/tests/datasets/ScaleValidationDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021 Arm Limited. + * Copyright (c) 2020-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,15 +21,11 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_SCALE_VALIDATION_DATASET -#define ARM_COMPUTE_TEST_SCALE_VALIDATION_DATASET +#ifndef TESTS_DATASETS_SCALEVALIDATIONDATASET +#define TESTS_DATASETS_SCALEVALIDATIONDATASET -#include "utils/TypePrinter.h" - -#include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" #include "tests/datasets/BorderModeDataset.h" -#include "tests/datasets/InterpolationPolicyDataset.h" #include "tests/datasets/SamplingPolicyDataset.h" #include "tests/datasets/ShapeDatasets.h" @@ -140,18 +136,16 @@ const auto ScaleAlignCornersSamplingPolicySet = combine(framework::dataset::make }), framework::dataset::make("AlignCorners", { true })); -/** Generated shapes: Used by Neon precommit and nightly +/** Generated shapes: used by precommit and nightly for CPU tests * - 2D shapes with 0, 1, 2 vector iterations * - 3D shapes with 0, 1 vector iterations * - 4D shapes with 0 vector iterations */ -#define SCALE_SHAPE_DATASET(element_per_iteration) \ - concat(concat(concat(concat(concat(ScaleShapesBaseDataSet<1, 1, (element_per_iteration), 0>(), \ - ScaleShapesBaseDataSet<1, 1, (element_per_iteration), 1>()), \ - ScaleShapesBaseDataSet<1, 1, (element_per_iteration), 2>()), \ - ScaleShapesBaseDataSet<3, 1, (element_per_iteration), 0>()), \ - ScaleShapesBaseDataSet<3, 1, (element_per_iteration), 1>()), \ - ScaleShapesBaseDataSet<3, 3, (element_per_iteration), 0>()) +#define SCALE_SHAPE_DATASET(element_per_iteration) \ + concat(concat(concat(ScaleShapesBaseDataSet<1, 1, (element_per_iteration), 0>(), \ + ScaleShapesBaseDataSet<1, 1, (element_per_iteration), 2>()), \ + ScaleShapesBaseDataSet<3, 1, (element_per_iteration), 1>()), \ + ScaleShapesBaseDataSet<40, 3, (element_per_iteration), 0>()) // To prevent long precommit time for OpenCL, shape set for OpenCL is separated into below two parts. /** Generated shapes for precommits to achieve essential coverage. Used by CL precommit and nightly @@ -166,20 +160,36 @@ framework::dataset::make("AlignCorners", { true })); * - 3D shapes with 0 vector iterations (1 vector iteration is covered by SCALE_PRECOMMIT_SHAPE_DATASET) * - 4D shapes with 0 vector iterations */ -#define SCALE_NIGHTLY_SHAPE_DATASET(element_per_iteration) \ - concat(concat(concat(concat(ScaleShapesBaseDataSet<1, 1, (element_per_iteration), 0>(), \ - ScaleShapesBaseDataSet<1, 1, (element_per_iteration), 1>()), \ - ScaleShapesBaseDataSet<1, 1, (element_per_iteration), 2>()), \ - ScaleShapesBaseDataSet<3, 1, (element_per_iteration), 0>()), \ +#define SCALE_NIGHTLY_SHAPE_DATASET(element_per_iteration) \ + concat(concat(concat(ScaleShapesBaseDataSet<1, 1, (element_per_iteration), 0>(), \ + ScaleShapesBaseDataSet<1, 1, (element_per_iteration), 1>()), \ + ScaleShapesBaseDataSet<3, 1, (element_per_iteration), 0>()), \ ScaleShapesBaseDataSet<3, 3, (element_per_iteration), 0>()) -/** Generating dataset for non-quantized data tyeps with the given shapes */ +/** Generating dataset for non-quantized data types with the given shapes */ #define ASSEMBLE_DATASET(shape, samping_policy_set) \ combine(combine(combine(combine((shape), ScaleDataLayouts), \ ScaleInterpolationPolicySet), \ datasets::BorderModes()), \ samping_policy_set) +#define ASSEMBLE_DATASET_DYNAMIC_FUSION(shape, samping_policy_set) \ + combine(combine(combine((shape), framework::dataset::make("DataLayout", { DataLayout::NHWC })), \ + ScaleInterpolationPolicySet), \ + samping_policy_set) + +#define ASSEMBLE_S8_DATASET(shape, samping_policy_set) \ + combine(combine(combine(combine((shape), framework::dataset::make("DataLayout", DataLayout::NHWC)), \ + framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::BILINEAR })), \ + framework::dataset::make("BorderMode", { BorderMode::REPLICATE })), \ + samping_policy_set) + +#define ASSEMBLE_NHWC_DATASET(shape, samping_policy_set) \ + combine(combine(combine(combine((shape), framework::dataset::make("DataLayout", DataLayout::NHWC)), \ + ScaleInterpolationPolicySet), \ + framework::dataset::make("BorderMode", { BorderMode::CONSTANT, BorderMode::REPLICATE })), \ + samping_policy_set) + /** Generating dataset for quantized data tyeps with the given shapes */ #define ASSEMBLE_QUANTIZED_DATASET(shape, sampling_policy_set, quantization_info_set) \ combine(combine(combine(combine(combine(shape, \ @@ -189,7 +199,24 @@ framework::dataset::make("AlignCorners", { true })); datasets::BorderModes()), \ sampling_policy_set) +#define ASSEMBLE_QUANTIZED_DATASET_DYNAMIC_FUSION(shape, sampling_policy_set, quantization_info_set) \ + combine(combine(combine(combine(shape, \ + quantization_info_set), \ + framework::dataset::make("DataLayout", { DataLayout::NHWC })), \ + ScaleInterpolationPolicySet), \ + sampling_policy_set) + +/** Generating dataset for quantized data tyeps with the given shapes */ +#define ASSEMBLE_DIFFERENTLY_QUANTIZED_DATASET(shape, sampling_policy_set, input_quant_info_set, output_quant_info_set) \ + combine(combine(combine(combine(combine(combine(shape, \ + input_quant_info_set), \ + output_quant_info_set), \ + framework::dataset::make("DataLayout", { DataLayout::NHWC })), \ + framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::BILINEAR })), \ + framework::dataset::make("BorderMode", { BorderMode::REPLICATE })), \ + sampling_policy_set) + } // namespace datasets } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_SCALE_VALIDATION_DATASET */ +#endif /* TESTS_DATASETS_SCALEVALIDATIONDATASET */ diff --git a/tests/datasets/ScatterDataset.h b/tests/datasets/ScatterDataset.h new file mode 100644 index 0000000000..8fd4448d2d --- /dev/null +++ b/tests/datasets/ScatterDataset.h @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_DATASETS_SCATTERDATASET_H +#define ACL_TESTS_DATASETS_SCATTERDATASET_H + +#include "arm_compute/core/TensorShape.h" +#include "utils/TypePrinter.h" + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ + +class ScatterDataset +{ +public: + using type = std::tuple<TensorShape, TensorShape, TensorShape, TensorShape>; + + struct iterator + { + iterator(std::vector<TensorShape>::const_iterator src_it, + std::vector<TensorShape>::const_iterator updates_it, + std::vector<TensorShape>::const_iterator indices_it, + std::vector<TensorShape>::const_iterator dst_it) + : _src_it{ std::move(src_it) }, + _updates_it{ std::move(updates_it) }, + _indices_it{std::move(indices_it)}, + _dst_it{ std::move(dst_it) } + { + } + + std::string description() const + { + std::stringstream description; + description << "A=" << *_src_it << ":"; + description << "B=" << *_updates_it << ":"; + description << "C=" << *_indices_it << ":"; + description << "Out=" << *_dst_it << ":"; + return description.str(); + } + + ScatterDataset::type operator*() const + { + return std::make_tuple(*_src_it, *_updates_it, *_indices_it, *_dst_it); + } + + iterator &operator++() + { + ++_src_it; + ++_updates_it; + ++_indices_it; + ++_dst_it; + + return *this; + } + + private: + std::vector<TensorShape>::const_iterator _src_it; + std::vector<TensorShape>::const_iterator _updates_it; + std::vector<TensorShape>::const_iterator _indices_it; + std::vector<TensorShape>::const_iterator _dst_it; + }; + + iterator begin() const + { + return iterator(_src_shapes.begin(), _update_shapes.begin(), _indices_shapes.begin(), _dst_shapes.begin()); + } + + int size() const + { + return std::min(_src_shapes.size(), std::min(_indices_shapes.size(), std::min(_update_shapes.size(), _dst_shapes.size()))); + } + + void add_config(TensorShape a, TensorShape b, TensorShape c, TensorShape dst) + { + _src_shapes.emplace_back(std::move(a)); + _update_shapes.emplace_back(std::move(b)); + _indices_shapes.emplace_back(std::move(c)); + _dst_shapes.emplace_back(std::move(dst)); + } + +protected: + ScatterDataset() = default; + ScatterDataset(ScatterDataset &&) = default; + +private: + std::vector<TensorShape> _src_shapes{}; + std::vector<TensorShape> _update_shapes{}; + std::vector<TensorShape> _indices_shapes{}; + std::vector<TensorShape> _dst_shapes{}; +}; + + +// 1D dataset for simple scatter tests. +class Small1DScatterDataset final : public ScatterDataset +{ +public: + Small1DScatterDataset() + { + add_config(TensorShape(6U), TensorShape(6U), TensorShape(1U, 6U), TensorShape(6U)); + add_config(TensorShape(10U), TensorShape(2U), TensorShape(1U, 2U), TensorShape(10U)); + } +}; + +// This dataset represents the (m+1)-D updates/dst case. +class SmallScatterMultiDimDataset final : public ScatterDataset +{ +public: + SmallScatterMultiDimDataset() + { + // NOTE: Config is src, updates, indices, output. + // - In this config, the dim replaced is the final number (largest tensor dimension) + // - Largest "updates" dim should match y-dim of indices. + // - src/updates/dst should all have same number of dims. Indices should be 2D. + add_config(TensorShape(6U, 5U), TensorShape(6U, 2U), TensorShape(1U, 2U), TensorShape(6U, 5U)); + add_config(TensorShape(9U, 3U, 4U), TensorShape(9U, 3U, 2U), TensorShape(1U, 2U), TensorShape(9U, 3U, 4U)); + add_config(TensorShape(17U, 3U, 2U, 4U), TensorShape(17U, 3U, 2U, 7U), TensorShape(1U, 7U), TensorShape(17U, 3U, 2U, 4U)); + } +}; + +// This dataset represents the (m+1)-D updates tensor, (m+n)-d output tensor cases +class SmallScatterMultiIndicesDataset final : public ScatterDataset +{ +public: + SmallScatterMultiIndicesDataset() + { + // NOTE: Config is src, updates, indices, output. + // NOTE: indices.shape.x = src.num_dimensions - updates.num_dimensions + 1 + + // index length is 2 + add_config(TensorShape(6U, 5U, 2U), TensorShape(6U, 4U), TensorShape(2U, 4U), TensorShape(6U, 5U, 2U)); + add_config(TensorShape(17U, 3U, 3U, 2U), TensorShape(17U, 3U, 2U), TensorShape(2U, 2U), TensorShape(17U, 3U, 3U, 2U)); + add_config(TensorShape(11U, 3U, 3U, 2U, 4U), TensorShape(11U, 3U, 3U, 4U), TensorShape(2U, 4U), TensorShape(11U, 3U, 3U, 2U, 4U)); + add_config(TensorShape(5U, 4U, 3U, 3U, 2U, 4U), TensorShape(5U, 4U, 3U, 3U, 5U), TensorShape(2U, 5U), TensorShape(5U, 4U, 3U, 3U, 2U, 4U)); + + // index length is 3 + add_config(TensorShape(4U, 3U, 2U, 2U), TensorShape(4U, 2U), TensorShape(3U, 2U), TensorShape(4U, 3U, 2U, 2U)); + add_config(TensorShape(17U, 4U, 3U, 2U, 2U), TensorShape(17U, 4U, 4U), TensorShape(3U, 4U), TensorShape(17U, 4U, 3U, 2U, 2U)); + add_config(TensorShape(10U, 4U, 5U, 3U, 2U, 2U), TensorShape(10U, 4U, 5U, 3U), TensorShape(3U, 3U), TensorShape(10U, 4U, 5U, 3U, 2U, 2U)); + + // index length is 4 + add_config(TensorShape(35U, 4U, 3U, 2U, 2U), TensorShape(35U, 4U), TensorShape(4U, 4U), TensorShape(35U, 4U, 3U, 2U, 2U)); + add_config(TensorShape(10U, 4U, 5U, 3U, 2U, 2U), TensorShape(10U, 4U, 3U), TensorShape(4U, 3U), TensorShape(10U, 4U, 5U, 3U, 2U, 2U)); + + // index length is 5 + add_config(TensorShape(10U, 4U, 5U, 3U, 2U, 2U), TensorShape(10U, 3U), TensorShape(5U, 3U), TensorShape(10U, 4U, 5U, 3U, 2U, 2U)); + } +}; + +// This dataset represents the (m+k)-D updates tensor, (k+1)-d indices tensor and (m+n)-d output tensor cases +class SmallScatterBatchedDataset final : public ScatterDataset +{ +public: + SmallScatterBatchedDataset() + { + // NOTE: Config is src, updates, indices, output. + // NOTE: Updates/Indices tensors are now batched. + // NOTE: indices.shape.x = (updates_batched) ? (src.num_dimensions - updates.num_dimensions) + 2 : (src.num_dimensions - updates.num_dimensions) + 1 + // k is the number of batch dimensions + // k = 2 + add_config(TensorShape(6U, 5U), TensorShape(6U, 2U, 2U), TensorShape(1U, 2U, 2U), TensorShape(6U, 5U)); + add_config(TensorShape(5U, 5U, 4U, 2U, 2U), TensorShape(5U, 5U, 6U, 2U), TensorShape(3U, 6U, 2U), TensorShape(5U, 5U, 4U, 2U, 2U)); + + // k = 3 + add_config(TensorShape(6U, 5U), TensorShape(6U, 2U, 2U, 2U), TensorShape(1U, 2U, 2U, 2U), TensorShape(6U, 5U)); + add_config(TensorShape(5U, 5U, 4U, 2U, 2U), TensorShape(5U, 5U, 3U, 6U, 2U), TensorShape(3U, 3U, 6U, 2U), TensorShape(5U, 5U, 4U, 2U, 2U)); + + // k = 4 + add_config(TensorShape(5U, 5U, 4U, 2U, 2U), TensorShape(5U, 6U, 2U, 3U, 2U), TensorShape(4U, 6U, 2U, 3U, 2U), TensorShape(5U, 5U, 4U, 2U, 2U)); + + // k = 5 + add_config(TensorShape(5U, 5U, 4U, 2U, 2U), TensorShape(5U, 3U, 4U, 3U, 2U, 2U), TensorShape(4U, 3U, 4U, 3U, 2U, 2U), TensorShape(5U, 5U, 4U, 2U, 2U)); + } +}; + +class SmallScatterScalarDataset final : public ScatterDataset +{ +public: + // batched scalar case + SmallScatterScalarDataset() + { + add_config(TensorShape(6U, 5U), TensorShape(6U), TensorShape(2U, 6U), TensorShape(6U, 5U)); + add_config(TensorShape(6U, 5U), TensorShape(6U, 6U), TensorShape(2U, 6U, 6U), TensorShape(6U, 5U)); + add_config(TensorShape(3U, 3U, 6U, 5U), TensorShape(6U, 6U), TensorShape(4U, 6U, 6U), TensorShape(3U, 3U, 6U, 5U)); + } +}; + +// This dataset is for data types that does not require full testing. It contains selected tests from the above. +class SmallScatterMixedDataset final : public ScatterDataset +{ +public: + SmallScatterMixedDataset() + { + add_config(TensorShape(10U), TensorShape(2U), TensorShape(1U, 2U), TensorShape(10U)); + add_config(TensorShape(9U, 3U, 4U), TensorShape(9U, 3U, 2U), TensorShape(1U, 2U), TensorShape(9U, 3U, 4U)); + add_config(TensorShape(6U, 5U), TensorShape(6U, 6U), TensorShape(2U, 6U, 6U), TensorShape(6U, 5U)); + add_config(TensorShape(35U, 4U, 3U, 2U, 2U), TensorShape(35U, 4U), TensorShape(4U, 4U), TensorShape(35U, 4U, 3U, 2U, 2U)); + add_config(TensorShape(11U, 3U, 3U, 2U, 4U), TensorShape(11U, 3U, 3U, 4U), TensorShape(2U, 4U), TensorShape(11U, 3U, 3U, 2U, 4U)); + add_config(TensorShape(6U, 5U, 2U), TensorShape(6U, 2U, 2U), TensorShape(2U, 2U, 2U), TensorShape(6U, 5U, 2U)); + } +}; +} // namespace datasets +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_DATASETS_SCATTERDATASET_H diff --git a/tests/datasets/ShapeDatasets.h b/tests/datasets/ShapeDatasets.h index a7f1a44286..c1e61444a8 100644 --- a/tests/datasets/ShapeDatasets.h +++ b/tests/datasets/ShapeDatasets.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -88,9 +88,9 @@ public: Small2DShapes() : ShapeDataset("Shape", { - TensorShape{ 7U, 7U }, - TensorShape{ 27U, 13U }, - TensorShape{ 128U, 64U } + TensorShape{ 1U, 7U }, + TensorShape{ 5U, 13U }, + TensorShape{ 32U, 64U } }) { } @@ -135,7 +135,7 @@ public: Tiny4DShapes() : ShapeDataset("Shape", { - TensorShape{ 7U, 7U, 5U, 3U }, + TensorShape{ 2U, 7U, 5U, 3U }, TensorShape{ 17U, 13U, 7U, 2U }, }) { @@ -165,12 +165,32 @@ public: : ShapeDataset("Shape", { // Batch size 1 - TensorShape{ 9U, 9U }, + TensorShape{ 1U, 9U }, TensorShape{ 27U, 13U, 2U }, }) { } }; +/** Data set containing small tensor shapes with none of the dimensions equal to 1 (unit). */ +class SmallNoneUnitShapes final : public ShapeDataset +{ +public: + SmallNoneUnitShapes() + : ShapeDataset("Shape", + { + // Batch size 1 + TensorShape{ 13U, 11U }, + TensorShape{ 16U, 16U }, + TensorShape{ 24U, 26U, 5U }, + TensorShape{ 7U, 7U, 17U, 2U }, + // Batch size 4 + TensorShape{ 27U, 13U, 2U, 4U }, + // Arbitrary batch size + TensorShape{ 8U, 7U, 5U, 5U } + }) + { + } +}; /** Data set containing small tensor shapes. */ class SmallShapes final : public ShapeDataset { @@ -179,12 +199,12 @@ public: : ShapeDataset("Shape", { // Batch size 1 - TensorShape{ 11U, 11U }, - TensorShape{ 16U, 16U }, + TensorShape{ 3U, 11U }, + TensorShape{ 1U, 16U }, TensorShape{ 27U, 13U, 7U }, TensorShape{ 7U, 7U, 17U, 2U }, - // Batch size 4 - TensorShape{ 27U, 13U, 2U, 4U }, + // Batch size 4 and 2 SIMD iterations + TensorShape{ 33U, 13U, 2U, 4U }, // Arbitrary batch size TensorShape{ 11U, 11U, 3U, 5U } }) @@ -192,6 +212,25 @@ public: } }; +/** Data set containing small tensor shapes. */ +class SmallShapesNoBatches final : public ShapeDataset +{ +public: + SmallShapesNoBatches() + : ShapeDataset("Shape", + { + // Batch size 1 + TensorShape{ 3U, 11U }, + TensorShape{ 1U, 16U }, + TensorShape{ 27U, 13U, 7U }, + TensorShape{ 7U, 7U, 17U }, + TensorShape{ 33U, 13U, 2U }, + TensorShape{ 11U, 11U, 3U } + }) + { + } +}; + /** Data set containing pairs of tiny tensor shapes that are broadcast compatible. */ class TinyShapesBroadcast final : public framework::dataset::ZipDataset<ShapeDataset, ShapeDataset> { @@ -211,6 +250,25 @@ public: { } }; +/** Data set containing pairs of tiny tensor shapes that are broadcast compatible and can do in_place calculation. */ +class TinyShapesBroadcastInplace final : public framework::dataset::ZipDataset<ShapeDataset, ShapeDataset> +{ +public: + TinyShapesBroadcastInplace() + : ZipDataset<ShapeDataset, ShapeDataset>( + ShapeDataset("Shape0", + { + TensorShape{ 9U }, + TensorShape{ 10U, 2U, 14U, 2U }, + }), + ShapeDataset("Shape1", + { + TensorShape{ 9U, 1U, 9U }, + TensorShape{ 10U }, + })) + { + } +}; /** Data set containing pairs of small tensor shapes that are broadcast compatible. */ class SmallShapesBroadcast final : public framework::dataset::ZipDataset<ShapeDataset, ShapeDataset> { @@ -243,6 +301,52 @@ public: } }; +class TemporaryLimitedSmallShapesBroadcast final : public framework::dataset::ZipDataset<ShapeDataset, ShapeDataset> +{ +public: + TemporaryLimitedSmallShapesBroadcast() + : ZipDataset<ShapeDataset, ShapeDataset>( + ShapeDataset("Shape0", + { + TensorShape{ 1U, 3U, 4U, 2U }, // LHS broadcast X + TensorShape{ 6U, 4U, 2U, 3U }, // RHS broadcast X + TensorShape{ 7U, 1U, 1U, 4U }, // LHS broadcast Y, Z + TensorShape{ 8U, 5U, 6U, 3U }, // RHS broadcast Y, Z + TensorShape{ 1U, 1U, 1U, 2U }, // LHS broadcast X, Y, Z + TensorShape{ 2U, 6U, 4U, 3U }, // RHS broadcast X, Y, Z + }), + ShapeDataset("Shape1", + { + TensorShape{ 5U, 3U, 4U, 2U }, + TensorShape{ 1U, 4U, 2U, 3U }, + TensorShape{ 7U, 2U, 3U, 4U }, + TensorShape{ 8U, 1U, 1U, 3U }, + TensorShape{ 4U, 7U, 3U, 2U }, + TensorShape{ 1U, 1U, 1U, 3U }, + })) + { + } +}; + +class TemporaryLimitedLargeShapesBroadcast final : public framework::dataset::ZipDataset<ShapeDataset, ShapeDataset> +{ +public: + TemporaryLimitedLargeShapesBroadcast() + : ZipDataset<ShapeDataset, ShapeDataset>( + ShapeDataset("Shape0", + { + TensorShape{ 127U, 25U, 5U }, + TensorShape{ 485, 40U, 10U } + }), + ShapeDataset("Shape1", + { + TensorShape{ 1U, 1U, 1U }, // Broadcast in X, Y, Z + TensorShape{ 485U, 1U, 1U }, // Broadcast in Y, Z + })) + { + } +}; + /** Data set containing medium tensor shapes. */ class MediumShapes final : public ShapeDataset { @@ -320,6 +424,19 @@ public: } }; +/** Data set containing large tensor shapes. */ +class LargeShapesNoBatches final : public ShapeDataset +{ +public: + LargeShapesNoBatches() + : ShapeDataset("Shape", + { + TensorShape{ 582U, 131U, 2U }, + }) + { + } +}; + /** Data set containing pairs of large tensor shapes that are broadcast compatible. */ class LargeShapesBroadcast final : public framework::dataset::ZipDataset<ShapeDataset, ShapeDataset> { @@ -501,6 +618,21 @@ public: } }; +/** Data set containing small 5D tensor shapes. */ +class Small5dShapes final : public ShapeDataset +{ +public: + Small5dShapes() + : ShapeDataset("Shape", + { + TensorShape{ 5U, 5U, 7U, 4U, 3U }, + TensorShape{ 5U, 5U, 4U, 13U, 2U }, + TensorShape{ 5U, 5U, 3U, 5U, 2U }, + }) + { + } +}; + /** Data set containing large 5x5 tensor shapes. */ class Large5x5Shapes final : public ShapeDataset { @@ -514,6 +646,19 @@ public: } }; +/** Data set containing large 5D tensor shapes. */ +class Large5dShapes final : public ShapeDataset +{ +public: + Large5dShapes() + : ShapeDataset("Shape", + { + TensorShape{ 30U, 40U, 30U, 32U, 3U } + }) + { + } +}; + /** Data set containing small 5x1 tensor shapes. */ class Small5x1Shapes final : public ShapeDataset { @@ -651,6 +796,7 @@ public: SmallDeconvolutionShapes() : ShapeDataset("InputShape", { + // Multiple Vector Loops for FP32 TensorShape{ 5U, 4U, 3U, 2U }, TensorShape{ 5U, 5U, 3U }, TensorShape{ 11U, 13U, 4U, 3U } @@ -659,6 +805,19 @@ public: } }; +class SmallDeconvolutionShapesWithLargerChannels final : public ShapeDataset +{ +public: + SmallDeconvolutionShapesWithLargerChannels() + : ShapeDataset("InputShape", + { + // Multiple Vector Loops for all data types + TensorShape{ 5U, 5U, 35U } + }) + { + } +}; + /** Data set containing tiny tensor shapes for direct convolution. */ class TinyDirectConvolutionShapes final : public ShapeDataset { @@ -689,6 +848,23 @@ public: } }; +class SmallDirectConv3DShapes final : public ShapeDataset +{ +public: + SmallDirectConv3DShapes() + : ShapeDataset("InputShape", + { + // Batch size 2 + TensorShape{ 1U, 3U, 4U, 5U, 2U }, + // Batch size 3 + TensorShape{ 7U, 27U, 3U, 6U, 3U }, + // Batch size 1 + TensorShape{ 32U, 37U, 13U, 1U, 1U }, + }) + { + } +}; + /** Data set containing small tensor shapes for direct convolution. */ class SmallDirectConvolutionTensorShiftShapes final : public ShapeDataset { @@ -839,7 +1015,7 @@ public: SoftmaxLayerSmallShapes() : ShapeDataset("Shape", { - TensorShape{ 9U, 9U }, + TensorShape{ 1U, 9U }, TensorShape{ 256U, 10U }, TensorShape{ 353U, 8U }, TensorShape{ 781U, 5U }, diff --git a/tests/datasets/SmallConvolutionLayerDataset.h b/tests/datasets/SmallConvolutionLayerDataset.h index 66640dd943..67eade1e64 100644 --- a/tests/datasets/SmallConvolutionLayerDataset.h +++ b/tests/datasets/SmallConvolutionLayerDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -181,7 +181,17 @@ public: } }; -// TODO (COMPMID-1749) +class SmallConvolutionLayerPrePaddingDataset final : public ConvolutionLayerDataset +{ +public: + SmallConvolutionLayerPrePaddingDataset() + { + // output shape is calculated by accounting pre-padding layer as well -- all the data is in nchw + add_config(TensorShape(17U, 31U, 2U), TensorShape(5U, 5U, 2U, 19U), TensorShape(19U), TensorShape(17U, 16U, 19U), PadStrideInfo(1, 2, 1, 1)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 5U, 7U, 16U), TensorShape(16U), TensorShape(12U, 13U, 16U), PadStrideInfo(3, 2, 2, 0)); + } +}; + class SmallConvolutionLayerReducedDataset final : public ConvolutionLayerDataset { public: diff --git a/tests/datasets/SmallGEMMDataset.h b/tests/datasets/SmallGEMMDataset.h index 7d2b42a0d6..99c7abbf64 100644 --- a/tests/datasets/SmallGEMMDataset.h +++ b/tests/datasets/SmallGEMMDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_SMALL_GEMM_DATASET -#define ARM_COMPUTE_TEST_SMALL_GEMM_DATASET +#ifndef ACL_TESTS_DATASETS_SMALLGEMMDATASET_H +#define ACL_TESTS_DATASETS_SMALLGEMMDATASET_H #include "tests/datasets/GEMMDataset.h" @@ -50,6 +50,7 @@ public: add_config(TensorShape(32U, 1U), TensorShape(17U, 32U), TensorShape(17U, 1U), TensorShape(17U, 1U), 0.4f, 0.7f); } }; + class SmallGEMMOutput3DDataset final : public GEMMDataset { public: @@ -77,7 +78,37 @@ public: add_config(TensorShape(16U, 16U, 5U, 3U), TensorShape(8U, 16U), TensorShape(8U), TensorShape(8U, 16U, 5U, 3U), 1.0f, 0.3f); } }; + +class SmallBatchedMatMulDataset final : public GEMMDataset +{ +public: + SmallBatchedMatMulDataset() + { + add_config(TensorShape(4U, 3U), TensorShape(2U, 4U), TensorShape(2U), TensorShape(2U, 3U), 1.0f, 0.0f); + add_config(TensorShape(12U, 15U), TensorShape(7U, 12U), TensorShape(7U), TensorShape(7U, 15U), 1.0f, 0.0f); + add_config(TensorShape(59U, 17U), TensorShape(36U, 59U), TensorShape(36U), TensorShape(36U, 17U), 1.0f, 0.0f); + add_config(TensorShape(2U, 4U, 3U), TensorShape(5U, 2U, 3U), TensorShape(5U), TensorShape(5U, 4U, 3U), 1.0f, 0.0f); + add_config(TensorShape(15U, 7U, 36U), TensorShape(29U, 15U, 36U), TensorShape(29U), TensorShape(29U, 7U, 36U), 1.0f, 0.0f); + add_config(TensorShape(56U, 17U, 32U), TensorShape(5U, 56U, 32U), TensorShape(5U), TensorShape(5U, 17U, 32U), 1.0f, 0.0f); + add_config(TensorShape(13U, 256U, 32U), TensorShape(19U, 13U, 32U), TensorShape(19U), TensorShape(19U, 256U, 32U), 1.0f, 0.0f); + // Broadcast in RHS's batch dimension + add_config(TensorShape(15U, 7U, 36U), TensorShape(29U, 15U), TensorShape(29U), TensorShape(29U, 7U, 36U), 1.0f, 0.0f); + add_config(TensorShape(15U, 7U, 36U, 2U), TensorShape(29U, 15U), TensorShape(29U), TensorShape(29U, 7U, 36U, 2U), 1.0f, 0.0f); + } +}; + +class SmallAccumulateGEMMDataset final : public GEMMDataset +{ +public: + SmallAccumulateGEMMDataset() + { + add_config(TensorShape(8U, 2U), TensorShape(16U, 8U), TensorShape(16U, 2U), TensorShape(16U, 2U), 1.0f, 0.0f); + add_config(TensorShape(31U, 1U), TensorShape(23U, 31U), TensorShape(23U, 1U), TensorShape(23U, 1U), 1.0f, 0.0f); + add_config(TensorShape(21U, 13U), TensorShape(33U, 21U), TensorShape(33U, 13U), TensorShape(33U, 13U), 1.0f, 0.0f); + } +}; + } // namespace datasets } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_SMALL_GEMM_DATASET */ +#endif // ACL_TESTS_DATASETS_SMALLGEMMDATASET_H diff --git a/tests/datasets/SmallGEMMLowpDataset.h b/tests/datasets/SmallGEMMLowpDataset.h index 1b6c65307b..929940d2d9 100644 --- a/tests/datasets/SmallGEMMLowpDataset.h +++ b/tests/datasets/SmallGEMMLowpDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -58,11 +58,10 @@ public: SmallGEMMLowpOutput3DDataset() { add_config(TensorShape(21U, 14U), TensorShape(34U, 21U), TensorShape(34U, 7U, 2U), 0, 0); - add_config(TensorShape(31U, 1U), TensorShape(23U, 31U), TensorShape(23U, 1U, 1U), -2, 13); - add_config(TensorShape(38U, 12U), TensorShape(21U, 38U), TensorShape(21U, 4U, 3U), 0, 4); - add_config(TensorShape(32U, 1U), TensorShape(17U, 32U), TensorShape(17U, 1U, 1U), -2, 1); - add_config(TensorShape(16U, 16U), TensorShape(8U, 16U), TensorShape(8U, 8U, 2U), 5, 9); - add_config(TensorShape(16U, 16U, 5U), TensorShape(8U, 16U, 5U), TensorShape(8U, 8U, 2U, 5U), -7, 2); + add_config(TensorShape(31U, 1U), TensorShape(3U, 31U), TensorShape(3U, 1U, 1U), -2, 13); + add_config(TensorShape(38U, 12U), TensorShape(1U, 38U), TensorShape(1U, 4U, 3U), 0, 4); + add_config(TensorShape(16U, 16U), TensorShape(11U, 16U), TensorShape(11U, 8U, 2U), 2, -1); + add_config(TensorShape(16U, 16U, 5U), TensorShape(13U, 16U, 5U), TensorShape(13U, 8U, 2U, 5U), -3, 2); } }; class SmallGEMMLowpInputOutput3DDataset final : public GEMMLowpDataset @@ -71,13 +70,28 @@ public: SmallGEMMLowpInputOutput3DDataset() { add_config(TensorShape(21U, 14U, 13U), TensorShape(34U, 21U), TensorShape(34U, 14U, 13U), 0, 0); - add_config(TensorShape(31U, 1U, 3U), TensorShape(23U, 31U), TensorShape(23U, 1U, 3U), 0, 0); + add_config(TensorShape(31U, 1U, 3U), TensorShape(1U, 31U), TensorShape(1U, 1U, 3U), 0, 0); add_config(TensorShape(38U, 12U, 2U), TensorShape(21U, 38U), TensorShape(21U, 12U, 2U), -2, 13); - add_config(TensorShape(32U, 1U, 4U, 3U), TensorShape(17U, 32U), TensorShape(17U, 1U, 4U, 3U), 0, 4); - add_config(TensorShape(16U, 16U, 3U, 2U), TensorShape(8U, 16U), TensorShape(8U, 16U, 3U, 2U), -2, 0); + add_config(TensorShape(16U, 16U, 3U, 2U), TensorShape(15U, 16U), TensorShape(15U, 16U, 3U, 2U), -2, 0); add_config(TensorShape(16U, 16U, 5U, 3U), TensorShape(8U, 16U), TensorShape(8U, 16U, 5U, 3U), -9, 1); } }; + +class SmallGEMMLowpBatchedMatMulDataset final : public GEMMLowpDataset +{ +public: + SmallGEMMLowpBatchedMatMulDataset() + { + add_config(TensorShape(4U, 3U), TensorShape(2U, 4U), TensorShape(2U, 3U), 0, 0); + add_config(TensorShape(12U, 15U), TensorShape(7U, 12U), TensorShape(7U, 15U), 0, 0); + add_config(TensorShape(59U, 17U), TensorShape(36U, 59U), TensorShape(36U, 17U), -2, 13); + add_config(TensorShape(2U, 4U, 3U), TensorShape(5U, 2U, 3U), TensorShape(5U, 4U, 3U), -2, 0); + add_config(TensorShape(15U, 7U, 36U), TensorShape(29U, 15U, 36U), TensorShape(29U, 7U, 36U), -9, 1); + add_config(TensorShape(56U, 17U, 32U), TensorShape(5U, 56U, 32U), TensorShape(5U, 17U, 32U), -3, 2); + add_config(TensorShape(13U, 256U, 32U), TensorShape(19U, 13U, 32U), TensorShape(19U, 256U, 32U), 5, 13); + } +}; + } // namespace datasets } // namespace test } // namespace arm_compute diff --git a/tests/datasets/SmallMatMulDataset.h b/tests/datasets/SmallMatMulDataset.h new file mode 100644 index 0000000000..bb4cdad54b --- /dev/null +++ b/tests/datasets/SmallMatMulDataset.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_DATASETS_SMALLMATMULDATASET +#define ACL_TESTS_DATASETS_SMALLMATMULDATASET + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "tests/datasets/MatMulDataset.h" + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ +class SmallMatMulDataset final : public MatMulDataset +{ +public: + SmallMatMulDataset() + { + add_config(TensorShape(3U, 4U, 2U, 2U), TensorShape(2U, 3U, 2U, 2U), TensorShape(2U, 4U, 2U, 2U)); + add_config(TensorShape(9U, 6U), TensorShape(5U, 9U), TensorShape(5U, 6U)); + add_config(TensorShape(31U, 1U), TensorShape(23U, 31U), TensorShape(23U, 1U)); + add_config(TensorShape(8U, 4U, 2U), TensorShape(16U, 8U, 2U), TensorShape(16U, 4U, 2U)); + add_config(TensorShape(32U, 2U), TensorShape(17U, 32U), TensorShape(17U, 2U)); + } +}; + +class SmallerMatMulDataset final : public MatMulDataset +{ +public: + SmallerMatMulDataset() + { + add_config(TensorShape(9U, 6U), TensorShape(5U, 9U), TensorShape(5U, 6U)); + add_config(TensorShape(8U, 4U, 2U), TensorShape(16U, 8U, 2U), TensorShape(16U, 4U, 2U)); + add_config(TensorShape(32U, 2U), TensorShape(17U, 32U), TensorShape(17U, 2U)); + } +}; + +class TinyMatMulDataset final : public MatMulDataset +{ +public: + TinyMatMulDataset() + { + add_config(TensorShape(1U), TensorShape(1U), TensorShape(1U)); + add_config(TensorShape(2U, 2U), TensorShape(2U, 2U), TensorShape(2U, 2U)); + } +}; + +class SmallMatMulDatasetRhsExportToCLImageRhsT final : public MatMulDataset +{ +public: + // Some considerations: + // 1) K dimension should be a multiple of 4 + // See (2), (3), and (4) in SmallMatMulDatasetRhsExportToCLImageRhsNT + SmallMatMulDatasetRhsExportToCLImageRhsT() + { + add_config(TensorShape(8U /*K*/, 3U /*M*/, 2U, 1U, 2U), TensorShape(20U /*N*/, 8U /*K*/, 2U, 1U, 2U), TensorShape(20U /*N*/, 3U /*M*/, 2U, 1U, 2U)); + } +}; + +class SmallMatMulDatasetRhsExportToCLImageRhsNT final : public MatMulDataset +{ +public: + // Some considerations: + // (1) N (Dimension 0 of Rhs matrix) dimension should be a multiple of 4 + // (2) Having N=20 enables us to test all possible N0 values, i.e. 4, 8, 16 + // (3) It's important to have more than one loop iterations in the K dimension + // K has been chosen in accordance with K0 + // (4) The 5-th dimension has been chosen as non-unit because export_to_cl_iamge checks + // were using dim1 * dim2 * dim3 to calculate the CLImage height; however, in our case + // the tensor can be > 4D. To stress that case, the fifth dimension is chosen to be non-unit as well + SmallMatMulDatasetRhsExportToCLImageRhsNT() + { + add_config(TensorShape(7U, 3U, 2U, 1U, 2U), TensorShape(20U, 7U, 2U, 1U, 2U), TensorShape(20U, 3U, 2U, 1U, 2U)); + } +}; +} // namespace datasets +} // namespace test +} // namespace arm_compute +#endif /* ACL_TESTS_DATASETS_SMALLMATMULDATASET */ diff --git a/tests/ILutAccessor.h b/tests/datasets/SmallMatMulMMULDataset.h index 39fa202d2c..9e517488af 100644 --- a/tests/ILutAccessor.h +++ b/tests/datasets/SmallMatMulMMULDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,52 +21,46 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_ILUTACCESSOR_H -#define ARM_COMPUTE_TEST_ILUTACCESSOR_H -#include "arm_compute/core/Coordinates.h" +#ifndef ACL_TESTS_DATASETS_SMALLMATMULMMULDATASET +#define ACL_TESTS_DATASETS_SMALLMATMULMMULDATASET + +#include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" +#include "tests/datasets/MatMulDataset.h" namespace arm_compute { namespace test { -/** Common interface to provide information and access to Lut like - * structures. +namespace datasets +{ +/** MatMul MMUL shapes are similar to MatMul shapes except that K has to be a multiple of MMUL_K0 which is 4 (e.g. see src/gpu/cl/kernels/ClMatMulNativeMMULKernel.cpp for the definition) */ -template <typename T> -class ILutAccessor +class SmallMatMulMMULDataset final : public MatMulDataset { public: - /** Lut value type */ - using value_type = T; - - /** Pure virtual destructor. */ - virtual ~ILutAccessor() = default; - - /** Number of elements of the Lut. - * - * @return the number of elements. - */ - virtual int num_elements() const = 0; - - /** Read access to the specified element. - * - * @param[in] input_value Lut input value, from numericlimits<T>:min to numericlimits<T>:max. - * - * @return Output desired element. - */ - virtual const T &operator[](T input_value) const = 0; + SmallMatMulMMULDataset() + { + add_config(TensorShape(8U, 4U, 2U, 2U), TensorShape(2U, 8U, 2U, 2U), TensorShape(2U, 4U, 2U, 2U)); + add_config(TensorShape(28U, 1U), TensorShape(23U, 28U), TensorShape(23U, 1U)); + add_config(TensorShape(8U, 4U, 2U), TensorShape(16U, 8U, 2U), TensorShape(16U, 4U, 2U)); + add_config(TensorShape(32U, 2U), TensorShape(17U, 32U), TensorShape(17U, 2U)); + add_config(TensorShape(8U, 6U), TensorShape(7U, 8U), TensorShape(7U, 6U)); + } +}; - /** Write access to the specified element. - * - * @param[in] input_value Lut input value, from numericlimits<T>:min to numericlimits<T>:max. - * - * @return Output desired element. - */ - virtual T &operator[](T input_value) = 0; +class TinyMatMulMMULDataset final : public MatMulDataset +{ +public: + TinyMatMulMMULDataset() + { + add_config(TensorShape(4U, 4U), TensorShape(4U, 4U), TensorShape(4U, 4U)); + } }; +} // namespace datasets } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_ILUTACCESSOR_H */ + +#endif /* ACL_TESTS_DATASETS_SMALLMATMULMMULDATASET */ diff --git a/tests/datasets/dynamic_fusion/PoolingLayerDataset.h b/tests/datasets/dynamic_fusion/PoolingLayerDataset.h new file mode 100644 index 0000000000..c4911f4940 --- /dev/null +++ b/tests/datasets/dynamic_fusion/PoolingLayerDataset.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "utils/TypePrinter.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h" + + +using Pool2dAttributes = arm_compute::experimental::dynamic_fusion::Pool2dAttributes; + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ + +class DynamicFusionPoolingLayerDataset +{ +public: + using type = std::tuple<TensorShape, Pool2dAttributes>; + + struct iterator + { + iterator(std::vector<TensorShape>::const_iterator src_it, + std::vector<Pool2dAttributes>::const_iterator infos_it) + : _src_it{ std::move(src_it) }, + _infos_it{ std::move(infos_it) } + { + } + + std::string description() const + { + std::stringstream description; + description << "In=" << *_src_it << ":"; + description << "Info=" << *_infos_it << ":"; + return description.str(); + } + + DynamicFusionPoolingLayerDataset::type operator*() const + { + return std::make_tuple(*_src_it, *_infos_it); + } + + iterator &operator++() + { + ++_src_it; + ++_infos_it; + + return *this; + } + + private: + std::vector<TensorShape>::const_iterator _src_it; + std::vector<Pool2dAttributes>::const_iterator _infos_it; + }; + + iterator begin() const + { + return iterator(_src_shapes.begin(), _infos.begin()); + } + + int size() const + { + return std::min(_src_shapes.size(), _infos.size()); + } + + void add_config(TensorShape src, Pool2dAttributes info) + { + _src_shapes.emplace_back(std::move(src)); + _infos.emplace_back(std::move(info)); + } + +protected: + DynamicFusionPoolingLayerDataset() = default; + DynamicFusionPoolingLayerDataset(DynamicFusionPoolingLayerDataset &&) = default; + +private: + std::vector<TensorShape> _src_shapes{}; + std::vector<Pool2dAttributes> _infos{}; +}; + +// Special pooling dataset +class PoolingLayerDatasetSpecialDynamicFusion final : public DynamicFusionPoolingLayerDataset +{ +public: + PoolingLayerDatasetSpecialDynamicFusion() + { + // NCHW DataLayout + // Special cases + add_config(TensorShape(2U, 3U, 4U, 1U), Pool2dAttributes().pool_type(PoolingType::AVG).pool_size(Size2D(2,2)).stride(Size2D(3,3))); + add_config(TensorShape(60U, 52U, 3U, 2U), Pool2dAttributes().pool_type(PoolingType::AVG).pool_size(Size2D(100,100)).stride(Size2D(5,5)).pad(Padding2D(50,50,50,50))); + // Asymmetric padding + add_config(TensorShape(112U, 112U, 32U), Pool2dAttributes().pool_type(PoolingType::MAX).pool_size(Size2D(3,3)).pad(Padding2D(0,1,0,1)).stride(Size2D(2,2))); + add_config(TensorShape(14U, 14U, 832U), Pool2dAttributes().pool_type(PoolingType::MAX).pool_size(Size2D(2,2)).stride(Size2D(1,1)).pad(Padding2D(0,0,0,0))); + + } +}; +} // namespace datasets +} // namespace test +} // namespace arm_compute
\ No newline at end of file diff --git a/tests/datasets/system_tests/yolo/v2/YOLOV2ActivationLayerDataset.h b/tests/datasets/system_tests/yolo/v2/YOLOV2ActivationLayerDataset.h deleted file mode 100644 index ec6f470dd7..0000000000 --- a/tests/datasets/system_tests/yolo/v2/YOLOV2ActivationLayerDataset.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2017 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_YOLOV2_ACTIVATION_LAYER_DATASET -#define ARM_COMPUTE_TEST_YOLOV2_ACTIVATION_LAYER_DATASET - -#include "tests/framework/datasets/Datasets.h" - -#include "utils/TypePrinter.h" - -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -namespace test -{ -namespace datasets -{ -class YOLOV2ActivationLayerRELUDataset final : public - framework::dataset::CartesianProductDataset<framework::dataset::InitializerListDataset<TensorShape>, framework::dataset::SingletonDataset<ActivationLayerInfo>> -{ -public: - YOLOV2ActivationLayerRELUDataset() - : CartesianProductDataset - { - framework::dataset::make("Shape", { // relu1 - TensorShape(416U, 416U, 32U), - // relu2 - TensorShape(208U, 208U, 64U), - // relu3, relu5 - TensorShape(104U, 104U, 128U), - // relu4 - TensorShape(104U, 104U, 64U), - // relu6, relu8 - TensorShape(52U, 52U, 256U), - // relu7 - TensorShape(52U, 52U, 128U), - // relu9, relu11, relu13 - TensorShape(26U, 26U, 512U), - // relu10, relu12 - TensorShape(26U, 26U, 256U), - // relu14, relu16, relu18, relu19, relu20, relu21 - TensorShape(13U, 13U, 1024U), - // relu15, relu17 - TensorShape(13U, 13U, 512U) }), - framework::dataset::make("Info", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) - } - { - } - YOLOV2ActivationLayerRELUDataset(YOLOV2ActivationLayerRELUDataset &&) = default; - ~YOLOV2ActivationLayerRELUDataset() = default; -}; - -class YOLOV2ActivationLayerLINEARDataset final : public - framework::dataset::CartesianProductDataset<framework::dataset::InitializerListDataset<TensorShape>, framework::dataset::SingletonDataset<ActivationLayerInfo>> -{ -public: - YOLOV2ActivationLayerLINEARDataset() - : CartesianProductDataset - { - framework::dataset::make("Shape", { // linear22 - TensorShape(15U, 15U, 425U) }), - framework::dataset::make("Info", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR)) - } - { - } - YOLOV2ActivationLayerLINEARDataset(YOLOV2ActivationLayerLINEARDataset &&) = default; - ~YOLOV2ActivationLayerLINEARDataset() = default; -}; - -class YOLOV2ActivationLayerDataset final : public framework::dataset::JoinDataset<YOLOV2ActivationLayerRELUDataset, YOLOV2ActivationLayerLINEARDataset> -{ -public: - YOLOV2ActivationLayerDataset() - : JoinDataset - { - YOLOV2ActivationLayerRELUDataset(), - YOLOV2ActivationLayerLINEARDataset() - } - { - } - YOLOV2ActivationLayerDataset(YOLOV2ActivationLayerDataset &&) = default; - ~YOLOV2ActivationLayerDataset() = default; -}; -} // namespace datasets -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_YOLOV2_ACTIVATION_LAYER_DATASET */ diff --git a/tests/datasets/system_tests/yolo/v2/YOLOV2BatchNormalizationLayerDataset.h b/tests/datasets/system_tests/yolo/v2/YOLOV2BatchNormalizationLayerDataset.h deleted file mode 100644 index 9997ffc0c0..0000000000 --- a/tests/datasets/system_tests/yolo/v2/YOLOV2BatchNormalizationLayerDataset.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2017 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_YOLOV2_BATCHNORMALIZATION_LAYER_DATASET -#define ARM_COMPUTE_TEST_YOLOV2_BATCHNORMALIZATION_LAYER_DATASET - -#include "tests/datasets/BatchNormalizationLayerDataset.h" - -#include "utils/TypePrinter.h" - -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -namespace test -{ -namespace datasets -{ -class YOLOV2BatchNormalizationLayerDataset final : public BatchNormalizationLayerDataset -{ -public: - YOLOV2BatchNormalizationLayerDataset() - { - // conv1_bn - add_config(TensorShape(416U, 416U, 32U), TensorShape(32U), 0.00001f); - // conv2_bn - add_config(TensorShape(208U, 208U, 64U), TensorShape(64U), 0.00001f); - // conv3_bn, conv5_bn - add_config(TensorShape(104U, 104U, 128U), TensorShape(128U), 0.00001f); - // conv4_bn - add_config(TensorShape(104U, 104U, 64U), TensorShape(64U), 0.00001f); - // conv6_bn, conv8_bn - add_config(TensorShape(52U, 52U, 256U), TensorShape(256U), 0.00001f); - // conv7_bn - add_config(TensorShape(52U, 52U, 128U), TensorShape(128U), 0.00001f); - // conv9_bn, conv11_bn, conv13_bn - add_config(TensorShape(26U, 26U, 512U), TensorShape(512U), 0.00001f); - // conv10_bn, conv12_bn - add_config(TensorShape(26U, 26U, 256U), TensorShape(256U), 0.00001f); - // conv14_bn, conv16_bn, conv18_bn, conv19_bn, conv20_bn, conv21_bn - add_config(TensorShape(13U, 13U, 1024U), TensorShape(1024U), 0.00001f); - // conv15_bn, conv17_bn - add_config(TensorShape(13U, 13U, 512U), TensorShape(512U), 0.00001f); - } -}; -} // namespace datasets -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_YOLOV2_BATCHNORMALIZATION_LAYER_DATASET */ diff --git a/tests/datasets/system_tests/yolo/v2/YOLOV2ConvolutionLayerDataset.h b/tests/datasets/system_tests/yolo/v2/YOLOV2ConvolutionLayerDataset.h deleted file mode 100644 index 67d57bf680..0000000000 --- a/tests/datasets/system_tests/yolo/v2/YOLOV2ConvolutionLayerDataset.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2017 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_YOLOV2_CONVOLUTION_LAYER_DATASET -#define ARM_COMPUTE_TEST_YOLOV2_CONVOLUTION_LAYER_DATASET - -#include "tests/datasets/ConvolutionLayerDataset.h" - -#include "utils/TypePrinter.h" - -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -namespace test -{ -namespace datasets -{ -class YOLOV2ConvolutionLayerDataset final : public ConvolutionLayerDataset -{ -public: - YOLOV2ConvolutionLayerDataset() - { - // conv1 - add_config(TensorShape(416U, 416U, 3U), TensorShape(3U, 3U, 3U, 32U), TensorShape(32U), TensorShape(416U, 416U, 32U), PadStrideInfo(1, 1, 1, 1)); - // conv2 - add_config(TensorShape(208U, 208U, 32U), TensorShape(3U, 3U, 32U, 64U), TensorShape(64U), TensorShape(208U, 208U, 64U), PadStrideInfo(1, 1, 1, 1)); - // conv3, conv5 - add_config(TensorShape(104U, 104U, 64U), TensorShape(3U, 3U, 64U, 128U), TensorShape(128U), TensorShape(104U, 104U, 128U), PadStrideInfo(1, 1, 1, 1)); - // conv4 - add_config(TensorShape(104U, 104U, 128U), TensorShape(1U, 1U, 128U, 64U), TensorShape(64U), TensorShape(104U, 104U, 64U), PadStrideInfo(1, 1, 0, 0)); - // conv6, conv8 - add_config(TensorShape(52U, 52U, 128U), TensorShape(3U, 3U, 128U, 256U), TensorShape(256U), TensorShape(52U, 52U, 256U), PadStrideInfo(1, 1, 1, 1)); - // conv7 - add_config(TensorShape(52U, 52U, 256U), TensorShape(1U, 1U, 256U, 128U), TensorShape(128U), TensorShape(52U, 52U, 128U), PadStrideInfo(1, 1, 0, 0)); - // conv9, conv11, conv13 - add_config(TensorShape(26U, 26U, 256U), TensorShape(3U, 3U, 256U, 512U), TensorShape(512U), TensorShape(26U, 26U, 512U), PadStrideInfo(1, 1, 1, 1)); - // conv10, conv12 - add_config(TensorShape(26U, 26U, 512U), TensorShape(1U, 1U, 512U, 256U), TensorShape(256U), TensorShape(26U, 26U, 256U), PadStrideInfo(1, 1, 0, 0)); - // conv14, conv16, conv18 - add_config(TensorShape(13U, 13U, 512U), TensorShape(3U, 3U, 512U, 1024U), TensorShape(1024U), TensorShape(13U, 13U, 1024U), PadStrideInfo(1, 1, 1, 1)); - // conv15, conv17 - add_config(TensorShape(13U, 13U, 1024U), TensorShape(1U, 1U, 1024U, 512U), TensorShape(512U), TensorShape(13U, 13U, 512U), PadStrideInfo(1, 1, 0, 0)); - // conv19, conv20 - add_config(TensorShape(13U, 13U, 1024U), TensorShape(3U, 3U, 1024U, 1024U), TensorShape(1024U), TensorShape(13U, 13U, 1024U), PadStrideInfo(1, 1, 1, 1)); - // conv21 - add_config(TensorShape(13U, 13U, 3072U), TensorShape(3U, 3U, 3072U, 1024U), TensorShape(1024U), TensorShape(13U, 13U, 1024U), PadStrideInfo(1, 1, 1, 1)); - // conv22 - add_config(TensorShape(13U, 13U, 1024U), TensorShape(1U, 1U, 1024U, 425U), TensorShape(425U), TensorShape(13U, 13U, 425U), PadStrideInfo(1, 1, 0, 0)); - } -}; -} // namespace datasets -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_YOLOV2_CONVOLUTION_LAYER_DATASET */ diff --git a/tests/framework/Asserts.h b/tests/framework/Asserts.h index b8a8fe091c..7adfa8f2f3 100644 --- a/tests/framework/Asserts.h +++ b/tests/framework/Asserts.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,6 +30,8 @@ #include <sstream> #include <type_traits> +#include "utils/TypePrinter.h" + namespace arm_compute { namespace test @@ -42,6 +44,11 @@ inline int make_printable(int8_t value) return value; } +inline std::string make_printable(const arm_compute::WeightFormat wf) +{ + return arm_compute::to_string(wf); +} + inline unsigned int make_printable(uint8_t value) { return value; @@ -153,6 +160,7 @@ ARM_COMPUTE_TEST_COMP_FACTORY(ASSERT, Assertion, !=, NOT_EQUAL, throw arm_comput arm_compute::test::framework::Framework::get().clear_test_info(); \ } while(false) +#if defined(ARM_COMPUTE_ASSERTS_ENABLED) #define ARM_COMPUTE_EXPECT_THROW(X, LEVEL) \ do \ { \ @@ -175,6 +183,17 @@ ARM_COMPUTE_TEST_COMP_FACTORY(ASSERT, Assertion, !=, NOT_EQUAL, throw arm_comput } \ arm_compute::test::framework::Framework::get().clear_test_info(); \ } while(false) +#else // defined(ARM_COMPUTE_ASSERTS_ENABLED) +#define ARM_COMPUTE_EXPECT_THROW(X, LEVEL) \ + do \ + { \ + std::stringstream msg; \ + msg << "'" #X "' Skipped: asserts disabled, cannot throw\n"; \ + arm_compute::test::framework::Framework::get().print_test_info(msg); \ + arm_compute::test::framework::Framework::get().log_info(msg.str()); \ + arm_compute::test::framework::Framework::get().clear_test_info(); \ + } while(false) +#endif // defined(ARM_COMPUTE_ASSERTS_ENABLED) #define ARM_COMPUTE_ASSERT_FAIL(MSG) \ do \ diff --git a/tests/framework/BUILD.bazel b/tests/framework/BUILD.bazel new file mode 100644 index 0000000000..17d5a15a11 --- /dev/null +++ b/tests/framework/BUILD.bazel @@ -0,0 +1,63 @@ +# Copyright (c) 2023 Arm Limited. +# +# SPDX-License-Identifier: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +cc_library( + name = "framework", + srcs = glob( + [ + "*.cpp", + "command_line/*.cpp", + "printers/*.cpp", + "datasets/*.cpp", + "instruments/*.cpp", + "instruments/*.hpp", + ], + exclude = [ + "**/*PMU*", + "**/*OpenCL*", + "**/*MaliCounter*", + ], + ), + hdrs = glob([ + "*.h", + "command_line/*.h", + "printers/*.h", + "datasets/*.h", + "instruments/*.h", + ]), + copts = [ + "-Wno-overloaded-virtual", + ] + select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = True, + visibility = ["//visibility:public"], + deps = [ + "//arm_compute:core_headers", + "//arm_compute:graph_headers", + "//arm_compute:runtime_headers", + "//support", + "//utils", + ], +) diff --git a/tests/framework/Framework.cpp b/tests/framework/Framework.cpp index 436aac0a34..bfb955c525 100644 --- a/tests/framework/Framework.cpp +++ b/tests/framework/Framework.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -130,10 +130,12 @@ Framework &Framework::get() void Framework::init(const FrameworkConfig &config) { _test_filter.reset(new TestFilter(config.mode, config.name_filter, config.id_filter)); - _num_iterations = config.num_iterations; - _log_level = config.log_level; - _cooldown_sec = config.cooldown_sec; - _configure_only = config.configure_only; + _num_iterations = config.num_iterations; + _log_level = config.log_level; + _cooldown_sec = config.cooldown_sec; + _configure_only = config.configure_only; + _print_rerun_cmd = config.print_rerun_cmd; + _seed = config.seed; _instruments = std::set<framework::InstrumentsDescription>(std::begin(config.instruments), std::end(config.instruments)); } @@ -209,6 +211,7 @@ void Framework::log_test_end(const TestInfo &info) { func_on_all_printers([&](Printer * p) { + p->print_profiler_header(_test_results.at(info).header_data); p->print_measurements(_test_results.at(info).measurements); }); } @@ -291,13 +294,13 @@ bool Framework::error_on_missing_assets() const return _error_on_missing_assets; } -void Framework::run_test(const TestInfo &info, TestCaseFactory &test_factory) +TestResult::Status Framework::run_test(const TestInfo &info, TestCaseFactory &test_factory) { if(test_factory.status() == TestCaseFactory::Status::DISABLED) { log_test_skipped(info); set_test_result(info, TestResult(TestResult::Status::DISABLED)); - return; + return TestResult::Status::DISABLED; } log_test_start(info); @@ -528,14 +531,16 @@ void Framework::run_test(const TestInfo &info, TestCaseFactory &test_factory) { if(_stop_on_error) { - throw std::runtime_error("Abort on first error."); + throw std::runtime_error("Abandon on first error."); } } + result.header_data = profiler.header(); result.measurements = profiler.measurements(); set_test_result(info, result); log_test_end(info); + return result.status; } bool Framework::run() @@ -555,6 +560,7 @@ bool Framework::run() int id = 0; int id_run_test = 0; + ARM_COMPUTE_UNUSED(id_run_test); // Not used if ARM_COMPUTE_CL is not defined for(auto &test_factory : _test_factories) { @@ -578,9 +584,11 @@ bool Framework::run() CLScheduler::get().set_queue(new_queue); } #endif // ARM_COMPUTE_CL - - run_test(test_info, *test_factory); - + TestResult::Status result = run_test(test_info, *test_factory); + if((_print_rerun_cmd) && (result == TestResult::Status::CRASHED || result == TestResult::Status::FAILED)) + { + std::cout << "Rerun command: ./arm_compute_validation --filter='^" << test_info.name << "$' --seed=" << _seed << std::endl; + } ++id_run_test; // Run test delay @@ -630,6 +638,7 @@ void Framework::print_test_results(Printer &printer) const for(const auto &test : _test_results) { printer.print_test_header(test.first); + printer.print_profiler_header(test.second.header_data); printer.print_measurements(test.second.measurements); printer.print_test_footer(); } @@ -679,7 +688,7 @@ std::vector<TestInfo> Framework::test_infos() const for(const auto &factory : _test_factories) { - TestInfo test_info{ id, factory->name(), factory->mode(), factory->status() }; + const TestInfo test_info{ id, factory->name(), factory->mode(), factory->status() }; if(_test_filter->is_selected(test_info)) { diff --git a/tests/framework/Framework.h b/tests/framework/Framework.h index 4c2e86c6ea..2dded30038 100644 --- a/tests/framework/Framework.h +++ b/tests/framework/Framework.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -64,6 +64,8 @@ struct FrameworkConfig float cooldown_sec{ -1.f }; /**< Delay between tests in seconds. */ LogLevel log_level{ LogLevel::NONE }; /**< Verbosity of the output. */ bool configure_only{ false }; /**< Only configure kernels */ + bool print_rerun_cmd{ false }; /**< Print the command to rerun the failed testcase */ + unsigned int seed{ 0 }; /**< The seed that is used to fill tensors with random values.*/ }; /** Information about a test case. @@ -121,8 +123,6 @@ public: * registering test cases. * * @param[in] name Name of the added test suite. - * - * @return Name of the current test suite. */ void push_suite(std::string name); @@ -230,13 +230,13 @@ public: /** Indicates if test execution is stopped after the first failed test. * - * @return True if the execution is going to be aborted after the first failed test. + * @return True if the execution is going to be stopped after the first failed test. */ bool stop_on_error() const; - /** Set whether to abort execution after the first failed test. + /** Set whether to stop execution after the first failed test. * - * @param[in] stop_on_error True if execution is going to be aborted after first failed test. + * @param[in] stop_on_error True if execution is going to be stopped after first failed test. */ void set_stop_on_error(bool stop_on_error); @@ -330,7 +330,7 @@ private: Framework(const Framework &) = delete; Framework &operator=(const Framework &) = delete; - void run_test(const TestInfo &info, TestCaseFactory &test_factory); + TestResult::Status run_test(const TestInfo &info, TestCaseFactory &test_factory); std::map<TestResult::Status, int> count_test_results() const; /** Returns the current test suite name. @@ -358,6 +358,8 @@ private: std::vector<Printer *> _printers{}; bool _configure_only{ false }; bool _new_fixture_call{ false }; + bool _print_rerun_cmd{ false }; + unsigned int _seed{ 0 }; using create_function = std::unique_ptr<Instrument>(); std::map<InstrumentsDescription, create_function *> _available_instruments{}; diff --git a/tests/framework/Macros.h b/tests/framework/Macros.h index 23c826657d..5ce0842864 100644 --- a/tests/framework/Macros.h +++ b/tests/framework/Macros.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -49,8 +49,8 @@ #define CONCAT(ARG0, ARG1) ARG0##ARG1 -#define VARIADIC_SIZE_IMPL(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, size, ...) size -#define VARIADIC_SIZE(...) VARIADIC_SIZE_IMPL(__VA_ARGS__, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +#define VARIADIC_SIZE_IMPL(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, size, ...) size +#define VARIADIC_SIZE(...) VARIADIC_SIZE_IMPL(__VA_ARGS__, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) #define JOIN_PARAM1(OP, param) OP(0, param) #define JOIN_PARAM2(OP, param, ...) \ @@ -92,6 +92,12 @@ #define JOIN_PARAM14(OP, param, ...) \ OP(13, param) \ , JOIN_PARAM13(OP, __VA_ARGS__) +#define JOIN_PARAM15(OP, param, ...) \ + OP(14, param) \ + , JOIN_PARAM14(OP, __VA_ARGS__) +#define JOIN_PARAM16(OP, param, ...) \ + OP(15, param) \ + , JOIN_PARAM15(OP, __VA_ARGS__) #define JOIN_PARAM(OP, NUM, ...) \ CONCAT(JOIN_PARAM, NUM) \ (OP, __VA_ARGS__) @@ -121,13 +127,13 @@ void do_setup() override \ { \ framework::Framework::get().set_new_fixture_call(false); \ - apply(this, &FIXTURE::setup<As...>, _data); \ + apply(this, &FIXTURE::setup, _data); \ } #define FIXTURE_DATA_SETUP_NEW(FIXTURE) \ void do_setup() override \ { \ framework::Framework::get().set_new_fixture_call(true); \ - apply(this, &FIXTURE::setup<As...>, _data); \ + apply(this, &FIXTURE::setup, _data); \ configure_target(); \ if(!framework::Framework::get().configure_only()) \ { \ @@ -224,6 +230,11 @@ #define DISABLED_FIXTURE_TEST_CASE(TEST_NAME, FIXTURE, MODE) \ FIXTURE_TEST_CASE_IMPL(TEST_NAME, FIXTURE, MODE, arm_compute::test::framework::TestCaseFactory::Status::DISABLED) +#define EMPTY_BODY_FIXTURE_TEST_CASE(TEST_NAME, FIXTURE, MODE) \ + FIXTURE_TEST_CASE(TEST_NAME, FIXTURE, MODE) \ + { \ + } + #define FIXTURE_DATA_TEST_CASE_IMPL(TEST_NAME, FIXTURE, MODE, STATUS, DATASET) \ template <typename T> \ class TEST_NAME; \ @@ -313,4 +324,4 @@ // // TEST CASE MACROS END // -#endif /* ARM_COMPUTE_TEST_FRAMEWORK_MACROS */
\ No newline at end of file +#endif /* ARM_COMPUTE_TEST_FRAMEWORK_MACROS */ diff --git a/tests/framework/Profiler.cpp b/tests/framework/Profiler.cpp index b527eb4f09..a4a9beaa29 100644 --- a/tests/framework/Profiler.cpp +++ b/tests/framework/Profiler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2018,2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -81,6 +81,8 @@ void Profiler::test_stop() { _measurements[instrument->id() + "/" + measurement.first].push_back(measurement.second); } + + _header_data = instrument->instrument_header(); } } @@ -88,6 +90,11 @@ const Profiler::MeasurementsMap &Profiler::measurements() const { return _measurements; } + +const std::string &Profiler::header() const +{ + return _header_data; +} } // namespace framework } // namespace test } // namespace arm_compute diff --git a/tests/framework/Profiler.h b/tests/framework/Profiler.h index 588276fcd5..7df085c8f1 100644 --- a/tests/framework/Profiler.h +++ b/tests/framework/Profiler.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2018,2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -87,9 +87,16 @@ public: */ const MeasurementsMap &measurements() const; + /** Return JSON formatted header data. + * + * @returns JSON formmated string + */ + const std::string &header() const; + private: std::vector<std::unique_ptr<Instrument>> _instruments{}; MeasurementsMap _measurements{}; + std::string _header_data{}; }; } // namespace framework } // namespace test diff --git a/tests/framework/SConscript b/tests/framework/SConscript index e805ac0e2c..450ffd77b0 100644 --- a/tests/framework/SConscript +++ b/tests/framework/SConscript @@ -1,4 +1,7 @@ -# Copyright (c) 2017 Arm Limited. +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright (c) 2017-2022 Arm Limited. # # SPDX-License-Identifier: MIT # @@ -27,8 +30,8 @@ Import('vars') # vars is imported from arm_compute: variables = [ - BoolVariable("pmu", "Enable PMU counters", False), - BoolVariable("mali", "Enable Mali hardware counters", False), + BoolVariable("pmu", "Enable the PMU cycle counter to measure execution time in benchmark tests. (Your device needs to support it)", False), + BoolVariable("mali", "Enable the collection of Arm® Mali™ hardware counters to measure execution time in benchmark tests. (Your device needs to have a Arm® Mali™ driver that supports it)", False), ] # We need a separate set of Variables for the Help message (Otherwise the global variables will get displayed twice) @@ -67,7 +70,7 @@ if not env['opencl']: files = [f for f in files if "OpenCL" not in os.path.basename(str(f))] if not framework_env['mali']: - # Remove Mali files + # Remove Arm® Mali™ files files = [f for f in files if "MaliCounter" not in os.path.basename(str(f))] else: framework_env.Append(CPPDEFINES = ['MALI_ENABLED']) diff --git a/tests/framework/TestResult.h b/tests/framework/TestResult.h index 10f10c1d86..18e54343dc 100644 --- a/tests/framework/TestResult.h +++ b/tests/framework/TestResult.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2018,2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -73,6 +73,7 @@ struct TestResult Status status{ Status::NOT_RUN }; /**< Execution status */ Profiler::MeasurementsMap measurements{}; /**< Profiling information */ + std::string header_data{}; /**< Test header data */ }; } // namespace framework } // namespace test diff --git a/tests/framework/command_line/CommonOptions.cpp b/tests/framework/command_line/CommonOptions.cpp index 6fb37470c1..e6f1929bb1 100644 --- a/tests/framework/command_line/CommonOptions.cpp +++ b/tests/framework/command_line/CommonOptions.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2020,2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,7 +25,9 @@ #include "../Framework.h" #include "../printers/Printers.h" +#if !defined(_WIN64) #include <unistd.h> +#endif // !defined(_WIN64) using namespace arm_compute::utils; @@ -43,7 +45,11 @@ CommonOptions::CommonOptions(CommandLineParser &parser) log_file(parser.add_option<SimpleOption<std::string>>("log-file")), log_level(), throw_errors(parser.add_option<ToggleOption>("throw-errors")), - color_output(parser.add_option<ToggleOption>("color-output", isatty(STDOUT_FILENO))), // Only enable colors by default if we're running in a terminal +#if !defined(_WIN64) + color_output(parser.add_option<ToggleOption>("color-output", isatty(STDOUT_FILENO))), // Only enable colors by default if we're running in a terminal +#else // !defined(_WIN64) + color_output(parser.add_option<ToggleOption>("color-output", 0)), +#endif // !defined(_WIN64) pretty_console(parser.add_option<ToggleOption>("pretty-console", false)), json_file(parser.add_option<SimpleOption<std::string>>("json-file")), pretty_file(parser.add_option<SimpleOption<std::string>>("pretty-file")), diff --git a/tests/framework/datasets/CartesianProductDataset.h b/tests/framework/datasets/CartesianProductDataset.h index 19ac4f6666..7b3ff12047 100644 --- a/tests/framework/datasets/CartesianProductDataset.h +++ b/tests/framework/datasets/CartesianProductDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2018, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -186,6 +186,20 @@ CartesianProductDataset<T, U> combine(T &&dataset1, U &&dataset2) * * @param[in] dataset1 First dataset. * @param[in] dataset2 Second dataset. + * @param[in] datasets Subsequent dataset. + * + * @return A grid dataset. + */ +template <typename T1, typename T2, typename... Ts> +auto combine(T1 &&dataset1, T2 &&dataset2, Ts &&... datasets) -> decltype(combine(std::forward<T1>(dataset1), combine(std::forward<T2>(dataset2), std::forward<Ts>(datasets)...))) +{ + return combine(std::forward<T1>(dataset1), combine(std::forward<T2>(dataset2), std::forward<Ts>(datasets)...)); +} + +/** Helper function to create a @ref CartesianProductDataset. + * + * @param[in] dataset1 First dataset. + * @param[in] dataset2 Second dataset. * * @return A grid dataset. */ diff --git a/tests/framework/datasets/ContainerDataset.h b/tests/framework/datasets/ContainerDataset.h index 3987e8f1b8..de77cb24d6 100644 --- a/tests/framework/datasets/ContainerDataset.h +++ b/tests/framework/datasets/ContainerDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,7 +26,6 @@ #include "Dataset.h" #include "support/StringSupport.h" -#include "tests/TypePrinter.h" #include <string> #include <tuple> diff --git a/tests/framework/datasets/ZipDataset.h b/tests/framework/datasets/ZipDataset.h index ce1bb37cab..0b963484c5 100644 --- a/tests/framework/datasets/ZipDataset.h +++ b/tests/framework/datasets/ZipDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2018, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -150,6 +150,20 @@ ZipDataset<T, U> zip(T &&dataset1, U &&dataset2) { return ZipDataset<T, U>(std::forward<T>(dataset1), std::forward<U>(dataset2)); } + +/** Helper function to create a @ref ZipDataset. + * + * @param[in] dataset1 First dataset. + * @param[in] dataset2 Second dataset. + * @param[in] datasets Subsequent datasets. + * + * @return A zip dataset. + */ +template <typename T1, typename T2, typename... Ts> +auto zip(T1 &&dataset1, T2 &&dataset2, Ts &&... datasets) -> decltype(zip(std::forward<T1>(dataset1), zip(std::forward<T2>(dataset2), std::forward<Ts>(datasets)...))) +{ + return zip(std::forward<T1>(dataset1), zip(std::forward<T2>(dataset2), std::forward<Ts>(datasets)...)); +} } // namespace dataset } // namespace framework } // namespace test diff --git a/tests/framework/instruments/Instrument.h b/tests/framework/instruments/Instrument.h index 3ea15825ad..1770a492ac 100644 --- a/tests/framework/instruments/Instrument.h +++ b/tests/framework/instruments/Instrument.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -117,6 +117,15 @@ public: return MeasurementsMap(); } + /** Return JSON formatted instrument header string. + * + * @return JSON formatted string + */ + virtual std::string instrument_header() const + { + return std::string{}; + } + /** Return the latest test measurements. * * @return the latest test measurements. diff --git a/tests/framework/instruments/Instruments.h b/tests/framework/instruments/Instruments.h index 8a6cec0e9c..d80032a032 100644 --- a/tests/framework/instruments/Instruments.h +++ b/tests/framework/instruments/Instruments.h @@ -24,12 +24,12 @@ #ifndef ARM_COMPUTE_TEST_INSTRUMENTS #define ARM_COMPUTE_TEST_INSTRUMENTS -#if !defined(BARE_METAL) && !defined(__APPLE__) +#if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) #include "MaliCounter.h" #include "OpenCLMemoryUsage.h" #include "OpenCLTimer.h" #include "PMUCounter.h" -#endif /* !defined(BARE_METAL) && !defined(__APPLE__) */ +#endif /* !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) */ #include "SchedulerTimer.h" #include "WallClockTimer.h" diff --git a/tests/framework/instruments/Measurement.h b/tests/framework/instruments/Measurement.h index af272a9945..2ec68d424b 100644 --- a/tests/framework/instruments/Measurement.h +++ b/tests/framework/instruments/Measurement.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2018,2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -209,10 +209,10 @@ struct Measurement /** Stored value */ union - { - double floating_point; - long long int integer; - } v; + { + double floating_point; + long long int integer; + } v; bool is_floating_point; /**< Is the stored value floating point or integer ? */ }; diff --git a/tests/framework/instruments/OpenCLTimer.cpp b/tests/framework/instruments/OpenCLTimer.cpp index 45eb4c5c60..e9f945bd95 100644 --- a/tests/framework/instruments/OpenCLTimer.cpp +++ b/tests/framework/instruments/OpenCLTimer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2019, 2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -54,7 +54,13 @@ std::string OpenCLClock<output_timestamps>::id() const template <bool output_timestamps> OpenCLClock<output_timestamps>::OpenCLClock(ScaleFactor scale_factor) - : _kernels(), _real_function(nullptr), _real_graph_function(nullptr), _prefix(), _timer_enabled(false) + : _kernels(), + _real_function(nullptr), +#ifdef ARM_COMPUTE_GRAPH_ENABLED + _real_graph_function(nullptr), +#endif /* ARM_COMPUTE_GRAPH_ENABLED */ + _prefix(), + _timer_enabled(false) { auto q = CLScheduler::get().queue(); cl_command_queue_properties props = q.getInfo<CL_QUEUE_PROPERTIES>(); @@ -91,19 +97,17 @@ void OpenCLClock<output_timestamps>::test_start() { // Start intercepting enqueues: ARM_COMPUTE_ERROR_ON(_real_function != nullptr); - ARM_COMPUTE_ERROR_ON(_real_graph_function != nullptr); - _real_function = CLSymbols::get().clEnqueueNDRangeKernel_ptr; - _real_graph_function = graph::TaskExecutor::get().execute_function; - auto interceptor = [this]( - cl_command_queue command_queue, - cl_kernel kernel, - cl_uint work_dim, - const size_t *gwo, - const size_t *gws, - const size_t *lws, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) + _real_function = CLSymbols::get().clEnqueueNDRangeKernel_ptr; + auto interceptor = [this]( + cl_command_queue command_queue, + cl_kernel kernel, + cl_uint work_dim, + const size_t *gwo, + const size_t *gws, + const size_t *lws, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) { if(this->_timer_enabled) { @@ -138,7 +142,11 @@ void OpenCLClock<output_timestamps>::test_start() return this->_real_function(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, event); } }; + CLSymbols::get().clEnqueueNDRangeKernel_ptr = interceptor; +#ifdef ARM_COMPUTE_GRAPH_ENABLED + ARM_COMPUTE_ERROR_ON(_real_graph_function != nullptr); + _real_graph_function = graph::TaskExecutor::get().execute_function; // Start intercepting tasks: auto task_interceptor = [this](graph::ExecutionTask & task) { @@ -153,9 +161,8 @@ void OpenCLClock<output_timestamps>::test_start() this->_real_graph_function(task); this->_prefix = ""; }; - - CLSymbols::get().clEnqueueNDRangeKernel_ptr = interceptor; graph::TaskExecutor::get().execute_function = task_interceptor; +#endif /* ARM_COMPUTE_GRAPH_ENABLED */ } template <bool output_timestamps> @@ -175,9 +182,11 @@ void OpenCLClock<output_timestamps>::test_stop() { // Restore real function CLSymbols::get().clEnqueueNDRangeKernel_ptr = _real_function; + _real_function = nullptr; +#ifdef ARM_COMPUTE_GRAPH_ENABLED graph::TaskExecutor::get().execute_function = _real_graph_function; _real_graph_function = nullptr; - _real_function = nullptr; +#endif /* ARM_COMPUTE_GRAPH_ENABLED */ } template <bool output_timestamps> diff --git a/tests/framework/instruments/OpenCLTimer.h b/tests/framework/instruments/OpenCLTimer.h index 9904035c20..1812272435 100644 --- a/tests/framework/instruments/OpenCLTimer.h +++ b/tests/framework/instruments/OpenCLTimer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2018, 2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -67,9 +67,11 @@ private: }; std::list<kernel_info> _kernels; std::function<decltype(clEnqueueNDRangeKernel)> _real_function; - std::function<decltype(graph::execute_task)> _real_graph_function; - std::string _prefix; - bool _timer_enabled; +#ifdef ARM_COMPUTE_GRAPH_ENABLED + std::function<decltype(graph::execute_task)> _real_graph_function; +#endif /* ARM_COMPUTE_GRAPH_ENABLED */ + std::string _prefix; + bool _timer_enabled; #endif /* ARM_COMPUTE_CL */ private: diff --git a/tests/framework/instruments/SchedulerTimer.cpp b/tests/framework/instruments/SchedulerTimer.cpp index c31cd42d19..b753485351 100644 --- a/tests/framework/instruments/SchedulerTimer.cpp +++ b/tests/framework/instruments/SchedulerTimer.cpp @@ -26,6 +26,7 @@ #include "Instruments.h" #include "WallClockTimer.h" #include "arm_compute/core/CPP/ICPPKernel.h" +#include "arm_compute/graph/DataLayerVisitor.h" #include "arm_compute/graph/INode.h" #include "support/Cast.h" @@ -53,8 +54,10 @@ class Interceptor final : public IScheduler { public: /** Default constructor. */ - Interceptor(std::list<struct SchedulerClock<output_timestamps>::kernel_info> &kernels, IScheduler &real_scheduler, ScaleFactor scale_factor) - : _kernels(kernels), _real_scheduler(real_scheduler), _timer(scale_factor), _prefix() + Interceptor(std::list<struct SchedulerClock<output_timestamps>::kernel_info> &kernels, + std::map<std::string, SchedulerTimer::LayerData> &layers, IScheduler &real_scheduler, + ScaleFactor scale_factor) + : _kernels(kernels), _layer_data_map(layers), _real_scheduler(real_scheduler), _timer(scale_factor), _prefix() { } @@ -126,14 +129,24 @@ protected: private: std::list<struct SchedulerClock<output_timestamps>::kernel_info> &_kernels; - IScheduler &_real_scheduler; - WallClock<output_timestamps> _timer; - std::string _prefix; + std::map<std::string, SchedulerTimer::LayerData> &_layer_data_map; + IScheduler &_real_scheduler; + WallClock<output_timestamps> _timer; + std::string _prefix; }; template <bool output_timestamps> SchedulerClock<output_timestamps>::SchedulerClock(ScaleFactor scale_factor) - : _kernels(), _real_scheduler(nullptr), _real_scheduler_type(), _real_graph_function(nullptr), _scale_factor(scale_factor), _interceptor(nullptr), _scheduler_users() + : _kernels(), + _layer_data_map(), + _real_scheduler(nullptr), + _real_scheduler_type(), +#ifdef ARM_COMPUTE_GRAPH_ENABLED + _real_graph_function(nullptr), +#endif /* ARM_COMPUTE_GRAPH_ENABLED */ + _scale_factor(scale_factor), + _interceptor(nullptr), + _scheduler_users() { if(instruments_info != nullptr) { @@ -144,6 +157,7 @@ SchedulerClock<output_timestamps>::SchedulerClock(ScaleFactor scale_factor) template <bool output_timestamps> void SchedulerClock<output_timestamps>::test_start() { +#ifdef ARM_COMPUTE_GRAPH_ENABLED // Start intercepting tasks: ARM_COMPUTE_ERROR_ON(_real_graph_function != nullptr); _real_graph_function = graph::TaskExecutor::get().execute_function; @@ -156,6 +170,13 @@ void SchedulerClock<output_timestamps>::test_start() if(task.node != nullptr && !task.node->name().empty()) { scheduler->set_prefix(task.node->name() + "/"); + + if(_layer_data_map.find(task.node->name()) == _layer_data_map.end()) + { + arm_compute::graph::DataLayerVisitor dlv = {}; + task.node->accept(dlv); + _layer_data_map[task.node->name()] = dlv.layer_data(); + } } else { @@ -170,6 +191,7 @@ void SchedulerClock<output_timestamps>::test_start() scheduler->set_prefix(""); } }; +#endif /* ARM_COMPUTE_GRAPH_ENABLED */ ARM_COMPUTE_ERROR_ON(_real_scheduler != nullptr); _real_scheduler_type = Scheduler::get_type(); @@ -177,9 +199,11 @@ void SchedulerClock<output_timestamps>::test_start() if(_real_scheduler_type != Scheduler::Type::CUSTOM) { _real_scheduler = &Scheduler::get(); - _interceptor = std::make_shared<Interceptor<output_timestamps>>(_kernels, *_real_scheduler, _scale_factor); + _interceptor = std::make_shared<Interceptor<output_timestamps>>(_kernels, _layer_data_map, *_real_scheduler, _scale_factor); Scheduler::set(std::static_pointer_cast<IScheduler>(_interceptor)); +#ifdef ARM_COMPUTE_GRAPH_ENABLED graph::TaskExecutor::get().execute_function = task_interceptor; +#endif /* ARM_COMPUTE_GRAPH_ENABLED */ // Create an interceptor for each scheduler // TODO(COMPID-2638) : Allow multiple schedulers, now it assumes the same scheduler is used. @@ -188,7 +212,7 @@ void SchedulerClock<output_timestamps>::test_start() { if(user != nullptr && user->scheduler() != nullptr) { - user->intercept_scheduler(std::make_unique<Interceptor<output_timestamps>>(_kernels, *user->scheduler(), _scale_factor)); + user->intercept_scheduler(std::make_unique<Interceptor<output_timestamps>>(_kernels, _layer_data_map, *user->scheduler(), _scale_factor)); } }); } @@ -205,10 +229,12 @@ void SchedulerClock<output_timestamps>::test_stop() { // Restore real scheduler Scheduler::set(_real_scheduler_type); - _real_scheduler = nullptr; - _interceptor = nullptr; + _real_scheduler = nullptr; + _interceptor = nullptr; +#ifdef ARM_COMPUTE_GRAPH_ENABLED graph::TaskExecutor::get().execute_function = _real_graph_function; _real_graph_function = nullptr; +#endif /* ARM_COMPUTE_GRAPH_ENABLED */ // Restore schedulers std::for_each(std::begin(_scheduler_users), std::end(_scheduler_users), @@ -257,6 +283,36 @@ Instrument::MeasurementsMap SchedulerClock<output_timestamps>::measurements() co return measurements; } +template <bool output_timestamps> +std::string SchedulerClock<output_timestamps>::instrument_header() const +{ + std::string output{ "" }; + output += R"("layer_data" : {)"; + for(auto i_it = _layer_data_map.cbegin(), i_end = _layer_data_map.cend(); i_it != i_end; ++i_it) + { + output += "\"" + i_it->first + "\" : {"; + if(i_it->second.size() != 0) + { + // Print for each entry in layer + for(auto entry_it = i_it->second.cbegin(), entry_end = i_it->second.cend(); entry_it != entry_end; ++entry_it) + { + output += "\"" + entry_it->first + "\" : \"" + entry_it->second + "\""; + if(std::next(entry_it) != entry_end) + { + output += ","; + } + } + } + output += "}"; + if(std::next(i_it) != i_end) + { + output += ","; + } + } + output += "}"; + return output; +} + } // namespace framework } // namespace test } // namespace arm_compute diff --git a/tests/framework/instruments/SchedulerTimer.h b/tests/framework/instruments/SchedulerTimer.h index aa948d32a6..c437f2717c 100644 --- a/tests/framework/instruments/SchedulerTimer.h +++ b/tests/framework/instruments/SchedulerTimer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2019,2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -63,6 +63,7 @@ template <bool output_timestamps> class SchedulerClock : public Instrument { public: + using LayerData = std::map<std::string, std::string>; /** Construct a Scheduler timer. * * @param[in] scale_factor Measurement scale factor. @@ -85,6 +86,7 @@ public: void start() override; void test_stop() override; Instrument::MeasurementsMap measurements() const override; + std::string instrument_header() const override; /** Kernel information */ struct kernel_info @@ -95,13 +97,16 @@ public: }; private: - std::list<kernel_info> _kernels; - IScheduler *_real_scheduler; - Scheduler::Type _real_scheduler_type; + std::list<kernel_info> _kernels; + std::map<std::string, LayerData> _layer_data_map; + IScheduler *_real_scheduler; + Scheduler::Type _real_scheduler_type; +#ifdef ARM_COMPUTE_GRAPH_ENABLED std::function<decltype(graph::execute_task)> _real_graph_function; - ScaleFactor _scale_factor; - std::shared_ptr<IScheduler> _interceptor; - std::vector<ISchedulerUser *> _scheduler_users; +#endif /* ARM_COMPUTE_GRAPH_ENABLED */ + ScaleFactor _scale_factor; + std::shared_ptr<IScheduler> _interceptor; + std::vector<ISchedulerUser *> _scheduler_users; }; using SchedulerTimer = SchedulerClock<false>; diff --git a/tests/framework/instruments/hwc_names.hpp b/tests/framework/instruments/hwc_names.hpp index e68bcbed82..c39f3bba7a 100644 --- a/tests/framework/instruments/hwc_names.hpp +++ b/tests/framework/instruments/hwc_names.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,7 +44,7 @@ enum /* * "Short names" for hardware counters used by Streamline. Counters names are * stored in accordance with their memory layout in the binary counter block - * emitted by the Mali GPU. Each "master" in the GPU emits a fixed-size block + * emitted by the Arm® Mali™ GPU. Each "master" in the GPU emits a fixed-size block * of 64 counters, and each GPU implements the same set of "masters" although * the counters each master exposes within its block of 64 may vary. * diff --git a/tests/framework/printers/JSONPrinter.cpp b/tests/framework/printers/JSONPrinter.cpp index 0995ff3594..84b2d23114 100644 --- a/tests/framework/printers/JSONPrinter.cpp +++ b/tests/framework/printers/JSONPrinter.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2019,2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -169,10 +169,18 @@ void JSONPrinter::print_info(const std::string &info) _infos.push_back(info); } +void JSONPrinter::print_profiler_header(const std::string &header_data) +{ + if(header_data.size() > 0) + { + print_separator(_first_test_entry); + } + *_stream << header_data; +} + void JSONPrinter::print_measurements(const Profiler::MeasurementsMap &measurements) { print_separator(_first_test_entry); - *_stream << R"("measurements" : {)"; for(auto i_it = measurements.cbegin(), i_end = measurements.cend(); i_it != i_end;) diff --git a/tests/framework/printers/JSONPrinter.h b/tests/framework/printers/JSONPrinter.h index ce587ade04..ad996708e7 100644 --- a/tests/framework/printers/JSONPrinter.h +++ b/tests/framework/printers/JSONPrinter.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Arm Limited. + * Copyright (c) 2017,2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -51,6 +51,7 @@ public: void print_errors_footer() override; void print_error(const std::exception &error, bool expected) override; void print_info(const std::string &info) override; + void print_profiler_header(const std::string &header_data) override; void print_measurements(const Profiler::MeasurementsMap &measurements) override; void print_list_tests(const std::vector<TestInfo> &infos) override; diff --git a/tests/framework/printers/PrettyPrinter.cpp b/tests/framework/printers/PrettyPrinter.cpp index aa06eb9b4e..529ff2c2d9 100644 --- a/tests/framework/printers/PrettyPrinter.cpp +++ b/tests/framework/printers/PrettyPrinter.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2019,2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -116,6 +116,12 @@ void PrettyPrinter::print_list_tests(const std::vector<TestInfo> &infos) *_stream << "[" << info.id << ", " << info.mode << ", " << info.status << "] " << info.name << "\n"; } } + +void PrettyPrinter::print_profiler_header(const std::string &header_data) +{ + ARM_COMPUTE_UNUSED(header_data); +} + void PrettyPrinter::print_measurements(const Profiler::MeasurementsMap &measurements) { for(const auto &instrument : measurements) diff --git a/tests/framework/printers/PrettyPrinter.h b/tests/framework/printers/PrettyPrinter.h index ded0da04d8..b9d5d3957c 100644 --- a/tests/framework/printers/PrettyPrinter.h +++ b/tests/framework/printers/PrettyPrinter.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Arm Limited. + * Copyright (c) 2017,2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -55,6 +55,7 @@ public: void print_errors_footer() override; void print_error(const std::exception &error, bool expected) override; void print_info(const std::string &info) override; + void print_profiler_header(const std::string &header_data) override; void print_measurements(const Profiler::MeasurementsMap &measurements) override; void print_list_tests(const std::vector<TestInfo> &infos) override; diff --git a/tests/framework/printers/Printer.h b/tests/framework/printers/Printer.h index 669b7f6a95..af0209788c 100644 --- a/tests/framework/printers/Printer.h +++ b/tests/framework/printers/Printer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2018,2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -125,6 +125,12 @@ public: */ virtual void print_info(const std::string &info) = 0; + /** Print header data. + * + * @param[in] header_data JSON formmated header data. + */ + virtual void print_profiler_header(const std::string &header_data) = 0; + /** Print measurements for a test. * * @param[in] measurements Measurements as collected by a @ref Profiler. diff --git a/tests/main.cpp b/tests/main.cpp index cecd89bdb7..e862c7627e 100644 --- a/tests/main.cpp +++ b/tests/main.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,6 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#include "arm_compute/core/Version.h" #include "support/StringSupport.h" #include "tests/AssetsLibrary.h" #include "tests/framework/DatasetModes.h" @@ -45,6 +46,7 @@ #include "utils/TypePrinter.h" #endif /* ARM_COMPUTE_CL */ #include "arm_compute/runtime/Scheduler.h" +#include "src/common/cpuinfo/CpuModel.h" #include <fstream> #include <initializer_list> @@ -115,7 +117,7 @@ int main(int argc, char **argv) auto filter_id = parser.add_option<utils::SimpleOption<std::string>>("filter-id"); filter_id->set_help("List of test ids. ... can be used to define a range."); auto stop_on_error = parser.add_option<utils::ToggleOption>("stop-on-error"); - stop_on_error->set_help("Abort execution after the first failed test (useful for debugging)"); + stop_on_error->set_help("Stop execution after the first failed test (useful for debugging)"); auto seed = parser.add_option<utils::SimpleOption<std::random_device::result_type>>("seed", std::random_device()()); seed->set_help("Global seed for random number generation"); auto list_tests = parser.add_option<utils::ToggleOption>("list-tests", false); @@ -126,6 +128,8 @@ int main(int argc, char **argv) error_on_missing_assets->set_help("Mark a test as failed instead of skipping it when assets are missing"); auto assets = parser.add_positional_option<utils::SimpleOption<std::string>>("assets"); assets->set_help("Path to the assets directory"); + auto print_rerun_command = parser.add_option<utils::ToggleOption>("rerun-cmd"); + print_rerun_command->set_help("Print out the command to rerun the exact failed testcase"); #ifdef ARM_COMPUTE_CL auto enable_tuner = parser.add_option<utils::ToggleOption>("enable-tuner"); enable_tuner->set_help("Enable OpenCL dynamic tuner"); @@ -183,7 +187,7 @@ int main(int argc, char **argv) CLGEMMHeuristicsHandle gemm_heuristics; if(opencl_is_available()) { - auto ctx_dev_err = create_opencl_context_and_device(); + auto ctx_dev_err = create_opencl_context_and_device(CLBackendType::Native); ARM_COMPUTE_ERROR_ON_MSG(std::get<2>(ctx_dev_err) != CL_SUCCESS, "Failed to create OpenCL context"); gemm_heuristics.reload_from_file(mlgo_file->value()); CLScheduler::get().default_init_with_context(std::get<1>(ctx_dev_err), std::get<0>(ctx_dev_err), &cl_tuner, &gemm_heuristics); @@ -235,13 +239,23 @@ int main(int argc, char **argv) #endif /* ARM_COMPUTE_CL */ const arm_compute::CPUInfo &cpu_info = Scheduler::get().cpu_info(); const unsigned int num_cpus = cpu_info.get_cpu_num(); + + p->print_entry("cpu_has_sve", support::cpp11::to_string(cpu_info.has_sve())); + p->print_entry("cpu_has_sve2", support::cpp11::to_string(cpu_info.has_sve2())); + p->print_entry("cpu_has_svef32mm", support::cpp11::to_string(cpu_info.has_svef32mm())); + p->print_entry("cpu_has_svei8mm", support::cpp11::to_string(cpu_info.has_svei8mm())); + p->print_entry("cpu_has_svebf16", support::cpp11::to_string(cpu_info.has_svebf16())); + p->print_entry("cpu_has_sme", support::cpp11::to_string(cpu_info.has_sme())); + p->print_entry("cpu_has_sme2", support::cpp11::to_string(cpu_info.has_sme2())); p->print_entry("cpu_has_fp16", support::cpp11::to_string(cpu_info.has_fp16())); + p->print_entry("cpu_has_bf16", support::cpp11::to_string(cpu_info.has_bf16())); p->print_entry("cpu_has_dotprod", support::cpp11::to_string(cpu_info.has_dotprod())); + p->print_entry("cpu_has_i8mm", support::cpp11::to_string(cpu_info.has_i8mm())); for(unsigned int j = 0; j < num_cpus; ++j) { const CPUModel model = cpu_info.get_cpu_model(j); - p->print_entry("CPU" + support::cpp11::to_string(j), cpu_model_to_string(model)); + p->print_entry("CPU" + support::cpp11::to_string(j), cpuinfo::cpu_model_to_string(model)); } p->print_entry("Iterations", support::cpp11::to_string(options.iterations->value())); p->print_entry("Threads", support::cpp11::to_string(threads->value())); @@ -259,14 +273,16 @@ int main(int argc, char **argv) // Initialize framework framework::FrameworkConfig fconfig; - fconfig.instruments = options.instruments->value(); - fconfig.name_filter = filter->value(); - fconfig.id_filter = filter_id->value(); - fconfig.num_iterations = options.iterations->value(); - fconfig.mode = dataset_mode->value(); - fconfig.log_level = options.log_level->value(); - fconfig.cooldown_sec = cooldown_sec->value(); - fconfig.configure_only = configure_only->value(); + fconfig.instruments = options.instruments->value(); + fconfig.name_filter = filter->value(); + fconfig.id_filter = filter_id->value(); + fconfig.num_iterations = options.iterations->value(); + fconfig.mode = dataset_mode->value(); + fconfig.log_level = options.log_level->value(); + fconfig.cooldown_sec = cooldown_sec->value(); + fconfig.configure_only = configure_only->value(); + fconfig.print_rerun_cmd = print_rerun_command->value(); + fconfig.seed = seed->value(); framework.init(fconfig); for(auto &p : printers) diff --git a/tests/validate_examples/RunExample.cpp b/tests/validate_examples/RunExample.cpp index 8ba3c2b295..36bf587551 100644 --- a/tests/validate_examples/RunExample.cpp +++ b/tests/validate_examples/RunExample.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,6 +29,7 @@ #include "ValidateExample.h" #include "arm_compute/runtime/CL/CLHelpers.h" #include "arm_compute/runtime/Scheduler.h" +#include "arm_compute/core/Version.h" #include "tests/AssetsLibrary.h" #include "tests/Globals.h" #include "tests/framework/Framework.h" @@ -150,7 +151,16 @@ int run_example(int argc, char **argv, std::unique_ptr<ValidateExample> example) #ifdef ARM_COMPUTE_CL if(opencl_is_available()) { - auto ctx_dev_err = create_opencl_context_and_device(); + CLBackendType backend_type = CLBackendType::Native; + for(auto &arg : example_args->value()) + { + if(arg.find("--target=clvk") != std::string::npos) + { + backend_type = CLBackendType::Clvk; + break; + } + } + auto ctx_dev_err = create_opencl_context_and_device(backend_type); ARM_COMPUTE_ERROR_ON_MSG(std::get<2>(ctx_dev_err) != CL_SUCCESS, "Failed to create OpenCL context"); CLScheduler::get().default_init_with_context(std::get<1>(ctx_dev_err), std::get<0>(ctx_dev_err), nullptr); } diff --git a/tests/validate_examples/cl_gemm.cpp b/tests/validate_examples/cl_gemm.cpp index 717ba77e17..8189b228c2 100644 --- a/tests/validate_examples/cl_gemm.cpp +++ b/tests/validate_examples/cl_gemm.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -32,20 +32,19 @@ #include "arm_compute/runtime/CL/functions/CLGEMM.h" #include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h" #include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h" -#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" #include "src/core/CL/kernels/CLFillBorderKernel.h" -#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" -#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" -#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h" -#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h" -#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h" -#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" -#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" -#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" -#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" -#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" -#include "src/core/CL/kernels/CLIm2ColKernel.h" -#include "src/core/CL/kernels/CLWeightsReshapeKernel.h" +#include "src/gpu/cl/kernels/ClCastKernel.h" +#include "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyNativeKernel.h" +#include "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel.h" +#include "src/gpu/cl/kernels/ClGemmLowpOffsetContributionKernel.h" +#include "src/gpu/cl/kernels/ClGemmLowpOffsetContributionOutputStageKernel.h" +#include "src/gpu/cl/kernels/ClGemmLowpReductionKernel.h" +#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h" +#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h" +#include "src/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h" +#include "src/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h" +#include "src/gpu/cl/kernels/ClIm2ColKernel.h" +#include "src/gpu/cl/kernels/ClWeightsReshapeKernel.h" #include "tests/AssetsLibrary.h" #include "tests/CL/CLAccessor.h" #include "tests/Globals.h" @@ -205,7 +204,11 @@ public: mm_gemmlowp.configure(&src0, &src1, nullptr, &tmp_dst); // Configure GEMMlowp output stage - mm_gemmlowp_output_stage.configure(&tmp_dst, add_bias ? &biases : nullptr, &dst, dst_multiplier, dst_shift, offset_dst); + GEMMLowpOutputStageInfo gemm_info{}; + gemm_info.gemmlowp_multiplier = dst_multiplier; + gemm_info.gemmlowp_shift = dst_shift; + gemm_info.gemmlowp_offset = offset_dst; + mm_gemmlowp_output_stage.configure(&tmp_dst, add_bias ? &biases : nullptr, &dst, gemm_info); tmp_dst.allocator()->allocate(); biases.allocator()->allocate(); fill(CLAccessor(biases), 3); @@ -393,9 +396,9 @@ private: CLTensor src0{}, src1{}, src2{}, dst{}; CLTensor tmp_dst{}, biases{}; - CLGEMM mm_gemm{}; - CLGEMMLowpMatrixMultiplyCore mm_gemmlowp{}; - CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint mm_gemmlowp_output_stage{}; + CLGEMM mm_gemm{}; + CLGEMMLowpMatrixMultiplyCore mm_gemmlowp{}; + CLGEMMLowpOutputStage mm_gemmlowp_output_stage{}; size_t M{ 7 }, N{ 3 }, K{ 5 }, B{ 1 }; DataType data_type{ DataType::F32 }; diff --git a/tests/validate_examples/graph_validate_utils.h b/tests/validate_examples/graph_validate_utils.h index f6f47cc2c3..c1a83d1f40 100644 --- a/tests/validate_examples/graph_validate_utils.h +++ b/tests/validate_examples/graph_validate_utils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2020, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,8 +22,8 @@ * SOFTWARE. */ -#ifndef GRAPH_VALIDATE_UTILS_H -#define GRAPH_VALIDATE_UTILS_H +#ifndef ACL_TESTS_VALIDATE_EXAMPLES_GRAPH_VALIDATE_UTILS_H +#define ACL_TESTS_VALIDATE_EXAMPLES_GRAPH_VALIDATE_UTILS_H #include "arm_compute/graph.h" @@ -240,7 +240,6 @@ public: { arm_compute::graph::Target::NEON, arm_compute::graph::Target::CL, - arm_compute::graph::Target::GC, }; const std::set<arm_compute::DataType> supported_data_types @@ -288,7 +287,6 @@ public: * @param[out] os Output stream. * @param[in] common_params Example parameters to output * - * @return None. */ virtual void print_parameters(::std::ostream &os, const ExampleParams &common_params) { @@ -310,8 +308,6 @@ public: * * @param[in] options Options to consume * @param[out] common_params params structure to consume. - * - * @return consume_common_graph_parameters structure containing the common graph parameters */ void consume_common_graph_parameters(CommonGraphValidateOptions &options, CommonParams &common_params) { @@ -397,7 +393,6 @@ public: * @param[out] bias The tensor with the bias data. * @param[in] tensor Tensor result of the actual operation passed into the Accessor. * - * @return None. */ virtual void create_tensors(arm_compute::test::SimpleTensor<D> &src, arm_compute::test::SimpleTensor<D> &weights, @@ -446,8 +441,6 @@ public: * @param[in] seed seed for the randomization function * @param[in] low lower bound for random values * @param[in] high upper bound for random values - * - * @return None. */ void fill_tensor(arm_compute::test::SimpleTensor<uint8_t> &tensor, std::random_device::result_type seed, uint8_t low, uint8_t high) { @@ -474,8 +467,6 @@ public: * @param[in] seed seed for the randomization function * @param[in] low lower bound for random values * @param[in] high upper bound for random values - * - * @return None. */ void fill_tensor(arm_compute::test::SimpleTensor<int32_t> &tensor, std::random_device::result_type seed, int32_t low, int32_t high) { @@ -495,8 +486,6 @@ public: * @param[in] seed seed for the randomization function * @param[in] low lower bound for random values * @param[in] high upper bound for random values - * - * @return None. */ void fill_tensor(arm_compute::test::SimpleTensor<float> &tensor, std::random_device::result_type seed, float low, float high) { @@ -517,8 +506,6 @@ public: * @param[in] seed seed for the randomization function * @param[in] low lower bound for random values * @param[in] high upper bound for random values - * - * @return None. */ void fill_tensor(arm_compute::test::SimpleTensor<half> &tensor, std::random_device::result_type seed, half low, half high) { @@ -560,8 +547,6 @@ public: * * @param[in] tensor Tensor result of the actual operation passed into the Accessor. * @param[in] output Tensor result of the reference implementation. - * - * @return None. */ void validate(ITensor &tensor, arm_compute::test::SimpleTensor<D> output) { @@ -693,4 +678,4 @@ public: } // graph_validate_utils } // arm_compute -#endif //GRAPH_VALIDATE_UTILS_H +#endif // ACL_TESTS_VALIDATE_EXAMPLES_GRAPH_VALIDATE_UTILS_H diff --git a/tests/validation/CL/AbsLayer.cpp b/tests/validation/CL/AbsLayer.cpp index e6ba14b50e..0bad8f9b68 100644 --- a/tests/validation/CL/AbsLayer.cpp +++ b/tests/validation/CL/AbsLayer.cpp @@ -22,7 +22,7 @@ * SOFTWARE. */ #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h" +#include "arm_compute/runtime/CL/functions/CLElementwiseUnaryLayer.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" #include "tests/CL/CLAccessor.h" @@ -32,7 +32,7 @@ #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" #include "tests/validation/Validation.h" -#include "tests/validation/fixtures/ElementWiseUnaryFixture.h" +#include "tests/validation/fixtures/ElementwiseUnaryFixture.h" namespace arm_compute { diff --git a/tests/validation/CL/ActivationLayer.cpp b/tests/validation/CL/ActivationLayer.cpp index fa95594157..133b39d154 100644 --- a/tests/validation/CL/ActivationLayer.cpp +++ b/tests/validation/CL/ActivationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -70,6 +70,7 @@ AbsoluteTolerance<float> tolerance(ActivationLayerInfo::ActivationFunction activ case ActivationLayerInfo::ActivationFunction::SOFT_RELU: case ActivationLayerInfo::ActivationFunction::ELU: case ActivationLayerInfo::ActivationFunction::SQRT: + case ActivationLayerInfo::ActivationFunction::GELU: return AbsoluteTolerance<float>(data_type == DataType::F16 ? 0.01f : 0.00001f); case ActivationLayerInfo::ActivationFunction::TANH: return AbsoluteTolerance<float>(data_type == DataType::F16 ? 0.001f : 0.00001f); diff --git a/tests/validation/CL/ArgMinMax.cpp b/tests/validation/CL/ArgMinMax.cpp index 1d849ed0c7..8566972f81 100644 --- a/tests/validation/CL/ArgMinMax.cpp +++ b/tests/validation/CL/ArgMinMax.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,15 +22,11 @@ * SOFTWARE. */ #include "arm_compute/core/Types.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" #include "arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h" -#include "arm_compute/runtime/CL/functions/CLReductionOperation.h" #include "tests/CL/CLAccessor.h" #include "tests/datasets/ShapeDatasets.h" -#include "tests/datasets/SplitDataset.h" -#include "tests/framework/Asserts.h" #include "tests/framework/Macros.h" #include "tests/validation/Validation.h" #include "tests/validation/fixtures/ArgMinMaxFixture.h" @@ -46,6 +42,8 @@ namespace const auto ArgMinMaxSmallDataset = framework::dataset::make("Shape", { TensorShape{ 1U, 7U, 1U, 3U }, + TensorShape{ 3U, 1U, 3U, 2U }, + TensorShape{ 2U, 1U, 3U, 2U }, TensorShape{ 149U, 5U, 1U, 2U }, TensorShape{ 166U, 5U, 1U, 2U }, TensorShape{ 322U, 5U, 1U, 2U }, @@ -53,6 +51,22 @@ const auto ArgMinMaxSmallDataset = framework::dataset::make("Shape", TensorShape{ 2560, 2U, 2U, 2U }, }); +const auto ArgMinMaxSmallDatasetAxis0 = framework::dataset::make("Shape", +{ + TensorShape{ 1U, 5U }, + TensorShape{ 2U, 3U }, + TensorShape{ 1U }, + TensorShape{ 3U }, + TensorShape{ 2U }, + TensorShape{ 5U }, + TensorShape{ 17U }, + TensorShape{ 15U, 2U }, +}); + +const auto OpsDataset = framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX }); +const auto AxisDataset = framework::dataset::make("Axis", { 0, 1, 2, 3 }); +const auto QInfoDataset = framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) }); + const auto ArgMinMaxLargeDataset = framework::dataset::make("Shape", { TensorShape{ 517U, 123U, 13U, 2U } }); } // namespace @@ -85,47 +99,78 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( // clang-format on // *INDENT-ON* -template <typename T> -using CLArgMinMaxValidationFixture = ArgMinMaxValidationFixture<CLTensor, CLAccessor, CLArgMinMaxLayer, T>; +template <typename T1, typename T2> +using CLArgMinMaxValidationFixture = ArgMinMaxValidationFixture<CLTensor, CLAccessor, CLArgMinMaxLayer, T1, T2>; + +using CLArgMinMaxValidationFixture_S32_S32 = CLArgMinMaxValidationFixture<int32_t, int32_t>; +using CLArgMinMaxValidationFixture_F16_S32 = CLArgMinMaxValidationFixture<half, int32_t>; +using CLArgMinMaxValidationFixture_F32_S32 = CLArgMinMaxValidationFixture<float, int32_t>; +using CLArgMinMaxValidationFixture_F32_S64 = CLArgMinMaxValidationFixture<float, int64_t>; TEST_SUITE(S32) +FIXTURE_DATA_TEST_CASE(RunSmallAxis0, + CLArgMinMaxValidationFixture_S32_S32, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(ArgMinMaxSmallDatasetAxis0, + framework::dataset::make("DataTypeIn", DataType::S32)), + framework::dataset::make("DataTypeOut", DataType::S32)), + framework::dataset::make("Axis", { 0 })), + OpsDataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} + FIXTURE_DATA_TEST_CASE(RunSmall, - CLArgMinMaxValidationFixture<int32_t>, + CLArgMinMaxValidationFixture_S32_S32, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(ArgMinMaxSmallDataset, framework::dataset::make("DataType", DataType::S32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), - framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX }))) + combine(combine(combine(combine(ArgMinMaxSmallDataset, + framework::dataset::make("DataTypeIn", DataType::S32)), + framework::dataset::make("DataTypeOut", DataType::S32)), + AxisDataset), + OpsDataset)) { // Validate output validate(CLAccessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, - CLArgMinMaxValidationFixture<int32_t>, + CLArgMinMaxValidationFixture_S32_S32, framework::DatasetMode::NIGHTLY, - combine(combine(combine(ArgMinMaxLargeDataset, framework::dataset::make("DataType", DataType::S32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), - framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX }))) + combine(combine(combine(combine(ArgMinMaxLargeDataset, + framework::dataset::make("DataTypeIn", DataType::S32)), + framework::dataset::make("DataTypeOut", DataType::S32)), + AxisDataset), + OpsDataset)) { // Validate output validate(CLAccessor(_target), _reference); } + TEST_SUITE_END() // S32 TEST_SUITE(Float) TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, - CLArgMinMaxValidationFixture<half>, + CLArgMinMaxValidationFixture_F16_S32, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(ArgMinMaxSmallDataset, framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), - framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX }))) + combine(combine(combine(combine(ArgMinMaxSmallDataset, + framework::dataset::make("DataTypeIn", DataType::F16)), + framework::dataset::make("DataTypeOut", DataType::S32)), + AxisDataset), + OpsDataset)) { // Validate output validate(CLAccessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, - CLArgMinMaxValidationFixture<half>, + CLArgMinMaxValidationFixture_F16_S32, framework::DatasetMode::NIGHTLY, - combine(combine(combine(ArgMinMaxLargeDataset, framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), - framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX }))) + combine(combine(combine(combine(ArgMinMaxLargeDataset, + framework::dataset::make("DataTypeIn", DataType::F16)), + framework::dataset::make("DataTypeOut", DataType::S32)), + AxisDataset), + OpsDataset)) { // Validate output validate(CLAccessor(_target), _reference); @@ -134,49 +179,77 @@ TEST_SUITE_END() // FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, - CLArgMinMaxValidationFixture<float>, + CLArgMinMaxValidationFixture_F32_S32, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(ArgMinMaxSmallDataset, framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), - framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX }))) + combine(combine(combine(combine(ArgMinMaxSmallDataset, + framework::dataset::make("DataTypeIn", DataType::F32)), + framework::dataset::make("DataTypeOut", DataType::S32)), + AxisDataset), + OpsDataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} + +FIXTURE_DATA_TEST_CASE(RunSmall_F32_S64, + CLArgMinMaxValidationFixture_F32_S64, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(ArgMinMaxSmallDataset, + framework::dataset::make("DataTypeIn", DataType::F32)), + framework::dataset::make("DataTypeOut", DataType::S64)), + AxisDataset), + OpsDataset)) { // Validate output validate(CLAccessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, - CLArgMinMaxValidationFixture<float>, + CLArgMinMaxValidationFixture_F32_S32, framework::DatasetMode::NIGHTLY, - combine(combine(combine(ArgMinMaxLargeDataset, framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), - framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX }))) + combine(combine(combine(combine(ArgMinMaxLargeDataset, + framework::dataset::make("DataTypeIn", DataType::F32)), + framework::dataset::make("DataTypeOut", DataType::S32)), + AxisDataset), + OpsDataset)) { // Validate output validate(CLAccessor(_target), _reference); } + TEST_SUITE_END() // FP32 TEST_SUITE_END() // Float -template <typename T> -using CLArgMinMaxQuantizedValidationFixture = ArgMinMaxValidationQuantizedFixture<CLTensor, CLAccessor, CLArgMinMaxLayer, T>; +template <typename T1, typename T2> +using CLArgMinMaxQuantizedValidationFixture = ArgMinMaxValidationQuantizedFixture<CLTensor, CLAccessor, CLArgMinMaxLayer, T1, T2>; + +using CLArgMinMaxQuantizedValidationFixture_U8_S32 = CLArgMinMaxQuantizedValidationFixture<uint8_t, int32_t>; +using CLArgMinMaxQuantizedValidationFixture_S8_S32 = CLArgMinMaxQuantizedValidationFixture<int8_t, int32_t>; TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, - CLArgMinMaxQuantizedValidationFixture<uint8_t>, + CLArgMinMaxQuantizedValidationFixture_U8_S32, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(ArgMinMaxSmallDataset, framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), - framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) }))) + combine(combine(combine(combine(combine(ArgMinMaxSmallDataset, + framework::dataset::make("DataTypeIn", DataType::QASYMM8)), + framework::dataset::make("DataTypeOut", DataType::S32)), + AxisDataset), + OpsDataset), + QInfoDataset)) { // Validate output validate(CLAccessor(_target), _reference); } - FIXTURE_DATA_TEST_CASE(RunLarge, - CLArgMinMaxQuantizedValidationFixture<uint8_t>, + CLArgMinMaxQuantizedValidationFixture_U8_S32, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(ArgMinMaxLargeDataset, framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), - framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) }))) + combine(combine(combine(combine(combine(ArgMinMaxLargeDataset, + framework::dataset::make("DataTypeIn", DataType::QASYMM8)), + framework::dataset::make("DataTypeOut", DataType::S32)), + AxisDataset), + OpsDataset), + QInfoDataset)) { // Validate output validate(CLAccessor(_target), _reference); @@ -185,28 +258,32 @@ TEST_SUITE_END() // QASYMM8 TEST_SUITE(QASYMM8_SIGNED) FIXTURE_DATA_TEST_CASE(RunSmall, - CLArgMinMaxQuantizedValidationFixture<int8_t>, + CLArgMinMaxQuantizedValidationFixture_S8_S32, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(ArgMinMaxSmallDataset, framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), - framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) }))) + combine(combine(combine(combine(combine(ArgMinMaxSmallDataset, + framework::dataset::make("DataTypeIn", DataType::QASYMM8_SIGNED)), + framework::dataset::make("DataTypeOut", DataType::S32)), + AxisDataset), + OpsDataset), + QInfoDataset)) { // Validate output validate(CLAccessor(_target), _reference); } - FIXTURE_DATA_TEST_CASE(RunLarge, - CLArgMinMaxQuantizedValidationFixture<int8_t>, + CLArgMinMaxQuantizedValidationFixture_S8_S32, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(ArgMinMaxLargeDataset, framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), - framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) }))) + combine(combine(combine(combine(combine(ArgMinMaxLargeDataset, + framework::dataset::make("DataTypeIn", DataType::QASYMM8_SIGNED)), + framework::dataset::make("DataTypeOut", DataType::S32)), + AxisDataset), + OpsDataset), + QInfoDataset)) { // Validate output validate(CLAccessor(_target), _reference); } TEST_SUITE_END() // QASYMM8_SIGNED - TEST_SUITE_END() // Quantized TEST_SUITE_END() // ArgMinMax TEST_SUITE_END() // CL diff --git a/tests/validation/CL/ArithmeticAddition.cpp b/tests/validation/CL/ArithmeticAddition.cpp index c74f6a3b23..1ed3a105dc 100644 --- a/tests/validation/CL/ArithmeticAddition.cpp +++ b/tests/validation/CL/ArithmeticAddition.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -41,26 +41,12 @@ namespace test { namespace validation { +/** Synced with tests/validation/dynamic_fusion/gpu/cl/Add.cpp from the dynamic fusion interface. + * Please check there for any differences in the coverage + */ namespace { /** Input data sets **/ -const auto ArithmeticAdditionU8Dataset = combine(combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U8)), framework::dataset::make("DataType", - DataType::U8)); -const auto ArithmeticAdditionQASYMM8Dataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("DataType", - DataType::QASYMM8)); -const auto ArithmeticAdditionQASYMM8SignedDataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), - framework::dataset::make("DataType", - DataType::QASYMM8_SIGNED)); -const auto ArithmeticAdditionQSYMM16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QSYMM16), framework::dataset::make("DataType", DataType::QSYMM16)), - framework::dataset::make("DataType", - DataType::QSYMM16)); -const auto ArithmeticAdditionS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)), - framework::dataset::make("DataType", DataType::S16)); -const auto ArithmeticAdditionFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("DataType", DataType::F16)); -const auto ArithmeticAdditionFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)), - framework::dataset::make("DataType", DataType::F32)); const auto EmptyActivationFunctionsDataset = framework::dataset::make("ActivationInfo", { ActivationLayerInfo() }); const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo", @@ -68,6 +54,8 @@ const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.75f, 0.25f), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.75f, 0.25f) }); +const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true }); +const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false }); } // namespace TEST_SUITE(CL) @@ -76,22 +64,19 @@ TEST_SUITE(ArithmeticAddition) // *INDENT-OFF* // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( - framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), + framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes }), - framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), + framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), })), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), + framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), })), - framework::dataset::make("Expected", { true, true, false, false})), + framework::dataset::make("Expected", { true, false, false})), input1_info, input2_info, output_info, expected) { ARM_COMPUTE_EXPECT(bool(CLArithmeticAddition::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), ConvertPolicy::WRAP)) == expected, framework::LogLevel::ERRORS); @@ -129,8 +114,10 @@ using CLArithmeticAdditionFixture = ArithmeticAdditionValidationFixture<CLTensor TEST_SUITE(Integer) TEST_SUITE(U8) -FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticAdditionU8Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP }))) +FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", + DataType::U8)), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference); @@ -138,15 +125,19 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixture<uint8_t>, framework TEST_SUITE_END() // U8 TEST_SUITE(S16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticAdditionS16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP }))) +FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", + DataType::S16)), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticAdditionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ArithmeticAdditionS16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP }))) +FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticAdditionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", + DataType::S16)), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference); @@ -159,48 +150,65 @@ using CLArithmeticAdditionQuantizedFixture = ArithmeticAdditionValidationQuantiz TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) -FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(), - ArithmeticAdditionQASYMM8Dataset), +FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })), framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })), - framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) }))) + framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference); } template <typename T> using CLArithmeticAdditionBroadcastQuantizedFixture = ArithmeticAdditionValidationQuantizedBroadcastFixture<CLTensor, CLAccessor, CLArithmeticAddition, T>; -FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLArithmeticAdditionBroadcastQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapesBroadcast(), - ArithmeticAdditionQASYMM8Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), - framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })), - framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })), - framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) }))) +FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLArithmeticAdditionBroadcastQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(combine(combine(datasets::SmallShapesBroadcast(), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), + framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })), + framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })), + framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })), + OutOfPlaceDataSet)) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunTinyBroadcastInPlace, CLArithmeticAdditionBroadcastQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(combine(combine(datasets::TinyShapesBroadcastInplace(), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), + framework::dataset::make("Src0QInfo", { QuantizationInfo(1.f / 255.f, 10) })), + framework::dataset::make("Src1QInfo", { QuantizationInfo(1.f / 255.f, 10) })), + framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 10) })), + InPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference); } TEST_SUITE_END() // QASYMM8 TEST_SUITE(QASYMM8_SIGNED) -FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(), - ArithmeticAdditionQASYMM8SignedDataset), +FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 10) })), framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })), - framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) }))) + framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference); } TEST_SUITE_END() // QASYMM8_SIGNED TEST_SUITE(QSYMM16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(), - ArithmeticAdditionQSYMM16Dataset), +FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::QSYMM16)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), framework::dataset::make("Src0QInfo", { QuantizationInfo(1.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })), framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })), - framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) }))) + framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference); @@ -213,16 +221,21 @@ using CLArithmeticAdditionFloatFixture = ArithmeticAdditionValidationFloatFixtur TEST_SUITE(Float) TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionFP16Dataset), +FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", + DataType::F16)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - EmptyActivationFunctionsDataset)) + EmptyActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticAdditionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ArithmeticAdditionFP16Dataset), +FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticAdditionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::TinyShapes(), + framework::dataset::make("DataType", + DataType::F16)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - ActivationFunctionsDataset)) + ActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference); @@ -230,24 +243,32 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticAdditionFloatFixture<half> TEST_SUITE_END() // FP16 TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionFP32Dataset), +FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), + framework::dataset::make("DataType", + DataType::F32)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - EmptyActivationFunctionsDataset)) + EmptyActivationFunctionsDataset), + InPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticAdditionFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ArithmeticAdditionFP32Dataset), +FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticAdditionFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::TinyShapes(), + framework::dataset::make("DataType", + DataType::F32)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - ActivationFunctionsDataset)) + ActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticAdditionFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ArithmeticAdditionFP32Dataset), +FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticAdditionFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", + DataType::F32)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - EmptyActivationFunctionsDataset)) + EmptyActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference); @@ -256,27 +277,30 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticAdditionFloatFixture<float>, framew template <typename T> using CLArithmeticAdditionBroadcastFloatFixture = ArithmeticAdditionBroadcastValidationFloatFixture<CLTensor, CLAccessor, CLArithmeticAddition, T>; -FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLArithmeticAdditionBroadcastFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapesBroadcast(), - ArithmeticAdditionFP32Dataset), +FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLArithmeticAdditionBroadcastFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapesBroadcast(), + framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - EmptyActivationFunctionsDataset)) + EmptyActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLArithmeticAdditionBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapesBroadcast(), - ArithmeticAdditionFP32Dataset), +FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLArithmeticAdditionBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::TinyShapesBroadcast(), + framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - ActivationFunctionsDataset)) + ActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, CLArithmeticAdditionBroadcastFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapesBroadcast(), - ArithmeticAdditionFP32Dataset), +FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, CLArithmeticAdditionBroadcastFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapesBroadcast(), + framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - EmptyActivationFunctionsDataset)) + EmptyActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference); diff --git a/tests/validation/CL/ArithmeticDivision.cpp b/tests/validation/CL/ArithmeticDivision.cpp index 36567dc02a..94bacba7e5 100644 --- a/tests/validation/CL/ArithmeticDivision.cpp +++ b/tests/validation/CL/ArithmeticDivision.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -51,13 +51,16 @@ const auto ArithmeticDivisionFP16Dataset = combine(combine(framework::dataset::m framework::dataset::make("DataType", DataType::F16)); const auto ArithmeticDivisionFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataType", DataType::F32)); -const auto EmptyActivationFunctionsDataset = framework::dataset::make("ActivationInfo", -{ ActivationLayerInfo() }); -const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo", +const auto ArithmeticDivisionS32Dataset = combine(combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::S32)), + framework::dataset::make("DataType", DataType::S32)); +const auto EmptyActivationFunctionsDataset = framework::dataset::make("ActivationInfo", { ActivationLayerInfo() }); +const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.75f, 0.25f), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.75f, 0.25f) }); +const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true }); +const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false }); } // namespace TEST_SUITE(CL) @@ -89,19 +92,44 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( // clang-format on // *INDENT-ON* +using CLArithmeticDivisionIntegerFixture = ArithmeticDivisionValidationIntegerFixture<CLTensor, CLAccessor, CLArithmeticDivision, int>; + +TEST_SUITE(Integer) +TEST_SUITE(S32) + +FIXTURE_DATA_TEST_CASE(RunSmallInteger, CLArithmeticDivisionIntegerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticDivisionS32Dataset), + EmptyActivationFunctionsDataset), + InPlaceDataSet)) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunIntegerWithActivation, CLArithmeticDivisionIntegerFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ArithmeticDivisionS32Dataset), + ActivationFunctionsDataset), + InPlaceDataSet)) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} + +TEST_SUITE_END() +TEST_SUITE_END() + template <typename T> using CLArithmeticDivisionFloatFixture = ArithmeticDivisionValidationFloatFixture<CLTensor, CLAccessor, CLArithmeticDivision, T>; TEST_SUITE(Float) TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticDivisionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ArithmeticDivisionFP16Dataset), - EmptyActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticDivisionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ArithmeticDivisionFP16Dataset), + EmptyActivationFunctionsDataset), + InPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01); } -FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticDivisionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ArithmeticDivisionFP16Dataset), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticDivisionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ArithmeticDivisionFP16Dataset), + ActivationFunctionsDataset), + InPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01); @@ -109,21 +137,24 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticDivisionFloatFixture<half> TEST_SUITE_END() TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticDivisionFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticDivisionFP32Dataset), - EmptyActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticDivisionFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticDivisionFP32Dataset), + EmptyActivationFunctionsDataset), + InPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); } -FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticDivisionFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ArithmeticDivisionFP32Dataset), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticDivisionFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ArithmeticDivisionFP32Dataset), + ActivationFunctionsDataset), + InPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticDivisionFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ArithmeticDivisionFP32Dataset), - EmptyActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticDivisionFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ArithmeticDivisionFP32Dataset), + EmptyActivationFunctionsDataset), + InPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); @@ -132,24 +163,27 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticDivisionFloatFixture<float>, framew template <typename T> using CLArithmeticDivisionBroadcastFloatFixture = ArithmeticDivisionBroadcastValidationFloatFixture<CLTensor, CLAccessor, CLArithmeticDivision, T>; -FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLArithmeticDivisionBroadcastFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapesBroadcast(), +FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLArithmeticDivisionBroadcastFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapesBroadcast(), ArithmeticDivisionFP32Dataset), - EmptyActivationFunctionsDataset)) + EmptyActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); } -FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLArithmeticDivisionBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapesBroadcast(), +FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLArithmeticDivisionBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapesBroadcast(), ArithmeticDivisionFP32Dataset), - ActivationFunctionsDataset)) + ActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); } -FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, CLArithmeticDivisionBroadcastFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapesBroadcast(), +FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, CLArithmeticDivisionBroadcastFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapesBroadcast(), ArithmeticDivisionFP32Dataset), - EmptyActivationFunctionsDataset)) + EmptyActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); diff --git a/tests/validation/CL/ArithmeticSubtraction.cpp b/tests/validation/CL/ArithmeticSubtraction.cpp index 2709fcaedb..5825ce2e5d 100644 --- a/tests/validation/CL/ArithmeticSubtraction.cpp +++ b/tests/validation/CL/ArithmeticSubtraction.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -41,27 +41,12 @@ namespace test { namespace validation { +/** Synced with tests/validation/dynamic_fusion/gpu/cl/Sub.cpp from the dynamic fusion interface. + * Please check there for any differences in the coverage + */ namespace { /** Input data sets **/ -const auto ArithmeticSubtractionU8Dataset = combine(combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U8)), - framework::dataset::make("DataType", - DataType::U8)); -const auto ArithmeticSubtractionQASYMM8Dataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("DataType", - DataType::QASYMM8)); -const auto ArithmeticSubtractionQASYMM8SignedDataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), - framework::dataset::make("DataType", - DataType::QASYMM8_SIGNED)); -const auto ArithmeticSubtractionQSYMM16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QSYMM16), framework::dataset::make("DataType", DataType::QSYMM16)), - framework::dataset::make("DataType", - DataType::QSYMM16)); -const auto ArithmeticSubtractionS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)), - framework::dataset::make("DataType", DataType::S16)); -const auto ArithmeticSubtractionFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("DataType", DataType::F16)); -const auto ArithmeticSubtractionFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)), - framework::dataset::make("DataType", DataType::F32)); const auto EmptyActivationFunctionsDataset = framework::dataset::make("ActivationInfo", { ActivationLayerInfo() }); const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo", @@ -79,22 +64,19 @@ TEST_SUITE(ArithmeticSubtraction) // *INDENT-OFF* // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( - framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), + framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes }), - framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), + framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), })), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), + framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), })), - framework::dataset::make("Expected", { true, true, false, false})), + framework::dataset::make("Expected", { true, false, false})), input1_info, input2_info, output_info, expected) { ARM_COMPUTE_EXPECT(bool(CLArithmeticSubtraction::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), ConvertPolicy::WRAP)) == expected, framework::LogLevel::ERRORS); @@ -159,7 +141,8 @@ using CLArithmeticSubtractionFixture = ArithmeticSubtractionValidationFixture<CL TEST_SUITE(Integer) TEST_SUITE(U8) -FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ArithmeticSubtractionU8Dataset), +FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", + DataType::U8)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), OutOfPlaceDataSet)) { @@ -169,7 +152,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixture<uint8_t>, framew TEST_SUITE_END() // U8 TEST_SUITE(S16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticSubtractionS16Dataset), +FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", + DataType::S16)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), OutOfPlaceDataSet)) { @@ -177,7 +161,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixture<int16_t>, framew validate(CLAccessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticSubtractionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ArithmeticSubtractionS16Dataset), +FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticSubtractionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", + DataType::S16)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), OutOfPlaceDataSet)) { @@ -193,11 +178,22 @@ using CLArithmeticSubtractionQuantizedFixture = ArithmeticSubtractionValidationQ TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(datasets::SmallShapes(), - ArithmeticSubtractionQASYMM8Dataset), + framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })), framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })), framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })), + OutOfPlaceDataSet)) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunTinyInPlace, CLArithmeticSubtractionQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(datasets::TinyShapes(), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), + framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })), + framework::dataset::make("Src1QInfo", { QuantizationInfo(5.f / 255.f, 20) })), + framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 255.f, 20) })), InPlaceDataSet)) { // Validate output @@ -206,12 +202,12 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionQuantizedFixture<uint8_t TEST_SUITE_END() // QASYMM8 TEST_SUITE(QASYMM8_SIGNED) FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(datasets::SmallShapes(), - ArithmeticSubtractionQASYMM8SignedDataset), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 10) })), framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })), framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })), - InPlaceDataSet)) + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference); @@ -219,7 +215,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionQuantizedFixture<int8_t> TEST_SUITE_END() // QASYMM8_SIGNED TEST_SUITE(QSYMM16) FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(datasets::SmallShapes(), - ArithmeticSubtractionQSYMM16Dataset), + framework::dataset::make("DataType", DataType::QSYMM16)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), framework::dataset::make("Src0QInfo", { QuantizationInfo(1.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })), framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })), @@ -237,7 +233,8 @@ using CLArithmeticSubtractionFloatFixture = ArithmeticSubtractionValidationFloat TEST_SUITE(Float) TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), ArithmeticSubtractionFP16Dataset), +FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", + DataType::F16)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), EmptyActivationFunctionsDataset), OutOfPlaceDataSet)) @@ -246,7 +243,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFloatFixture<half>, fram validate(CLAccessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticSubtractionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::TinyShapes(), - ArithmeticSubtractionFP16Dataset), + framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), ActivationFunctionsDataset), InPlaceDataSet)) @@ -258,7 +255,7 @@ TEST_SUITE_END() // FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), - ArithmeticSubtractionFP32Dataset), + framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), EmptyActivationFunctionsDataset), OutOfPlaceDataSet)) @@ -267,7 +264,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFloatFixture<float>, fra validate(CLAccessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticSubtractionFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::TinyShapes(), - ArithmeticSubtractionFP32Dataset), + framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), ActivationFunctionsDataset), InPlaceDataSet)) @@ -277,7 +274,7 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticSubtractionFloatFixture<fl } FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticSubtractionFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), - ArithmeticSubtractionFP32Dataset), + framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), EmptyActivationFunctionsDataset), OutOfPlaceDataSet)) @@ -290,7 +287,7 @@ template <typename T> using CLArithmeticSubtractionBroadcastFloatFixture = ArithmeticSubtractionBroadcastValidationFloatFixture<CLTensor, CLAccessor, CLArithmeticSubtraction, T>; FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLArithmeticSubtractionBroadcastFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapesBroadcast(), - ArithmeticSubtractionFP32Dataset), + framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), EmptyActivationFunctionsDataset), OutOfPlaceDataSet)) @@ -298,8 +295,18 @@ FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLArithmeticSubtractionBroadcastFloatF // Validate output validate(CLAccessor(_target), _reference); } +FIXTURE_DATA_TEST_CASE(RunTinyBroadcastInplace, CLArithmeticSubtractionBroadcastFloatFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(datasets::TinyShapesBroadcastInplace(), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + EmptyActivationFunctionsDataset), + InPlaceDataSet)) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLArithmeticSubtractionBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::TinyShapesBroadcast(), - ArithmeticSubtractionFP32Dataset), + framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), ActivationFunctionsDataset), OutOfPlaceDataSet)) @@ -309,7 +316,7 @@ FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLArithmeticSubtractionBroadc } FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, CLArithmeticSubtractionBroadcastFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapesBroadcast(), - ArithmeticSubtractionFP32Dataset), + framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), EmptyActivationFunctionsDataset), OutOfPlaceDataSet)) diff --git a/tests/validation/CL/BatchNormalizationLayer.cpp b/tests/validation/CL/BatchNormalizationLayer.cpp index 8b3bdbc3ea..3b87b9d1b5 100644 --- a/tests/validation/CL/BatchNormalizationLayer.cpp +++ b/tests/validation/CL/BatchNormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -50,7 +50,7 @@ namespace { RelativeTolerance<float> rel_tolerance_f32(0.05f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ constexpr AbsoluteTolerance<float> abs_tolerance_f32(0.0001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ -constexpr AbsoluteTolerance<float> tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ +constexpr AbsoluteTolerance<float> tolerance_f16(0.02f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ const auto act_infos = framework::dataset::make("ActivationInfo", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), diff --git a/tests/validation/CL/BatchToSpaceLayer.cpp b/tests/validation/CL/BatchToSpaceLayer.cpp index e90ac921c5..ca12b76e8a 100644 --- a/tests/validation/CL/BatchToSpaceLayer.cpp +++ b/tests/validation/CL/BatchToSpaceLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -50,56 +50,38 @@ using CLBatchToSpaceLayerFixture = BatchToSpaceLayerValidationFixture<CLTensor, // *INDENT-OFF* // clang-format off -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // blockx != blocky && blockx > blocky - TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // blockx != blocky && blocky > blockx - TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32), // Mismatching data types - TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32), // Wrong data type block shape - TensorInfo(TensorShape(32U, 13U, 2U, 2U, 4U), 1, DataType::F32), // Wrong tensor shape - }), - framework::dataset::make("BlockShapeInfo",{ TensorInfo(TensorShape(2U, 2U), 1, DataType::S32), - TensorInfo(TensorShape(2U, 4U), 1, DataType::S32), - TensorInfo(TensorShape(4U, 2U), 1, DataType::S32), - TensorInfo(TensorShape(2U, 2U), 1, DataType::S32), - TensorInfo(TensorShape(2U, 2U), 1, DataType::F16), - TensorInfo(TensorShape(2U, 2U), 1, DataType::S32), - })), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 16U, 2U, 1U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 32U, 2U, 1U), 1, DataType::F32), - - TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F16), - TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32), - })), - framework::dataset::make("Expected", { true, true,true, false, false, false})), - input_info, block_shape_info, output_info, expected) -{ - bool has_error = bool(CLBatchToSpaceLayer::validate(&input_info.clone()->set_is_resizable(false), &block_shape_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))); - ARM_COMPUTE_EXPECT(has_error == expected, framework::LogLevel::ERRORS); -} -DATA_TEST_CASE(ValidateStatic, framework::DatasetMode::ALL, zip(zip(zip(zip( +DATA_TEST_CASE(ValidateStatic, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( framework::dataset::make("InputInfo", { TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // blockx != blocky && blockx > blocky - TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // blockx != blocky && blocky > blockx - TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32), // Mismatching data types - TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32), // Negative block shapes - TensorInfo(TensorShape(32U, 16U, 2U, 4U, 4U), 1, DataType::F32), // Wrong tensor shape + TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // Supported: blockx != blocky && blockx > blocky + TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // Supported: blockx != blocky && blocky > blockx + TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32), // Invalid: Mismatching data types + TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32), // Invalid: Negative block shapes + TensorInfo(TensorShape(32U, 16U, 2U, 4U, 4U), 1, DataType::F32),// Unsupported tensor rank + TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // Invalid output tensor shape (invalid batch dimension) + TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // Invalid output tensor shape (invalid spatial dimension) + TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // Supported: correct tensor shape with cropping + TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // Invalid tensor shape with cropping }), - framework::dataset::make("BlockShapeX", { 2, 4, 2, 2, 2, 2 })), - framework::dataset::make("BlockShapeY", { 2, 2, 4, 2, -2, 2 })), + framework::dataset::make("BlockShapeX", { 2, 4, 2, 2, 2, 2, 2, 2, 2, 2 })), + framework::dataset::make("BlockShapeY", { 2, 2, 4, 2, -2, 2, 2, 2, 2, 2 })), + framework::dataset::make("CropInfo", { + CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{3, 2, 1, 3}, CropInfo{3, 2, 1, 3} + })), framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 16U, 2U, 1U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 32U, 2U, 1U), 1, DataType::F32), + TensorInfo(TensorShape(64U, 16U, 2U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 32U, 2U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F16), TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F32), TensorInfo(TensorShape(32U, 8U, 2U, 1U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F32), + TensorInfo(TensorShape(33U, 32U, 2U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(27, 12U, 2U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 16U, 2U, 4U), 1, DataType::F32), })), - framework::dataset::make("Expected", { true, true,true, false, false, false})), - input_info, block_shape_x, block_shape_y, output_info, expected) + framework::dataset::make("Expected", { true, true, true, false, false, false, false, false, true, false})), + input_info, block_shape_x, block_shape_y, crop_info, output_info, expected) { - bool has_error = bool(CLBatchToSpaceLayer::validate(&input_info.clone()->set_is_resizable(false), block_shape_x, block_shape_y, &output_info.clone()->set_is_resizable(false))); + bool has_error = bool(CLBatchToSpaceLayer::validate(&input_info.clone()->set_is_resizable(false), block_shape_x, block_shape_y, &output_info.clone()->set_is_resizable(false), crop_info)); ARM_COMPUTE_EXPECT(has_error == expected, framework::LogLevel::ERRORS); } // clang-format on @@ -114,6 +96,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLBatchToSpaceLayerFixture<float>, framework::D // Validate output validate(CLAccessor(_target), _reference); } + +FIXTURE_DATA_TEST_CASE(RunSmallWithCropping, CLBatchToSpaceLayerFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(combine(datasets::SmallBatchToSpaceLayerWithCroppingDataset(), framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} + FIXTURE_DATA_TEST_CASE(RunLarge, CLBatchToSpaceLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeBatchToSpaceLayerDataset(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) @@ -131,6 +123,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLBatchToSpaceLayerFixture<half>, framework::Da // Validate output validate(CLAccessor(_target), _reference); } + +FIXTURE_DATA_TEST_CASE(RunSmallWithCropping, CLBatchToSpaceLayerFixture<half>, framework::DatasetMode::PRECOMMIT, + combine(combine(datasets::SmallBatchToSpaceLayerWithCroppingDataset(), framework::dataset::make("DataType", + DataType::F16)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} + FIXTURE_DATA_TEST_CASE(RunLarge, CLBatchToSpaceLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeBatchToSpaceLayerDataset(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) diff --git a/tests/validation/CL/BoundingBoxTransform.cpp b/tests/validation/CL/BoundingBoxTransform.cpp index 2a7f1667d6..2584b1a5b6 100644 --- a/tests/validation/CL/BoundingBoxTransform.cpp +++ b/tests/validation/CL/BoundingBoxTransform.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,6 @@ #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h" #include "tests/CL/CLAccessor.h" -#include "tests/CL/CLArrayAccessor.h" #include "tests/Globals.h" #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" diff --git a/tests/validation/CL/Cast.cpp b/tests/validation/CL/Cast.cpp index 2ca8b58040..2f943e84d8 100644 --- a/tests/validation/CL/Cast.cpp +++ b/tests/validation/CL/Cast.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2020, 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -48,6 +48,10 @@ constexpr AbsoluteTolerance<float> one_tolerance(1); constexpr AbsoluteTolerance<float> zero_tolerance(0); /** Input data sets **/ +// QASYMM8 +const auto CastQASYMM8toF32Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::F32)); +const auto CastQSYMM8toF32Dataset = combine(framework::dataset::make("DataType", DataType::QSYMM8), framework::dataset::make("DataType", DataType::F32)); + // U8 const auto CastU8toS8Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S8)); const auto CastU8toU16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U16)); @@ -119,6 +123,26 @@ const auto CastF32toS16Dataset = combine(framework::dataset::make("DataType", Da const auto CastF32toU32Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::U32)); const auto CastF32toS32Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::S32)); const auto CastF32toF16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F16)); + +// U64 +const auto CastU64toU8Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::U8)); +const auto CastU64toS8Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::S8)); +const auto CastU64toU16Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::U16)); +const auto CastU64toS16Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::S16)); +const auto CastU64toU32Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::U32)); +const auto CastU64toS32Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::S32)); +const auto CastU64toF16Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::F16)); +const auto CastU64toF32Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::F32)); + +// S64 +const auto CastS64toU8Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::U8)); +const auto CastS64toS8Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::S8)); +const auto CastS64toU16Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::U16)); +const auto CastS64toS16Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::S16)); +const auto CastS64toU32Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::U32)); +const auto CastS64toS32Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::S32)); +const auto CastS64toF16Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::F16)); +const auto CastS64toF32Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::F32)); } // namespace TEST_SUITE(CL) @@ -149,6 +173,12 @@ using CLCastToF32Fixture = CastValidationFixture<CLTensor, CLAccessor, CLCast, T } \ TEST_SUITE_END() +// QASYMM8 +CAST_SUITE(QASYMM8_to_F32, DataType::QASYMM8, DataType::F32, CLCastToF32Fixture<uint8_t>, CastQASYMM8toF32Dataset, zero_tolerance) +// QSYMM8 +CAST_SUITE(QSYMM8_to_F32, DataType::QSYMM8, DataType::F32, CLCastToF32Fixture<int8_t>, CastQSYMM8toF32Dataset, zero_tolerance) + + // U8 CAST_SUITE(U8_to_S8, DataType::U8, DataType::S8, CLCastToS8Fixture<uint8_t>, CastU8toS8Dataset, zero_tolerance) CAST_SUITE(U8_to_U16, DataType::U8, DataType::U16, CLCastToU16Fixture<uint8_t>, CastU8toU16Dataset, zero_tolerance) @@ -221,6 +251,26 @@ CAST_SUITE(F32_to_U32, DataType::F32, DataType::U32, CLCastToU32Fixture<float>, CAST_SUITE(F32_to_S32, DataType::F32, DataType::S32, CLCastToS32Fixture<float>, CastF32toS32Dataset, one_tolerance) CAST_SUITE(F32_to_F16, DataType::F32, DataType::F16, CLCastToF16Fixture<float>, CastF32toF16Dataset, zero_tolerance) +// S64 +CAST_SUITE(S64_to_U8, DataType::S64, DataType::U8, CLCastToU8Fixture<int64_t>, CastS64toU8Dataset, one_tolerance) +CAST_SUITE(S64_to_S8, DataType::S64, DataType::S8, CLCastToS8Fixture<int64_t>, CastS64toS8Dataset, one_tolerance) +CAST_SUITE(S64_to_U16, DataType::S64, DataType::U16, CLCastToU16Fixture<int64_t>, CastS64toU16Dataset, one_tolerance) +CAST_SUITE(S64_to_S16, DataType::S64, DataType::S16, CLCastToS16Fixture<int64_t>, CastS64toS16Dataset, one_tolerance) +CAST_SUITE(S64_to_U32, DataType::S64, DataType::U32, CLCastToU32Fixture<int64_t>, CastS64toU32Dataset, one_tolerance) +CAST_SUITE(S64_to_S32, DataType::S64, DataType::S32, CLCastToS32Fixture<int64_t>, CastS64toS32Dataset, one_tolerance) +CAST_SUITE(S64_to_F16, DataType::S64, DataType::F16, CLCastToF16Fixture<int64_t>, CastS64toF16Dataset, zero_tolerance) +CAST_SUITE(S64_to_F32, DataType::S64, DataType::F32, CLCastToF32Fixture<int64_t>, CastS64toF32Dataset, zero_tolerance) + +// U64 +CAST_SUITE(U64_to_U8, DataType::U64, DataType::U8, CLCastToU8Fixture<uint64_t>, CastU64toU8Dataset, one_tolerance) +CAST_SUITE(U64_to_S8, DataType::U64, DataType::S8, CLCastToS8Fixture<uint64_t>, CastU64toS8Dataset, one_tolerance) +CAST_SUITE(U64_to_U16, DataType::U64, DataType::U16, CLCastToU16Fixture<uint64_t>, CastU64toU16Dataset, one_tolerance) +CAST_SUITE(U64_to_S16, DataType::U64, DataType::S16, CLCastToS16Fixture<uint64_t>, CastU64toS16Dataset, one_tolerance) +CAST_SUITE(U64_to_U32, DataType::U64, DataType::U32, CLCastToU32Fixture<uint64_t>, CastU64toU32Dataset, one_tolerance) +CAST_SUITE(U64_to_S32, DataType::U64, DataType::S32, CLCastToS32Fixture<uint64_t>, CastU64toS32Dataset, one_tolerance) +CAST_SUITE(U64_to_F16, DataType::U64, DataType::F16, CLCastToF16Fixture<uint64_t>, CastU64toF16Dataset, zero_tolerance) +CAST_SUITE(U64_to_F32, DataType::U64, DataType::F32, CLCastToF32Fixture<uint64_t>, CastU64toF32Dataset, zero_tolerance) + TEST_SUITE_END() // Cast TEST_SUITE_END() // CL } // namespace validation diff --git a/tests/validation/CL/Col2Im.cpp b/tests/validation/CL/Col2Im.cpp index b651bf8918..4b004e2472 100644 --- a/tests/validation/CL/Col2Im.cpp +++ b/tests/validation/CL/Col2Im.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,7 +22,7 @@ * SOFTWARE. */ #include "arm_compute/core/Types.h" -#include "src/core/CL/kernels/CLCol2ImKernel.h" +#include "src/gpu/cl/kernels/ClCol2ImKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/framework/Asserts.h" @@ -40,7 +40,7 @@ namespace validation TEST_SUITE(CL) TEST_SUITE(Col2Im) -using CLCol2Im = CLSynthetizeFunction<CLCol2ImKernel>; +using ClCol2Im = ClSynthetizeOperatorWithBorder<opencl::kernels::ClCol2ImKernel>; /** Negative tests * @@ -59,7 +59,7 @@ TEST_CASE(Negative, framework::DatasetMode::ALL) const auto input = TensorInfo(TensorShape(10U, 12U, 1U, 2U), 1, DataType::SIZET); const auto output = TensorInfo(TensorShape(3U, 4U, 10U, 1U, 2U), 1, DataType::F32); const auto conv_size = Size2D(3, 4); - const auto status = CLCol2ImKernel::validate(&input, &output, conv_size); + const auto status = opencl::kernels::ClCol2ImKernel::validate(&input, &output, conv_size); ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); } @@ -68,7 +68,7 @@ TEST_CASE(Negative, framework::DatasetMode::ALL) const auto input = TensorInfo(TensorShape(10U, 12U, 1U, 2U), 1, DataType::F32); const auto output = TensorInfo(TensorShape(3U, 4U, 10U, 1U, 2U), 1, DataType::F32, DataLayout::NHWC); const auto conv_size = Size2D(3, 4); - const auto status = CLCol2ImKernel::validate(&input, &output, conv_size); + const auto status = opencl::kernels::ClCol2ImKernel::validate(&input, &output, conv_size); ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); } @@ -77,13 +77,13 @@ TEST_CASE(Negative, framework::DatasetMode::ALL) const auto input = TensorInfo(TensorShape(10U, 12U, 1U, 2U), 1, DataType::F32); const auto output = TensorInfo(TensorShape(3U, 4U, 10U, 2U, 2U), 1, DataType::F32); const auto conv_size = Size2D(3, 4); - const auto status = CLCol2ImKernel::validate(&input, &output, conv_size); + const auto status = opencl::kernels::ClCol2ImKernel::validate(&input, &output, conv_size); ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); } } template <typename T> -using CLCol2ImFixture = Col2ImValidationFixture<CLTensor, CLAccessor, CLCol2Im, T, true>; +using ClCol2ImFixture = Col2ImOpValidationFixture<CLTensor, CLAccessor, ClCol2Im, T, true>; /** Test kernel for single-precision floating point * @@ -99,7 +99,7 @@ using CLCol2ImFixture = Col2ImValidationFixture<CLTensor, CLAccessor, CLCol2Im, * Kernel tested col2im */ FIXTURE_DATA_TEST_CASE(FP32, - CLCol2ImFixture<float>, + ClCol2ImFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine( framework::dataset::make("InputShape", { TensorShape(8U, 16U, 3U, 1U), TensorShape(17U, 16U, 3U, 1U), TensorShape(7U, 16U, 3U, 1U) }), @@ -125,7 +125,7 @@ FIXTURE_DATA_TEST_CASE(FP32, * Kernel tested col2im */ FIXTURE_DATA_TEST_CASE(F16, - CLCol2ImFixture<half>, + ClCol2ImFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine( framework::dataset::make("InputShape", TensorShape(17U, 16U, 3U, 1U)), @@ -151,7 +151,7 @@ FIXTURE_DATA_TEST_CASE(F16, * Kernel tested col2im */ FIXTURE_DATA_TEST_CASE(QASYMM8, - CLCol2ImFixture<uint8_t>, + ClCol2ImFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine( framework::dataset::make("InputShape", TensorShape(17U, 16U, 3U, 1U)), diff --git a/tests/validation/CL/Comparisons.cpp b/tests/validation/CL/Comparisons.cpp index d015528b0e..dd3dbd8d59 100644 --- a/tests/validation/CL/Comparisons.cpp +++ b/tests/validation/CL/Comparisons.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2020, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -59,7 +59,7 @@ TEST_SUITE(Comparison) DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Invalid output type TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching input types - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Window shrink + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), }), @@ -75,7 +75,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::U8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), })), - framework::dataset::make("Expected", { false, false, false, false, true})), + framework::dataset::make("Expected", { false, false, true, false, true})), input1_info, input2_info, output_info, expected) { Status s = CLComparison::validate(&input1_info.clone()->set_is_resizable(false), diff --git a/tests/validation/CL/Convolution3D.cpp b/tests/validation/CL/Convolution3D.cpp new file mode 100644 index 0000000000..a2848560c3 --- /dev/null +++ b/tests/validation/CL/Convolution3D.cpp @@ -0,0 +1,300 @@ +/* + * Copyright (c) 2021, 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/functions/CLConv3D.h" +#include "arm_compute/runtime/FunctionDescriptors.h" +#include "tests/CL/CLAccessor.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/DirectConvolution3DFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +const RelativeTolerance<half> rel_tolerance_fp16(half(0.2)); /**< Relative tolerance for FP16 tests */ +constexpr float abs_tolerance_fp16(0.05f); /**< Absolute tolerance for FP16 tests */ +constexpr RelativeTolerance<float> rel_tolerance_fp32(0.05f); /**< Relative tolerance for FP32 tests */ +constexpr float abs_tolerance_fp32(0.0001f); /**< Absolute tolerance for FP32 tests*/ +constexpr AbsoluteTolerance<uint8_t> abs_tolerance_qasymm8(1); /**< Absolute tolerance for quantized tests */ +constexpr float tolerance_num = 0.07f; /**< Tolerance number */ +} // namespace + +TEST_SUITE(CL) +TEST_SUITE(DirectConvolution3D) + +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(27U, 13U, 5U, 3U), // Unsupported data layout + TensorShape(27U, 13U, 5U, 3U), // Unsupported activation enabled + TensorShape(27U, 13U, 5U, 3U), // Mismatching data type + TensorShape(27U, 13U, 5U, 3U), // Unsupported data type + TensorShape(27U, 13U, 5U, 3U), // Mismatching input feature maps + TensorShape(27U, 13U, 5U, 3U), // Mismatching output feature maps + TensorShape(27U, 13U, 5U, 3U), // Mismatching bias shape + TensorShape(27U, 13U, 5U, 3U), // Unsupported number of weights dimensions + TensorShape(27U, 13U, 5U, 3U), // Unsupported number of biases dimensions + TensorShape(27U, 13U, 5U, 3U), // Mismatching output shape + TensorShape(27U, 13U, 5U, 3U) + }), + framework::dataset::make("WeightsShape", { TensorShape(4U, 27U, 3U, 3U, 3U), + TensorShape(4U, 27U, 3U, 3U, 3U), + TensorShape(4U, 27U, 3U, 3U, 3U), + TensorShape(4U, 27U, 3U, 3U, 3U), + TensorShape(4U, 32U, 3U, 3U, 3U), + TensorShape(8U, 27U, 3U, 3U, 3U), + TensorShape(4U, 27U, 3U, 3U, 3U), + TensorShape(4U, 27U, 3U, 3U, 3U, 2U), + TensorShape(4U, 27U, 3U, 3U, 3U), + TensorShape(4U, 27U, 3U, 3U, 3U), + TensorShape(4U, 27U, 3U, 3U, 3U) + })), + framework::dataset::make("BiasesShape", { TensorShape(4U), + TensorShape(4U), + TensorShape(4U), + TensorShape(4U), + TensorShape(4U), + TensorShape(4U), + TensorShape(8U), + TensorShape(4U), + TensorShape(4U), + TensorShape(4U), + TensorShape(4U) + })), + framework::dataset::make("OutputShape", { TensorShape(4U, 13U, 5U, 3U), + TensorShape(4U, 13U, 5U, 3U), + TensorShape(4U, 13U, 5U, 3U), + TensorShape(4U, 13U, 5U, 3U), + TensorShape(4U, 13U, 5U, 3U), + TensorShape(4U, 13U, 5U, 3U), + TensorShape(4U, 13U, 5U, 3U), + TensorShape(4U, 13U, 5U, 3U), + TensorShape(4U, 13U, 5U, 3U, 2U), + TensorShape(4U, 11U, 5U, 3U), + TensorShape(4U, 13U, 5U, 3U) + })), + framework::dataset::make("Conv3dInfo", { Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false), + Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false), + Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false), + Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false), + Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false), + Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false), + Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false), + Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false), + Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false), + Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false), + Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false) + })), + framework::dataset::make("SrcDataType", { DataType::F32, + DataType::F32, + DataType::F32, + DataType::U32, + DataType::F32, + DataType::F32, + DataType::F32, + DataType::F32, + DataType::F32, + DataType::F32, + DataType::F32 + })), + framework::dataset::make("WeightsDataType", { DataType::F32, + DataType::F32, + DataType::F16, + DataType::U32, + DataType::F32, + DataType::F32, + DataType::F32, + DataType::F32, + DataType::F32, + DataType::F32, + DataType::F32 + })), + framework::dataset::make("DataLayout", { DataLayout::NCDHW, + DataLayout::NDHWC, + DataLayout::NDHWC, + DataLayout::NDHWC, + DataLayout::NDHWC, + DataLayout::NDHWC, + DataLayout::NDHWC, + DataLayout::NDHWC, + DataLayout::NDHWC, + DataLayout::NDHWC, + DataLayout::NDHWC + })), + framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, false, false, true })), + input_shape, weights_shape, biases_shape, output_shape, conv3d_info, src_data_type, weights_data_type, data_layout, expected) +{ + TensorInfo input_info = TensorInfo(input_shape, 1, src_data_type); + TensorInfo weights_info = TensorInfo(weights_shape, 1, weights_data_type); + TensorInfo biases_info = TensorInfo(biases_shape, 1, src_data_type); + TensorInfo output_info = TensorInfo(output_shape, 1, src_data_type); + + input_info.set_data_layout(data_layout); + weights_info.set_data_layout(data_layout); + biases_info.set_data_layout(data_layout); + output_info.set_data_layout(data_layout); + + bool is_valid = bool(CLConv3D::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv3d_info)); + ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); +} + +template <typename T> +using CLDirectConvolution3DFixture = DirectConvolution3DValidationFixture<CLTensor, CLAccessor, CLConv3D, T>; +template <typename T> +using CLDirectConvolution3DQuantizedFixture = DirectConvolution3DValidationQuantizedFixture<CLTensor, CLAccessor, CLConv3D, T>; + +TEST_SUITE(NDHWC) +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolution3DFixture<half>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(7U, 5U, 3U, 13U, 3U), + TensorShape(15U, 7U, 11U, 7U), + TensorShape(19U, 5U, 16U, 4U), + TensorShape(13U, 5U, 17U, 2U) + }), + framework::dataset::make("StrideX", { 1, 3, 2, 1 })), + framework::dataset::make("StrideY", { 2, 1, 3, 1 })), + framework::dataset::make("StrideZ", { 3, 2, 1, 1 })), + framework::dataset::make("PadX", { 0, 2, 1, 0 })), + framework::dataset::make("PadY", { 1, 0, 2, 0 })), + framework::dataset::make("PadZ", { 2, 1, 0, 0 })), + framework::dataset::make("KernelWidth", { 3, 7, 5, 1 })), + framework::dataset::make("KernelHeight", { 5, 3, 7, 1 })), + framework::dataset::make("KernelDepth", { 7, 5, 3, 1 })), + framework::dataset::make("NumKernels", { 5, 3, 1, 11 })), + framework::dataset::make("HasBias", { true, true, true, false })), + framework::dataset::make("Activation", ActivationLayerInfo())), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", DataLayout::NDHWC))) +{ + validate(CLAccessor(_target), _reference, rel_tolerance_fp16, tolerance_num, abs_tolerance_fp16); +} + +TEST_SUITE_END() // FP16 + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolution3DFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(7U, 5U, 3U, 13U, 3U), + TensorShape(15U, 7U, 11U, 7U), + TensorShape(19U, 5U, 16U, 4U), + TensorShape(13U, 5U, 17U, 2U) + }), + framework::dataset::make("StrideX", { 1, 3, 2, 1 })), + framework::dataset::make("StrideY", { 2, 1, 3, 1 })), + framework::dataset::make("StrideZ", { 3, 2, 1, 1 })), + framework::dataset::make("PadX", { 0, 2, 1, 0 })), + framework::dataset::make("PadY", { 1, 0, 2, 0 })), + framework::dataset::make("PadZ", { 2, 1, 0, 0 })), + framework::dataset::make("KernelWidth", { 3, 7, 5, 1 })), + framework::dataset::make("KernelHeight", { 5, 3, 7, 1 })), + framework::dataset::make("KernelDepth", { 7, 5, 3, 1 })), + framework::dataset::make("NumKernels", { 5, 3, 1, 11 })), + framework::dataset::make("HasBias", { true, true, true, false })), + framework::dataset::make("Activation", ActivationLayerInfo())), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NDHWC))) +{ + validate(CLAccessor(_target), _reference, rel_tolerance_fp32, 0.0, abs_tolerance_fp32); +} + +// clang-format on +// *INDENT-ON* +TEST_SUITE_END() // FP32 + +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolution3DQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(combine(combine(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(7U, 5U, 3U, 13U, 3U), + TensorShape(15U, 7U, 11U, 7U), + TensorShape(19U, 5U, 16U, 4U), + TensorShape(13U, 5U, 17U, 2U) + }), + framework::dataset::make("StrideX", { 1, 3, 2, 1 })), + framework::dataset::make("StrideY", { 2, 1, 3, 1 })), + framework::dataset::make("StrideZ", { 3, 2, 1, 1 })), + framework::dataset::make("PadX", { 0, 2, 1, 0 })), + framework::dataset::make("PadY", { 1, 0, 2, 0 })), + framework::dataset::make("PadZ", { 2, 1, 0, 0 })), + framework::dataset::make("KernelWidth", { 3, 7, 5, 1 })), + framework::dataset::make("KernelHeight", { 5, 3, 7, 1 })), + framework::dataset::make("KernelDepth", { 7, 5, 3, 1 })), + framework::dataset::make("NumKernels", { 5, 3, 1, 11 })), + framework::dataset::make("HasBias", { true, true, true, false })), + framework::dataset::make("Activation", ActivationLayerInfo())), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("DataLayout", DataLayout::NDHWC)), + framework::dataset::make("SrcQuantizationInfo", QuantizationInfo(0.1f, 10))), + framework::dataset::make("WeightsQuantizationInfo", QuantizationInfo(0.3f, 20))), + framework::dataset::make("DstQuantizationInfo", QuantizationInfo(0.2f, 5)))) +{ + validate(CLAccessor(_target), _reference, abs_tolerance_qasymm8); +} + +TEST_SUITE_END() // QASYMM8 + +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolution3DQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(combine(combine(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(7U, 5U, 3U, 13U, 3U), + TensorShape(15U, 7U, 11U, 7U), + TensorShape(19U, 5U, 16U, 4U), + TensorShape(13U, 5U, 17U, 2U) + }), + framework::dataset::make("StrideX", { 1, 3, 2, 1 })), + framework::dataset::make("StrideY", { 2, 1, 3, 1 })), + framework::dataset::make("StrideZ", { 3, 2, 1, 1 })), + framework::dataset::make("PadX", { 0, 2, 1, 0 })), + framework::dataset::make("PadY", { 1, 0, 2, 0 })), + framework::dataset::make("PadZ", { 2, 1, 0, 0 })), + framework::dataset::make("KernelWidth", { 3, 7, 5, 1 })), + framework::dataset::make("KernelHeight", { 5, 3, 7, 1 })), + framework::dataset::make("KernelDepth", { 7, 5, 3, 1 })), + framework::dataset::make("NumKernels", { 5, 3, 1, 11 })), + framework::dataset::make("HasBias", { true, true, true, false })), + framework::dataset::make("Activation", ActivationLayerInfo())), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + framework::dataset::make("DataLayout", DataLayout::NDHWC)), + framework::dataset::make("SrcQuantizationInfo", QuantizationInfo(0.1f, 10))), + framework::dataset::make("WeightsQuantizationInfo", QuantizationInfo(0.3f, 20))), + framework::dataset::make("DstQuantizationInfo", QuantizationInfo(0.2f, 5)))) +{ + validate(CLAccessor(_target), _reference, abs_tolerance_qasymm8); +} + +TEST_SUITE_END() // QASYMM8_SIGNED + +TEST_SUITE_END() // NDHWC +TEST_SUITE_END() // DirectConvolution3D +TEST_SUITE_END() // CL + +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/CL/ConvolutionLayer.cpp b/tests/validation/CL/ConvolutionLayer.cpp index b66cfd97e7..8820a6a31e 100644 --- a/tests/validation/CL/ConvolutionLayer.cpp +++ b/tests/validation/CL/ConvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" @@ -37,12 +38,16 @@ #include "tests/validation/Validation.h" #include "tests/validation/fixtures/ConvolutionLayerFixture.h" +/** Synced with tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp + * Please check there for any differences in the coverage + */ namespace arm_compute { namespace test { namespace validation { +using framework::dataset::make; namespace { class SmallConvolutionLayerDatasetCases final : public datasets::ConvolutionLayerDataset @@ -61,32 +66,32 @@ constexpr AbsoluteTolerance<float> tolerance_qasymm8(1); /**< T constexpr float tolerance_num = 0.07f; /**< Tolerance number */ /** CNN data types */ -const auto CNNDataTypes = framework::dataset::make("DataType", +const auto CNNDataTypes = make("DataType", { DataType::F16, - DataType::F32, - DataType::QASYMM8, - DataType::QASYMM8_SIGNED, + DataType::F32, + DataType::QASYMM8, + DataType::QASYMM8_SIGNED, }); /** Grouped CNN data types */ -const auto GroupedCNNDataTypes = framework::dataset::make("DataType", +const auto GroupedCNNDataTypes = make("DataType", { DataType::F16, - DataType::F32 + DataType::F32 }); -const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo", +const auto ActivationFunctionsDataset = make("ActivationInfo", { ActivationLayerInfo(), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f) + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f) }); -const auto ActivationFunctionsSmallDataset = framework::dataset::make("ActivationInfo", +const auto ActivationFunctionsSmallDataset = make("ActivationInfo", { ActivationLayerInfo(), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f) + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f) }); } // namespace @@ -96,7 +101,7 @@ TEST_SUITE(ConvolutionLayer) // *INDENT-OFF* // clang-format off DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32), // Select GEMM + make("InputInfo", { TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32), // Select GEMM TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32), // Select GEMM TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F32), // Select GEMM TensorInfo(TensorShape(23U, 27U, 31U, 4U), 1, DataType::F32), // Select WINOGRAD @@ -106,7 +111,7 @@ DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(z TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32), // Select GEMM TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::QASYMM8_SIGNED), // Select GEMM }), - framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::F32), + make("WeightsInfo", { TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::F32), TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::F32), TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32), TensorInfo(TensorShape(3U, 3U, 31U, 21U), 1, DataType::F32), @@ -116,7 +121,7 @@ DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(z TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::F32), TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::QASYMM8_SIGNED), })), - framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32), + make("OutputInfo", { TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32), TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32), TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32), TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32), @@ -126,7 +131,7 @@ DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(z TensorInfo(TensorShape(17U, 31U, 19U), 1, DataType::F32), TensorInfo(TensorShape(17U, 31U, 19U), 1, DataType::QASYMM8_SIGNED), })), - framework::dataset::make("ConvInfo", { PadStrideInfo(1, 2, 1, 1), + make("ConvInfo", { PadStrideInfo(1, 2, 1, 1), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(1, 1, 0, 0), PadStrideInfo(1, 1, 0, 0), @@ -136,7 +141,7 @@ DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(z PadStrideInfo(1, 1, 2, 2), PadStrideInfo(1, 1, 2, 2), })), - framework::dataset::make("GpuTarget", { GPUTarget::BIFROST, + make("GpuTarget", { GPUTarget::BIFROST, GPUTarget::MIDGARD, GPUTarget::G71, GPUTarget::G71, @@ -146,7 +151,7 @@ DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(z GPUTarget::BIFROST, GPUTarget::BIFROST, })), - framework::dataset::make("Dilation", { Size2D(1U, 1U), + make("Dilation", { Size2D(1U, 1U), Size2D(1U, 1U), Size2D(1U, 1U), Size2D(1U, 1U), @@ -156,8 +161,8 @@ DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(z Size2D(2U, 1U), Size2D(2U, 1U), })), - framework::dataset::make("EnableFastMath", { false, false, false, false, false, false, true, true, true })), - framework::dataset::make("Expected",{ ConvolutionMethod::GEMM, + make("EnableFastMath", { false, false, false, false, false, false, true, true, true })), + make("Expected",{ ConvolutionMethod::GEMM, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM, ConvolutionMethod::WINOGRAD, @@ -186,15 +191,16 @@ TEST_SUITE_END() // ConvolutionLayer TEST_SUITE(GEMMConvolutionLayer) template <typename T> using CLGEMMConvolutionLayerFixture = ConvolutionValidationFixture<CLTensor, CLAccessor, CLGEMMConvolutionLayer, T>; +template <typename T> +using CLGEMMConvolutionLayerMixedDataLayoutFixture = ConvolutionValidationFixture<CLTensor, CLAccessor, CLGEMMConvolutionLayer, T, true>; +template <typename T> +using CLConvolutionValidationWithPaddingFixture = ConvolutionValidationWithPaddingFixture<CLTensor, CLAccessor, CLGEMMConvolutionLayer, T>; TEST_SUITE(Float) TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), - framework::dataset::make("ReshapeWeights", { true })), - framework::dataset::make("DataType", - DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + make("ReshapeWeights", { true })), make("DataType", DataType::F16)), make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), ActivationFunctionsSmallDataset)) { // Validate output @@ -205,93 +211,231 @@ TEST_SUITE_END() // FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), - framework::dataset::make("ReshapeWeights", { true })), - framework::dataset::make("DataType", - DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + make("ReshapeWeights", { true })), make("DataType", DataType::F32)), make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), ActivationFunctionsSmallDataset)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); } +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLGEMMConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(combine(combine( + make("Input", TensorShape(23U, 27U, 5U)), + make("Weights", TensorShape(3U, 3U, 5U, 2U))), + make("Bias", TensorShape(2U))), + make("Output", TensorShape(11U, 25U, 2U))), + make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))), + make("Dilation", Size2D(1, 1))), + make("ReshapeWeights", { true })), + make("DataType", DataType::F32)), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsSmallDataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunSmallWithPadding, CLConvolutionValidationWithPaddingFixture<float>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerPrePaddingDataset(), + make("ReshapeWeights", { true })), + make("DataType", DataType::F32)), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + make("ActivationInfo", { ActivationLayerInfo() })), +make("PrePadLayer", { PaddingList({ { 1, 1 }, { 1, 1 } }) }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + TEST_SUITE_END() // FP32 TEST_SUITE_END() // Float template <typename T> using CLGEMMConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLGEMMConvolutionLayer, T>; template <typename T> +using CLGEMMConvolutionLayerQuantizedMixedDataLayoutFixture = ConvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLGEMMConvolutionLayer, T, true>; +template <typename T> using CLGEMMConvolutionLayerQuantizedPerChannelFixture = ConvolutionValidationQuantizedPerChannelFixture<CLTensor, CLAccessor, CLGEMMConvolutionLayer, T, int8_t>; -const auto QuantizedActivationFunctionsDataset = framework::dataset::make("ActivationInfo", -{ - ActivationLayerInfo(), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f) -}); -const auto QuantizedActivationFunctionsSmallDataset = framework::dataset::make("ActivationInfo", -{ - ActivationLayerInfo(), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f) -}); - TEST_SUITE(Quantized) -const auto QuantizationData = framework::dataset::make("QuantizationInfo", +const auto QuantizationData = make("QuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(0.3f, 3), QuantizationInfo(1.1f, 10), }); + +/// @note: Every asymmetric quantized test has a version with or without activation because the quantization info given +/// is ignored when there is no activation. Instead of using the same quantization information for all the tensors, the +/// fixture generates separate quantization info for each input and the output tensor. +/// When we can also support dynamic quantization with the presence of activation, these two versions should be merged +/// again, with the explicitly specified quantization info removed +const auto NoActivation = make("ActivationInfo", ActivationLayerInfo()); + +const auto IgnoredQuantizationInfo = make("IgnoredQuantizationInfo", QuantizationInfo()); + +const auto QuantizedActivationFunctionsSmallDataset = make("ActivationInfo", +{ + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f) +}); + TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmallCases, CLGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(SmallConvolutionLayerDatasetCases(), - framework::dataset::make("ReshapeWeights", { true })), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - QuantizationData), - QuantizedActivationFunctionsSmallDataset)) + combine(SmallConvolutionLayerDatasetCases(), + make("ReshapeWeights", { true }), + make("DataType", DataType::QASYMM8), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }), + IgnoredQuantizationInfo, + NoActivation)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} + +FIXTURE_DATA_TEST_CASE(RunSmallCasesWithActivation, CLGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, + combine(SmallConvolutionLayerDatasetCases(), + make("ReshapeWeights", { true }), + make("DataType", DataType::QASYMM8), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }), + QuantizationData, + QuantizedActivationFunctionsSmallDataset)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); } FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), - framework::dataset::make("ReshapeWeights", { true })), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - QuantizationData), - QuantizedActivationFunctionsSmallDataset)) + combine(datasets::SmallConvolutionLayerDataset(), + make("ReshapeWeights", { true }), + make("DataType", DataType::QASYMM8), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }), + IgnoredQuantizationInfo, + NoActivation)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} + +FIXTURE_DATA_TEST_CASE(RunSmallWithActivation, CLGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, + combine(datasets::SmallConvolutionLayerDataset(), + make("ReshapeWeights", { true }), + make("DataType", DataType::QASYMM8), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }), + QuantizationData, + QuantizedActivationFunctionsSmallDataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLGEMMConvolutionLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::ALL, + combine( + make("Input", TensorShape(23U, 27U, 5U)), + make("Weights", TensorShape(3U, 3U, 5U, 2U)), + make("Bias", TensorShape(2U)), + make("Output", TensorShape(11U, 25U, 2U)), + make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0)), + make("Dilation", Size2D(1, 1)), + make("ReshapeWeights", { true }), + make("DataType", DataType::QASYMM8), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }), + IgnoredQuantizationInfo, + NoActivation)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunMixedDataLayoutWithActivation, CLGEMMConvolutionLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::ALL, + combine( + make("Input", TensorShape(23U, 27U, 5U)), + make("Weights", TensorShape(3U, 3U, 5U, 2U)), + make("Bias", TensorShape(2U)), + make("Output", TensorShape(11U, 25U, 2U)), + make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0)), + make("Dilation", Size2D(1, 1)), + make("ReshapeWeights", { true }), + make("DataType", DataType::QASYMM8), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }), + QuantizationData, + QuantizedActivationFunctionsSmallDataset)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); } TEST_SUITE_END() // QASYMM8 TEST_SUITE(QASYMM8_SIGNED) - FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), - framework::dataset::make("ReshapeWeights", { true })), - framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - QuantizationData), - QuantizedActivationFunctionsSmallDataset)) + combine(datasets::SmallConvolutionLayerDataset(), + make("ReshapeWeights", { true }), + make("DataType", DataType::QASYMM8_SIGNED), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }), + IgnoredQuantizationInfo, + NoActivation)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunSmallWithActivation, CLGEMMConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, + combine(datasets::SmallConvolutionLayerDataset(), + make("ReshapeWeights", { true }), + make("DataType", DataType::QASYMM8_SIGNED), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }), + QuantizationData, + QuantizedActivationFunctionsSmallDataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLGEMMConvolutionLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::ALL, + combine( + make("Input", TensorShape(23U, 27U, 5U)), + make("Weights", TensorShape(3U, 3U, 5U, 2U)), + make("Bias", TensorShape(2U)), + make("Output", TensorShape(11U, 25U, 2U)), + make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0)), + make("Dilation", Size2D(1, 1)), + make("ReshapeWeights", { true }), + make("DataType", DataType::QASYMM8_SIGNED), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }), + IgnoredQuantizationInfo, + NoActivation)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunMixedDataLayoutWithActivation, CLGEMMConvolutionLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::ALL, + combine( + make("Input", TensorShape(23U, 27U, 5U)), + make("Weights", TensorShape(3U, 3U, 5U, 2U)), + make("Bias", TensorShape(2U)), + make("Output", TensorShape(11U, 25U, 2U)), + make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0)), + make("Dilation", Size2D(1, 1)), + make("ReshapeWeights", { true }), + make("DataType", DataType::QASYMM8_SIGNED), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }), + QuantizationData, + QuantizedActivationFunctionsSmallDataset)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); } TEST_SUITE_END() // QASYMM8_SIGNED TEST_SUITE(QSYMM8_PER_CHANNEL) +const auto QuantizedActivationFunctionsSmallPerChannelDataset = make("ActivationInfo", +{ + ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f) +}); + FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLGEMMConvolutionLayerQuantizedPerChannelFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), - framework::dataset::make("ReshapeWeights", { true })), - framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + make("ReshapeWeights", { true })), + make("DataType", { DataType::QASYMM8_SIGNED })), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), QuantizationData), - QuantizedActivationFunctionsSmallDataset), - framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) + QuantizedActivationFunctionsSmallPerChannelDataset), + make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); @@ -299,12 +443,12 @@ FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLGEMMConvolutionLayerQuantizedPerChannel FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), - framework::dataset::make("ReshapeWeights", { true })), - framework::dataset::make("DataType", { DataType::QASYMM8 })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + make("ReshapeWeights", { true })), + make("DataType", { DataType::QASYMM8 })), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), QuantizationData), - QuantizedActivationFunctionsSmallDataset), - framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) + QuantizedActivationFunctionsSmallPerChannelDataset), + make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); @@ -323,9 +467,7 @@ TEST_SUITE(Float) TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMGroupedConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallGroupedConvolutionLayerDataset(), - framework::dataset::make("ReshapeWeights", { true })), - framework::dataset::make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW })), + make("ReshapeWeights", { true })), make("DataType", DataType::F32)), make("DataLayout", { DataLayout::NCHW })), ActivationFunctionsSmallDataset)) { // Validate output @@ -334,9 +476,9 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMGroupedConvolutionLayerFixture<float>, fr FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMGroupedConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeGroupedConvolutionLayerDataset(), - framework::dataset::make("ReshapeWeights", { true })), - framework::dataset::make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW })), + make("ReshapeWeights", { true })), + make("DataType", DataType::F32)), + make("DataLayout", { DataLayout::NCHW })), ActivationFunctionsDataset)) { // Validate output @@ -347,9 +489,7 @@ TEST_SUITE_END() // FP32 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMGroupedConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallGroupedConvolutionLayerDataset(), - framework::dataset::make("ReshapeWeights", { true })), - framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW })), + make("ReshapeWeights", { true })), make("DataType", DataType::F16)), make("DataLayout", { DataLayout::NCHW })), ActivationFunctionsSmallDataset)) { // Validate output @@ -358,9 +498,9 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMGroupedConvolutionLayerFixture<half>, fra FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMGroupedConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeGroupedConvolutionLayerDataset(), - framework::dataset::make("ReshapeWeights", { true })), - framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW })), + make("ReshapeWeights", { true })), + make("DataType", DataType::F16)), + make("DataLayout", { DataLayout::NCHW })), ActivationFunctionsDataset)) { // Validate output diff --git a/tests/validation/CL/DeconvolutionLayer.cpp b/tests/validation/CL/DeconvolutionLayer.cpp index c284cdcee3..d1508fd902 100644 --- a/tests/validation/CL/DeconvolutionLayer.cpp +++ b/tests/validation/CL/DeconvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -53,27 +53,29 @@ const auto data9x9_small_asymm = framework::dataset::make("InputShape", TensorSh *framework::dataset::make("PadLeft", 3) *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop", 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 }); -const auto data9x9_large_asymm = framework::dataset::make("InputShape", TensorShape{ 640U, 360U, 56U, 1U }) *framework::dataset::make("StrideX", 2) *framework::dataset::make("StrideY", - 2) - *framework::dataset::make("PadLeft", 3) - *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop", 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 }); - -const auto data4x4 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 3) - * framework::dataset::make("PadY", 0, 3) * framework::dataset::make("NumKernels", { 3 }); - -const auto data3x3 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 2) +const auto data4x4 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4, 2) * framework::dataset::make("StrideY", 2, 4) * framework::dataset::make("PadX", 1, 3) * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("NumKernels", { 3 }); +const auto data3x3 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 2, 4) * framework::dataset::make("StrideY", 1, 4, 2) * framework::dataset::make("PadX", 1, 2) + * framework::dataset::make("PadY", 1, 3) * framework::dataset::make("NumKernels", { 3 }); + const auto data3x3_asymm = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 2) * framework::dataset::make("StrideY", 1, 2) * framework::dataset::make("PadLeft", 0, 1) * framework::dataset::make("PadRight", 0, 1) * framework::dataset::make("PadTop", 0, 1) * framework::dataset::make("PadBottom", 0, 1) * framework::dataset::make("NumKernels", { 3 }); const auto data3x3_precommit = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 2) * framework::dataset::make("StrideY", 1, 2) * framework::dataset::make("PadX", 0, 2) * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("NumKernels", { 3 }); +const auto data3x3_precommit_large_channels = datasets::SmallDeconvolutionShapesWithLargerChannels() * framework::dataset::make("StrideX", 2) * framework::dataset::make("StrideY", 2) + * framework::dataset::make("PadX", 1) + * framework::dataset::make("PadY", 2) * framework::dataset::make("NumKernels", { 5 }); + const auto data2x2_precommit = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 2) * framework::dataset::make("StrideY", 2) * framework::dataset::make("PadX", 1) * framework::dataset::make("PadY", 1) * framework::dataset::make("NumKernels", { 3 }); -const auto data1x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 1) +const auto data1x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4, 2) * framework::dataset::make("StrideY", 2, 4) * framework::dataset::make("PadX", 0, 1) + * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("NumKernels", { 3 }); + +const auto data5x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 2, 4) * framework::dataset::make("StrideY", 1, 4, 2) * framework::dataset::make("PadX", 0, 1) * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("NumKernels", { 3 }); const auto data_layouts_dataset = framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }); @@ -87,42 +89,73 @@ TEST_SUITE(DeconvolutionLayer) // *INDENT-OFF* // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid weights shape - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16), // Non supported data type - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid bias shape - TensorInfo(TensorShape(13U, 11U, 4U, 3U), 1, DataType::F32), // Window shrink - TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32), - }), + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid weights shape + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16), // Non supported data type + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid bias shape + TensorInfo(TensorShape(13U, 11U, 4U, 3U), 1, DataType::F32), // Window shrink + TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(2U, 13U, 27U), 1, DataType::F32, DataLayout::NHWC), // Mismatching data type + TensorInfo(TensorShape(2U, 13U, 27U), 1, DataType::F32, DataLayout::NHWC), // Invalid weights shape + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16, DataLayout::NHWC), // Non supported data type + TensorInfo(TensorShape(2U, 13U, 27U), 1, DataType::F32, DataLayout::NHWC), // Invalid bias shape + TensorInfo(TensorShape(4U, 11U, 13U, 3U), 1, DataType::F32, DataLayout::NHWC), // Window shrink + TensorInfo(TensorShape(2U, 16U, 32U), 1, DataType::F32, DataLayout::NHWC), + }), framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F16), - TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32), - TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F16), - TensorInfo(TensorShape(3U, 2U, 2U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(3U, 3U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F16), + TensorInfo(TensorShape(3U, 2U, 2U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(3U, 3U, 4U), 1, DataType::F32), TensorInfo(TensorShape(1U, 1U, 2U, 4U), 1, DataType::F32), - })), - framework::dataset::make("BiasInfo", { TensorInfo(TensorShape(1U), 1, DataType::F16), - TensorInfo(TensorShape(1U), 1, DataType::F32), - TensorInfo(TensorShape(1U), 1, DataType::F32), - TensorInfo(TensorShape(25U, 11U), 1, DataType::F32), - TensorInfo(TensorShape(1U), 1, DataType::F32), - TensorInfo(TensorShape(4U), 1, DataType::F32), - })), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16), - TensorInfo(TensorShape(25U, 10U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(13U, 13U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(11U, 9U, 1U, 3U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 16U, 4U), 1, DataType::F32), - })), + TensorInfo(TensorShape(2U, 3U, 3U, 2U), 1, DataType::F16, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 3U, 3U, 4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F16, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 2U, 3U, 2U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(4U, 3U, 3U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 2U, 2U, 4U), 1, DataType::F32, DataLayout::NHWC), + })), + framework::dataset::make("BiasInfo", { TensorInfo(TensorShape(1U), 1, DataType::F16), + TensorInfo(TensorShape(1U), 1, DataType::F32), + TensorInfo(TensorShape(1U), 1, DataType::F32), + TensorInfo(TensorShape(25U, 11U), 1, DataType::F32), + TensorInfo(TensorShape(1U), 1, DataType::F32), + TensorInfo(TensorShape(4U), 1, DataType::F32), + TensorInfo(TensorShape(1U), 1, DataType::F16, DataLayout::NHWC), + TensorInfo(TensorShape(1U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(1U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(25U, 11U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(1U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC), + })), + framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16), + TensorInfo(TensorShape(25U, 10U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(13U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(11U, 9U, 1U, 3U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 16U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(2U, 11U, 25U), 1, DataType::F16, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 10U, 25U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 13U, 13U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(1U, 9U, 11U, 3U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(4U, 43U, 91U), 1, DataType::F32, DataLayout::NHWC), + })), framework::dataset::make("PadStrideInfo", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(1, 1, 0, 0), PadStrideInfo(1, 1, 0, 0), PadStrideInfo(1, 1, 0, 0), PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(3, 3, 2, 2), })), - framework::dataset::make("Expected", { false, false, false, false, false, true })), + framework::dataset::make("Expected", { false, false, false, false, false, true, // NCHW + false, false, false, false, false, true })), // NHWC input_info, weights_info, bias_info, output_info, pad_info, expected) { bool is_valid = bool(CLDeconvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pad_info)); @@ -149,6 +182,9 @@ using CLDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture<CLTensor, template <typename T> using CLDeconvolutionLayerAsymmFixture9x9 = DeconvolutionValidationAsymmFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 9, 9>; +template <typename T> +using CLDeconvolutionLayerFixture5x1 = DeconvolutionValidationFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 5, 1>; + TEST_SUITE(Float) TEST_SUITE(FP32) @@ -171,6 +207,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerFixture3x3<float>, framewor // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); } + +FIXTURE_DATA_TEST_CASE(RunSmallWithLargeChannels, CLDeconvolutionLayerFixture3x3<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data3x3_precommit_large_channels, + framework::dataset::make("DataType", + DataType::F32)), + data_layouts_dataset), + framework::dataset::make("AddBias", { true }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_fp32); +} + FIXTURE_DATA_TEST_CASE(RunAsymm, CLDeconvolutionLayerAsymmFixture3x3<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data3x3_asymm, framework::dataset::make("DataType", DataType::F32)), data_layouts_dataset), @@ -180,8 +227,8 @@ FIXTURE_DATA_TEST_CASE(RunAsymm, CLDeconvolutionLayerAsymmFixture3x3<float>, fra validate(CLAccessor(_target), _reference, tolerance_fp32); } FIXTURE_DATA_TEST_CASE(RunLarge, CLDeconvolutionLayerFixture3x3<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data3x3, framework::dataset::make("DataType", DataType::F32)), - data_layouts_dataset), - add_bias_dataset)) + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + framework::dataset::make("AddBias", { true }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); @@ -202,7 +249,7 @@ TEST_SUITE_END() // W2x2 TEST_SUITE(W1x1) FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture1x1<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data1x1, framework::dataset::make("DataType", DataType::F32)), data_layouts_dataset), - add_bias_dataset)) + framework::dataset::make("AddBias", { true }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); @@ -218,6 +265,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerAsymmFixture9x9<float>, fra validate(CLAccessor(_target), _reference, tolerance_fp32); } TEST_SUITE_END() // W9x9 + +TEST_SUITE(W5x1) +FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture5x1<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data5x1, framework::dataset::make("DataType", DataType::F32)), + data_layouts_dataset), + framework::dataset::make("AddBias", { true }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_fp32); +} +TEST_SUITE_END() // W5x1 + TEST_SUITE_END() // FP32 TEST_SUITE(FP16) @@ -225,7 +283,7 @@ TEST_SUITE(FP16) TEST_SUITE(W4x4) FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture4x4<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data4x4, framework::dataset::make("DataType", DataType::F16)), data_layouts_dataset), - add_bias_dataset)) + framework::dataset::make("AddBias", { true }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); @@ -242,8 +300,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerFixture3x3<half>, framework validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); } FIXTURE_DATA_TEST_CASE(RunLarge, CLDeconvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data3x3, framework::dataset::make("DataType", DataType::F16)), - data_layouts_dataset), - add_bias_dataset)) + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + framework::dataset::make("AddBias", { true }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); @@ -264,13 +322,23 @@ TEST_SUITE_END() // W2x2 TEST_SUITE(W1x1) FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture1x1<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data1x1, framework::dataset::make("DataType", DataType::F16)), data_layouts_dataset), - add_bias_dataset)) + framework::dataset::make("AddBias", { true }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); } TEST_SUITE_END() // W1x1 +TEST_SUITE(W5x1) +FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture5x1<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data5x1, framework::dataset::make("DataType", DataType::F16)), + data_layouts_dataset), + framework::dataset::make("AddBias", { true }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); +} +TEST_SUITE_END() // W5x1 + TEST_SUITE_END() // FP16 TEST_SUITE_END() // Float @@ -286,6 +354,24 @@ using CLDeconvolutionLayerQuantizedFixture2x2 = DeconvolutionValidationQuantized template <typename T> using CLDeconvolutionLayerQuantizedFixture1x1 = DeconvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 1, 1>; +template <typename T> +using CLDeconvolutionLayerQuantizedFixture5x1 = DeconvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 5, 1>; + +template <typename T> +using CLDeconvolutionLayerQuantizedPerChannelFixture4x4 = DeconvolutionValidationQuantizedPerChannelFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, int8_t, 4, 4>; + +template <typename T> +using CLDeconvolutionLayerQuantizedPerChannelFixture3x3 = DeconvolutionValidationQuantizedPerChannelFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, int8_t, 3, 3>; + +template <typename T> +using CLDeconvolutionLayerQuantizedPerChannelFixture2x2 = DeconvolutionValidationQuantizedPerChannelFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, int8_t, 2, 2>; + +template <typename T> +using CLDeconvolutionLayerQuantizedPerChannelFixture1x1 = DeconvolutionValidationQuantizedPerChannelFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, int8_t, 1, 1>; + +template <typename T> +using CLDeconvolutionLayerQuantizedPerChannelFixture5x1 = DeconvolutionValidationQuantizedPerChannelFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, int8_t, 5, 1>; + TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) @@ -295,7 +381,7 @@ FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture4x4<uint8_t>, fr data_layouts_dataset), framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 10), QuantizationInfo(2.f / 255.f, 5) })), framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 5), QuantizationInfo(4.f / 255.f, 10) })), - add_bias_dataset)) + framework::dataset::make("AddBias", { true }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num); @@ -315,12 +401,11 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedFixture3x3<uint8_t validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num); } FIXTURE_DATA_TEST_CASE(RunLarge, CLDeconvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data3x3, - framework::dataset::make("DataType", - DataType::QASYMM8)), - data_layouts_dataset), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("DataLayout", { DataLayout::NHWC })), framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 10), QuantizationInfo(2.f / 255.f, 128) })), framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 128), QuantizationInfo(4.f / 255.f, 128) })), - add_bias_dataset)) + framework::dataset::make("AddBias", { true }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num); @@ -346,13 +431,26 @@ FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture1x1<uint8_t>, fr data_layouts_dataset), framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 0), QuantizationInfo(2.f / 255.f, 0) })), framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 0), QuantizationInfo(4.f / 255.f, 0) })), - add_bias_dataset)) + framework::dataset::make("AddBias", { true }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num); } TEST_SUITE_END() // W1x1 +TEST_SUITE(W5x1) +FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture5x1<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data5x1, framework::dataset::make("DataType", + DataType::QASYMM8)), + data_layouts_dataset), + framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 10), QuantizationInfo(2.f / 255.f, 5) })), + framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 5), QuantizationInfo(4.f / 255.f, 10) })), + framework::dataset::make("AddBias", { true }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num); +} +TEST_SUITE_END() // W5x1 + TEST_SUITE_END() // QASYMM8 TEST_SUITE(QASYMM8_SIGNED) @@ -366,7 +464,7 @@ FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture4x4<int8_t>, fra data_layouts_dataset), framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 10), QuantizationInfo(2.f / 255.f, 5) })), framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 5), QuantizationInfo(4.f / 255.f, 10) })), - add_bias_dataset)) + framework::dataset::make("AddBias", { true }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num); @@ -388,12 +486,11 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedFixture3x3<int8_t> } FIXTURE_DATA_TEST_CASE(RunLarge, CLDeconvolutionLayerQuantizedFixture3x3<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data3x3, - framework::dataset::make("DataType", - DataType::QASYMM8_SIGNED)), - data_layouts_dataset), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + framework::dataset::make("DataLayout", { DataLayout::NHWC })), framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, -10), QuantizationInfo(2.f / 255.f, 127) })), framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 64), QuantizationInfo(4.f / 255.f, -128) })), - add_bias_dataset)) + framework::dataset::make("AddBias", { true }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num); @@ -414,20 +511,192 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedFixture2x2<int8_t> TEST_SUITE_END() // W2x2 TEST_SUITE(W1x1) // DirectDeconvolution and GEMMDeconvolution -FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture1x1<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data1x1, framework::dataset::make("DataType", - DataType::QASYMM8_SIGNED)), +FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture1x1<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data1x1, + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), data_layouts_dataset), framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 0), QuantizationInfo(2.f / 255.f, 0) })), framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 0), QuantizationInfo(4.f / 255.f, 0) })), - add_bias_dataset)) + framework::dataset::make("AddBias", { true }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num); } TEST_SUITE_END() // W1x1 +TEST_SUITE(W5x1) +FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture5x1<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data5x1, framework::dataset::make("DataType", + DataType::QASYMM8_SIGNED)), + data_layouts_dataset), + framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 10), QuantizationInfo(2.f / 255.f, 5) })), + framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 5), QuantizationInfo(4.f / 255.f, 10) })), + framework::dataset::make("AddBias", { true }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num); +} +TEST_SUITE_END() // W5x1 + TEST_SUITE_END() // QASYMM8_SIGNED +const auto input_qinfo_dataset = framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 10) }); +const auto output_qinfo_dataset = framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 0) }); +const auto input_signed_qinfo_dataset = framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, -10) }); +const auto output_signed_qinfo_dataset = framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 10) }); + +TEST_SUITE(QSYMM8_PER_CHANNEL) + +TEST_SUITE(W4x4) +FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedPerChannelFixture4x4<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(data4x4, + framework::dataset::make("DataType", DataType::QASYMM8)), + data_layouts_dataset), + input_qinfo_dataset), + output_qinfo_dataset), + framework::dataset::make("AddBias", { true })), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num); +} +FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLDeconvolutionLayerQuantizedPerChannelFixture4x4<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(data4x4, + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + data_layouts_dataset), + input_signed_qinfo_dataset), + output_signed_qinfo_dataset), + framework::dataset::make("AddBias", { true })), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num); +} +TEST_SUITE_END() // W4x4 + +TEST_SUITE(W3x3) +FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedPerChannelFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(data3x3, + framework::dataset::make("DataType", DataType::QASYMM8)), + data_layouts_dataset), + input_qinfo_dataset), + output_qinfo_dataset), + framework::dataset::make("AddBias", { true })), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num); +} +FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLDeconvolutionLayerQuantizedPerChannelFixture3x3<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(data3x3, + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + data_layouts_dataset), + input_signed_qinfo_dataset), + output_signed_qinfo_dataset), + framework::dataset::make("AddBias", { true })), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num); +} + +FIXTURE_DATA_TEST_CASE(RunSmallSignedPrecommit, CLDeconvolutionLayerQuantizedPerChannelFixture2x2<int8_t>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(combine(combine(data3x3_precommit, + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + data_layouts_dataset), + input_signed_qinfo_dataset), + output_signed_qinfo_dataset), + add_bias_dataset), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num); +} +TEST_SUITE_END() // W3x3 + +FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedPerChannelFixture2x2<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(data3x3_precommit, + framework::dataset::make("DataType", DataType::QASYMM8)), + data_layouts_dataset), + input_qinfo_dataset), + output_qinfo_dataset), + add_bias_dataset), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num); +} + +TEST_SUITE(W2x2) +FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedPerChannelFixture2x2<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(data2x2_precommit, + framework::dataset::make("DataType", DataType::QASYMM8)), + data_layouts_dataset), + input_qinfo_dataset), + output_qinfo_dataset), + add_bias_dataset), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num); +} +FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLDeconvolutionLayerQuantizedPerChannelFixture2x2<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(data2x2_precommit, + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + data_layouts_dataset), + input_signed_qinfo_dataset), + output_signed_qinfo_dataset), + add_bias_dataset), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num); +} +TEST_SUITE_END() // W2x2 + +TEST_SUITE(W1x1) +FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedPerChannelFixture1x1<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(data1x1, + framework::dataset::make("DataType", DataType::QASYMM8)), + data_layouts_dataset), + input_qinfo_dataset), + output_qinfo_dataset), + framework::dataset::make("AddBias", { false })), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num); +} +FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLDeconvolutionLayerQuantizedPerChannelFixture1x1<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(data1x1, + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + data_layouts_dataset), + input_signed_qinfo_dataset), + output_signed_qinfo_dataset), + framework::dataset::make("AddBias", { true })), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num); +} +TEST_SUITE_END() // W1x1 + +TEST_SUITE(W5x1) +FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedPerChannelFixture5x1<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(data5x1, + framework::dataset::make("DataType", DataType::QASYMM8)), + data_layouts_dataset), + input_qinfo_dataset), + output_qinfo_dataset), + framework::dataset::make("AddBias", { true })), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num); +} +FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLDeconvolutionLayerQuantizedPerChannelFixture5x1<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(data5x1, + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + data_layouts_dataset), + input_signed_qinfo_dataset), + output_signed_qinfo_dataset), + framework::dataset::make("AddBias", { false })), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num); +} +TEST_SUITE_END() // W5x1 + +TEST_SUITE_END() // QSYMM8_PER_CHANNEL + TEST_SUITE_END() // Quantized TEST_SUITE_END() // DeconvolutionLayer diff --git a/tests/validation/CL/DepthConvertLayer.cpp b/tests/validation/CL/DepthConvertLayer.cpp index a823b278fc..490b38ccf6 100644 --- a/tests/validation/CL/DepthConvertLayer.cpp +++ b/tests/validation/CL/DepthConvertLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,17 +44,16 @@ namespace validation namespace { /** Input data sets **/ -const auto DepthConvertLayerU8toU16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U16)); -const auto DepthConvertLayerU8toS16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16)); -const auto DepthConvertLayerU8toS32Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S32)); -const auto DepthConvertLayerU16toU8Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U8)); -const auto DepthConvertLayerU16toU32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32)); -const auto DepthConvertLayerS16toU8Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8)); -const auto DepthConvertLayerS16toS32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32)); -const auto DepthConvertLayerF16toF32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F32)); -const auto DepthConvertLayerF32toF16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F16)); -const auto DepthConvertLayerShiftDatasetNightly = framework::dataset::make("Shift", 0, 7); -const auto DepthConvertLayerShiftDatasetPrecommit = framework::dataset::make("Shift", { 0, 3, 6 }); +const auto DepthConvertLayerU8toU16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U16)); +const auto DepthConvertLayerU8toS16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16)); +const auto DepthConvertLayerU8toS32Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S32)); +const auto DepthConvertLayerU16toU8Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U8)); +const auto DepthConvertLayerU16toU32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32)); +const auto DepthConvertLayerS16toU8Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8)); +const auto DepthConvertLayerS16toS32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32)); +const auto DepthConvertLayerF16toF32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F32)); +const auto DepthConvertLayerF32toF16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F16)); +const auto DepthConvertLayerZeroShiftDataset = framework::dataset::make("Shift", 0); } // namespace TEST_SUITE(CL) @@ -63,7 +62,7 @@ TEST_SUITE(DepthConvertLayer) // *INDENT-OFF* // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Invalid data type combination + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Support upcasting from QASYMM8 to S16 TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Mismatching shapes TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8), // Invalid shift @@ -84,8 +83,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( ConvertPolicy::WRAP, ConvertPolicy::WRAP, })), - framework::dataset::make("Shift",{ 1, 1, 8, 1, 1, 1, })), - framework::dataset::make("Expected", { false, false, false, false, false, true})), + framework::dataset::make("Shift",{ 0, 0, 0, 1, 1, 0, })), + framework::dataset::make("Expected", { true, false, false, false, false, true})), input_info, output_info, policy, shift, expected) { ARM_COMPUTE_EXPECT(bool(CLDepthConvertLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), policy, shift)) == expected, framework::LogLevel::ERRORS); @@ -111,7 +110,7 @@ using CLDepthConvertLayerToF32Fixture = DepthConvertLayerValidationFixture<CLTen TEST_SUITE(U8_to_U16) FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToU16Fixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toU16Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetPrecommit)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(CLAccessor(_target), _reference); @@ -119,7 +118,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToU16Fixture<uint8_t>, frame FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToU16Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toU16Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetNightly)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(CLAccessor(_target), _reference); @@ -129,7 +128,7 @@ TEST_SUITE_END() // U8_to_U16 TEST_SUITE(U8_to_S16) FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToS16Fixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toS16Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetPrecommit)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(CLAccessor(_target), _reference); @@ -137,7 +136,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToS16Fixture<uint8_t>, frame FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToS16Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toS16Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetNightly)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(CLAccessor(_target), _reference); @@ -146,7 +145,7 @@ TEST_SUITE_END() // U8_to_S16 TEST_SUITE(U8_to_S32) FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToS32Fixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toS32Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetPrecommit)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(CLAccessor(_target), _reference); @@ -154,7 +153,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToS32Fixture<uint8_t>, frame FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToS32Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toS32Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetNightly)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(CLAccessor(_target), _reference); @@ -164,14 +163,14 @@ TEST_SUITE_END() // U8_to_S32 TEST_SUITE(U16_to_U8) FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToU8Fixture<uint16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU16toU8Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetPrecommit)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(CLAccessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToU8Fixture<uint16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU16toU8Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetNightly)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(CLAccessor(_target), _reference); @@ -181,14 +180,14 @@ TEST_SUITE_END() // U16_to_U8 TEST_SUITE(U16_to_U32) FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToU32Fixture<uint16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU16toU32Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetPrecommit)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(CLAccessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToU32Fixture<uint16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU16toU32Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetNightly)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(CLAccessor(_target), _reference); @@ -198,14 +197,14 @@ TEST_SUITE_END() // U16_to_U32 TEST_SUITE(S16_to_U8) FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToU8Fixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerS16toU8Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetPrecommit)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(CLAccessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToU8Fixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerS16toU8Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetNightly)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(CLAccessor(_target), _reference); @@ -215,14 +214,14 @@ TEST_SUITE_END() // S16_to_U8 TEST_SUITE(S16_to_S32) FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToS32Fixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerS16toS32Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetPrecommit)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(CLAccessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToS32Fixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerS16toS32Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetNightly)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(CLAccessor(_target), _reference); diff --git a/tests/validation/CL/DepthwiseConvolutionLayer.cpp b/tests/validation/CL/DepthwiseConvolutionLayer.cpp index b2009c26ad..d4dbcec9d9 100644 --- a/tests/validation/CL/DepthwiseConvolutionLayer.cpp +++ b/tests/validation/CL/DepthwiseConvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -41,6 +41,9 @@ namespace test { namespace validation { + +using framework::dataset::make; + namespace { RelativeTolerance<half_float::half> tolerance_f16(half_float::half(0.01)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ @@ -48,16 +51,47 @@ constexpr RelativeTolerance<float> tolerance_f32(0.01f); /**< constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */ constexpr float tolerance_num = 0.05f; /**< Tolerance number */ -const auto depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 2, 5 }); -const auto large_depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 2, 5, 8 }); +const auto depth_multipliers = make("DepthMultiplier", { 1, 4 }); +const auto large_depth_multipliers = make("DepthMultiplier", { 2, 5, 8 }); -//Activation Functions -const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo", +// Activation Functions +const auto NoActivation = make("ActivationInfo", ActivationLayerInfo()); + +const auto ActivationFunctionsSmallDataset = make("ActivationInfo", { ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 2.f, 0.f) +}); + +const auto ActivationFunctionsDataset = make("ActivationInfo", +{ + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.8f, -0.5f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SOFT_RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ABS), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SQUARE), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::HARD_SWISH), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 2.f, 1.f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::GELU) +}); + +const auto ActivationFunctionsQuantizedSmallDataset = make("ActivationInfo", +{ + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 2.f, 0.f) +}); + +const auto ActivationFunctionsQuantizedDataset = make("ActivationInfo", +{ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f, 0.f) + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 2.3f, -1.5f), }); + +const auto IgnoredQuantizationInfo = make("IgnoredQuantizationInfo", QuantizationInfo()); + } // namespace TEST_SUITE(CL) @@ -65,85 +99,85 @@ TEST_SUITE(DepthwiseConvolutionLayer) // *INDENT-OFF* // clang-format off -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type input/weights - TensorInfo(TensorShape(27U, 13U, 3U), 1, DataType::F32), // Mismatching input feature maps - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching depth multiplier - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases size - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases dimensions - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid output size - TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32), // patch size bigger than input width - TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32), // dilation < 1 - TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 13U, 8U), 1, DataType::QASYMM8), - }), - framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F16), - TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(3U, 3U, 24U), 1, DataType::QASYMM8), - })), - framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(2U), 1, DataType::F32), - TensorInfo(TensorShape(2U), 1, DataType::F32), - TensorInfo(TensorShape(2U), 1, DataType::F32), - TensorInfo(TensorShape(4U), 1, DataType::F32), - TensorInfo(TensorShape(2U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(2U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(24U), 1, DataType::S32), - })), - framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 11U, 24U), 1, DataType::QASYMM8), - })), - framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0), - PadStrideInfo(1, 1, 0, 0), - PadStrideInfo(1, 1, 0, 0), - PadStrideInfo(1, 1, 0, 0), - PadStrideInfo(1, 1, 0, 0), - PadStrideInfo(1, 1, 0, 0), - PadStrideInfo(1, 1, 0, 0), - PadStrideInfo(1, 1, 0, 0), - PadStrideInfo(1, 1, 0, 0), - PadStrideInfo(1, 1, 1, 0), - })), - framework::dataset::make("DepthMultiplier", { 1, - 1, - 3, - 1, - 1, - 1, - 2, - 2, - 2, - 3, - })), - framework::dataset::make("Dilation", { Size2D(1U, 1U), - Size2D(1U, 1U), - Size2D(1U, 1U), - Size2D(1U, 1U), - Size2D(1U, 1U), - Size2D(1U, 1U), - Size2D(20U, 1U), - Size2D(0U, 1U), - Size2D(1U, 1U), - Size2D(1U, 1U), - })), - framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, true, true })), +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip( + make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type input/weights + TensorInfo(TensorShape(27U, 13U, 3U), 1, DataType::F32), // Mismatching input feature maps + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching depth multiplier + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases size + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases dimensions + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid output size + TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32), // patch size bigger than input width + TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32), // dilation < 1 + TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 8U), 1, DataType::QASYMM8), + }), + make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F16), + TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(3U, 3U, 24U), 1, DataType::QASYMM8), + }), + make("BiasesInfo", { TensorInfo(TensorShape(2U), 1, DataType::F32), + TensorInfo(TensorShape(2U), 1, DataType::F32), + TensorInfo(TensorShape(2U), 1, DataType::F32), + TensorInfo(TensorShape(4U), 1, DataType::F32), + TensorInfo(TensorShape(2U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(2U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(24U), 1, DataType::S32), + }), + make("OutputInfo", { TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 11U, 24U), 1, DataType::QASYMM8), + }), + make("ConvInfo", { PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 1, 0), + }), + make("DepthMultiplier", { 1, + 1, + 3, + 1, + 1, + 1, + 2, + 2, + 2, + 3, + }), + make("Dilation", { Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(20U, 1U), + Size2D(0U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + }), + make("Expected", { false, false, false, false, false, false, false, false, true, true })), input_info, weights_info, biases_info, output_info, conv_info, depth_multiplier, dilation, expected) { bool is_valid = bool(CLDepthwiseConvolutionLayer::validate(&input_info.clone()->set_is_resizable(true), &weights_info.clone()->set_is_resizable(true), &biases_info.clone()->set_is_resizable(true), &output_info.clone()->set_is_resizable(true), conv_info, depth_multiplier,ActivationLayerInfo(), dilation)); @@ -154,48 +188,34 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi template <typename T> using CLDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer, T>; +template <typename T> +using CLDepthwiseConvolutionLayerMixedDataLayoutFixture = DepthwiseConvolutionLayerValidationFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer, T, true>; +template <typename T> +using CLDepthwiseConvolutionLayerInPlaceFixture = DepthwiseConvolutionLayerValidationFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer, T, false, true>; TEST_SUITE(Float) TEST_SUITE(FP16) TEST_SUITE(W3x3) TEST_SUITE(NCHW) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL, - combine(combine(combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), - datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()), - depth_multipliers), - framework::dataset::make("DataType", - DataType::F16)), - framework::dataset::make("DataLayout", DataLayout::NCHW)), - ActivationFunctionsDataset)) -{ - validate(CLAccessor(_target), _reference, tolerance_f16); -} -FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), - large_depth_multipliers), - framework::dataset::make("DataType", - DataType::F16)), - framework::dataset::make("DataLayout", DataLayout::NCHW)), - ActivationFunctionsDataset)) + combine( + framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()), + depth_multipliers, + make("DataType", DataType::F16), + make("DataLayout", DataLayout::NCHW), + ActivationFunctionsSmallDataset)) { validate(CLAccessor(_target), _reference, tolerance_f16); } TEST_SUITE(Dilation) -FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), - depth_multipliers), - framework::dataset::make("DataType", - DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - ActivationFunctionsDataset)) -{ - validate(CLAccessor(_target), _reference, tolerance_f16); -} -FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), - large_depth_multipliers), - framework::dataset::make("DataType", - DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL, + combine( + datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), + depth_multipliers, + make("DataType", DataType::F16), + make("DataLayout", { DataLayout::NCHW }), + ActivationFunctionsSmallDataset)) { validate(CLAccessor(_target), _reference, tolerance_f16); } @@ -204,41 +224,42 @@ TEST_SUITE_END() // NCHW TEST_SUITE(NHWC) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL, - combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), - depth_multipliers), - framework::dataset::make("DataType", - DataType::F16)), - framework::dataset::make("DataLayout", DataLayout::NHWC)), - ActivationFunctionsDataset)) + combine( + datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers, + make("DataType", DataType::F16), + make("DataLayout", DataLayout::NHWC), + ActivationFunctionsSmallDataset)) { validate(CLAccessor(_target), _reference, tolerance_f16); } -FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), - large_depth_multipliers), - framework::dataset::make("DataType", - DataType::F16)), - framework::dataset::make("DataLayout", DataLayout::NHWC)), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, + combine( + datasets::LargeDepthwiseConvolutionLayerDataset3x3Fp16Subset(), + large_depth_multipliers, + make("DataType", DataType::F16), + make("DataLayout", DataLayout::NHWC), + make("ActivationInfo", ActivationLayerInfo()))) { validate(CLAccessor(_target), _reference, tolerance_f16); } TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers), - framework::dataset::make("DataType", + make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("DataLayout", { DataLayout::NHWC })), + ActivationFunctionsSmallDataset)) { validate(CLAccessor(_target), _reference, tolerance_f16); } FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), + combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3Fp16Subset(), large_depth_multipliers), - framework::dataset::make("DataType", + make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("DataLayout", { DataLayout::NHWC })), + make("ActivationInfo", ActivationLayerInfo()))) { validate(CLAccessor(_target), _reference, tolerance_f16); } @@ -249,19 +270,33 @@ TEST_SUITE_END() // W3x3 TEST_SUITE(Generic) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), depth_multipliers), - framework::dataset::make("DataType", + make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsSmallDataset)) { validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); } -FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), +FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDatasetFp16Subset(), large_depth_multipliers), - framework::dataset::make("DataType", + make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("DataLayout", { DataLayout::NHWC })), + make("ActivationInfo", ActivationLayerInfo()))) +{ + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); +} + +FIXTURE_DATA_TEST_CASE_NEW(RunActivations, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, + combine( + make("In", TensorShape(33U, 27U, 11U, 3U)), + make("Weights", Size2D(3U, 4U)), + make("Info", PadStrideInfo(1, 2, 0, 1)), + make("Dilation", Size2D(2U, 2U)), + make("DepthMultiplier", { 2 }), + make("DataType", DataType::F16), + make("DataLayout", { DataLayout::NHWC }), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); } @@ -269,25 +304,38 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, f TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), depth_multipliers), - framework::dataset::make("DataType", + make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsSmallDataset)) { validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); } FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), + combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDatasetFp16Subset(), large_depth_multipliers), - framework::dataset::make("DataType", + make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("DataLayout", { DataLayout::NHWC })), + make("ActivationInfo", ActivationLayerInfo()))) { validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); } TEST_SUITE_END() // Dilation TEST_SUITE_END() // Generic + +TEST_SUITE(InPlace) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerInPlaceFixture<half>, framework::DatasetMode::ALL, + combine(combine(combine(combine(datasets::SmallInPlaceDepthwiseConvolutionLayerDataset(), + make("DepthMultiplier", { 1 })), + make("DataType", + DataType::F16)), + make("DataLayout", { DataLayout::NHWC })), + ActivationFunctionsSmallDataset)) +{ + validate(CLAccessor(_src), _reference, tolerance_f16, tolerance_num); +} +TEST_SUITE_END() // InPlace TEST_SUITE_END() // FP16 TEST_SUITE(FP32) @@ -297,84 +345,76 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, combine(combine(combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()), depth_multipliers), - framework::dataset::make("DataType", + make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", DataLayout::NCHW)), - ActivationFunctionsDataset)) -{ - validate(CLAccessor(_target), _reference, tolerance_f32); -} -FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), - large_depth_multipliers), - framework::dataset::make("DataType", - DataType::F32)), - framework::dataset::make("DataLayout", DataLayout::NCHW)), - ActivationFunctionsDataset)) + make("DataLayout", DataLayout::NCHW)), + ActivationFunctionsSmallDataset)) { validate(CLAccessor(_target), _reference, tolerance_f32); } TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers), - framework::dataset::make("DataType", + make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", DataLayout::NCHW)), - ActivationFunctionsDataset)) + make("DataLayout", DataLayout::NCHW)), + ActivationFunctionsSmallDataset)) { validate(CLAccessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), - large_depth_multipliers), - framework::dataset::make("DataType", - DataType::F32)), - framework::dataset::make("DataLayout", DataLayout::NCHW)), - ActivationFunctionsDataset)) -{ - validate(CLAccessor(_target), _reference, tolerance_f32); -} - TEST_SUITE_END() // Dilation TEST_SUITE_END() // NCHW + TEST_SUITE(NHWC) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), depth_multipliers), - framework::dataset::make("DataType", + make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", DataLayout::NHWC)), - ActivationFunctionsDataset)) + make("DataLayout", DataLayout::NHWC)), + ActivationFunctionsSmallDataset)) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout, CLDepthwiseConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + make("DepthMultiplier", { 2 })), + make("DataType", + DataType::F32)), + make("DataLayout", DataLayout::NHWC)), + make("ActivationInfo", ActivationLayerInfo()))) { validate(CLAccessor(_target), _reference, tolerance_f32); } FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), large_depth_multipliers), - framework::dataset::make("DataType", + make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", DataLayout::NHWC)), - ActivationFunctionsDataset)) + make("DataLayout", DataLayout::NHWC)), + make("ActivationInfo", ActivationLayerInfo()))) { validate(CLAccessor(_target), _reference, tolerance_f32); } + TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers), - framework::dataset::make("DataType", + make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", DataLayout::NHWC)), - ActivationFunctionsDataset)) + make("DataLayout", DataLayout::NHWC)), + ActivationFunctionsSmallDataset)) { validate(CLAccessor(_target), _reference, tolerance_f32); } FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), large_depth_multipliers), - framework::dataset::make("DataType", + make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", DataLayout::NHWC)), - ActivationFunctionsDataset)) + make("DataLayout", DataLayout::NHWC)), + make("ActivationInfo", ActivationLayerInfo()))) { validate(CLAccessor(_target), _reference, tolerance_f32); } @@ -385,19 +425,45 @@ TEST_SUITE_END() // W3x3 TEST_SUITE(Generic) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), depth_multipliers), - framework::dataset::make("DataType", + make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsSmallDataset)) { validate(CLAccessor(_target), _reference, tolerance_f32); } + FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), large_depth_multipliers), - framework::dataset::make("DataType", + make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("DataLayout", { DataLayout::NHWC })), + make("ActivationInfo", ActivationLayerInfo()))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE_NEW(RunLargeKernelSize, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL, + combine(combine(combine(combine(datasets::LargeKernelSizeDepthwiseConvolutionLayerNHWCDataset(), + make("DepthMultiplier", { 1 })), + make("DataType", + DataType::F32)), + make("DataLayout", { DataLayout::NHWC })), + ActivationFunctionsSmallDataset)) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE_NEW(RunActivations, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, + combine( + make("In", TensorShape(33U, 27U, 11U, 3U)), + make("Weights", Size2D(3U, 4U)), + make("Info", PadStrideInfo(1, 2, 0, 1)), + make("Dilation", Size2D(2U, 2U)), + make("DepthMultiplier", { 2 }), + make("DataType", DataType::F32), + make("DataLayout", { DataLayout::NHWC }), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_f32); } @@ -405,78 +471,130 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), depth_multipliers), - framework::dataset::make("DataType", + make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsSmallDataset)) { validate(CLAccessor(_target), _reference, tolerance_f32); } FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), large_depth_multipliers), - framework::dataset::make("DataType", + make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("DataLayout", { DataLayout::NHWC })), + make("ActivationInfo", ActivationLayerInfo()))) { validate(CLAccessor(_target), _reference, tolerance_f32); } TEST_SUITE_END() // Dilation TEST_SUITE_END() // Generic + +TEST_SUITE(InPlace) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerInPlaceFixture<float>, framework::DatasetMode::ALL, + combine(combine(combine(combine(datasets::SmallInPlaceDepthwiseConvolutionLayerDataset(), + make("DepthMultiplier", { 1 })), + make("DataType", + DataType::F32)), + make("DataLayout", { DataLayout::NHWC })), + ActivationFunctionsSmallDataset)) +{ + validate(CLAccessor(_src), _reference, tolerance_f32); +} +TEST_SUITE_END() // InPlace TEST_SUITE_END() // FP32 TEST_SUITE_END() // Float template <typename T> using CLDepthwiseConvolutionLayerQuantizedFixture = DepthwiseConvolutionLayerValidationQuantizedFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer, T>; template <typename T> +using CLDepthwiseConvolutionLayerQuantizedMixedDataLayoutFixture = DepthwiseConvolutionLayerValidationQuantizedFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer, T, true>; +template <typename T> using CLDepthwiseConvolutionLayerQuantizedPerChannelFixture = DepthwiseConvolutionLayerValidationQuantizedPerChannelFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer, T, int8_t>; TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) TEST_SUITE(Generic) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), - depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10), QuantizationInfo(2.2f, 10) })), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })), - framework::dataset::make("DataLayout", { DataLayout::NHWC })), // NCHW is tested with int8 - ActivationFunctionsDataset)) + combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + depth_multipliers, + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NHWC }), // NCHW is tested with int8 + NoActivation)) +{ + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + depth_multipliers, + make("DataType", DataType::QASYMM8), + make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 128), QuantizationInfo(2.2f, 10) }), + make("DstQuantizationInfo", { QuantizationInfo(1.f, 128) }), + make("DataLayout", { DataLayout::NHWC }), // NCHW is tested with int8 + ActivationFunctionsQuantizedSmallDataset)) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), - large_depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) })), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.7f, 2) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + combine(datasets::LargeDepthwiseConvolutionLayerDataset(), + large_depth_multipliers, + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NHWC }), + NoActivation)) +{ + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunActivations, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, + combine( + make("In", TensorShape(33U, 27U, 11U, 3U)), + make("Weights", Size2D(3U, 4U)), + make("Info", PadStrideInfo(1, 2, 0, 1)), + make("Dilation", Size2D(2U, 2U)), + make("DepthMultiplier", { 2U }), + make("DataType", DataType::QASYMM8), + make("SrcQuantizationInfo", { QuantizationInfo(2.2f, 10) }), + make("DstQuantizationInfo", { QuantizationInfo(0.1f, 128) }), + make("DataLayout", { DataLayout::NHWC }), + ActivationFunctionsQuantizedDataset)) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), - depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) })), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.8, 1) })), - framework::dataset::make("DataLayout", { DataLayout::NHWC })), // NCHW is tested with int8 - ActivationFunctionsDataset)) + combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), + depth_multipliers, + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NHWC }), // NCHW is tested with int8 + NoActivation)) +{ + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), + depth_multipliers, + make("DataType", DataType::QASYMM8), + make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) }), + make("DstQuantizationInfo", { QuantizationInfo(0.8, 1) }), + make("DataLayout", { DataLayout::NHWC }), // NCHW is tested with int8 + ActivationFunctionsQuantizedSmallDataset)) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), - large_depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(1.3f, 10) })), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.9f, 11) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), + large_depth_multipliers, + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NHWC }), + NoActivation)) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } @@ -484,47 +602,80 @@ TEST_SUITE_END() // Dilation TEST_SUITE_END() // Generic TEST_SUITE(W3x3) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), - depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10), QuantizationInfo(2.2f, 10) })), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers, + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NHWC }), + NoActivation)) +{ + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers, + make("DataType", DataType::QASYMM8), + make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10), QuantizationInfo(2.2f, 10) }), + make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) }), + make("DataLayout", { DataLayout::NHWC }), + ActivationFunctionsQuantizedSmallDataset)) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), - large_depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) })), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), + large_depth_multipliers, + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NHWC }), + NoActivation)) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), - depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) })), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), + depth_multipliers, + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }), + NoActivation)) +{ + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), + depth_multipliers, + make("DataType", DataType::QASYMM8), + make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) }), + make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) }), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }), + ActivationFunctionsQuantizedSmallDataset)) +{ + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout, CLDepthwiseConvolutionLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), + make("DepthMultiplier", { 2 }), + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }), + NoActivation)) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), - large_depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) })), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), + large_depth_multipliers, + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NHWC }), + NoActivation)) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } @@ -535,25 +686,73 @@ TEST_SUITE_END() // QASYMM8 TEST_SUITE(QASYMM8_SIGNED) TEST_SUITE(Generic) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), - depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), - framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10), QuantizationInfo(2.2f, 10) })), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - ActivationFunctionsDataset)) + combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + depth_multipliers, + make("DataType", DataType::QASYMM8_SIGNED), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NCHW }), + NoActivation)) +{ + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, CLDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + depth_multipliers, + make("DataType", DataType::QASYMM8_SIGNED), + make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10), QuantizationInfo(2.2f, 10) }), + make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) }), + make("DataLayout", { DataLayout::NCHW }), + ActivationFunctionsQuantizedSmallDataset)) +{ + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout, CLDepthwiseConvolutionLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + make("DepthMultiplier", { 2 }), + make("DataType", DataType::QASYMM8_SIGNED), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NCHW }), + NoActivation)) +{ + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunActivations, CLDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::NIGHTLY, + combine( + make("In", TensorShape(33U, 27U, 11U, 3U)), + make("Weights", Size2D(3U, 4U)), + make("Info", PadStrideInfo(1, 2, 0, 1)), + make("Dilation", Size2D(2U, 2U)), + make("DepthMultiplier", { 2U }), + make("DataType", DataType::QASYMM8_SIGNED), + make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10) }), + make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) }), + make("DataLayout", { DataLayout::NHWC }), + ActivationFunctionsQuantizedDataset)) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), - depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), - framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) })), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.8, 1) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - ActivationFunctionsDataset)) + combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), + depth_multipliers, + make("DataType", DataType::QASYMM8_SIGNED), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NCHW }), + NoActivation)) +{ + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, CLDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), + depth_multipliers, + make("DataType", DataType::QASYMM8_SIGNED), + make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) }), + make("DstQuantizationInfo", { QuantizationInfo(0.8, 1) }), + make("DataLayout", { DataLayout::NCHW }), + ActivationFunctionsQuantizedSmallDataset)) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } @@ -566,24 +765,40 @@ TEST_SUITE(Generic) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), depth_multipliers), - framework::dataset::make("SrcDataType", DataType::QASYMM8)), - framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)), - framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10) })), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("SrcDataType", DataType::QASYMM8)), + make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)), + make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10) })), + make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsSmallDataset)) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), large_depth_multipliers), - framework::dataset::make("SrcDataType", DataType::QASYMM8)), - framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)), - framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.7f, 2) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("SrcDataType", DataType::QASYMM8)), + make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)), + make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })), + make("DstQuantizationInfo", { QuantizationInfo(0.7f, 2) })), + make("DataLayout", { DataLayout::NHWC })), + make("ActivationInfo", ActivationLayerInfo()))) +{ + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunActivations, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<int8_t>, framework::DatasetMode::NIGHTLY, + combine( + make("In", TensorShape(33U, 27U, 11U, 3U)), + make("Weights", Size2D(3U, 4U)), + make("Info", PadStrideInfo(1, 2, 0, 1)), + make("Dilation", Size2D(2U, 2U)), + make("DepthMultiplier", { 2U }), + make("SrcDataType", DataType::QASYMM8_SIGNED), + make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL), + make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10) }), + make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) }), + make("DataLayout", { DataLayout::NHWC }), + ActivationFunctionsQuantizedDataset)) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } @@ -591,24 +806,24 @@ TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), depth_multipliers), - framework::dataset::make("SrcDataType", DataType::QASYMM8)), - framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)), - framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.8, 1) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("SrcDataType", DataType::QASYMM8)), + make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)), + make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })), + make("DstQuantizationInfo", { QuantizationInfo(0.8, 1) })), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsSmallDataset)) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), large_depth_multipliers), - framework::dataset::make("SrcDataType", DataType::QASYMM8)), - framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)), - framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.9f, 11) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("SrcDataType", DataType::QASYMM8)), + make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)), + make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })), + make("DstQuantizationInfo", { QuantizationInfo(0.9f, 11) })), + make("DataLayout", { DataLayout::NHWC })), + make("ActivationInfo", ActivationLayerInfo()))) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } @@ -618,24 +833,24 @@ TEST_SUITE(W3x3) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), depth_multipliers), - framework::dataset::make("SrcDataType", DataType::QASYMM8)), - framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)), - framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10) })), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("SrcDataType", DataType::QASYMM8)), + make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)), + make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10) })), + make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsSmallDataset)) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), large_depth_multipliers), - framework::dataset::make("SrcDataType", DataType::QASYMM8)), - framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)), - framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("SrcDataType", DataType::QASYMM8)), + make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)), + make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })), + make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })), + make("DataLayout", { DataLayout::NHWC })), + make("ActivationInfo", ActivationLayerInfo()))) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } @@ -643,24 +858,24 @@ TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers), - framework::dataset::make("SrcDataType", DataType::QASYMM8)), - framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)), - framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("SrcDataType", DataType::QASYMM8)), + make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)), + make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })), + make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsSmallDataset)) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), large_depth_multipliers), - framework::dataset::make("SrcDataType", DataType::QASYMM8)), - framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)), - framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("SrcDataType", DataType::QASYMM8)), + make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)), + make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })), + make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })), + make("DataLayout", { DataLayout::NHWC })), + make("ActivationInfo", ActivationLayerInfo()))) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } diff --git a/tests/validation/CL/DepthwiseConvolutionLayerNative.cpp b/tests/validation/CL/DepthwiseConvolutionLayerNative.cpp index 6b917d8962..012018c0fc 100644 --- a/tests/validation/CL/DepthwiseConvolutionLayerNative.cpp +++ b/tests/validation/CL/DepthwiseConvolutionLayerNative.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,7 +30,6 @@ #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/PaddingCalculator.h" -#include "tests/datasets/ShapeDatasets.h" #include "tests/framework/Asserts.h" #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" @@ -43,6 +42,7 @@ namespace test { namespace validation { +using framework::dataset::make; using namespace arm_compute::misc::shape_calculator; // Create function for CLDepthwiseConvolutionLayerNativeKernel @@ -63,64 +63,89 @@ RelativeTolerance<half_float::half> rel_tolerance_f16(half_float::half(0.01f)); constexpr float abs_tolerance_f16(0.03f); /** Width values to test - Precommit */ -const auto width_values_precommit = framework::dataset::make("width", { 1U, 17U, 32U } ); +const auto width_values_precommit = make("width", { 1U, 33U } ); /** Width values to test - Nightly */ -const auto width_values_nightly = framework::dataset::make("width", { 53U, 47U } ); +const auto width_values_nightly = make("width", { 53U, 47U } ); /** Height values to test - Precommit */ -const auto height_values_precommit = framework::dataset::make("height", { 19U } ); +const auto height_values_precommit = make("height", { 19U } ); /** Height values to test - Nightly */ -const auto height_values_nightly = framework::dataset::make("height", { 39U, 43U } ); +const auto height_values_nightly = make("height", { 39U, 43U } ); /** Channel values to test - Precommit */ -const auto channel_values_precommit = framework::dataset::make("channels", { 15U }); +const auto channel_values_precommit = make("channels", { 15U }); /** Channel values to test - Nightly */ -const auto channel_values_nightly = framework::dataset::make("channels", { 33U, 19U }); +const auto channel_values_nightly = make("channels", { 33U, 19U }); + +/** Channel values to test with cl_image support - Precommit */ +const auto channel_values_export_to_cl_image_precommit = make("channels", { 16U }); + +/** Channel values to test with cl_image support - Nightly */ +const auto channel_values_export_to_cl_image_nightly = make("channels", { 32U }); /** Batch values to test - Precommit */ -const auto batch_values_precommit = framework::dataset::make("batch", { 1U, 2U }); +const auto batch_values_precommit = make("batch", { 1U, 2U }); /** Batch values to test - Nightly */ -const auto batch_values_nightly = framework::dataset::make("batch", { 1U, 3U }); +const auto batch_values_nightly = make("batch", { 3U }); /** Kernel size values to test - Precommit */ -const auto kernel_sz_values_precommit = framework::dataset::make("kernel_size", { Size2D(1U, 1U), Size2D(1U, 3U), Size2D(5U, 5U) }); +const auto kernel_sz_values_precommit = make("kernel_size", { Size2D(1U, 1U), Size2D(1U, 3U), Size2D(5U, 5U) }); /** Kernel size values to test - Nightly */ -const auto kernel_sz_values_nightly = framework::dataset::make("kernel_size", { Size2D(3U, 5U), Size2D(5U, 1U), Size2D(1U, 7U), Size2D(9U, 7U) }); +const auto kernel_sz_values_nightly = make("kernel_size", { Size2D(3U, 5U), Size2D(5U, 1U), Size2D(1U, 7U), Size2D(9U, 7U) }); /** Depth multiplier values to test - All */ -const auto depth_multiplier_values = framework::dataset::make("depth_multiplier", {3U}); +const auto depth_multiplier_values = make("depth_multiplier", {3U}); /** Dilation values to test - All */ -const auto dilation_values = framework::dataset::make("dilation", { Size2D(1U, 1U), Size2D(3U, 3U) }); +const auto dilation_values = make("dilation", { Size2D(1U, 1U), Size2D(3U, 3U) }); /** Stride values to test - All */ -const auto stride_values = framework::dataset::make("stride", { Size2D(1U, 1U), Size2D(3U, 2U) }); +const auto stride_values = make("stride", { Size2D(1U, 1U), Size2D(3U, 2U) }); -/** Padding values to test - All */ -const auto padding_valid_values = framework::dataset::make("padding_valid", { true, false }); +/** Padding values to test - Precommit */ +const auto padding_valid_values = make("padding_valid", { true, false }); -/** Data type values to test - All */ -const auto data_type_values = framework::dataset::make("data_type", { DataType::F32, DataType::F16 }); +/** Padding values to test - Nightly */ +const auto padding_valid_values_nightly = make("padding_valid", { false }); /** Data layout values to test - All */ -const auto data_layout_values = framework::dataset::make("data_layout", { DataLayout::NHWC }); +const auto data_layout_values = make("data_layout", { DataLayout::NHWC }); /** N0 values to test - Precommit */ -const auto n0_values_precommit = framework::dataset::make("N0", {2, 4}); +const auto n0_values_precommit = make("N0", {2, 4}); /** N0 values to test - Nightly */ -const auto n0_values_nightly = framework::dataset::make("N0", {3, 8}); +const auto n0_values_nightly = make("N0", {3, 8}); + +/** N0 values to test with cl_image support - Precommit */ +const auto n0_values_export_to_cl_image_precommit = make("N0", {4}); + +/** N0 values to test with cl_image support - Nightly */ +const auto n0_values_export_to_cl_image_nightly = make("N0", {8}); -/** Activation values to test */ -const auto act_values = framework::dataset::make("Activation", +/** Activation values to test in precommit */ +const auto act_values = make("Activation", { ActivationLayerInfo() }); + +const auto activations_rest = make("Activation", { - ActivationLayerInfo(), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, 2.f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f, 0.5f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.8f, -0.5f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SOFT_RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ABS), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SQUARE), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::HARD_SWISH), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 2.f, 1.f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::GELU) }); } // namespace @@ -130,94 +155,264 @@ TEST_SUITE(DepthwiseConvolutionLayerNative) TEST_SUITE(Float) TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( width_values_precommit, height_values_precommit), channel_values_precommit), batch_values_precommit), kernel_sz_values_precommit), - framework::dataset::make("depth_multiplier", 1)), + make("depth_multiplier", 1)), dilation_values), stride_values), padding_valid_values), - framework::dataset::make("DataType", DataType::F32)), + make("DataType", DataType::F32)), data_layout_values), act_values), - n0_values_precommit)) + n0_values_precommit), + make("ExportToCLImage", false))) { // Validate output validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); } FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( width_values_nightly, height_values_nightly), channel_values_nightly), batch_values_nightly), kernel_sz_values_nightly), - framework::dataset::make("depth_multiplier", 1)), + make("depth_multiplier", 1)), dilation_values), stride_values), - padding_valid_values), - framework::dataset::make("DataType", DataType::F32)), + padding_valid_values_nightly), + make("DataType", DataType::F32)), data_layout_values), - act_values), - n0_values_nightly)) + make("Activation", { ActivationLayerInfo() })), + n0_values_nightly), + make("ExportToCLImage", false))) +{ + // Validate output + validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE_NEW(RunActivations, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::ALL, + combine( + make("width", { 33U } ), + height_values_precommit, + channel_values_precommit, + make("batch", { 2U } ), + make("kernel_size", { Size2D(5U, 5U) }), + make("depth_multiplier", 1), + make("dilation", Size2D(3U, 3U)), + make("stride", Size2D(3U, 2U)), + padding_valid_values_nightly, + make("DataType", DataType::F32), + data_layout_values, + activations_rest, + n0_values_precommit, + make("ExportToCLImage", false))) { // Validate output validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); } + +TEST_SUITE(ExportWeightsToCLImage) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + width_values_precommit, + height_values_precommit), + channel_values_export_to_cl_image_precommit), + batch_values_precommit), + kernel_sz_values_precommit), + make("depth_multiplier", 1)), + dilation_values), + stride_values), + padding_valid_values), + make("DataType", DataType::F32)), + data_layout_values), + act_values), + n0_values_export_to_cl_image_precommit), + make("ExportToCLImage", true))) +{ + // Validate output + if(_validate_output) + { + // Validate output + validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } +} + +FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + width_values_nightly, + height_values_nightly), + channel_values_export_to_cl_image_nightly), + batch_values_nightly), + kernel_sz_values_nightly), + make("depth_multiplier", 1)), + dilation_values), + stride_values), + padding_valid_values_nightly), + make("DataType", DataType::F32)), + data_layout_values), + make("Activation", { ActivationLayerInfo() })), + n0_values_export_to_cl_image_nightly), + make("ExportToCLImage", true))) +{ + // Validate output + if(_validate_output) + { + // Validate output + validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } +} + +TEST_SUITE_END() // ExportWeightsToCLImage TEST_SUITE_END() // FP32 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( width_values_precommit, height_values_precommit), channel_values_precommit), batch_values_precommit), kernel_sz_values_precommit), - framework::dataset::make("depth_multiplier", 1)), + make("depth_multiplier", 1)), dilation_values), stride_values), padding_valid_values), - framework::dataset::make("DataType", DataType::F16)), + make("DataType", DataType::F16)), data_layout_values), act_values), - n0_values_precommit)) + n0_values_precommit), + make("ExportToCLImage", false))) { // Validate output validate(CLAccessor(_target), _reference, rel_tolerance_f16); } - FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( - width_values_nightly, - height_values_nightly), - channel_values_nightly), + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + make("width", { 47U } ), + make("height", { 39U } )), + make("channels", { 19U } )), batch_values_nightly), - kernel_sz_values_nightly), - framework::dataset::make("depth_multiplier", 1)), + make("kernel_size", { Size2D(5U, 5U) })), + make("depth_multiplier", 1)), + make("dilation", { Size2D(3U, 3U) })), + make("stride", { Size2D(3U, 2U) })), + padding_valid_values_nightly), + make("DataType", DataType::F16)), + data_layout_values), + make("Activation", { ActivationLayerInfo() })), + n0_values_nightly), + make("ExportToCLImage", false))) +{ + // Validate output + validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16); +} + +FIXTURE_DATA_TEST_CASE_NEW(RunActivations, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::ALL, + combine( + make("width", { 33U } ), + height_values_precommit, + channel_values_precommit, + make("batch", { 2U } ), + make("kernel_size", { Size2D(5U, 5U) }), + make("depth_multiplier", 4), + make("dilation", Size2D(3U, 3U)), + make("stride", Size2D(3U, 2U)), + padding_valid_values_nightly, + make("DataType", DataType::F16), + data_layout_values, + activations_rest, + n0_values_precommit, + make("ExportToCLImage", false))) +{ + // Validate output + validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16); +} + +TEST_SUITE(ExportWeightsToCLImage) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + width_values_precommit, + height_values_precommit), + channel_values_export_to_cl_image_precommit), + batch_values_precommit), + kernel_sz_values_precommit), + make("depth_multiplier", 1)), dilation_values), stride_values), padding_valid_values), - framework::dataset::make("DataType", DataType::F16)), + make("DataType", DataType::F16)), data_layout_values), act_values), - n0_values_nightly)) + n0_values_export_to_cl_image_precommit), + make("ExportToCLImage", true))) { - // Validate output - validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16); + // Validate output + if(_validate_output) + { + // Validate output + validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } +} +FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + make("width", { 47U } ), + make("height", { 39U } )), + channel_values_export_to_cl_image_nightly), + batch_values_nightly), + make("kernel_size", { Size2D(5U, 5U) })), + make("depth_multiplier", 1)), + make("dilation", { Size2D(3U, 3U) })), + make("stride", { Size2D(3U, 2U) })), + padding_valid_values_nightly), + make("DataType", DataType::F16)), + data_layout_values), + make("Activation", { ActivationLayerInfo() })), + n0_values_export_to_cl_image_nightly), + make("ExportToCLImage", true))) +{ + // Validate output + if(_validate_output) + { + // Validate output + validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } } +TEST_SUITE_END() // ExportWeightsToCLImage TEST_SUITE_END() // FP16 TEST_SUITE_END() // Float + TEST_SUITE(DepthMultiplier) TEST_SUITE(Float) TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( - width_values_precommit, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + make("width", { 33U } ), height_values_precommit), channel_values_precommit), batch_values_precommit), @@ -226,18 +421,19 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<fl dilation_values), stride_values), padding_valid_values), - framework::dataset::make("DataType", DataType::F32)), + make("DataType", DataType::F32)), data_layout_values), act_values), - framework::dataset::make("N0", 1))) + make("N0", 1)), + make("ExportToCLImage", false))) { // Validate output validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); } FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( - width_values_nightly, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + make("width", { 53U } ), height_values_nightly), channel_values_nightly), batch_values_nightly), @@ -245,21 +441,77 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<fl depth_multiplier_values), dilation_values), stride_values), + padding_valid_values_nightly), + make("DataType", DataType::F32)), + data_layout_values), + make("Activation", { ActivationLayerInfo() })), + make("N0", 1)), + make("ExportToCLImage", false))) +{ + // Validate output + validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); +} + +TEST_SUITE(DepthMultiplierMultipleOfOutputChannels) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + make("width", { 33U } ), + height_values_precommit), + channel_values_precommit), + batch_values_precommit), + kernel_sz_values_precommit), + make("depth_multiplier", 2)), + dilation_values), + stride_values), padding_valid_values), - framework::dataset::make("DataType", DataType::F32)), + make("DataType", DataType::F32)), data_layout_values), act_values), - framework::dataset::make("N0", 1))) + make("N0", {2})), + make("ExportToCLImage", false))) { // Validate output validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); } + +TEST_SUITE(ExportWeightsToCLImage) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + make("width", { 33U } ), + height_values_precommit), + channel_values_precommit), + batch_values_precommit), + kernel_sz_values_precommit), + make("depth_multiplier", 4)), + dilation_values), + stride_values), + padding_valid_values), + make("DataType", DataType::F32)), + data_layout_values), + act_values), + make("N0", {4})), + make("ExportToCLImage", true))) +{ + // Validate output + if(_validate_output) + { + // Validate output + validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } +} +TEST_SUITE_END() // ExportWeightsToCLImage +TEST_SUITE_END() // DepthMultiplierMultipleOfOutputChannels TEST_SUITE_END() // FP32 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( - width_values_precommit, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + make("width", { 33U } ), height_values_precommit), channel_values_precommit), batch_values_precommit), @@ -268,18 +520,19 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<ha dilation_values), stride_values), padding_valid_values), - framework::dataset::make("DataType", DataType::F16)), + make("DataType", DataType::F16)), data_layout_values), act_values), - framework::dataset::make("N0", 1))) + make("N0", 1)), + make("ExportToCLImage", false))) { // Validate output validate(CLAccessor(_target), _reference, rel_tolerance_f16); } FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( - width_values_nightly, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + make("width", { 53U } ), height_values_nightly), channel_values_nightly), batch_values_nightly), @@ -287,15 +540,71 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<ha depth_multiplier_values), dilation_values), stride_values), + padding_valid_values_nightly), + make("DataType", DataType::F16)), + data_layout_values), + make("Activation", { ActivationLayerInfo() })), + make("N0", 1)), + make("ExportToCLImage", false))) +{ + // Validate output + validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16); +} + +TEST_SUITE(DepthMultiplierMultipleOfOutputChannels) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + make("width", { 33U } ), + height_values_precommit), + channel_values_precommit), + batch_values_precommit), + kernel_sz_values_precommit), + make("depth_multiplier", 2)), + dilation_values), + stride_values), padding_valid_values), - framework::dataset::make("DataType", DataType::F16)), + make("DataType", DataType::F16)), data_layout_values), act_values), - framework::dataset::make("N0", 1))) + make("N0", {2})), + make("ExportToCLImage", false))) { // Validate output validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16); } + +TEST_SUITE(ExportWeightsToCLImage) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + make("width", { 33U } ), + height_values_precommit), + channel_values_precommit), + batch_values_precommit), + kernel_sz_values_precommit), + make("depth_multiplier", 4)), + dilation_values), + stride_values), + padding_valid_values), + make("DataType", DataType::F16)), + data_layout_values), + act_values), + make("N0", {4})), + make("ExportToCLImage", true))) +{ + // Validate output + if(_validate_output) + { + // Validate output + validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } +} +TEST_SUITE_END() // ExportWeightsToCLImage +TEST_SUITE_END() // DepthMultiplierMultipleOfOutputChannels TEST_SUITE_END() // FP16 TEST_SUITE_END() // Float TEST_SUITE_END() // DepthMultiplier diff --git a/tests/validation/CL/DilatedConvolutionLayer.cpp b/tests/validation/CL/DilatedConvolutionLayer.cpp index 9a9df2c7e4..776bf34151 100644 --- a/tests/validation/CL/DilatedConvolutionLayer.cpp +++ b/tests/validation/CL/DilatedConvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2020, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -167,13 +167,18 @@ template <typename T> using CLGEMMDilatedConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLGEMMConvolutionLayer, T>; TEST_SUITE(Quantized) +/// @note: Every asymmetric quantized test where there's no fused activation will have its quantization info ignored +/// This is because instead of using the same quantization information for all the tensors, the fixture generates +/// separate quantization info for each input and the output tensor. +/// When we can also support dynamic quantization with the presence of activation, we can remove the explicit +/// quantization info. TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMDilatedConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallDilatedConvolutionLayerDataset(), framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("DataLayout", { DataLayout::NCHW })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), + framework::dataset::make("IgnoredQuantizationInfo", { QuantizationInfo() })), framework::dataset::make("ActivationLayerInfo", { ActivationLayerInfo() }))) { // Validate output @@ -185,7 +190,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMDilatedConvolutionLayerQuantizedFixture<u framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("DataLayout", { DataLayout::NCHW })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 0) })), + framework::dataset::make("IgnoredQuantizationInfo", { QuantizationInfo() })), framework::dataset::make("ActivationLayerInfo", { ActivationLayerInfo() }))) { // Validate output diff --git a/tests/validation/CL/DirectConvolutionLayer.cpp b/tests/validation/CL/DirectConvolutionLayer.cpp index e244576daf..ff22ae5ef0 100644 --- a/tests/validation/CL/DirectConvolutionLayer.cpp +++ b/tests/validation/CL/DirectConvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -35,6 +35,9 @@ #include "tests/validation/Validation.h" #include "tests/validation/fixtures/DirectConvolutionLayerFixture.h" +/** Synced with tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp + * Please check there for any differences in the coverage + */ namespace arm_compute { namespace test @@ -43,10 +46,12 @@ namespace validation { namespace { -RelativeTolerance<half> tolerance_fp16(half(0.2)); /**< Tolerance for floating point tests */ -RelativeTolerance<float> tolerance_fp32(0.05f); /**< Tolerance for floating point tests */ -constexpr float tolerance_num = 0.07f; /**< Tolerance number */ -constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance for quantized tests */ +RelativeTolerance<half> tolerance_fp16(half(0.2)); /**< Tolerance for floating point tests */ +RelativeTolerance<float> tolerance_fp32(0.05f); /**< Tolerance for floating point tests */ +constexpr float abs_tolerance_f32(0.0001f); /**< Absolute tolerance for FP32 tests*/ + +constexpr float tolerance_num = 0.07f; /**< Tolerance number */ +constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance for quantized tests */ const auto data_strides = combine(framework::dataset::make("StrideX", 1, 3), framework::dataset::make("StrideY", 1, 3)); const auto data_strides_small = combine(framework::dataset::make("StrideX", 1), framework::dataset::make("StrideY", 1)); @@ -87,55 +92,132 @@ const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo TEST_SUITE(CL) TEST_SUITE(DirectConvolutionLayer) +/** Check whether the configuration of a Direct Convolution layer with no + * bias leads to a successful execution. + */ +TEST_CASE(NoBias, framework::DatasetMode::PRECOMMIT) +{ + const auto src_shape = TensorShape(27U, 13U, 2U); + const auto weights_shape = TensorShape(3U, 3U, 2U, 4U); + const auto bias_shape = TensorShape(4U); + const auto dst_shape = TensorShape(25U, 11U, 4U); + constexpr auto dt = DataType::F32; + + auto src = create_tensor<CLTensor>(src_shape, dt); + auto weights = create_tensor<CLTensor>(weights_shape, dt); + auto dst = create_tensor<CLTensor>(dst_shape, dt); + + const auto conv_info = PadStrideInfo(1, 1, 0, 0); + + // Create Direct Convolution function + CLDirectConvolutionLayer conv{}; + conv.configure(&src, &weights, nullptr, &dst, conv_info); + + src.allocator()->allocate(); + weights.allocator()->allocate(); + dst.allocator()->allocate(); + + library->fill_tensor_value(CLAccessor(src), 1.f); + library->fill_tensor_value(CLAccessor(weights), 1.f); + + conv.run(); + + // Compute reference to compare + SimpleTensor<float> ref_src{ src_shape, dt }; + SimpleTensor<float> ref_weights{ weights_shape, dt }; + SimpleTensor<float> ref_bias{ bias_shape, dt }; + library->fill_tensor_value(ref_src, 1.f); + library->fill_tensor_value(ref_weights, 1.f); + // No bias + library->fill_tensor_value(ref_bias, 0.f); + auto ref_dst = reference::convolution_layer<float>(ref_src, ref_weights, ref_bias, dst_shape, conv_info); + + validate(CLAccessor(dst), ref_dst); +} + +/** Check whether the case of rectangle kernels i.e. when width and height of the weight_shape are not equal + * would lead to successful run + */ +TEST_CASE(NonSquareKernel, framework::DatasetMode::PRECOMMIT) +{ + auto src_shape = TensorShape(33U, 27U, 3U); + auto weights_shape = TensorShape(5U, 7U, 3U, 4U); // non-square kernel + const auto bias_shape = TensorShape(4U); + auto dst_shape = TensorShape(11U, 12U, 4U); + constexpr auto dt = DataType::F32; + + TensorShape src_shape_nhwc(src_shape); + TensorShape weights_shape_nhwc(weights_shape); + TensorShape dst_shape_nhwc(dst_shape); + + // Non-square shapes are only allowed for NHWC + permute(src_shape_nhwc, PermutationVector(2U, 0U, 1U)); + permute(weights_shape_nhwc, PermutationVector(2U, 0U, 1U)); + permute(dst_shape_nhwc, PermutationVector(2U, 0U, 1U)); + + auto src = create_tensor<CLTensor>(src_shape_nhwc, dt, 1, QuantizationInfo(), DataLayout::NHWC); + auto weights = create_tensor<CLTensor>(weights_shape_nhwc, dt, 1, QuantizationInfo(), DataLayout::NHWC); + auto dst = create_tensor<CLTensor>(dst_shape_nhwc, dt, 1, QuantizationInfo(), DataLayout::NHWC); + const auto conv_info = PadStrideInfo(3, 2, 1, 1, 2, 0, DimensionRoundingType::FLOOR); + + // Create direct convolution function + CLDirectConvolutionLayer conv{}; + conv.configure(&src, &weights, nullptr, &dst, conv_info); + + src.allocator()->allocate(); + weights.allocator()->allocate(); + dst.allocator()->allocate(); + + library->fill_tensor_value(CLAccessor(src), 1.f); + library->fill_tensor_value(CLAccessor(weights), 1.f); + + conv.run(); + + // Compute reference to compare + SimpleTensor<float> ref_src{ src_shape, dt }; + SimpleTensor<float> ref_weights{ weights_shape, dt }; + SimpleTensor<float> ref_bias{ bias_shape, dt }; + library->fill_tensor_value(ref_src, 1.f); + library->fill_tensor_value(ref_weights, 1.f); + // No bias + library->fill_tensor_value(ref_bias, 0.f); + auto ref_dst = reference::convolution_layer<float>(ref_src, ref_weights, ref_bias, dst_shape, conv_info); + + validate(CLAccessor(dst), ref_dst); +} // *INDENT-OFF* // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type input/weights - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching input feature maps - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported kernel width - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Non-rectangular weights dimensions + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid: Mismatching data type input/weights + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid: Mismatching input feature maps TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid weights dimensions - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid stride - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases size - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases dimensions + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported biases size + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported biases dimensions TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid output size - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Window shrink TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32), }), framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F16), TensorInfo(TensorShape(3U, 3U, 3U, 4U), 1, DataType::F32), - TensorInfo(TensorShape(11U, 11U, 2U, 4U), 1, DataType::F32), - TensorInfo(TensorShape(5U, 3U, 2U, 4U), 1, DataType::F32), TensorInfo(TensorShape(3U, 3U, 2U, 4U, 3U), 1, DataType::F32), TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32), TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32), TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32), - TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32), - TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32), TensorInfo(TensorShape(1U, 1U, 2U, 4U), 1, DataType::F32), })), framework::dataset::make("BiasesInfo",{ TensorInfo(TensorShape(4U), 1, DataType::F32), TensorInfo(TensorShape(4U), 1, DataType::F32), TensorInfo(TensorShape(4U), 1, DataType::F32), - TensorInfo(TensorShape(4U), 1, DataType::F32), - TensorInfo(TensorShape(4U), 1, DataType::F32), - TensorInfo(TensorShape(4U), 1, DataType::F32), TensorInfo(TensorShape(3U), 1, DataType::F32), TensorInfo(TensorShape(4U, 2U), 1, DataType::F32), TensorInfo(TensorShape(4U), 1, DataType::F32), TensorInfo(TensorShape(4U), 1, DataType::F32), - TensorInfo(TensorShape(4U), 1, DataType::F32), })), framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32), TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32), TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32), TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32), TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32), - TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32), - TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32), - TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32), TensorInfo(TensorShape(26U, 11U, 4U), 1, DataType::F32), - TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32), TensorInfo(TensorShape(32U, 16U, 4U), 1, DataType::F32), })), framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0), @@ -143,47 +225,95 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip( PadStrideInfo(1, 1, 0, 0), PadStrideInfo(1, 1, 0, 0), PadStrideInfo(1, 1, 0, 0), - PadStrideInfo(3, 3, 0, 0), - PadStrideInfo(1, 1, 0, 0), - PadStrideInfo(1, 1, 0, 0), - PadStrideInfo(1, 1, 0, 0), PadStrideInfo(1, 1, 0, 0), PadStrideInfo(1, 1, 0, 0), })), framework::dataset::make("ActivationInfo", { + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) })), - framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, false, false, true })), + framework::dataset::make("Expected", { false, false, false, false, false, false, true })), input_info, weights_info, biases_info, output_info, conv_info, act_info, expected) { bool is_valid = bool(CLDirectConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, act_info)); ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); } +// clang-format on +// *INDENT-ON* template <typename T> using CLDirectConvolutionLayerFixture = DirectConvolutionValidationFixture<CLTensor, CLAccessor, CLDirectConvolutionLayer, T>; template <typename T> +using CLDirectConvolutionLayerMixedDataLayoutFixture = DirectConvolutionValidationFixture<CLTensor, CLAccessor, CLDirectConvolutionLayer, T, true>; +template <typename T> using CLDirectConvolutionValidationWithTensorShapesFixture = DirectConvolutionValidationWithTensorShapesFixture<CLTensor, CLAccessor, CLDirectConvolutionLayer, T>; template <typename T> using CLDirectConvolutionLayerQuantizedFixture = DirectConvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLDirectConvolutionLayer, T>; template <typename T> +using CLDirectConvolutionLayerQuantizedMixedDataLayoutFixture = DirectConvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLDirectConvolutionLayer, T, true>; +template <typename T> using CLDirectConvolutionValidationWithTensorShapesQuantizedFixture = DirectConvolutionValidationWithTensorShapesQuantizedFixture<CLTensor, CLAccessor, CLDirectConvolutionLayer, T>; TEST_SUITE(NHWC) +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputInfo", { + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Arbitrary weight sizes for NHWC are supported + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Non-rectangular weights dimensions for NHWC are supported + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Strides > 2 for any kernel sizes for NHWC are supported + }), + framework::dataset::make("WeightsInfo",{ + TensorInfo(TensorShape(2U, 13U, 13U, 4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 5U, 3U, 4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 3U, 3U, 4U), 1, DataType::F32, DataLayout::NHWC), + })), + framework::dataset::make("BiasesInfo",{ + TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC), + })), + framework::dataset::make("OutputInfo",{ + TensorInfo(TensorShape(4U, 15U, 1U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(4U, 23U, 11U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(4U, 9U, 4U), 1, DataType::F32, DataLayout::NHWC), + })), + framework::dataset::make("ConvInfo", { + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(3, 3, 0, 0), + })), + framework::dataset::make("ActivationInfo", +{ + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), +})), + framework::dataset::make("Expected", { true, true, true })), + input_info, weights_info, biases_info, output_info, conv_info, act_info, expected) +{ + bool is_valid = bool(CLDirectConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, act_info)); + ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); +} TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(zip(zip(zip(zip(zip(zip( - framework::dataset::make("InputShape", { TensorShape(27U, 13U, 2U), - TensorShape(9U, 5U, 6U, 4U), - TensorShape(3U, 5U, 7U, 2U), - TensorShape(32U, 37U, 3U) } ), + framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U), + TensorShape(19U, 5U, 16U, 4U), + TensorShape(13U, 5U, 17U, 2U), + TensorShape(32U, 37U, 13U) } ), framework::dataset::make("StrideX", { 1, 3, 1, 1 })), framework::dataset::make("StrideY", { 1, 3, 2, 1 })), framework::dataset::make("PadX", { 1, 3, 0, 4 })), framework::dataset::make("PadY", { 1, 3, 0, 4 })), framework::dataset::make("KernelSize", { 3, 8, 1, 9 })), - framework::dataset::make("NumKernels", { 7, 3, 1, 3 })), + framework::dataset::make("NumKernels", { 17, 3, 1, 19 })), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )), framework::dataset::make("DataLayout", DataLayout::NHWC))) @@ -212,23 +342,40 @@ TEST_SUITE_END() // FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(zip(zip(zip(zip(zip(zip( - framework::dataset::make("InputShape", { TensorShape(27U, 13U, 2U), - TensorShape(9U, 5U, 6U, 4U), - TensorShape(3U, 5U, 7U, 2U), - TensorShape(32U, 37U, 3U) } ), + framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U), + TensorShape(19U, 5U, 16U, 4U), + TensorShape(13U, 5U, 17U, 2U), + TensorShape(32U, 37U, 13U) } ), framework::dataset::make("StrideX", { 1, 3, 1, 1 })), framework::dataset::make("StrideY", { 1, 3, 2, 1 })), framework::dataset::make("PadX", { 1, 3, 0, 4 })), framework::dataset::make("PadY", { 1, 3, 0, 4 })), framework::dataset::make("KernelSize", { 3, 8, 1, 9 })), - framework::dataset::make("NumKernels", { 7, 3, 1, 3 })), + framework::dataset::make("NumKernels", { 17, 3, 1, 19 })), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )), framework::dataset::make("DataLayout", DataLayout::NHWC))) { - validate(CLAccessor(_target), _reference, tolerance_fp32); + validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLDirectConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U), + TensorShape(19U, 5U, 16U, 4U), + TensorShape(13U, 5U, 17U, 2U), + TensorShape(32U, 37U, 13U) } ), + framework::dataset::make("StrideX", { 1 })), + framework::dataset::make("StrideY", { 2 })), + framework::dataset::make("PadX", { 1 })), + framework::dataset::make("PadY", { 3 })), + framework::dataset::make("KernelSize", { 3 })), + framework::dataset::make("NumKernels", { 3 })), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32); } - FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(zip(zip(zip(zip(zip(zip( framework::dataset::make("InputShape", { TensorShape(800U, 800U, 3U) } ), @@ -242,19 +389,18 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerFixture<float>, framewo framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::IDENTITY) )), framework::dataset::make("DataLayout", DataLayout::NHWC))) { - validate(CLAccessor(_target), _reference, tolerance_fp32); + validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32); } - TEST_SUITE_END() // FP32 TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(zip(zip(zip(zip(zip(zip( - framework::dataset::make("InputShape", { TensorShape(27U, 13U, 2U), - TensorShape(9U, 5U, 6U, 4U), - TensorShape(3U, 5U, 7U, 2U), - TensorShape(32U, 37U, 3U) } ), + framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U), + TensorShape(19U, 5U, 16U, 4U), + TensorShape(13U, 5U, 17U, 2U), + TensorShape(32U, 37U, 13U) } ), framework::dataset::make("StrideX", { 1, 3, 1, 1 })), framework::dataset::make("StrideY", { 1, 3, 2, 1 })), framework::dataset::make("PadX", { 1, 3, 0, 4 })), @@ -268,7 +414,25 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerQuantizedFixture<uint8_ { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } - +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLDirectConvolutionLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U), + TensorShape(19U, 5U, 16U, 4U), + TensorShape(13U, 5U, 17U, 2U), + TensorShape(32U, 37U, 13U) } ), + framework::dataset::make("StrideX", { 1 })), + framework::dataset::make("StrideY", { 2 })), + framework::dataset::make("PadX", { 1 })), + framework::dataset::make("PadY", { 1 })), + framework::dataset::make("KernelSize", { 3 })), + framework::dataset::make("NumKernels", { 3 })), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", QuantizationInfo(1.1f / 255, 10))), + framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(zip(zip(zip(zip(zip(zip( framework::dataset::make("InputShape", { TensorShape(800U, 800U, 3U) } ), @@ -287,14 +451,13 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerQuantizedFixture<uint8_ } TEST_SUITE_END() // QASYMM8 -// TEST_SUITE(QASYMM8_SIGNED) FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(zip(zip(zip(zip(zip(zip( - framework::dataset::make("InputShape", { TensorShape(27U, 13U, 2U), - TensorShape(9U, 5U, 6U, 4U), - TensorShape(3U, 5U, 7U, 2U), - TensorShape(32U, 37U, 3U) } ), + framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U), + TensorShape(19U, 5U, 16U, 4U), + TensorShape(13U, 5U, 17U, 2U), + TensorShape(32U, 37U, 13U) } ), framework::dataset::make("StrideX", { 1, 3, 1, 1 })), framework::dataset::make("StrideY", { 1, 3, 2, 1 })), framework::dataset::make("PadX", { 1, 3, 0, 4 })), @@ -308,7 +471,25 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerQuantizedFixture<int8_t { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } - +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLDirectConvolutionLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U), + TensorShape(19U, 5U, 16U, 4U), + TensorShape(13U, 5U, 17U, 2U), + TensorShape(32U, 37U, 13U) } ), + framework::dataset::make("StrideX", { 1 })), + framework::dataset::make("StrideY", { 1 })), + framework::dataset::make("PadX", { 1 })), + framework::dataset::make("PadY", { 1 })), + framework::dataset::make("KernelSize", { 3 })), + framework::dataset::make("NumKernels", { 3 })), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + framework::dataset::make("QuantizationInfo", QuantizationInfo(2.f / 255, 10))), + framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(zip(zip(zip(zip(zip(zip( framework::dataset::make("InputShape", { TensorShape(800U, 800U, 3U) } ), @@ -329,9 +510,48 @@ TEST_SUITE_END() // QASYMM8_SIGNED TEST_SUITE_END() // Quantized TEST_SUITE_END() // NHWC +TEST_SUITE(NCHW) +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputInfo", { + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, DataLayout::NCHW), // Unsupported kernel width + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, DataLayout::NCHW), // Non-rectangular weights dimensions are unsupported + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, DataLayout::NCHW) // Unsupported stride + }), + framework::dataset::make("WeightsInfo",{ + TensorInfo(TensorShape(11U, 11U, 2U, 4U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(5U, 3U, 2U, 4U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, DataLayout::NCHW) + })), + framework::dataset::make("BiasesInfo",{ + TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NCHW) + })), + framework::dataset::make("OutputInfo",{ + TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(23U, 11U, 4U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, DataLayout::NCHW) + })), + framework::dataset::make("ConvInfo", { + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(3, 3, 0, 0) + })), + framework::dataset::make("ActivationInfo", +{ + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) +})), + framework::dataset::make("Expected", { false, false, false})), + input_info, weights_info, biases_info, output_info, conv_info, act_info, expected) +{ + bool is_valid = bool(CLDirectConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, act_info)); + ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); +} // clang-format on // *INDENT-ON* -TEST_SUITE(NCHW) + TEST_SUITE(Float) TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit, framework::dataset::make("DataType", DataType::F16)), @@ -356,13 +576,21 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerFixture<float>, framewo ActivationFunctionsDataset), framework::dataset::make("DataLayout", { DataLayout::NCHW }))) { - validate(CLAccessor(_target), _reference, tolerance_fp32); + validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLDirectConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit, + framework::dataset::make("DataType", + DataType::F32)), + ActivationFunctionsDataset), + framework::dataset::make("DataLayout", { DataLayout::NCHW }))) +{ + validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32); } FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data_nightly, framework::dataset::make("DataType", DataType::F32)), ActivationFunctionsDataset), framework::dataset::make("DataLayout", { DataLayout::NCHW }))) { - validate(CLAccessor(_target), _reference, tolerance_fp32); + validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32); } TEST_SUITE_END() // FP32 @@ -372,91 +600,202 @@ FIXTURE_DATA_TEST_CASE(Run, CLDirectConvolutionValidationWithTensorShapesFixture ActivationFunctionsDataset)) { // Validate output - validate(CLAccessor(_target), _reference, tolerance_fp32); + validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32); } TEST_SUITE_END() // FP32_CustomDataset TEST_SUITE_END() // Float +/// @note: Every quantized test has a version with or without activation because the quantization info given is +/// ignored when there is no activation. Instead of using the same quantization information for all the tensors, the +/// fixture generates separate quantization info for each input and the output tensor. +/// When we can also support dynamic quantization with the presence of activation, these two versions should be merged +/// again, with the explicitly specified quantization info removed const auto QuantizedActivationFunctionsDataset = framework::dataset::make("ActivationInfo", { - ActivationLayerInfo(), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f) }); +const auto NoActivation = framework::dataset::make("ActivationInfo", +{ + ActivationLayerInfo() +}); +const auto IgnoredQuantizationInfo = framework::dataset::make("IgnoredQuantizationInfo", +{ + QuantizationInfo() +}); TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) -FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(data_precommit, +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLDirectConvolutionLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(data_precommit, + framework::dataset::make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + NoActivation, + framework::dataset::make("DataLayout", { DataLayout::NCHW }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunMixedDataLayoutWithActivation, CLDirectConvolutionLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(data_precommit, + framework::dataset::make("DataType", DataType::QASYMM8), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10) }), + QuantizedActivationFunctionsDataset, + framework::dataset::make("DataLayout", { DataLayout::NCHW }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(data_precommit, + framework::dataset::make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + NoActivation, + framework::dataset::make("DataLayout", { DataLayout::NCHW }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunSmallWithActivation, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(data_precommit, + framework::dataset::make("DataType", DataType::QASYMM8), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, 10) }), + QuantizedActivationFunctionsDataset, + framework::dataset::make("DataLayout", { DataLayout::NCHW }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunSmall9x9, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(data_precommit_9x9, framework::dataset::make("DataType", - DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, 10) })), - QuantizedActivationFunctionsDataset), + DataType::QASYMM8), + IgnoredQuantizationInfo, + NoActivation, framework::dataset::make("DataLayout", { DataLayout::NCHW }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); } -FIXTURE_DATA_TEST_CASE(RunSmall9x9, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(data_precommit_9x9, +FIXTURE_DATA_TEST_CASE(RunSmall9x9WithActivation, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(data_precommit_9x9, framework::dataset::make("DataType", - DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(3.f / 255, 10), QuantizationInfo(1.1f, 10) })), - QuantizedActivationFunctionsDataset), + DataType::QASYMM8), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(3.f / 255, 10), QuantizationInfo(1.1f, 10) }), + QuantizedActivationFunctionsDataset, framework::dataset::make("DataLayout", { DataLayout::NCHW }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(data_nightly, framework::dataset::make("DataType", - DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, 10) })), - QuantizedActivationFunctionsDataset), +FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(data_nightly, framework::dataset::make("DataType", + DataType::QASYMM8), + IgnoredQuantizationInfo, + NoActivation, framework::dataset::make("DataLayout", { DataLayout::NCHW }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); } -FIXTURE_DATA_TEST_CASE(RunLarge9x9, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(data_nightly_9x9, +FIXTURE_DATA_TEST_CASE(RunLargeWithActivation, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(data_nightly, framework::dataset::make("DataType", + DataType::QASYMM8), + framework::dataset::make("QuantizationInfoIf", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, 10) }), + QuantizedActivationFunctionsDataset, + framework::dataset::make("DataLayout", { DataLayout::NCHW }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunLarge9x9, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(data_nightly_9x9, framework::dataset::make("DataType", - DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(3.f / 255, 10), QuantizationInfo(1.1f, 10) })), - QuantizedActivationFunctionsDataset), + DataType::QASYMM8), + IgnoredQuantizationInfo, + NoActivation, framework::dataset::make("DataLayout", { DataLayout::NCHW }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); } - -TEST_SUITE_END() // QASYMM8 - -TEST_SUITE(QASYMM8_CustomDataset) -FIXTURE_DATA_TEST_CASE(Run, CLDirectConvolutionValidationWithTensorShapesQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(datasets::DirectConvolutionLayerDataset(), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127), QuantizationInfo(1.1f, 10) })), - QuantizedActivationFunctionsDataset), +FIXTURE_DATA_TEST_CASE(RunLarge9x9WithActivation, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(data_nightly_9x9, + framework::dataset::make("DataType", + DataType::QASYMM8), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(3.f / 255, 10), QuantizationInfo(1.1f, 10) }), + QuantizedActivationFunctionsDataset, + framework::dataset::make("DataLayout", { DataLayout::NCHW }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(CustomDataset, CLDirectConvolutionValidationWithTensorShapesQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, + combine(datasets::DirectConvolutionLayerDataset(), + framework::dataset::make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + NoActivation, + framework::dataset::make("DataLayout", { DataLayout::NCHW }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(CustomDatasetWithActivation, CLDirectConvolutionValidationWithTensorShapesQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, + combine(datasets::DirectConvolutionLayerDataset(), + framework::dataset::make("DataType", DataType::QASYMM8), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127), QuantizationInfo(1.1f, 10) }), + QuantizedActivationFunctionsDataset, framework::dataset::make("DataLayout", { DataLayout::NCHW }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); } -TEST_SUITE_END() // QASYMM8_CustomDataset +TEST_SUITE_END() // QASYMM8 TEST_SUITE(QASYMM8_SIGNED) -FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(data_precommit, framework::dataset::make("DataType", - DataType::QASYMM8_SIGNED)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, -10) })), - QuantizedActivationFunctionsDataset), +FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(data_precommit, framework::dataset::make("DataType", + DataType::QASYMM8_SIGNED), + IgnoredQuantizationInfo, + NoActivation, framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); } - -FIXTURE_DATA_TEST_CASE(RunSmall9x9, CLDirectConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(data_precommit_9x9, +FIXTURE_DATA_TEST_CASE(RunSmallWithActivation, CLDirectConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(data_precommit, framework::dataset::make("DataType", + DataType::QASYMM8_SIGNED), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, -10) }), + QuantizedActivationFunctionsDataset, + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLDirectConvolutionLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::ALL, combine(data_precommit, + framework::dataset::make("DataType", + DataType::QASYMM8_SIGNED), + IgnoredQuantizationInfo, + NoActivation, + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunMixedDataLayoutWithActivation, CLDirectConvolutionLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::ALL, combine(data_precommit, + framework::dataset::make("DataType", + DataType::QASYMM8_SIGNED), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.1f, -10) }), + QuantizedActivationFunctionsDataset, + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunSmall9x9, CLDirectConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(data_precommit_9x9, framework::dataset::make("DataType", - DataType::QASYMM8_SIGNED)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, 10) })), - QuantizedActivationFunctionsDataset), + DataType::QASYMM8_SIGNED), + IgnoredQuantizationInfo, + NoActivation, + framework::dataset::make("DataLayout", { DataLayout::NCHW }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunSmall9x9WithActivation, CLDirectConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(data_precommit_9x9, + framework::dataset::make("DataType", + DataType::QASYMM8_SIGNED), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, 10) }), + QuantizedActivationFunctionsDataset, framework::dataset::make("DataLayout", { DataLayout::NCHW }))) { // Validate output @@ -464,10 +803,21 @@ FIXTURE_DATA_TEST_CASE(RunSmall9x9, CLDirectConvolutionLayerQuantizedFixture<int } FIXTURE_DATA_TEST_CASE(RunCustomDataset, CLDirectConvolutionValidationWithTensorShapesQuantizedFixture<int8_t>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(datasets::DirectConvolutionLayerDataset(), - framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127), QuantizationInfo(1.1f, 10) })), - QuantizedActivationFunctionsDataset), + combine(datasets::DirectConvolutionLayerDataset(), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), + IgnoredQuantizationInfo, + NoActivation, + framework::dataset::make("DataLayout", { DataLayout::NCHW }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} + +FIXTURE_DATA_TEST_CASE(RunCustomDatasetWithActivation, CLDirectConvolutionValidationWithTensorShapesQuantizedFixture<int8_t>, framework::DatasetMode::NIGHTLY, + combine(datasets::DirectConvolutionLayerDataset(), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127), QuantizationInfo(1.1f, 10) }), + QuantizedActivationFunctionsDataset, framework::dataset::make("DataLayout", { DataLayout::NCHW }))) { // Validate output diff --git a/tests/validation/CL/ElementwiseMax.cpp b/tests/validation/CL/ElementwiseMax.cpp index b9444b2795..bd47c23256 100644 --- a/tests/validation/CL/ElementwiseMax.cpp +++ b/tests/validation/CL/ElementwiseMax.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -58,7 +58,7 @@ const auto ElementwiseMaxQASYMM8SignedDataset = combine(combine(framework::datas const auto ElementwiseMaxQSYMM16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QSYMM16), framework::dataset::make("DataType", DataType::QSYMM16)), framework::dataset::make("DataType", DataType::QSYMM16)); -const auto ElementwiseMaxS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)), +const auto ElementwiseMaxS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::S16 }), framework::dataset::make("DataType", DataType::S16)), framework::dataset::make("DataType", DataType::S16)); const auto ElementwiseMaxFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataType", DataType::F16)); @@ -71,6 +71,8 @@ const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.75f, 0.25f), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.75f, 0.25f) }); +const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true }); +const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false }); } // namespace TEST_SUITE(CL) @@ -80,21 +82,18 @@ TEST_SUITE(ElementwiseMax) // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes }), framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), })), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), + framework::dataset::make("OutputInfo",{TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), })), - framework::dataset::make("Expected", { true, true, false, false})), + framework::dataset::make("Expected", { true, false, false})), input1_info, input2_info, output_info, expected) { ARM_COMPUTE_EXPECT(bool(CLElementwiseMax::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS); @@ -107,7 +106,8 @@ using CLElementwiseMaxFixture = ElementwiseMaxValidationFixture<CLTensor, CLAcce TEST_SUITE(Integer) TEST_SUITE(U8) -FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), ElementwiseMaxU8Dataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ElementwiseMaxU8Dataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference); @@ -115,7 +115,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFixture<uint8_t>, framework::Da TEST_SUITE_END() TEST_SUITE(S16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFixture<int16_t>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseMaxS16Dataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMaxS16Dataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference); @@ -128,33 +129,36 @@ using CLElementwiseMaxQuantizedFixture = ElementwiseMaxValidationQuantizedFixtur TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) -FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), +FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(), ElementwiseMaxQASYMM8Dataset), framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })), framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })), - framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) }))) + framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32, 0.01); } TEST_SUITE_END() TEST_SUITE(QASYMM8_SIGNED) -FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), +FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(), ElementwiseMaxQASYMM8SignedDataset), framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })), framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })), - framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) }))) + framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference); } TEST_SUITE_END() TEST_SUITE(QSYMM16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), +FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(), ElementwiseMaxQSYMM16Dataset), framework::dataset::make("Src0QInfo", { QuantizationInfo(1.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })), framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })), - framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) }))) + framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference); @@ -167,13 +171,16 @@ using CLElementwiseMaxFloatFixture = ElementwiseMaxValidationFloatFixture<CLTens TEST_SUITE(Float) TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMaxFP16Dataset), EmptyActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ElementwiseMaxFP16Dataset), + EmptyActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01); } -FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMaxFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ElementwiseMaxFP16Dataset), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMaxFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ElementwiseMaxFP16Dataset), + ActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01); @@ -181,14 +188,16 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMaxFloatFixture<half>, fr TEST_SUITE_END() TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMaxFP32Dataset), - EmptyActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ElementwiseMaxFP32Dataset), + EmptyActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); } -FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMaxFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ElementwiseMaxFP32Dataset), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMaxFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ElementwiseMaxFP32Dataset), + ActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); @@ -197,16 +206,18 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMaxFloatFixture<float>, f template <typename T> using CLElementwiseMaxBroadcastFloatFixture = ElementwiseMaxBroadcastValidationFloatFixture<CLTensor, CLAccessor, CLElementwiseMax, T>; -FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwiseMaxBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapesBroadcast(), +FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwiseMaxBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapesBroadcast(), ElementwiseMaxFP32Dataset), - EmptyActivationFunctionsDataset)) + EmptyActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); } -FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwiseMaxBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapesBroadcast(), +FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwiseMaxBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapesBroadcast(), ElementwiseMaxFP32Dataset), - ActivationFunctionsDataset)) + ActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); diff --git a/tests/validation/CL/ElementwiseMin.cpp b/tests/validation/CL/ElementwiseMin.cpp index 8f53b241ab..ee229a0941 100644 --- a/tests/validation/CL/ElementwiseMin.cpp +++ b/tests/validation/CL/ElementwiseMin.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -58,7 +58,7 @@ const auto ElementwiseMinQASYMM8SignedDataset = combine(combine(framework::datas const auto ElementwiseMinQSYMM16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QSYMM16), framework::dataset::make("DataType", DataType::QSYMM16)), framework::dataset::make("DataType", DataType::QSYMM16)); -const auto ElementwiseMinS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)), +const auto ElementwiseMinS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::S16 }), framework::dataset::make("DataType", DataType::S16)), framework::dataset::make("DataType", DataType::S16)); const auto ElementwiseMinFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataType", DataType::F16)); @@ -71,6 +71,8 @@ const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.75f, 0.25f), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.75f, 0.25f) }); +const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true }); +const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false }); } // namespace TEST_SUITE(CL) @@ -80,21 +82,18 @@ TEST_SUITE(ElementwiseMin) // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes }), framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), })), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), + framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), })), - framework::dataset::make("Expected", { true, true, false, false})), + framework::dataset::make("Expected", { true, false, false})), input1_info, input2_info, output_info, expected) { ARM_COMPUTE_EXPECT(bool(CLElementwiseMin::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS); @@ -107,7 +106,8 @@ using CLElementwiseMinFixture = ElementwiseMinValidationFixture<CLTensor, CLAcce TEST_SUITE(Integer) TEST_SUITE(U8) -FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), ElementwiseMinU8Dataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ElementwiseMinU8Dataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference); @@ -115,7 +115,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFixture<uint8_t>, framework::Da TEST_SUITE_END() TEST_SUITE(S16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFixture<int16_t>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseMinS16Dataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMinS16Dataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference); @@ -128,33 +129,36 @@ using CLElementwiseMinQuantizedFixture = ElementwiseMinValidationQuantizedFixtur TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) -FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), +FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(), ElementwiseMinQASYMM8Dataset), framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })), framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })), - framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) }))) + framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32, 0.01); } TEST_SUITE_END() TEST_SUITE(QASYMM8_SIGNED) -FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), +FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(), ElementwiseMinQASYMM8SignedDataset), framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })), framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })), - framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) }))) + framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference); } TEST_SUITE_END() TEST_SUITE(QSYMM16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), +FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(), ElementwiseMinQSYMM16Dataset), framework::dataset::make("SrcQInfo0", { QuantizationInfo(1.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })), framework::dataset::make("SrcQInfo1", { QuantizationInfo(2.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })), - framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) }))) + framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference); @@ -167,13 +171,16 @@ using CLElementwiseMinFloatFixture = ElementwiseMinValidationFloatFixture<CLTens TEST_SUITE(Float) TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMinFP16Dataset), EmptyActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ElementwiseMinFP16Dataset), + EmptyActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01); } -FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMinFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ElementwiseMinFP16Dataset), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMinFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ElementwiseMinFP16Dataset), + ActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01); @@ -181,14 +188,16 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMinFloatFixture<half>, fr TEST_SUITE_END() TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMinFP32Dataset), - EmptyActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ElementwiseMinFP32Dataset), + EmptyActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); } -FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMinFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ElementwiseMinFP32Dataset), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMinFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ElementwiseMinFP32Dataset), + ActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); @@ -196,16 +205,18 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMinFloatFixture<float>, f template <typename T> using CLElementwiseMinBroadcastFloatFixture = ElementwiseMinBroadcastValidationFloatFixture<CLTensor, CLAccessor, CLElementwiseMin, T>; -FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwiseMinBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapesBroadcast(), +FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwiseMinBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapesBroadcast(), ElementwiseMinFP32Dataset), - EmptyActivationFunctionsDataset)) + EmptyActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); } -FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwiseMinBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapesBroadcast(), +FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwiseMinBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapesBroadcast(), ElementwiseMinFP32Dataset), - ActivationFunctionsDataset)) + ActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); diff --git a/tests/validation/CL/ElementwisePower.cpp b/tests/validation/CL/ElementwisePower.cpp index a2d3ba6c09..c2aeb6e045 100644 --- a/tests/validation/CL/ElementwisePower.cpp +++ b/tests/validation/CL/ElementwisePower.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -57,6 +57,8 @@ const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.75f, 0.25f), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.75f, 0.25f) }); +const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true }); +const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false }); } // namespace TEST_SUITE(CL) @@ -96,29 +98,33 @@ using CLElementwisePowerBroadcastFloatFixture = ElementwisePowerBroadcastValidat TEST_SUITE(Float) TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwisePowerFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwisePowerFP16Dataset), - EmptyActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwisePowerFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ElementwisePowerFP16Dataset), + EmptyActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01); } -FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwisePowerFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ElementwisePowerFP16Dataset), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwisePowerFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ElementwisePowerFP16Dataset), + ActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01); } -FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwisePowerBroadcastFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapesBroadcast(), +FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwisePowerBroadcastFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapesBroadcast(), ElementwisePowerFP16Dataset), - EmptyActivationFunctionsDataset)) + EmptyActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01); } -FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwisePowerBroadcastFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapesBroadcast(), +FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwisePowerBroadcastFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapesBroadcast(), ElementwisePowerFP16Dataset), - ActivationFunctionsDataset)) + ActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01); @@ -126,29 +132,33 @@ FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwisePowerBroadcastFl TEST_SUITE_END() //FP16 TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwisePowerFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwisePowerFP32Dataset), - EmptyActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwisePowerFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ElementwisePowerFP32Dataset), + EmptyActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); } -FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwisePowerFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ElementwisePowerFP32Dataset), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwisePowerFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ElementwisePowerFP32Dataset), + ActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); } -FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwisePowerBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapesBroadcast(), +FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwisePowerBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapesBroadcast(), ElementwisePowerFP32Dataset), - EmptyActivationFunctionsDataset)) + EmptyActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); } -FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwisePowerBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapesBroadcast(), +FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwisePowerBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapesBroadcast(), ElementwisePowerFP32Dataset), - ActivationFunctionsDataset)) + ActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); diff --git a/tests/validation/CL/ElementwiseSquaredDiff.cpp b/tests/validation/CL/ElementwiseSquaredDiff.cpp index 0a4ab6627b..ee0279df33 100644 --- a/tests/validation/CL/ElementwiseSquaredDiff.cpp +++ b/tests/validation/CL/ElementwiseSquaredDiff.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -57,7 +57,7 @@ const auto ElementwiseSquaredDiffQASYMM8Dataset = combine(combine(framework::dat const auto ElementwiseSquaredDiffQSYMM16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QSYMM16), framework::dataset::make("DataType", DataType::QSYMM16)), framework::dataset::make("DataType", DataType::QSYMM16)); -const auto ElementwiseSquaredDiffS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)), +const auto ElementwiseSquaredDiffS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::S16 }), framework::dataset::make("DataType", DataType::S16)), framework::dataset::make("DataType", DataType::S16)); const auto ElementwiseSquaredDiffFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataType", DataType::F16)); @@ -70,6 +70,8 @@ const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.75f, 0.25f), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.75f, 0.25f) }); +const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true }); +const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false }); } // namespace TEST_SUITE(CL) @@ -79,21 +81,18 @@ TEST_SUITE(ElementwiseSquaredDiff) // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes }), framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), })), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), + framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), })), - framework::dataset::make("Expected", { true, true, false, false})), + framework::dataset::make("Expected", { true, false, false})), input1_info, input2_info, output_info, expected) { ARM_COMPUTE_EXPECT(bool(CLElementwiseSquaredDiff::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS); @@ -106,7 +105,8 @@ using CLElementwiseSquaredDiffFixture = ElementwiseSquaredDiffValidationFixture< TEST_SUITE(Integer) TEST_SUITE(U8) -FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), ElementwiseSquaredDiffU8Dataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffU8Dataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference); @@ -114,7 +114,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFixture<uint8_t>, frame TEST_SUITE_END() TEST_SUITE(S16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFixture<int16_t>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseSquaredDiffS16Dataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffS16Dataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference); @@ -127,22 +128,24 @@ using CLElementwiseSquaredDiffQuantizedFixture = ElementwiseSquaredDiffValidatio TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) -FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), +FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffQASYMM8Dataset), framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })), framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })), - framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) }))) + framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32, 0.01); } TEST_SUITE_END() TEST_SUITE(QSYMM16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), +FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffQSYMM16Dataset), framework::dataset::make("Src0QInfo", { QuantizationInfo(1.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })), framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })), - framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) }))) + framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qsymm16); @@ -155,14 +158,16 @@ using CLElementwiseSquaredDiffFloatFixture = ElementwiseSquaredDiffValidationFlo TEST_SUITE(Float) TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffFP16Dataset), - EmptyActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffFP16Dataset), + EmptyActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01); } -FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseSquaredDiffFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ElementwiseSquaredDiffFP16Dataset), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseSquaredDiffFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ElementwiseSquaredDiffFP16Dataset), + ActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01); @@ -170,14 +175,16 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseSquaredDiffFloatFixture<h TEST_SUITE_END() TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffFP32Dataset), - EmptyActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffFP32Dataset), + EmptyActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); } -FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseSquaredDiffFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ElementwiseSquaredDiffFP32Dataset), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseSquaredDiffFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ElementwiseSquaredDiffFP32Dataset), + ActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); @@ -185,16 +192,18 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseSquaredDiffFloatFixture<f template <typename T> using CLElementwiseSquaredDiffBroadcastFloatFixture = ElementwiseSquaredDiffBroadcastValidationFloatFixture<CLTensor, CLAccessor, CLElementwiseSquaredDiff, T>; -FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwiseSquaredDiffBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapesBroadcast(), +FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwiseSquaredDiffBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapesBroadcast(), ElementwiseSquaredDiffFP32Dataset), - EmptyActivationFunctionsDataset)) + EmptyActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); } -FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwiseSquaredDiffBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapesBroadcast(), +FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwiseSquaredDiffBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapesBroadcast(), ElementwiseSquaredDiffFP32Dataset), - ActivationFunctionsDataset)) + ActivationFunctionsDataset), + OutOfPlaceDataSet)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); diff --git a/tests/validation/CL/ExpLayer.cpp b/tests/validation/CL/ExpLayer.cpp index 16e75a64b4..1797046e5d 100644 --- a/tests/validation/CL/ExpLayer.cpp +++ b/tests/validation/CL/ExpLayer.cpp @@ -22,7 +22,7 @@ * SOFTWARE. */ #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h" +#include "arm_compute/runtime/CL/functions/CLElementwiseUnaryLayer.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" #include "tests/CL/CLAccessor.h" @@ -32,7 +32,7 @@ #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" #include "tests/validation/Validation.h" -#include "tests/validation/fixtures/ElementWiseUnaryFixture.h" +#include "tests/validation/fixtures/ElementwiseUnaryFixture.h" namespace arm_compute { diff --git a/tests/validation/CL/FFT.cpp b/tests/validation/CL/FFT.cpp index fb2f1f53e2..99a83abe5c 100644 --- a/tests/validation/CL/FFT.cpp +++ b/tests/validation/CL/FFT.cpp @@ -175,6 +175,8 @@ TEST_SUITE(FFTConvolutionLayer) template <typename T> using CLFFTConvolutionLayerFixture = FFTConvolutionValidationFixture<CLTensor, CLAccessor, CLFFTConvolutionLayer, T>; +template <typename T> +using CLFFTConvolutionLayerMixedDataLayoutFixture = FFTConvolutionValidationFixture<CLTensor, CLAccessor, CLFFTConvolutionLayer, T, true>; TEST_SUITE(Float) TEST_SUITE(FP32) @@ -186,6 +188,14 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLFFTConvolutionLayerFixture<float>, framework: // Validate output validate(CLAccessor(_target), _reference, tolerance_f32, tolerance_num_f32); } +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLFFTConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFFTConvolutionLayerDataset(), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsSmallDataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, tolerance_num_f32); +} TEST_SUITE_END() // FP32 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, CLFFTConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFFTConvolutionLayerDataset(), diff --git a/tests/validation/CL/FullyConnectedLayer.cpp b/tests/validation/CL/FullyConnectedLayer.cpp index 78195a556b..2f0c86499b 100644 --- a/tests/validation/CL/FullyConnectedLayer.cpp +++ b/tests/validation/CL/FullyConnectedLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -40,6 +40,7 @@ namespace test { namespace validation { +using framework::dataset::make; namespace { /** Tolerance for float operations */ @@ -51,24 +52,20 @@ constexpr float tolerance_num = 0.07f; /**< Tolerance n /** Tolerance for quantized asymmetric operations */ constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); -/** CNN data types */ -const auto CNNDataTypes = framework::dataset::make("DataType", -{ - DataType::F16, - DataType::F32, - DataType::QASYMM8, - DataType::QASYMM8_SIGNED, -}); - -const auto FullyConnectedParameters = combine(framework::dataset::make("TransposeWeights", { false, true }), framework::dataset::make("ReshapeWeights", { false, true })); +const auto FullyConnectedParameters = combine(make("TransposeWeights", { false, true }), make("ReshapeWeights", { false, true })); -const auto QuantizationData = framework::dataset::make("QuantizationInfo", +const auto QuantizationData = make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 10), QuantizationInfo(1.1f, 10), }); -const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo", +const auto IgnoredQuantizationData = make("IgnoredQuantizationInfo", +{ + QuantizationInfo(), +}); + +const auto ActivationFunctionsDataset = make("ActivationInfo", { ActivationLayerInfo(), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), @@ -77,13 +74,16 @@ const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH) }); -const auto ActivationFunctionsQuantizedDataset = framework::dataset::make("ActivationInfo", +// This dataset case only runs with dynamic quantization +const auto NoActivationFunctionsQuantizedDataset = make("ActivationInfo", { - ActivationLayerInfo(), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.75f, 0.25f) + ActivationLayerInfo() }); + +const auto ActivationFunctionsQuantizedDataset = concat(concat( + make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)), + make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f))), + make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.75f, 0.25f))); } // namespace TEST_SUITE(CL) @@ -92,33 +92,33 @@ TEST_SUITE(FullyConnectedLayer) // *INDENT-OFF* // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32), // Mismatching data types + make("InputInfo", { TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32), // Mismatching data types TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32), TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32), TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32), // Invalid weights dimensions TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32), // Wrongly reshaped weights }), - framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(315U, 271U), 1, DataType::F16), + make("WeightsInfo",{ TensorInfo(TensorShape(315U, 271U), 1, DataType::F16), TensorInfo(TensorShape(192U, 192U), 1, DataType::F32), TensorInfo(TensorShape(192U, 192U), 1, DataType::F32), TensorInfo(TensorShape(217U, 231U), 1, DataType::F32), TensorInfo(TensorShape(217U, 315U), 1, DataType::F32), })), - framework::dataset::make("BiasInfo",{ TensorInfo(TensorShape(271U), 1, DataType::F32), + make("BiasInfo",{ TensorInfo(TensorShape(271U), 1, DataType::F32), TensorInfo(TensorShape(192U), 1, DataType::F32), TensorInfo(TensorShape(192U), 1, DataType::F32), TensorInfo(TensorShape(271U), 1, DataType::F32), TensorInfo(TensorShape(271U), 1, DataType::F32), })), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(271U, 3U), 1, DataType::F32), + make("OutputInfo",{ TensorInfo(TensorShape(271U, 3U), 1, DataType::F32), TensorInfo(TensorShape(192U, 4U), 1, DataType::F32), TensorInfo(TensorShape(192U, 4U), 1, DataType::F32), TensorInfo(TensorShape(271U, 3U), 1, DataType::F32), TensorInfo(TensorShape(271U, 3U), 1, DataType::F32), })), - framework::dataset::make("TransposeWeights",{ true, true, false, true, true })), - framework::dataset::make("ReshapedWeights",{ false, false, false, false, false})), - framework::dataset::make("Expected", { false, true, true, false, false })), + make("TransposeWeights",{ true, true, false, true, true })), + make("ReshapedWeights",{ false, false, false, false, false})), + make("Expected", { false, true, true, false, false })), input_info, weights_info, bias_info, output_info, transpose_weights, reshaped_weights, expected) { // Create Fully Connected layer info @@ -138,37 +138,73 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip( template <typename T> using CLFullyConnectedLayerFixture = FullyConnectedLayerValidationFixture<CLTensor, CLAccessor, CLFullyConnectedLayer, T>; +template <typename T> +using CLFullyConnectedLayerMixedDataLayoutFixture = FullyConnectedLayerValidationFixture<CLTensor, CLAccessor, CLFullyConnectedLayer, T, true>; +template <typename T> +using CLFullyConnectedLayerDynamicWeightsFixture = FullyConnectedWithDynamicWeightsFixture<CLTensor, CLAccessor, CLFullyConnectedLayer, T>; +template <typename T> +using CLFullyConnectedNoBiasFixture = FullyConnectedDynamicNoBiasFixture<CLTensor, CLAccessor, CLFullyConnectedLayer, T>; TEST_SUITE(Float) TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), - FullyConnectedParameters), - framework::dataset::make("DataType", DataType::F16)), +FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(), + FullyConnectedParameters, + make("DataType", DataType::F16), ActivationFunctionsDataset)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), - FullyConnectedParameters), - framework::dataset::make("DataType", DataType::F16)), +FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeFullyConnectedLayerDataset(), + FullyConnectedParameters, + make("DataType", DataType::F16), ActivationFunctionsDataset)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); } +FIXTURE_DATA_TEST_CASE(RunDynamicWeights, CLFullyConnectedLayerDynamicWeightsFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(), + make("DataType", DataType::F16), + make("ActivationInfo", ActivationLayerInfo()), + make("WeightsReshaped", { false, true }))) +{ +} TEST_SUITE_END() TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), - framework::dataset::make("DataType", DataType::F32)), +FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters, + make("DataType", DataType::F32), ActivationFunctionsDataset)) { // Validate output validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters), - framework::dataset::make("DataType", DataType::F32)), +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLFullyConnectedLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, combine( + make("Input", TensorShape(9U, 5U, 7U)), + make("Weights", TensorShape(315U, 271U)), + make("Biases", TensorShape(271U)), + make("Output", TensorShape(271U)), + FullyConnectedParameters, + make("DataType", DataType::F32), + make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)))) +{ + // Validate output + validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunDynamicWeights, CLFullyConnectedLayerDynamicWeightsFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(), + make("DataType", DataType::F32), + make("ActivationInfo", ActivationLayerInfo()), + make("WeightsReshaped", { false, true }))) +{ +} +FIXTURE_DATA_TEST_CASE(RunDynamicNoBias, CLFullyConnectedNoBiasFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(), + make("DataType", DataType::F32), + make("ActivationInfo", { ActivationLayerInfo(), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }), + make("WeightsReshaped", { false }))) +{ +} +FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters, + make("DataType", DataType::F32), ActivationFunctionsDataset)) { // Validate output @@ -179,37 +215,141 @@ TEST_SUITE_END() template <typename T> using CLFullyConnectedLayerQuantizedFixture = FullyConnectedLayerValidationQuantizedFixture<CLTensor, CLAccessor, CLFullyConnectedLayer, T>; +template <typename T> +using CLFullyConnectedLayerQuantizedMixedDataLayoutFixture = FullyConnectedLayerValidationQuantizedFixture<CLTensor, CLAccessor, CLFullyConnectedLayer, T, true>; TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) -FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8)), QuantizationData), +FIXTURE_DATA_TEST_CASE(RunSmallWithActivation, CLFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters, make("DataType", DataType::QASYMM8), QuantizationData, ActivationFunctionsQuantizedDataset)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8)), QuantizationData), +FIXTURE_DATA_TEST_CASE(RunMixedDataLayoutWithActivation, CLFullyConnectedLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine( + make("Input", TensorShape(9U, 5U, 7U)), + make("Weights", TensorShape(315U, 271U)), + make("Biases", TensorShape(271U)), + make("Output", TensorShape(271U)), + FullyConnectedParameters, + make("DataType", DataType::QASYMM8), + QuantizationData, + make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunLargeWithActivation, CLFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, + combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters, make("DataType", DataType::QASYMM8), QuantizationData, ActivationFunctionsQuantizedDataset)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); } + +// Dynamic Quantization Tests +FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters, make("DataType", DataType::QASYMM8), IgnoredQuantizationData, + NoActivationFunctionsQuantizedDataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, + combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters, make("DataType", DataType::QASYMM8), IgnoredQuantizationData, + NoActivationFunctionsQuantizedDataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunDynamicWeights, CLFullyConnectedLayerDynamicWeightsFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(), + make("DataType", DataType::QASYMM8), + NoActivationFunctionsQuantizedDataset, + make("WeightsReshaped", { false /* COMPMID-6000: Support FullyConnected with quantized dynamic weights already reshaped */ }))) +{ +} + +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLFullyConnectedLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine( + make("Input", TensorShape(9U, 5U, 7U)), + make("Weights", TensorShape(315U, 271U)), + make("Biases", TensorShape(271U)), + make("Output", TensorShape(271U)), + FullyConnectedParameters, + make("DataType", DataType::QASYMM8), + IgnoredQuantizationData, + NoActivationFunctionsQuantizedDataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} + TEST_SUITE_END() /* QASYMM8 */ TEST_SUITE(QASYMM8_SIGNED) -FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), QuantizationData), +FIXTURE_DATA_TEST_CASE(RunSmallWithActivation, CLFullyConnectedLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters, make("DataType", DataType::QASYMM8_SIGNED), QuantizationData, ActivationFunctionsQuantizedDataset)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); } -TEST_SUITE_END() /* QASYMM8_SIGNED */ -TEST_SUITE_END() /* Quantized */ +FIXTURE_DATA_TEST_CASE(RunMixedDataLayoutWithActivation, CLFullyConnectedLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::PRECOMMIT, + combine( + make("Input", TensorShape(9U, 5U, 7U)), + make("Weights", TensorShape(315U, 271U)), + make("Biases", TensorShape(271U)), + make("Output", TensorShape(271U)), + FullyConnectedParameters, + make("DataType", DataType::QASYMM8_SIGNED), + QuantizationData, + make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} -TEST_SUITE_END() -TEST_SUITE_END() +// Dynamic Quantization tests below +FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters, make("DataType", DataType::QASYMM8_SIGNED), IgnoredQuantizationData, + NoActivationFunctionsQuantizedDataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} + +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLFullyConnectedLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::PRECOMMIT, + combine( + make("Input", TensorShape(9U, 5U, 7U)), + make("Weights", TensorShape(315U, 271U)), + make("Biases", TensorShape(271U)), + make("Output", TensorShape(271U)), + FullyConnectedParameters, + make("DataType", DataType::QASYMM8_SIGNED), + IgnoredQuantizationData, + NoActivationFunctionsQuantizedDataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} + +FIXTURE_DATA_TEST_CASE(RunDynamicWeights, CLFullyConnectedLayerDynamicWeightsFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(), + make("DataType", DataType::QASYMM8_SIGNED), + make("ActivationInfo", ActivationLayerInfo()), + make("WeightsReshaped", { false /* COMPMID-6000: Support FullyConnected with quantized dynamic weights already reshaped */ }))) +{ +} +FIXTURE_DATA_TEST_CASE(RunDynamicNoBias, CLFullyConnectedNoBiasFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(), + make("DataType", DataType::QASYMM8_SIGNED), + make("ActivationInfo", ActivationLayerInfo()), + make("WeightsReshaped", { false /* COMPMID-6000: Support FullyConnected with quantized dynamic weights already reshaped */ }))) +{ +} +TEST_SUITE_END() // QASYMM8_SIGNED +TEST_SUITE_END() // Quantized +TEST_SUITE_END() // FullyConnectedLayer +TEST_SUITE_END() // CL } // namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/CL/GEMM.cpp b/tests/validation/CL/GEMM.cpp index 838920c29d..16ca14f1d6 100644 --- a/tests/validation/CL/GEMM.cpp +++ b/tests/validation/CL/GEMM.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -62,6 +62,29 @@ const auto CNNDataTypes = framework::dataset::make("DataType", TEST_SUITE(CL) TEST_SUITE(GEMM) +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( + framework::dataset::make("LhsInfo", { TensorInfo(TensorShape(27U, 13U), 1, DataType::S32), // Unsupported data type + TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), + }), + framework::dataset::make("RhsInfo",{ TensorInfo(TensorShape(8U, 27U), 1, DataType::S32), + TensorInfo(TensorShape(8U, 27U), 1, DataType::F32), + })), + framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(8U, 13U), 1, DataType::S32), + TensorInfo(TensorShape(8U, 13U), 1, DataType::F32), + })), + framework::dataset::make("Expected", { false, true })), + lhs_info, rhs_info, output_info, expected) +{ + constexpr float alpha = 1.0; + constexpr float beta = 0.0; + const auto gemm_info = GEMMInfo(); + bool is_valid = bool(CLGEMM::validate(&lhs_info.clone()->set_is_resizable(true), &rhs_info.clone()->set_is_resizable(true), nullptr, &output_info.clone()->set_is_resizable(true), alpha, beta, gemm_info)); + ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); +} +// clang-format on +// *INDENT-ON* template <typename T> using CLGEMMFixture = GEMMValidationFixture<CLTensor, CLAccessor, CLGEMM, T>; @@ -71,6 +94,9 @@ using CLGEMMOutput3DFixture = GEMMValidationFixture<CLTensor, CLAccessor, CLGEMM template <typename T> using CLGEMMInputOutput3DFixture = GEMMValidationFixture<CLTensor, CLAccessor, CLGEMM, T, false, true, true>; +template <typename T> +using CLBatchedMatMulFixture = GEMMValidationFixture<CLTensor, CLAccessor, CLGEMM, T, true, false, false, false, false, true>; + TEST_SUITE(Float) TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallGEMMDataset(), @@ -181,10 +207,32 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMOutput3DFixture<half>, framework::Dataset validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); } TEST_SUITE_END() // FP16 - TEST_SUITE_END() // Float TEST_SUITE_END() // OUTPUT_3D +TEST_SUITE(BATCHED_MATMUL) + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, CLBatchedMatMulFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallBatchedMatMulDataset(), + framework::dataset::make("ReshapeWeights", { false })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, tolerance_num); +} +TEST_SUITE_END() + +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, CLBatchedMatMulFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallBatchedMatMulDataset(), + framework::dataset::make("ReshapeWeights", { false })), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); +} +TEST_SUITE_END() +TEST_SUITE_END() // BATCHED_MATMUL + TEST_SUITE_END() // GEMM TEST_SUITE_END() // CL } // namespace validation diff --git a/tests/validation/CL/GEMMLowp.cpp b/tests/validation/CL/GEMMLowp.cpp index 5a1971b54c..78d794a9bb 100644 --- a/tests/validation/CL/GEMMLowp.cpp +++ b/tests/validation/CL/GEMMLowp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,6 +44,9 @@ namespace test { namespace validation { + +using framework::dataset::make; + namespace { constexpr AbsoluteTolerance<float> tolerance_quant(1); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ @@ -53,6 +56,7 @@ TEST_SUITE(GEMMLowp) TEST_SUITE(MatrixMultiplyCore) using CLGEMMLowpMatrixMultiplyCoreFixture = GEMMLowpMatrixMultiplyCoreValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore>; +using CLGEMMLowpBatchedMatMulFixture = GEMMLowpMatrixMultiplyCoreValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, false, false, true>; FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpDataset()) { @@ -66,18 +70,74 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyCoreFixture, framework: validate(CLAccessor(_target), _reference); } +using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned = + GEMMLowpBatchedMatrixMultiplyCoreFusedOffsetOutputFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, false, false, uint8_t, uint8_t, true>; +TEST_SUITE(BatchedMatMul) +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned, framework::DatasetMode::ALL, + combine(datasets::SmallGEMMLowpFusedBatchedMatMulDataset(), + make("DataType", { DataType::QASYMM8 }), + make("reshape_b_only_on_first_run", { false }))) +{ + validate(CLAccessor(_target), _reference, tolerance_quant); +} +TEST_SUITE_END() // QASYMM8 + +using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned = + GEMMLowpBatchedMatrixMultiplyCoreFusedOffsetOutputFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, false, false, int8_t, int8_t, true>; +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned, framework::DatasetMode::ALL, + combine(datasets::SmallGEMMLowpFusedBatchedMatMulDataset(), + make("DataType", { DataType::QASYMM8_SIGNED }), + make("reshape_b_only_on_first_run", { false }))) +{ + validate(CLAccessor(_target), _reference, tolerance_quant); +} +TEST_SUITE_END() // QASYMM8_SIGNED +TEST_SUITE_END() // BatchedMatMul + TEST_SUITE(FusedOffsetOutput) TEST_SUITE(QASYMM8) using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore>; -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(), - framework::dataset::make("DataType", { DataType::QASYMM8 }))) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture, framework::DatasetMode::ALL, + combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(), + make("DataType", { DataType::QASYMM8 }), + make("reshape_b_only_on_first_run", { true, false }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_quant); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(), - framework::dataset::make("DataType", { DataType::QASYMM8 }))) +TEST_SUITE(Output3D) +using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputOutput3DUint8Fixture = + GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, false, true>; +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputOutput3DUint8Fixture, framework::DatasetMode::ALL, + combine(datasets::SmallGEMMLowpFusedOffsetOutputOutput3DUint8Dataset(), + make("DataType", { DataType::QASYMM8 }), + make("reshape_b_only_on_first_run", { true, false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); +} +TEST_SUITE_END() // Output3D + +TEST_SUITE(InputOutput3D) +using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInputOutput3DUint8Fixture = + GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, true, true>; +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInputOutput3DUint8Fixture, framework::DatasetMode::ALL, + combine(datasets::SmallGEMMLowpFusedOffsetOutputInputOutput3DUint8Dataset(), + make("DataType", { DataType::QASYMM8 }), + make("reshape_b_only_on_first_run", { true, false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); +} +TEST_SUITE_END() // InputOutput3D + +FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture, framework::DatasetMode::NIGHTLY, + combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(), + make("DataType", { DataType::QASYMM8 }), + make("reshape_b_only_on_first_run", { true, false }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_quant); @@ -86,8 +146,10 @@ TEST_SUITE_END() // QASYMM8 TEST_SUITE(QASYMM8_SIGNED) using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInt8Fixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, false, false, int8_t, int8_t>; -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInt8Fixture, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedOffsetOutputInt8Dataset(), - framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED }))) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInt8Fixture, framework::DatasetMode::ALL, + combine(datasets::SmallGEMMLowpFusedOffsetOutputInt8Dataset(), + make("DataType", { DataType::QASYMM8_SIGNED }), + make("reshape_b_only_on_first_run", { false }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_quant); @@ -130,24 +192,24 @@ TEST_SUITE(QuantizeDownInt32Scale) TEST_SUITE(QASYMM8) -const auto quantize_down_int32_to_uint8_scale_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) * framework::dataset::make("result_shift", 2, - 3) - * framework::dataset::make("min", 0) * framework::dataset::make("max", 255) * framework::dataset::make("addBias", { false, true }); +const auto quantize_down_int32_to_uint8_scale_cases = make("result_offset", -2, 1) * make("result_mult_int", 1, 2) * make("result_shift", 2, 3) + * make("min", 0) * make("max", 255) * make("addBias", { false, true }); -const auto quantize_down_int32_to_uint8_scale_relu_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, - 2) - * framework::dataset::make("result_shift", 2, 3) * framework::dataset::make("min", 0, 2) * framework::dataset::make("max", 171, 173) * framework::dataset::make("addBias", { false, true }); +const auto quantize_down_int32_to_uint8_scale_relu_cases = make("result_offset", -2, 1) * make("result_mult_int", 1, 2) + * make("result_shift", 2, 3) * make("min", 0, 2) * make("max", 171, 173) * make("addBias", { false, true }); using CLGEMMLowpQuantizeDownInt32ScaleFixture = GEMMLowpQuantizeDownInt32ToUint8ScaleValidationFixture<CLTensor, CLAccessor, CLGEMMLowpOutputStage>; -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_cases)) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, + combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_cases)) { // Validate output validate(CLAccessor(_target), _reference); } TEST_SUITE(BoundedReLu) -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_relu_cases)) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, + combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_relu_cases)) { // Validate output validate(CLAccessor(_target), _reference); @@ -158,24 +220,24 @@ TEST_SUITE_END() // QASYMM8 TEST_SUITE(QASYMM8_SIGNED) -const auto quantize_down_int32_to_int8_scale_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) * framework::dataset::make("result_shift", 2, - 3) - * framework::dataset::make("min", -128) * framework::dataset::make("max", 127) * framework::dataset::make("addBias", { false, true }); +const auto quantize_down_int32_to_int8_scale_cases = make("result_offset", -2, 1) * make("result_mult_int", 1, 2) * make("result_shift", 2, 3) + * make("min", -128) * make("max", 127) * make("addBias", { false, true }); -const auto quantize_down_int32_to_int8_scale_relu_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, - 2) - * framework::dataset::make("result_shift", 2, 3) * framework::dataset::make("min", -100, -98) * framework::dataset::make("max", 71, 73) * framework::dataset::make("addBias", { false, true }); +const auto quantize_down_int32_to_int8_scale_relu_cases = make("result_offset", -2, 1) * make("result_mult_int", 1, 2) + * make("result_shift", 2, 3) * make("min", -100, -98) * make("max", 71, 73) * make("addBias", { false, true }); using CLGEMMLowpQuantizeDownInt32ScaleFixture = GEMMLowpQuantizeDownInt32ToInt8ScaleValidationFixture<CLTensor, CLAccessor, CLGEMMLowpOutputStage>; -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_cases)) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, + combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_cases)) { // Validate output validate(CLAccessor(_target), _reference); } TEST_SUITE(BoundedReLu) -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_relu_cases)) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, + combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_relu_cases)) { // Validate output validate(CLAccessor(_target), _reference); @@ -185,140 +247,6 @@ TEST_SUITE_END() // BoundedReLu TEST_SUITE_END() // QASYMM8_SIGNED TEST_SUITE_END() // QuantizeDownInt32Scale -TEST_SUITE(QuantizeDownInt32ScaleByFixedPoint) - -TEST_SUITE(QASYMM8) - -const auto quantize_down_int32_to_uint8_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, - 2) - * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0) * framework::dataset::make("max", 255) * framework::dataset::make("addBias", { false, true }); - -const auto quantize_down_int32_to_uint8_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, - 2) - * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0, 2) * framework::dataset::make("max", 171, 174) * framework::dataset::make("addBias", { false, true }); -using CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture = - GEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointValidationFixture<CLTensor, CLAccessor, CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint>; - -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), - quantize_down_int32_to_uint8_scale_by_fixedpoint_cases)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), - quantize_down_int32_to_uint8_scale_by_fixedpoint_cases)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} - -TEST_SUITE(BoundedReLu) -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), - quantize_down_int32_to_uint8_scale_by_fixedpoint_relu_cases)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), - quantize_down_int32_to_uint8_scale_by_fixedpoint_relu_cases)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -TEST_SUITE_END() // BoundedReLu -TEST_SUITE_END() // QASYMM8 -TEST_SUITE(QASYMM8_SIGNED) -const auto quantize_down_int32_to_int8_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, 2) - * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", -128) * framework::dataset::make("max", 127) * framework::dataset::make("addBias", { false, true }); - -const auto quantize_down_int32_to_int8_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, 2) - * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", -128, -126) * framework::dataset::make("max", 110, 112) * framework::dataset::make("addBias", { false, true }); -using CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointFixture = - GEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointValidationFixture<CLTensor, CLAccessor, CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint>; - -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), - quantize_down_int32_to_int8_scale_by_fixedpoint_cases)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} - -TEST_SUITE(BoundedReLu) -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), - quantize_down_int32_to_int8_scale_by_fixedpoint_relu_cases)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} - -TEST_SUITE_END() // BoundedReLu -TEST_SUITE_END() // QASYMM8_SIGNED -TEST_SUITE(QSYMM16) - -const auto quantize_down_int32_to_int16_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, - 2) - * framework::dataset::make("min", -32768) * framework::dataset::make("max", 32767) * framework::dataset::make("addBias", { false, true }); - -const auto quantize_down_int32_to_int16_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, - 2) - * framework::dataset::make("min", -2, 0) * framework::dataset::make("max", 1, 3) * framework::dataset::make("addBias", { false, true }); - -const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_cases = framework::dataset::make("result_fixedpoint_multiplier", 1073741823, - 1073741825) - * framework::dataset::make("result_shift", -3, - -2) - * framework::dataset::make("min", -32768) * framework::dataset::make("max", 32767) * framework::dataset::make("addBias", { false, true }); - -const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, - 254601602) - * framework::dataset::make("result_shift", -3, - -1) - * framework::dataset::make("min", -2, 0) * framework::dataset::make("max", 1, 3) * framework::dataset::make("addBias", { false, true }); - -using CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture = - GEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointValidationFixture<CLTensor, CLAccessor, CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint>; - -TEST_SUITE(NoRelu) -TEST_SUITE(MultSmallerEq1) -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), - quantize_down_int32_to_int16_scale_by_fixedpoint_cases)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -TEST_SUITE_END() // MultSmallerEq1 -TEST_SUITE(MultGreater1) -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), - quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_cases)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -TEST_SUITE_END() // MultGreater1 -TEST_SUITE_END() // NoRelu -TEST_SUITE(BoundedReLu) -TEST_SUITE(MultSmallerEq1) -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), - quantize_down_int32_to_int16_scale_by_fixedpoint_relu_cases)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -TEST_SUITE_END() // MultSmallerEq1 -TEST_SUITE(MultGreater1) -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), - quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_relu_cases)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -TEST_SUITE_END() // MultGreater1 -TEST_SUITE_END() // BoundedReLu -TEST_SUITE_END() // QSYMM16 -TEST_SUITE_END() // QuantizeDownInt32ScaleByFixedPoint - TEST_SUITE(QuantizeDownInt32ScaleByFloat) TEST_SUITE(QASYMM8) @@ -326,13 +254,14 @@ using CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture = GEMMLowpQuantizeDownInt32ScaleByFloatValidationFixture<CLTensor, CLAccessor, CLGEMMLowpOutputStage, uint8_t>; FIXTURE_DATA_TEST_CASE(RunTiny, CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(framework::dataset::make("DataType", DataType::QASYMM8), - datasets::TinyShapes()), - framework::dataset::make("result_real_multiplier", 0.33f)), - framework::dataset::make("result_offset", 2, 3)), - framework::dataset::make("min", 0)), - framework::dataset::make("max", 255)), - framework::dataset::make("addBias", { false, true }))) + combine( + make("DataType", DataType::QASYMM8), + datasets::TinyShapes(), + make("result_real_multiplier", 0.33f), + make("result_offset", 2, 3), + make("min", 0), + make("max", 255), + make("addBias", { false, true }))) { // Validate output validate(CLAccessor(_target), _reference); @@ -343,13 +272,14 @@ TEST_SUITE(QASYMM8_SIGNED) using CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture_Signed = GEMMLowpQuantizeDownInt32ScaleByFloatValidationFixture<CLTensor, CLAccessor, CLGEMMLowpOutputStage, int8_t>; FIXTURE_DATA_TEST_CASE(RunTiny, CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture_Signed, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), - datasets::TinyShapes()), - framework::dataset::make("result_real_multiplier", 0.33f)), - framework::dataset::make("result_offset", 2, 3)), - framework::dataset::make("min", -128)), - framework::dataset::make("max", 127)), - framework::dataset::make("addBias", { false, true }))) + combine( + make("DataType", DataType::QASYMM8_SIGNED), + datasets::TinyShapes(), + make("result_real_multiplier", 0.33f), + make("result_offset", 2, 3), + make("min", -128), + make("max", 127), + make("addBias", { false, true }))) { // Validate output validate(CLAccessor(_target), _reference); @@ -363,4 +293,4 @@ TEST_SUITE_END() // GEMMLowp TEST_SUITE_END() // CL } // namespace validation } // namespace test -} // namespace arm_compute
\ No newline at end of file +} // namespace arm_compute diff --git a/tests/validation/CL/GEMMLowpMatrixMultiplyNative.cpp b/tests/validation/CL/GEMMLowpMatrixMultiplyNative.cpp index 1057af95f2..d0d06a8ddb 100644 --- a/tests/validation/CL/GEMMLowpMatrixMultiplyNative.cpp +++ b/tests/validation/CL/GEMMLowpMatrixMultiplyNative.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,7 +23,7 @@ */ #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" +#include "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyNativeKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/framework/Asserts.h" @@ -41,7 +41,7 @@ namespace validation using namespace arm_compute::misc::shape_calculator; // Create function for CLGEMMMatrixMultiplyNativeKernel -using CLGEMMLowpMatrixMultiplyNative = CLSynthetizeFunction<CLGEMMLowpMatrixMultiplyNativeKernel>; +using CLGEMMLowpMatrixMultiplyNative = CLSynthetizeOperator<opencl::kernels::ClGemmLowpMatrixMultiplyNativeKernel>; // Fixture for CLGEMMLowpMatrixMultiplyNative using CLGEMMLowpMatrixMultiplyNativeFixture = GEMMLowpMatrixMultiplyNativeValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyNative>; diff --git a/tests/validation/CL/GEMMLowpMatrixMultiplyReshaped.cpp b/tests/validation/CL/GEMMLowpMatrixMultiplyReshaped.cpp index 4873a291ab..88455bdeb8 100644 --- a/tests/validation/CL/GEMMLowpMatrixMultiplyReshaped.cpp +++ b/tests/validation/CL/GEMMLowpMatrixMultiplyReshaped.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,9 +23,9 @@ */ #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h" -#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" -#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" +#include "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedKernel.h" +#include "src/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h" +#include "src/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/framework/Asserts.h" @@ -42,14 +42,14 @@ namespace validation { using namespace arm_compute::misc::shape_calculator; -// Create function for CLGEMMReshapeLHSMatrixKernel -using CLGEMMReshapeLHSMatrix = CLSynthetizeFunction<CLGEMMReshapeLHSMatrixKernel>; +// Create function for ClGemmReshapeLhsMatrixKernel +using CLGEMMReshapeLHSMatrix = CLSynthetizeOperator<opencl::kernels::ClGemmReshapeLhsMatrixKernel>; -// Create function for CLGEMMReshapeRHSMatrixKernel -using CLGEMMReshapeRHSMatrix = CLSynthetizeFunction<CLGEMMReshapeRHSMatrixKernel>; +// Create function for ClGemmReshapeRhsMatrixKernel +using CLGEMMReshapeRHSMatrix = CLSynthetizeOperator<opencl::kernels::ClGemmReshapeRhsMatrixKernel>; -// Create function for CLGEMMMatrixMultiplyReshapedKernel -using CLGEMMLowpMatrixMultiplyReshaped = CLSynthetizeFunction<CLGEMMLowpMatrixMultiplyReshapedKernel>; +// Create function for CLGEMMLowpMatrixMultiplyReshapedKernel +using CLGEMMLowpMatrixMultiplyReshaped = CLSynthetizeOperator<opencl::kernels::ClGemmLowpMatrixMultiplyReshapedKernel>; // Fixture for CLGEMMLowpMatrixMultiplyReshaped using CLGEMMLowpMatrixMultiplyReshapedFixture = GEMMLowpMatrixMultiplyReshapedValidationFixture<CLTensor, CLAccessor, CLGEMMReshapeLHSMatrix, CLGEMMReshapeRHSMatrix, CLGEMMLowpMatrixMultiplyReshaped>; diff --git a/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRHS.cpp b/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRHS.cpp index fa256280ca..c56901effc 100644 --- a/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRHS.cpp +++ b/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRHS.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,8 +25,8 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" -#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" +#include "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel.h" +#include "src/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/PaddingCalculator.h" @@ -46,10 +46,10 @@ namespace validation using namespace arm_compute::misc::shape_calculator; // Create function for CLGEMMReshapeRHSMatrixKernel -using CLGEMMReshapeRHSMatrix = CLSynthetizeFunction<CLGEMMReshapeRHSMatrixKernel>; +using CLGEMMReshapeRHSMatrix = CLSynthetizeOperator<opencl::kernels::ClGemmReshapeRhsMatrixKernel>; // Create function for CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel -using CLGEMMLowpMatrixMultiplyReshapedOnlyRHS = CLSynthetizeFunction<CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel>; +using CLGEMMLowpMatrixMultiplyReshapedOnlyRHS = CLSynthetizeOperator<opencl::kernels::ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel>; // Fixture for CLGEMMLowpMatrixMultiplyReshapedOnlyRHS using CLGEMMLowpMatrixMultiplyReshapedOnlyRHSFixture = GEMMLowpMatrixMultiplyReshapedOnlyRHSValidationFixture<CLTensor, CLAccessor, CLGEMMReshapeRHSMatrix, CLGEMMLowpMatrixMultiplyReshapedOnlyRHS>; @@ -157,7 +157,7 @@ void validate_configuration(unsigned int m_value, unsigned int n_value, unsigned // Create and configure function CLGEMMLowpMatrixMultiplyReshapedOnlyRHS gemm; - gemm.configure(&lhs, &rhs_reshaped, &dst, gemm_info); + gemm.configure(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info); } } // namespace diff --git a/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRhsMMUL.cpp b/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRhsMMUL.cpp new file mode 100644 index 0000000000..a0d13c3e39 --- /dev/null +++ b/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRhsMMUL.cpp @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLCast.h" +#include "arm_compute/runtime/CL/functions/CLReductionOperation.h" +#include "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedOnlyRhsMMULKernel.h" +#include "src/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h" +#include "tests/CL/CLAccessor.h" +#include "tests/CL/Helper.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/fixtures/GEMMLowpFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +using namespace arm_compute::opencl::kernels; + +// Create function for CLGEMMReshapeRHSMatrixKernel +using CLGEMMReshapeRHSMatrix = CLSynthetizeOperator<opencl::kernels::ClGemmReshapeRhsMatrixKernel>; + +// Create function for CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel +using CLGEMMLowpMatrixMultiplyReshapedOnlyRHS = CLSynthetizeOperator<opencl::kernels::ClGemmLowpMatrixMultiplyReshapedOnlyRhsMMULKernel>; + +// Fixture for CLGEMMLowpMatrixMultiplyReshapedOnlyRHS +using CLGEMMLowpMatrixMultiplyReshapedOnlyRHSMMULFixture = + GEMMLowpMatrixMultiplyReshapedOnlyRHSMMULValidationFixture<CLTensor, CLAccessor, CLGEMMReshapeRHSMatrix, CLGEMMLowpMatrixMultiplyReshapedOnlyRHS>; + +// Fixture for CLGEMMLowpMatrixMultiplyReshapedOnlyRHS +using CLGEMMLowpMatrixMultiplyReshapedOnlyRHSMMULOutputStageFixtureSigned = + GEMMLowpMatrixMultiplyReshapedOnlyRHSMMULOutputStageValidationFixture<int8_t, CLTensor, CLAccessor, CLGEMMReshapeRHSMatrix, CLGEMMLowpMatrixMultiplyReshapedOnlyRHS, CLReductionOperation, CLCast>; + +using CLGEMMLowpMatrixMultiplyReshapedOnlyRHSMMULOutputStageFixtureUnsigned = + GEMMLowpMatrixMultiplyReshapedOnlyRHSMMULOutputStageValidationFixture<uint8_t, CLTensor, CLAccessor, CLGEMMReshapeRHSMatrix, CLGEMMLowpMatrixMultiplyReshapedOnlyRHS, CLReductionOperation, CLCast>; + +namespace +{ +// *INDENT-OFF* +// clang-format off + +/** M values to test */ +const auto m_values = framework::dataset::make("M", {16, 49}); + +/** N values to test */ +const auto n_values = framework::dataset::make("N", {16, 259}); + +/** K values to test */ +const auto k_values = framework::dataset::make("K", {192}); + +/** Batch size values to test */ +const auto b_values = framework::dataset::make("batch_size", {1, 2}); + +/** M0 values to test - Precommit */ +const auto m0 = framework::dataset::make("M0", {1, 2, 4}); + +/** N0 values to test - Precommit */ +const auto n0 = framework::dataset::make("N0", { 1, 4, 8}); + +/** K0 values to test - Precommit */ +const auto k0 = framework::dataset::make("K0", { 4 }); + +/** H0 values to test - Precommit */ +const auto h0 = framework::dataset::make("H0", 1); + +/** Interleave values to test with RHS matrix */ +const auto i_values_rhs = framework::dataset::make("interleave_rhs", { false }); + +/** Transpose values to test with RHS matrix */ +const auto t_values_rhs = framework::dataset::make("transpose_rhs", { true }); + +const auto broadcast_bias = framework::dataset::make("broadcast_bias", {true, false}); + +} // namespace + +TEST_SUITE(CL) +TEST_SUITE(GEMMLowpMatrixMultiplyReshapedOnlyRhsMMUL) +FIXTURE_DATA_TEST_CASE(Signed, CLGEMMLowpMatrixMultiplyReshapedOnlyRHSMMULFixture, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + m_values, + n_values), + k_values), + b_values), + m0), + n0), + k0), + h0), + i_values_rhs), + t_values_rhs), + framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED }))) +{ + // Validate output + if(arm_matrix_multiply_supported(CLKernelLibrary::get().get_device())) + { + validate(CLAccessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } +} +FIXTURE_DATA_TEST_CASE(Unsigned, CLGEMMLowpMatrixMultiplyReshapedOnlyRHSMMULFixture, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + m_values, + n_values), + k_values), + b_values), + m0), + n0), + k0), + h0), + i_values_rhs), + t_values_rhs), + framework::dataset::make("DataType", { DataType::QASYMM8}))) +{ + // Validate output + if(arm_matrix_multiply_supported(CLKernelLibrary::get().get_device())) + { + validate(CLAccessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } +} +FIXTURE_DATA_TEST_CASE(OutputStageSigned, CLGEMMLowpMatrixMultiplyReshapedOnlyRHSMMULOutputStageFixtureSigned, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + m_values, + n_values), + k_values), + b_values), + m0), + n0), + k0), + h0), + i_values_rhs), + t_values_rhs), + broadcast_bias), + framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED}))) +{ + // Validate output + if(arm_matrix_multiply_supported(CLKernelLibrary::get().get_device())) + { + validate(CLAccessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } +} +FIXTURE_DATA_TEST_CASE(OutputStageUnsigned, CLGEMMLowpMatrixMultiplyReshapedOnlyRHSMMULOutputStageFixtureUnsigned, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + m_values, + n_values), + k_values), + b_values), + m0), + n0), + k0), + h0), + i_values_rhs), + t_values_rhs), + broadcast_bias), + framework::dataset::make("DataType", { DataType::QASYMM8}))) +{ + // Validate output + if(arm_matrix_multiply_supported(CLKernelLibrary::get().get_device())) + { + validate(CLAccessor(_target), _reference); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } +} +TEST_SUITE_END() // GEMMLowpMatrixMultiplyReshapedOnlyRhsMMUL +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute
\ No newline at end of file diff --git a/tests/validation/CL/GEMMMatrixMultiply.cpp b/tests/validation/CL/GEMMMatrixMultiply.cpp deleted file mode 100644 index fdf7f503ec..0000000000 --- a/tests/validation/CL/GEMMMatrixMultiply.cpp +++ /dev/null @@ -1,338 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/KernelDescriptors.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" -#include "tests/CL/CLAccessor.h" -#include "tests/CL/Helper.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/GEMMFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -using namespace arm_compute::misc::shape_calculator; - -// Create function for CLGEMMMatrixMultiplyKernel -using CLGEMMMatrixMultiplyNative = CLSynthetizeFunction<CLGEMMMatrixMultiplyKernel>; - -// Fixture for GEMMMatrixMultiplyValidationFixture -template <typename T> -using CLGEMMMatrixMultiplyNativeFixture = GEMMMatrixMultiplyValidationFixture<CLTensor, CLAccessor, T, CLGEMMMatrixMultiplyNative>; - -// Fixture for GEMMMatrixMultiply3DValidationFixture -template <typename T> -using CLGEMMMatrixMultiplyNative3DFixture = GEMMMatrixMultiply3DValidationFixture<CLTensor, CLAccessor, T, CLGEMMMatrixMultiplyNative>; - -namespace -{ -// *INDENT-OFF* -// clang-format off -RelativeTolerance<float> rel_tolerance_f32(0.001f); -constexpr float abs_tolerance_f32(0.0001f); - -RelativeTolerance<half> rel_tolerance_f16(half(0.2)); -constexpr float tolerance_num_f16 = 0.02f; - -/** Alpha values to test */ -const auto alpha_values = framework::dataset::make("alpha", {1.0f, -0.75f} ); - -/** Beta values to test */ -const auto beta_values = framework::dataset::make("beta", {-0.35f, 0.0f} ); - -/** M, N combinations to test - * 1: Special 1x1 case - * 2: Special multples of processor size in both dimensions - * 3: Non multiples of processor size in both dimensions - * 4: Special 1x1003 case -*/ -const auto m_n_values = zip( - framework::dataset::make("M", {1, 16, 37, 1}), - framework::dataset::make("N", {1, 16, 51, 1003}) - ); - -/** N values to test */ -const auto n_values = framework::dataset::make("N", {51, 1003}); - -/** K values to test */ -const auto k_values = framework::dataset::make("K", 23); - -/** M_W values to test */ -const auto m_w_values = framework::dataset::make("M_W", 5); - -/** M_H values to test */ -const auto m_h_values = framework::dataset::make("M_H", 7); - -/** Batch size values to test */ -const auto b_values = framework::dataset::make("batch_size", 1, 3); - -/** Activation values to test */ -const auto act_values = framework::dataset::make("Activation", -{ - ActivationLayerInfo(), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, 2.f), -}); - -/** Broadcast bias from vector to matrix */ -const auto broadcast_bias_values = framework::dataset::make("broadcast_bias", { false, true } ); - -/** GPU architectures values to test */ -const auto gpu_arch_values = framework::dataset::make("GPUArch", -{ - GPUTarget::MIDGARD, - GPUTarget::BIFROST -}); - -/** Data types values to test in the configuration */ -const auto data_type_values = framework::dataset::make("DataType", -{ - DataType::F32, - DataType::F16 -}); - -/** M values to test */ -const auto fp16_mixed_precision_values = framework::dataset::make("fp16_mixed_precision", {true, false}); -} // namespace - -TEST_SUITE(CL) -TEST_SUITE(GEMMMatrixMultiply) -TEST_CASE(Negative, framework::DatasetMode::ALL) -{ - // Unsupported QASYMM8 data type - { - const auto lhs = TensorInfo(TensorShape(13U, 12U, 1U, 1U), 1, DataType::QASYMM8); - const auto rhs = TensorInfo(TensorShape(14U, 13U, 1U, 1U), 1, DataType::QASYMM8); - const auto out = TensorInfo(TensorShape(14U, 12U, 1U, 1U), 1, DataType::QASYMM8); - constexpr float alpha = 1.3f; - constexpr float beta = 0.7f; - const bool is_interleaved_transposed = false; - const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(12, 14, 13, 1, 1, 0, false, false); - const GPUTarget gpu_target = GPUTarget::MIDGARD; - const auto status = CLGEMMMatrixMultiplyKernel::validate(&lhs, &rhs, nullptr, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target); - ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); - } - - // Unsupported SIZE_T data type - { - const auto lhs = TensorInfo(TensorShape(13U, 12U, 1U, 1U), 1, DataType::SIZET); - const auto rhs = TensorInfo(TensorShape(14U, 13U, 1U, 1U), 1, DataType::SIZET); - const auto out = TensorInfo(TensorShape(14U, 12U, 1U, 1U), 1, DataType::SIZET); - constexpr float alpha = 1.3f; - constexpr float beta = 0.7f; - const bool is_interleaved_transposed = false; - const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(12, 14, 13, 1, 1, 0, false, false); - const GPUTarget gpu_target = GPUTarget::MIDGARD; - const auto status = CLGEMMMatrixMultiplyKernel::validate(&lhs, &rhs, nullptr, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target); - ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); - } - - // Mixed precision with F32 - { - const auto lhs = TensorInfo(TensorShape(13U, 12U, 1U, 1U), 1, DataType::F32); - const auto rhs = TensorInfo(TensorShape(14U, 13U, 1U, 1U), 1, DataType::F32); - const auto out = TensorInfo(TensorShape(14U, 12U, 1U, 1U), 1, DataType::F32); - constexpr float alpha = 1.3f; - constexpr float beta = 0.7f; - const bool is_interleaved_transposed = false; - const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(12, 14, 13, 1, 1, 0, false, false); - const GPUTarget gpu_target = GPUTarget::MIDGARD; - const bool fp_mixed_precision = true; - const auto status = CLGEMMMatrixMultiplyKernel::validate(&lhs, &rhs, nullptr, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target, fp_mixed_precision); - ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); - } - - // Max number of dimensions LHS matrix - { - const auto lhs = TensorInfo(TensorShape(13U, 12U, 1U, 1U, 4U), 1, DataType::F32); - const auto rhs = TensorInfo(TensorShape(14U, 13U, 1U, 1U), 1, DataType::F32); - const auto out = TensorInfo(TensorShape(14U, 12U, 1U, 1U), 1, DataType::F32); - constexpr float alpha = 1.3f; - constexpr float beta = 0.7f; - const bool is_interleaved_transposed = false; - const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(12, 14, 13, 1, 1, 0, false, false); - const GPUTarget gpu_target = GPUTarget::MIDGARD; - const auto status = CLGEMMMatrixMultiplyKernel::validate(&lhs, &rhs, nullptr, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target); - ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); - } - - // Max number of dimensions RHS matrix - { - const auto lhs = TensorInfo(TensorShape(13U, 12U, 1U, 4U), 1, DataType::F32); - const auto rhs = TensorInfo(TensorShape(14U, 13U, 1U, 4U), 1, DataType::F32); - const auto out = TensorInfo(TensorShape(14U, 12U, 1U, 4U), 1, DataType::F32); - constexpr float alpha = 1.3f; - constexpr float beta = 0.7f; - const bool is_interleaved_transposed = false; - const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(12, 14, 13, 1, 1, 0, false, false); - const GPUTarget gpu_target = GPUTarget::MIDGARD; - const auto status = CLGEMMMatrixMultiplyKernel::validate(&lhs, &rhs, nullptr, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target); - ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); - } - - // Broadcast bias - { - const auto lhs = TensorInfo(TensorShape(13U, 12U, 1U, 1U), 1, DataType::F16); - const auto rhs = TensorInfo(TensorShape(14U, 13U, 1U, 1U), 1, DataType::F16); - // The correct shape should be bias = TensorInfo(TensorShape(14U, 1U, 1U, 1U), 1, DataType::F32); - const auto bias = TensorInfo(TensorShape(14U, 12U, 1U, 1U), 1, DataType::F16); - const auto out = TensorInfo(TensorShape(14U, 12U, 1U, 1U), 1, DataType::F16); - constexpr float alpha = 1.3f; - constexpr float beta = 0.7f; - const bool is_interleaved_transposed = false; - const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(12, 14, 13, 1, 1, 0, false, true); - const GPUTarget gpu_target = GPUTarget::MIDGARD; - const bool fp_mixed_precision = false; - const auto status = CLGEMMMatrixMultiplyKernel::validate(&lhs, &rhs, &bias, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target, fp_mixed_precision); - ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); - } - - // Invalid dimensions for the bias - { - const auto lhs = TensorInfo(TensorShape(13U, 12U, 1U, 1U), 1, DataType::F32); - const auto rhs = TensorInfo(TensorShape(14U, 13U, 1U, 1U), 1, DataType::F32); - // The correct shape should be bias = TensorInfo(TensorShape(14U, 12U, 1U, 1U), 1, DataType::F32); - const auto bias = TensorInfo(TensorShape(14U, 8U, 1U, 1U), 1, DataType::F32); - const auto out = TensorInfo(TensorShape(14U, 12U, 1U, 1U), 1, DataType::F32); - constexpr float alpha = 1.3f; - constexpr float beta = 0.7f; - const bool is_interleaved_transposed = false; - const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(12, 14, 13, 1, 1, 0, false, false); - const GPUTarget gpu_target = GPUTarget::MIDGARD; - const bool fp_mixed_precision = false; - const auto status = CLGEMMMatrixMultiplyKernel::validate(&lhs, &rhs, &bias, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target, fp_mixed_precision); - ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); - } - - // Invalid dimensions for the output - { - const auto lhs = TensorInfo(TensorShape(13U, 12U, 1U, 1U), 1, DataType::F32); - const auto rhs = TensorInfo(TensorShape(14U, 13U, 1U, 1U), 1, DataType::F32); - // The correct shape should be out = TensorInfo(TensorShape(14U, 12U, 1U, 1U), 1, DataType::F32); - const auto out = TensorInfo(TensorShape(14U, 7U, 1U, 1U), 1, DataType::F32); - constexpr float alpha = 1.3f; - constexpr float beta = 0.7f; - const bool is_interleaved_transposed = false; - const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(12, 14, 13, 1, 1, 0, false, false); - const GPUTarget gpu_target = GPUTarget::MIDGARD; - const auto status = CLGEMMMatrixMultiplyKernel::validate(&lhs, &rhs, nullptr, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target); - ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); - } -} - -TEST_SUITE(Float) -TEST_SUITE(FP32) - -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyNativeFixture<float>, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(combine(combine(combine( - m_n_values, - k_values), - b_values), - alpha_values), - beta_values), - broadcast_bias_values), - framework::dataset::make("fp16_mixed_precision", false)), - act_values), - framework::dataset::make("DataType", DataType::F32)), - gpu_arch_values)) -{ - // Validate output - validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); -} - -FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyNative3DFixture<float>, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( - m_w_values, - m_h_values), - n_values), - k_values), - b_values), - alpha_values), - beta_values), - broadcast_bias_values), - framework::dataset::make("fp16_mixed_precision", false)), - act_values), - framework::dataset::make("DataType", DataType::F32)), - gpu_arch_values)) -{ - // Validate output - validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); -} - -TEST_SUITE_END() // FP32 - -TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyNativeFixture<half>, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(combine(combine(combine( - m_n_values, - k_values), - b_values), - alpha_values), - beta_values), - broadcast_bias_values), - fp16_mixed_precision_values), - act_values), - framework::dataset::make("DataType", DataType::F16)), - gpu_arch_values)) -{ - // Validate output - validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16); -} - -FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyNative3DFixture<half>, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( - m_w_values, - m_h_values), - n_values), - k_values), - b_values), - alpha_values), - beta_values), - broadcast_bias_values), - fp16_mixed_precision_values), - act_values), - framework::dataset::make("DataType", DataType::F16)), - gpu_arch_values)) -{ - // Validate output - validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16); -} - -TEST_SUITE_END() // FP16 -TEST_SUITE_END() // Float -TEST_SUITE_END() // GEMMMatrixMuliplty -TEST_SUITE_END() // CL -} // namespace validation -} // namespace test -} // namespace arm_compute
\ No newline at end of file diff --git a/tests/validation/CL/GEMMMatrixMultiplyInterleavedTransposed.cpp b/tests/validation/CL/GEMMMatrixMultiplyInterleavedTransposed.cpp deleted file mode 100644 index d6507a06c4..0000000000 --- a/tests/validation/CL/GEMMMatrixMultiplyInterleavedTransposed.cpp +++ /dev/null @@ -1,333 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/KernelDescriptors.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" -#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" -#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" -#include "tests/CL/CLAccessor.h" -#include "tests/CL/Helper.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/GEMMFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -using namespace arm_compute::misc::shape_calculator; - -// Create function for CLGEMMReshapeLHSMatrixKernel -using CLGEMMReshapeLHSMatrix = CLSynthetizeFunction<CLGEMMReshapeLHSMatrixKernel>; - -// Create function for CLGEMMReshapeRHSMatrixKernel -using CLGEMMReshapeRHSMatrix = CLSynthetizeFunction<CLGEMMReshapeRHSMatrixKernel>; - -// Create function for CLGEMMMatrixMultiplyKernel -using CLGEMMMatrixMultiplyReshaped = CLSynthetizeFunction<CLGEMMMatrixMultiplyKernel>; - -// Fixture for GEMMMatrixMultiplyInterleavedTransposedValidationFixture -template <typename T> -using CLGEMMMatrixMultiplyReshapedFixture = - GEMMMatrixMultiplyInterleavedTransposedValidationFixture<CLTensor, CLAccessor, T, CLGEMMReshapeLHSMatrix, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshaped>; - -// Fixture for GEMMMatrixMultiplyInterleavedTransposed3DValidationFixture -template <typename T> -using CLGEMMMatrixMultiplyReshaped3DFixture = - GEMMMatrixMultiplyInterleavedTransposed3DValidationFixture<CLTensor, CLAccessor, T, CLGEMMReshapeLHSMatrix, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshaped>; - -namespace -{ -// *INDENT-OFF* -// clang-format off -RelativeTolerance<float> rel_tolerance_f32(0.001f); -constexpr float abs_tolerance_f32(0.0001f); - -RelativeTolerance<half> rel_tolerance_f16(half(0.2)); -constexpr float tolerance_num_f16 = 0.02f; - -/** Alpha values to test */ -const auto alpha_values = framework::dataset::make("alpha", {1.0f, -0.75f} ); - -/** Beta values to test */ -const auto beta_values = framework::dataset::make("beta", {-0.35f, 0.0f} ); - -/** M, N combinations to test - * 1: Special 1x1 case - * 2: Special multples of processor size in both dimensions - * 3: Non multiples of processor size in both dimensions -*/ -const auto m_n_values = zip( - framework::dataset::make("M", {1, 16, 37}), - framework::dataset::make("N", {1, 16, 51}) - ); - -/** N values to test */ -const auto n_values = framework::dataset::make("N", 51); - -/** K values to test */ -const auto k_values = framework::dataset::make("K", 23); - -/** M_W values to test */ -const auto m_w_values = framework::dataset::make("M_W", 5); - -/** M_H values to test */ -const auto m_h_values = framework::dataset::make("M_H", 7); - -/** Batch size values to test */ -const auto b_values = framework::dataset::make("batch_size", 1, 3); - -/** Activation values to test */ -const auto act_values = framework::dataset::make("Activation", -{ - ActivationLayerInfo(), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, 2.f), -}); - -/** V0 values to test */ -const auto v0_values = framework::dataset::make("V0", 2); - -/** H0 values to test */ -const auto h0_values = framework::dataset::make("H0", 4); - -/** Broadcast bias from vector to matrix */ -const auto broadcast_bias_values = framework::dataset::make("broadcast_bias", {false, true} ); - -/** GPU architectures values to test */ -const auto gpu_arch_values = framework::dataset::make("GPUArch", -{ - GPUTarget::MIDGARD, - GPUTarget::BIFROST -}); - -/** Data types values to test in the configuration */ -const auto data_type_values = framework::dataset::make("DataType", -{ - DataType::F32, - DataType::F16 -}); - -/** M values to test */ -const auto fp16_mixed_precision_values = framework::dataset::make("fp16_mixed_precision", {true, false}); -} // namespace - -TEST_SUITE(CL) -TEST_SUITE(GEMMMatrixMultiplyInterleavedTransposed) -TEST_CASE(Negative, framework::DatasetMode::ALL) -{ - // The following tests are already integrated in the GEMMMatrixMultiply validation because - // in common with this validation - // - Unsupported QASYMM8 data type - // - Unsupported SIZE_T data type - // - Mixed precision with F32 - // - Max number of dimensions LHS matrix - // - Max number of dimensions RHS matrix - - // Invalid LHS dimensions - { - // The correct shape should be: lhs = TensorInfo(TensorShape(256U, 1U, 1U, 1U), 1, DataType::F32); - const auto lhs = TensorInfo(TensorShape(256U, 2U, 1U, 1U), 1, DataType::F32); - const auto rhs = TensorInfo(TensorShape(104U, 3U, 1U, 1U), 1, DataType::F32); - const auto bias = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32); - const auto out = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32); - constexpr float alpha = 1.3f; - constexpr float beta = 0.7f; - const bool is_interleaved_transposed = true; - const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(16, 24, 13, 2, 4, 0, false, false); - const GPUTarget gpu_target = GPUTarget::MIDGARD; - const bool fp_mixed_precision = false; - const auto status = CLGEMMMatrixMultiplyKernel::validate(&lhs, &rhs, &bias, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target, fp_mixed_precision); - ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); - } - - // Invalid RHS dimensions - { - const auto lhs = TensorInfo(TensorShape(256U, 1U, 1U, 1U), 1, DataType::F32); - // The correct shape should be rhs = TensorInfo(TensorShape(104U, 3U, 1U, 1U), 1, DataType::F32); - const auto rhs = TensorInfo(TensorShape(104U, 4U, 1U, 1U), 1, DataType::F32); - const auto bias = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32); - const auto out = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32); - constexpr float alpha = 1.3f; - constexpr float beta = 0.7f; - const bool is_interleaved_transposed = true; - const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(16, 24, 13, 2, 4, 0, false, false); - const GPUTarget gpu_target = GPUTarget::MIDGARD; - const bool fp_mixed_precision = false; - const auto status = CLGEMMMatrixMultiplyKernel::validate(&lhs, &rhs, &bias, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target, fp_mixed_precision); - ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); - } - - // Broadcast bias - { - const auto lhs = TensorInfo(TensorShape(256U, 1U, 1U, 1U), 1, DataType::F32); - const auto rhs = TensorInfo(TensorShape(104U, 3U, 1U, 1U), 1, DataType::F32); - // The correct shape should be bias = TensorInfo(TensorShape(24U, 1U, 1U, 1U), 1, DataType::F32); - const auto bias = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32); - const auto out = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32); - constexpr float alpha = 1.3f; - constexpr float beta = 0.7f; - const bool is_interleaved_transposed = true; - const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(16, 24, 13, 2, 4, 0, false, true); - const GPUTarget gpu_target = GPUTarget::MIDGARD; - const bool fp_mixed_precision = false; - const auto status = CLGEMMMatrixMultiplyKernel::validate(&lhs, &rhs, &bias, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target, fp_mixed_precision); - ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); - } - - // Invalid dimensions for the bias - { - const auto lhs = TensorInfo(TensorShape(256U, 1U, 1U, 1U), 1, DataType::F32); - const auto rhs = TensorInfo(TensorShape(104U, 3U, 1U, 1U), 1, DataType::F32); - // The correct shape should be bias = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32); - const auto bias = TensorInfo(TensorShape(25U, 16U, 1U, 1U), 1, DataType::F32); - const auto out = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32); - constexpr float alpha = 1.3f; - constexpr float beta = 0.7f; - const bool is_interleaved_transposed = true; - const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(16, 24, 13, 2, 4, 0, false, false); - const GPUTarget gpu_target = GPUTarget::MIDGARD; - const bool fp_mixed_precision = false; - const auto status = CLGEMMMatrixMultiplyKernel::validate(&lhs, &rhs, &bias, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target, fp_mixed_precision); - ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); - } - - // Invalid dimensions for the output - { - const auto lhs = TensorInfo(TensorShape(256U, 1U, 1U, 1U), 1, DataType::F32); - const auto rhs = TensorInfo(TensorShape(104U, 3U, 1U, 1U), 1, DataType::F32); - const auto bias = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32); - // The correct shape should be out = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32); - const auto out = TensorInfo(TensorShape(24U, 13U, 1U, 1U), 1, DataType::F32); - constexpr float alpha = 1.3f; - constexpr float beta = 0.7f; - const bool is_interleaved_transposed = true; - const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(16, 24, 13, 2, 4, 0, false, false); - const GPUTarget gpu_target = GPUTarget::MIDGARD; - const bool fp_mixed_precision = false; - const auto status = CLGEMMMatrixMultiplyKernel::validate(&lhs, &rhs, &bias, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target, fp_mixed_precision); - ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); - } -} - -TEST_SUITE(Float) -TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedFixture<float>, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( - m_n_values, - k_values), - b_values), - alpha_values), - beta_values), - v0_values), - h0_values), - broadcast_bias_values), - framework::dataset::make("fp16_mixed_precision", false)), - act_values), - framework::dataset::make("DataType", DataType::F32)), - gpu_arch_values)) -{ - // Validate output - validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); -} - -FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DFixture<float>, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( - m_w_values, - m_h_values), - n_values), - k_values), - b_values), - alpha_values), - beta_values), - v0_values), - h0_values), - broadcast_bias_values), - framework::dataset::make("fp16_mixed_precision", false)), - act_values), - framework::dataset::make("DataType", DataType::F32)), - gpu_arch_values)) -{ - // Validate output - validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); -} - -TEST_SUITE_END() // FP32 - -TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedFixture<half>, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( - m_n_values, - k_values), - b_values), - alpha_values), - beta_values), - v0_values), - h0_values), - broadcast_bias_values), - fp16_mixed_precision_values), - act_values), - framework::dataset::make("DataType", DataType::F16)), - gpu_arch_values)) -{ - // Validate output - validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16); -} - -FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DFixture<half>, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( - m_w_values, - m_h_values), - n_values), - k_values), - b_values), - alpha_values), - beta_values), - v0_values), - h0_values), - broadcast_bias_values), - fp16_mixed_precision_values), - act_values), - framework::dataset::make("DataType", DataType::F16)), - gpu_arch_values)) -{ - // Validate output - validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16); -} - -TEST_SUITE_END() // FP16 -TEST_SUITE_END() // Float -TEST_SUITE_END() // GEMMMatrixMulipltyInterleavedTransposed -TEST_SUITE_END() // CL -} // namespace validation -} // namespace test -} // namespace arm_compute
\ No newline at end of file diff --git a/tests/validation/CL/GEMMMatrixMultiplyNative.cpp b/tests/validation/CL/GEMMMatrixMultiplyNative.cpp index ec6b87fbae..0ddf43766f 100644 --- a/tests/validation/CL/GEMMMatrixMultiplyNative.cpp +++ b/tests/validation/CL/GEMMMatrixMultiplyNative.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,7 +26,7 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h" +#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/PaddingCalculator.h" @@ -44,9 +44,10 @@ namespace test namespace validation { using namespace arm_compute::misc::shape_calculator; +using namespace arm_compute::opencl::kernels; -// Create function for CLGEMMMatrixMultiplyNativeKernel -using CLGEMMMatrixMultiplyNative = CLSynthetizeFunction<CLGEMMMatrixMultiplyNativeKernel>; +// Create function for ClGemmMatrixMultiplyNativeKernel +using CLGEMMMatrixMultiplyNative = CLSynthetizeOperator<ClGemmMatrixMultiplyNativeKernel>; // Fixture for CLGEMMMatrixMultiplyNative template <typename T> @@ -90,8 +91,8 @@ const auto b_values = framework::dataset::make("batch_size", 1, 3); /** Activation values to test */ const auto act_values = framework::dataset::make("Activation", { - ActivationLayerInfo(), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, 2.f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU), }); /** M0 values to test - Precommit */ @@ -184,7 +185,7 @@ void validate_configuration(unsigned int m_value, unsigned int n_value, unsigned // Create and configure function CLGEMMMatrixMultiplyNative gemm; - gemm.configure(&lhs, &rhs, &bias, &dst, 1.0f, 1.0f, lhs_info, rhs_info, kernel_info); + gemm.configure(lhs.info(), rhs.info(), bias.info(), dst.info(), 1.0f, 1.0f, lhs_info, rhs_info, kernel_info); } } // namespace @@ -322,6 +323,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyNative3DFixture<float>, f // Validate output validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); } + TEST_SUITE_END() // FP32 TEST_SUITE_END() // Float TEST_SUITE_END() // GEMMMatrixMulipltyNative diff --git a/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp b/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp index 52afb716e4..b06e4bf213 100644 --- a/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp +++ b/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,9 +26,9 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" -#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" -#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" +#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h" +#include "src/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h" +#include "src/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/PaddingCalculator.h" @@ -46,15 +46,16 @@ namespace test namespace validation { using namespace arm_compute::misc::shape_calculator; +using namespace arm_compute::opencl::kernels; -// Create function for CLGEMMReshapeLHSMatrixKernel -using CLGEMMReshapeLHSMatrix = CLSynthetizeFunction<CLGEMMReshapeLHSMatrixKernel>; +// Create function for ClGemmReshapeLhsMatrixKernel +using CLGEMMReshapeLHSMatrix = CLSynthetizeOperator<ClGemmReshapeLhsMatrixKernel>; -// Create function for CLGEMMReshapeRHSMatrixKernel -using CLGEMMReshapeRHSMatrix = CLSynthetizeFunction<CLGEMMReshapeRHSMatrixKernel>; +// Create function for ClGemmReshapeRhsMatrixKernel +using CLGEMMReshapeRHSMatrix = CLSynthetizeOperator<ClGemmReshapeRhsMatrixKernel>; -// Create function for CLGEMMMatrixMultiplyReshapedKernel -using CLGEMMMatrixMultiplyReshaped = CLSynthetizeFunction<CLGEMMMatrixMultiplyReshapedKernel>; +// Create function for ClGemmMatrixMultiplyReshapedKernel +using CLGEMMMatrixMultiplyReshaped = CLSynthetizeOperator<ClGemmMatrixMultiplyReshapedKernel>; // Fixture for CLGEMMMatrixMultiplyReshaped template <typename T> @@ -109,6 +110,7 @@ const auto b_values = framework::dataset::make("batch_size", 2, 3); const auto act_values = framework::dataset::make("Activation", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, 2.f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU), }); /** Alpha values to test - Precommit */ @@ -327,7 +329,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi framework::dataset::make("Expected", { true, true, false, false, false, true, true,true})), input0_info ,input1_info, input2_info, output_info, lhs_info, rhs_info, gemm_info, expected) { - ARM_COMPUTE_EXPECT(bool(CLGEMMMatrixMultiplyReshapedKernel::validate(&input0_info.clone()->set_is_resizable(true), + ARM_COMPUTE_EXPECT(bool(ClGemmMatrixMultiplyReshapedKernel::validate(&input0_info.clone()->set_is_resizable(true), &input1_info.clone()->set_is_resizable(true), &input2_info.clone()->set_is_resizable(true), &output_info.clone()->set_is_resizable(true),1.f,1.f, @@ -335,6 +337,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi rhs_info, gemm_info)) == expected, framework::LogLevel::ERRORS); } + TEST_SUITE(Float) TEST_SUITE(FP32) @@ -360,7 +363,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedFixture<float>, fra act_values)) { // Validate output - validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); + if(validate_result) + { + validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedFixture<float>, framework::DatasetMode::DISABLED, @@ -385,7 +396,15 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedFixture<float>, fra act_values)) { // Validate output - validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); + if(validate_result) + { + validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DFixture<float>, framework::DatasetMode::ALL, @@ -410,7 +429,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DFixture<float>, act_values)) { // Validate output - validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); + if(validate_result) + { + validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DFixture<float>, framework::DatasetMode::DISABLED, @@ -435,8 +462,17 @@ FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DFixture<float>, act_values)) { // Validate output - validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); + if(validate_result) + { + validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } } + TEST_SUITE(ExportToCLImage) DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip( framework::dataset::make("Input0Info", { TensorInfo(TensorShape(256U, 16U, 2U), 1, DataType::F32), // OK or incorrect if cl_khr_image2d_from_buffer not supported @@ -559,10 +595,10 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi true, true, false, - false})), + true})), input0_info ,input1_info, input2_info, output_info, lhs_info, rhs_info, gemm_info, expected) { - ARM_COMPUTE_EXPECT(bool(CLGEMMMatrixMultiplyReshapedKernel::validate(&input0_info.clone()->set_is_resizable(true), + ARM_COMPUTE_EXPECT(bool(ClGemmMatrixMultiplyReshapedKernel::validate(&input0_info.clone()->set_is_resizable(true), &input1_info.clone()->set_is_resizable(true), &input2_info.clone()->set_is_resizable(true), &output_info.clone()->set_is_resizable(true),1.f,1.f, @@ -703,6 +739,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DFixture<float>, framework::ARM_COMPUTE_PRINT_INFO(); } } + TEST_SUITE_END() // ExportToCLImage TEST_SUITE_END() // FP32 @@ -730,7 +767,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedFixture<half>, fram act_values)) { // Validate output - validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16); + if(validate_result) + { + validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedFixture<half>, framework::DatasetMode::DISABLED, @@ -755,7 +800,15 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedFixture<half>, fram act_values)) { // Validate output - validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16); + if(validate_result) + { + validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DFixture<half>, framework::DatasetMode::ALL, @@ -780,7 +833,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DFixture<half>, act_values)) { // Validate output - validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16); + if(validate_result) + { + validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DFixture<half>, framework::DatasetMode::DISABLED, @@ -805,7 +866,15 @@ FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DFixture<half>, act_values)) { // Validate output - validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16); + if(validate_result) + { + validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } } TEST_SUITE(ExportToCLImage) @@ -930,10 +999,10 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi true, true, false, - false})), + true})), input0_info ,input1_info, input2_info, output_info, lhs_info, rhs_info, gemm_info, expected) { - ARM_COMPUTE_EXPECT(bool(CLGEMMMatrixMultiplyReshapedKernel::validate(&input0_info.clone()->set_is_resizable(true), + ARM_COMPUTE_EXPECT(bool(ClGemmMatrixMultiplyReshapedKernel::validate(&input0_info.clone()->set_is_resizable(true), &input1_info.clone()->set_is_resizable(true), &input2_info.clone()->set_is_resizable(true), &output_info.clone()->set_is_resizable(true),1.f,1.f, @@ -1074,6 +1143,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DFixture<half>, framework::ARM_COMPUTE_PRINT_INFO(); } } + TEST_SUITE_END() // ExportToCLImage TEST_SUITE_END() // FP16 @@ -1101,7 +1171,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedMixedPrecisionFixtu act_values)) { // Validate output - validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision); + if(validate_result) + { + validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedMixedPrecisionFixture<half>, framework::DatasetMode::DISABLED, @@ -1126,7 +1204,15 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedMixedPrecisionFixtu act_values)) { // Validate output - validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision); + if(validate_result) + { + validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DMixedPrecisionFixture<half>, framework::DatasetMode::ALL, @@ -1151,7 +1237,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DMixedPrecisionF act_values)) { // Validate output - validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision); + if(validate_result) + { + validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } } FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DMixedPrecisionFixture<half>, framework::DatasetMode::DISABLED, @@ -1176,8 +1270,17 @@ FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DMixedPrecisionF act_values)) { // Validate output - validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision); + if(validate_result) + { + validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } } + TEST_SUITE_END() // MixedPrecision TEST_SUITE_END() // Float TEST_SUITE_END() // GEMMMatrixMultiplyReshaped diff --git a/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp index ebcecb8b78..dafc8dc5ec 100644 --- a/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp +++ b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,8 +26,8 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" -#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" +#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h" +#include "src/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/PaddingCalculator.h" @@ -45,12 +45,13 @@ namespace test namespace validation { using namespace arm_compute::misc::shape_calculator; +using namespace arm_compute::opencl::kernels; -// Create function for CLGEMMReshapeRHSMatrixKernel -using CLGEMMReshapeRHSMatrix = CLSynthetizeFunction<CLGEMMReshapeRHSMatrixKernel>; +// Create function for ClGemmReshapeRhsMatrixKernel +using CLGEMMReshapeRHSMatrix = CLSynthetizeOperator<ClGemmReshapeRhsMatrixKernel>; -// Create function for CLGEMMMatrixMultiplyReshapedOnlyRHSKernel -using CLGEMMMatrixMultiplyReshapedOnlyRHS = CLSynthetizeFunction<CLGEMMMatrixMultiplyReshapedOnlyRHSKernel>; +// Create function for ClGemmMatrixMultiplyReshapedOnlyRhsKernel +using CLGEMMMatrixMultiplyReshapedOnlyRHS = CLSynthetizeOperator<ClGemmMatrixMultiplyReshapedOnlyRhsKernel>; // Fixture for CLGEMMMatrixMultiplyReshapedOnlyRHS template <typename T> @@ -98,6 +99,7 @@ const auto b_values = framework::dataset::make("batch_size", 2); const auto act_values = framework::dataset::make("Activation", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 10.f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU), }); /** M0 values to test - precommit */ @@ -210,6 +212,7 @@ bool validate_configuration(unsigned int m_value, unsigned int n_value, unsigned CLGEMMMatrixMultiplyReshapedOnlyRHS gemm; return bool(gemm.validate(&lhs, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info)); } + } // namespace TEST_SUITE(CL) @@ -461,6 +464,7 @@ FIXTURE_DATA_TEST_CASE(RunNightly3D, CLGEMMMatrixMultiplyReshapedOnlyRHS3DFixtur framework::ARM_COMPUTE_PRINT_INFO(); } } + TEST_SUITE_END() // FP32 TEST_SUITE(FP16) @@ -589,6 +593,7 @@ FIXTURE_DATA_TEST_CASE(RunNightly3D, CLGEMMMatrixMultiplyReshapedOnlyRHS3DFixtur framework::ARM_COMPUTE_PRINT_INFO(); } } + TEST_SUITE_END() // FP16 TEST_SUITE_END() // Float diff --git a/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRhsMMUL.cpp b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRhsMMUL.cpp new file mode 100644 index 0000000000..3b3cf85317 --- /dev/null +++ b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRhsMMUL.cpp @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsMMULKernel.h" +#include "src/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h" +#include "tests/CL/CLAccessor.h" +#include "tests/CL/Helper.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/GEMMFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +using namespace arm_compute::opencl::kernels; + +// Create function for ClGemmReshapeRhsMatrixKernel +using CLGEMMReshapeRHSMatrix = CLSynthetizeOperator<ClGemmReshapeRhsMatrixKernel>; + +// Create function for ClGemmMatrixMultiplyReshapedOnlyRhsMMULKernel +using CLGEMMMatrixMultiplyReshapedOnlyRhsMMUL = CLSynthetizeOperator<ClGemmMatrixMultiplyReshapedOnlyRhsMMULKernel>; + +// Fixture for CLGEMMMatrixMultiplyReshapedOnlyRhsMMUL +template <typename T> +using CLGEMMMatrixMultiplyReshapedOnlyRhsMMULFixture = GEMMMatrixMultiplyReshapedOnlyRhsMMULValidationFixture<CLTensor, CLAccessor, T, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshapedOnlyRhsMMUL>; + +namespace +{ +// *INDENT-OFF* +// clang-format off +RelativeTolerance<float> rel_tolerance_f32(0.001f); +constexpr float abs_tolerance_f32(0.0001f); +RelativeTolerance<half_float::half> rel_tolerance_f16(half_float::half(0.001f)); +constexpr float abs_tolerance_f16(0.3f); + +/** Alpha values to test - Precommit */ +const auto a_values = framework::dataset::make("alpha", {1.0f, 0.75f} ); + +/** Beta values to test - Precommit */ +const auto beta_values = framework::dataset::make("beta", {0.0f, -0.75f} ); + +/** M values to test */ +const auto m_values = framework::dataset::make("M", {49}); + +/** N values to test */ +const auto n_values = framework::dataset::make("N", {257}); + +/** K values to test */ +/** The test case requires this to be multiple of 4*/ +const auto k_values = framework::dataset::make("K", {192}); + +/** Batch size values to test */ +const auto b_values = framework::dataset::make("batch_size", {1, 2}); + +/** Activation values to test */ +const auto act_values = framework::dataset::make("Activation", +{ + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU), +}); + +/** M0 values to test - Precommit */ +const auto m0_values_precommit = framework::dataset::make("M0", { 1, 2, 4 }); + +/** N0 values to test - Precommit */ +const auto n0_values_precommit = framework::dataset::make("N0", { 4, 8 }); + +/** K0 values to test - Precommit */ +const auto k0_values_precommit = framework::dataset::make("K0", { 1 }); + +/** Broadcast bias from vector to matrix */ +const auto broadcast_bias_values = framework::dataset::make("broadcast_bias", { false, true } ); + +} // namespace + +TEST_SUITE(CL) +TEST_SUITE(GEMMMatrixMultiplyReshapedOnlyRhsMMUL) +TEST_SUITE(Float) +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedOnlyRhsMMULFixture<float>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + m_values, + n_values), + k_values), + b_values), + m0_values_precommit), + n0_values_precommit), + k0_values_precommit), + framework::dataset::make("ExportToCLImage", false)), + framework::dataset::make("DataType", DataType::F32)), + a_values), + beta_values), + broadcast_bias_values), + act_values)) +{ + // Validate output + if(validate_result) + { + validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } +} + +TEST_SUITE_END() // FP32 + +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedOnlyRhsMMULFixture<half>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + m_values, + n_values), + k_values), + b_values), + m0_values_precommit), + n0_values_precommit), + k0_values_precommit), + framework::dataset::make("ExportToCLImage", false)), + framework::dataset::make("DataType", DataType::F16)), + a_values), + beta_values), + broadcast_bias_values), + act_values)) +{ + // Validate output + if(validate_result) + { + validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } +} +TEST_SUITE_END() // FP16 + +TEST_SUITE(ExportToCLImage) +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedOnlyRhsMMULFixture<float>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + m_values, + n_values), + k_values), + b_values), + m0_values_precommit), + n0_values_precommit), + k0_values_precommit), + framework::dataset::make("ExportToCLImage", true)), + framework::dataset::make("DataType", DataType::F32)), + a_values), + beta_values), + broadcast_bias_values), + act_values)) +{ + // Validate output + if(validate_result) + { + validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } +} + +TEST_SUITE_END() // FP32 + +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedOnlyRhsMMULFixture<half>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + m_values, + n_values), + k_values), + b_values), + m0_values_precommit), + n0_values_precommit), + k0_values_precommit), + framework::dataset::make("ExportToCLImage", true)), + framework::dataset::make("DataType", DataType::F16)), + a_values), + beta_values), + broadcast_bias_values), + act_values)) +{ + // Validate output + if(validate_result) + { + validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } +} +TEST_SUITE_END() // FP16 +TEST_SUITE_END() // ExportToCLImage +TEST_SUITE_END() // Float +TEST_SUITE_END() // GEMMMatrixMultiplyReshapedOnlyRhsMMUL +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/CL/GEMMReshapeLHSMatrix.cpp b/tests/validation/CL/GEMMReshapeLHSMatrix.cpp index 34c37dffde..0dd9b811f6 100644 --- a/tests/validation/CL/GEMMReshapeLHSMatrix.cpp +++ b/tests/validation/CL/GEMMReshapeLHSMatrix.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,7 +25,7 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" +#include "src/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/PaddingCalculator.h" @@ -43,9 +43,10 @@ namespace test namespace validation { using namespace arm_compute::misc::shape_calculator; +using namespace arm_compute::opencl::kernels; // Initialize the output tensor with zero and fill the border with zero -using CLGEMMReshapeLHSMatrix = CLSynthetizeFunctionInitOutputWithZeroAndWithZeroConstantBorder<CLGEMMReshapeLHSMatrixKernel, 16>; +using CLGEMMReshapeLHSMatrix = CLSynthetizeOperatorInitOutputWithZeroAndWithZeroConstantBorder<ClGemmReshapeLhsMatrixKernel, 16>; template <typename T> using CLGEMMReshapeLHSMatrixFixture = GEMMReshapeLHSMatrixValidationFixture<CLTensor, CLAccessor, CLGEMMReshapeLHSMatrix, T, false>; @@ -65,8 +66,10 @@ const auto b_values = framework::dataset::make("batchsize", 1, 3); /** M0 values to test */ const auto m0_values_s32 = framework::dataset::make("M0", { 2, 3 }); -const auto m0_values_s16 = framework::dataset::make("M0", { 4, 5 }); -const auto m0_values_s8 = framework::dataset::make("M0", { 6, 7, 8 }); +const auto m0_values_s16 = framework::dataset::make("M0", { 4 }); +const auto m0_values_s16_nt = framework::dataset::make("M0", { 5 }); +const auto m0_values_s8_nt = framework::dataset::make("M0", { 6,7 }); +const auto m0_values_s8 = framework::dataset::make("M0", { 8 }); /** K0 values to test */ const auto k0_values_s32 = framework::dataset::make("K0", { 2, 3 }); @@ -100,6 +103,7 @@ FIXTURE_DATA_TEST_CASE(S32, CLGEMMReshapeLHSMatrixFixture<int>, framework::Datas // Validate output validate(CLAccessor(_target), _reference); } + FIXTURE_DATA_TEST_CASE(S16, CLGEMMReshapeLHSMatrixFixture<short>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallGEMMReshape2DShapes(), b_values), @@ -113,6 +117,7 @@ FIXTURE_DATA_TEST_CASE(S16, CLGEMMReshapeLHSMatrixFixture<short>, framework::Dat // Validate output validate(CLAccessor(_target), _reference); } + FIXTURE_DATA_TEST_CASE(S8, CLGEMMReshapeLHSMatrixFixture<char>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallGEMMReshape2DShapes(), b_values), @@ -127,6 +132,37 @@ FIXTURE_DATA_TEST_CASE(S8, CLGEMMReshapeLHSMatrixFixture<char>, framework::Datas validate(CLAccessor(_target), _reference); } +TEST_SUITE(NotTransposed) +FIXTURE_DATA_TEST_CASE(S16, CLGEMMReshapeLHSMatrixFixture<short>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(datasets::SmallGEMMReshape2DShapes(), + b_values), + framework::dataset::make("DataType", DataType::S16)), + m0_values_s16_nt), + k0_values_s16), + v0_values), + i_values), + framework::dataset::make("transpose", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} + +FIXTURE_DATA_TEST_CASE(S8, CLGEMMReshapeLHSMatrixFixture<char>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(datasets::SmallGEMMReshape2DShapes(), + b_values), + framework::dataset::make("DataType", DataType::S8)), + m0_values_s8_nt), + k0_values_s8), + v0_values), + i_values), + framework::dataset::make("transpose", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} + +TEST_SUITE_END() + TEST_SUITE(ReinterpretInputAs3D) FIXTURE_DATA_TEST_CASE(S32, CLGEMMReshapeLHSMatrix3DFixture<int>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallGEMMReshape3DShapes(), diff --git a/tests/validation/CL/GEMMReshapeRHSMatrix.cpp b/tests/validation/CL/GEMMReshapeRHSMatrix.cpp index 14048e81ec..f8462058a6 100644 --- a/tests/validation/CL/GEMMReshapeRHSMatrix.cpp +++ b/tests/validation/CL/GEMMReshapeRHSMatrix.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,7 +25,7 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" +#include "src/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/PaddingCalculator.h" @@ -73,9 +73,10 @@ const auto i_values = framework::dataset::make("interleave", { true, false }); } // namespace using namespace arm_compute::misc::shape_calculator; +using namespace arm_compute::opencl::kernels; // Initialize the output tensor with zero and fill the border with zero -using CLGEMMReshapeRHSMatrix = CLSynthetizeFunctionInitOutputWithZeroAndWithZeroConstantBorder<CLGEMMReshapeRHSMatrixKernel, 16>; +using CLGEMMReshapeRHSMatrix = CLSynthetizeOperatorInitOutputWithZeroAndWithZeroConstantBorder<ClGemmReshapeRhsMatrixKernel, 16>; template <typename T> using CLGEMMReshapeRHSMatrixFixture = GEMMReshapeRHSMatrixValidationFixture<CLTensor, CLAccessor, CLGEMMReshapeRHSMatrix, T>; @@ -117,7 +118,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( rhs_info.transpose = true; rhs_info.interleave = true; - bool has_error = bool(CLGEMMReshapeRHSMatrixKernel::validate(&input_info.clone()->set_is_resizable(false), (output_info.total_size() == 0) ? nullptr : &output_info.clone()->set_is_resizable(false), rhs_info)); + bool has_error = bool(ClGemmReshapeRhsMatrixKernel::validate(&input_info.clone()->set_is_resizable(false), (output_info.total_size() == 0) ? nullptr : &output_info.clone()->set_is_resizable(false), rhs_info)); ARM_COMPUTE_EXPECT(has_error == expected, framework::LogLevel::ERRORS); } @@ -158,9 +159,9 @@ DATA_TEST_CASE(ValidatePadding, framework::DatasetMode::ALL, combine(combine(com padding = round_up_width - output_shape[0]; } - CLGEMMReshapeRHSMatrixKernel kernel; + ClGemmReshapeRhsMatrixKernel kernel; - kernel.configure(&input, &output, rhs_info); + kernel.configure(CLKernelLibrary::get().get_compile_context(), input.info(), output.info(), rhs_info); ARM_COMPUTE_EXPECT((output.info()->padding().right == padding), framework::LogLevel::ERRORS); } diff --git a/tests/validation/CL/Gather.cpp b/tests/validation/CL/Gather.cpp index f0b87d7d9f..7619baae1e 100644 --- a/tests/validation/CL/Gather.cpp +++ b/tests/validation/CL/Gather.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2020, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -48,19 +48,21 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 27U), 1, DataType::F16), TensorInfo(TensorShape(27U, 27U), 1, DataType::F32), TensorInfo(TensorShape(27U, 27U), 1, DataType::F32), - TensorInfo(TensorShape(27U, 27U), 1, DataType::F32), // Invalid Indices data type - TensorInfo(TensorShape(27U, 27U), 1, DataType::F32), // Invalid Indices dimensionality - TensorInfo(TensorShape(5U, 5U, 5U, 5U, 5U), 1, DataType::F32), // Invalid Input dimensionality - TensorInfo(TensorShape(27U, 27U), 1, DataType::F16), // Mismatching data type input/output - TensorInfo(TensorShape(27U, 27U), 1, DataType::F32), // Invalid positive axis value - TensorInfo(TensorShape(27U, 27U), 1, DataType::F16), // Invalid negative axis value + TensorInfo(TensorShape(27U, 27U), 1, DataType::F32), // Invalid Output shape + TensorInfo(TensorShape(27U, 27U), 1, DataType::F32), // Invalid Indices data type + TensorInfo(TensorShape(27U, 27U), 1, DataType::F32), // Invalid Indices dimensionality + TensorInfo(TensorShape(5U, 5U, 5U, 5U, 5U), 1, DataType::F32), // Invalid Input dimensionality + TensorInfo(TensorShape(27U, 27U), 1, DataType::F16), // Mismatching data type input/output + TensorInfo(TensorShape(27U, 27U), 1, DataType::F32), // Invalid positive axis value + TensorInfo(TensorShape(27U, 27U), 1, DataType::F16), // Invalid negative axis value }), framework::dataset::make("IndicesInfo", { TensorInfo(TensorShape(10U), 1, DataType::U32), TensorInfo(TensorShape(10U), 1, DataType::U32), TensorInfo(TensorShape(10U), 1, DataType::U32), - TensorInfo(TensorShape(10U), 1, DataType::U8), TensorInfo(TensorShape(10U, 10U), 1, DataType::U32), + TensorInfo(TensorShape(10U), 1, DataType::U8), + TensorInfo(TensorShape(10U, 10U, 10U, 10U), 1, DataType::U32), TensorInfo(TensorShape(10U), 1, DataType::U32), TensorInfo(TensorShape(10U), 1, DataType::U32), TensorInfo(TensorShape(10U), 1, DataType::U32), @@ -71,7 +73,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( TensorInfo(TensorShape(27U, 10U), 1, DataType::F32), TensorInfo(TensorShape(10U, 27U), 1, DataType::F32), TensorInfo(TensorShape(10U, 27U), 1, DataType::F32), - TensorInfo(TensorShape(27U, 10U), 1, DataType::F32), + TensorInfo(TensorShape(10U, 27U), 1, DataType::F32), + TensorInfo(TensorShape(27U, 10U, 10U, 10U, 10U), 1, DataType::F32), TensorInfo(TensorShape(10U, 5U, 5U, 5U, 5U), 1, DataType::F32), TensorInfo(TensorShape(27U, 10U), 1, DataType::F32), TensorInfo(TensorShape(27U, 27U), 1, DataType::F32), @@ -82,13 +85,14 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( 1, -2, 0, + 0, 1, 0, 1, 2, -3, })), - framework::dataset::make("Expected", { true, true, true, false, false, false, false, false, false })), + framework::dataset::make("Expected", { true, true, true, false, false, false, false, false, false, false })), input_info, indices_info, output_info, axis, expected) { const Status status = CLGather::validate(&input_info.clone()->set_is_resizable(true), &indices_info.clone()->set_is_resizable(true), &output_info.clone()->set_is_resizable(true), axis); @@ -111,6 +115,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall, validate(CLAccessor(_target), _reference); } +FIXTURE_DATA_TEST_CASE(RunSmallMultiDimIndices, + CLGatherFixture<half>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::CLSmallGatherMultiDimIndicesDataset(), framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} + FIXTURE_DATA_TEST_CASE(RunLarge, CLGatherFixture<half>, framework::DatasetMode::NIGHTLY, @@ -131,6 +144,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall, validate(CLAccessor(_target), _reference); } +FIXTURE_DATA_TEST_CASE(RunSmallMultiDimIndices, + CLGatherFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::CLSmallGatherMultiDimIndicesDataset(), framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} + FIXTURE_DATA_TEST_CASE(RunLarge, CLGatherFixture<float>, framework::DatasetMode::NIGHTLY, @@ -152,6 +174,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall, validate(CLAccessor(_target), _reference); } +FIXTURE_DATA_TEST_CASE(RunSmallMultiDimIndices, + CLGatherFixture<uint8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::CLSmallGatherMultiDimIndicesDataset(), framework::dataset::make("DataType", DataType::U8))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} + FIXTURE_DATA_TEST_CASE(RunLarge, CLGatherFixture<uint8_t>, framework::DatasetMode::NIGHTLY, @@ -172,6 +203,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, validate(CLAccessor(_target), _reference); } +FIXTURE_DATA_TEST_CASE(RunSmallMultiDimIndices, + CLGatherFixture<uint16_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::CLSmallGatherMultiDimIndicesDataset(), framework::dataset::make("DataType", DataType::U16))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} + + FIXTURE_DATA_TEST_CASE(RunLarge, CLGatherFixture<uint16_t>, framework::DatasetMode::NIGHTLY, diff --git a/tests/validation/CL/Im2Col.cpp b/tests/validation/CL/Im2Col.cpp index c6006efcba..1f5b781690 100644 --- a/tests/validation/CL/Im2Col.cpp +++ b/tests/validation/CL/Im2Col.cpp @@ -22,7 +22,7 @@ * SOFTWARE. */ #include "arm_compute/core/Types.h" -#include "src/core/CL/kernels/CLIm2ColKernel.h" +#include "src/gpu/cl/kernels/ClIm2ColKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/framework/Asserts.h" @@ -40,7 +40,7 @@ namespace validation TEST_SUITE(CL) TEST_SUITE(Im2Col) -using CLIm2Col = CLSynthetizeFunction<CLIm2ColKernel>; +using ClIm2Col = ClSynthetizeOperatorWithBorder<opencl::kernels::ClIm2ColKernel>; /** Negative tests * @@ -63,7 +63,7 @@ TEST_CASE(Negative, framework::DatasetMode::ALL) const auto output = TensorInfo(TensorShape(9U, 10U, 12U, 2U), 1, DataType::F32); const auto conv_size = Size2D(3, 3); const bool has_bias = false; - const auto status = CLIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias); + const auto status = opencl::kernels::ClIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias); ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); } @@ -73,7 +73,7 @@ TEST_CASE(Negative, framework::DatasetMode::ALL) const auto output = TensorInfo(TensorShape(9U, 80U, 2U), 1, DataType::QASYMM8); const auto conv_size = Size2D(3, 3); const bool has_bias = true; - const auto status = CLIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias); + const auto status = opencl::kernels::ClIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias); ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); } @@ -84,7 +84,7 @@ TEST_CASE(Negative, framework::DatasetMode::ALL) const auto conv_size = Size2D(3, 3); const auto dilation = Size2D(0, 1); const bool has_bias = false; - const auto status = CLIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias, dilation); + const auto status = opencl::kernels::ClIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias, dilation); ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); } @@ -96,7 +96,7 @@ TEST_CASE(Negative, framework::DatasetMode::ALL) const auto dilation = Size2D(1, 1); const bool has_bias = false; const unsigned int num_groups = 2; - const auto status = CLIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias, dilation, num_groups); + const auto status = opencl::kernels::ClIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias, dilation, num_groups); ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); } @@ -108,7 +108,7 @@ TEST_CASE(Negative, framework::DatasetMode::ALL) const auto dilation = Size2D(1, 1); const bool has_bias = false; const unsigned int num_groups = 2; - const auto status = CLIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias, dilation, num_groups); + const auto status = opencl::kernels::ClIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias, dilation, num_groups); ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); } @@ -118,7 +118,7 @@ TEST_CASE(Negative, framework::DatasetMode::ALL) const auto output = TensorInfo(TensorShape(9U, 81U, 2U), 1, DataType::F32); const auto conv_size = Size2D(3, 3); const bool has_bias = false; - const auto status = CLIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias); + const auto status = opencl::kernels::ClIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias); ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); } @@ -128,13 +128,13 @@ TEST_CASE(Negative, framework::DatasetMode::ALL) const auto output = TensorInfo(TensorShape(1U, 1U, 1U, 2U), 1, DataType::F32, DataLayout::NHWC); const auto conv_size = Size2D(9, 9); const bool has_bias = false; - const auto status = CLIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias); + const auto status = opencl::kernels::ClIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias); ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); } } template <typename T> -using CLIm2ColFixture = Im2ColValidationFixture<CLTensor, CLAccessor, CLIm2Col, T, true>; +using ClIm2ColFixture = Im2ColOpValidationFixture<CLTensor, CLAccessor, ClIm2Col, T, true>; TEST_SUITE(NHWC) @@ -150,7 +150,7 @@ TEST_SUITE(NHWC) * Kernel tested im2col3x3_nhwc */ FIXTURE_DATA_TEST_CASE(W3x3, - CLIm2ColFixture<float>, + ClIm2ColFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine( framework::dataset::make("InputShape", @@ -180,7 +180,7 @@ framework::dataset::make("Groups", 1))) * Kernel tested im2col9x9_nhwc */ FIXTURE_DATA_TEST_CASE(W9x9, - CLIm2ColFixture<float>, + ClIm2ColFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine( framework::dataset::make("InputShape", @@ -210,7 +210,7 @@ framework::dataset::make("Groups", 1))) * Kernel tested im2col_generic_nhwc */ FIXTURE_DATA_TEST_CASE(Generic, - CLIm2ColFixture<float>, + ClIm2ColFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine( framework::dataset::make("InputShape", @@ -243,7 +243,7 @@ TEST_SUITE(NCHW) * Kernel tested im2col1x1_stridex1_nchw */ FIXTURE_DATA_TEST_CASE(W1x1_Stride1_NoPad, - CLIm2ColFixture<float>, + ClIm2ColFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine( framework::dataset::make("InputShape", { TensorShape(4U, 4U, 3U, 2U), TensorShape(5U, 4U, 3U, 2U), TensorShape(3U, 4U, 3U, 2U) }), @@ -267,7 +267,7 @@ FIXTURE_DATA_TEST_CASE(W1x1_Stride1_NoPad, * Kernel tested im2col3x3_nchw */ FIXTURE_DATA_TEST_CASE(W3x3, - CLIm2ColFixture<float>, + ClIm2ColFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine( framework::dataset::make("InputShape", TensorShape(4U, 4U, 3U, 2U)), @@ -291,7 +291,7 @@ FIXTURE_DATA_TEST_CASE(W3x3, * Kernel tested im2col5x5_nchw */ FIXTURE_DATA_TEST_CASE(W5x5, - CLIm2ColFixture<float>, + ClIm2ColFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine( framework::dataset::make("InputShape", TensorShape(7U, 4U, 3U, 2U)), @@ -317,7 +317,7 @@ FIXTURE_DATA_TEST_CASE(W5x5, * Kernel tested im2col11x11_padx0_pady0_nchw */ FIXTURE_DATA_TEST_CASE(W11x11_NoPad, - CLIm2ColFixture<float>, + ClIm2ColFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine( framework::dataset::make("InputShape", { TensorShape(11U, 11U, 2U, 2U), TensorShape(14U, 13U, 1U, 2U) }), @@ -341,7 +341,7 @@ FIXTURE_DATA_TEST_CASE(W11x11_NoPad, * Kernel tested im2col_generic_padx0_pady0_nchw */ FIXTURE_DATA_TEST_CASE(GenericZeroPad, - CLIm2ColFixture<float>, + ClIm2ColFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine( framework::dataset::make("InputShape", TensorShape(13U, 11U, 2U, 2U)), @@ -367,7 +367,7 @@ TEST_SUITE_END() // NCHW * Kernel tested im2col_generic_(nchw|nhwc) */ FIXTURE_DATA_TEST_CASE(Generic, - CLIm2ColFixture<float>, + ClIm2ColFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine( framework::dataset::make("InputShape", TensorShape(13U, 11U, 5U, 2U)), @@ -393,7 +393,7 @@ FIXTURE_DATA_TEST_CASE(Generic, * - im2col9x9_nhwc */ FIXTURE_DATA_TEST_CASE(Quantized, - CLIm2ColFixture<uint8_t>, + ClIm2ColFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine( framework::dataset::make("InputShape", TensorShape(13U, 11U, 11U, 2U)), @@ -419,7 +419,7 @@ FIXTURE_DATA_TEST_CASE(Quantized, * - im2col9x9_nhwc */ FIXTURE_DATA_TEST_CASE(FP16, - CLIm2ColFixture<half>, + ClIm2ColFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine( framework::dataset::make("InputShape", TensorShape(13U, 11U, 11U, 2U)), diff --git a/tests/validation/CL/Remap.cpp b/tests/validation/CL/IndirectConv2dAddressPrecalculation.cpp index f73073105b..67f70685d1 100644 --- a/tests/validation/CL/Remap.cpp +++ b/tests/validation/CL/IndirectConv2dAddressPrecalculation.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,18 +22,19 @@ * SOFTWARE. */ #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/functions/CLRemap.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "src/gpu/cl/kernels/ClIndirectConv2dAddressPrecalculationKernel.h" #include "tests/CL/CLAccessor.h" +#include "tests/CL/Helper.h" #include "tests/PaddingCalculator.h" -#include "tests/datasets/BorderModeDataset.h" #include "tests/datasets/ShapeDatasets.h" #include "tests/framework/Asserts.h" #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" #include "tests/validation/Validation.h" -#include "tests/validation/fixtures/RemapFixture.h" +#include "tests/validation/fixtures/IndirectConv2dAddressPrecalculationFixture.h" namespace arm_compute { @@ -41,37 +42,48 @@ namespace test { namespace validation { +using namespace arm_compute::misc::shape_calculator; +using namespace arm_compute::opencl::kernels; + +using CLIndirectConv2dAddressPrecalculation = CLSynthetizeOperator<ClIndirectConv2dAddressPrecalculationKernel>; + +using CLIndirectConv2dAddressPrecalculationFixture = IndirectConv2dAddressPrecalculationValidationFixture<CLTensor, CLAccessor, CLIndirectConv2dAddressPrecalculation>; + +// *INDENT-OFF* +// clang-format off +/** Data types */ + namespace { -constexpr AbsoluteTolerance<uint8_t> tolerance_value(1); -constexpr float tolerance_number = 0.2f; +const auto src_w_values = framework::dataset::make("src_w", {91}); +const auto src_h_values = framework::dataset::make("src_h", {103}); +const auto src_b_values = framework::dataset::make("src_b", {1, 2}); +const auto wei_w_values = framework::dataset::make("wei_w", {3, 5}); +const auto wei_h_values = framework::dataset::make("wei_h", {1, 6}); +const auto pad_values = framework::dataset::make("pad", {1, 2, 3}); +const auto stride_values = framework::dataset::make("stride", {1, 2}); +const auto m0_values = framework::dataset::make("M0", { 1, 2, 4, 5, 7 }); } // namespace TEST_SUITE(CL) -TEST_SUITE(Remap) -template <typename T> -using CLRemapFixture = RemapValidationFixture<CLTensor, CLAccessor, CLRemap, T>; - -FIXTURE_DATA_TEST_CASE(RunSmall, CLRemapFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), - framework::dataset::make("DataType", - DataType::U8)), - framework::dataset::make("BorderModes", { BorderMode::UNDEFINED, BorderMode::CONSTANT }))) -{ - // Validate output - validate(CLAccessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number); -} +TEST_SUITE(IndirectConv2dAddressPrecalculation) -FIXTURE_DATA_TEST_CASE(RunLarge, CLRemapFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), - framework::dataset::make("DataType", - DataType::U8)), - framework::dataset::make("BorderModes", { BorderMode::UNDEFINED, BorderMode::CONSTANT }))) +FIXTURE_DATA_TEST_CASE(RunSmall, CLIndirectConv2dAddressPrecalculationFixture, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(src_w_values, + src_h_values), + src_b_values), + wei_w_values), + wei_h_values), + pad_values), + stride_values), + m0_values)) { // Validate output - validate(CLAccessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number); + validate(CLAccessor(_target), _reference); } -TEST_SUITE_END() -TEST_SUITE_END() +TEST_SUITE_END() // IndirectConv2dAddressPrecalculation +TEST_SUITE_END() // CL } // namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/CL/IndirectConvolutionLayer.cpp b/tests/validation/CL/IndirectConvolutionLayer.cpp new file mode 100644 index 0000000000..aedf070e6b --- /dev/null +++ b/tests/validation/CL/IndirectConvolutionLayer.cpp @@ -0,0 +1,268 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLIndirectConvolutionLayer.h" +#include "tests/CL/CLAccessor.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Macros.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/DirectConvolutionLayerFixture.h" + +// Note: Since the interface of indirect convolution is the same of direct convolution, we can reuse +// the direct convolution fixture + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +RelativeTolerance<half> tolerance_fp16(half(0.2)); /**< Tolerance for floating point tests */ +RelativeTolerance<float> tolerance_fp32(0.05f); /**< Tolerance for floating point tests */ +constexpr float abs_tolerance_f32(0.0001f); /**< Absolute tolerance for FP32 tests*/ +constexpr float tolerance_num = 0.07f; /**< Tolerance number */ + +/** Activation function Dataset*/ +const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo", +{ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f) }); +} // namespace + +TEST_SUITE(CL) +TEST_SUITE(IndirectConvolutionLayer) + +/** Check whether the configuration of a indirect convolution layer with no + * bias leads to a successful run. + */ +TEST_CASE(NoBias, framework::DatasetMode::PRECOMMIT) +{ + const TensorShape src_shape_nhwc = TensorShape(8U, 27U, 13U); + const TensorShape wei_shape_nhwc = TensorShape(8U, 3U, 3U, 4U); + const TensorShape bia_shape = TensorShape(4U); + const TensorShape dst_shape_nhwc = TensorShape(4U, 25U, 11U); + constexpr DataType dt = DataType::F32; + constexpr DataLayout data_layout = DataLayout::NHWC; + + auto src_nhwc = create_tensor<CLTensor>(src_shape_nhwc, dt, 1, QuantizationInfo(), data_layout); + auto wei_nhwc = create_tensor<CLTensor>(wei_shape_nhwc, dt, 1, QuantizationInfo(), data_layout); + auto dst_nhwc = create_tensor<CLTensor>(dst_shape_nhwc, dt, 1, QuantizationInfo(), data_layout); + + TensorShape src_shape_nchw = src_shape_nhwc; + TensorShape wei_shape_nchw = wei_shape_nhwc; + TensorShape dst_shape_nchw = dst_shape_nhwc; + + permute(src_shape_nchw, PermutationVector(1U, 2U, 0U)); + permute(wei_shape_nchw, PermutationVector(1U, 2U, 0U, 3U)); + permute(dst_shape_nchw, PermutationVector(1U, 2U, 0U)); + + const PadStrideInfo conv_info = PadStrideInfo(1, 1, 0, 0); + + // Create indirect Convolution function + CLIndirectConvolutionLayer conv{}; + conv.configure(&src_nhwc, &wei_nhwc, nullptr, &dst_nhwc, conv_info); + + src_nhwc.allocator()->allocate(); + wei_nhwc.allocator()->allocate(); + dst_nhwc.allocator()->allocate(); + + library->fill_tensor_value(CLAccessor(src_nhwc), 1.f); + library->fill_tensor_value(CLAccessor(wei_nhwc), 1.f); + + conv.run(); + + // Compute reference to compare + SimpleTensor<float> ref_src{ src_shape_nchw, dt }; + SimpleTensor<float> ref_wei{ wei_shape_nchw, dt }; + SimpleTensor<float> ref_bia{ bia_shape, dt }; + library->fill_tensor_value(ref_src, 1.f); + library->fill_tensor_value(ref_wei, 1.f); + // No bias + library->fill_tensor_value(ref_bia, 0.f); + auto ref_dst = reference::convolution_layer<float>(ref_src, ref_wei, ref_bia, dst_shape_nchw, conv_info); + + validate(CLAccessor(dst_nhwc), ref_dst); +} + +/** Check whether the case of rectangle kernels i.e. when width and height of the weight_shape are not equal + * would lead to successful run + */ +TEST_CASE(NonSquareKernel, framework::DatasetMode::PRECOMMIT) +{ + const TensorShape src_shape_nhwc = TensorShape(3U, 33U, 27U); + const TensorShape wei_shape_nhwc = TensorShape(3U, 5U, 7U, 4U); // non-square kernel + const TensorShape bia_shape = TensorShape(4U); + const TensorShape dst_shape_nhwc = TensorShape(4U, 11U, 12U); + constexpr DataType dt = DataType::F32; + constexpr DataLayout data_layout = DataLayout::NHWC; + + auto src_nhwc = create_tensor<CLTensor>(src_shape_nhwc, dt, 1, QuantizationInfo(), data_layout); + auto wei_nhwc = create_tensor<CLTensor>(wei_shape_nhwc, dt, 1, QuantizationInfo(), data_layout); + auto dst_nhwc = create_tensor<CLTensor>(dst_shape_nhwc, dt, 1, QuantizationInfo(), data_layout); + + TensorShape src_shape_nchw = src_shape_nhwc; + TensorShape wei_shape_nchw = wei_shape_nhwc; + TensorShape dst_shape_nchw = dst_shape_nhwc; + + permute(src_shape_nchw, PermutationVector(1U, 2U, 0U)); + permute(wei_shape_nchw, PermutationVector(1U, 2U, 0U, 3U)); + permute(dst_shape_nchw, PermutationVector(1U, 2U, 0U)); + + const PadStrideInfo conv_info = PadStrideInfo(3, 2, 1, 1, 2, 0, DimensionRoundingType::FLOOR); + + // Create indirect convolution function + CLIndirectConvolutionLayer conv{}; + conv.configure(&src_nhwc, &wei_nhwc, nullptr, &dst_nhwc, conv_info); + + src_nhwc.allocator()->allocate(); + wei_nhwc.allocator()->allocate(); + dst_nhwc.allocator()->allocate(); + + library->fill_tensor_value(CLAccessor(src_nhwc), 1.f); + library->fill_tensor_value(CLAccessor(wei_nhwc), 1.f); + + conv.run(); + + // Compute reference to compare + SimpleTensor<float> ref_src{ src_shape_nchw, dt }; + SimpleTensor<float> ref_wei{ wei_shape_nchw, dt }; + SimpleTensor<float> ref_bia{ bia_shape, dt }; + library->fill_tensor_value(ref_src, 1.f); + library->fill_tensor_value(ref_wei, 1.f); + // No bias + library->fill_tensor_value(ref_bia, 0.f); + auto ref_dst = reference::convolution_layer<float>(ref_src, ref_wei, ref_bia, dst_shape_nchw, conv_info); + + validate(CLAccessor(dst_nhwc), ref_dst); +} +// *INDENT-OFF* +// clang-format off +// Note: Since the interface of indirect convolution is the same of direct convolution, we can reuse +// the direct convolution fixture +template <typename T> +using CLIndirectConvolutionLayerFixture = DirectConvolutionValidationFixture<CLTensor, CLAccessor, CLIndirectConvolutionLayer, T>; +template <typename T> +using CLIndirectConvolutionLayerMixedDataLayoutFixture = DirectConvolutionValidationFixture<CLTensor, CLAccessor, CLIndirectConvolutionLayer, T, true>; + +TEST_SUITE(NHWC) +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, CLIndirectConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U), + TensorShape(19U, 5U, 16U, 4U), + TensorShape(13U, 5U, 17U, 2U), + TensorShape(32U, 37U, 13U) } ), + framework::dataset::make("StrideX", { 1, 3, 1, 1 })), + framework::dataset::make("StrideY", { 1, 3, 2, 1 })), + framework::dataset::make("PadX", { 1, 3, 0, 4 })), + framework::dataset::make("PadY", { 1, 3, 0, 4 })), + framework::dataset::make("KernelSize", { 3, 8, 1, 9 })), + framework::dataset::make("NumKernels", { 17, 3, 1, 19 })), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_fp16, tolerance_num); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, CLIndirectConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(800U, 800U, 3U) } ), + framework::dataset::make("StrideX", { 1 })), + framework::dataset::make("StrideY", { 1 })), + framework::dataset::make("PadX", { 1 })), + framework::dataset::make("PadY", { 1 })), + framework::dataset::make("KernelSize", { 9 })), + framework::dataset::make("NumKernels", { 3 })), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::IDENTITY) )), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_fp16, tolerance_num); +} + +TEST_SUITE_END() // FP16 + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, CLIndirectConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U), + TensorShape(19U, 5U, 16U, 4U), + TensorShape(13U, 5U, 17U, 2U), + TensorShape(32U, 37U, 13U) } ), + framework::dataset::make("StrideX", { 1, 3, 1, 1 })), + framework::dataset::make("StrideY", { 1, 3, 2, 1 })), + framework::dataset::make("PadX", { 1, 3, 0, 4 })), + framework::dataset::make("PadY", { 1, 3, 0, 4 })), + framework::dataset::make("KernelSize", { 3, 8, 1, 9 })), + framework::dataset::make("NumKernels", { 17, 3, 1, 19 })), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLIndirectConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U), + TensorShape(19U, 5U, 16U, 4U), + TensorShape(13U, 5U, 17U, 2U), + TensorShape(32U, 37U, 13U) } ), + framework::dataset::make("StrideX", { 1 })), + framework::dataset::make("StrideY", { 2 })), + framework::dataset::make("PadX", { 1 })), + framework::dataset::make("PadY", { 3 })), + framework::dataset::make("KernelSize", { 3 })), + framework::dataset::make("NumKernels", { 3 })), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, CLIndirectConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(800U, 800U, 3U) } ), + framework::dataset::make("StrideX", { 1 })), + framework::dataset::make("StrideY", { 1 })), + framework::dataset::make("PadX", { 1 })), + framework::dataset::make("PadY", { 1 })), + framework::dataset::make("KernelSize", { 9 })), + framework::dataset::make("NumKernels", { 3 })), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::IDENTITY) )), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32); +} +TEST_SUITE_END() // FP32 +TEST_SUITE_END() // NHWC +TEST_SUITE_END() // IndirectConvolutionLayer +TEST_SUITE_END() // CL + +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/CL/LogLayer.cpp b/tests/validation/CL/LogLayer.cpp index 95c4f1226e..895c306841 100644 --- a/tests/validation/CL/LogLayer.cpp +++ b/tests/validation/CL/LogLayer.cpp @@ -22,7 +22,7 @@ * SOFTWARE. */ #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h" +#include "arm_compute/runtime/CL/functions/CLElementwiseUnaryLayer.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" #include "tests/CL/CLAccessor.h" @@ -32,7 +32,7 @@ #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" #include "tests/validation/Validation.h" -#include "tests/validation/fixtures/ElementWiseUnaryFixture.h" +#include "tests/validation/fixtures/ElementwiseUnaryFixture.h" namespace arm_compute { diff --git a/tests/validation/CL/MatMul.cpp b/tests/validation/CL/MatMul.cpp new file mode 100644 index 0000000000..844597f3e9 --- /dev/null +++ b/tests/validation/CL/MatMul.cpp @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/functions/CLMatMul.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/datasets/ActivationFunctionsDataset.h" +#include "tests/framework/DatasetModes.h" +#include "tests/framework/Macros.h" +#include "tests/framework/TestCase.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" + +#include "tests/datasets/LargeMatMulDataset.h" +#include "tests/datasets/SmallMatMulDataset.h" +#include "tests/validation/fixtures/MatMulFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for fp32 data type */ +constexpr float abs_tolerance_f32( + 0.0001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for fp32 data type in case using relative tolerance fails because of small values */ +constexpr float abs_tolerance_f16( + 0.001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for fp16 data type in case using relative tolerance fails because of small values */ +RelativeTolerance<half_float::half> tolerance_f16(half(0.01)); /**< Tolerance value for comparing reference's output against implementation's output for fp16 data type */ +constexpr AbsoluteTolerance<uint8_t> tolerance_quant(1); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ +} // namespace + +template <typename T> +using CLMatMulFixture = MatMulValidationFixture<CLTensor, CLAccessor, CLMatMul, GpuMatMulSettings, T>; + +template <typename T> +using CLQuantizedMatMulFixture = QuantizedMatMulValidationFixture<CLTensor, CLAccessor, CLMatMul, GpuMatMulSettings, T>; + +template <typename T> +using CLMatMulActivationFixture = MatMulValidationWithActivationFixture<CLTensor, CLAccessor, CLMatMul, GpuMatMulSettings, T>; + +template <typename T> +using CLMatMulActivationAlphaBetaFixture = MatMulValidationWithActivationAlphaBetaFixture<CLTensor, CLAccessor, CLMatMul, GpuMatMulSettings, T>; + +template <typename T> +using CLQuantizedMatMulActivationFixture = QuantizedMatMulValidationWithActivationFixture<CLTensor, CLAccessor, CLMatMul, GpuMatMulSettings, T>; + +/* The main act functions matmul (float) is expected to support */ +const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo", +{ + ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.75f, 0.25f), +}); + +/* (Float datatype only) Larger activation functions dataset, used during some nightly tests. */ +const auto AllActivationsDataset = combine(datasets::ActivationFunctions(), framework::dataset::make("AlphaBeta", { 0.5f, 1.f })); + +// Alpha beta values should be integer values +// This is for testing purposes with quantized datatypes and is not a limitation of the kernel. +// To properly remove this restriction, dst_qinfo should be auto-initialised with consideration for alpha beta values +// The main act functions quantized matmul kernels are expected to support +const auto ActivationFunctionsQuantizedDataset = concat(concat(concat( + framework::dataset::make("ActivationInfo", ActivationLayerInfo()), + framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))), + framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 1.f))), + framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 2.f, 1.f))); + +TEST_SUITE(CL) +TEST_SUITE(MatMul) + +TEST_SUITE(Float) +TEST_SUITE(FP32) + +FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulActivationFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallMatMulDataset(), + framework::dataset::make("TransposeA", { false, true })), + framework::dataset::make("TransposeB", { false, true })), + framework::dataset::make("DataType", DataType::F32)), + ActivationFunctionsDataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, CLMatMulActivationFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeMatMulDataset(), + framework::dataset::make("TransposeA", { false, true })), + framework::dataset::make("TransposeB", { false, true })), + framework::dataset::make("DataType", DataType::F32)), + ActivationFunctionsDataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunAllActivations, CLMatMulActivationAlphaBetaFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::SmallerMatMulDataset(), + framework::dataset::make("TransposeA", { false })), + framework::dataset::make("TransposeB", { true })), + framework::dataset::make("DataType", DataType::F32)), + AllActivationsDataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); +} + +TEST_SUITE_END() // FP32 + +TEST_SUITE(FP16) + +FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulActivationFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallMatMulDataset(), + framework::dataset::make("TransposeA", { false, true })), + framework::dataset::make("TransposeB", { false, true })), + framework::dataset::make("DataType", DataType::F16)), + ActivationFunctionsDataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, CLMatMulActivationFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeMatMulDataset(), + framework::dataset::make("TransposeA", { false, true })), + framework::dataset::make("TransposeB", { false, true })), + framework::dataset::make("DataType", DataType::F16)), + ActivationFunctionsDataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); +} + +TEST_SUITE_END() // FP16 +TEST_SUITE_END() // Float + +TEST_SUITE(Quantized) +TEST_SUITE(QASYMM8) + +FIXTURE_DATA_TEST_CASE(RunSmall, CLQuantizedMatMulFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(combine( + datasets::SmallMatMulDataset(), + framework::dataset::make("TransposeA", { false, true })), + framework::dataset::make("TransposeB", { false, true })), + framework::dataset::make("DataType", DataType::QASYMM8)), + ActivationFunctionsQuantizedDataset), + framework::dataset::make("NumberOfExtraRuns", { 0, 1 })), + framework::dataset::make("LhsQInfo", { QuantizationInfo(1.f / 50, 1) })), + framework::dataset::make("RhsQInfo", { QuantizationInfo(1.f / 30, -1) })), + framework::dataset::make("DstQInfo", { QuantizationInfo(1.f, 2) }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, CLQuantizedMatMulFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(combine( + datasets::LargeMatMulDataset(), + framework::dataset::make("TransposeA", { false, true })), + framework::dataset::make("TransposeB", { false, true })), + framework::dataset::make("DataType", DataType::QASYMM8)), + ActivationFunctionsQuantizedDataset), + framework::dataset::make("NumberOfExtraRuns", { 0, 1 })), + framework::dataset::make("LhsQInfo", { QuantizationInfo(1.f / 100, 1) })), + framework::dataset::make("RhsQInfo", { QuantizationInfo(1.f / 200, -1) })), + framework::dataset::make("DstQInfo", { QuantizationInfo(1.f, 2) }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); +} + +TEST_SUITE_END() // QASYMM8 + +TEST_SUITE(QASYMM8_SIGNED) + +FIXTURE_DATA_TEST_CASE(RunSmall, CLQuantizedMatMulFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(combine( + datasets::SmallMatMulDataset(), + framework::dataset::make("TransposeA", { false, true })), + framework::dataset::make("TransposeB", { false, true })), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + ActivationFunctionsQuantizedDataset), + framework::dataset::make("NumberOfExtraRuns", { 0, 1 })), + framework::dataset::make("LhsQInfo", { QuantizationInfo(1.f / 50, 1) })), + framework::dataset::make("RhsQInfo", { QuantizationInfo(1.f / 30, -1) })), + framework::dataset::make("DstQInfo", { QuantizationInfo(1.f, 2) }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, CLQuantizedMatMulFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(combine( + datasets::LargeMatMulDataset(), + framework::dataset::make("TransposeA", { false, true })), + framework::dataset::make("TransposeB", { false, true })), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + ActivationFunctionsQuantizedDataset), + framework::dataset::make("NumberOfExtraRuns", { 0, 1 })), + framework::dataset::make("LhsQInfo", { QuantizationInfo(1.f / 100, 1) })), + framework::dataset::make("RhsQInfo", { QuantizationInfo(1.f / 200, -1) })), + framework::dataset::make("DstQInfo", { QuantizationInfo(1.f, 50) }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); +} + +TEST_SUITE_END() // QASYMM8_SIGNED + +TEST_SUITE_END() // Quantized + +TEST_SUITE_END() // MatMul +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/CL/MatMulKernel.cpp b/tests/validation/CL/MatMulKernel.cpp new file mode 100644 index 0000000000..b47f8bc924 --- /dev/null +++ b/tests/validation/CL/MatMulKernel.cpp @@ -0,0 +1,650 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/runtime/CL/CLTensor.h" +#include "src/gpu/cl/kernels/ClMatMulNativeKernel.h" +#include "tests/datasets/LargeMatMulDataset.h" +#include "tests/datasets/SmallMatMulDataset.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/MatMulKernelFixture.h" +#include "tests/validation/reference/Permute.h" + +#include <tuple> + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */ +constexpr float abs_tolerance_f32( + 0.0001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for floating point data types in case using relative tolerance fails because of small values */ +constexpr float abs_tolerance_f16( + 0.001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for fp16 data types in case using relative tolerance fails because of small values */ +RelativeTolerance<half_float::half> tolerance_f16(half(0.01)); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */ +} // namespace + +/** M0 values to test --precommit*/ +const auto m0_values_precommit = framework::dataset::make("M0", { 1, 3 }); + +/** N0 values to test --precommit*/ +const auto n0_values_precommit = framework::dataset::make("N0", { 2, 4 }); + +/** K0 values to test --precommit*/ +const auto k0_values_precommit = framework::dataset::make("K0", { 2, 3 }); + +/** M0 values to test --nightly*/ +const auto m0_values_nightly_lhs_nt = framework::dataset::make("M0", { 1, 2, 3, 4, 5, 6, 7, 8 }); +const auto m0_values_nightly_lhs_t = framework::dataset::make("M0", { 1, 2, 3, 4, 8 }); + +/** N0 values to test --nightly*/ +const auto n0_values_nightly_rhs_nt = framework::dataset::make("N0", { 1, 2, 3, 4, 8, 16 }); +const auto n0_values_nightly_rhs_t = framework::dataset::make("N0", { 1, 2, 3, 4, 8 }); + +/** K0 values to test --nightly*/ +const auto k0_values_nightly_lhs_nt_rhs_nt = framework::dataset::make("K0", { 1, 2, 3, 4, 8, 16 }); +const auto k0_values_nightly_rhs_t = framework::dataset::make("K0", { 1, 2, 3, 4, 8 }); +const auto k0_values_nightly_lhs_t_rhs_nt = framework::dataset::make("K0", { 1, 2, 3, 4, 5, 6, 7, 8 }); + +template <typename T> +using CLMatMulKernelFixture = MatMulKernelValidationFixture<T, ClMatMulNativeKernel>; + +template <typename T> +using CLMatMulKernelBiasFixture = MatMulKernelWithBiasValidation<T, ClMatMulNativeKernel>; + +TEST_SUITE(CL) +TEST_SUITE(MatMulKernel) +TEST_SUITE(Validate) + +TEST_CASE(SupportedBlockSizes, framework::DatasetMode::ALL) +{ + using MatMulConfigurationPair = std::pair<MatMulKernelInfo, bool>; + + const std::vector<MatMulConfigurationPair> supported_block_sizes = + { + // MatMulKernelInfo(adj_lhs, adj_rhs, M0, N0, K0, export_rhs_to_cl_image = false) + // Lhs not-transposed, Rhs-not-transposed + { MatMulKernelInfo(false, false, 0, 1, 1), false }, // M0 should be > 0 + { MatMulKernelInfo(false, false, 3, 5, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(false, false, 3, 6, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(false, false, 3, 3, 17), false }, // K0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(false, false, 3, 3, 7), false }, // K0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(false, false, 9, 1, 2), true }, + { MatMulKernelInfo(false, false, 3, 16, 3), true }, + { MatMulKernelInfo(false, false, 7, 3, 4), true }, + { MatMulKernelInfo(false, false, 7, 3, 4, true), false }, // N0 not in {4, 8, 16} + { MatMulKernelInfo(false, false, 7, 1, 4, true), false }, // N0 not in {4, 8, 16} + { MatMulKernelInfo(false, false, 7, 12, 4, true), false }, // N0 not in {4, 8, 16} + { MatMulKernelInfo(false, false, 7, 4, 4, true), true }, + { MatMulKernelInfo(false, false, 7, 8, 4, true), true }, + { MatMulKernelInfo(false, false, 7, 16, 4, true), true }, + + // Lhs not-transposed, Rhs transposed + { MatMulKernelInfo(false, true, 0, 1, 1), false }, // M0 should be > 0 + { MatMulKernelInfo(false, true, 3, 11, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(false, true, 3, 7, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(false, true, 3, 3, 12), false }, // K0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(false, true, 3, 3, 6), false }, // K0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(false, true, 5, 1, 2), true }, + { MatMulKernelInfo(false, true, 3, 3, 3), true }, + { MatMulKernelInfo(false, true, 2, 4, 8), true }, + { MatMulKernelInfo(false, true, 2, 4, 5, true), false }, // K0 not in {4, 8, 16} + { MatMulKernelInfo(false, true, 2, 4, 9, true), false }, // K0 not in {4, 8, 16} + { MatMulKernelInfo(false, true, 2, 4, 3, true), false }, // K0 not in {4, 8, 16} + { MatMulKernelInfo(false, true, 2, 4, 4, true), true }, + { MatMulKernelInfo(false, true, 2, 4, 8, true), true }, + { MatMulKernelInfo(false, true, 2, 8, 16, true), true }, + + // Lhs transposed, Rhs-not-transposed + { MatMulKernelInfo(true, false, 1, 1, 0), false }, // K0 should be > 0 + { MatMulKernelInfo(true, false, 3, 11, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(true, false, 3, 7, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(true, false, 6, 3, 12), false }, // M0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(true, false, 5, 3, 6), false }, // M0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(true, false, 4, 1, 22), true }, + { MatMulKernelInfo(true, false, 3, 3, 3), true }, + { MatMulKernelInfo(true, false, 2, 4, 8), true }, + { MatMulKernelInfo(true, false, 2, 3, 8, true), false }, // N0 not in {4, 8, 16} + { MatMulKernelInfo(true, false, 2, 7, 8, true), false }, // N0 not in {4, 8, 16} + { MatMulKernelInfo(true, false, 2, 5, 8, true), false }, // N0 not in {4, 8, 16} + { MatMulKernelInfo(true, false, 2, 4, 8, true), true }, + { MatMulKernelInfo(true, false, 2, 8, 8, true), true }, + { MatMulKernelInfo(true, false, 2, 16, 8, true), true }, + + // Lhs transposed, Rhs-transposed + { MatMulKernelInfo(true, true, 2, 1, 5), false }, // K0 should in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(true, true, 1, 8, 7), false }, // K0 should in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(true, true, 3, 11, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(true, true, 3, 7, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(true, true, 6, 3, 12), false }, // M0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(true, true, 5, 3, 6), false }, // M0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(true, true, 4, 8, 16), true }, + { MatMulKernelInfo(true, true, 3, 3, 4), true }, + { MatMulKernelInfo(true, true, 16, 4, 8), true }, + { MatMulKernelInfo(true, true, 2, 2, 1, true), false }, // K0 not in {4, 8, 16} + { MatMulKernelInfo(true, true, 2, 2, 5, true), false }, // K0 not in {4, 8, 16} + { MatMulKernelInfo(true, true, 2, 4, 7, true), false }, // K0 not in {4, 8, 16} + { MatMulKernelInfo(true, true, 2, 4, 4, true), true }, + { MatMulKernelInfo(true, true, 2, 8, 8, true), true }, + { MatMulKernelInfo(true, true, 2, 8, 16, true), true }, + }; + + // Set big enough shapes so that block sizes are not truncated. Also, set all dimensions equal + // so that it doesn't fail for different NT/T configurations. We aim to test the block sizes here, + // not the shapes themselves. + const TensorInfo lhs_info = TensorInfo(TensorShape(100U, 100U), 1, DataType::F32); + const TensorInfo rhs_info = TensorInfo(TensorShape(100U, 100U), 1, DataType::F32); + + const bool export_to_cl_image_supported = image2d_from_buffer_supported(CLKernelLibrary::get().get_device()); + for(auto &pair : supported_block_sizes) + { + TensorInfo output_info; + Status status = ClMatMulNativeKernel::validate(&lhs_info, &rhs_info, nullptr, &output_info, pair.first); + + if(!pair.first.export_rhs_to_cl_image || export_to_cl_image_supported) + { + ARM_COMPUTE_EXPECT(bool(status) == pair.second, framework::LogLevel::ERRORS); + } + } +} + +TEST_CASE(ExportToCLImage, framework::DatasetMode::ALL) +{ + // We skip this test if the hardware does not support exporting to CL Image + if(image2d_from_buffer_supported(CLKernelLibrary::get().get_device())) + { + constexpr size_t pixel_size = 4; + const size_t max_image_w = pixel_size * CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>(); + const size_t max_image_h = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>(); + + using ShapeConfigurationTuple = std::tuple<TensorShape, TensorShape, bool, bool, bool>; + const std::vector<ShapeConfigurationTuple> shape_configurations = + { + // lhs_shape, rhs_shape, adj_lhs, adj_rhs, expected + // Lhs t/Nt, Rhs Nt + // Transposition of Lhs doesn't add any value to the tests, therefore always assumed false below + { TensorShape(5U, 1U), TensorShape(3U, 5U), false, false, false }, // N should be multiple of 4 + { TensorShape(5U, 1U), TensorShape(14U, 5U), false, false, false }, // N should be multiple of 4 + { TensorShape(5U, 1U), TensorShape(12U, 5U), false, false, true }, + { TensorShape(5U, 1U), TensorShape(8U, 5U), false, false, true }, + { TensorShape(5U, 1U), TensorShape(4U, 5U), false, false, true }, + { TensorShape(max_image_h + 1, 1U), TensorShape(4U, max_image_h + 1), false, false, false }, // Cannot fit into CL Image memory's height + { TensorShape(5U, 1U), TensorShape(max_image_w + 1, 5U), false, false, false }, // Cannot fit into CL Image memory's width + { TensorShape(max_image_h, 1U), TensorShape(4U, max_image_h), false, false, true }, // Barely fits into CL Image memory's height + { TensorShape(5U, 1U), TensorShape(max_image_w, 5U), false, false, true }, // Barely fits into CL Image memory's width + + // Lhs Nt/T , Rhs T + { TensorShape(5U, 1U), TensorShape(5U, 3U), false, true, false }, // K should be multiple of 4 + { TensorShape(5U, 1U), TensorShape(5U, 14U), false, true, false }, // K should be multiple of 4 + { TensorShape(4U, 1U), TensorShape(4U, 10U), false, true, true }, + { TensorShape(8U, 1U), TensorShape(8U, 9U), false, true, true }, + { TensorShape(12U, 1U), TensorShape(12U, 6U), false, true, true }, + }; + + for(auto &tuple : shape_configurations) + { + TensorShape lhs_shape = std::get<0>(tuple); + TensorShape rhs_shape = std::get<1>(tuple); + + const TensorInfo lhs_info = TensorInfo(lhs_shape, 1, DataType::F32); + const TensorInfo rhs_info = TensorInfo(rhs_shape, 1, DataType::F32); + + const bool adj_lhs = std::get<2>(tuple); + const bool adj_rhs = std::get<3>(tuple); + + // We choose M0, N0, K0 equal to 4 so that they're always valid for CLImage in any combination + const MatMulKernelInfo matmul_kernel_info + { + adj_lhs, adj_rhs, 4, 4, 4, true /* export_rhs_to_cl_image */ + }; + + TensorInfo output_info; + Status status = ClMatMulNativeKernel::validate(&lhs_info, &rhs_info, nullptr, &output_info, matmul_kernel_info); + + const bool expected = std::get<4>(tuple); + ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); + } + } +} + +TEST_CASE(ValidateInputShapes, framework::DatasetMode::ALL) +{ + // Configurations are assumed to be Nt/Nt, but will be transposed inside the test to test other configurations + using ShapeConfigurationTuple = std::tuple<TensorShape, TensorShape, TensorShape, bool>; + const std::vector<ShapeConfigurationTuple> shape_configurations = + { + { TensorShape(5U, 1U), TensorShape(3U, 5U), TensorShape(3U), true }, + { TensorShape(10U, 12U), TensorShape(3U, 10U), TensorShape(3U), true }, + { TensorShape(8U, 4U), TensorShape(2U, 8U), TensorShape(2U), true }, + { TensorShape(8U, 4U), TensorShape(2U, 5U), TensorShape(2U), false }, // Mismatch in the K dimension + { TensorShape(5U, 0U), TensorShape(2U, 5U), TensorShape(2U), false }, // Invalid dimension + { TensorShape(5U, 4U, 3U, 4U, 5U, 6U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), TensorShape(2U), true }, + { TensorShape(5U, 4U, 3U, 4U, 5U, 1U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), TensorShape(2U), false }, // no batch broadcasting + { TensorShape(5U, 4U, 3U, 4U, 9U, 6U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), TensorShape(2U), false }, // mismatch in batch dimension + { TensorShape(5U, 1U), TensorShape(3U, 5U), TensorShape(1U), false }, // Unsupported bias broadcasting. + { TensorShape(5U, 1U), TensorShape(3U, 5U), TensorShape(3U, 3U), false }, // 2D bias is unsupported. + { TensorShape(5U, 1U), TensorShape(3U, 5U), TensorShape(6U), false }, // bias first dimension != dst first dimension + }; + + for(auto &tuple : shape_configurations) + { + const bool expected = std::get<3>(tuple); + + for(bool adj_lhs : + { + false, true + }) + { + for(bool adj_rhs : + { + false, true + }) + { + TensorShape lhs_shape = std::get<0>(tuple); + TensorShape rhs_shape = std::get<1>(tuple); + TensorShape bia_shape = std::get<2>(tuple); + + if(adj_lhs) + { + permute(lhs_shape, PermutationVector(1U, 0U)); + } + + if(adj_rhs) + { + permute(rhs_shape, PermutationVector(1U, 0U)); + } + + const TensorInfo lhs_info = TensorInfo(lhs_shape, 1, DataType::F32); + const TensorInfo rhs_info = TensorInfo(rhs_shape, 1, DataType::F32); + const TensorInfo bia_info = TensorInfo(bia_shape, 1, DataType::F32); + TensorInfo output_info; + + MatMulKernelInfo matmul_kernel_info{ adj_lhs, adj_rhs, 1, 1, 1, false /* export_rhs_to_cl_image */ }; + + Status status = ClMatMulNativeKernel::validate(&lhs_info, &rhs_info, &bia_info, &output_info, matmul_kernel_info); + ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); + } + } + } +} + +TEST_CASE(ValidateDataTypes, framework::DatasetMode::ALL) +{ + // Configurations are assumed to be Nt/Nt, but will be transposed inside the test to test other configurations + using DataTypeConfigurationTuple = std::tuple<DataType, DataType, DataType, bool>; + const std::vector<DataTypeConfigurationTuple> data_type_configurations = + { + { DataType::F32, DataType::F32, DataType::F32, true }, + { DataType::F16, DataType::F16, DataType::F16, true }, + { DataType::F16, DataType::F32, DataType::F32, false }, // no mixed precision + { DataType::F64, DataType::F64, DataType::F64, false }, // no double precision + { DataType::QASYMM8, DataType::QASYMM8, DataType::QASYMM8, false }, // no quantized types + { DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, false }, // no quantized types + { DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8_PER_CHANNEL, false }, // no quantized types + { DataType::QASYMM16, DataType::QASYMM16, DataType::QASYMM16, false }, // no quantized types + { DataType::QSYMM16, DataType::QSYMM16, DataType::QSYMM16, false }, // no quantized types + { DataType::QSYMM8, DataType::QSYMM8, DataType::QSYMM8, false }, // no quantized types + { DataType::S64, DataType::S64, DataType::S64, false }, // no integral types + { DataType::S32, DataType::S32, DataType::S32, false }, // no integral types + { DataType::S16, DataType::S16, DataType::S16, false }, // no integral types + { DataType::S8, DataType::S8, DataType::S8, false }, // no integral types + { DataType::U64, DataType::U64, DataType::U64, false }, // no integral types + { DataType::U32, DataType::U32, DataType::U32, false }, // no integral types + { DataType::U16, DataType::U16, DataType::U16, false }, // no integral types + { DataType::U8, DataType::U8, DataType::U8, false }, // no integral types + }; + + const TensorShape shape = TensorShape(10U, 10U); + const MatMulKernelInfo matmul_kernel_info{ false, false, 1, 1, 1, false }; + for(auto &tuple : data_type_configurations) + { + const bool expected = std::get<3>(tuple); + + const TensorInfo lhs_info(shape, 1, std::get<0>(tuple)); + const TensorInfo rhs_info(shape, 1, std::get<1>(tuple)); + TensorInfo output_info(shape, 1, std::get<2>(tuple)); + + Status status = ClMatMulNativeKernel::validate(&lhs_info, &rhs_info, nullptr, &output_info, matmul_kernel_info); + ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); + } +} + +TEST_SUITE_END() // Validate + +TEST_SUITE(Float) +TEST_SUITE(FP32) +TEST_SUITE(Buffer) +FIXTURE_DATA_TEST_CASE(RunTiny, CLMatMulKernelFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::TinyMatMulDataset(), + framework::dataset::make("TransposeA", { false, true })), + framework::dataset::make("TransposeB", { false, true })), + m0_values_precommit), + n0_values_precommit), + k0_values_precommit), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulKernelFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDataset(), + framework::dataset::make("TransposeA", { false, true })), + framework::dataset::make("TransposeB", { false, true })), + m0_values_precommit), + n0_values_precommit), + k0_values_precommit), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunWithBias, CLMatMulKernelBiasFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDataset(), + framework::dataset::make("TransposeA", { false, true })), + framework::dataset::make("TransposeB", { false, true })), + m0_values_precommit), + n0_values_precommit), + k0_values_precommit), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::F32))) + +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLargeNoTranspose, CLMatMulKernelFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(), + framework::dataset::make("TransposeA", { false })), + framework::dataset::make("TransposeB", { false })), + m0_values_nightly_lhs_nt), + n0_values_nightly_rhs_nt), + k0_values_nightly_lhs_nt_rhs_nt), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLargeRhsTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(), + framework::dataset::make("TransposeA", { false })), + framework::dataset::make("TransposeB", { true })), + m0_values_nightly_lhs_nt), + n0_values_nightly_rhs_t), + k0_values_nightly_rhs_t), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(), + framework::dataset::make("TransposeA", { true })), + framework::dataset::make("TransposeB", { false })), + m0_values_nightly_lhs_t), + n0_values_nightly_rhs_nt), + k0_values_nightly_lhs_t_rhs_nt), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposedRhsTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(), + framework::dataset::make("TransposeA", { true })), + framework::dataset::make("TransposeB", { true })), + m0_values_nightly_lhs_t), + n0_values_nightly_rhs_t), + k0_values_nightly_rhs_t), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); +} +// Running High Dimensional test is enough for FP32, because we're stressing the number of dimensions, not data type or M0/N0/K0 +// It's a good idea to test for each Lhs/Rhs T/NT combinations because they're different CL kernels +FIXTURE_DATA_TEST_CASE(RunHighDimensional, CLMatMulKernelFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::HighDimensionalMatMulDataset(), + framework::dataset::make("TransposeA", { false, true })), + framework::dataset::make("TransposeB", { false, true })), + framework::dataset::make("M0", { 2 })), + framework::dataset::make("N0", { 2 })), + framework::dataset::make("K0", { 2 })), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); +} +TEST_SUITE_END() // Buffer + +TEST_SUITE(ExportRhsToCLImage) +FIXTURE_DATA_TEST_CASE(RunSmallRhsNotTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDatasetRhsExportToCLImageRhsNT(), + framework::dataset::make("TransposeA", { true, false })), + framework::dataset::make("TransposeB", { false })), + framework::dataset::make("M0", { 2 })), + framework::dataset::make("N0", { 4, 8, 16 })), + framework::dataset::make("K0", { 2, 4 })), + framework::dataset::make("ExportRhsToCLImage", { true })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + if(_device_supports_export_to_cl_image) + { + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); + } +} +FIXTURE_DATA_TEST_CASE(RunLargeRhsNotTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDatasetRhsExportToCLImageRhsNT(), + framework::dataset::make("TransposeA", { true, false })), + framework::dataset::make("TransposeB", { false })), + framework::dataset::make("M0", { 2 })), // Choices of M0 does not matter much because it's related to Lhs tensor + framework::dataset::make("N0", { 4, 8, 16 })), + framework::dataset::make("K0", { 1, 2, 3, 4 })), + framework::dataset::make("ExportRhsToCLImage", { true })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + if(_device_supports_export_to_cl_image) + { + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); + } +} +FIXTURE_DATA_TEST_CASE(RunSmallRhsTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDatasetRhsExportToCLImageRhsT(), + framework::dataset::make("TransposeA", { true, false })), + framework::dataset::make("TransposeB", { true })), + framework::dataset::make("M0", { 2 })), + framework::dataset::make("N0", { 2, 4 })), + framework::dataset::make("K0", { 4, 8, 16 })), + framework::dataset::make("ExportRhsToCLImage", { true })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + if(_device_supports_export_to_cl_image) + { + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); + } +} +FIXTURE_DATA_TEST_CASE(RunLargeRhsTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDatasetRhsExportToCLImageRhsT(), + framework::dataset::make("TransposeA", { true, false })), + framework::dataset::make("TransposeB", { true })), + framework::dataset::make("M0", { 2 })), // Choices of M0 does not matter much because it's related to Lhs tensor + framework::dataset::make("N0", { 1, 2, 3, 4 })), + framework::dataset::make("K0", { 4, 8, 16 })), + framework::dataset::make("ExportRhsToCLImage", { true })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + if(_device_supports_export_to_cl_image) + { + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); + } +} +TEST_SUITE_END() // ExportRhsToCLImage +TEST_SUITE_END() // FP32 + +TEST_SUITE(FP16) +TEST_SUITE(Buffer) +FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulKernelFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDataset(), + framework::dataset::make("TransposeA", { false, true })), + framework::dataset::make("TransposeB", { false, true })), + m0_values_precommit), + n0_values_precommit), + k0_values_precommit), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunLargeNoTranspose, CLMatMulKernelFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(), + framework::dataset::make("TransposeA", { false })), + framework::dataset::make("TransposeB", { false })), + m0_values_nightly_lhs_nt), + n0_values_nightly_rhs_nt), + k0_values_nightly_lhs_nt_rhs_nt), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunLargeRhsTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(), + framework::dataset::make("TransposeA", { false })), + framework::dataset::make("TransposeB", { true })), + m0_values_nightly_lhs_nt), + n0_values_nightly_rhs_t), + k0_values_nightly_rhs_t), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(), + framework::dataset::make("TransposeA", { true })), + framework::dataset::make("TransposeB", { false })), + m0_values_nightly_lhs_t), + n0_values_nightly_rhs_nt), + k0_values_nightly_lhs_t_rhs_nt), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposedRhsTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(), + framework::dataset::make("TransposeA", { true })), + framework::dataset::make("TransposeB", { true })), + m0_values_nightly_lhs_t), + n0_values_nightly_rhs_t), + k0_values_nightly_rhs_t), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); +} +TEST_SUITE_END() // Buffer + +TEST_SUITE(ExportRhsToCLImage) +FIXTURE_DATA_TEST_CASE(RunSmallRhsNotTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDatasetRhsExportToCLImageRhsNT(), + framework::dataset::make("TransposeA", { true, false })), + framework::dataset::make("TransposeB", { false })), + framework::dataset::make("M0", { 2 })), + framework::dataset::make("N0", { 4, 8, 16 })), + framework::dataset::make("K0", { 2, 4 })), + framework::dataset::make("ExportRhsToCLImage", { true })), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + if(_device_supports_export_to_cl_image) + { + validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); + } +} +FIXTURE_DATA_TEST_CASE(RunLargeRhsNotTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDatasetRhsExportToCLImageRhsNT(), + framework::dataset::make("TransposeA", { true, false })), + framework::dataset::make("TransposeB", { false })), + framework::dataset::make("M0", { 2 })), // Choices of M0 does not matter much because it's related to Lhs tensor + framework::dataset::make("N0", { 4, 8, 16 })), + framework::dataset::make("K0", { 1, 2, 3, 4 })), + framework::dataset::make("ExportRhsToCLImage", { true })), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + if(_device_supports_export_to_cl_image) + { + validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); + } +} +FIXTURE_DATA_TEST_CASE(RunSmallRhsTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDatasetRhsExportToCLImageRhsT(), + framework::dataset::make("TransposeA", { true, false })), + framework::dataset::make("TransposeB", { true })), + framework::dataset::make("M0", { 2 })), + framework::dataset::make("N0", { 2, 4 })), + framework::dataset::make("K0", { 4, 8, 16 })), + framework::dataset::make("ExportRhsToCLImage", { true })), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + if(_device_supports_export_to_cl_image) + { + validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); + } +} +FIXTURE_DATA_TEST_CASE(RunLargeRhsTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDatasetRhsExportToCLImageRhsT(), + framework::dataset::make("TransposeA", { true, false })), + framework::dataset::make("TransposeB", { true })), + framework::dataset::make("M0", { 2 })), // Choices of M0 does not matter much because it's related to Lhs tensor + framework::dataset::make("N0", { 1, 2, 3, 4 })), + framework::dataset::make("K0", { 4, 8, 16 })), + framework::dataset::make("ExportRhsToCLImage", { true })), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + if(_device_supports_export_to_cl_image) + { + validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); + } +} +TEST_SUITE_END() // ExportRhsToCLImage +TEST_SUITE_END() // FP16 +TEST_SUITE_END() // Float +TEST_SUITE_END() // MatMulKernel +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/CL/MatMulLowpNativeKernel.cpp b/tests/validation/CL/MatMulLowpNativeKernel.cpp new file mode 100644 index 0000000000..90eee4fb82 --- /dev/null +++ b/tests/validation/CL/MatMulLowpNativeKernel.cpp @@ -0,0 +1,411 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/runtime/CL/CLTensor.h" + +#include "src/gpu/cl/kernels/ClMatMulLowpNativeKernel.h" + +#include "tests/datasets/LargeMatMulDataset.h" +#include "tests/datasets/SmallMatMulDataset.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/MatMulKernelFixture.h" +#include "tests/validation/reference/Permute.h" + +#include <tuple> + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +constexpr AbsoluteTolerance<float> tolerance_quant(1); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ +} +template <typename T> +using CLMatMulLowpNativeKernelFixture = MatMulKernelValidationFixture<T, ClMatMulLowpNativeKernel>; + +template <typename T> +using CLMatMulLowpKernelWithBiasFixture = MatMulKernelWithBiasValidation<T, ClMatMulLowpNativeKernel>; + +/** M0 values to test --precommit*/ +const auto m0_values_precommit = framework::dataset::make("M0", { 1, 3 }); + +/** N0 values to test --precommit*/ +const auto n0_values_precommit = framework::dataset::make("N0", { 2, 4 }); + +/** K0 values to test --precommit*/ +const auto k0_values_precommit = framework::dataset::make("K0", { 2, 3 }); + +/** M0 values to test --nightly*/ +const auto m0_values_nightly_lhs_nt = framework::dataset::make("M0", { 1, 2, 3, 4, 5, 6, 7, 8 }); +const auto m0_values_nightly_lhs_t = framework::dataset::make("M0", { 1, 2, 3, 4, 8 }); + +/** N0 values to test --nightly*/ +const auto n0_values_nightly_rhs_nt = framework::dataset::make("N0", { 1, 2, 3, 4, 8, 16 }); +const auto n0_values_nightly_rhs_t = framework::dataset::make("N0", { 1, 2, 3, 4, 8 }); + +/** K0 values to test --nightly*/ +const auto k0_values_nightly_lhs_nt_rhs_nt = framework::dataset::make("K0", { 1, 2, 3, 4, 8, 16 }); +const auto k0_values_nightly_rhs_t = framework::dataset::make("K0", { 1, 2, 3, 4, 8 }); +const auto k0_values_nightly_lhs_t_rhs_nt = framework::dataset::make("K0", { 1, 2, 3, 4, 5, 6, 7, 8 }); + +TEST_SUITE(CL) +TEST_SUITE(MatMulLowpNativeKernel) +TEST_SUITE(Validate) + +TEST_CASE(SupportedKernelConfigurations, framework::DatasetMode::ALL) +{ + using MatMulConfigurationPair = std::pair<MatMulKernelInfo, bool>; + + const std::vector<MatMulConfigurationPair> supported_block_sizes = + { + // MatMulKernelInfo(adj_lhs, adj_rhs, M0, N0, K0, export_rhs_to_cl_image = false) + // Lhs not-transposed, Rhs-not-transposed + { MatMulKernelInfo(false, false, 0, 1, 1), false }, // M0 should be > 0 + { MatMulKernelInfo(false, false, 3, 5, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(false, false, 3, 6, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(false, false, 3, 3, 17), false }, // K0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(false, false, 3, 3, 7), false }, // K0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(false, false, 9, 1, 2), true }, + { MatMulKernelInfo(false, false, 3, 16, 3), true }, + { MatMulKernelInfo(false, false, 7, 3, 4), true }, + { MatMulKernelInfo(false, false, 7, 3, 4, true), true }, // export to CLImage is unsupported for quantized types + }; + + // Set big enough shapes so that block sizes are not truncated. Also, set all dimensions equal + // so that it doesn't fail for different NT/T configurations. We aim to test the block sizes here, + // not the shapes themselves. + const TensorInfo lhs_info = TensorInfo(TensorShape(100U, 100U), 1, DataType::QASYMM8_SIGNED); + const TensorInfo rhs_info = TensorInfo(TensorShape(100U, 100U), 1, DataType::QASYMM8_SIGNED); + + for(auto &pair : supported_block_sizes) + { + TensorInfo output_info; + Status status = ClMatMulLowpNativeKernel::validate(&lhs_info, &rhs_info, nullptr, &output_info, pair.first); + + ARM_COMPUTE_EXPECT(bool(status) == pair.second, framework::LogLevel::ERRORS); + } +} + +TEST_CASE(ValidateInputShapes, framework::DatasetMode::ALL) +{ + // Configurations are assumed to be Nt/Nt, but will be transposed inside the test to test other configurations + using ShapeConfigurationTuple = std::tuple<TensorShape, TensorShape, TensorShape, bool>; + const std::vector<ShapeConfigurationTuple> shape_configurations = + { + { TensorShape(5U, 1U), TensorShape(3U, 5U), TensorShape(3U), true }, + { TensorShape(10U, 12U), TensorShape(3U, 10U), TensorShape(3U), true }, + { TensorShape(8U, 4U), TensorShape(2U, 8U), TensorShape(2U), true }, + { TensorShape(8U, 4U), TensorShape(2U, 5U), TensorShape(2U), false }, // Mismatch in the K dimension + { TensorShape(5U, 0U), TensorShape(2U, 5U), TensorShape(2U), false }, // Invalid dimension + { TensorShape(5U, 4U, 3U, 4U, 5U, 6U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), TensorShape(2U), true }, + { TensorShape(5U, 4U, 3U, 4U, 5U, 1U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), TensorShape(2U), false }, // no batch broadcasting + { TensorShape(5U, 4U, 3U, 4U, 9U, 6U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), TensorShape(2U), false }, // mismatch in batch dimension + { TensorShape(5U, 1U), TensorShape(3U, 5U), TensorShape(1U), false }, // invalid broadcast of bias + { TensorShape(5U, 1U), TensorShape(3U, 5U), TensorShape(3U, 3U), false }, // 2d bias is invalid + }; + + for(auto &tuple : shape_configurations) + { + const bool expected = std::get<3>(tuple); + + for(bool adj_lhs : + { + false, true + }) + { + for(bool adj_rhs : + { + false, true + }) + { + TensorShape lhs_shape = std::get<0>(tuple); + TensorShape rhs_shape = std::get<1>(tuple); + TensorShape bia_shape = std::get<2>(tuple); + + if(adj_lhs) + { + permute(lhs_shape, PermutationVector(1U, 0U)); + } + + if(adj_rhs) + { + permute(rhs_shape, PermutationVector(1U, 0U)); + } + + const TensorInfo lhs_info = TensorInfo(lhs_shape, 1, DataType::QASYMM8_SIGNED); + const TensorInfo rhs_info = TensorInfo(rhs_shape, 1, DataType::QASYMM8_SIGNED); + const TensorInfo bia_info = TensorInfo(bia_shape, 1, DataType::S32); + TensorInfo output_info; + + MatMulKernelInfo matmul_kernel_info{ adj_lhs, adj_rhs, 1, 1, 1, false /* export_rhs_to_cl_image */ }; + + Status status = ClMatMulLowpNativeKernel::validate(&lhs_info, &rhs_info, &bia_info, &output_info, matmul_kernel_info); + ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); + } + } + } +} + +TEST_CASE(ValidateDataTypes, framework::DatasetMode::ALL) +{ + using DataTypeConfigurationTuple = std::tuple<DataType, DataType, DataType, DataType, bool>; + const std::vector<DataTypeConfigurationTuple> data_type_configurations = + { + { DataType::F32, DataType::F32, DataType::F32, DataType::F32, false }, // no floating point types + { DataType::F16, DataType::F16, DataType::F16, DataType::F16, false }, // no floating point types + { DataType::F64, DataType::F64, DataType::F64, DataType::F64, false }, // no double precision + { DataType::QASYMM8, DataType::QASYMM8, DataType::S32, DataType::QASYMM8, true }, + { DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, DataType::S32, DataType::QASYMM8_SIGNED, true }, + { DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8_PER_CHANNEL, DataType::S32, DataType::QSYMM8_PER_CHANNEL, false }, // only qasymm8/qasymm8_signed is supported + { DataType::QASYMM16, DataType::QASYMM16, DataType::S32, DataType::QASYMM16, false }, // only qasymm8/qasymm8_signed is supported + { DataType::QSYMM16, DataType::QSYMM16, DataType::S32, DataType::QSYMM16, false }, // only qasymm8/qasymm8_signed is supported + { DataType::QSYMM8, DataType::QSYMM8, DataType::S32, DataType::QSYMM8, false }, // only qasymm8/qasymm8_signed is supported + { DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S32, DataType::QASYMM8, false }, // no mixed data types + { DataType::S64, DataType::S64, DataType::S64, DataType::S64, false }, // no integral types + { DataType::S32, DataType::S32, DataType::S32, DataType::S32, false }, // no integral types + { DataType::S16, DataType::S16, DataType::S16, DataType::S16, false }, // no integral types + { DataType::S8, DataType::S8, DataType::S8, DataType::S8, false }, // no integral types + { DataType::U64, DataType::U64, DataType::U64, DataType::U64, false }, // no integral types + { DataType::U32, DataType::U32, DataType::U32, DataType::U32, false }, // no integral types + { DataType::U16, DataType::U16, DataType::U16, DataType::U16, false }, // no integral types + { DataType::U8, DataType::U8, DataType::U8, DataType::U8, false }, // no integral types + { DataType::QASYMM8, DataType::QASYMM8, DataType::F32, DataType::QASYMM8, false } // Only S32 bias is supported + }; + + // It's enough to test a single shape and block size configuration while checking data types + const TensorShape shape = TensorShape(10U, 10U); + const TensorShape bia_shape = TensorShape(10U); + const MatMulKernelInfo matmul_kernel_info{ false, false, 1, 1, 1, false }; + for(auto &tuple : data_type_configurations) + { + const bool expected = std::get<4>(tuple); + + const TensorInfo lhs_info(shape, 1, std::get<0>(tuple)); + const TensorInfo rhs_info(shape, 1, std::get<1>(tuple)); + const TensorInfo bia_info(bia_shape, 1, std::get<2>(tuple)); + TensorInfo output_info(shape, 1, std::get<3>(tuple)); + + Status status = ClMatMulLowpNativeKernel::validate(&lhs_info, &rhs_info, &bia_info, &output_info, matmul_kernel_info); + ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); + } +} + +TEST_SUITE_END() // Validate + +TEST_SUITE(Quantized) +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunTiny, CLMatMulLowpNativeKernelFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::TinyMatMulDataset(), + framework::dataset::make("TransposeA", { true, false })), + framework::dataset::make("TransposeB", { true, false })), + m0_values_precommit), + n0_values_precommit), + k0_values_precommit), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); +} +FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulLowpNativeKernelFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDataset(), + framework::dataset::make("TransposeA", { true, false })), + framework::dataset::make("TransposeB", { true, false })), + m0_values_precommit), + n0_values_precommit), + k0_values_precommit), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); +} +FIXTURE_DATA_TEST_CASE(RunWithBias, CLMatMulLowpKernelWithBiasFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDataset(), + framework::dataset::make("TransposeA", { true, false })), + framework::dataset::make("TransposeB", { true, false })), + m0_values_precommit), + n0_values_precommit), + k0_values_precommit), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); +} +FIXTURE_DATA_TEST_CASE(RunLargeNoTranspose, CLMatMulLowpNativeKernelFixture<int8_t>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(), + framework::dataset::make("TransposeA", { false })), + framework::dataset::make("TransposeB", { false })), + m0_values_nightly_lhs_nt), + n0_values_nightly_rhs_nt), + k0_values_nightly_lhs_nt_rhs_nt), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); +} +FIXTURE_DATA_TEST_CASE(RunLargeRhsTransposed, CLMatMulLowpNativeKernelFixture<int8_t>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(), + framework::dataset::make("TransposeA", { false })), + framework::dataset::make("TransposeB", { true })), + m0_values_nightly_lhs_nt), + n0_values_nightly_rhs_t), + k0_values_nightly_rhs_t), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); +} +FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposed, CLMatMulLowpNativeKernelFixture<int8_t>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(), + framework::dataset::make("TransposeA", { true })), + framework::dataset::make("TransposeB", { false })), + m0_values_nightly_lhs_t), + n0_values_nightly_rhs_nt), + k0_values_nightly_lhs_t_rhs_nt), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); +} +FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposedRhsTransposed, CLMatMulLowpNativeKernelFixture<int8_t>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(), + framework::dataset::make("TransposeA", { true })), + framework::dataset::make("TransposeB", { true })), + m0_values_nightly_lhs_t), + n0_values_nightly_rhs_t), + k0_values_nightly_rhs_t), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); +} +// Running High Dimensional test is enough for qasymm8_signed, because we're stressing the number of dimensions, not data type or M0/N0/K0 +// It's a good idea to test for each Lhs/Rhs T/NT combinations because they're different CL kernels +FIXTURE_DATA_TEST_CASE(RunHighDimensional, CLMatMulLowpNativeKernelFixture<int8_t>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(datasets::HighDimensionalMatMulDataset(), + framework::dataset::make("TransposeA", { true, false })), + framework::dataset::make("TransposeB", { true, false })), + framework::dataset::make("M0", { 2 })), + framework::dataset::make("N0", { 2 })), + framework::dataset::make("K0", { 2 })), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); +} +TEST_SUITE_END() // QASYMM8_SIGNED + +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunTiny, CLMatMulLowpNativeKernelFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::TinyMatMulDataset(), + framework::dataset::make("TransposeA", { true, false })), + framework::dataset::make("TransposeB", { true, false })), + m0_values_precommit), + n0_values_precommit), + k0_values_precommit), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::QASYMM8))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); +} +FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulLowpNativeKernelFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDataset(), + framework::dataset::make("TransposeA", { true, false })), + framework::dataset::make("TransposeB", { true, false })), + m0_values_precommit), + n0_values_precommit), + k0_values_precommit), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::QASYMM8))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); +} +FIXTURE_DATA_TEST_CASE(RunLargeNoTranspose, CLMatMulLowpNativeKernelFixture<uint8_t>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(), + framework::dataset::make("TransposeA", { false })), + framework::dataset::make("TransposeB", { false })), + m0_values_nightly_lhs_nt), + n0_values_nightly_rhs_nt), + k0_values_nightly_lhs_nt_rhs_nt), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::QASYMM8))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); +} +FIXTURE_DATA_TEST_CASE(RunLargeRhsTransposed, CLMatMulLowpNativeKernelFixture<uint8_t>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(), + framework::dataset::make("TransposeA", { false })), + framework::dataset::make("TransposeB", { true })), + m0_values_nightly_lhs_nt), + n0_values_nightly_rhs_t), + k0_values_nightly_rhs_t), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::QASYMM8))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); +} +FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposed, CLMatMulLowpNativeKernelFixture<uint8_t>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(), + framework::dataset::make("TransposeA", { true })), + framework::dataset::make("TransposeB", { false })), + m0_values_nightly_lhs_t), + n0_values_nightly_rhs_nt), + k0_values_nightly_lhs_t_rhs_nt), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::QASYMM8))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); +} +FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposedRhsTransposed, CLMatMulLowpNativeKernelFixture<uint8_t>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(), + framework::dataset::make("TransposeA", { true })), + framework::dataset::make("TransposeB", { true })), + m0_values_nightly_lhs_t), + n0_values_nightly_rhs_t), + k0_values_nightly_rhs_t), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::QASYMM8))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); +} +TEST_SUITE_END() // QASYMM8 +TEST_SUITE_END() // Quantized +TEST_SUITE_END() // MatMulLowpNativeKernel +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/CL/MatMulLowpNativeMMULKernel.cpp b/tests/validation/CL/MatMulLowpNativeMMULKernel.cpp new file mode 100644 index 0000000000..ac46b67c9e --- /dev/null +++ b/tests/validation/CL/MatMulLowpNativeMMULKernel.cpp @@ -0,0 +1,394 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/runtime/CL/CLTensor.h" + +#include "src/gpu/cl/kernels/ClMatMulLowpNativeMMULKernel.h" + +#include "tests/datasets/MatMulLowpMMULDataset.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/MatMulKernelFixture.h" +#include "tests/validation/reference/Permute.h" + +#include <tuple> + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +constexpr AbsoluteTolerance<float> tolerance_quant(1); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ +} +using framework::dataset::make; + +template <typename T> +using CLMatMulLowpNativeMMULKernelFixture = MatMulKernelValidationFixture<T, ClMatMulLowpNativeMMULKernel, true /* use_mmul */>; + +template <typename T> +using CLMatMulLowpNativeMMULKernelWithBiasFixture = MatMulKernelWithBiasValidation<T, ClMatMulLowpNativeMMULKernel, true /* use_mmul */>; + +/** M0 values to test --precommit*/ +const auto m0_values_precommit = framework::dataset::make("M0", { 1, 3 }); + +/** N0 values to test --precommit*/ +const auto n0_values_precommit = framework::dataset::make("N0", { 2, 4 }); + +/** M0 values to test --nightly*/ +const auto m0_values_nightly_lhs_nt = framework::dataset::make("M0", { 2, 4, 5, 8 }); +const auto m0_values_nightly_lhs_t = framework::dataset::make("M0", { 2, 4, 8 }); + +/** N0 values to test --nightly*/ +const auto n0_values_nightly = framework::dataset::make("N0", { 1, 3, 8, 16 }); + +TEST_SUITE(CL) +TEST_SUITE(MatMulLowpNativeMMULKernel) +TEST_SUITE(Validate) + +TEST_CASE(SupportedKernelConfigurations, framework::DatasetMode::ALL) +{ + using MatMulConfigurationPair = std::pair<MatMulKernelInfo, bool>; + + const std::vector<MatMulConfigurationPair> supported_block_sizes = + { + // MatMulKernelInfo(adj_lhs, adj_rhs, M0, N0, K0, export_rhs_to_cl_image = false) + { MatMulKernelInfo(false, false, 0, 1, 4), false }, // M0 should be > 0 + { MatMulKernelInfo(false, true, 3, 5, 4), false }, // N0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(false, false, 3, 6, 4), false }, // N0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(false, false, 3, 3, 8), false }, // K0 not in 4 + { MatMulKernelInfo(true, false, 5, 3, 4), false }, // M0 not in {1, 2, 3, 4, 8, 16} when Lhs is transposed + { MatMulKernelInfo(false, false, 9, 1, 4), true }, + { MatMulKernelInfo(false, true, 3, 16, 4), true }, + { MatMulKernelInfo(false, false, 7, 3, 4), true }, + { MatMulKernelInfo(true, false, 8, 3, 4), true }, + { MatMulKernelInfo(true, true, 4, 3, 4), true }, + { MatMulKernelInfo(false, false, 7, 3, 4, true), false }, // export to CLImage is unsupported for quantized types + }; + + // Set big enough shapes so that block sizes are not truncated. Also, set all dimensions equal + // so that it doesn't fail for different NT/T configurations. We aim to test the block sizes here, + // not the shapes themselves. + const TensorInfo lhs_info = TensorInfo(TensorShape(64U, 64U), 1, DataType::QASYMM8_SIGNED); + const TensorInfo rhs_info = TensorInfo(TensorShape(64U, 64U), 1, DataType::QASYMM8_SIGNED); + + for(auto &pair : supported_block_sizes) + { + TensorInfo output_info; + Status status = ClMatMulLowpNativeMMULKernel::validate(&lhs_info, &rhs_info, nullptr, &output_info, pair.first); + const bool expected = (pair.second && arm_matrix_multiply_supported(CLKernelLibrary::get().get_device())); + + ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); + } +} + +TEST_CASE(ValidateInputShapes, framework::DatasetMode::ALL) +{ + // Configurations are assumed to be Nt/Nt, but will be transposed inside the test to test other configurations + using ShapeConfigurationTuple = std::tuple<TensorShape, TensorShape, TensorShape, bool>; + const std::vector<ShapeConfigurationTuple> shape_configurations = + { + { TensorShape(32U, 1U), TensorShape(3U, 32U), TensorShape(3U), true }, + { TensorShape(16U, 12U), TensorShape(3U, 16U), TensorShape(3U), true }, + { TensorShape(64U, 4U), TensorShape(2U, 64U), TensorShape(2U), true }, + { TensorShape(16U, 4U), TensorShape(2U, 32U), TensorShape(2U), false }, // Mismatch in the K dimension + { TensorShape(16U, 0U), TensorShape(2U, 16U), TensorShape(2U), false }, // Invalid dimension + { TensorShape(32U, 4U, 3U, 4U, 5U, 6U), TensorShape(2U, 32U, 3U, 4U, 5U, 6U), TensorShape(2U), true }, + { TensorShape(32U, 4U, 3U, 4U, 5U, 1U), TensorShape(2U, 32U, 3U, 4U, 5U, 6U), TensorShape(2U), false }, // no batch broadcasting + { TensorShape(32U, 4U, 3U, 4U, 9U, 6U), TensorShape(2U, 32U, 3U, 4U, 5U, 6U), TensorShape(2U), false }, // mismatch in batch dimension + { TensorShape(32U, 1U), TensorShape(3U, 32U), TensorShape(1U), false }, // invalid broadcast of bias + { TensorShape(32U, 1U), TensorShape(3U, 32U), TensorShape(3U, 3U), false }, // 2d bias is invalid + { TensorShape(12U, 12U), TensorShape(3U, 12U), TensorShape(3U), false }, // K must be multiple of 16 + }; + + for(auto &tuple : shape_configurations) + { + const bool expected = (std::get<3>(tuple) && arm_matrix_multiply_supported(CLKernelLibrary::get().get_device())); + + for(bool adj_lhs : + { + false, true + }) + { + for(bool adj_rhs : + { + false, true + }) + { + TensorShape lhs_shape = std::get<0>(tuple); + TensorShape rhs_shape = std::get<1>(tuple); + TensorShape bia_shape = std::get<2>(tuple); + + if(adj_lhs) + { + permute(lhs_shape, PermutationVector(1U, 0U)); + } + + if(adj_rhs) + { + permute(rhs_shape, PermutationVector(1U, 0U)); + } + + const TensorInfo lhs_info = TensorInfo(lhs_shape, 1, DataType::QASYMM8_SIGNED); + const TensorInfo rhs_info = TensorInfo(rhs_shape, 1, DataType::QASYMM8_SIGNED); + const TensorInfo bia_info = TensorInfo(bia_shape, 1, DataType::S32); + TensorInfo output_info; + + MatMulKernelInfo matmul_kernel_info{ adj_lhs, adj_rhs, 1, 1, 4, false /* export_rhs_to_cl_image */ }; + + Status status = ClMatMulLowpNativeMMULKernel::validate(&lhs_info, &rhs_info, &bia_info, &output_info, matmul_kernel_info); + ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); + } + } + } +} + +TEST_CASE(ValidateDataTypes, framework::DatasetMode::ALL) +{ + using DataTypeConfigurationTuple = std::tuple<DataType, DataType, DataType, DataType, bool>; + const std::vector<DataTypeConfigurationTuple> data_type_configurations = + { + { DataType::F32, DataType::F32, DataType::F32, DataType::F32, false }, // no floating point types + { DataType::F16, DataType::F16, DataType::F16, DataType::F16, false }, // no floating point types + { DataType::F64, DataType::F64, DataType::F64, DataType::F64, false }, // no double precision + { DataType::QASYMM8, DataType::QASYMM8, DataType::S32, DataType::QASYMM8, true }, + { DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, DataType::S32, DataType::QASYMM8_SIGNED, true }, + { DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8_PER_CHANNEL, DataType::S32, DataType::QSYMM8_PER_CHANNEL, false }, // only qasymm8/qasymm8_signed is supported + { DataType::QASYMM16, DataType::QASYMM16, DataType::S32, DataType::QASYMM16, false }, // only qasymm8/qasymm8_signed is supported + { DataType::QSYMM16, DataType::QSYMM16, DataType::S32, DataType::QSYMM16, false }, // only qasymm8/qasymm8_signed is supported + { DataType::QSYMM8, DataType::QSYMM8, DataType::S32, DataType::QSYMM8, false }, // only qasymm8/qasymm8_signed is supported + { DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S32, DataType::QASYMM8, false }, // no mixed data types + { DataType::S64, DataType::S64, DataType::S64, DataType::S64, false }, // no integral types + { DataType::S32, DataType::S32, DataType::S32, DataType::S32, false }, // no integral types + { DataType::S16, DataType::S16, DataType::S16, DataType::S16, false }, // no integral types + { DataType::S8, DataType::S8, DataType::S8, DataType::S8, false }, // no integral types + { DataType::U64, DataType::U64, DataType::U64, DataType::U64, false }, // no integral types + { DataType::U32, DataType::U32, DataType::U32, DataType::U32, false }, // no integral types + { DataType::U16, DataType::U16, DataType::U16, DataType::U16, false }, // no integral types + { DataType::U8, DataType::U8, DataType::U8, DataType::U8, false }, // no integral types + { DataType::QASYMM8, DataType::QASYMM8, DataType::F32, DataType::QASYMM8, false } // Only S32 bias is supported + }; + + // It's enough to test a single shape and block size configuration while checking data types + const TensorShape shape = TensorShape(48U, 48U); + const TensorShape bia_shape = TensorShape(48U); + const MatMulKernelInfo matmul_kernel_info{ false, false, 1, 1, 4, false }; + for(auto &tuple : data_type_configurations) + { + const bool expected = (std::get<4>(tuple) && arm_matrix_multiply_supported(CLKernelLibrary::get().get_device())); + + const TensorInfo lhs_info(shape, 1, std::get<0>(tuple)); + const TensorInfo rhs_info(shape, 1, std::get<1>(tuple)); + const TensorInfo bia_info(bia_shape, 1, std::get<2>(tuple)); + TensorInfo output_info(shape, 1, std::get<3>(tuple)); + + Status status = ClMatMulLowpNativeMMULKernel::validate(&lhs_info, &rhs_info, &bia_info, &output_info, matmul_kernel_info); + + ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); + } +} + +TEST_SUITE_END() // Validate + +TEST_SUITE(Quantized) +TEST_SUITE(QASYMM8_SIGNED) + +FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulLowpNativeMMULKernelFixture<int8_t>, + framework::DatasetMode::ALL, + combine(datasets::SmallMatMulLowpMMULDataset(), + make("TransposeA", { false, true }), + make("TransposeB", { false, true }), + m0_values_precommit, + n0_values_precommit, + make("K0", { 4 }), + make("ExportRhsToCLImage", { false }), + make("DataType", DataType::QASYMM8_SIGNED))) +{ + if(_device_supports_mmul) + { + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); + } +} + +FIXTURE_DATA_TEST_CASE(RunWithBias, CLMatMulLowpNativeMMULKernelWithBiasFixture<int8_t>, + framework::DatasetMode::ALL, + combine(datasets::SmallMatMulLowpMMULWithBiasDataset(), + make("TransposeA", { false, true }), + make("TransposeB", { false, true }), + m0_values_precommit, + n0_values_precommit, + make("K0", { 4 }), + make("ExportRhsToCLImage", { false }), + make("DataType", DataType::QASYMM8_SIGNED))) +{ + if(_device_supports_mmul) + { + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); + } +} + +FIXTURE_DATA_TEST_CASE(RunLargeLhsNotTransposed, CLMatMulLowpNativeMMULKernelFixture<int8_t>, + framework::DatasetMode::NIGHTLY, + combine(datasets::LargeMatMulLowpMMULDataset(), + make("TransposeA", { false }), + make("TransposeB", { false, true }), + m0_values_nightly_lhs_nt, + n0_values_nightly, + make("K0", { 4 }), + make("ExportRhsToCLImage", { false }), + make("DataType", DataType::QASYMM8_SIGNED))) +{ + if(_device_supports_mmul) + { + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); + } +} + +FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposed, CLMatMulLowpNativeMMULKernelFixture<int8_t>, + framework::DatasetMode::NIGHTLY, + combine(datasets::LargeMatMulLowpMMULDataset(), + make("TransposeA", { true }), + make("TransposeB", { false, true }), + m0_values_nightly_lhs_t, + n0_values_nightly, + make("K0", { 4 }), + make("ExportRhsToCLImage", { false }), + make("DataType", DataType::QASYMM8_SIGNED))) +{ + if(_device_supports_mmul) + { + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); + } +} + +// Running High Dimensional test is enough for qasymm8_signed, because we're stressing the number of dimensions, not data type or M0/N0/K0 +// It's a good idea to test for each Lhs/Rhs T/NT combinations because they're different CL kernels +FIXTURE_DATA_TEST_CASE(RunHighDimensional, CLMatMulLowpNativeMMULKernelFixture<int8_t>, + framework::DatasetMode::ALL, + combine(datasets::HighDimensionalMatMulLowpMMULDataset(), + make("TransposeA", { false, true }), + make("TransposeB", { false, true }), + make("M0", { 2 }), + make("N0", { 2 }), + make("K0", { 4 }), + make("ExportRhsToCLImage", { false }), + make("DataType", DataType::QASYMM8_SIGNED))) +{ + if(_device_supports_mmul) + { + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); + } +} + +TEST_SUITE_END() // QASYMM8_SIGNED + +TEST_SUITE(QASYMM8) + +FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulLowpNativeMMULKernelFixture<uint8_t>, + framework::DatasetMode::ALL, + combine(datasets::SmallMatMulLowpMMULDatasetSubset(), + make("TransposeA", { false, true }), + make("TransposeB", { false, true }), + m0_values_precommit, + n0_values_precommit, + make("K0", { 4 }), + make("ExportRhsToCLImage", { false }), + make("DataType", DataType::QASYMM8))) +{ + if(_device_supports_mmul) + { + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); + } +} + +FIXTURE_DATA_TEST_CASE(RunWithBias, CLMatMulLowpNativeMMULKernelWithBiasFixture<uint8_t>, + framework::DatasetMode::ALL, + combine(datasets::SmallMatMulLowpMMULWithBiasDataset(), + make("TransposeA", { false, true }), + make("TransposeB", { false, true }), + m0_values_precommit, + n0_values_precommit, + make("K0", { 4 }), + make("ExportRhsToCLImage", { false }), + make("DataType", DataType::QASYMM8))) +{ + if(_device_supports_mmul) + { + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); + } +} + +FIXTURE_DATA_TEST_CASE(RunLargeLhsNotTransposed, CLMatMulLowpNativeMMULKernelFixture<uint8_t>, + framework::DatasetMode::NIGHTLY, + combine(datasets::LargeMatMulLowpMMULDataset(), + make("TransposeA", { false }), + make("TransposeB", { false, true }), + m0_values_nightly_lhs_nt, + n0_values_nightly, + make("K0", { 4 }), + make("ExportRhsToCLImage", { false }), + make("DataType", DataType::QASYMM8))) +{ + if(_device_supports_mmul) + { + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); + } +} + +FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposed, CLMatMulLowpNativeMMULKernelFixture<uint8_t>, + framework::DatasetMode::NIGHTLY, + combine(datasets::LargeMatMulLowpMMULDataset(), + make("TransposeA", { true }), + make("TransposeB", { false, true }), + m0_values_nightly_lhs_t, + n0_values_nightly, + make("K0", { 4 }), + make("ExportRhsToCLImage", { false }), + make("DataType", DataType::QASYMM8))) +{ + if(_device_supports_mmul) + { + // Validate output + validate(CLAccessor(_target), _reference, tolerance_quant); + } +} + +TEST_SUITE_END() // QASYMM8 +TEST_SUITE_END() // Quantized +TEST_SUITE_END() // MatMulLowpNativeMMULKernel +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/CL/MatMulNativeMMULKernel.cpp b/tests/validation/CL/MatMulNativeMMULKernel.cpp new file mode 100644 index 0000000000..655dd354dc --- /dev/null +++ b/tests/validation/CL/MatMulNativeMMULKernel.cpp @@ -0,0 +1,501 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/runtime/CL/CLTensor.h" +#include "src/gpu/cl/kernels/ClMatMulNativeMMULKernel.h" +#include "tests/datasets/LargeMatMulMMULDataset.h" +#include "tests/datasets/SmallMatMulMMULDataset.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/MatMulKernelFixture.h" +#include "tests/validation/reference/Permute.h" + +#include <tuple> + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */ +constexpr float abs_tolerance_f32( + 0.0001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for floating point data types in case using relative tolerance fails because of small values */ +constexpr float abs_tolerance_f16( + 0.02f); /**< Absolute tolerance value for comparing reference's output against implementation's output for fp16 data types in case using relative tolerance fails because of small values */ +RelativeTolerance<half_float::half> tolerance_f16(half(0.02)); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */ +} // namespace + +/** M0 values to test --precommit*/ +const auto m0_values_precommit = framework::dataset::make("M0", { 1, 3 }); + +/** N0 values to test --precommit*/ +const auto n0_values_precommit = framework::dataset::make("N0", { 2, 4 }); + +/** M0 values to test --nightly*/ +const auto m0_values_nightly_lhs_nt = framework::dataset::make("M0", { 1, 2, 3, 4, 5, 6, 7, 8 }); +const auto m0_values_nightly_lhs_t = framework::dataset::make("M0", { 1, 2, 3, 4, 8 }); + +/** N0 values to test --nightly*/ +const auto n0_values_nightly_rhs_nt = framework::dataset::make("N0", { 1, 2, 3, 4, 8, 16 }); +const auto n0_values_nightly_rhs_t = framework::dataset::make("N0", { 1, 2, 3, 4, 8 }); + +/** K0 value -- Fixed to 1 */ +const auto k0_value = framework::dataset::make("K0", { 1 }); + +template <typename T> +using CLMatMulNativeMMULKernelFixture = MatMulKernelValidationFixture<T, ClMatMulNativeMMULKernel, true /*use_mmul*/>; + +template <typename T> +using CLMatMulKernelBiasFixture = MatMulKernelWithBiasValidation<T, ClMatMulNativeMMULKernel, true /*use_mmul*/>; + +TEST_SUITE(CL) +TEST_SUITE(MatMulNativeMMULKernel) +TEST_SUITE(Validate) + +TEST_CASE(SupportedBlockSizes, framework::DatasetMode::ALL) +{ + if(arm_matrix_multiply_supported(CLKernelLibrary::get().get_device())) + { + using MatMulConfigurationPair = std::pair<MatMulKernelInfo, bool>; + + const std::vector<MatMulConfigurationPair> supported_block_sizes = + { + // MatMulKernelInfo(adj_lhs, adj_rhs, M0, N0, K0, export_rhs_to_cl_image = false) + // Lhs not-transposed, Rhs not-transposed + { MatMulKernelInfo(false, false, 0, 1, 1), false }, // M0 should be > 0 + { MatMulKernelInfo(false, false, 3, 5, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(false, false, 3, 6, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(false, false, 3, 3, 4), false }, // K0 not 1 + { MatMulKernelInfo(false, false, 9, 1, 1), true }, + { MatMulKernelInfo(false, false, 3, 16, 1), true }, + { MatMulKernelInfo(false, false, 7, 3, 1), true }, + + // Lhs transposed, Rhs not-transposed + { MatMulKernelInfo(true, false, 3, 11, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(true, false, 3, 7, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(true, false, 6, 3, 1), false }, // M0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(true, false, 5, 3, 1), false }, // M0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(true, false, 2, 2, 2), false }, // K0 is not 1 + { MatMulKernelInfo(true, false, 4, 1, 1), true }, + { MatMulKernelInfo(true, false, 3, 3, 1), true }, + { MatMulKernelInfo(true, false, 2, 4, 1), true }, + + // Lhs not-transposed, Rhs not-transposed + { MatMulKernelInfo(false, true, 3, 11, 1), false }, // N0 not in {1, 2, 3, 4, 8} + { MatMulKernelInfo(false, true, 2, 17, 1), false }, // N0 not in {1, 2, 3, 4, 8} + { MatMulKernelInfo(false, true, 4, 5, 1), false }, // N0 not in {1, 2, 3, 4, 8} + { MatMulKernelInfo(false, true, 4, 4, 7), false }, // K0 is not 1 + { MatMulKernelInfo(false, true, 4, 7, 1), false }, // N0 not in {1, 2, 3, 4, 8} + { MatMulKernelInfo(false, true, 3, 8, 1), true }, + { MatMulKernelInfo(false, true, 8, 16, 1), true }, + { MatMulKernelInfo(false, true, 2, 4, 1), true }, + + // Lhs transposed, Rhs transposed + { MatMulKernelInfo(true, true, 3, 11, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(true, true, 3, 7, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(true, true, 6, 3, 1), false }, // M0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(true, true, 5, 3, 1), false }, // M0 not in {1, 2, 3, 4, 8, 16} + { MatMulKernelInfo(true, true, 4, 8, 2), false }, // K0 is not 1 + { MatMulKernelInfo(true, true, 4, 8, 1), true }, + { MatMulKernelInfo(true, true, 3, 3, 1), true }, + { MatMulKernelInfo(true, true, 16, 4, 1), true }, + }; + + // Set big enough shapes so that block sizes are not truncated. Also, set all dimensions equal + // so that it doesn't fail for different NT/T configurations. We aim to test the block sizes here, + // not the shapes themselves. + const TensorInfo lhs_info = TensorInfo(TensorShape(100U, 100U), 1, DataType::F32); + const TensorInfo rhs_info = TensorInfo(TensorShape(100U, 100U), 1, DataType::F32); + + for(auto &pair : supported_block_sizes) + { + TensorInfo output_info; + Status status = ClMatMulNativeMMULKernel::validate(&lhs_info, &rhs_info, nullptr, &output_info, pair.first); + ARM_COMPUTE_EXPECT(bool(status) == pair.second, framework::LogLevel::ERRORS); + } + } + else + { + ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } +} + +TEST_CASE(ValidateInputShapes, framework::DatasetMode::ALL) +{ + if(arm_matrix_multiply_supported(CLKernelLibrary::get().get_device())) + { + // Configurations are assumed to be Nt/Nt, but will be transposed inside the test to test other configurations + using ShapeConfigurationTuple = std::tuple<TensorShape, TensorShape, TensorShape, bool>; // lhs, rhs, bias, result + const std::vector<ShapeConfigurationTuple> shape_configurations = + { + { TensorShape(4U, 1U), TensorShape(3U, 4U), TensorShape(3U), true }, + { TensorShape(12U, 12U), TensorShape(3U, 12U), TensorShape(3U), true }, + { TensorShape(8U, 4U), TensorShape(2U, 8U), TensorShape(2U), true }, + { TensorShape(8U, 4U), TensorShape(2U, 4U), TensorShape(2U), false }, // Mismatch in the K dimension + { TensorShape(5U, 0U), TensorShape(2U, 5U), TensorShape(2U), false }, // Invalid dimension + { TensorShape(5U, 7U), TensorShape(2U, 5U), TensorShape(2U), false }, // K not a multiple of 4 (MMUL_K0) + { TensorShape(8U, 4U, 3U, 4U, 5U, 6U), TensorShape(2U, 8U, 3U, 4U, 5U, 6U), TensorShape(2U), true }, + { TensorShape(5U, 4U, 3U, 4U, 5U, 1U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), TensorShape(2U), false }, // No batch broadcasting + { TensorShape(5U, 4U, 3U, 4U, 9U, 6U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), TensorShape(2U), false }, // Mismatch in batch dimension + { TensorShape(4U, 1U), TensorShape(3U, 4U), TensorShape(1U), false }, // Bias first dimensions != dst first dimension. + { TensorShape(4U, 1U), TensorShape(3U, 4U), TensorShape(5U, 6U), false }, // Bias is 2d which is invalid. + }; + + for(auto &tuple : shape_configurations) + { + const bool expected = std::get<3>(tuple); + + for(bool adj_lhs : + { + false, true + }) + { + for(bool adj_rhs : + { + false, true + }) + { + TensorShape lhs_shape = std::get<0>(tuple); + TensorShape rhs_shape = std::get<1>(tuple); + TensorShape bia_shape = std::get<2>(tuple); + + if(adj_lhs) + { + permute(lhs_shape, PermutationVector(1U, 0U)); + } + + if(adj_rhs) + { + permute(rhs_shape, PermutationVector(1U, 0U)); + } + + const TensorInfo lhs_info = TensorInfo(lhs_shape, 1, DataType::F32); + const TensorInfo rhs_info = TensorInfo(rhs_shape, 1, DataType::F32); + const TensorInfo bia_info = TensorInfo(bia_shape, 1, DataType::F32); + TensorInfo output_info; + + MatMulKernelInfo matmul_kernel_info{ adj_lhs, adj_rhs, 1, 1, 1, false /* export_rhs_to_cl_image */ }; + + Status status = ClMatMulNativeMMULKernel::validate(&lhs_info, &rhs_info, &bia_info, &output_info, matmul_kernel_info); + ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); + } + } + } + } + else + { + ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } +} + +TEST_CASE(ValidateDataTypes, framework::DatasetMode::ALL) +{ + if(arm_matrix_multiply_supported(CLKernelLibrary::get().get_device())) + { + // Configurations are assumed to be Nt/Nt, but will be transposed inside the test to test other configurations + using DataTypeConfigurationTuple = std::tuple<DataType, DataType, DataType, DataType, bool>; + const std::vector<DataTypeConfigurationTuple> data_type_configurations = + { + { DataType::F32, DataType::F32, DataType::F32, DataType::F32, true }, + { DataType::F16, DataType::F16, DataType::F16, DataType::F16, true }, + { DataType::F32, DataType::F32, DataType::F32, DataType::F32, true }, + { DataType::F32, DataType::F32, DataType::F16, DataType::F32, false }, // incorrect bias type + { DataType::F16, DataType::F32, DataType::F32, DataType::F32, false }, // no mixed precision + { DataType::F64, DataType::F64, DataType::F64, DataType::F64, false }, // no double precision + { DataType::QASYMM8, DataType::QASYMM8, DataType::S32, DataType::QASYMM8, false }, // no quantized types + { DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, DataType::S32, DataType::QASYMM8_SIGNED, false }, // no quantized types + { DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8_PER_CHANNEL, DataType::S32, DataType::QSYMM8_PER_CHANNEL, false }, // no quantized types + { DataType::QASYMM16, DataType::QASYMM16, DataType::S32, DataType::QASYMM16, false }, // no quantized types + { DataType::QSYMM16, DataType::QSYMM16, DataType::S32, DataType::QSYMM16, false }, // no quantized types + { DataType::QSYMM8, DataType::QSYMM8, DataType::S32, DataType::QSYMM8, false }, // no quantized types + { DataType::S64, DataType::S64, DataType::S64, DataType::S64, false }, // no integral types + { DataType::S32, DataType::S32, DataType::S32, DataType::S32, false }, // no integral types + { DataType::S16, DataType::S16, DataType::S16, DataType::S16, false }, // no integral types + { DataType::S8, DataType::S8, DataType::S8, DataType::S8, false }, // no integral types + { DataType::U64, DataType::U64, DataType::U64, DataType::U64, false }, // no integral types + { DataType::U32, DataType::U32, DataType::U32, DataType::U32, false }, // no integral types + { DataType::U16, DataType::U16, DataType::U16, DataType::U16, false }, // no integral types + { DataType::U8, DataType::U8, DataType::U8, DataType::U8, false }, // no integral types + }; + + const TensorShape shape = TensorShape(8U, 8U); + const TensorShape bia_shape = TensorShape(8U); + const MatMulKernelInfo matmul_kernel_info{ false, false, 1, 1, 1, false }; + for(auto &tuple : data_type_configurations) + { + const bool expected = std::get<4>(tuple); + + const TensorInfo lhs_info(shape, 1, std::get<0>(tuple)); + const TensorInfo rhs_info(shape, 1, std::get<1>(tuple)); + const TensorInfo bia_info(bia_shape, 1, std::get<2>(tuple)); + TensorInfo output_info(shape, 1, std::get<3>(tuple)); + + Status status = ClMatMulNativeMMULKernel::validate(&lhs_info, &rhs_info, &bia_info, &output_info, matmul_kernel_info); + ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); + } + } + else + { + ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } +} + +TEST_SUITE_END() // Validate + +TEST_SUITE(Float) +TEST_SUITE(FP32) +TEST_SUITE(Buffer) +FIXTURE_DATA_TEST_CASE(RunTiny, CLMatMulNativeMMULKernelFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::TinyMatMulMMULDataset(), + framework::dataset::make("TransposeA", { false, true })), + framework::dataset::make("TransposeB", { false, true })), + m0_values_precommit), + n0_values_precommit), + k0_value), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + if(_device_supports_mmul) + { + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); + } +} +FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulNativeMMULKernelFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulMMULDataset(), + framework::dataset::make("TransposeA", { false, true })), + framework::dataset::make("TransposeB", { false, true })), + m0_values_precommit), + n0_values_precommit), + k0_value), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + if(_device_supports_mmul) + { + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); + } +} +FIXTURE_DATA_TEST_CASE(RunWithBias, CLMatMulKernelBiasFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulMMULDataset(), + framework::dataset::make("TransposeA", { false, true })), + framework::dataset::make("TransposeB", { false, true })), + m0_values_precommit), + n0_values_precommit), + k0_value), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + if(_device_supports_mmul) + { + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); + } +} +FIXTURE_DATA_TEST_CASE(RunLargeNoTranspose, CLMatMulNativeMMULKernelFixture<float>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulMMULDataset(), + framework::dataset::make("TransposeA", { false })), + framework::dataset::make("TransposeB", { false })), + m0_values_nightly_lhs_nt), + n0_values_nightly_rhs_nt), + k0_value), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + if(_device_supports_mmul) + { + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); + } +} + +FIXTURE_DATA_TEST_CASE(RunLargeRhsTranspose, CLMatMulNativeMMULKernelFixture<float>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulMMULDataset(), + framework::dataset::make("TransposeA", { false })), + framework::dataset::make("TransposeB", { true })), + m0_values_nightly_lhs_nt), + n0_values_nightly_rhs_t), + k0_value), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + if(_device_supports_mmul) + { + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); + } +} +FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposed, CLMatMulNativeMMULKernelFixture<float>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulMMULDataset(), + framework::dataset::make("TransposeA", { true })), + framework::dataset::make("TransposeB", { false })), + m0_values_nightly_lhs_t), + n0_values_nightly_rhs_nt), + k0_value), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + // Validate output + if(_device_supports_mmul) + { + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); + } +} +FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposedRhsTransposed, CLMatMulNativeMMULKernelFixture<float>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulMMULDataset(), + framework::dataset::make("TransposeA", { true })), + framework::dataset::make("TransposeB", { true })), + m0_values_nightly_lhs_t), + n0_values_nightly_rhs_t), + k0_value), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + // Validate output + if(_device_supports_mmul) + { + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); + } +} +// Running High Dimensional test is enough for FP32, because we're stressing the number of dimensions, not data type or M0/N0/K0 +// It's a good idea to test for each Lhs/Rhs T/NT combinations because they're different CL kernels +FIXTURE_DATA_TEST_CASE(RunHighDimensional, CLMatMulNativeMMULKernelFixture<float>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(datasets::HighDimensionalMatMulMMULDataset(), + framework::dataset::make("TransposeA", { false, true })), + framework::dataset::make("TransposeB", { false, true })), + framework::dataset::make("M0", { 2 })), + framework::dataset::make("N0", { 2 })), + framework::dataset::make("K0", { 1 })), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + if(_device_supports_mmul) + { + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); + } +} +TEST_SUITE_END() // Buffer + +TEST_SUITE_END() // FP32 + +TEST_SUITE(FP16) +TEST_SUITE(Buffer) +FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulNativeMMULKernelFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulMMULDataset(), + framework::dataset::make("TransposeA", { false, true })), + framework::dataset::make("TransposeB", { false, true })), + m0_values_precommit), + n0_values_precommit), + k0_value), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + if(_device_supports_mmul) + { + validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); + } +} +FIXTURE_DATA_TEST_CASE(RunLargeNoTranspose, CLMatMulNativeMMULKernelFixture<half>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulMMULDataset(), + framework::dataset::make("TransposeA", { false })), + framework::dataset::make("TransposeB", { false })), + m0_values_nightly_lhs_nt), + n0_values_nightly_rhs_nt), + k0_value), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + if(_device_supports_mmul) + { + validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); + } +} +FIXTURE_DATA_TEST_CASE(RunLargeRhsTranspose, CLMatMulNativeMMULKernelFixture<half>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulMMULDataset(), + framework::dataset::make("TransposeA", { false })), + framework::dataset::make("TransposeB", { true })), + m0_values_nightly_lhs_nt), + n0_values_nightly_rhs_t), + k0_value), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + if(_device_supports_mmul) + { + validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); + } +} +FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposed, CLMatMulNativeMMULKernelFixture<half>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulMMULDataset(), + framework::dataset::make("TransposeA", { true })), + framework::dataset::make("TransposeB", { false })), + m0_values_nightly_lhs_t), + n0_values_nightly_rhs_nt), + k0_value), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + // Validate output + if(_device_supports_mmul) + { + validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); + } +} +FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposedRhsTransposed, CLMatMulNativeMMULKernelFixture<half>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulMMULDataset(), + framework::dataset::make("TransposeA", { true })), + framework::dataset::make("TransposeB", { true })), + m0_values_nightly_lhs_t), + n0_values_nightly_rhs_t), + k0_value), + framework::dataset::make("ExportRhsToCLImage", { false })), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + // Validate output + if(_device_supports_mmul) + { + validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); + } +} +TEST_SUITE_END() // Buffer + +TEST_SUITE_END() // FP16 +TEST_SUITE_END() // Float +TEST_SUITE_END() // MatMulNativeMMULKernel +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/CL/MaxUnpoolingLayer.cpp b/tests/validation/CL/MaxUnpoolingLayer.cpp index 6cba8b8bd5..cf4fcdda70 100644 --- a/tests/validation/CL/MaxUnpoolingLayer.cpp +++ b/tests/validation/CL/MaxUnpoolingLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Arm Limited. + * Copyright (c) 2020-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -51,20 +51,19 @@ const auto PoolingLayerIndicesDatasetFPSmall = combine(combine(framework::datase TEST_SUITE(Float) TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(MaxUnpooling, CLMaxUnpoolingLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerIndicesDatasetFPSmall, +FIXTURE_DATA_TEST_CASE(MaxUnpooling, CLMaxUnpoolingLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerIndicesDatasetFPSmall, framework::dataset::make("DataType", DataType::F32))), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }) )) { - printf("validate\n"); // Validate output validate(CLAccessor(_target), _reference); } TEST_SUITE_END() // FP32 TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(MaxUnpooling, CLMaxUnpoolingLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerIndicesDatasetFPSmall, +FIXTURE_DATA_TEST_CASE(MaxUnpooling, CLMaxUnpoolingLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerIndicesDatasetFPSmall, framework::dataset::make("DataType", DataType::F16))), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }) diff --git a/tests/validation/CL/MeanStdDevNormalizationLayer.cpp b/tests/validation/CL/MeanStdDevNormalizationLayer.cpp index e77a21ed7f..cdeb622130 100644 --- a/tests/validation/CL/MeanStdDevNormalizationLayer.cpp +++ b/tests/validation/CL/MeanStdDevNormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -78,7 +78,7 @@ TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, CLMeanStdDevNormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("InPlace", { false, true })), - framework::dataset::make("Epsilon", { 1e-8 }))) + framework::dataset::make("Epsilon", { 1e-3 }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16); diff --git a/tests/validation/CL/NegLayer.cpp b/tests/validation/CL/NegLayer.cpp index 690b8f44fb..c93e31dca9 100644 --- a/tests/validation/CL/NegLayer.cpp +++ b/tests/validation/CL/NegLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,7 +22,7 @@ * SOFTWARE. */ #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h" +#include "arm_compute/runtime/CL/functions/CLElementwiseUnaryLayer.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" #include "tests/CL/CLAccessor.h" @@ -32,7 +32,7 @@ #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" #include "tests/validation/Validation.h" -#include "tests/validation/fixtures/ElementWiseUnaryFixture.h" +#include "tests/validation/fixtures/ElementwiseUnaryFixture.h" namespace arm_compute { @@ -50,6 +50,15 @@ TEST_SUITE(NegLayer) template <typename T> using CLNegLayerFixture = NegValidationFixture<CLTensor, CLAccessor, CLNegLayer, T>; +TEST_SUITE(S32) +FIXTURE_DATA_TEST_CASE(RunSmall, CLNegLayerFixture<int>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("DataType", + DataType::S32))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() + TEST_SUITE(Float) TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, CLNegLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", diff --git a/tests/validation/CL/NormalizationLayer.cpp b/tests/validation/CL/NormalizationLayer.cpp index 1aed2786ff..8c1890842b 100644 --- a/tests/validation/CL/NormalizationLayer.cpp +++ b/tests/validation/CL/NormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -62,33 +62,28 @@ const auto NormalizationDatasetFP32 = combine(combine(combine(framework::dataset TEST_SUITE(CL) TEST_SUITE(NormalizationLayer) -//TODO(COMPMID-415): Missing configuration? - // *INDENT-OFF* // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type input/output TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching shapes TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Even normalization - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Non implemented IN_MAP_2D - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Window shrink + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Windows shrinking for NCHW TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), }), framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16), TensorInfo(TensorShape(27U, 11U, 2U), 1, DataType::F32), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), })), framework::dataset::make("NormInfo", { NormalizationLayerInfo(NormType::IN_MAP_1D, 5), NormalizationLayerInfo(NormType::IN_MAP_1D, 5), NormalizationLayerInfo(NormType::IN_MAP_1D, 4), NormalizationLayerInfo(NormType::IN_MAP_2D, 5), - NormalizationLayerInfo(NormType::IN_MAP_1D, 5), NormalizationLayerInfo(NormType::CROSS_MAP, 5), })), - framework::dataset::make("Expected", { false, false, false, false, false, true })), + framework::dataset::make("Expected", { false, false, false, false, true })), input_info, output_info, norm_info, expected) { ARM_COMPUTE_EXPECT(bool(CLNormalizationLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), norm_info)) == expected, framework::LogLevel::ERRORS); diff --git a/tests/validation/CL/PReluLayer.cpp b/tests/validation/CL/PReluLayer.cpp index 82f3e4f806..f3f1c8b1b8 100644 --- a/tests/validation/CL/PReluLayer.cpp +++ b/tests/validation/CL/PReluLayer.cpp @@ -56,7 +56,7 @@ const auto PReluLayerQASYMM8Dataset = combine(combine(framework::dataset::make(" const auto PReluLayerQASYMM8SIGNEDDataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)); -const auto PReluLayerS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)), +const auto PReluLayerS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::S16 }), framework::dataset::make("DataType", DataType::S16)), framework::dataset::make("DataType", DataType::S16)); const auto PReluLayerFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataType", DataType::F16)); @@ -71,21 +71,18 @@ TEST_SUITE(PReluLayer) // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes }), framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), })), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), + framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), })), - framework::dataset::make("Expected", { true, true, false, false})), + framework::dataset::make("Expected", { true, false, false})), input1_info, input2_info, output_info, expected) { ARM_COMPUTE_EXPECT(bool(CLPReluLayer::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS); @@ -200,6 +197,11 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLPReluLayerFixture<int16_t>, framework::Datase // Validate output validate(CLAccessor(_target), _reference); } +FIXTURE_DATA_TEST_CASE(RunOneDimensional, CLPReluLayerFixture<int16_t>, framework::DatasetMode::ALL, combine(framework::dataset::make("Shape", TensorShape(1U, 16U)), PReluLayerS16Dataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} TEST_SUITE_END() TEST_SUITE(Float) diff --git a/tests/validation/CL/PadLayer.cpp b/tests/validation/CL/PadLayer.cpp index 370195b078..ea0cb32785 100644 --- a/tests/validation/CL/PadLayer.cpp +++ b/tests/validation/CL/PadLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#include "arm_compute/graph/Utils.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/functions/CLPadLayer.h" +#include "src/graph/mutators/MutatorUtils.h" #include "tests/CL/CLAccessor.h" #include "tests/Globals.h" #include "tests/datasets/ShapeDatasets.h" @@ -110,6 +112,63 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( ARM_COMPUTE_EXPECT(bool(CLPadLayer::validate(&input_info.clone()->set_is_resizable(true), &output_info.clone()->set_is_resizable(true), padding, PixelValue(), mode)) == expected, framework::LogLevel::ERRORS); } +DATA_TEST_CASE(CheckFusingWithConvolution, framework::DatasetMode::ALL, zip(zip( + framework::dataset::make("DataLayout", { DataLayout::NCHW, + DataLayout::NCHW, + DataLayout::NCHW, + DataLayout::NCHW, + DataLayout::NCHW, + DataLayout::NCHW, + DataLayout::NCHW, + DataLayout::NCHW, + DataLayout::NHWC, + DataLayout::NHWC, + DataLayout::NHWC, + DataLayout::NHWC, + DataLayout::NHWC, + DataLayout::NHWC, + DataLayout::NHWC, + DataLayout::UNKNOWN + }), + framework::dataset::make("PaddingList", { PaddingList({{0, 0}, {1, 1}, {1, 1}}), // nchw + PaddingList({{1, 1}, {1, 1}, {0, 0}, {0, 0}}), + PaddingList({{1, 1}, {1, 1}}), + PaddingList({}), + PaddingList({{0, 0}}), + PaddingList({{0, 0}, {0, 0}, {0, 0}, {0, 0}}), + PaddingList({{0, 0}, {0, 0}, {0, 0}, {1, 0}}), + PaddingList({{0, 1}}), + PaddingList({{0, 0}, {1, 1}, {1, 1}}), // nhwc + PaddingList({{0, 0}, {0, 0}, {1, 1}, {1, 1}}), + PaddingList({{0, 0}, {1, 0}, {1, 1}, {0, 0}}), + PaddingList({}), + PaddingList({{0, 0}}), + PaddingList({{0, 1}}), + PaddingList({{0, 0}, {1, 1}}), + PaddingList({{0, 0}}) + })), // unknown + framework::dataset::make("Expected", { false, // nchw + true, + true, + true, + true, + true, + false, + true, + true, // nhwc + false, + true, + true, + true, + false, + true, + false // unknown + })), + data_layout, padding_list, expected) +{ + ARM_COMPUTE_EXPECT(expected == arm_compute::graph::is_padding_in_height_or_width(data_layout, padding_list), framework::LogLevel::ERRORS); +} + // clang-format on // *INDENT-ON* diff --git a/tests/validation/CL/PixelWiseMultiplication.cpp b/tests/validation/CL/PixelWiseMultiplication.cpp index 70e618efa1..62ff15a37f 100644 --- a/tests/validation/CL/PixelWiseMultiplication.cpp +++ b/tests/validation/CL/PixelWiseMultiplication.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -36,6 +36,9 @@ namespace test { namespace validation { +/** Synced with tests/validation/dynamic_fusion/gpu/cl/Mul.cpp from the dynamic fusion interface. + * Please check there for any differences in the coverage + */ namespace { namespace @@ -50,9 +53,6 @@ const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.75f, 0.25f), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.75f, 0.25f) }); -// Since in-place computation on CL-side hasn't been intended to be implemented, they are not tested. -// However, this dataset is required for the shared fixture and it would make extension easier when -// CL-side also starts supporting in-place computation. const auto InPlaceDataSet = framework::dataset::make("InPlace", { false }); } //namespace // *INDENT-OFF* @@ -81,7 +81,9 @@ using CLPixelWiseMultiplicationToF16Fixture = PixelWiseMultiplicationValidationF template <typename T> using CLPixelWiseMultiplicationToF32Fixture = PixelWiseMultiplicationValidationFloatFixture<CLTensor, CLAccessor, CLPixelWiseMultiplication, T, float>; template <typename T> -using CLPixelWiseMultiplicationBroadcastFixture = PixelWiseMultiplicationBroadcastValidationFloatFixture<CLTensor, CLAccessor, CLPixelWiseMultiplication, T, float>; +using CLPixelWiseMultiplicationToF32BroadcastFixture = PixelWiseMultiplicationBroadcastValidationFloatFixture<CLTensor, CLAccessor, CLPixelWiseMultiplication, T, float>; +template <typename T> +using CLPixelWiseMultiplicationIntegerFixture = PixelWiseMultiplicationValidationIntegerFixture<CLTensor, CLAccessor, CLPixelWiseMultiplication, T, int>; TEST_SUITE(CL) TEST_SUITE(PixelWiseMultiplication) @@ -91,27 +93,24 @@ TEST_SUITE(PixelWiseMultiplication) DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8), // Window shrink TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid scale TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes }), framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), })), framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), })), - framework::dataset::make("Scale",{ 2.f, 2.f, 2.f, -1.f, 1.f, 1.f})), - framework::dataset::make("Expected", { true, true, false, false, false, false})), + framework::dataset::make("Scale",{ 2.f, 2.f, -1.f, 1.f, 1.f})), + framework::dataset::make("Expected", { true, true, false, false, false})), input1_info, input2_info, output_info, scale, expected) { bool has_error = bool(CLPixelWiseMultiplication::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), scale, ConvertPolicy::WRAP, RoundingPolicy::TO_ZERO)); @@ -119,6 +118,33 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( } // clang-format on // *INDENT-ON* +TEST_SUITE(INT32) +FIXTURE_DATA_TEST_CASE(RunSmall, CLPixelWiseMultiplicationIntegerFixture<int>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(combine(combine(combine( + datasets::SmallShapes(), + framework::dataset::make("DataType1", DataType::S32)), + framework::dataset::make("DataType2", DataType::S32)), + framework::dataset::make("Scale", { 1.f })), + datasets::ConvertPolicies()), + framework::dataset::make("RoundingPolicy", RoundingPolicy::TO_NEAREST_UP)), + EmptyActivationFunctionsDataset), + InPlaceDataSet)) +{ + validate(CLAccessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunInplace, CLPixelWiseMultiplicationIntegerFixture<int>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(combine(combine(combine(datasets::TinyShapes(), + framework::dataset::make("DataType1", DataType::S32)), + framework::dataset::make("DataType2", DataType::S32)), + framework::dataset::make("Scale", { 1.f })), + datasets::ConvertPolicies()), + framework::dataset::make("RoundingPolicy", RoundingPolicy::TO_NEAREST_UP)), + EmptyActivationFunctionsDataset), + framework::dataset::make("InPlace", { true }))) +{ + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() TEST_SUITE(F16toF16) TEST_SUITE(Scale255) @@ -133,18 +159,36 @@ TEST_SUITE(F32toF32) TEST_SUITE(Scale255) PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToF32Fixture<float>, PRECOMMIT, SmallShapes(), F32, F32, scale_255, TO_NEAREST_UP, EmptyActivationFunctionsDataset, VALIDATE(float, 1.f)) PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunWithActivation, ToF32Fixture<float>, ALL, TinyShapes(), F32, F32, scale_255, TO_NEAREST_UP, ActivationFunctionsDataset, VALIDATE(float, 1.f)) +FIXTURE_DATA_TEST_CASE(RunInplace, CLPixelWiseMultiplicationToF32Fixture<float>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(combine(combine(combine(datasets::TinyShapes(), + framework::dataset::make("DataTypeIn1", DataType::F32)), + framework::dataset::make("DataTypeIn2", DataType::F32)), + framework::dataset::make("Scale", { scale_255 })), + datasets::ConvertPolicies()), + framework::dataset::make("RoundingPolicy", RoundingPolicy::TO_NEAREST_UP)), + EmptyActivationFunctionsDataset), + framework::dataset::make("InPlace", { true }))) +{ + // Validate output + VALIDATE(float, 1.f) +} TEST_SUITE_END() // Scale255 TEST_SUITE_END() // F32toF32 -PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, BroadcastFixture<float>, PRECOMMIT, SmallShapesBroadcast(), F32, F32, scale_255, TO_NEAREST_UP, EmptyActivationFunctionsDataset, +PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, ToF32BroadcastFixture<float>, PRECOMMIT, SmallShapesBroadcast(), F32, F32, scale_255, TO_NEAREST_UP, + EmptyActivationFunctionsDataset, VALIDATE(float, 1.f)) -PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunWithActivationSmallBroadcast, BroadcastFixture<float>, ALL, TinyShapesBroadcast(), F32, F32, scale_255, TO_NEAREST_UP, ActivationFunctionsDataset, +PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunWithActivationSmallBroadcast, ToF32BroadcastFixture<float>, ALL, TinyShapesBroadcast(), F32, F32, scale_255, TO_NEAREST_UP, + ActivationFunctionsDataset, VALIDATE(float, 1.f)) template <typename T> using CLPixelWiseMultiplicationQuantizedFixture = PixelWiseMultiplicationValidationQuantizedFixture<CLTensor, CLAccessor, CLPixelWiseMultiplication, T, T>; using CLPixelWiseMultiplicationQSYMM16ToS32Fxture = PixelWiseMultiplicationValidationQuantizedFixture<CLTensor, CLAccessor, CLPixelWiseMultiplication, int16_t, int16_t, int32_t>; +template <typename T> +using CLPixelWiseMultiplicationQuantizedBroadcastFixture = PixelWiseMultiplicationBroadcastValidationQuantizedFixture<CLTensor, CLAccessor, CLPixelWiseMultiplication, T, T>; + TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, CLPixelWiseMultiplicationQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, @@ -163,6 +207,41 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLPixelWiseMultiplicationQuantizedFixture<uint8 // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); } + +FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLPixelWiseMultiplicationQuantizedBroadcastFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(datasets::SmallShapesBroadcast(), + framework::dataset::make("DataTypeIn1", DataType::QASYMM8)), + framework::dataset::make("DataTypeIn2", DataType::QASYMM8)), + framework::dataset::make("DataTypeOut", DataType::QASYMM8)), + framework::dataset::make("Scale", { 1.f, 2.f })), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), + framework::dataset::make("RoundingPolicy", RoundingPolicy::TO_NEAREST_EVEN)), + framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })), + framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })), + framework::dataset::make("OUtQInfo", { QuantizationInfo(1.f / 255.f, 5) })), + InPlaceDataSet)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} + +FIXTURE_DATA_TEST_CASE(RunInplace, CLPixelWiseMultiplicationQuantizedBroadcastFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(datasets::TinyShapesBroadcastInplace(), + framework::dataset::make("DataTypeIn1", DataType::QASYMM8)), + framework::dataset::make("DataTypeIn2", DataType::QASYMM8)), + framework::dataset::make("DataTypeOut", DataType::QASYMM8)), + framework::dataset::make("Scale", { 1.f, 2.f })), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), + framework::dataset::make("RoundingPolicy", RoundingPolicy::TO_NEAREST_EVEN)), + framework::dataset::make("Src0QInfo", { QuantizationInfo(2.f / 255.f, 10) })), + framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })), + framework::dataset::make("OUtQInfo", { QuantizationInfo(2.f / 255.f, 10) })), + framework::dataset::make("InPlace", { true }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} + TEST_SUITE_END() // QASYMM8 TEST_SUITE(QASYMM8_SIGNED) diff --git a/tests/validation/CL/Pooling3dLayer.cpp b/tests/validation/CL/Pooling3dLayer.cpp new file mode 100644 index 0000000000..84d630e6cf --- /dev/null +++ b/tests/validation/CL/Pooling3dLayer.cpp @@ -0,0 +1,345 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/core/TensorShape.h" +#include "tests/framework/datasets/Datasets.h" + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLPooling3dLayer.h" +#include "tests/CL/CLAccessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/Pooling3dLayerDataset.h" +#include "tests/datasets/PoolingTypesDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/Pooling3dLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +/** Input data sets for floating-point data types */ +const auto Pooling3dLayerDatasetFP = combine(combine(combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { Size3D(2, 3, 2) })), + framework::dataset::make("Stride", { Size3D(1, 1, 1), Size3D(2, 1, 1), Size3D(1, 2, 1), Size3D(2, 2, 1) })), + framework::dataset::make("Padding", { Padding3D(0, 1, 0), Padding3D(1, 1, 1) })), + framework::dataset::make("ExcludePadding", { true, false })); + +const auto Pooling3dLayerDatasetFPSmall = combine(combine(combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { Size3D(2, 2, 2), Size3D(3, 3, 3) })), + framework::dataset::make("Stride", { Size3D(2, 2, 2), Size3D(2, 1, 1) })), + framework::dataset::make("Padding", { Padding3D(0, 0, 0), Padding3D(1, 1, 1), Padding3D(1, 0, 0) })), + framework::dataset::make("ExcludePadding", { true, false })); + +const auto Pooling3DLayerDatasetQuantized = combine(combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }), + framework::dataset::make("PoolingSize", { Size3D(2, 3, 2) })), + framework::dataset::make("Stride", { Size3D(1, 1, 1), Size3D(2, 1, 1), Size3D(1, 2, 1), Size3D(1, 1, 2), Size3D(2, 2, 1)})), + framework::dataset::make("Padding", { Padding3D(0, 0, 0), Padding3D(1, 1, 1), Padding3D(1, 0, 0) })), + framework::dataset::make("ExcludePadding", { true })); + +using ShapeDataset = framework::dataset::ContainerDataset<std::vector<TensorShape>>; + +constexpr AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for 32-bit floating-point type */ +constexpr AbsoluteTolerance<float> tolerance_f16(0.1f); /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */ +constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(1); /**< Tolerance value for comparing reference's output against implementation's output for QASYMM8_SIGNED integer datatype*/ +constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for 8-bit asymmetric type */ + +} // namespace + +TEST_SUITE(CL) +TEST_SUITE(Pooling3dLayer) + +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(2U, 27U, 13U, 4U, 3U), 1, DataType::F32, DataLayout::NDHWC), // Mismatching data type + TensorInfo(TensorShape(2U, 27U, 13U, 4U, 2U), 1, DataType::F32, DataLayout::NDHWC), // Invalid pad/size combination + TensorInfo(TensorShape(2U, 27U, 13U, 4U, 2U), 1, DataType::F32, DataLayout::NDHWC), // Invalid pad/size combination + TensorInfo(TensorShape(2U, 27U, 13U, 4U, 3U), 1, DataType::F32, DataLayout::NDHWC), // Invalid output shape + TensorInfo(TensorShape(5U, 13U, 15U, 2U, 3U), 1, DataType::F32, DataLayout::NDHWC), // Global Pooling + TensorInfo(TensorShape(13U,13U, 5U, 1U, 2U), 1, DataType::F32, DataLayout::NDHWC), // Invalid output Global Pooling + TensorInfo(TensorShape(5U, 13U, 13U, 4U, 4U), 1, DataType::F32, DataLayout::NDHWC), // Invalid data type + TensorInfo(TensorShape(5U, 13U, 13U, 4U, 4U), 1, DataType::F32, DataLayout::NDHWC), + TensorInfo(TensorShape(5U, 13U, 13U, 5U, 4U), 1, DataType::F32, DataLayout::NDHWC), + TensorInfo(TensorShape(1U, 16U, 1U, 3U, 4U), 1, DataType::F32, DataLayout::NDHWC), + TensorInfo(TensorShape(5U, 13U, 13U, 4U, 3U), 1, DataType::F32, DataLayout::NDHWC), + TensorInfo(TensorShape(5U, 13U, 13U, 4U, 2U), 1, DataType::F32, DataLayout::NDHWC), + TensorInfo(TensorShape(5U, 13U, 13U, 4U, 3U), 1, DataType::F32, DataLayout::NDHWC), + TensorInfo(TensorShape(5U, 13U, 13U, 4U, 3U), 1, DataType::F32, DataLayout::NDHWC), + }), + framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(2U, 25U, 11U, 3U, 3U), 1, DataType::F16, DataLayout::NDHWC), + TensorInfo(TensorShape(2U, 30U, 11U, 3U, 2U), 1, DataType::F32, DataLayout::NDHWC), + TensorInfo(TensorShape(2U, 25U, 16U, 3U, 2U), 1, DataType::F32, DataLayout::NDHWC), + TensorInfo(TensorShape(2U, 27U, 13U, 3U, 3U), 1, DataType::F32, DataLayout::NDHWC), + TensorInfo(TensorShape(5U, 1U, 1U, 1U, 3U), 1, DataType::F32, DataLayout::NDHWC), // Global pooling applied + TensorInfo(TensorShape(5U, 2U, 2U, 2U, 2U), 1, DataType::F32, DataLayout::NDHWC), // Invalid output Global Pooling + TensorInfo(TensorShape(5U, 12U, 12U, 3U, 4U), 1, DataType::F32, DataLayout::NDHWC), + TensorInfo(TensorShape(5U, 12U, 12U, 3U, 4U), 1, DataType::QASYMM8, DataLayout::NDHWC), // Invalid data type + TensorInfo(TensorShape(5U, 1U, 1U, 1U, 4U), 1, DataType::F32, DataLayout::NDHWC), + TensorInfo(TensorShape(1U, 15U, 1U, 2U, 4U), 1, DataType::F32, DataLayout::NDHWC), // Output width larger than input + TensorInfo(TensorShape(5U, 6U, 6U, 2U, 3U), 1, DataType::F32, DataLayout::NDHWC), + TensorInfo(TensorShape(5U, 6U, 6U, 2U, 2U), 1, DataType::F32, DataLayout::NDHWC), + TensorInfo(TensorShape(5U, 6U, 6U, 2U, 3U), 1, DataType::F32, DataLayout::NDHWC), + TensorInfo(TensorShape(5U, 6U, 6U, 2U, 3U), 1, DataType::F32, DataLayout::NDHWC), + })), + framework::dataset::make("PoolInfo", { Pooling3dLayerInfo(PoolingType::AVG, 3, Size3D(1, 1, 1), Padding3D(0, 0, 0)), + Pooling3dLayerInfo(PoolingType::AVG, 2, Size3D(1, 1, 1), Padding3D(2, 0, 0)), + Pooling3dLayerInfo(PoolingType::AVG, 2, Size3D(1, 1, 1), Padding3D(0, 0, 0)), + Pooling3dLayerInfo(PoolingType::L2, 3, Size3D(1, 1, 1), Padding3D(0, 0, 0)), + Pooling3dLayerInfo(PoolingType::AVG), + Pooling3dLayerInfo(PoolingType::MAX), + Pooling3dLayerInfo(PoolingType::AVG, 2, Size3D(), Padding3D(), false), + Pooling3dLayerInfo(PoolingType::AVG, 2, Size3D(1U, 1U, 1U), Padding3D(), false), + Pooling3dLayerInfo(PoolingType::AVG), + Pooling3dLayerInfo(PoolingType::MAX, 2, Size3D(1, 1, 2), Padding3D(0, 0, 0), false), + Pooling3dLayerInfo(PoolingType::AVG, 2, Size3D(2U, 2U, 2U), Padding3D(), false), + Pooling3dLayerInfo(PoolingType::AVG, 1, Size3D(2U, 2U, 2U), Padding3D(2, 2, 2), true), // Pool size is smaller than the padding size with padding excluded + Pooling3dLayerInfo(PoolingType::AVG, 1, Size3D(2U, 2U, 2U), Padding3D(2, 2, 2), false), // Pool size is smaller than the padding size with padding included + Pooling3dLayerInfo(PoolingType::AVG, 3, Size3D(2U, 2U, 2U), Padding3D(2,1,2,2,1,2), false, false, DimensionRoundingType::CEIL), // CEIL with asymmetric Padding + })), + framework::dataset::make("Expected", { false, false, false, false, true, false, false, false, true , false, true, false, false, false})), + input_info, output_info, pool_info, expected) +{ + ARM_COMPUTE_EXPECT(bool(CLPooling3dLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pool_info)) == expected, framework::LogLevel::ERRORS); +} + + +template <typename T> +using CLPooling3dLayerFixture = Pooling3dLayerValidationFixture<CLTensor, CLAccessor, CLPooling3dLayer, T>; + +template <typename T> +using CLSpecialPooling3dLayerFixture = SpecialPooling3dLayerValidationFixture<CLTensor, CLAccessor, CLPooling3dLayer, T>; + +template <typename T> +using CLPooling3dLayerGlobalFixture = Pooling3dLayerGlobalValidationFixture<CLTensor, CLAccessor, CLPooling3dLayer, T>; + +template <typename T> +using CLPooling3dLayerQuantizedFixture = Pooling3dLayerValidationQuantizedFixture<CLTensor, CLAccessor, CLPooling3dLayer, T>; + +// clang-format on +// *INDENT-ON* +TEST_SUITE(QUANTIZED) + +TEST_SUITE(QASYMM8) +// Small Dataset Quantized Dataset +FIXTURE_DATA_TEST_CASE(RunSmall, CLPooling3dLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small5dShapes(), + combine(Pooling3DLayerDatasetQuantized, + framework::dataset::make("DataType", DataType::QASYMM8))), + framework::dataset::make("InputQuantInfo", { QuantizationInfo(1.f / 127.f, 10), QuantizationInfo(1.f / 127.f, 10) })), + framework::dataset::make("OutputQuantInfo", { QuantizationInfo(1.f / 127.f, 5), QuantizationInfo(1.f / 127.f, 10) }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} + +// Large Dataset Quantized Dataset +FIXTURE_DATA_TEST_CASE(RunLarge, CLPooling3dLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large5dShapes(), + combine(Pooling3DLayerDatasetQuantized, + framework::dataset::make("DataType", DataType::QASYMM8))), + framework::dataset::make("InputQuantInfo", { QuantizationInfo(1.f / 127.f, 10), QuantizationInfo(1.f / 127.f, 10) })), + framework::dataset::make("OutputQuantInfo", { QuantizationInfo(1.f / 127.f, 5), QuantizationInfo(1.f / 127.f, 10) }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() + +TEST_SUITE(QASYMM8_SIGNED) + +// Large Dataset Quantized Dataset Signed +FIXTURE_DATA_TEST_CASE(RunSmall, CLPooling3dLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small5dShapes(), + combine(Pooling3DLayerDatasetQuantized, + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))), + framework::dataset::make("InputQuantInfo", { QuantizationInfo(1.f / 127.f, -10), QuantizationInfo(1.f / 127.f, -10) })), + framework::dataset::make("OutputQuantInfo", { QuantizationInfo(1.f / 127.f, -5), QuantizationInfo(1.f / 127.f, -10) }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8_signed); +} + +// Large Dataset Quantized pooling test +FIXTURE_DATA_TEST_CASE(RunLarge, CLPooling3dLayerQuantizedFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large5dShapes(), + combine(Pooling3DLayerDatasetQuantized, + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))), + framework::dataset::make("InputQuantInfo", { QuantizationInfo(1.f / 127.f, -10), QuantizationInfo(1.f / 127.f, -10) })), + framework::dataset::make("OutputQuantInfo", { QuantizationInfo(1.f / 127.f, -5), QuantizationInfo(1.f / 127.f, -10) }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8_signed); +} + +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE(Float) +TEST_SUITE(FP32) + +FIXTURE_DATA_TEST_CASE(RunSpecial, CLSpecialPooling3dLayerFixture<float>, framework::DatasetMode::ALL, datasets::Pooling3dLayerDatasetSpecial() * framework::dataset::make("DataType", DataType::F32)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, CLPooling3dLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small5dShapes(), combine(Pooling3dLayerDatasetFPSmall, + framework::dataset::make("DataType", DataType::F32)))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, CLPooling3dLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::Large5dShapes(), combine(Pooling3dLayerDatasetFP, + framework::dataset::make("DataType", DataType::F32)))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +TEST_SUITE(GlobalPooling) +// *INDENT-OFF* +// clang-format off +FIXTURE_DATA_TEST_CASE(RunSmall, CLPooling3dLayerFixture<float>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine( + framework::dataset::make("InputShape", { TensorShape(3U, 27U, 13U, 4U), + TensorShape(4U, 27U, 13U, 4U, 2U) + }), + framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })), + framework::dataset::make("PoolingSize", { Size3D(27, 13, 4) })), + framework::dataset::make("Strides", Size3D(1, 1, 1))), + framework::dataset::make("Paddings", Padding3D(0, 0, 0))), + framework::dataset::make("ExcludePadding", false)), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunSmallGlobal, CLPooling3dLayerGlobalFixture<float>, framework::DatasetMode::ALL, + combine(combine( + framework::dataset::make("InputShape", { TensorShape(27U, 13U, 4U, 3U), + TensorShape(27U, 13U, 4U, 4U, 2U) + }), + framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, CLPooling3dLayerFixture<float>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine( + framework::dataset::make("InputShape", { TensorShape(4U, 79U, 37U, 11U), + TensorShape(4U, 79U, 37U, 11U, 2U) + }), + framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })), + framework::dataset::make("PoolingSize", { Size3D(79, 37, 11) })), + framework::dataset::make("Strides", Size3D(1, 1, 1))), + framework::dataset::make("Paddings", Padding3D(0, 0, 0))), + framework::dataset::make("ExcludePadding", false)), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +// clang-format on +// *INDENT-ON* +TEST_SUITE_END() // GlobalPooling +TEST_SUITE_END() // FP32 + +TEST_SUITE(FP16) + +FIXTURE_DATA_TEST_CASE(RunSmall, CLPooling3dLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small5x5Shapes(), combine(Pooling3dLayerDatasetFPSmall, + framework::dataset::make("DataType", DataType::F16)))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, CLPooling3dLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::Large5dShapes(), combine(Pooling3dLayerDatasetFP, + framework::dataset::make("DataType", DataType::F16)))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} + +TEST_SUITE(GlobalPooling) +// *INDENT-OFF* +// clang-format off +FIXTURE_DATA_TEST_CASE(RunSmall, CLPooling3dLayerFixture<half>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine( + framework::dataset::make("InputShape", { TensorShape(3U, 27U, 13U, 4U), + TensorShape(4U, 27U, 13U, 4U, 2U) + }), + framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })), + framework::dataset::make("PoolingSize", { Size3D(27, 13, 4) })), + framework::dataset::make("Strides", Size3D(1, 1, 1))), + framework::dataset::make("Paddings", Padding3D(0, 0, 0))), + framework::dataset::make("ExcludePadding", false)), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} + +FIXTURE_DATA_TEST_CASE(RunSmallGlobal, CLPooling3dLayerGlobalFixture<half>, framework::DatasetMode::ALL, + combine(combine( + framework::dataset::make("InputShape", { TensorShape(27U, 13U, 4U, 3U), + TensorShape(27U, 13U, 4U, 4U, 2U) + }), + framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunLarge, CLPooling3dLayerFixture<half>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine( + framework::dataset::make("InputShape", { TensorShape(4U, 79U, 37U, 11U), + TensorShape(4U, 79U, 37U, 11U, 2U) + }), + framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })), + framework::dataset::make("PoolingSize", { Size3D(79, 37, 11) })), + framework::dataset::make("Strides", Size3D(1, 1, 1))), + framework::dataset::make("Paddings", Padding3D(0, 0, 0))), + framework::dataset::make("ExcludePadding", false)), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} +// clang-format on +// *INDENT-ON* +TEST_SUITE_END() // GlobalPooling +TEST_SUITE_END() // FP16 +TEST_SUITE_END() // Float +TEST_SUITE_END() // Pooling3dLayer +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/CL/PoolingLayer.cpp b/tests/validation/CL/PoolingLayer.cpp index de5c9f2e8d..9fe28c7acf 100644 --- a/tests/validation/CL/PoolingLayer.cpp +++ b/tests/validation/CL/PoolingLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -101,6 +101,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32), // Invalid output Global Pooling TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::QASYMM8), TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32), + TensorInfo(TensorShape(1U, 16U, 1U), 1, DataType::F32), }), framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16), TensorInfo(TensorShape(30U, 11U, 2U), 1, DataType::F32), @@ -110,6 +111,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( TensorInfo(TensorShape(2U, 2U, 5U), 1, DataType::F32), TensorInfo(TensorShape(12U, 12U, 5U), 1, DataType::QASYMM8), TensorInfo(TensorShape(1U, 1U, 5U), 1, DataType::F32), + TensorInfo(TensorShape(1U, 15U, 1U), 1, DataType::F32), })), framework::dataset::make("PoolInfo", { PoolingLayerInfo(PoolingType::AVG, 3, DataLayout::NCHW, PadStrideInfo(1, 1, 0, 0)), PoolingLayerInfo(PoolingType::AVG, 2, DataLayout::NCHW, PadStrideInfo(1, 1, 2, 0)), @@ -119,8 +121,9 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( PoolingLayerInfo(PoolingType::MAX, DataLayout::NCHW), PoolingLayerInfo(PoolingType::AVG, 2, DataLayout::NHWC, PadStrideInfo(), false), PoolingLayerInfo(PoolingType::AVG, DataLayout::NCHW), + PoolingLayerInfo(PoolingType::MAX, 2, DataLayout::NHWC, PadStrideInfo(1, 1, 0, 0), false), })), - framework::dataset::make("Expected", { false, false, false, false, true, false, true, true })), + framework::dataset::make("Expected", { false, false, false, false, true, false, true, true , false})), input_info, output_info, pool_info, expected) { ARM_COMPUTE_EXPECT(bool(CLPoolingLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pool_info)) == expected, framework::LogLevel::ERRORS); @@ -131,6 +134,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( template <typename T> using CLPoolingLayerFixture = PoolingLayerValidationFixture<CLTensor, CLAccessor, CLPoolingLayer, T>; +template <typename T> +using CLPoolingLayerMixedDataLayoutFixture = PoolingLayerValidationFixture<CLTensor, CLAccessor, CLPoolingLayer, T, true>; template <typename T> using CLSpecialPoolingLayerFixture = SpecialPoolingLayerValidationFixture<CLTensor, CLAccessor, CLPoolingLayer, T>; @@ -148,7 +153,7 @@ FIXTURE_DATA_TEST_CASE(RunSpecial, CLSpecialPoolingLayerFixture<float>, framewor // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFPSmall, +FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerDatasetFPSmall, framework::dataset::make("DataType", DataType::F32))), pool_data_layout_dataset)) @@ -156,6 +161,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerFixture<float>, framework::Datase // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); } +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLPoolingLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(), + combine(combine(combine(combine(datasets::PoolingTypes(), + framework::dataset::make("PoolingSize", { Size2D(2, 2) })), + framework::dataset::make("PadStride", { PadStrideInfo(2, 1, 0, 0) })), + framework::dataset::make("ExcludePadding", { false })), + framework::dataset::make("DataType", DataType::F32))), + pool_data_layout_dataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} FIXTURE_DATA_TEST_CASE(RunLarge, CLPoolingLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), combine(PoolingLayerDatasetFP, framework::dataset::make("DataType", DataType::F32))), @@ -165,21 +181,61 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLPoolingLayerFixture<float>, framework::Datase validate(CLAccessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunSmallIndices, CLPoolingLayerIndicesFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFPIndicesSmall, - framework::dataset::make("DataType", - DataType::F32))), - pool_data_layout_dataset)) +FIXTURE_DATA_TEST_CASE(RunSmallIndices, CLPoolingLayerIndicesFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallNoneUnitShapes(), + combine(PoolingLayerDatasetFPIndicesSmall, + framework::dataset::make("DataType", + DataType::F32))), + pool_data_layout_dataset),framework::dataset::make("UseKernelIndices", { false }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); validate(CLAccessor(_target_indices), _ref_indices); } +TEST_SUITE(GlobalPooling) +// *INDENT-OFF* +// clang-format off +FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerFixture<float>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine( + framework::dataset::make("InputShape", { TensorShape(27U, 13U, 2U), + TensorShape(27U, 13U, 2U, 4U) + }), + framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })), + framework::dataset::make("PoolingSize", { Size2D(27, 13) })), + framework::dataset::make("PadStride", PadStrideInfo(1, 1, 0, 0))), + framework::dataset::make("ExcludePadding", false)), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, CLPoolingLayerFixture<float>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine( + framework::dataset::make("InputShape", { TensorShape(79U, 37U, 11U), + TensorShape(79U, 37U, 11U, 4U) + }), + framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })), + framework::dataset::make("PoolingSize", { Size2D(79, 37) })), + framework::dataset::make("PadStride", PadStrideInfo(1, 1, 0, 0))), + framework::dataset::make("ExcludePadding", false)), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +// clang-format on +// *INDENT-ON* +TEST_SUITE_END() // GlobalPooling + TEST_SUITE_END() // FP32 TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLMixedPrecesionPoolingLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFPSmall, - framework::dataset::make("DataType", DataType::F16))), +FIXTURE_DATA_TEST_CASE(RunSmall, CLMixedPrecesionPoolingLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallNoneUnitShapes(), + combine(PoolingLayerDatasetFPSmall, + framework::dataset::make("DataType", DataType::F16))), pool_data_layout_dataset), pool_fp_mixed_precision_dataset)) { @@ -194,15 +250,55 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLMixedPrecesionPoolingLayerFixture<half>, fram // Validate output validate(CLAccessor(_target), _reference, tolerance_f16); } -FIXTURE_DATA_TEST_CASE(RunSmallIndices, CLPoolingLayerIndicesFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFPIndicesSmall, - framework::dataset::make("DataType", - DataType::F16))), - pool_data_layout_dataset)) +FIXTURE_DATA_TEST_CASE(RunSmallIndices, CLPoolingLayerIndicesFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallNoneUnitShapes(), + combine(PoolingLayerDatasetFPIndicesSmall, + framework::dataset::make("DataType", + DataType::F16))), + pool_data_layout_dataset), framework::dataset::make("UseKernelIndices", { false }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); validate(CLAccessor(_target_indices), _ref_indices); } + +TEST_SUITE(GlobalPooling) +// *INDENT-OFF* +// clang-format off +FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerFixture<half>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine( + framework::dataset::make("InputShape", { TensorShape(27U, 13U, 2U), + TensorShape(27U, 13U, 2U, 4U) + }), + framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })), + framework::dataset::make("PoolingSize", { Size2D(27, 13) })), + framework::dataset::make("PadStride", PadStrideInfo(1, 1, 0, 0))), + framework::dataset::make("ExcludePadding", false)), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, CLPoolingLayerFixture<half>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine( + framework::dataset::make("InputShape", { TensorShape(79U, 37U, 11U), + TensorShape(79U, 37U, 11U, 4U) + }), + framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })), + framework::dataset::make("PoolingSize", { Size2D(79, 37) })), + framework::dataset::make("PadStride", PadStrideInfo(1, 1, 0, 0))), + framework::dataset::make("ExcludePadding", false)), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} +// clang-format on +// *INDENT-ON* +TEST_SUITE_END() // GlobalPooling + TEST_SUITE_END() // FP16 TEST_SUITE_END() // Float @@ -210,9 +306,11 @@ TEST_SUITE(Quantized) template <typename T> using CLPoolingLayerQuantizedFixture = PoolingLayerValidationQuantizedFixture<CLTensor, CLAccessor, CLPoolingLayer, T>; +template <typename T> +using CLPoolingLayerQuantizedMixedDataLayoutFixture = PoolingLayerValidationQuantizedFixture<CLTensor, CLAccessor, CLPoolingLayer, T, true>; TEST_SUITE(QASYMM8) -FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), +FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerDatasetQASYMM8Small, framework::dataset::make("DataType", DataType::QASYMM8))), pool_data_layout_dataset), @@ -222,10 +320,23 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerQuantizedFixture<uint8_t>, framew // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); } +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLPoolingLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallNoneUnitShapes(), + combine(combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }), + framework::dataset::make("PoolingSize", { Size2D(2, 2) })), + framework::dataset::make("PadStride", { PadStrideInfo(1, 2, 1, 1) })), + framework::dataset::make("ExcludePadding", { true })), + framework::dataset::make("DataType", DataType::QASYMM8))), + framework::dataset::make("DataLayout", { DataLayout::NHWC, DataLayout::NCHW })), + framework::dataset::make("InputQuantInfo", { QuantizationInfo(1.f / 255.f, 10) })), + framework::dataset::make("OutputQuantInfo", { QuantizationInfo(1.f / 255.f, 5) }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} TEST_SUITE_END() // QASYMM8 TEST_SUITE(QASYMM8_SIGNED) -FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), +FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerDatasetQASYMM8Small, framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))), pool_data_layout_dataset), @@ -235,6 +346,19 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerQuantizedFixture<int8_t>, framewo // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8_s); } +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLPoolingLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallNoneUnitShapes(), + combine(combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }), + framework::dataset::make("PoolingSize", { Size2D(2, 2) })), + framework::dataset::make("PadStride", { PadStrideInfo(1, 2, 1, 1) })), + framework::dataset::make("ExcludePadding", { true })), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))), + framework::dataset::make("DataLayout", { DataLayout::NHWC, DataLayout::NCHW })), + framework::dataset::make("InputQuantInfo", { QuantizationInfo(1.f / 127.f, -10) })), + framework::dataset::make("OutputQuantInfo", { QuantizationInfo(1.f / 127.f, -10) }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8_s); +} TEST_SUITE_END() // QASYMM8_SIGNED TEST_SUITE_END() // Quantized TEST_SUITE_END() // PoolingLayer diff --git a/tests/validation/CL/ROIPoolingLayer.cpp b/tests/validation/CL/ROIPoolingLayer.cpp new file mode 100644 index 0000000000..eb16c1baec --- /dev/null +++ b/tests/validation/CL/ROIPoolingLayer.cpp @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/functions/CLROIPoolingLayer.h" +#include "tests/CL/CLAccessor.h" +#include "tests/Globals.h" +#include "tests/datasets/ROIDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/ROIPoolingLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +RelativeTolerance<float> relative_tolerance_f32(0.01f); +AbsoluteTolerance<float> absolute_tolerance_f32(0.001f); + +constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); +} // end namespace + +TEST_SUITE(CL) +TEST_SUITE(RoiPooling) + +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Successful test + TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::QASYMM8), // Successful test (quantized) + TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Incorrect rois type + TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Mismatching data type input/output + TensorInfo(TensorShape(250U, 128U, 2U), 1, DataType::F32), // Mismatching depth size input/output + TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Mismatching number of rois and output batch size + TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Invalid number of values per ROIS + TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Mismatching height and width input/output + + }), + framework::dataset::make("RoisInfo", { TensorInfo(TensorShape(5, 4U), 1, DataType::U16), + TensorInfo(TensorShape(5, 4U), 1, DataType::U16), + TensorInfo(TensorShape(5, 4U), 1, DataType::F16), + TensorInfo(TensorShape(5, 4U), 1, DataType::U16), + TensorInfo(TensorShape(5, 4U), 1, DataType::U16), + TensorInfo(TensorShape(5, 10U), 1, DataType::U16), + TensorInfo(TensorShape(4, 4U), 1, DataType::U16), + TensorInfo(TensorShape(5, 4U), 1, DataType::U16), + })), + framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::QASYMM8), + TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F16), + TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(5U, 5U, 3U, 4U), 1, DataType::F32), + })), + framework::dataset::make("PoolInfo", { ROIPoolingLayerInfo(7U, 7U, 1./8), + ROIPoolingLayerInfo(7U, 7U, 1./8), + ROIPoolingLayerInfo(7U, 7U, 1./8), + ROIPoolingLayerInfo(7U, 7U, 1./8), + ROIPoolingLayerInfo(7U, 7U, 1./8), + ROIPoolingLayerInfo(7U, 7U, 1./8), + ROIPoolingLayerInfo(7U, 7U, 1./8), + ROIPoolingLayerInfo(7U, 7U, 1./8), + })), + framework::dataset::make("Expected", { true, true, false, false, false, false, false })), + input_info, rois_info, output_info, pool_info, expected) +{ + ARM_COMPUTE_EXPECT(bool(CLROIPoolingLayer::validate(&input_info.clone()->set_is_resizable(true), &rois_info.clone()->set_is_resizable(true), &output_info.clone()->set_is_resizable(true), pool_info)) == expected, framework::LogLevel::ERRORS); +} + +using CLROIPoolingLayerFloatFixture = ROIPoolingLayerFixture<CLTensor, CLAccessor, CLROIPoolingLayer, float>; + +TEST_SUITE(Float) +FIXTURE_DATA_TEST_CASE(Small, CLROIPoolingLayerFloatFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::SmallROIDataset(), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("DataLayout", { DataLayout::NCHW }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, relative_tolerance_f32, .02f, absolute_tolerance_f32); +} + +TEST_SUITE_END() // Float test suite end + +// Begin quantized tests +template <typename T> +using CLROIPoolingLayerQuantizedFixture = ROIPoolingLayerQuantizedFixture<CLTensor, CLAccessor, CLROIPoolingLayer, T>; + +TEST_SUITE(QASYMM8) + +FIXTURE_DATA_TEST_CASE(Small, CLROIPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, + combine(combine(combine(combine(datasets::SmallROIDataset(), + framework::dataset::make("DataType", { DataType::QASYMM8 })), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), + framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 127) })), + framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(2.f / 255.f, 120) }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} + +TEST_SUITE_END() // end qasymm8 tests + +TEST_SUITE_END() // RoiPooling +TEST_SUITE_END() // NEON + +} // validation namespace end +} // test namespace end +} // arm_compute namespace end diff --git a/tests/validation/CL/ReduceMean.cpp b/tests/validation/CL/ReduceMean.cpp index 947f84af49..8a8fa4aef0 100644 --- a/tests/validation/CL/ReduceMean.cpp +++ b/tests/validation/CL/ReduceMean.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2020, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -45,7 +45,7 @@ constexpr AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Tolerance value constexpr AbsoluteTolerance<float> tolerance_f16(0.03f); /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */ constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for 8-bit asymmetric quantized type */ -const auto axis_keep = combine(framework::dataset::make("Axis", { Coordinates(0), Coordinates(1, 0), Coordinates(1, 2), Coordinates(0, 2), Coordinates(1, 3), Coordinates(0, 1, 2, 3) }), +const auto axis_keep = combine(framework::dataset::make("Axis", { Coordinates(0), Coordinates(1, 0), Coordinates(1, 2), Coordinates(0, 2), Coordinates(1, 3), Coordinates(2, 3), Coordinates(0, 1, 2, 3) }), framework::dataset::make("KeepDims", { true })); const auto axis_drop = combine(framework::dataset::make("Axis", { Coordinates(0), Coordinates(1), Coordinates(3), Coordinates(1, 2), Coordinates(2, 1) }), framework::dataset::make("KeepDims", { false })); } // namespace diff --git a/tests/validation/CL/ReductionOperation.cpp b/tests/validation/CL/ReductionOperation.cpp index 31c5a97925..beb58381ca 100644 --- a/tests/validation/CL/ReductionOperation.cpp +++ b/tests/validation/CL/ReductionOperation.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -50,10 +50,11 @@ RelativeTolerance<float> rel_tolerance_f16(0.2f); /** Tolerance for quantized operations */ RelativeTolerance<float> tolerance_qasymm8(1); -const auto ReductionOperationsSumProd = framework::dataset::make("ReductionOperationsSumProd", +const auto ReductionOperationsSumProdMean = framework::dataset::make("ReductionOperationsSumProdMean", { ReductionOperation::SUM, ReductionOperation::PROD, + ReductionOperation::MEAN_SUM }); const auto ReductionOperationsMinMax = framework::dataset::make("ReductionMinMax", @@ -109,15 +110,16 @@ using CLReductionOperationFixture = ReductionOperationFixture<CLTensor, CLAccess TEST_SUITE(Float) TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall4D, CLReductionOperationFixture<half>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), concat(ReductionOperationsSumProd, - ReductionOperationsMinMax)), + combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), + concat(ReductionOperationsSumProdMean, + ReductionOperationsMinMax)), KeepDimensions)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16); } FIXTURE_DATA_TEST_CASE(RunLarge, CLReductionOperationFixture<half>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), concat(ReductionOperationsSumProd, + combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), concat(ReductionOperationsSumProdMean, ReductionOperationsMinMax)), KeepDimensions)) { @@ -127,15 +129,16 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLReductionOperationFixture<half>, framework::D TEST_SUITE_END() // F16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall4D, CLReductionOperationFixture<float>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), concat(ReductionOperationsSumProd, - ReductionOperationsMinMax)), + combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), + concat(ReductionOperationsSumProdMean, + ReductionOperationsMinMax)), KeepDimensions)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); } FIXTURE_DATA_TEST_CASE(RunLarge, CLReductionOperationFixture<float>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), concat(ReductionOperationsSumProd, + combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), concat(ReductionOperationsSumProdMean, ReductionOperationsMinMax)), KeepDimensions)) { @@ -152,7 +155,7 @@ TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, CLReductionOperationQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), - ReductionOperationsSumProd), + ReductionOperationsSumProdMean), framework::dataset::make("QuantizationInfo", QuantizationInfo(1.f / 64, 2))), KeepDimensions)) { @@ -172,7 +175,7 @@ TEST_SUITE_END() // QASYMM8 TEST_SUITE(QASYMM8_SIGNED) FIXTURE_DATA_TEST_CASE(RunSmall, CLReductionOperationQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), - ReductionOperationsSumProd), + ReductionOperationsSumProdMean), framework::dataset::make("QuantizationInfo", QuantizationInfo(1.f / 64, 2))), KeepDimensions)) { diff --git a/tests/validation/CL/Reverse.cpp b/tests/validation/CL/Reverse.cpp index 11df0e7803..82effc2136 100644 --- a/tests/validation/CL/Reverse.cpp +++ b/tests/validation/CL/Reverse.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2020, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -41,9 +41,10 @@ namespace test { namespace validation { +using framework::dataset::make; namespace { -auto run_small_dataset = combine(datasets::SmallShapes(), datasets::Tiny1DShapes()); +auto run_small_dataset = combine(datasets::Small3DShapes(), datasets::Tiny1DShapes()); auto run_large_dataset = combine(datasets::LargeShapes(), datasets::Tiny1DShapes()); } // namespace @@ -53,33 +54,34 @@ TEST_SUITE(Reverse) // *INDENT-OFF* // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S8), // Invalid axis datatype + make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S8), // Invalid axis datatype TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid axis shape TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid axis length (> 4) TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Mismatching shapes TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(2U), 1, DataType::U8), }), - framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S8), + make("OutputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(2U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(2U), 1, DataType::U8), })), - framework::dataset::make("AxisInfo",{ TensorInfo(TensorShape(3U), 1, DataType::U8), + make("AxisInfo",{ TensorInfo(TensorShape(3U), 1, DataType::U8), TensorInfo(TensorShape(2U, 10U), 1, DataType::U32), TensorInfo(TensorShape(8U), 1, DataType::U32), TensorInfo(TensorShape(2U), 1, DataType::U32), TensorInfo(TensorShape(2U), 1, DataType::U32), TensorInfo(TensorShape(2U), 1, DataType::U32), })), - framework::dataset::make("Expected", { false, false, false, false, true, true})), + make("Expected", { false, false, false, false, true, true})), src_info, dst_info, axis_info, expected) { Status s = CLReverse::validate(&src_info.clone()->set_is_resizable(false), &dst_info.clone()->set_is_resizable(false), - &axis_info.clone()->set_is_resizable(false)); + &axis_info.clone()->set_is_resizable(false), + false); ARM_COMPUTE_EXPECT(bool(s) == expected, framework::LogLevel::ERRORS); } // clang-format on @@ -93,7 +95,11 @@ TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, CLReverseFixture<half>, framework::DatasetMode::PRECOMMIT, - combine(run_small_dataset, framework::dataset::make("DataType", DataType::F16))) + combine( + run_small_dataset, + make("DataType", DataType::F16), + make("use_negative_axis", { true, false }), + make("use_inverted_axis", { true, false }))) { // Validate output validate(CLAccessor(_target), _reference); @@ -102,7 +108,11 @@ FIXTURE_DATA_TEST_CASE(RunSmall, FIXTURE_DATA_TEST_CASE(RunLarge, CLReverseFixture<half>, framework::DatasetMode::NIGHTLY, - combine(run_large_dataset, framework::dataset::make("DataType", DataType::F16))) + combine( + run_large_dataset, + make("DataType", DataType::F16), + make("use_negative_axis", { true, false }), + make("use_inverted_axis", { true, false }))) { // Validate output validate(CLAccessor(_target), _reference); @@ -113,7 +123,11 @@ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, CLReverseFixture<float>, framework::DatasetMode::PRECOMMIT, - combine(run_small_dataset, framework::dataset::make("DataType", DataType::F32))) + combine( + run_small_dataset, + make("DataType", DataType::F32), + make("use_negative_axis", { true, false }), + make("use_inverted_axis", { true, false }))) { // Validate output validate(CLAccessor(_target), _reference); @@ -122,7 +136,11 @@ FIXTURE_DATA_TEST_CASE(RunSmall, FIXTURE_DATA_TEST_CASE(RunLarge, CLReverseFixture<float>, framework::DatasetMode::NIGHTLY, - combine(run_large_dataset, framework::dataset::make("DataType", DataType::F32))) + combine( + run_large_dataset, + make("DataType", DataType::F32), + make("use_negative_axis", { true, false }), + make("use_inverted_axis", { true, false }))) { // Validate output validate(CLAccessor(_target), _reference); @@ -135,7 +153,11 @@ TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, CLReverseFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, - combine(run_small_dataset, framework::dataset::make("DataType", DataType::QASYMM8))) + combine( + run_small_dataset, + make("DataType", DataType::QASYMM8), + make("use_negative_axis", { true, false }), + make("use_inverted_axis", { true, false }))) { // Validate output validate(CLAccessor(_target), _reference); @@ -144,7 +166,11 @@ FIXTURE_DATA_TEST_CASE(RunSmall, FIXTURE_DATA_TEST_CASE(RunLarge, CLReverseFixture<uint8_t>, framework::DatasetMode::NIGHTLY, - combine(run_large_dataset, framework::dataset::make("DataType", DataType::QASYMM8))) + combine( + run_large_dataset, + make("DataType", DataType::QASYMM8), + make("use_negative_axis", { true, false }), + make("use_inverted_axis", { true, false }))) { // Validate output validate(CLAccessor(_target), _reference); diff --git a/tests/validation/CL/RoundLayer.cpp b/tests/validation/CL/RoundLayer.cpp index 5aa9ca6b4e..f0c88d1ad3 100644 --- a/tests/validation/CL/RoundLayer.cpp +++ b/tests/validation/CL/RoundLayer.cpp @@ -22,7 +22,7 @@ * SOFTWARE. */ #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h" +#include "arm_compute/runtime/CL/functions/CLElementwiseUnaryLayer.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" #include "tests/CL/CLAccessor.h" @@ -32,7 +32,7 @@ #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" #include "tests/validation/Validation.h" -#include "tests/validation/fixtures/ElementWiseUnaryFixture.h" +#include "tests/validation/fixtures/ElementwiseUnaryFixture.h" namespace arm_compute { diff --git a/tests/validation/CL/RsqrtLayer.cpp b/tests/validation/CL/RsqrtLayer.cpp index 29c113b105..2353bda8d3 100644 --- a/tests/validation/CL/RsqrtLayer.cpp +++ b/tests/validation/CL/RsqrtLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2019, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,7 +22,7 @@ * SOFTWARE. */ #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h" +#include "arm_compute/runtime/CL/functions/CLElementwiseUnaryLayer.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" #include "tests/CL/CLAccessor.h" @@ -32,7 +32,7 @@ #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" #include "tests/validation/Validation.h" -#include "tests/validation/fixtures/ElementWiseUnaryFixture.h" +#include "tests/validation/fixtures/ElementwiseUnaryFixture.h" namespace arm_compute { @@ -42,8 +42,11 @@ namespace validation { namespace { -RelativeTolerance<float> tolerance_fp32(0.000001f); -RelativeTolerance<float> tolerance_fp16(0.001f); +RelativeTolerance<float> tolerance_fp32(0.000001f); +RelativeTolerance<float> tolerance_fp16(0.001f); +constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for unsigned 8-bit asymmetric type */ +constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_s(1); /**< Tolerance value for comparing reference's output against implementation's output for signed 8-bit asymmetric type */ + } // namespace TEST_SUITE(CL) TEST_SUITE(RsqrtLayer) @@ -68,6 +71,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip( // *INDENT-ON* template <typename T> using CLRsqrtLayerFixture = RsqrtValidationFixture<CLTensor, CLAccessor, CLRsqrtLayer, T>; +template <typename T> +using CLRsqrtLayerQuantizedFixture = RsqrtQuantizedValidationFixture<CLTensor, CLAccessor, CLRsqrtLayer, T>; TEST_SUITE(Float) TEST_SUITE(FP16) @@ -102,6 +107,30 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLRsqrtLayerFixture<float>, framework::DatasetM TEST_SUITE_END() // FP32 TEST_SUITE_END() // Float +TEST_SUITE(Quantized) +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunSmall, CLRsqrtLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", + DataType::QASYMM8_SIGNED)), + framework::dataset::make("SrcQInfo", { QuantizationInfo(0.4044, -128) })), + framework::dataset::make("OutQInfo", { QuantizationInfo(0.0027, -128) }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8_s); +} +TEST_SUITE_END() // QASYMM8_SIGNED +TEST_SUITE(QASYMM8) + +FIXTURE_DATA_TEST_CASE(RunSmall, CLRsqrtLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", + DataType::QASYMM8)), + framework::dataset::make("SrcQInfo", { QuantizationInfo(0.4044, 0) })), + framework::dataset::make("OutQInfo", { QuantizationInfo(0.0027, 0) }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() // QASYMM8 +TEST_SUITE_END() // Quantized + TEST_SUITE_END() // RsqrtLayer TEST_SUITE_END() // CL } // namespace validation diff --git a/tests/validation/CL/Scale.cpp b/tests/validation/CL/Scale.cpp index 523b49deb7..10a99ae34f 100644 --- a/tests/validation/CL/Scale.cpp +++ b/tests/validation/CL/Scale.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -82,6 +82,7 @@ constexpr AbsoluteTolerance<int16_t> tolerance_s16(1); constexpr float tolerance_f32_absolute(0.001f); RelativeTolerance<float> tolerance_f32(0.05); +constexpr float abs_tolerance_f16(0.1f); RelativeTolerance<half> tolerance_f16(half(0.1)); constexpr float tolerance_num_f32(0.01f); @@ -186,16 +187,6 @@ TEST_CASE(AlignedCornerNotSupported, framework::DatasetMode::ALL) ARM_COMPUTE_EXPECT(bool(result) == false, framework::LogLevel::ERRORS); } -TEST_CASE(WindowShrink, framework::DatasetMode::ALL) -{ - const auto input = TensorInfo{ TensorShape(37U, 37U, 2U), 1, DataType::F32 }; - const auto output = TensorInfo{ TensorShape(39U, 55U, 2U), 1, DataType::F32 }; - Status result{}; - - result = CLScale::validate(&input.clone()->set_is_resizable(false), &output.clone()->set_is_resizable(false), ScaleKernelInfo{ default_interpolation_policy, default_border_mode }); - ARM_COMPUTE_EXPECT(bool(result) == false, framework::LogLevel::ERRORS); -} - TEST_CASE(IncorrectScaleFactor, framework::DatasetMode::ALL) { const auto input = TensorInfo{ TensorShape(28U, 33U, 2U), 1, DataType::F32 }; @@ -210,6 +201,8 @@ TEST_SUITE_END() // Validate template <typename T> using CLScaleFixture = ScaleValidationFixture<CLTensor, CLAccessor, CLScale, T>; +template <typename T> +using CLScaleMixedDataLayoutFixture = ScaleValidationFixture<CLTensor, CLAccessor, CLScale, T, true>; TEST_SUITE(Float) TEST_SUITE(FP32) @@ -223,6 +216,15 @@ FIXTURE_DATA_TEST_CASE(Run, CLScaleFixture<float>, framework::DatasetMode::ALL, // Validate output validate(CLAccessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32, tolerance_f32_absolute); } +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLScaleMixedDataLayoutFixture<float>, framework::DatasetMode::ALL, ASSEMBLE_DATASET(f32_shape, ScaleSamplingPolicySet)) +{ + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); + + // Validate output + validate(CLAccessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32, tolerance_f32_absolute); +} FIXTURE_DATA_TEST_CASE(RunAlignCorners, CLScaleFixture<float>, framework::DatasetMode::ALL, ASSEMBLE_DATASET(f32_shape, ScaleAlignCornersSamplingPolicySet)) { //Create valid region @@ -261,7 +263,7 @@ FIXTURE_DATA_TEST_CASE(Run, CLScaleFixture<half>, framework::DatasetMode::ALL, A const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); // Validate output - validate(CLAccessor(_target), _reference, valid_region, tolerance_f16); + validate(CLAccessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); } FIXTURE_DATA_TEST_CASE(RunAlignCorners, CLScaleFixture<half>, framework::DatasetMode::ALL, ASSEMBLE_DATASET(f16_shape, ScaleAlignCornersSamplingPolicySet)) { @@ -270,7 +272,7 @@ FIXTURE_DATA_TEST_CASE(RunAlignCorners, CLScaleFixture<half>, framework::Dataset const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); // Validate output - validate(CLAccessor(_target), _reference, valid_region, tolerance_f16); + validate(CLAccessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); } const auto f16_nightly_shape = combine((SCALE_NIGHTLY_SHAPE_DATASET(num_elements_per_vector<half>())), framework::dataset::make("DataType", DataType::F16)); FIXTURE_DATA_TEST_CASE(RunNightly, CLScaleFixture<half>, framework::DatasetMode::NIGHTLY, ASSEMBLE_DATASET(f16_nightly_shape, ScaleSamplingPolicySet)) @@ -280,7 +282,7 @@ FIXTURE_DATA_TEST_CASE(RunNightly, CLScaleFixture<half>, framework::DatasetMode: const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); // Validate output - validate(CLAccessor(_target), _reference, valid_region, tolerance_f16); + validate(CLAccessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); } FIXTURE_DATA_TEST_CASE(RunNightlyAlignCorners, CLScaleFixture<half>, framework::DatasetMode::NIGHTLY, ASSEMBLE_DATASET(f16_nightly_shape, ScaleAlignCornersSamplingPolicySet)) { @@ -289,7 +291,7 @@ FIXTURE_DATA_TEST_CASE(RunNightlyAlignCorners, CLScaleFixture<half>, framework:: const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); // Validate output - validate(CLAccessor(_target), _reference, valid_region, tolerance_f16); + validate(CLAccessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); } TEST_SUITE_END() // FP16 TEST_SUITE_END() // Float diff --git a/tests/validation/CL/ScatterLayer.cpp b/tests/validation/CL/ScatterLayer.cpp new file mode 100644 index 0000000000..b1531eb64a --- /dev/null +++ b/tests/validation/CL/ScatterLayer.cpp @@ -0,0 +1,298 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/functions/CLScatter.h" +#include "tests/validation/fixtures/ScatterLayerFixture.h" +#include "tests/datasets/ScatterDataset.h" +#include "tests/CL/CLAccessor.h" +#include "arm_compute/function_info/ScatterInfo.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for fp32 data type */ +RelativeTolerance<float> tolerance_f16(0.02f); /**< Tolerance value for comparing reference's output against implementation's output for fp16 data type */ +RelativeTolerance<int32_t> tolerance_int(0); /**< Tolerance value for comparing reference's output against implementation's output for integer data types */ +} // namespace + +template <typename T> +using CLScatterLayerFixture = ScatterValidationFixture<CLTensor, CLAccessor, CLScatter, T>; + +using framework::dataset::make; + +TEST_SUITE(CL) +TEST_SUITE(Scatter) +DATA_TEST_CASE(Validate, framework::DatasetMode::PRECOMMIT, zip( + make("InputInfo", { TensorInfo(TensorShape(9U), 1, DataType::F32), // Mismatching data types + TensorInfo(TensorShape(15U), 1, DataType::F32), // Valid + TensorInfo(TensorShape(15U), 1, DataType::U8), // Valid + TensorInfo(TensorShape(8U), 1, DataType::F32), + TensorInfo(TensorShape(217U), 1, DataType::F32), // Mismatch input/output dims. + TensorInfo(TensorShape(217U), 1, DataType::F32), // Updates dim higher than Input/Output dims. + TensorInfo(TensorShape(12U), 1, DataType::F32), // Indices wrong datatype. + TensorInfo(TensorShape(9U, 3U, 4U), 1, DataType::F32), // Number of updates != number of indices + TensorInfo(TensorShape(17U, 3U, 3U, 2U), 1, DataType::F32), // index_len != (dst_dims - upt_dims + 1) + TensorInfo(TensorShape(17U, 3U, 3U, 2U, 2U, 2U), 1, DataType::F32), // index_len > 5 + }), + make("UpdatesInfo",{TensorInfo(TensorShape(3U), 1, DataType::F16), + TensorInfo(TensorShape(15U), 1, DataType::F32), + TensorInfo(TensorShape(15U), 1, DataType::U8), + TensorInfo(TensorShape(2U), 1, DataType::F32), + TensorInfo(TensorShape(217U), 1, DataType::F32), + TensorInfo(TensorShape(217U, 3U), 1, DataType::F32), + TensorInfo(TensorShape(2U), 1, DataType::F32), + TensorInfo(TensorShape(9U, 3U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(17U, 3U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(1U), 1, DataType::F32), + }), + make("IndicesInfo",{TensorInfo(TensorShape(1U, 3U), 1, DataType::S32), + TensorInfo(TensorShape(1U, 15U), 1, DataType::S32), + TensorInfo(TensorShape(1U, 15U), 1, DataType::S32), + TensorInfo(TensorShape(1U, 2U), 1, DataType::S32), + TensorInfo(TensorShape(1U, 271U), 1, DataType::S32), + TensorInfo(TensorShape(1U, 271U), 1, DataType::S32), + TensorInfo(TensorShape(1U, 2U), 1 , DataType::F32), + TensorInfo(TensorShape(1U, 4U), 1, DataType::S32), + TensorInfo(TensorShape(3U, 2U), 1, DataType::S32), + TensorInfo(TensorShape(6U, 2U), 1, DataType::S32), + }), + make("OutputInfo",{TensorInfo(TensorShape(9U), 1, DataType::F16), + TensorInfo(TensorShape(15U), 1, DataType::F32), + TensorInfo(TensorShape(15U), 1, DataType::U8), + TensorInfo(TensorShape(8U), 1, DataType::F32), + TensorInfo(TensorShape(271U, 3U), 1, DataType::F32), + TensorInfo(TensorShape(271U), 1, DataType::F32), + TensorInfo(TensorShape(12U), 1, DataType::F32), + TensorInfo(TensorShape(9U, 3U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(17U, 3U, 3U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(17U, 3U, 3U, 2U, 2U, 2U), 1, DataType::F32), + }), + make("ScatterInfo",{ ScatterInfo(ScatterFunction::Add, false), + ScatterInfo(ScatterFunction::Max, false), + ScatterInfo(ScatterFunction::Max, false), + ScatterInfo(ScatterFunction::Min, false), + ScatterInfo(ScatterFunction::Add, false), + ScatterInfo(ScatterFunction::Update, false), + ScatterInfo(ScatterFunction::Sub, false), + ScatterInfo(ScatterFunction::Sub, false), + ScatterInfo(ScatterFunction::Update, false), + ScatterInfo(ScatterFunction::Update, false), + }), + make("Expected", { false, true, true, true, false, false, false, false, false, false })), + input_info, updates_info, indices_info, output_info, scatter_info, expected) +{ + const Status status = CLScatter::validate(&input_info, &updates_info, &indices_info, &output_info, scatter_info); + ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); +} + +const auto allScatterFunctions = make("ScatterFunction", + {ScatterFunction::Update, ScatterFunction::Add, ScatterFunction::Sub, ScatterFunction::Min, ScatterFunction::Max }); + +TEST_SUITE(Float) +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(datasets::Small1DScatterDataset(), + make("DataType", {DataType::F32}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {true}))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +// With this test, src should be passed as nullptr. +FIXTURE_DATA_TEST_CASE(RunSmallZeroInit, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(datasets::Small1DScatterDataset(), + make("DataType", {DataType::F32}), + make("ScatterFunction", {ScatterFunction::Add}), + make("ZeroInit", {true}), + make("Inplace", {false}), + make("Padding", {true}))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +// Updates/src/dst have same no. dims. +FIXTURE_DATA_TEST_CASE(RunSmallMultiDim, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterMultiDimDataset(), + make("DataType", {DataType::F32}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {true}))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +// m+1-D to m+n-D cases +FIXTURE_DATA_TEST_CASE(RunSmallMultiIndices, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterMultiIndicesDataset(), + make("DataType", {DataType::F32}), + make("ScatterFunction", {ScatterFunction::Update, ScatterFunction::Add }), + make("ZeroInit", {false}), + make("Inplace", {false, true}), + make("Padding", {true}))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +// m+k, k-1-D m+n-D case +FIXTURE_DATA_TEST_CASE(RunSmallBatchedMultiIndices, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterBatchedDataset(), + make("DataType", {DataType::F32}), + make("ScatterFunction", {ScatterFunction::Update, ScatterFunction::Add}), + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {true}))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +// m+k, k-1-D m+n-D case +FIXTURE_DATA_TEST_CASE(RunSmallScatterScalar, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterScalarDataset(), + make("DataType", {DataType::F32}), + make("ScatterFunction", {ScatterFunction::Update, ScatterFunction::Add}), + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) // NOTE: Padding not supported in this datset +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +TEST_SUITE_END() // FP32 + + +// NOTE: Padding is disabled for the SmallScatterMixedDataset due certain shapes not supporting padding. +// Padding is well tested in F32 Datatype test cases. + +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<half>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterMixedDataset(), + make("DataType", {DataType::F16}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) +{ + validate(CLAccessor(_target), _reference, tolerance_f16); +} +TEST_SUITE_END() // FP16 +TEST_SUITE_END() // Float + +TEST_SUITE(Integer) +TEST_SUITE(S32) +FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<int32_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterMixedDataset(), + make("DataType", {DataType::S32}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) +{ + validate(CLAccessor(_target), _reference, tolerance_int); +} +TEST_SUITE_END() // S32 + +TEST_SUITE(S16) +FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<int16_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterMixedDataset(), + make("DataType", {DataType::S16}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) +{ + validate(CLAccessor(_target), _reference, tolerance_int); +} +TEST_SUITE_END() // S16 + +TEST_SUITE(S8) +FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<int8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterMixedDataset(), + make("DataType", {DataType::S8}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) +{ + validate(CLAccessor(_target), _reference, tolerance_int); +} +TEST_SUITE_END() // S8 + +TEST_SUITE(U32) +FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<uint32_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterMixedDataset(), + make("DataType", {DataType::U32}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) +{ + validate(CLAccessor(_target), _reference, tolerance_int); +} +TEST_SUITE_END() // U32 + +TEST_SUITE(U16) +FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<uint16_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterMixedDataset(), + make("DataType", {DataType::U16}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) +{ + validate(CLAccessor(_target), _reference, tolerance_int); +} +TEST_SUITE_END() // U16 + +TEST_SUITE(U8) +FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallScatterMixedDataset(), + make("DataType", {DataType::U8}), + allScatterFunctions, + make("ZeroInit", {false}), + make("Inplace", {false}), + make("Padding", {false}))) +{ + validate(CLAccessor(_target), _reference, tolerance_int); +} +TEST_SUITE_END() // U8 +TEST_SUITE_END() // Integer + +TEST_SUITE_END() // Scatter +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/CL/Select.cpp b/tests/validation/CL/Select.cpp index 3d7c61aab5..d3540cae48 100644 --- a/tests/validation/CL/Select.cpp +++ b/tests/validation/CL/Select.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -107,6 +107,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, validate(CLAccessor(_target), _reference); } +FIXTURE_DATA_TEST_CASE(RunOneDim, + CLSelectFixture<half>, + framework::DatasetMode::PRECOMMIT, + combine(combine(framework::dataset::make("Shape", TensorShape(1U, 16U)), + framework::dataset::make("has_same_rank", { false, true })), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} + FIXTURE_DATA_TEST_CASE(RunLarge, CLSelectFixture<half>, framework::DatasetMode::NIGHTLY, @@ -127,6 +138,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, validate(CLAccessor(_target), _reference); } +FIXTURE_DATA_TEST_CASE(RunOneDim, + CLSelectFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(combine(framework::dataset::make("Shape", TensorShape(1U, 16U)), + framework::dataset::make("has_same_rank", { false, true })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} + FIXTURE_DATA_TEST_CASE(RunLarge, CLSelectFixture<float>, framework::DatasetMode::NIGHTLY, @@ -149,6 +171,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, validate(CLAccessor(_target), _reference); } +FIXTURE_DATA_TEST_CASE(RunOneDim, + CLSelectFixture<uint8_t>, + framework::DatasetMode::PRECOMMIT, + combine(combine(framework::dataset::make("Shape", TensorShape(1U, 16U)), + framework::dataset::make("has_same_rank", { false, true })), + framework::dataset::make("DataType", DataType::QASYMM8))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} + FIXTURE_DATA_TEST_CASE(RunLarge, CLSelectFixture<uint8_t>, framework::DatasetMode::NIGHTLY, diff --git a/tests/validation/CL/SinLayer.cpp b/tests/validation/CL/SinLayer.cpp index e40c990db6..f0cb4c314e 100644 --- a/tests/validation/CL/SinLayer.cpp +++ b/tests/validation/CL/SinLayer.cpp @@ -22,7 +22,7 @@ * SOFTWARE. */ #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h" +#include "arm_compute/runtime/CL/functions/CLElementwiseUnaryLayer.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" #include "tests/CL/CLAccessor.h" @@ -32,7 +32,7 @@ #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" #include "tests/validation/Validation.h" -#include "tests/validation/fixtures/ElementWiseUnaryFixture.h" +#include "tests/validation/fixtures/ElementwiseUnaryFixture.h" namespace arm_compute { diff --git a/tests/validation/CL/SoftmaxLayer.cpp b/tests/validation/CL/SoftmaxLayer.cpp index 396e274e0b..eb47b7f666 100644 --- a/tests/validation/CL/SoftmaxLayer.cpp +++ b/tests/validation/CL/SoftmaxLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -34,6 +34,12 @@ #include "tests/validation/Validation.h" #include "tests/validation/fixtures/SoftmaxLayerFixture.h" +#include "arm_compute/runtime/MemoryManagerOnDemand.h" +#include "arm_compute/runtime/PoolManager.h" +#include "arm_compute/runtime/BlobLifetimeManager.h" +#include "arm_compute/runtime/CL/CLBufferAllocator.h" +#include "arm_compute/runtime/BlobMemoryPool.h" + namespace arm_compute { namespace test @@ -62,6 +68,47 @@ const auto CNNDataTypes = framework::dataset::make("DataType", TEST_SUITE(CL) TEST_SUITE(SoftmaxLayer) +TEST_CASE(SimpleMemoryManaged, framework::DatasetMode::ALL) +{ + // The purpose of this test is to test if the function can + // run correctly even with the given memory manager from its caller + // (Similar scenario when the library is integrated into other software) + // especially when working with workspace() method of + // @ref arm_compute::opencl::ClSoftmax. + const auto shape = TensorShape{4,2}; // Random shape, not important + constexpr auto dt = DataType::F32; // Random data type, not important + + // Create a memory manager + auto lm = std::make_shared<BlobLifetimeManager>(); + auto pm = std::make_shared<arm_compute::PoolManager>(); + auto alloc = std::make_unique<CLBufferAllocator>(); + auto mm = std::make_shared<MemoryManagerOnDemand>(lm, pm); + + auto src = create_tensor<CLTensor>(shape, dt); + auto dst = create_tensor<CLTensor>(shape, dt); + src.allocator()->allocate(); + dst.allocator()->allocate(); + + // Create the function with the memory manager + CLSoftmaxLayer smx(mm); + smx.configure(&src, &dst); + + // Populate the memory, acquire() will happen in run() + mm->populate(*alloc.get(), 1); + + std::vector<float> input_vals{0.0f, 1.0f, 0.0f, 0.0f, 0.5f, 0.0f, 0.0f, 0.0f,}; + library->fill_static_values(CLAccessor(src), input_vals); + + smx.run(); + + // Compute reference to compare + SimpleTensor<float> ref_src{shape, dt}; + library->fill_static_values(ref_src, input_vals); + auto ref_dst = reference::softmax_layer<float>(ref_src, 1., 0, false); + + validate(CLAccessor(dst), ref_dst); +} + // *INDENT-OFF* // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( diff --git a/tests/validation/CL/Tile.cpp b/tests/validation/CL/Tile.cpp index a06c05744f..f243780c00 100644 --- a/tests/validation/CL/Tile.cpp +++ b/tests/validation/CL/Tile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2020, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -42,6 +42,7 @@ namespace validation namespace { const auto MultiplesDataset = framework::dataset::make("Multiples", { Multiples{ 3 }, + Multiples{ 7 }, Multiples{ 2, 2 }, Multiples{ 1, 1, 3, 4 }, Multiples{ 2, 1, 2, 2 }, diff --git a/tests/validation/CL/Transpose.cpp b/tests/validation/CL/Transpose.cpp index 943534058b..6cf5fe8537 100644 --- a/tests/validation/CL/Transpose.cpp +++ b/tests/validation/CL/Transpose.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -50,12 +50,14 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip( framework::dataset::make("InputInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::U16), // Invalid shape TensorInfo(TensorShape(20U, 13U), 1, DataType::U8), // Wrong data type TensorInfo(TensorShape(20U, 16U), 1, DataType::U32), // Valid + TensorInfo(TensorShape(20U, 16U, 3U, 3U), 1, DataType::U16), // Transpose only first two dimensions }), framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(13U, 20U), 1, DataType::U32), TensorInfo(TensorShape(31U, 20U), 1, DataType::U16), TensorInfo(TensorShape(16U, 20U), 1, DataType::U32), + TensorInfo(TensorShape(16U, 20U, 3U, 3U), 1, DataType::U16), })), - framework::dataset::make("Expected", { false, false, true })), + framework::dataset::make("Expected", { false, false, true, true })), a_info, output_info, expected) { // Lock tensors @@ -80,6 +82,16 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLTransposeFixture<uint8_t>, framework::Dataset // Validate output validate(CLAccessor(_target), _reference); } +FIXTURE_DATA_TEST_CASE(RunLargeHighDimensional, + CLTransposeFixture<uint8_t>, + framework::DatasetMode::NIGHTLY, + combine(concat(concat(datasets::Large3DShapes(), datasets::Large4DShapes()), + datasets::Large5dShapes()), + framework::dataset::make("DataType", DataType::U8))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} TEST_SUITE_END() // U8 TEST_SUITE(U16) @@ -106,6 +118,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLTransposeFixture<uint32_t>, framework::Datase // Validate output validate(CLAccessor(_target), _reference); } +FIXTURE_DATA_TEST_CASE(RunSmallHighDimensional, + CLTransposeFixture<uint32_t>, + framework::DatasetMode::PRECOMMIT, + combine(concat(datasets::Small3DShapes(), datasets::Small4DShapes()), + framework::dataset::make("DataType", DataType::U32))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} FIXTURE_DATA_TEST_CASE(RunLarge, CLTransposeFixture<uint32_t>, framework::DatasetMode::NIGHTLY, combine(concat(datasets::Large1DShapes(), datasets::Large2DShapes()), framework::dataset::make("DataType", DataType::U32))) { diff --git a/tests/validation/CL/UNIT/DynamicTensor.cpp b/tests/validation/CL/UNIT/DynamicTensor.cpp index 833256039e..ac433721d8 100644 --- a/tests/validation/CL/UNIT/DynamicTensor.cpp +++ b/tests/validation/CL/UNIT/DynamicTensor.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,11 +29,8 @@ #include "arm_compute/runtime/MemoryManagerOnDemand.h" #include "arm_compute/runtime/PoolManager.h" #include "src/core/CL/kernels/CLFillBorderKernel.h" -#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" -#include "src/core/CL/kernels/CLIm2ColKernel.h" #include "src/core/CL/kernels/CLL2NormalizeLayerKernel.h" #include "src/core/CL/kernels/CLReductionOperationKernel.h" -#include "src/core/CL/kernels/CLWeightsReshapeKernel.h" #include "tests/AssetsLibrary.h" #include "tests/CL/CLAccessor.h" #include "tests/Globals.h" diff --git a/tests/validation/CL/UNIT/Multithreaded.cpp b/tests/validation/CL/UNIT/Multithreaded.cpp new file mode 100644 index 0000000000..5c75df709d --- /dev/null +++ b/tests/validation/CL/UNIT/Multithreaded.cpp @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/RuntimeContext.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/framework/Macros.h" +#include "tests/framework/ParametersLibrary.h" +#include "tests/validation/Validation.h" +#include "arm_compute/runtime/CL/functions/CLActivationLayer.h" +#include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h" +#include "tests/validation/reference/ActivationLayer.h" +#include "tests/validation/reference/PixelWiseMultiplication.h" +#include <thread> + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +TEST_SUITE(CL) +TEST_SUITE(UNIT) +TEST_SUITE(RuntimeContext) +// This test tries scheduling work concurrently from two independent threads +TEST_CASE(MultipleThreadedScheduller, framework::DatasetMode::ALL) +{ + constexpr auto num_threads(16u); + std::array<CLActivationLayer, num_threads> func{}; + std::array<CLPixelWiseMultiplication, num_threads> pmul{}; + std::array<CLTensor, num_threads> s0{}; + std::array<CLTensor, num_threads> s1{}; + + std::array<CLTensor, num_threads> st{}; + std::array<CLTensor, num_threads> dt{}; + + const TensorShape tensor_shape(128u, 4u, 5u); + const ActivationLayerInfo ainfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.5f, 1.f); + std::array<std::thread, num_threads> threads; + auto ctx = parameters->get_ctx<CLTensor>(); + + for(auto i = 0u; i < num_threads; ++i) + { + s0[i] = create_tensor<CLTensor>(tensor_shape, DataType::F32, 1); + s1[i] = create_tensor<CLTensor>(tensor_shape, DataType::F32, 1); + st[i] = create_tensor<CLTensor>(tensor_shape, DataType::F32, 1); + dt[i] = create_tensor<CLTensor>(tensor_shape, DataType::F32, 1); + func[i] = CLActivationLayer(ctx); + pmul[i] = CLPixelWiseMultiplication(); + threads[i] = + std::thread([&,i] + { + auto &s = st[i]; + auto &t = dt[i]; + auto &p0 = s0[i]; + auto &p1 = s1[i]; + pmul[i].configure(&p0, &p1, &s, 1.f, ConvertPolicy::WRAP, RoundingPolicy::TO_NEAREST_UP); + func[i].configure(&s, &t, ainfo); + s.allocator()->allocate(); + t.allocator()->allocate(); + p0.allocator()->allocate(); + p1.allocator()->allocate(); + library->fill_tensor_uniform(CLAccessor(p0), 0, -1.f, 1.f); + library->fill_tensor_uniform(CLAccessor(p1), 0, -1.f, 1.f); + pmul[i].run(); + func[i].run(); + }); + } + + for(auto &t : threads) + { + t.join(); + } + + SimpleTensor<float> rs{ tensor_shape, DataType::F32, 1 }; + SimpleTensor<float> ra{ tensor_shape, DataType::F32, 1 }; + SimpleTensor<float> rb{ tensor_shape, DataType::F32, 1 }; + library->fill_tensor_uniform(ra, 0, -1.f, 1.f); + library->fill_tensor_uniform(rb, 0, -1.f, 1.f); + const auto mul = reference::pixel_wise_multiplication<float, float, float>(ra, rb, 1.f, ConvertPolicy::WRAP, RoundingPolicy::TO_NEAREST_UP, DataType::F32); + const auto golden = reference::activation_layer<float>(mul, ainfo); + for(auto &d : dt) + { + validate(CLAccessor(d), golden); + } +} + +TEST_SUITE_END() // MultipleThreadedScheduller +TEST_SUITE_END() // UNIT +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/CL/UNIT/TensorAllocator.cpp b/tests/validation/CL/UNIT/TensorAllocator.cpp index 3ccdd99fe3..559f47e16c 100644 --- a/tests/validation/CL/UNIT/TensorAllocator.cpp +++ b/tests/validation/CL/UNIT/TensorAllocator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,9 +24,14 @@ #include "arm_compute/runtime/CL/CLTensorAllocator.h" #include "arm_compute/core/utils/misc/MMappedFile.h" +#include "arm_compute/runtime/BlobLifetimeManager.h" +#include "arm_compute/runtime/CL/CLBufferAllocator.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/functions/CLActivationLayer.h" +#include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h" #include "arm_compute/runtime/MemoryGroup.h" +#include "arm_compute/runtime/MemoryManagerOnDemand.h" +#include "arm_compute/runtime/PoolManager.h" #include "tests/CL/CLAccessor.h" #include "tests/Globals.h" #include "tests/framework/Asserts.h" @@ -60,12 +65,108 @@ cl_mem import_malloc_memory_helper(void *ptr, size_t size) return buf; } + +class DummyAllocator final : public IAllocator +{ +public: + DummyAllocator() = default; + + void *allocate(size_t size, size_t alignment) override + { + ++_n_calls; + return _backend_allocator.allocate(size, alignment); + } + void free(void *ptr) override + { + return _backend_allocator.free(ptr); + } + std::unique_ptr<IMemoryRegion> make_region(size_t size, size_t alignment) override + { + // Needs to be implemented as is the one that is used internally by the CLTensorAllocator + ++_n_calls; + return _backend_allocator.make_region(size, alignment); + } + int get_n_calls() const + { + return _n_calls; + } + +private: + int _n_calls{}; + CLBufferAllocator _backend_allocator{}; +}; + +void run_conv2d(std::shared_ptr<IMemoryManager> mm, IAllocator &mm_allocator) +{ + // Create tensors + CLTensor src, weights, bias, dst; + src.allocator()->init(TensorInfo(TensorShape(16U, 32U, 32U, 2U), 1, DataType::F32, DataLayout::NHWC)); + weights.allocator()->init(TensorInfo(TensorShape(16U, 3U, 3U, 32U), 1, DataType::F32, DataLayout::NHWC)); + bias.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32, DataLayout::NHWC)); + dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 32U, 2U), 1, DataType::F32, DataLayout::NHWC)); + + // Create and configure function + CLGEMMConvolutionLayer conv(mm); + conv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1U, 1U, 1U, 1U)); + + // Allocate tensors + src.allocator()->allocate(); + weights.allocator()->allocate(); + bias.allocator()->allocate(); + dst.allocator()->allocate(); + + // Finalize memory manager + if(mm != nullptr) + { + mm->populate(mm_allocator, 1 /* num_pools */); + ARM_COMPUTE_EXPECT(mm->lifetime_manager()->are_all_finalized(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(mm->pool_manager()->num_pools() == 1, framework::LogLevel::ERRORS); + } + + conv.run(); +} } // namespace TEST_SUITE(CL) TEST_SUITE(UNIT) TEST_SUITE(TensorAllocator) +/* Validate that an external global allocator can be used for all internal allocations */ +TEST_CASE(ExternalGlobalAllocator, framework::DatasetMode::ALL) +{ + DummyAllocator global_tensor_alloc; + CLTensorAllocator::set_global_allocator(&global_tensor_alloc); + + // Run a convolution + run_conv2d(nullptr /* mm */, global_tensor_alloc); + + // Check that allocator has been called multiple times > 4 + ARM_COMPUTE_EXPECT(global_tensor_alloc.get_n_calls() > 4, framework::LogLevel::ERRORS); + + // Nullify global allocator + CLTensorAllocator::set_global_allocator(nullptr); +} + +/* Validate that an external global allocator can be used for the pool manager */ +TEST_CASE(ExternalGlobalAllocatorMemoryPool, framework::DatasetMode::ALL) +{ + auto lifetime_mgr = std::make_shared<BlobLifetimeManager>(); + auto pool_mgr = std::make_shared<PoolManager>(); + auto mm = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr); + + DummyAllocator global_tensor_alloc; + CLTensorAllocator::set_global_allocator(&global_tensor_alloc); + + // Run a convolution + run_conv2d(mm, global_tensor_alloc); + + // Check that allocator has been called multiple times > 4 + ARM_COMPUTE_EXPECT(global_tensor_alloc.get_n_calls() > 4, framework::LogLevel::ERRORS); + + // Nullify global allocator + CLTensorAllocator::set_global_allocator(nullptr); +} + /** Validates import memory interface when importing cl buffer objects */ TEST_CASE(ImportMemoryBuffer, framework::DatasetMode::ALL) { @@ -79,31 +180,31 @@ TEST_CASE(ImportMemoryBuffer, framework::DatasetMode::ALL) // Negative case : Import nullptr CLTensor t1; t1.allocator()->init(info); - ARM_COMPUTE_EXPECT(!bool(t1.allocator()->import_memory(cl::Buffer())), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(t1.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!bool(t1.allocator()->import_memory(cl::Buffer()))); + ARM_COMPUTE_ASSERT(t1.info()->is_resizable()); // Negative case : Import memory to a tensor that is memory managed CLTensor t2; MemoryGroup mg; t2.allocator()->set_associated_memory_group(&mg); - ARM_COMPUTE_EXPECT(!bool(t2.allocator()->import_memory(buf)), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(t2.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!bool(t2.allocator()->import_memory(buf))); + ARM_COMPUTE_ASSERT(t2.info()->is_resizable()); // Negative case : Invalid buffer size CLTensor t3; const TensorInfo info_neg(TensorShape(32U, 16U, 3U), 1, DataType::F32); t3.allocator()->init(info_neg); - ARM_COMPUTE_EXPECT(!bool(t3.allocator()->import_memory(buf)), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(t3.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!bool(t3.allocator()->import_memory(buf))); + ARM_COMPUTE_ASSERT(t3.info()->is_resizable()); // Positive case : Set raw pointer CLTensor t4; t4.allocator()->init(info); - ARM_COMPUTE_EXPECT(bool(t4.allocator()->import_memory(buf)), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!t4.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(bool(t4.allocator()->import_memory(buf))); + ARM_COMPUTE_ASSERT(!t4.info()->is_resizable()); ARM_COMPUTE_EXPECT(t4.cl_buffer().get() == buf.get(), framework::LogLevel::ERRORS); t4.allocator()->free(); - ARM_COMPUTE_EXPECT(t4.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(t4.info()->is_resizable()); ARM_COMPUTE_EXPECT(t4.cl_buffer().get() != buf.get(), framework::LogLevel::ERRORS); } @@ -141,8 +242,8 @@ TEST_CASE(ImportMemoryMalloc, framework::DatasetMode::ALL) std::align(alignment, total_size_in_bytes, aligned_ptr, space); cl::Buffer wrapped_buffer(import_malloc_memory_helper(aligned_ptr, total_size_in_bytes)); - ARM_COMPUTE_EXPECT(bool(tensor.allocator()->import_memory(wrapped_buffer)), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!tensor.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(bool(tensor.allocator()->import_memory(wrapped_buffer))); + ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable()); // Fill tensor std::uniform_real_distribution<float> distribution(-5.f, 5.f); @@ -205,12 +306,12 @@ TEST_CASE(ImportMemoryMappedFile, framework::DatasetMode::ALL) // Map file utils::mmap_io::MMappedFile mmapped_file("test_mmap_import.bin", 0 /** Whole file */, 0); - ARM_COMPUTE_EXPECT(mmapped_file.is_mapped(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(mmapped_file.is_mapped()); unsigned char *data = mmapped_file.data(); cl::Buffer wrapped_buffer(import_malloc_memory_helper(data, total_size_in_bytes)); - ARM_COMPUTE_EXPECT(bool(tensor.allocator()->import_memory(wrapped_buffer)), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!tensor.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(bool(tensor.allocator()->import_memory(wrapped_buffer))); + ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable()); // Fill tensor std::uniform_real_distribution<float> distribution(-5.f, 5.f); @@ -233,7 +334,7 @@ TEST_CASE(ImportMemoryMappedFile, framework::DatasetMode::ALL) // Release resources tensor.allocator()->free(); - ARM_COMPUTE_EXPECT(tensor.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(tensor.info()->is_resizable()); } } #endif // !defined(BARE_METAL) diff --git a/tests/validation/CL/UNIT/Tuner.cpp b/tests/validation/CL/UNIT/Tuner.cpp deleted file mode 100644 index cf2513bf2c..0000000000 --- a/tests/validation/CL/UNIT/Tuner.cpp +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/tuners/BifrostTuner.h" -#include "src/core/CL/kernels/CLDirectConvolutionLayerKernel.h" -#include "tests/Utils.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -TEST_SUITE(CL) -TEST_SUITE(UNIT) -TEST_SUITE(Tuner) - -/** Validates static tuning of Bifrost tuner */ -TEST_CASE(BifrostTunerSimple, framework::DatasetMode::ALL) -{ - // Create tuner - tuners::BifrostTuner tuner; - - // Create tensors - auto src = create_tensor<CLTensor>(TensorShape(13U, 13U, 16U), DataType::F32); - auto weights = create_tensor<CLTensor>(TensorShape(3U, 3U, 16U, 3U), DataType::F32); - auto bias = create_tensor<CLTensor>(TensorShape(3U), DataType::F32); - auto dst = create_tensor<CLTensor>(TensorShape(13U, 13U, 3U), DataType::F32); - - // Create kernel - cl::NDRange fake_lws(2000); - CLDirectConvolutionLayerKernel conv; - conv.set_target(GPUTarget::G72); - - // Configure - conv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1, 1, 1, 1)); - - // Hard-wire lws to kernel and validate lws - conv.set_lws_hint(fake_lws); - ARM_COMPUTE_EXPECT(conv.lws_hint()[0] == 2000, framework::LogLevel::ERRORS); - - // Tune kernel and validate - tuner.tune_kernel_static(conv); - ARM_COMPUTE_EXPECT(conv.lws_hint()[0] != 2000, framework::LogLevel::ERRORS); - - // Clear tuner - CLScheduler::get().default_init(); -} -TEST_SUITE_END() -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/CL/UNIT/WeightsRetention.cpp b/tests/validation/CL/UNIT/WeightsRetention.cpp index acf795e48b..357c88af10 100644 --- a/tests/validation/CL/UNIT/WeightsRetention.cpp +++ b/tests/validation/CL/UNIT/WeightsRetention.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,18 +22,7 @@ * SOFTWARE. */ #include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" -#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" #include "src/core/CL/kernels/CLFillBorderKernel.h" -#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" -#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" -#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h" -#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h" -#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h" -#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" -#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" -#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" -#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" -#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" #include "tests/AssetsLibrary.h" #include "tests/CL/CLAccessor.h" #include "tests/Globals.h" diff --git a/tests/validation/CL/WeightsReshape.cpp b/tests/validation/CL/WeightsReshape.cpp index d04c10cee2..4345c4b08a 100644 --- a/tests/validation/CL/WeightsReshape.cpp +++ b/tests/validation/CL/WeightsReshape.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,7 +22,7 @@ * SOFTWARE. */ #include "arm_compute/core/Types.h" -#include "src/core/CL/kernels/CLWeightsReshapeKernel.h" +#include "src/gpu/cl/kernels/ClWeightsReshapeKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/datasets/ShapeDatasets.h" @@ -41,7 +41,7 @@ namespace validation TEST_SUITE(CL) TEST_SUITE(WeightsReshape) -using CLWeightsReshape = CLSynthetizeFunction<CLWeightsReshapeKernel>; +using ClWeightsReshape = ClSynthetizeOperatorWithBorder<opencl::kernels::ClWeightsReshapeKernel>; /** Validate tests * @@ -87,15 +87,15 @@ framework::dataset::make("NumGroups", { 1, 1, 1, 2, 1, 2 })), framework::dataset::make("Expected", { false, false, false, false, false, false })), input_info, biases_info, output_info, num_groups, expected) { - bool status = bool(CLWeightsReshape::validate(&input_info, &biases_info, &output_info, num_groups)); + bool status = bool(opencl::kernels::ClWeightsReshapeKernel::validate(&input_info, &biases_info, &output_info, num_groups)); ARM_COMPUTE_EXPECT(status == expected, framework::LogLevel::ERRORS); } template <typename T> -using CLWeightsReshapeFixture = WeightsReshapeValidationFixture<CLTensor, CLAccessor, CLWeightsReshape, T>; +using ClWeightsReshapeFixture = WeightsReshapeOpValidationFixture<CLTensor, CLAccessor, ClWeightsReshape, T>; TEST_SUITE(Float) -FIXTURE_DATA_TEST_CASE(FP32, CLWeightsReshapeFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(3U, 3U, 48U, 120U) }), +FIXTURE_DATA_TEST_CASE(FP32, ClWeightsReshapeFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(3U, 3U, 48U, 120U) }), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("HasBias", { true, false })), framework::dataset::make("NumGroups", { 1, 2 }))) @@ -104,7 +104,7 @@ FIXTURE_DATA_TEST_CASE(FP32, CLWeightsReshapeFixture<float>, framework::DatasetM validate(CLAccessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(FP16, CLWeightsReshapeFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(13U, 13U, 96U, 240U) }), +FIXTURE_DATA_TEST_CASE(FP16, ClWeightsReshapeFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(13U, 13U, 96U, 240U) }), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("HasBias", { true, false })), framework::dataset::make("NumGroups", { 3, 4 }))) @@ -113,7 +113,7 @@ FIXTURE_DATA_TEST_CASE(FP16, CLWeightsReshapeFixture<half>, framework::DatasetMo validate(CLAccessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(BFloat16, CLWeightsReshapeFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(9U, 9U, 96U, 240U) }), +FIXTURE_DATA_TEST_CASE(BFloat16, ClWeightsReshapeFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(9U, 9U, 96U, 240U) }), framework::dataset::make("DataType", DataType::BFLOAT16)), framework::dataset::make("HasBias", { false })), framework::dataset::make("NumGroups", { 3, 4 }))) @@ -125,7 +125,7 @@ FIXTURE_DATA_TEST_CASE(BFloat16, CLWeightsReshapeFixture<half>, framework::Datas TEST_SUITE_END() TEST_SUITE(Quantized) -FIXTURE_DATA_TEST_CASE(QASYMM8, CLWeightsReshapeFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(5U, 5U, 48U, 120U) }), +FIXTURE_DATA_TEST_CASE(QASYMM8, ClWeightsReshapeFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(5U, 5U, 48U, 120U) }), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("HasBias", { false })), framework::dataset::make("NumGroups", { 1, 2 }))) @@ -134,7 +134,7 @@ FIXTURE_DATA_TEST_CASE(QASYMM8, CLWeightsReshapeFixture<uint8_t>, framework::Dat validate(CLAccessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(QASYMM8_SIGNED, CLWeightsReshapeFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(5U, 5U, 48U, 120U) }), +FIXTURE_DATA_TEST_CASE(QASYMM8_SIGNED, ClWeightsReshapeFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(5U, 5U, 48U, 120U) }), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("HasBias", { false })), framework::dataset::make("NumGroups", { 1, 2 }))) diff --git a/tests/validation/CL/Winograd.cpp b/tests/validation/CL/Winograd.cpp index 115f9378c9..196e7edb8c 100644 --- a/tests/validation/CL/Winograd.cpp +++ b/tests/validation/CL/Winograd.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -27,12 +27,10 @@ #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" #include "arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h" -#include "arm_compute/runtime/CL/functions/CLWinogradInputTransform.h" -#include "src/core/CL/kernels/CLWinogradFilterTransformKernel.h" -#include "src/core/CL/kernels/CLWinogradOutputTransformKernel.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/Helper.h" #include "tests/PaddingCalculator.h" +#include "tests/datasets/ActivationFunctionsDataset.h" #include "tests/datasets/LargeConvolutionLayerDataset.h" #include "tests/datasets/ShapeDatasets.h" #include "tests/datasets/SmallConvolutionLayerDataset.h" @@ -50,631 +48,379 @@ namespace test { namespace validation { +using framework::dataset::make; namespace { // *INDENT-OFF* // clang-format off -constexpr AbsoluteTolerance<float> tolerance_f32(0.002f); const AbsoluteTolerance<half> tolerance_f16(half(1.f)); constexpr AbsoluteTolerance<float> tolerance_convolution_layer_f32(0.1f); const AbsoluteTolerance<half> tolerance_convolution_layer_f16(half(0.4f)); RelativeTolerance<half_float::half> rel_tolerance_f16(half(0.2)); /**< Tolerance value for comparing reference's output against implementation's output for FP16 data types */ constexpr float tolerance_num = 0.05f; /**< Tolerance number */ constexpr float abs_tolerance_convolution_layer_f16 = 2.5f; /**< Tolerance number */ -constexpr float tolerance_num_f16 = 0.15f; /**< Tolerance number */ - -// Input transform -const auto SmallWinogradInputTransformDatasetNCHW = - framework::dataset::concat(datasets::SmallWinogradInputTransformDataset2x2_3x3(), - framework::dataset::concat(datasets::SmallWinogradInputTransformDataset2x1_3x1(), - framework::dataset::concat(datasets::SmallWinogradInputTransformDataset1x2_1x3(), - framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x4_3x3(), - framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x1_3x1(), - framework::dataset::concat(datasets::SmallWinogradInputTransformDataset1x4_1x3(), - framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x4_5x5(), - framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x1_5x1(), - datasets::SmallWinogradInputTransformDataset1x4_1x5())))))))); - -const auto SmallWinogradInputTransformDatasetNHWC = framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x4_3x3(), - framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x1_3x1(), - framework::dataset::concat(datasets::SmallWinogradInputTransformDataset1x4_1x3(), - framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x4_5x5(), - framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x1_5x1(), - framework::dataset::concat(datasets::SmallWinogradInputTransformDataset1x4_1x5(), - framework::dataset::concat(datasets::SmallWinogradInputTransformDataset2x1_7x1(), - datasets::SmallWinogradInputTransformDataset1x2_1x7()))))))); - -const auto SmallWinogradInputTransformDatasetNHWC_FP32 = framework::dataset::concat(SmallWinogradInputTransformDatasetNHWC, - datasets::SmallWinogradInputTransformDataset2x2_7x7()); - -const auto LargeWinogradInputTransformDatasetNCHW = - framework::dataset::concat(datasets::LargeWinogradInputTransformDataset2x2_3x3(), - framework::dataset::concat(datasets::LargeWinogradInputTransformDataset2x1_3x1(), - framework::dataset::concat(datasets::LargeWinogradInputTransformDataset1x2_1x3(), - framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x4_3x3(), - framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x1_3x1(), - framework::dataset::concat(datasets::LargeWinogradInputTransformDataset1x4_1x3(), - framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x4_5x5(), - framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x1_5x1(), - datasets::LargeWinogradInputTransformDataset1x4_1x5())))))))); - -const auto LargeWinogradInputTransformDatasetNHWC = - framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x4_3x3(), - framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x4_5x5(), - framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x1_5x1(), - datasets::LargeWinogradInputTransformDataset1x4_1x5()))); - -const auto LargeWinogradInputTransformDatasetNHWC_FP32 = - framework::dataset::concat(LargeWinogradInputTransformDatasetNHWC, - (datasets::LargeWinogradInputTransformDataset2x2_7x7())); - -// Filter transform -const auto SmallWinogradFilterTransformDatasetNCHW = - framework::dataset::concat(combine(datasets::Small3x3Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U), Size2D(4U, 4U) })), - framework::dataset::concat(combine(datasets::Small3x1Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 1U), Size2D(4U, 1U) })), - framework::dataset::concat(combine(datasets::Small1x3Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 2U), Size2D(1U, 4U) })), - framework::dataset::concat(combine(datasets::Small5x5Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) })), - framework::dataset::concat(combine(datasets::Small5x1Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 1U) })), - combine(datasets::Small1x5Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) }))))))); - -const auto SmallWinogradFilterTransformDatasetNHWC_F16 = - framework::dataset::concat(combine(datasets::Small3x3Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) })), - framework::dataset::concat(combine(datasets::Small3x1Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 1U) })), - framework::dataset::concat(combine(datasets::Small1x3Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })), - framework::dataset::concat(combine(datasets::Small5x5Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) })), - framework::dataset::concat(combine(datasets::Small5x1Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 1U) })), - framework::dataset::concat(combine(datasets::Small1x5Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })), - framework::dataset::concat(combine(datasets::Small1x7Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 2U) })), - combine(datasets::Small7x1Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 1U) }))))))))); - -const auto SmallWinogradFilterTransformDatasetNHWC_F32 = - framework::dataset::concat(SmallWinogradFilterTransformDatasetNHWC_F16, - combine(datasets::Small7x7Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U) }))); - -const auto LargeWinogradFilterTransformDatasetNCHW = - framework::dataset::concat(combine(datasets::Large3x3Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U), Size2D(4U, 4U) })), - framework::dataset::concat(combine(datasets::Large3x1Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 1U), Size2D(4U, 1U) })), - framework::dataset::concat(combine(datasets::Large1x3Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 2U), Size2D(1U, 4U) })), - framework::dataset::concat(combine(datasets::Large5x5Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) })), - framework::dataset::concat(combine(datasets::Large5x1Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 1U) })), - combine(datasets::Large1x5Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) }))))))); - -const auto LargeWinogradFilterTransformDatasetNHWC_F16 = - framework::dataset::concat(combine(datasets::Large3x3Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) })), - framework::dataset::concat(combine(datasets::Large3x1Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 1U) })), - framework::dataset::concat(combine(datasets::Large1x3Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })), - framework::dataset::concat(combine(datasets::Large5x5Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) })), - framework::dataset::concat(combine(datasets::Large5x1Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 1U) })), - framework::dataset::concat(combine(datasets::Large1x5Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })), - framework::dataset::concat(combine(datasets::Large7x1Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 1U) })), - combine(datasets::Large1x7Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 2U) }))))))))); - -const auto LargeWinogradFilterTransformDatasetNHWC_F32 = - framework::dataset::concat(LargeWinogradFilterTransformDatasetNHWC_F16, - combine(datasets::Large7x7Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U) }))); - -// Output transform -const auto SmallWinogradOutputTransformDatasetNCHW = datasets::SmallWinogradOutputTransformDatasetNCHW(); - -const auto SmallWinogradOutputTransformDatasetNHWC_F16 = datasets::SmallWinogradOutputTransformDatasetNHWC_F16(); - -const auto SmallWinogradOutputTransformDatasetNHWC_F32 = datasets::SmallWinogradOutputTransformDatasetNHWC_F32(); - -const auto LargeWinogradOutputTransformDatasetNCHW = datasets::LargeWinogradOutputTransformDatasetNCHW(); - -const auto LargeWinogradOutputTransformDatasetNHWC_F16 = datasets::LargeWinogradOutputTransformDatasetNHWC_F16(); - -const auto LargeWinogradOutputTransformDatasetNHWC_F32 = datasets::LargeWinogradOutputTransformDatasetNHWC_F32(); - -//Activation Functions -const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo", +constexpr float tolerance_num_f16 = 0.15f; /**< Tolerance number */ + +const auto ActivationFunctionsDataset = make("ActivationInfo", { - ActivationLayerInfo(), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU) + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.8f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SOFT_RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ABS), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SQUARE), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::HARD_SWISH), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 2.f, 1.f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::GELU) }); -const auto ActivationFunctionsSmallDataset = framework::dataset::make("ActivationInfo", + +const auto ActivationFunctionsSmallDataset = make("ActivationInfo", { ActivationLayerInfo(), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SOFT_RELU) + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.8f, -0.5f) }); } // namespace using namespace arm_compute::misc::shape_calculator; +/* + Testing Strategy of CL Winograd: + - For nchw and nhwc and for each kernel size, we have a dedicated OpenCL kernel. + (except 1xN and Nx1 uses NxN under the hood). Therefore, test cases should be + stressed for each of these configurations. + - Fp32 and Fp16 kernels are the same. Only the DATA_TYPE build option changes + between these two. Because the same kernel is stressed thoroughly for both + small and large shapes for Fp32 data type, Fp16 kernels are run on a subset + of the shapes, because we get diminishing returns by exhaustively testing the + same kernel. + - Activations only affect the output stage and it's calculated on the output tile. + Exhaustively testing all activations with all the shapes does not provide much + value but increases the testing time quite significantly. Therefore, all activations + are tested in a subset of the shapes, and for all MxM kernels and data layouts as + they represent different OpenCL kernels. (1xM and Mx1 kernels use MxM under the hood). +*/ TEST_SUITE(CL) TEST_SUITE(Winograd) -TEST_SUITE(InputTransform) - -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( - framework::dataset::make("InputInfo",{ - TensorInfo(TensorShape(53U, 21U, 5U, 3U), 1, DataType::F16), // F16 not supported - TensorInfo(TensorShape(53U, 21U, 5U, 3U), 1, DataType::QASYMM8), // QASYMM8 not supported - TensorInfo(TensorShape(53U, 21U, 5U, 3U), 1, DataType::F32), // Kernel size not supported - TensorInfo(TensorShape(53U, 21U, 5U, 3U), 1, DataType::F32), // Strides not supported - TensorInfo(TensorShape(53U, 33U, 4U), 1, DataType::F32), // Padding needed - TensorInfo(TensorShape(34U, 42U, 7U, 3U), 1, DataType::F32), // Padding needed - TensorInfo(TensorShape(31U, 37U, 37U), 1, DataType::F32) // Padding needed - }), - framework::dataset::make("OutputInfo", { - TensorInfo(TensorShape(5U, 5U, 16U, 3U), 1, DataType::F16), - TensorInfo(TensorShape(5U, 5U, 16U, 3U), 1, DataType::QASYMM8), - TensorInfo(TensorShape(5U, 5U, 16U, 3U), 1, DataType::F32), - TensorInfo(TensorShape(5U, 1U, 16U, 3U), 1, DataType::F32), - TensorInfo(TensorShape(4U, 442U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(7U, 320U, 16U, 3U), 1, DataType::F32), - TensorInfo(TensorShape(37U, 304U, 16U), 1, DataType::F32) - })), - framework::dataset::make("WinogradInfo", { - WinogradInfo(Size2D(2, 2), Size2D(3, 3), Size2D(53U, 21U), PadStrideInfo(1, 1, 1, 0), DataLayout::NCHW), - WinogradInfo(Size2D(2, 2), Size2D(3, 3), Size2D(53U, 21U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW), - WinogradInfo(Size2D(2, 2), Size2D(3, 3), Size2D(53U, 21U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW), - WinogradInfo(Size2D(2, 2), Size2D(3, 3), Size2D(53U, 21U), PadStrideInfo(2, 1, 1, 1), DataLayout::NCHW), - WinogradInfo(Size2D(2, 2), Size2D(3, 3), Size2D(53U, 33U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW), - WinogradInfo(Size2D(2, 2), Size2D(3, 3), Size2D(34U, 42U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW), - WinogradInfo(Size2D(2, 2), Size2D(3, 3), Size2D(31U, 37U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW) - })), - framework::dataset::make("Expected", { false, false, false, false, false, false, false })), - input_info, output_info, winograd_info, expected) -{ - ARM_COMPUTE_EXPECT(bool(CLWinogradInputTransform::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), winograd_info)) == expected, framework::LogLevel::ERRORS); -} - -using CLWinogradInputTransformFixtureFP32 = WinogradInputTransformValidationFixture<CLTensor, CLAccessor, CLWinogradInputTransform, float>; -using CLWinogradInputTransformFixtureFP16 = WinogradInputTransformValidationFixture<CLTensor, CLAccessor, CLWinogradInputTransform, half>; - -TEST_SUITE(NCHW) -TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradInputTransformFixtureFP32, framework::DatasetMode::PRECOMMIT, combine(combine(SmallWinogradInputTransformDatasetNCHW, - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - framework::dataset::make("DataType", { DataType::F32 }))) -{ - validate(CLAccessor(_target), _reference, tolerance_f32); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradInputTransformFixtureFP32, framework::DatasetMode::NIGHTLY, combine(combine(LargeWinogradInputTransformDatasetNCHW, - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - framework::dataset::make("DataType", { DataType::F32 }))) -{ - validate(CLAccessor(_target), _reference, tolerance_f32); -} -TEST_SUITE_END() // FP32 - -TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradInputTransformFixtureFP16, framework::DatasetMode::PRECOMMIT, combine(combine(SmallWinogradInputTransformDatasetNCHW, - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - framework::dataset::make("DataType", { DataType::F16 }))) -{ - validate(CLAccessor(_target), _reference, tolerance_f16); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradInputTransformFixtureFP16, framework::DatasetMode::NIGHTLY, combine(combine(LargeWinogradInputTransformDatasetNCHW, - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - framework::dataset::make("DataType", { DataType::F16 }))) -{ - validate(CLAccessor(_target), _reference, tolerance_f16); -} -TEST_SUITE_END() // FP16 -TEST_SUITE_END() // NCHW - -TEST_SUITE(NHWC) -TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradInputTransformFixtureFP16, framework::DatasetMode::PRECOMMIT, combine(combine(SmallWinogradInputTransformDatasetNHWC, - framework::dataset::make("DataLayout", { DataLayout::NHWC })), - framework::dataset::make("DataType", { DataType::F16 }))) +TEST_SUITE(ConvolutionLayer) +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip( + make("InputInfo", { + TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F16), // Insufficient padding + TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32), // Datatype mismatch + TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F32), // Stride y not supported + TensorInfo(TensorShape(16U, 16U, 8U), 1, DataType::F32), // Padding needed + TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32) // Kernel size not supported + }), + make("WeightsInfo", { + TensorInfo(TensorShape(3U, 3U, 2U, 19U), 1, DataType::F16), + TensorInfo(TensorShape(3U, 3U, 2U, 19U), 1, DataType::QASYMM8), + TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32), + TensorInfo(TensorShape(3U, 3U, 8U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16) + }), + make("BiasesInfo", { + TensorInfo(TensorShape(19U), 1, DataType::F16), + TensorInfo(TensorShape(19U), 1, DataType::F32), + TensorInfo(TensorShape(21U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32), + TensorInfo(TensorShape(16U), 1, DataType::F32) + }), + make("OutputInfo", { + TensorInfo(TensorShape(17U, 31U, 19U), 1, DataType::F16), + TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32), + TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U, 16U), 1, DataType::F32), + TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32) + }), + make("ConvInfo", { + PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 2, 0, 0), + PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 1, 1, 0) + }), + make("Expected", { false, false, false, false, false })), + input_info, weights_info, bias_info, output_info, conv_info, expected) { - validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num_f16); + ARM_COMPUTE_EXPECT(bool(CLWinogradConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info)) == expected, framework::LogLevel::ERRORS); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradInputTransformFixtureFP16, framework::DatasetMode::NIGHTLY, combine(combine(LargeWinogradInputTransformDatasetNHWC, - framework::dataset::make("DataLayout", { DataLayout::NHWC })), - framework::dataset::make("DataType", { DataType::F16 }))) -{ - validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num_f16); -} -TEST_SUITE_END() // FP16 -TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradInputTransformFixtureFP32, framework::DatasetMode::PRECOMMIT, combine(combine(SmallWinogradInputTransformDatasetNHWC_FP32, - framework::dataset::make("DataLayout", { DataLayout::NHWC })), - framework::dataset::make("DataType", { DataType::F32 }))) -{ - validate(CLAccessor(_target), _reference, tolerance_f32); +DATA_TEST_CASE(SupportedKernels, framework::DatasetMode::ALL, zip( + make("WeightsInfo", { + // Shapes are always in NCHW format. When layout is NHWC, the shape is permuted + + // Fp32/16, NCHW + // 3x1, 1x3, 3x3 --> all TRUE + TensorInfo(TensorShape(3U, 3U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(1U, 3U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(3U, 1U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW), + + // 5x1, 1x5, 5x5 --> all TRUE + TensorInfo(TensorShape(5U, 5U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(1U, 5U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW), + TensorInfo(TensorShape(5U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW), + + // 7x1, 1x7, 7x7 + // nchw does not support kernels with size 7 --> all FALSE + TensorInfo(TensorShape(7U, 7U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(1U, 7U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(7U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW), + + // unsupported kernel sizes + TensorInfo(TensorShape(2U, 2U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(5U, 2U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(3U, 6U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW), + + // Fp32/16, NHWC + // 7x1, 1x7, 7x7 --> all TRUE + TensorInfo(TensorShape(7U, 7U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(1U, 7U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC), + TensorInfo(TensorShape(7U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC), + + // 3x1, 1x3, 3x3 --> all TRUE + TensorInfo(TensorShape(3U, 3U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC), + TensorInfo(TensorShape(1U, 3U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(3U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC), + + // 5x1, 1x5, 5x5 --> all TRUE + TensorInfo(TensorShape(5U, 5U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(1U, 5U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(5U, 1U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC), + + // unsupported kernel sizes + TensorInfo(TensorShape(2U, 2U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(5U, 2U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(3U, 6U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC), + + }), + make("Expected", { + true, true, true, // nchw, 3x3, 1x3, 3x1 + true, true, true, // nchw, 5x5, 1x5, 5x1 + false, false, false, // nchw, 7x7, 1x7, 7x1 + false, false, false, // nchw, random unsupported kernels + true, true, true, // nhwc, 7x7, 1x7, 7x1 + true, true, true, // nhwc, 3x3, 1x3, 3x1 + true, true, true, // nhwc, 5x5, 1x5, 5x1 + false, false, false, // nchw, random unsupported kernels + })), + weights_info_const, expected) +{ + DataType data_type = weights_info_const.data_type(); + DataLayout data_layout = weights_info_const.data_layout(); + + TensorInfo input_info = TensorInfo(TensorShape(17U, 31U, 2U), 1, data_type); + TensorInfo bias_info = TensorInfo(TensorShape(8U), 1, data_type); + TensorInfo weights_info = weights_info_const; + + if(data_layout == DataLayout::NHWC) + { + // Convert to NHWC + PermutationVector perm = PermutationVector(2U, 0U, 1U); + + TensorShape input_shape = input_info.tensor_shape(); + TensorShape weights_shape = weights_info.tensor_shape(); + permute(input_shape, perm); + permute(weights_shape, perm); + + input_info.set_tensor_shape(input_shape); + weights_info.set_tensor_shape(weights_shape); + + input_info.set_data_layout(data_layout); + weights_info.set_data_layout(data_layout); + bias_info.set_data_layout(data_layout); + } + + PadStrideInfo conv_info(1, 1, 0, 0); + + TensorShape output_shape = compute_deep_convolution_shape(input_info, weights_info, conv_info); + TensorInfo output_info = TensorInfo(output_shape, 1, data_type, data_layout); + + Status status = CLWinogradConvolutionLayer::validate( + &input_info, + &weights_info, + &bias_info, + &output_info, + conv_info, + ActivationLayerInfo(), + true /* fast math */); + + ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradInputTransformFixtureFP32, framework::DatasetMode::NIGHTLY, combine(combine(LargeWinogradInputTransformDatasetNHWC_FP32, - framework::dataset::make("DataLayout", { DataLayout::NHWC })), - framework::dataset::make("DataType", { DataType::F32 }))) -{ - validate(CLAccessor(_target), _reference, tolerance_f32); -} -TEST_SUITE_END() // FP32 -TEST_SUITE_END() // NHWC -TEST_SUITE_END() // InputTransform - -TEST_SUITE(FilterTransform) -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( - framework::dataset::make("InputInfo",{ - TensorInfo(TensorShape(3U, 3U, 5U, 3U), 1, DataType::F16), // F16 supported - TensorInfo(TensorShape(3U, 3U, 5U, 3U), 1, DataType::QASYMM8), // QASYMM8 not supported - TensorInfo(TensorShape(5U, 5U, 5U, 3U), 1, DataType::F32), // Kernel size not supported - TensorInfo(TensorShape(3U, 3U), 1, DataType::F32), // Output tile not supported - TensorInfo(TensorShape(3U, 3U, 5U, 3U), 1, DataType::F32), // valid - TensorInfo(TensorShape(3U, 3U, 37U, 2U), 1, DataType::F32), // valid - TensorInfo(TensorShape(3U, 3U, 37U, 22U), 1, DataType::F32) // valid - }), - framework::dataset::make("OutputInfo", { - TensorInfo(TensorShape(3U, 5U, 16U), 1, DataType::F16), - TensorInfo(TensorShape(3U, 5U, 16U), 1, DataType::QASYMM8), - TensorInfo(TensorShape(3U, 5U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(1U, 1U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(3U, 5U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(2U, 37U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(22U, 37U, 36U), 1, DataType::F32) - })), - framework::dataset::make("WinogradInfo", { - WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, DataLayout::NCHW /* Not needed */ ), - WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, DataLayout::NCHW /* Not needed */ ), - WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, DataLayout::NCHW /* Not needed */ ), - WinogradInfo(Size2D(3U, 3U), Size2D(3U, 3U), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, DataLayout::NCHW /* Not needed */ ), - WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, DataLayout::NCHW /* Not needed */ ), - WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, DataLayout::NCHW /* Not needed */ ), - WinogradInfo(Size2D(4U, 4U), Size2D(3U, 3U), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, DataLayout::NCHW /* Not needed */ ) - })), - framework::dataset::make("Expected", { true, false, false, false, true, true, true })), - input_info, output_info, winograd_info, expected) -{ - ARM_COMPUTE_EXPECT(bool(CLWinogradFilterTransformKernel::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), winograd_info)) == expected, framework::LogLevel::ERRORS); -} - -using CLWinogradFilterTransform = CLSynthetizeFunctionWithZeroConstantBorder<CLWinogradFilterTransformKernel, 0>; -using CLWinogradFilterTransformFixtureFP32 = WinogradFilterTransformValidationFixture<CLTensor, CLAccessor, CLWinogradFilterTransform, float>; -using CLWinogradFilterTransformFixtureFP16 = WinogradFilterTransformValidationFixture<CLTensor, CLAccessor, CLWinogradFilterTransform, half>; - -TEST_SUITE(NCHW) TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradFilterTransformFixtureFP32, framework::DatasetMode::PRECOMMIT, - combine(combine(SmallWinogradFilterTransformDatasetNCHW, - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - framework::dataset::make("DataType", { DataType::F32 }))) -{ - // Validate output - validate(CLAccessor(_target), _reference, tolerance_f32); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradFilterTransformFixtureFP32, framework::DatasetMode::NIGHTLY, - combine(combine(LargeWinogradFilterTransformDatasetNCHW, - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - framework::dataset::make("DataType", { DataType::F32 }))) -{ - // Validate output - validate(CLAccessor(_target), _reference, tolerance_f32); -} -TEST_SUITE_END() // FP32 -TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradFilterTransformFixtureFP16, framework::DatasetMode::PRECOMMIT, - combine(combine(SmallWinogradFilterTransformDatasetNCHW, - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - framework::dataset::make("DataType", { DataType::F16 }))) -{ - // Validate output - validate(CLAccessor(_target), _reference, tolerance_f16); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradFilterTransformFixtureFP16, framework::DatasetMode::NIGHTLY, - combine(combine(LargeWinogradFilterTransformDatasetNCHW, - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - framework::dataset::make("DataType", { DataType::F16 }))) -{ - // Validate output - validate(CLAccessor(_target), _reference, tolerance_f16); -} -TEST_SUITE_END() // FP16 -TEST_SUITE_END() // NCHW - -TEST_SUITE(NHWC) -TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradFilterTransformFixtureFP16, framework::DatasetMode::PRECOMMIT, - combine(combine(SmallWinogradFilterTransformDatasetNHWC_F16, - framework::dataset::make("DataLayout", { DataLayout::NHWC })), - framework::dataset::make("DataType", { DataType::F16 }))) -{ - // Validate output - validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num_f16); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradFilterTransformFixtureFP16, framework::DatasetMode::NIGHTLY, - combine(combine(LargeWinogradFilterTransformDatasetNHWC_F16, - framework::dataset::make("DataLayout", { DataLayout::NHWC })), - framework::dataset::make("DataType", { DataType::F16 }))) +using CLWinogradConvolutionLayerFastMathFixture = WinogradConvolutionLayerFastMathValidationFixture<CLTensor, CLAccessor, CLWinogradConvolutionLayer, float>; +using CLWinogradConvolutionLayerFastMathMixedDataLayoutFixture = WinogradConvolutionLayerFastMathValidationFixture<CLTensor, CLAccessor, CLWinogradConvolutionLayer, float, float, true, true>; +TEST_SUITE(Conv3x3) +FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(), + make("DataType", { DataType::F32 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output - validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num_f16); + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); } -TEST_SUITE_END() // FP16 -TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradFilterTransformFixtureFP32, framework::DatasetMode::PRECOMMIT, - combine(combine(SmallWinogradFilterTransformDatasetNHWC_F32, - framework::dataset::make("DataLayout", { DataLayout::NHWC })), - framework::dataset::make("DataType", { DataType::F32 }))) +FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY, + combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(), + make("DataType", { DataType::F32 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output - validate(CLAccessor(_target), _reference, tolerance_f32); + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradFilterTransformFixtureFP32, framework::DatasetMode::NIGHTLY, - combine(combine(LargeWinogradFilterTransformDatasetNHWC_F32, - framework::dataset::make("DataLayout", { DataLayout::NHWC })), - framework::dataset::make("DataType", { DataType::F32 }))) -{ - // Validate output - validate(CLAccessor(_target), _reference, tolerance_f32); -} -TEST_SUITE_END() // FP32 -TEST_SUITE_END() // NHWC -TEST_SUITE_END() // FilterTransform - -TEST_SUITE(OutputTransform) -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( - framework::dataset::make("InputInfo",{ - TensorInfo(TensorShape(512U, 49U, 16U, 5U), 1, DataType::F16), // F16 supported - TensorInfo(TensorShape(512U, 49U, 16U, 5U), 1, DataType::QASYMM8), // QASYMM8 not supported - TensorInfo(TensorShape(512U, 49U, 16U, 5U), 1, DataType::F32), // Kernel size not supported - TensorInfo(TensorShape(512U, 49U, 16U, 5U), 1, DataType::F32), // Valid - TensorInfo(TensorShape(13U, 108U, 16U, 4U), 1, DataType::F32), // Padding needed - TensorInfo(TensorShape(7U, 20U, 16U, 7U), 1, DataType::F32), // Valid - TensorInfo(TensorShape(7U, 20U, 16U, 7U), 1, DataType::F32), // Wrong WinogradInfo - TensorInfo(TensorShape(7U, 256U, 36U, 3U), 1, DataType::F32), // Valid - TensorInfo(TensorShape(7U, 256U, 16U, 3U), 1, DataType::F32) // Wrong number of batches - }), - framework::dataset::make("BiasInfo", { - TensorInfo(TensorShape(512U), 1, DataType::F16), - TensorInfo(TensorShape(512U), 1, DataType::QASYMM8), - TensorInfo(TensorShape(512U), 1, DataType::F32), - TensorInfo(TensorShape(512U), 1, DataType::F32), - TensorInfo(TensorShape(13U), 1, DataType::F32), - TensorInfo(TensorShape(7U), 1, DataType::F32), - TensorInfo(TensorShape(7U), 1, DataType::F32), - TensorInfo(TensorShape(7U), 1, DataType::F32), - TensorInfo(TensorShape(7U), 1, DataType::F32) - })), - framework::dataset::make("OutputInfo", { - TensorInfo(TensorShape(14U, 14U, 512U, 5U), 1, DataType::F16), - TensorInfo(TensorShape(14U, 14U, 512U, 5U), 1, DataType::QASYMM8), - TensorInfo(TensorShape(14U, 14U, 512U, 5U), 1, DataType::F32), - TensorInfo(TensorShape(14U, 14U, 512U, 5U), 1, DataType::F32), - TensorInfo(TensorShape(17U, 23U, 13U, 4U), 1, DataType::F32), - TensorInfo(TensorShape(8U, 10U, 7U, 7U), 1, DataType::F32), - TensorInfo(TensorShape(7U, 9U, 7U, 7U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 64U, 7U, 3U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 64U, 7U, 3U), 1, DataType::F32) - })), - framework::dataset::make("WinogradInfo", { - WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(14U, 14U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW), - WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(14U, 14U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW), - WinogradInfo(Size2D(2U, 2U), Size2D(5U, 5U), Size2D(14U, 14U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW), - WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(14U, 14U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW), - WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(17U, 23U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW), - WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(8U, 10U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW), - WinogradInfo(Size2D(2U, 3U), Size2D(3U, 3U), Size2D(8U, 10U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW), - WinogradInfo(Size2D(4U, 4U), Size2D(3U, 3U), Size2D(64U, 64U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW), - WinogradInfo(Size2D(4U, 4U), Size2D(3U, 3U), Size2D(64U, 64U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW) - })), - framework::dataset::make("Expected", { true, false, false, true, false, true, false, true, false })), - input_info, bias_info, output_info, winograd_info, expected) -{ - ARM_COMPUTE_EXPECT(bool(CLWinogradOutputTransformKernel::validate(&input_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), winograd_info)) == expected, framework::LogLevel::ERRORS); -} - -using CLWinogradOutputTransform = CLSynthetizeFunctionWithZeroConstantBorder<CLWinogradOutputTransformKernel, 0>; -using CLWinogradOutputTransformFixtureFP32 = WinogradOutputTransformValidationFixture<CLTensor, CLAccessor, CLWinogradOutputTransform, float>; -using CLWinogradOutputTransformFixtureFP16 = WinogradOutputTransformValidationFixture<CLTensor, CLAccessor, CLWinogradOutputTransform, half>; - -TEST_SUITE(NCHW) -TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradOutputTransformFixtureFP16, framework::DatasetMode::ALL, - combine(combine(SmallWinogradOutputTransformDatasetNCHW, - framework::dataset::make("DataType", { DataType::F16 })), - framework::dataset::make("ActivationInfo",{ ActivationLayerInfo() }) )) +FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY, + combine( + make("Input", TensorShape(8U, 8U, 32U)), + make("Weight", TensorShape(3U, 3U, 32U, 4U)), + make("Bias", TensorShape(4U)), + make("Output", TensorShape(6U, 6U, 4U)), + make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)), + make("Dilation", Size2D(1U, 1U)), + make("DataType", { DataType::F32 }), + ActivationFunctionsDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output - validate(CLAccessor(_target), _reference, tolerance_f16); + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); } +TEST_SUITE_END() // Conv3x3 -FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradOutputTransformFixtureFP16, framework::DatasetMode::NIGHTLY, - combine(combine(LargeWinogradOutputTransformDatasetNCHW, - framework::dataset::make("DataType", { DataType::F16 })), - framework::dataset::make("ActivationInfo",{ ActivationLayerInfo() }) )) -{ - // Validate output - validate(CLAccessor(_target), _reference, tolerance_f16); -} -TEST_SUITE_END() // FP16 -TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradOutputTransformFixtureFP32, framework::DatasetMode::ALL, - combine(combine(SmallWinogradOutputTransformDatasetNCHW, - framework::dataset::make("DataType", { DataType::F32 })), - framework::dataset::make("ActivationInfo",{ ActivationLayerInfo() }) )) +TEST_SUITE(Conv3x1) +FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(), + make("DataType", { DataType::F32 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output - validate(CLAccessor(_target), _reference, tolerance_f32); + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradOutputTransformFixtureFP32, framework::DatasetMode::NIGHTLY, - combine(combine(LargeWinogradOutputTransformDatasetNCHW, - framework::dataset::make("DataType", { DataType::F32 })), - framework::dataset::make("ActivationInfo",{ ActivationLayerInfo() }) )) +FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY, + combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(), + make("DataType", { DataType::F32 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output - validate(CLAccessor(_target), _reference, tolerance_f32); + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); } -TEST_SUITE_END() // FP32 -TEST_SUITE_END() // NCHW +TEST_SUITE_END() // Conv3x1 -TEST_SUITE(NHWC) -TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradOutputTransformFixtureFP16, framework::DatasetMode::ALL, - combine(combine(SmallWinogradOutputTransformDatasetNHWC_F16, - framework::dataset::make("DataType", { DataType::F16 })), - framework::dataset::make("ActivationInfo",{ ActivationLayerInfo() }) )) +TEST_SUITE(Conv1x3) +FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(), + make("DataType", { DataType::F32 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output - validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num_f16); + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradOutputTransformFixtureFP16, framework::DatasetMode::NIGHTLY, - combine(combine(LargeWinogradOutputTransformDatasetNHWC_F16, - framework::dataset::make("DataType", { DataType::F16 })), - framework::dataset::make("ActivationInfo",{ ActivationLayerInfo() }) )) +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLWinogradConvolutionLayerFastMathMixedDataLayoutFixture, framework::DatasetMode::PRECOMMIT, + combine( + make("Input", TensorShape(8U, 8U, 32U)), + make("Weight", TensorShape(1U, 3U, 32U, 1U)), + make("Bias", TensorShape(1U)), + make("Output", TensorShape(8U, 6U, 1U)), + make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)), + make("Dilation", Size2D(1U, 1U)), + make("DataType", { DataType::F32 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output - validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num_f16); -} -TEST_SUITE_END() // FP16 -TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradOutputTransformFixtureFP32, framework::DatasetMode::ALL, - combine(combine(SmallWinogradOutputTransformDatasetNHWC_F32, - framework::dataset::make("DataType", { DataType::F32 })), - framework::dataset::make("ActivationInfo",{ ActivationLayerInfo() }) )) -{ - // Validate output - validate(CLAccessor(_target), _reference, tolerance_f32); + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradOutputTransformFixtureFP32, framework::DatasetMode::NIGHTLY, - combine(combine(LargeWinogradOutputTransformDatasetNHWC_F32, - framework::dataset::make("DataType", { DataType::F32 })), - framework::dataset::make("ActivationInfo",{ ActivationLayerInfo() }) )) +FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY, + combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(), + make("DataType", { DataType::F32 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output - validate(CLAccessor(_target), _reference, tolerance_f32); -} -TEST_SUITE_END() // FP32 -TEST_SUITE_END() // NHWC -TEST_SUITE_END() // OutputTransform - -TEST_SUITE(ConvolutionLayer) -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( - framework::dataset::make("InputInfo", { - TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F16), // Insufficient padding - TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32), // Datatype mismatch - TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F32), // Stride y not supported - TensorInfo(TensorShape(16U, 16U, 8U), 1, DataType::F32), // Padding needed - TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32) // Kernel size not supported - }), - framework::dataset::make("WeightsInfo", { - TensorInfo(TensorShape(3U, 3U, 2U, 19U), 1, DataType::F16), - TensorInfo(TensorShape(3U, 3U, 2U, 19U), 1, DataType::QASYMM8), - TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32), - TensorInfo(TensorShape(3U, 3U, 8U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16) - })), - framework::dataset::make("BiasesInfo", { - TensorInfo(TensorShape(19U), 1, DataType::F16), - TensorInfo(TensorShape(19U), 1, DataType::F32), - TensorInfo(TensorShape(21U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32), - TensorInfo(TensorShape(16U), 1, DataType::F32) - })), - framework::dataset::make("OutputInfo", { - TensorInfo(TensorShape(17U, 31U, 19U), 1, DataType::F16), - TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32), - TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 16U, 16U), 1, DataType::F32), - TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32) - })), - framework::dataset::make("ConvInfo", { - PadStrideInfo(1, 1, 1, 1), - PadStrideInfo(1, 1, 1, 1), - PadStrideInfo(1, 2, 0, 0), - PadStrideInfo(1, 1, 1, 1), - PadStrideInfo(1, 1, 1, 0) - })), - framework::dataset::make("Expected", { false, false, false, false, false })), - input_info, weights_info, bias_info, output_info, conv_info, expected) -{ - ARM_COMPUTE_EXPECT(bool(CLWinogradConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info)) == expected, framework::LogLevel::ERRORS); + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); } +TEST_SUITE_END() // Conv1x3 -TEST_SUITE(FP32) -using CLWinogradConvolutionLayerFastMathFixture = WinogradConvolutionLayerFastMathValidationFixture<CLTensor, CLAccessor, CLWinogradConvolutionLayer, float>; -TEST_SUITE(Conv3x3) +TEST_SUITE(Conv5x5) FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsSmallDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(), + make("DataType", { DataType::F32 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + { // Validate output validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) -{ - // Validate output - validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); -} -TEST_SUITE_END() // Conv3x3 + combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(), + make("DataType", { DataType::F32 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) -TEST_SUITE(Conv3x1) -FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsSmallDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) +FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY, + combine( + make("Input", TensorShape(13U, 13U, 32U)), + make("Weight", TensorShape(5U, 5U, 32U, 4U)), + make("Bias", TensorShape(4U)), + make("Output", TensorShape(9U, 9U, 4U)), + make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)), + make("Dilation", Size2D(1U, 1U)), + make("DataType", { DataType::F32 }), + ActivationFunctionsDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); } -TEST_SUITE_END() // Conv3x1 +TEST_SUITE_END() // Conv5x5 -TEST_SUITE(Conv1x3) +TEST_SUITE(Conv5x1) FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsSmallDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(), + make("DataType", { DataType::F32 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + { // Validate output validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(), + make("DataType", { DataType::F32 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + { // Validate output validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); } -TEST_SUITE_END() // Conv1x3 +TEST_SUITE_END() // Conv5x1 -TEST_SUITE(Conv5x5) +TEST_SUITE(Conv1x5) FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsSmallDataset ), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(), + make("DataType", { DataType::F32 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output @@ -682,64 +428,63 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, fram } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset ), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(), + make("DataType", { DataType::F32 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); } -TEST_SUITE_END() // Conv5x5 +TEST_SUITE_END() // Conv1x5 -TEST_SUITE(Conv5x1) +TEST_SUITE(Conv1x7) FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsSmallDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(), + make("DataType", { DataType::F32 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) - +FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY, + combine( + make("Input", TensorShape(13U, 13U, 32U)), + make("Weight", TensorShape(1U, 7U, 32U, 4U)), + make("Bias", TensorShape(4U)), + make("Output", TensorShape(13U, 11U, 4U)), + make("PadStrideInfo", PadStrideInfo(1, 1, 0, 2)), + make("Dilation", Size2D(1U, 1U)), + make("DataType", { DataType::F32 }), + ActivationFunctionsDataset, + make("DataLayout", { DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); } -TEST_SUITE_END() // Conv5x1 +TEST_SUITE_END() // Conv1x7 -TEST_SUITE(Conv1x5) +TEST_SUITE(Conv7x1) FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsSmallDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer7x1Dataset(), + make("DataType", { DataType::F32 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); } +TEST_SUITE_END() // Conv7x1 -FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) +/** @note: Although 7x7 is in the kernels, reference implementation + * does not support it. So, it remains as a "test gap". + */ -{ - // Validate output - validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32); -} -TEST_SUITE_END() // Conv1x5 TEST_SUITE_END() // FP32 @@ -748,20 +493,36 @@ TEST_SUITE(FP16) using CLWinogradConvolutionLayerFastMathFixture16 = WinogradConvolutionLayerFastMathValidationFixture<CLTensor, CLAccessor, CLWinogradConvolutionLayer, half, float>; TEST_SUITE(Conv3x3) FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(), - framework::dataset::make("DataType", { DataType::F16 })), - ActivationFunctionsSmallDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(), + make("DataType", { DataType::F16 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16); } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(), - framework::dataset::make("DataType", { DataType::F16 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer3x3DatasetFp16Subset(), + make("DataType", { DataType::F16 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16); +} + +FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, + combine( + make("Input", TensorShape(8U, 8U, 32U)), + make("Weight", TensorShape(3U, 3U, 32U, 6U)), + make("Bias", TensorShape(6U)), + make("Output", TensorShape(6U, 6U, 6U)), + make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)), + make("Dilation", Size2D(1U, 1U)), + make("DataType", { DataType::F16 }), + ActivationFunctionsDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16); @@ -770,20 +531,20 @@ TEST_SUITE_END() // Conv3x3 TEST_SUITE(Conv3x1) FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(), - framework::dataset::make("DataType", { DataType::F16 })), - ActivationFunctionsSmallDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(), + make("DataType", { DataType::F16 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16); } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(), - framework::dataset::make("DataType", { DataType::F16 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer3x1DatasetFp16Subset(), + make("DataType", { DataType::F16 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16); @@ -792,20 +553,20 @@ TEST_SUITE_END() // Conv3x1 TEST_SUITE(Conv1x3) FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(), - framework::dataset::make("DataType", { DataType::F16 })), - ActivationFunctionsSmallDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(), + make("DataType", { DataType::F16 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16); } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(), - framework::dataset::make("DataType", { DataType::F16 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer1x3DatasetFp16Subset(), + make("DataType", { DataType::F16 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16); @@ -814,10 +575,10 @@ TEST_SUITE_END() // Conv1x3 TEST_SUITE(Conv5x5) FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(), - framework::dataset::make("DataType", { DataType::F16 })), - ActivationFunctionsSmallDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(), + make("DataType", { DataType::F16 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output @@ -825,11 +586,27 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(), - framework::dataset::make("DataType", { DataType::F16 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer5x5DatasetFp16Subset(), + make("DataType", { DataType::F16 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + +{ + // Validate output + validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16); +} +FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, + combine( + make("Input", TensorShape(13U, 13U, 32U)), + make("Weight", TensorShape(5U, 5U, 32U, 6U)), + make("Bias", TensorShape(6U)), + make("Output", TensorShape(9U, 9U, 6U)), + make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)), + make("Dilation", Size2D(1U, 1U)), + make("DataType", { DataType::F16 }), + ActivationFunctionsDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16); @@ -838,10 +615,10 @@ TEST_SUITE_END() // Conv5x5 TEST_SUITE(Conv5x1) FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(), - framework::dataset::make("DataType", { DataType::F16 })), - ActivationFunctionsSmallDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(), + make("DataType", { DataType::F16 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output @@ -849,10 +626,10 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(), - framework::dataset::make("DataType", { DataType::F16 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer5x1DatasetFp16Subset(), + make("DataType", { DataType::F16 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output @@ -862,10 +639,10 @@ TEST_SUITE_END() // Conv5x1 TEST_SUITE(Conv1x5) FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(), - framework::dataset::make("DataType", { DataType::F16 })), - ActivationFunctionsSmallDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(), + make("DataType", { DataType::F16 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output @@ -873,10 +650,10 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(), - framework::dataset::make("DataType", { DataType::F16 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer1x5DatasetFp16Subset(), + make("DataType", { DataType::F16 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output @@ -886,10 +663,10 @@ TEST_SUITE_END() // Conv1x5 TEST_SUITE(Conv1x7) FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(), - framework::dataset::make("DataType", { DataType::F16 })), - ActivationFunctionsSmallDataset), - framework::dataset::make("DataLayout", { DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(), + make("DataType", { DataType::F16 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NHWC }))) { // Validate output @@ -897,19 +674,47 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x7Dataset(), - framework::dataset::make("DataType", { DataType::F16 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer1x7DatasetFp16Subset(), + make("DataType", { DataType::F16 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NHWC }))) + +{ + // Validate output + validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16); +} +FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, + combine( + make("Input", TensorShape(13U, 13U, 32U)), + make("Weight", TensorShape(1U, 7U, 32U, 6U)), + make("Bias", TensorShape(6U)), + make("Output", TensorShape(13U, 7U, 6U)), + make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)), + make("Dilation", Size2D(1U, 1U)), + make("DataType", { DataType::F16 }), + ActivationFunctionsDataset, + make("DataLayout", { DataLayout::NHWC }))) { // Validate output validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16); } TEST_SUITE_END() // Conv1x7 -TEST_SUITE_END() // FP16 +TEST_SUITE(Conv7x1) +FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallWinogradConvolutionLayer7x1Dataset(), + make("DataType", { DataType::F16 }), + ActivationFunctionsSmallDataset, + make("DataLayout", { DataLayout::NHWC }))) + +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16); +} +TEST_SUITE_END() // Conv7x1 +TEST_SUITE_END() // FP16 TEST_SUITE_END() // ConvolutionLayer TEST_SUITE_END() // Winograd TEST_SUITE_END() // CL diff --git a/tests/validation/CMakeLists.txt b/tests/validation/CMakeLists.txt new file mode 100644 index 0000000000..448e96c4f9 --- /dev/null +++ b/tests/validation/CMakeLists.txt @@ -0,0 +1,146 @@ +# Copyright (c) 2023 Arm Limited. +# +# SPDX-License-Identifier: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +target_sources( + arm_compute_validation + PRIVATE UNIT/SafeIntegerOps.cpp + UNIT/Version.cpp + UNIT/TensorInfo.cpp + UNIT/TensorShape.cpp + UNIT/Utils.cpp + UNIT/SubTensorInfo.cpp + UNIT/WindowIterator.cpp + UNIT/LifetimeManager.cpp + UNIT/GPUTarget.cpp + CPP/DetectionPostProcessLayer.cpp + CPP/TopKV.cpp + CPP/DFT.cpp + CPP/Permute.cpp + CPP/NonMaximumSuppression.cpp) + +if(ENABLE_NEON) + target_sources( + arm_compute_validation + PRIVATE NEON/ElementwiseNegation.cpp + NEON/BoundingBoxTransform.cpp + NEON/ChannelShuffle.cpp + NEON/Logical.cpp + NEON/DilatedConvolutionLayer.cpp + NEON/PoolingLayer.cpp + NEON/BitwiseNot.cpp + NEON/FillBorder.cpp + NEON/ElementwiseRsqrtLayer.cpp + NEON/DepthConcatenateLayer.cpp + NEON/ElementwisePower.cpp + NEON/Fill.cpp + NEON/ROIPoolingLayer.cpp + NEON/LSTMLayer.cpp + NEON/ArithmeticSubtraction.cpp + NEON/GEMMLowp.cpp + NEON/Unstack.cpp + NEON/Slice.cpp + NEON/Pooling3dLayer.cpp + NEON/BitwiseOr.cpp + NEON/HeightConcatenateLayer.cpp + NEON/ReshapeLayer.cpp + NEON/SoftmaxLayer.cpp + NEON/Gather.cpp + NEON/CropResize.cpp + NEON/ReductionOperation.cpp + NEON/PixelWiseMultiplication.cpp + NEON/LogSoftmaxLayer.cpp + NEON/DepthConvertLayer.cpp + NEON/Flatten.cpp + NEON/ElementwiseKernelSelection.cpp + NEON/DepthToSpaceLayer.cpp + NEON/ElementwiseAbsoluteValue.cpp + NEON/PadLayer.cpp + NEON/MeanStdDevNormalizationLayer.cpp + NEON/GlobalPoolingLayer.cpp + NEON/RNNLayer.cpp + NEON/DetectionPostProcessLayer.cpp + NEON/ElementwiseRound.cpp + NEON/BitwiseXor.cpp + NEON/GEMM.cpp + NEON/FuseBatchNormalization.cpp + NEON/BitwiseAnd.cpp + NEON/ElementwiseMax.cpp + NEON/ReduceMean.cpp + NEON/Reverse.cpp + NEON/L2NormalizeLayer.cpp + NEON/Convolution3D.cpp + NEON/ArithmeticAddition.cpp + NEON/ActivationLayer.cpp + NEON/SpaceToBatchLayer.cpp + NEON/ElementwiseLog.cpp + NEON/LSTMLayerQuantized.cpp + NEON/Im2Col.cpp + NEON/DequantizationLayer.cpp + NEON/DeconvolutionLayer.cpp + NEON/Select.cpp + NEON/ElementwiseSin.cpp + NEON/PReluLayer.cpp + NEON/BatchNormalizationLayer.cpp + NEON/ElementwiseMin.cpp + NEON/InstanceNormalizationLayer.cpp + NEON/ROIAlignLayer.cpp + NEON/ElementwiseDivision.cpp + NEON/ElementwiseExpLayer.cpp + NEON/ArgMinMax.cpp + NEON/QLSTMLayerNormalization.cpp + NEON/Col2Im.cpp + NEON/Split.cpp + NEON/Transpose.cpp + NEON/GenerateProposalsLayer.cpp + NEON/StackLayer.cpp + NEON/WidthConcatenateLayer.cpp + NEON/NormalizationLayer.cpp + NEON/Copy.cpp + NEON/ElementwiseSquareDiff.cpp + NEON/MaxUnpoolingLayer.cpp + NEON/Permute.cpp + NEON/Comparisons.cpp + NEON/BatchConcatenateLayer.cpp + NEON/Tile.cpp + NEON/BatchToSpaceLayer.cpp + NEON/SpaceToDepthLayer.cpp + NEON/DepthwiseConvolutionLayerNative.cpp + NEON/QuantizationLayer.cpp + NEON/ConvertFullyConnectedWeights.cpp + NEON/Floor.cpp + NEON/FFT.cpp + NEON/Cast.cpp + NEON/PriorBoxLayer.cpp + NEON/Scale.cpp + NEON/ReorgLayer.cpp + NEON/Range.cpp + NEON/DirectConvolutionLayer.cpp + NEON/DepthwiseConvolutionLayer.cpp + NEON/FullyConnectedLayer.cpp + NEON/ConvolutionLayer.cpp + NEON/StridedSlice.cpp + NEON/ReorderLayer.cpp + NEON/UNIT/DynamicTensor.cpp + NEON/UNIT/TensorAllocator.cpp + NEON/UNIT/MemoryManager.cpp + NEON/UNIT/RuntimeContext.cpp) +endif() diff --git a/tests/validation/CPP/DFT.cpp b/tests/validation/CPP/DFT.cpp index e19e850589..84431399be 100644 --- a/tests/validation/CPP/DFT.cpp +++ b/tests/validation/CPP/DFT.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -125,7 +125,7 @@ DATA_TEST_CASE(Real, framework::DatasetMode::ALL, shapes_2d_dft, auto backward = reference::ridft_2d(forward, is_odd); // Validate with input - validate(SimpleTensorAccessor<float>(src), backward, RelativeTolerance<float>(0.1f)); + validate(SimpleTensorAccessor<float>(src), backward, RelativeTolerance<float>(0.1f), 0.f, AbsoluteTolerance<float>(0.001f)); } DATA_TEST_CASE(Complex, framework::DatasetMode::ALL, shapes_2d_dft, diff --git a/tests/validation/Helpers.cpp b/tests/validation/Helpers.cpp index eb8bdcf5a7..560460fd33 100644 --- a/tests/validation/Helpers.cpp +++ b/tests/validation/Helpers.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,9 +22,12 @@ * SOFTWARE. */ #include "tests/validation/Helpers.h" +#include "tests/framework/Asserts.h" #include <algorithm> #include <cmath> +#include <cstdint> +#include <tuple> namespace arm_compute { @@ -32,82 +35,6 @@ namespace test { namespace validation { -void fill_mask_from_pattern(uint8_t *mask, int cols, int rows, MatrixPattern pattern) -{ - unsigned int v = 0; - std::mt19937 gen(library->seed()); - std::bernoulli_distribution dist(0.5); - - for(int r = 0; r < rows; ++r) - { - for(int c = 0; c < cols; ++c, ++v) - { - uint8_t val = 0; - - switch(pattern) - { - case MatrixPattern::BOX: - val = 255; - break; - case MatrixPattern::CROSS: - val = ((r == (rows / 2)) || (c == (cols / 2))) ? 255 : 0; - break; - case MatrixPattern::DISK: - val = (((r - rows / 2.0f + 0.5f) * (r - rows / 2.0f + 0.5f)) / ((rows / 2.0f) * (rows / 2.0f)) + ((c - cols / 2.0f + 0.5f) * (c - cols / 2.0f + 0.5f)) / ((cols / 2.0f) * - (cols / 2.0f))) <= 1.0f ? 255 : 0; - break; - case MatrixPattern::OTHER: - val = (dist(gen) ? 0 : 255); - break; - default: - return; - } - - mask[v] = val; - } - } - - if(pattern == MatrixPattern::OTHER) - { - std::uniform_int_distribution<uint8_t> distribution_u8(0, ((cols * rows) - 1)); - mask[distribution_u8(gen)] = 255; - } -} - -HarrisCornersParameters harris_corners_parameters() -{ - HarrisCornersParameters params; - - std::mt19937 gen(library->seed()); - std::uniform_real_distribution<float> threshold_dist(0.f, 0.001f); - std::uniform_real_distribution<float> sensitivity(0.04f, 0.15f); - std::uniform_real_distribution<float> euclidean_distance(0.f, 30.f); - std::uniform_int_distribution<uint8_t> int_dist(0, 255); - - params.threshold = threshold_dist(gen); - params.sensitivity = sensitivity(gen); - params.min_dist = euclidean_distance(gen); - params.constant_border_value = int_dist(gen); - - return params; -} - -CannyEdgeParameters canny_edge_parameters() -{ - CannyEdgeParameters params; - - std::mt19937 gen(library->seed()); - std::uniform_int_distribution<uint8_t> int_dist(0, 255); - std::uniform_int_distribution<uint8_t> threshold_dist(2, 255); - - params.constant_border_value = int_dist(gen); - params.upper_thresh = threshold_dist(gen); // upper_threshold >= 2 - threshold_dist = std::uniform_int_distribution<uint8_t>(1, params.upper_thresh - 1); - params.lower_thresh = threshold_dist(gen); // lower_threshold >= 1 && lower_threshold < upper_threshold - - return params; -} - template <> SimpleTensor<float> convert_from_asymmetric(const SimpleTensor<uint8_t> &src) { @@ -401,6 +328,249 @@ std::pair<int, int> get_symm_quantized_per_channel_bounds(const QuantizationInfo return std::pair<int, int> { min_bound, max_bound }; } +void add_padding_x(std::initializer_list<ITensor *> tensors, const DataLayout &data_layout, bool only_right_pad) +{ + if(data_layout == DataLayout::NHWC) + { + constexpr unsigned int lower = 1U; + constexpr unsigned int upper = 16U; + + std::uniform_int_distribution<unsigned int> distribution(lower, upper); + size_t seed_offset = 0; + + for(ITensor *tensor : tensors) + { + ARM_COMPUTE_ERROR_ON(!tensor->info()->is_resizable()); + + std::mt19937 gen(library->seed() + seed_offset++); + + const unsigned int right = distribution(gen); + const unsigned int left = only_right_pad ? 0 : distribution(gen); + + tensor->info()->extend_padding(PaddingSize(0U, right, 0U, left)); + } + } +} + +QuantizationHint suggest_conv_dst_q_info_and_bias(const QuantizationInfo &in_q_info, + const QuantizationInfo &weight_q_info, + int32_t height, + int32_t width, + int32_t channels, + DataType data_type, + float bias_fraction) +{ + /** Quantization Setup of convolution + * + * Just like any other multiply-accummulate, convolution (2D) operation + * multiplies and accumulates the input and weight tensors. This operation + * takes place in three dimensions: height, width and channels. All of them + * belong to the weight tensor. + * + * The formula for simple convolution can be written as: + * C = sum_h sum_w sum_c(I[h_offset + h, w_offset + w, c] * W[h, w, c]) + * + * Here, h_offset and w_offset are the starting positions in the image. Effects + * of paddings are ignored. This accumulation reduces to something like + * + * C = sum_m(I_index * W_hwc) + * where m is height x width x channels. + * + * Non-unit strides and/or dilations do not change the probabilistic nature of + * this sum because we always iterate as the size of the weight tensor. + * + * Paddings may affect this summation, but it's a boundary condition and so is + * neglected for brevity. + */ + + return suggest_mac_dst_q_info_and_bias(in_q_info, weight_q_info, height * width * channels, data_type, bias_fraction); +} + +QuantizationHint suggest_matmul_dst_q_info_and_bias(const QuantizationInfo &lhs_q_info, + const QuantizationInfo &rhs_q_info, + int32_t m, int32_t n, int32_t k, DataType data_type, + float bias_fraction) +{ + ARM_COMPUTE_UNUSED(m, n); + + /** Quantization Setup of matrix multiplication + * + * We have a matrix multiplication of the form C = A * B + D + * where A is (m X k), B is (k x n) and C is therefore (m x n). + * The bias, D is (1 x n). + * + * If we have some distributional statistics of A, B and D, i.e. mean and variance, + * we can estimate the mean and variance of a single value in C matrix and pick + * good scale and offset values for the output and have non-saturated tests. + * + * Each element in the output matrix can be calculated as follows: + * C_ij = sum_k(A_ik * B_kj) + D_j + * + * Note: All possible A_ik, B_kj, D_j random variables are assumed mutually independent. + * Note: In quantized operators, bias is an integer. But, its quantization scale is + * assumed to be equal to lhs_scale * rhs_scale, and offset equal to 0. + * Note: Since, bias is an integer that should be given as input, we need to pick responsible + * values when adding it on top of the summation. This is where "bias_fraction" comes + * into play. Based on the fraction given, we also return suggested bias range (min/max) + * for not saturating the output. + * + * Because all random variables are mutually independent, any C_ij has the same statistics, + * which is why we return a single destination quantization info object; which is why we can + * resort to a more general calculation explained in suggest_mac_dst_q_info_and_bias(). + * + * From a probabilistic perspective, the above calculation reduces to + * c = sum_k (a_k * b_k) + d + */ + + return suggest_mac_dst_q_info_and_bias(lhs_q_info, rhs_q_info, k, data_type, bias_fraction); +} + +QuantizationHint suggest_mac_dst_q_info_and_bias( + const QuantizationInfo &a_q_info, const QuantizationInfo &b_q_info, int32_t K, DataType data_type, float bias_fraction, int num_sd) +{ + QuantizationInfo c_q_info; + + ARM_COMPUTE_ASSERT(data_type == DataType::QASYMM8 || data_type == DataType::QASYMM8_SIGNED); + + const int32_t t_max = static_cast<int32_t>(data_type == DataType::QASYMM8 ? std::numeric_limits<uint8_t>::max() : std::numeric_limits<int8_t>::max()); + const int32_t t_min = static_cast<int32_t>(data_type == DataType::QASYMM8 ? std::numeric_limits<uint8_t>::min() : std::numeric_limits<int8_t>::min()); + + /** Quantization Setup of multiply-accummulate + * + * Expression (in float): + * C = sum_k ( A_k * B_k ) + D + * + * Lemma: An affine transformation (i.e. aX + b) to a discrete uniform random variable + * creates another discrete uniform random variable. + * + * Terminology: + * E[X]: Mean of the random variable X (sometimes referred as mu_x) + * var(X): Variance of the random variable X (someimes referred as sigma^2_x) + * std(X): sqrt(var(X)), standard deviation of X + * + * 1) Calculate the mean: + * E[C] = sum_k( E[A_k] * E[B_k] ) + D = K * mean_a * mean_b + mean_d + * + * Since elements of A and B are uniformly distributed random variables, we have + * mean_a = (max_a + min_a) / 2, mean_b = (max_b + min_b ) / 2 + * max_a and min_a can be calculated with the scale_a/b and offset_a/b + * by replacing data type minimum and maximums in the equations + * + * We don't know mean_d because we have to choose it based on bias_fraction. If we call + * the summation as M_int, similar to above, we have: + * + * E[C_int] = sum_k( E[A_k_int] * E[B_k_int] ) + E[D_int] = K * mean_a_int * mean_b_int + mean_d_int + * \___________________________/ + * E[M_int] + * + * We choose a bias mean proportional to the integer summation. This proportion is "bias_fraction". + * So, we have D_int = f * M_int (f: fraction), and + * E[D_int] = mean_d_int = f * E[M_int] + * + * This also means, for floating point value of D, the following: + * E[D] = mean_d = E[D_int] * a_scale * b_scale + * + * 2) Calculate the variance: + * var(C) = sum_k( var(A_k * B_k) ) + var(D) + * = sum_k ( E[A_k^2 * B_k^2] - E[A_k]^2E[B_k^2] ) + * = ... + * = K * (var_a * var_b + var_a * mean^2_b + var_b * mean^2_a) + var_d + * + * Similarly, due to uniform random variable properties, we have + * var_a = (max_a - min_a)^2 / 12 + * var_b = (max_b - min_b)^2 / 12 + * + * Again, we don't know var_d as we don't know the bias. As set out in the previous section, we have + * var(D_int) = var(f * M_int) = f^2 * var(M_int) + * + * Using the same expression, we can find var(M_int): + * var(C_int) = sum_k( var(A_k_int * B_k_int) ) + var(D_int) + * = sum_k ( E[A_k_int^2 * B_k_int^2] - E[A_k_int]^2E[B_k_int^2] ) + * = ... + * = K * (var_a_int * var_b_int + var_a_int * mean^2_b_int + var_b_int * mean^2_a_int) + var_d_int + * \_______________________________________________________________________________/ + * var(M_int) + * + * Now, we know mean and variance of D_int, we can return a suitable bias range with + * [mean_d_int +/- 2 * std_d_int] + * + * This also means, for floating point value of D, the following: + * var(D) = var_d = var(D_int) * a_scale^2 * b_scale^2 + * + * E[D] and var(D) calculated in steps (1) and (2) can be substituted into E[C] and var(C) calculatons. + * + * 3) Now, we have an idea of what would an average C will look like and how much deviation + * is present around it. The exact distribution of C is difficult to come up with dependent on K. + * But, as K increases, due to Central Limit Theorem, it'll look more like a bell shaped figure, + * approaching normal distribution. + * + * This is useful because, in normal distribution, we know that values +- 2 std_deviation around + * the mean constitute 95% of the values. Therefore, setting a plausible range for us: + * C_range = [C_min, C_max] = [mean_c - 2 * std_c, mean_c + 2 * std_c] + * + * 4) + * If we map this [C_min, C_max] to [0, 255] or [-128, 127] depending on the signedness of the + * data type, we can find a suitable scale and offset for the output. On average, it's expected + * that 5% of the output values will saturate and 95% will remain in the range. + * + * The equations to be solved for offset_c and scale_c are: + * C_min = scale_c * (type_min - offset_c) + * C_max = scale_c * (type_max - offset_c) + */ + + const int32_t a_offset = a_q_info.uniform().offset; + const float a_scale = a_q_info.uniform().scale; + const int32_t b_offset = b_q_info.uniform().offset; + const float b_scale = b_q_info.uniform().scale; + + // Integer value statistics. Valid for both Lhs/A and Rhs/B + const float mean_a_int = (t_max + t_min) / 2.f; + constexpr float var_a_int = (256 * 256 - 1) / 12.f; // Discrete uniform RV variance + const float mean_b_int = mean_a_int; // A_int and B_int has the same stats + constexpr float var_b_int = var_a_int; + + // Lhs/A stats + const float max_a = (t_max - a_offset) * a_scale; + const float min_a = (t_min - a_offset) * a_scale; + const float mean_a = (max_a + min_a) / 2; + const float var_a = (max_a - min_a) * (max_a - min_a) / 12; + + // Rhs/B stats + const float max_b = (t_max - b_offset) * b_scale; + const float min_b = (t_min - b_offset) * b_scale; + const float mean_b = (max_b + min_b) / 2; + const float var_b = (max_b - min_b) * (max_b - min_b) / 12; + + // Integer multiplication output/M stats + const float mean_m_int = K * mean_a_int * mean_b_int; + const float var_m_int = K * (var_a_int * var_b_int + mean_a_int * var_b_int + mean_b_int + var_a_int); + const float std_m_int = sqrt(var_m_int); + + // Bias/D both Int and Float statistics + const float mean_d_int = bias_fraction * mean_m_int; + const float std_d_int = bias_fraction * std_m_int; + const float mean_d = a_scale * b_scale * mean_d_int; + const float std_d = a_scale * b_scale * std_d_int; + const float var_d = std_d * std_d; + + // Also calculate the suggested bias range + const int32_t min_bias = mean_d_int - (num_sd * std_d_int); + const int32_t max_bias = mean_d_int + (num_sd * std_d_int); + + // Output/C stats + const float mean_out = K * mean_a * mean_b + mean_d; + const float var_out = K * (var_a * var_b + var_a * mean_b * mean_b + var_b * mean_a * mean_a) + var_d; + const float std_out = sqrt(var_out); + + // Output quantization setup + const float scale_out = (2 * num_sd) * std_out / 255; + const int32_t offset_out = static_cast<int32_t>(t_min - (mean_out - (num_sd * std_out)) / scale_out); + + c_q_info = QuantizationInfo(scale_out, offset_out); + + return { c_q_info, min_bias, max_bias }; +} + template void get_tile(const SimpleTensor<float> &in, SimpleTensor<float> &roi, const Coordinates &coord); template void get_tile(const SimpleTensor<half> &in, SimpleTensor<half> &roi, const Coordinates &coord); template void get_tile(const SimpleTensor<int> &in, SimpleTensor<int> &roi, const Coordinates &coord); @@ -413,6 +583,8 @@ template void transpose_matrix(const SimpleTensor<half> &in, SimpleTensor<half> template void transpose_matrix(const SimpleTensor<int> &in, SimpleTensor<int> &out); template void transpose_matrix(const SimpleTensor<short> &in, SimpleTensor<short> &out); template void transpose_matrix(const SimpleTensor<char> &in, SimpleTensor<char> &out); +template void transpose_matrix(const SimpleTensor<int8_t> &in, SimpleTensor<int8_t> &out); +template void transpose_matrix(const SimpleTensor<uint8_t> &in, SimpleTensor<uint8_t> &out); template void matrix_multiply(const SimpleTensor<float> &a, const SimpleTensor<float> &b, SimpleTensor<float> &out); template void matrix_multiply(const SimpleTensor<half> &a, const SimpleTensor<half> &b, SimpleTensor<half> &out); diff --git a/tests/validation/Helpers.h b/tests/validation/Helpers.h index 604840b33e..e044620556 100644 --- a/tests/validation/Helpers.h +++ b/tests/validation/Helpers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2023,2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,16 +21,19 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_VALIDATION_HELPERS_H -#define ARM_COMPUTE_TEST_VALIDATION_HELPERS_H +#ifndef ACL_TESTS_VALIDATION_HELPERS_H +#define ACL_TESTS_VALIDATION_HELPERS_H #include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" +#include "arm_compute/function_info/ActivationLayerInfo.h" + #include "support/Half.h" #include "tests/Globals.h" #include "tests/SimpleTensor.h" -#include <math.h> +#include <cmath> +#include <cstdint> #include <random> #include <type_traits> #include <utility> @@ -50,6 +53,23 @@ template <> struct is_floating_point<half> : public std::true_type { }; +template <> +struct is_floating_point<bfloat16> : public std::true_type +{ +}; + +/** Helper struct to store the hints for + * - destination quantization info + * - minimum bias value + * - maximum bias value + * in quantized test construction. + */ +struct QuantizationHint +{ + QuantizationInfo q_info; + int32_t bias_min; + int32_t bias_max; +}; /** Helper function to get the testing range for each activation layer. * @@ -63,13 +83,13 @@ std::pair<T, T> get_activation_layer_test_bounds(ActivationLayerInfo::Activation { std::pair<T, T> bounds; - switch(data_type) + switch (data_type) { case DataType::F16: { using namespace half_float::literal; - switch(activation) + switch (activation) { case ActivationLayerInfo::ActivationFunction::TANH: case ActivationLayerInfo::ActivationFunction::SQUARE: @@ -89,7 +109,7 @@ std::pair<T, T> get_activation_layer_test_bounds(ActivationLayerInfo::Activation break; } case DataType::F32: - switch(activation) + switch (activation) { case ActivationLayerInfo::ActivationFunction::SOFT_RELU: // Reduce range as exponent overflows @@ -111,72 +131,6 @@ std::pair<T, T> get_activation_layer_test_bounds(ActivationLayerInfo::Activation return bounds; } -/** Fill mask with the corresponding given pattern. - * - * @param[in,out] mask Mask to be filled according to pattern - * @param[in] cols Columns (width) of mask - * @param[in] rows Rows (height) of mask - * @param[in] pattern Pattern to fill the mask according to - */ -void fill_mask_from_pattern(uint8_t *mask, int cols, int rows, MatrixPattern pattern); - -/** Calculate output tensor shape give a vector of input tensor to concatenate - * - * @param[in] input_shapes Shapes of the tensors to concatenate across depth. - * - * @return The shape of output concatenated tensor. - */ -TensorShape calculate_depth_concatenate_shape(const std::vector<TensorShape> &input_shapes); - -/** Calculate output tensor shape for the concatenate operation along a given axis - * - * @param[in] input_shapes Shapes of the tensors to concatenate across width. - * @param[in] axis Axis to use for the concatenate operation - * - * @return The shape of output concatenated tensor. - */ -TensorShape calculate_concatenate_shape(const std::vector<TensorShape> &input_shapes, size_t axis); - -/** Parameters of Harris Corners algorithm. */ -struct HarrisCornersParameters -{ - float threshold{ 0.f }; /**< Threshold */ - float sensitivity{ 0.f }; /**< Sensitivity */ - float min_dist{ 0.f }; /**< Minimum distance */ - uint8_t constant_border_value{ 0 }; /**< Border value */ -}; - -/** Generate parameters for Harris Corners algorithm. */ -HarrisCornersParameters harris_corners_parameters(); - -/** Parameters of Canny edge algorithm. */ -struct CannyEdgeParameters -{ - int32_t upper_thresh{ 255 }; - int32_t lower_thresh{ 0 }; - uint8_t constant_border_value{ 0 }; -}; - -/** Generate parameters for Canny edge algorithm. */ -CannyEdgeParameters canny_edge_parameters(); - -/** Helper function to fill the Lut random by a ILutAccessor. - * - * @param[in,out] table Accessor at the Lut. - * - */ -template <typename T> -void fill_lookuptable(T &&table) -{ - std::mt19937 generator(library->seed()); - std::uniform_int_distribution<typename T::value_type> distribution(std::numeric_limits<typename T::value_type>::min(), std::numeric_limits<typename T::value_type>::max()); - - for(int i = std::numeric_limits<typename T::value_type>::min(); i <= std::numeric_limits<typename T::value_type>::max(); i++) - { - table[i] = distribution(generator); - } -} - /** Convert an asymmetric quantized simple tensor into float using tensor quantization information. * * @param[in] src Quantized tensor. @@ -191,6 +145,7 @@ SimpleTensor<float> convert_from_asymmetric(const SimpleTensor<T> &src); * @param[in] src Float tensor. * @param[in] quantization_info Quantification information. * + * \relates arm_compute::test::SimpleTensor * @return Quantized tensor. */ template <typename T> @@ -209,7 +164,7 @@ SimpleTensor<float> convert_from_symmetric(const SimpleTensor<T> &src); * * @param[in] src Float tensor. * @param[in] quantization_info Quantification information. - * + * \relates arm_compute::test::SimpleTensor * @return Quantized tensor. */ template <typename T> @@ -277,8 +232,83 @@ std::pair<int, int> get_quantized_qasymm8_signed_bounds(const QuantizationInfo & * @param[in] max Floating point maximum value to be quantized * @param[in] channel_id Channel id for per channel quantization info. */ -std::pair<int, int> get_symm_quantized_per_channel_bounds(const QuantizationInfo &quant_info, float min, float max, size_t channel_id = 0); +std::pair<int, int> +get_symm_quantized_per_channel_bounds(const QuantizationInfo &quant_info, float min, float max, size_t channel_id = 0); + +/** Add random padding along the X axis (between 1 and 16 columns per side) to all the input tensors. + * This is used in our validation suite in order to simulate implicit padding addition after configuring, but before allocating. + * + * @param[in] tensors List of tensors to add padding to + * @param[in] data_layout (Optional) Data layout of the operator + * @param[in] only_right_pad (Optional) Only right padding testing, in case of cl image padding + * + * @note This function adds padding to the input tensors only if data_layout == DataLayout::NHWC + */ +void add_padding_x(std::initializer_list<ITensor *> tensors, + const DataLayout &data_layout = DataLayout::NHWC, + bool only_right_pad = false); + +/** For 2d convolution, given the Lhs/Rhs matrix quantization informations and the convolution dimension, + * calculate a suitable output quantization and suggested bias range for obtaining non-saturated outputs with high probability. + * + * @param[in] in_q_info Input matrix quantization info + * @param[in] weight_q_info Weights matrix quantization info + * @param[in] height Height of the weights tensor + * @param[in] width Width of the weights tensors + * @param[in] channels Number of input channels + * @param[in] data_type data type, only QASYMM8, QASYMM8_SIGNED are supported + * @param[in] bias_fraction see @ref suggest_mac_dst_q_info_and_bias() for explanation + * + * @return QuantizationHint object containing the suggested output quantization info and min/max bias range + */ +QuantizationHint suggest_conv_dst_q_info_and_bias(const QuantizationInfo &in_q_info, + const QuantizationInfo &weight_q_info, + int32_t height, + int32_t width, + int32_t channels, + DataType data_type, + float bias_fraction); + +/** For a matrix multiplication, given the Lhs/Rhs matrix quantization informations and the matrix multiplication dimensions, + * calculate a suitable output quantization and suggested bias range for obtaining non-saturated outputs with high probability. + * + * @param[in] lhs_q_info Lhs matrix quantization info + * @param[in] rhs_q_info Rhs matrix quantization info + * @param[in] m Number of rows of Lhs matrix + * @param[in] n Number of columns of Rhs Matrix + * @param[in] k Number of rows/columns of Rhs/Lhs Matrix + * @param[in] data_type data type, only QASYMM8, QASYMM8_SIGNED are supported + * @param[in] bias_fraction see @ref suggest_mac_dst_q_info_and_bias() for explanation + * + * @return QuantizationHint object containing the suggested output quantization info and min/max bias range + */ +QuantizationHint suggest_matmul_dst_q_info_and_bias(const QuantizationInfo &lhs_q_info, + const QuantizationInfo &rhs_q_info, + int32_t m, + int32_t n, + int32_t k, + DataType data_type, + float bias_fraction); + +/** For a multiply-accumulate (mac), given the Lhs/Rhs vector quantization informations and the dot product dimensions, + * calculate a suitable output quantization and suggested bias range for obtaining non-saturated outputs with high probability. + * + * @param[in] lhs_q_info Lhs matrix quantization info + * @param[in] rhs_q_info Rhs matrix quantization info + * @param[in] k number of accumulations taking place in the sum, i.e. c_k = sum_k(a_k * b_k) + * @param[in] data_type data type, only QASYMM8, QASYMM8_SIGNED are supported + * @param[in] bias_fraction the fraction of bias amplitude compared to integer accummulation. + * @param[in] num_sd (Optional) number of standard deviations we allow from the mean. Default value is 2. + * + * @return QuantizationHint object containing the suggested output quantization info and min/max bias range + */ +QuantizationHint suggest_mac_dst_q_info_and_bias(const QuantizationInfo &lhs_q_info, + const QuantizationInfo &rhs_q_info, + int32_t k, + DataType data_type, + float bias_fraction, + int num_sd = 2); } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_VALIDATION_HELPERS_H */ +#endif // ACL_TESTS_VALIDATION_HELPERS_H diff --git a/tests/validation/NEON/ActivationLayer.cpp b/tests/validation/NEON/ActivationLayer.cpp index 577603d07d..73f5de68ac 100644 --- a/tests/validation/NEON/ActivationLayer.cpp +++ b/tests/validation/NEON/ActivationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,10 +23,13 @@ */ #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/Traits.h" +#include "arm_compute/core/utils/StringUtils.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/RuntimeContext.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/common/cpuinfo/CpuIsaInfo.h" +#include "src/cpu/kernels/CpuActivationKernel.h" #include "tests/NEON/Accessor.h" #include "tests/PaddingCalculator.h" #include "tests/datasets/ActivationFunctionsDataset.h" @@ -37,7 +40,8 @@ #include "tests/validation/Validation.h" #include "tests/validation/fixtures/ActivationLayerFixture.h" -#include "support/Requires.h" +#include "arm_compute/Acl.hpp" +#include "support/AclRequires.h" namespace arm_compute { @@ -65,14 +69,16 @@ RelativeTolerance<float> relative_tolerance(DataType data_type, ActivationLayerI case ActivationLayerInfo::ActivationFunction::SQRT: case ActivationLayerInfo::ActivationFunction::TANH: case ActivationLayerInfo::ActivationFunction::HARD_SWISH: + case ActivationLayerInfo::ActivationFunction::SWISH: + case ActivationLayerInfo::ActivationFunction::GELU: switch(data_type) { case DataType::F16: -#if defined(__ARM_FEATURE_SVE) +#if defined(ENABLE_SVE) return RelativeTolerance<float>(0.25f); -#else // !defined(__ARM_FEATURE_SVE) +#else // !defined(ENABLE_SVE) return RelativeTolerance<float>(0.1f); -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ENABLE_SVE) default: return RelativeTolerance<float>(0.05f); } @@ -80,11 +86,11 @@ RelativeTolerance<float> relative_tolerance(DataType data_type, ActivationLayerI switch(data_type) { case DataType::F16: -#if defined(__ARM_FEATURE_SVE) +#if defined(ENABLE_SVE) return RelativeTolerance<float>(0.9f); -#else // !defined(__ARM_FEATURE_SVE) +#else // !defined(ENABLE_SVE) return RelativeTolerance<float>(0.01f); -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ENABLE_SVE) default: return RelativeTolerance<float>(0.00001f); } @@ -107,15 +113,16 @@ AbsoluteTolerance<float> absolute_tolerance(DataType data_type, ActivationLayerI case ActivationLayerInfo::ActivationFunction::LOGISTIC: case ActivationLayerInfo::ActivationFunction::SQRT: case ActivationLayerInfo::ActivationFunction::TANH: + case ActivationLayerInfo::ActivationFunction::SWISH: case ActivationLayerInfo::ActivationFunction::HARD_SWISH: switch(data_type) { case DataType::F16: -#if defined(__ARM_FEATURE_SVE) +#if defined(ENABLE_SVE) return AbsoluteTolerance<float>(0.25f); -#else // !defined(__ARM_FEATURE_SVE) +#else // !defined(ENABLE_SVE) return AbsoluteTolerance<float>(0.01f); -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ENABLE_SVE) default: return AbsoluteTolerance<float>(0.00001f); } @@ -123,11 +130,11 @@ AbsoluteTolerance<float> absolute_tolerance(DataType data_type, ActivationLayerI switch(data_type) { case DataType::F16: -#if defined(__ARM_FEATURE_SVE) +#if defined(ENABLE_SVE) return AbsoluteTolerance<float>(0.9f); -#else // !defined(__ARM_FEATURE_SVE) +#else // !defined(ENABLE_SVE) return AbsoluteTolerance<float>(0.01f); -#endif // defined(__ARM_FEATURE_SVE) +#endif // defined(ENABLE_SVE) default: return AbsoluteTolerance<float>(0.00001f); } @@ -169,7 +176,8 @@ const auto CNNDataTypes = framework::dataset::make("DataType", DataType::F32, }); -const auto NeonActivationFunctionsDataset = concat(datasets::ActivationFunctions(), framework::dataset::make("ActivationFunction", ActivationLayerInfo::ActivationFunction::HARD_SWISH)); +const auto NeonActivationFunctionsDataset = concat(datasets::ActivationFunctions(), + framework::dataset::make("ActivationFunction", { ActivationLayerInfo::ActivationFunction::HARD_SWISH, ActivationLayerInfo::ActivationFunction::SWISH })); /** Input data sets. */ const auto ActivationDataset = combine(combine(framework::dataset::make("InPlace", { false, true }), NeonActivationFunctionsDataset), framework::dataset::make("AlphaBeta", { 0.5f, 1.f })); @@ -215,6 +223,48 @@ void test_float_sqrt_boundary_value() TEST_SUITE(NEON) TEST_SUITE(ActivationLayer) +/** Test case for memory injection in @ref cpu::CpuWinogradConv2d. + * + * Configure the operator once and inject memory at run-time in multiple executions. + * + * Checks performed in order: + * - Both runs compute the same output + */ +TEST_CASE(ActivationAPI, framework::DatasetMode::ALL) +{ + acl::StatusCode err = acl::StatusCode::Success; + + // Create context & Queue + acl::Context ctx(acl::Target::Cpu, &err); + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); + + acl::Queue queue(ctx, &err); + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); + + // Create activation operator + acl::TensorDescriptor src_info({ 2, 3 }, acl::DataType::Float32); + acl::TensorDescriptor dst_info({ 2, 3 }, acl::DataType::Float32); + acl::ActivationDesc desc{ AclRelu, 6.f, 0.f, false }; + + acl::Activation act(ctx, src_info, dst_info, desc, &err); + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); + + // Create tensors and feed + acl::Tensor src(ctx, src_info, &err); + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); + acl::Tensor dst(ctx, dst_info, &err); + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); + + acl::TensorPack pack(ctx); + err = pack.add(src, ACL_SRC); + err = pack.add(dst, ACL_DST); + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); + + // Execute operator + err = act.run(queue, pack); + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); +} + // *INDENT-OFF* // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( @@ -236,6 +286,49 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( bool is_valid = bool(NEActivationLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), act_info)); ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); } + +DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL, concat(concat( + combine(framework::dataset::make("CpuExt", std::string("NEON")), + framework::dataset::make("DataType", { DataType::F32, + DataType::F16, + DataType::QASYMM8, + DataType::QASYMM8_SIGNED, + DataType::QSYMM16 + })), + combine(framework::dataset::make("CpuExt", std::string("SVE")), + framework::dataset::make("DataType", { DataType::F32, + DataType::F16, + }))), + combine(framework::dataset::make("CpuExt", std::string("SVE2")), + framework::dataset::make("DataType", { DataType::QASYMM8, + DataType::QASYMM8_SIGNED, + DataType::QSYMM16 + }))), + cpu_ext, data_type) +{ + using namespace cpu::kernels; + + cpuinfo::CpuIsaInfo cpu_isa{}; + cpu_isa.neon = (cpu_ext == "NEON"); + cpu_isa.sve = (cpu_ext == "SVE"); + cpu_isa.sve2 = (cpu_ext == "SVE2"); + cpu_isa.fp16 = (data_type == DataType::F16); + + const auto *selected_impl = CpuActivationKernel::get_implementation(ActivationDataTypeISASelectorData{data_type, CPUModel::GENERIC, cpu_isa,ActivationLayerInfo::ActivationFunction::BOUNDED_RELU}, cpu::KernelSelectionType::Preferred); + + ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl); + std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_activation"; + if( data_type == DataType::QASYMM8 || data_type == DataType::QASYMM8_SIGNED) + { +#ifdef __aarch64__ + expected = "neon_q8_activation_lut"; +#else // __aarch64__ + expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_activation"; +#endif // __aarch64__ + } + std::string actual = selected_impl->name; + ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS); +} // clang-format on // *INDENT-ON* @@ -316,9 +409,12 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerQuantizedFixture<int8_t>, fram TEST_SUITE_END() // QASYMM8_SIGNED /** Input data sets. */ -const auto Int16QuantizedActivationFunctionsDataset = framework::dataset::make("ActivationFunction", { ActivationLayerInfo::ActivationFunction::LOGISTIC, - ActivationLayerInfo::ActivationFunction::TANH - }); +const auto Int16QuantizedActivationFunctionsDataset = framework::dataset::make("ActivationFunction", +{ + ActivationLayerInfo::ActivationFunction::LOGISTIC, + ActivationLayerInfo::ActivationFunction::TANH, + ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, +}); const auto Int16QuantizedActivationDataset = combine(combine(framework::dataset::make("InPlace", { false }), Int16QuantizedActivationFunctionsDataset), framework::dataset::make("AlphaBeta", { 0.5f, 1.f })); diff --git a/tests/validation/NEON/AddMulAdd.cpp b/tests/validation/NEON/AddMulAdd.cpp new file mode 100644 index 0000000000..77e3d80fe6 --- /dev/null +++ b/tests/validation/NEON/AddMulAdd.cpp @@ -0,0 +1,234 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifdef __aarch64__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEAddMulAdd.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "tests/NEON/Accessor.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/AddMulAddFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */ +const AbsoluteTolerance<half> tolerance_fp16(half(0.1f)); /**< Tolerance for 16-bit floating point tests */ +constexpr AbsoluteTolerance<float> tolerance_quant(1); /**< Tolerance for quantized tests */ + +const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo", +{ + ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + + // Boundaries are aligned with Quantized Data ranges -- DOUBLE check before changing + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, -2.f) +}); + +// QASYMM8 test quantizations +const auto qasymm8_input1_qinfo_set = framework::dataset::make("Input1QInfo", { QuantizationInfo(0.1, 10) }); // Representable Range: [-1, 24.5] +const auto qasymm8_input2_qinfo_set = framework::dataset::make("Input2QInfo", { QuantizationInfo(0.2, 60) }); // Representable Range: [-12, 39] +const auto qasymm8_bn_mul_qinfo_set = framework::dataset::make("BnMulInfo", { QuantizationInfo(0.001, 55) }); // Representable Range: [-0.11, 0.2] +const auto qasymm8_bn_add_qinfo_set = framework::dataset::make("BnAddInfo", { QuantizationInfo(0.02, 20) }); // Representable Range: [-0.4, 4.7] + +// Representable Range: [-9.36, 51.84], Expected F32 range: [-13, 63.5], leaving some space for saturation +const auto qasymm8_add_output_qinfo_set = framework::dataset::make("AddOutputInfo", { QuantizationInfo(0.24, 39) }); + +// Representable Range: [-4.8, 10.5], Expected FP32 range: [-6.985, 12.7], leaving some space for saturation +// This range also makes sense with the activation boundaries above, i.e. [-2, 8] for LU_BOUNDED_RELU and [0, 6] for BOUNDED_RELU +const auto qasymm8_final_output_qinfo_set = framework::dataset::make("FinalOutputInfo", { QuantizationInfo(0.06, 80) }); + +// QASYMM8_SIGNED test quantizations +const auto qasymm8_signed_input1_qinfo_set = framework::dataset::make("Input1QInfo", { QuantizationInfo(0.1, 10) }); // Representable Range: [-13.8, 11.7] +const auto qasymm8_signed_input2_qinfo_set = framework::dataset::make("Input2QInfo", { QuantizationInfo(0.2, -60) }); // Representable Range: [-13.6, 39.4] +const auto qasymm8_signed_bn_mul_qinfo_set = framework::dataset::make("BnMulInfo", { QuantizationInfo(0.001, 55) }); // Representable Range: [-0.183, 0.072] +const auto qasymm8_signed_bn_add_qinfo_set = framework::dataset::make("BnAddInfo", { QuantizationInfo(0.4, -120) }); // Representable Range: [-0.32, 9.08] + +// Representable Range: [-21.36, 39.84], Expected F32 range: [-27.4, 51.1], leaving some space for saturation +const auto qasymm8_signed_add_output_qinfo_set = framework::dataset::make("AddOutputInfo", { QuantizationInfo(0.24, -39) }); + +// Representable Range: [-4.8, 10.5], Expected FP32 range: [-9.6713, 14.0942], leaving some space for saturation +// This range also makes sense with the activation boundaries above, i.e. [-2, 8] for LU_BOUNDED_RELU and [0, 6] for BOUNDED_RELU +const auto qasymm8_signed_final_output_qinfo_set = framework::dataset::make("FinalOutputInfo", { QuantizationInfo(0.06, -48) }); + +} // namespace + +TEST_SUITE(NEON) +TEST_SUITE(AddMulAdd) + +template <typename T> +using NEAddMulAddFloatFixture = AddMulAddFloatValidationFixture<Tensor, Accessor, NEAddMulAdd, T, true>; + +template <typename T> +using NEAddMulAddFloatFixtureWoIntermOut = AddMulAddFloatValidationFixture<Tensor, Accessor, NEAddMulAdd, T, false>; + +TEST_SUITE(Float) + +TEST_SUITE(F32) +FIXTURE_DATA_TEST_CASE(RunSmall, NEAddMulAddFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::F32)), + ActivationFunctionsDataset)) +{ + // Validate outputs + validate(Accessor(_interm_target), _interm_reference); // Arithmetic Addition has more strict tolerance + validate(Accessor(_target), _reference, tolerance_fp32); +} + +// This test is to stress the case when there is no intermediate output required (i.e. nullptr) +FIXTURE_DATA_TEST_CASE(RunSmallWithoutIntermOutput, NEAddMulAddFloatFixtureWoIntermOut<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("ActivationInfo", { ActivationLayerInfo() }))) +{ + // Validate outputs + validate(Accessor(_target), _reference, tolerance_fp32); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, NEAddMulAddFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), + framework::dataset::make("DataType", DataType::F32)), + ActivationFunctionsDataset)) +{ + // Validate outputs + validate(Accessor(_interm_target), _interm_reference); // Arithmetic Addition has more strict tolerance + validate(Accessor(_target), _reference, tolerance_fp32); +} + +TEST_SUITE_END() // F32 + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +TEST_SUITE(F16) +FIXTURE_DATA_TEST_CASE(RunSmall, NEAddMulAddFloatFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::F16)), + ActivationFunctionsDataset)) +{ + // Validate outputs + validate(Accessor(_interm_target), _interm_reference); // Arithmetic Addition has more strict tolerance + validate(Accessor(_target), _reference, tolerance_fp16); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, NEAddMulAddFloatFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), + framework::dataset::make("DataType", DataType::F16)), + ActivationFunctionsDataset)) +{ + // Validate outputs + validate(Accessor(_interm_target), _interm_reference); // Arithmetic Addition has more strict tolerance + validate(Accessor(_target), _reference, tolerance_fp16); +} +TEST_SUITE_END() // F16 +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + +TEST_SUITE_END() // Float + +template <typename T> +using NEAddMulQuantizedFixture = AddMulAddQuantizedValidationFixture<Tensor, Accessor, NEAddMulAdd, T, true>; + +template <typename T> +using NEAddMulAddQuantizedFixtureWoIntermOut = AddMulAddQuantizedValidationFixture<Tensor, Accessor, NEAddMulAdd, T, false>; + +TEST_SUITE(Quantized) + +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, NEAddMulQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(combine(combine(datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::QASYMM8)), + ActivationFunctionsDataset), + qasymm8_input1_qinfo_set), + qasymm8_input2_qinfo_set), + qasymm8_bn_mul_qinfo_set), + qasymm8_bn_add_qinfo_set), + qasymm8_add_output_qinfo_set), + qasymm8_final_output_qinfo_set)) +{ + // Validate outputs + validate(Accessor(_interm_target), _interm_reference, tolerance_quant); + validate(Accessor(_target), _reference, tolerance_quant); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, NEAddMulQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(combine(datasets::LargeShapes(), + framework::dataset::make("DataType", DataType::QASYMM8)), + ActivationFunctionsDataset), + qasymm8_input1_qinfo_set), + qasymm8_input2_qinfo_set), + qasymm8_bn_mul_qinfo_set), + qasymm8_bn_add_qinfo_set), + qasymm8_add_output_qinfo_set), + qasymm8_final_output_qinfo_set)) +{ + // Validate outputs + validate(Accessor(_interm_target), _interm_reference, tolerance_quant); + validate(Accessor(_target), _reference, tolerance_quant); +} +TEST_SUITE_END() // QASYMM8 + +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunSmall, NEAddMulQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(combine(combine(datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + ActivationFunctionsDataset), + qasymm8_signed_input1_qinfo_set), + qasymm8_signed_input2_qinfo_set), + qasymm8_signed_bn_mul_qinfo_set), + qasymm8_signed_bn_add_qinfo_set), + qasymm8_signed_add_output_qinfo_set), + qasymm8_signed_final_output_qinfo_set)) +{ + // Validate outputs + validate(Accessor(_interm_target), _interm_reference, tolerance_quant); + validate(Accessor(_target), _reference, tolerance_quant); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, NEAddMulQuantizedFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(combine(datasets::LargeShapes(), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + ActivationFunctionsDataset), + qasymm8_signed_input1_qinfo_set), + qasymm8_signed_input2_qinfo_set), + qasymm8_signed_bn_mul_qinfo_set), + qasymm8_signed_bn_add_qinfo_set), + qasymm8_signed_add_output_qinfo_set), + qasymm8_signed_final_output_qinfo_set)) +{ + // Validate outputs + validate(Accessor(_interm_target), _interm_reference, tolerance_quant); + validate(Accessor(_target), _reference, tolerance_quant); +} +TEST_SUITE_END() // QASYMM8_SIGNED + +TEST_SUITE_END() // Quantized + +TEST_SUITE_END() // AddMulAdd +TEST_SUITE_END() // NEON +} // namespace validation +} // namespace test +} // namespace arm_compute + +#endif // __aarch64__ diff --git a/tests/validation/NEON/ArgMinMax.cpp b/tests/validation/NEON/ArgMinMax.cpp index 0a4071076a..91b8128dea 100644 --- a/tests/validation/NEON/ArgMinMax.cpp +++ b/tests/validation/NEON/ArgMinMax.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,6 +43,27 @@ namespace test { namespace validation { +namespace +{ +const auto OpsDataset = framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX }); +const auto AxisDataset = framework::dataset::make("Axis", { 0, 1, 2, 3 }); +const auto QInfoDataset = framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) }); + +const auto ArgMinMaxSmallDatasetAxis0 = framework::dataset::make("Shape", +{ + TensorShape{ 1U, 5U }, + TensorShape{ 2U, 3U }, + TensorShape{ 1U }, + TensorShape{ 3U }, + TensorShape{ 2U }, + TensorShape{ 5U }, + TensorShape{ 17U }, + TensorShape{ 15U, 2U }, +}); +using ArgMinMaxSmallDataset = datasets::Small4DShapes; +using ArgMinMaxLargeDataset = datasets::Large4DShapes; +} + TEST_SUITE(NEON) TEST_SUITE(ArgMinMax) @@ -70,23 +91,50 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( // clang-format on // *INDENT-ON* -template <typename T> -using NEArgMinMaxValidationFixture = ArgMinMaxValidationFixture<Tensor, Accessor, NEArgMinMaxLayer, T>; +template <typename T1, typename T2> +using NEArgMinMaxValidationFixture = ArgMinMaxValidationFixture<Tensor, Accessor, NEArgMinMaxLayer, T1, T2>; + +using NEArgMinMaxValidationFixture_S32_S32 = NEArgMinMaxValidationFixture<int32_t, int32_t>; +using NEArgMinMaxValidationFixture_F16_S32 = NEArgMinMaxValidationFixture<half, int32_t>; +using NEArgMinMaxValidationFixture_F32_S32 = NEArgMinMaxValidationFixture<float, int32_t>; +#ifdef __aarch64__ +using NEArgMinMaxValidationFixture_F32_S64 = NEArgMinMaxValidationFixture<float, int64_t>; +#endif // __aarch64__ TEST_SUITE(S32) -FIXTURE_DATA_TEST_CASE(RunSmall, - NEArgMinMaxValidationFixture<int32_t>, +FIXTURE_DATA_TEST_CASE(RunSmallAxis0, + NEArgMinMaxValidationFixture_S32_S32, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::S32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX }))) + combine(combine(combine(combine(ArgMinMaxSmallDatasetAxis0, + framework::dataset::make("DataTypeIn", DataType::S32)), + framework::dataset::make("DataTypeOut", DataType::S32)), + framework::dataset::make("Axis", { 0 })), + OpsDataset)) { // Validate output validate(Accessor(_target), _reference); } +FIXTURE_DATA_TEST_CASE(RunSmall, + NEArgMinMaxValidationFixture_S32_S32, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(ArgMinMaxSmallDataset(), + framework::dataset::make("DataTypeIn", DataType::S32)), + framework::dataset::make("DataTypeOut", DataType::S32)), + AxisDataset), + OpsDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} FIXTURE_DATA_TEST_CASE(RunLarge, - NEArgMinMaxValidationFixture<int32_t>, + NEArgMinMaxValidationFixture_S32_S32, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::S32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX }))) + combine(combine(combine(combine(ArgMinMaxLargeDataset(), + framework::dataset::make("DataTypeIn", DataType::S32)), + framework::dataset::make("DataTypeOut", DataType::S32)), + AxisDataset), + OpsDataset)) { // Validate output validate(Accessor(_target), _reference); @@ -97,18 +145,26 @@ TEST_SUITE(Float) #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, - NEArgMinMaxValidationFixture<half>, + NEArgMinMaxValidationFixture_F16_S32, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX }))) + combine(combine(combine(combine(ArgMinMaxSmallDataset(), + framework::dataset::make("DataTypeIn", DataType::F16)), + framework::dataset::make("DataTypeOut", DataType::S32)), + AxisDataset), + OpsDataset)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, - NEArgMinMaxValidationFixture<half>, + NEArgMinMaxValidationFixture_F16_S32, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX }))) + combine(combine(combine(combine(ArgMinMaxLargeDataset(), + framework::dataset::make("DataTypeIn", DataType::F16)), + framework::dataset::make("DataTypeOut", DataType::S32)), + AxisDataset), + OpsDataset)) { // Validate output validate(Accessor(_target), _reference); @@ -118,18 +174,41 @@ TEST_SUITE_END() // FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, - NEArgMinMaxValidationFixture<float>, + NEArgMinMaxValidationFixture_F32_S32, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX }))) + combine(combine(combine(combine(ArgMinMaxSmallDataset(), + framework::dataset::make("DataTypeIn", DataType::F32)), + framework::dataset::make("DataTypeOut", DataType::S32)), + AxisDataset), + OpsDataset)) { // Validate output validate(Accessor(_target), _reference); } +#ifdef __aarch64__ +FIXTURE_DATA_TEST_CASE(RunSmall_F32_S64, + NEArgMinMaxValidationFixture_F32_S64, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(ArgMinMaxSmallDataset(), + framework::dataset::make("DataTypeIn", DataType::F32)), + framework::dataset::make("DataTypeOut", DataType::S64)), + AxisDataset), + OpsDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +#endif // __aarch64__ + FIXTURE_DATA_TEST_CASE(RunLarge, - NEArgMinMaxValidationFixture<float>, + NEArgMinMaxValidationFixture_F32_S32, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX }))) + combine(combine(combine(combine(ArgMinMaxLargeDataset(), + framework::dataset::make("DataTypeIn", DataType::F32)), + framework::dataset::make("DataTypeOut", DataType::S32)), + AxisDataset), + OpsDataset)) { // Validate output validate(Accessor(_target), _reference); @@ -137,27 +216,35 @@ FIXTURE_DATA_TEST_CASE(RunLarge, TEST_SUITE_END() // FP32 TEST_SUITE_END() // Float -template <typename T> -using NEArgMinMaxQuantizedValidationFixture = ArgMinMaxValidationQuantizedFixture<Tensor, Accessor, NEArgMinMaxLayer, T>; +template <typename T1, typename T2> +using NEArgMinMaxQuantizedValidationFixture = ArgMinMaxValidationQuantizedFixture<Tensor, Accessor, NEArgMinMaxLayer, T1, T2>; + +using NEArgMinMaxQuantizedValidationFixture_U8_S32 = NEArgMinMaxQuantizedValidationFixture<uint8_t, int32_t>; +using NEArgMinMaxQuantizedValidationFixture_S8_S32 = NEArgMinMaxQuantizedValidationFixture<int8_t, int32_t>; TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, - NEArgMinMaxQuantizedValidationFixture<uint8_t>, + NEArgMinMaxQuantizedValidationFixture_U8_S32, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), - framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) }))) + combine(combine(combine(combine(combine(ArgMinMaxSmallDataset(), + framework::dataset::make("DataTypeIn", DataType::QASYMM8)), + framework::dataset::make("DataTypeOut", DataType::S32)), + AxisDataset), + OpsDataset), + QInfoDataset)) { // Validate output validate(Accessor(_target), _reference); } - FIXTURE_DATA_TEST_CASE(RunLarge, - NEArgMinMaxQuantizedValidationFixture<uint8_t>, + NEArgMinMaxQuantizedValidationFixture_U8_S32, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), - framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) }))) + combine(combine(combine(combine(combine(ArgMinMaxLargeDataset(), + framework::dataset::make("DataTypeIn", DataType::QASYMM8)), + framework::dataset::make("DataTypeOut", DataType::S32)), + AxisDataset), + OpsDataset), + QInfoDataset)) { // Validate output validate(Accessor(_target), _reference); @@ -166,22 +253,27 @@ TEST_SUITE_END() // QASYMM8 TEST_SUITE(QASYMM8_SIGNED) FIXTURE_DATA_TEST_CASE(RunSmall, - NEArgMinMaxQuantizedValidationFixture<int8_t>, + NEArgMinMaxQuantizedValidationFixture_S8_S32, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), - framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 127.f, 20) }))) + combine(combine(combine(combine(combine(ArgMinMaxSmallDataset(), + framework::dataset::make("DataTypeIn", DataType::QASYMM8_SIGNED)), + framework::dataset::make("DataTypeOut", DataType::S32)), + AxisDataset), + OpsDataset), + QInfoDataset)) { // Validate output validate(Accessor(_target), _reference); } - FIXTURE_DATA_TEST_CASE(RunLarge, - NEArgMinMaxQuantizedValidationFixture<int8_t>, + NEArgMinMaxQuantizedValidationFixture_S8_S32, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), - framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 127.f, 20) }))) + combine(combine(combine(combine(combine(ArgMinMaxLargeDataset(), + framework::dataset::make("DataTypeIn", DataType::QASYMM8_SIGNED)), + framework::dataset::make("DataTypeOut", DataType::S32)), + AxisDataset), + OpsDataset), + QInfoDataset)) { // Validate output validate(Accessor(_target), _reference); diff --git a/tests/validation/NEON/ArithmeticAddition.cpp b/tests/validation/NEON/ArithmeticAddition.cpp index 98341805ed..535c3e634e 100644 --- a/tests/validation/NEON/ArithmeticAddition.cpp +++ b/tests/validation/NEON/ArithmeticAddition.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,9 +22,12 @@ * SOFTWARE. */ #include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/StringUtils.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/common/cpuinfo/CpuIsaInfo.h" +#include "src/cpu/kernels/CpuAddKernel.h" #include "tests/NEON/Accessor.h" #include "tests/PaddingCalculator.h" #include "tests/datasets/ConvertPolicyDataset.h" @@ -43,31 +46,13 @@ namespace validation { namespace { -#if !defined(__aarch64__) || defined(__ARM_FEATURE_SVE) +#if !defined(__aarch64__) || defined(ENABLE_SVE) constexpr AbsoluteTolerance<float> tolerance_quant(1); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ -#else // !defined(__aarch64__) || defined(__ARM_FEATURE_SVE) +#else // !defined(__aarch64__) || defined(ENABLE_SVE) constexpr AbsoluteTolerance<float> tolerance_quant(0); -#endif // !defined(__aarch64__) || defined(__ARM_FEATURE_SVE) - -/** Input data sets **/ -const auto ArithmeticAdditionU8Dataset = combine(combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U8)), framework::dataset::make("DataType", - DataType::U8)); -const auto ArithmeticAdditionS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)), - framework::dataset::make("DataType", DataType::S16)); -const auto ArithmeticAdditionS32Dataset = combine(combine(framework::dataset::make("DataType", { DataType::S32 }), framework::dataset::make("DataType", DataType::S32)), - framework::dataset::make("DataType", DataType::S32)); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -const auto ArithmeticAdditionFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("DataType", DataType::F16)); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -const auto ArithmeticAdditionFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)), - framework::dataset::make("DataType", DataType::F32)); -const auto ArithmeticAdditionQASYMM8Dataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("DataType", DataType::QASYMM8)); -const auto ArithmeticAdditionQASYMM8SIGNEDDataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), - framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)); -const auto ArithmeticAdditionQSYMM16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QSYMM16), framework::dataset::make("DataType", DataType::QSYMM16)), - framework::dataset::make("DataType", DataType::QSYMM16)); +#endif // !defined(__aarch64__) || defined(ENABLE_SVE) +const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true }); +const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false }); } // namespace TEST_SUITE(NEON) @@ -79,25 +64,22 @@ using NEArithmeticAdditionFixture = ArithmeticAdditionValidationFixture<Tensor, // *INDENT-OFF* // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( - framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), + framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8), // Unsupported broadcast TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),// Mismatching shapes }), - framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), + framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(1U, 13U, 2U), 1, DataType::S16), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), })), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), + framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::S16), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), })), - framework::dataset::make("Expected", { true, true, false, false, false})), + framework::dataset::make("Expected", { true, false, false, false})), input1_info, input2_info, output_info, expected) { Status s = NEArithmeticAddition::validate(&input1_info.clone()->set_is_resizable(false), @@ -106,6 +88,63 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( ConvertPolicy::WRAP); ARM_COMPUTE_EXPECT(bool(s) == expected, framework::LogLevel::ERRORS); } + +DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL, concat(concat( + combine(combine(framework::dataset::make("CpuExt", std::string("NEON")), + framework::dataset::make("DataType", { DataType::F32, + DataType::F16, + DataType::U8, + DataType::S16, + DataType::S32, + DataType::QASYMM8, + DataType::QASYMM8_SIGNED, + DataType::QSYMM16 + })), + framework::dataset::make("CanUseFixedpoint", {true, false})), + combine(combine(framework::dataset::make("CpuExt", std::string("SVE")), + framework::dataset::make("DataType", { DataType::F32, + DataType::F16, + DataType::U8, + DataType::S16, + DataType::S32 + })), + framework::dataset::make("CanUseFixedpoint", {true, false}))), + combine(combine(framework::dataset::make("CpuExt", std::string("SVE2")), + framework::dataset::make("DataType", { DataType::QASYMM8, + DataType::QASYMM8_SIGNED, + DataType::QSYMM16 + })), + framework::dataset::make("CanUseFixedpoint", {true, false}))), + cpu_ext, data_type, can_use_fixedpoint) +{ + using namespace cpu::kernels; + + cpuinfo::CpuIsaInfo cpu_isa{}; + cpu_isa.neon = (cpu_ext == "NEON"); + cpu_isa.sve = (cpu_ext == "SVE"); + cpu_isa.sve2 = (cpu_ext == "SVE2"); + cpu_isa.fp16 = (data_type == DataType::F16); + + const auto *selected_impl = CpuAddKernel::get_implementation(CpuAddKernelDataTypeISASelectorData{data_type, cpu_isa, can_use_fixedpoint}, cpu::KernelSelectionType::Preferred); + + ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl); + + bool qasymm8_any = (data_type == DataType::QASYMM8 || data_type == DataType::QASYMM8_SIGNED); + + std::string expected; + if(qasymm8_any && can_use_fixedpoint) + { + expected = "neon_" + cpu_impl_dt(data_type) + "_add_fixedpoint"; + } + else + { + expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_add"; + } + + std::string actual = selected_impl->name; + + ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS); +} // clang-format on // *INDENT-ON* @@ -127,8 +166,10 @@ TEST_CASE(NoPaddingAdded, framework::DatasetMode::PRECOMMIT) TEST_SUITE(Integer) TEST_SUITE(U8) -FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticAdditionU8Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP }))) +FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", + DataType::U8)), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); @@ -136,15 +177,19 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<uint8_t>, framework TEST_SUITE_END() // U8 TEST_SUITE(S16) -FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticAdditionS16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP }))) +FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", + DataType::S16)), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticAdditionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ArithmeticAdditionS16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP }))) +FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticAdditionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", + DataType::S16)), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); @@ -152,8 +197,10 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticAdditionFixture<int16_t>, framework TEST_SUITE_END() // S16 TEST_SUITE(S32) -FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<int32_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ArithmeticAdditionS32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP }))) +FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<int32_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", + DataType::S32)), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); @@ -164,8 +211,9 @@ TEST_SUITE_END() // Integer TEST_SUITE(Float) #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(F16) -FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ArithmeticAdditionFP16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP }))) +FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); @@ -174,15 +222,19 @@ TEST_SUITE_END() // F16 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE(F32) -FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticAdditionFP32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP }))) +FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticAdditionFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ArithmeticAdditionFP32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP }))) +FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticAdditionFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); @@ -191,17 +243,19 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticAdditionFixture<float>, framework:: template <typename T> using NEArithmeticAdditionBroadcastFixture = ArithmeticAdditionBroadcastValidationFixture<Tensor, Accessor, NEArithmeticAddition, T>; -FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEArithmeticAdditionBroadcastFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapesBroadcast(), - ArithmeticAdditionFP32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP }))) +FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEArithmeticAdditionBroadcastFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapesBroadcast(), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, NEArithmeticAdditionBroadcastFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapesBroadcast(), - ArithmeticAdditionFP32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP }))) +FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, NEArithmeticAdditionBroadcastFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapesBroadcast(), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); @@ -220,11 +274,12 @@ TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionQASYMM8Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), - framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })), - framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })), - framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) }))) + combine(combine(combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), + framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })), + framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })), + framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })), + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_quant); @@ -235,22 +290,24 @@ TEST_SUITE(QASYMM8_SIGNED) FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionQuantizedFixture<int8_t>, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionQASYMM8SIGNEDDataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), - framework::dataset::make("Src0QInfo", { QuantizationInfo(0.5f, 20) })), - framework::dataset::make("Src1QInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("OutQInfo", { QuantizationInfo(0.5f, 5) }))) + combine(combine(combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), + framework::dataset::make("Src0QInfo", { QuantizationInfo(0.5f, 20) })), + framework::dataset::make("Src1QInfo", { QuantizationInfo(0.5f, 10) })), + framework::dataset::make("OutQInfo", { QuantizationInfo(0.5f, 5) })), + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_quant); } -FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEArithmeticAdditionQuantizedBroadcastFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine( - datasets::SmallShapesBroadcast(), ArithmeticAdditionQASYMM8SIGNEDDataset), +FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEArithmeticAdditionQuantizedBroadcastFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine( + datasets::SmallShapesBroadcast(), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), framework::dataset::make("Src0QInfo", { QuantizationInfo(0.5f, 20) })), framework::dataset::make("Src1QInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("OutQInfo", { QuantizationInfo(0.5f, 5) }))) + framework::dataset::make("OutQInfo", { QuantizationInfo(0.5f, 5) })), + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_quant); @@ -261,11 +318,12 @@ TEST_SUITE(QSYMM16) FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionQSYMM16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), - framework::dataset::make("Src0QInfo", { QuantizationInfo(1.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })), - framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })), - framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) }))) + combine(combine(combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::QSYMM16)), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), + framework::dataset::make("Src0QInfo", { QuantizationInfo(1.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })), + framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })), + framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })), + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_quant); diff --git a/tests/validation/NEON/ArithmeticSubtraction.cpp b/tests/validation/NEON/ArithmeticSubtraction.cpp index 7a36893445..8886ca2db5 100644 --- a/tests/validation/NEON/ArithmeticSubtraction.cpp +++ b/tests/validation/NEON/ArithmeticSubtraction.cpp @@ -50,45 +50,16 @@ constexpr AbsoluteTolerance<float> tolerance_qasymm8(1); /**< Tolerance value fo #endif //__aarch64__ constexpr AbsoluteTolerance<int16_t> tolerance_qsymm16(1); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ -/** Input data sets **/ -const auto ArithmeticSubtractionQASYMM8Dataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("DataType", DataType::QASYMM8)); - -const auto ArithmeticSubtractionQASYMM8SIGNEDDataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), - framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), - framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)); - -const auto ArithmeticSubtractionQSYMM16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QSYMM16), - framework::dataset::make("DataType", DataType::QSYMM16)), - framework::dataset::make("DataType", DataType::QSYMM16)); - -const auto ArithmeticSubtractionU8Dataset = combine(combine(framework::dataset::make("DataType", DataType::U8), - framework::dataset::make("DataType", DataType::U8)), - framework::dataset::make("DataType", DataType::U8)); - -const auto ArithmeticSubtractionS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), - framework::dataset::make("DataType", DataType::S16)), - framework::dataset::make("DataType", DataType::S16)); - -const auto ArithmeticSubtractionS32Dataset = combine(combine(framework::dataset::make("DataType", DataType::S32), - framework::dataset::make("DataType", DataType::S32)), - framework::dataset::make("DataType", DataType::S32)); -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -const auto ArithmeticSubtractionFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), - framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("DataType", DataType::F16)); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -const auto ArithmeticSubtractionFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), - framework::dataset::make("DataType", DataType::F32)), - framework::dataset::make("DataType", DataType::F32)); - +// Quantization Infomation DataSet const auto ArithmeticSubtractionQuantizationInfoDataset = combine(combine(framework::dataset::make("QuantizationInfoIn1", { QuantizationInfo(10, 120) }), framework::dataset::make("QuantizationInfoIn2", { QuantizationInfo(20, 110) })), framework::dataset::make("QuantizationInfoOut", { QuantizationInfo(15, 125) })); const auto ArithmeticSubtractionQuantizationInfoSignedDataset = combine(combine(framework::dataset::make("QuantizationInfoIn1", { QuantizationInfo(0.5f, 10) }), framework::dataset::make("QuantizationInfoIn2", { QuantizationInfo(0.5f, 20) })), framework::dataset::make("QuantizationInfoOut", { QuantizationInfo(0.5f, 50) })); +const auto ArithmeticSubtractionQuantizationInfoSignedInPlaceDataset = combine(combine(framework::dataset::make("QuantizationInfoIn1", { QuantizationInfo(0.8f, 10) }), + framework::dataset::make("QuantizationInfoIn2", { QuantizationInfo(0.8f, 10) })), + framework::dataset::make("QuantizationInfoOut", { QuantizationInfo(0.8f, 10) })); const auto ArithmeticSubtractionQuantizationInfoSymmetric = combine(combine(framework::dataset::make("QuantizationInfoIn1", { QuantizationInfo(0.3f, 0) }), framework::dataset::make("QuantizationInfoIn2", { QuantizationInfo(0.7f, 0) })), framework::dataset::make("QuantizationInfoOut", { QuantizationInfo(0.2f, 0) })); @@ -105,35 +76,31 @@ using NEArithmeticSubtractionFixture = ArithmeticSubtractionValidationFixture<Te // *INDENT-OFF* // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( - framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), + framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::QASYMM8), // Mismatching types TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Invalid convert policy }), - framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), + framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), })), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), + framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), })), - framework::dataset::make("ConvertPolicy",{ ConvertPolicy::WRAP, - ConvertPolicy::SATURATE, - ConvertPolicy::SATURATE, - ConvertPolicy::WRAP, - ConvertPolicy::WRAP, - ConvertPolicy::WRAP, + framework::dataset::make("ConvertPolicy",{ ConvertPolicy::SATURATE, + ConvertPolicy::SATURATE, + ConvertPolicy::WRAP, + ConvertPolicy::WRAP, + ConvertPolicy::WRAP, })), - framework::dataset::make("Expected", { true, true, false, false, false, false})), + framework::dataset::make("Expected", { true, false, false, false, false})), input1_info, input2_info, output_info, policy, expected) { ARM_COMPUTE_EXPECT(bool(NEArithmeticSubtraction::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), policy)) == expected, framework::LogLevel::ERRORS); @@ -194,7 +161,8 @@ TEST_CASE(InvalidBroadcastBoth, framework::DatasetMode::ALL) TEST_SUITE_END() // InPlaceValidate TEST_SUITE(U8) -FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticSubtractionU8Dataset), +FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", + DataType::U8)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), OutOfPlaceDataSet)) { @@ -210,10 +178,11 @@ using NEArithmeticSubtractionQSYMM16Fixture = ArithmeticSubtracti TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) -FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionQASYMM8Fixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), ArithmeticSubtractionQASYMM8Dataset), +FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionQASYMM8Fixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", + DataType::QASYMM8)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), ArithmeticSubtractionQuantizationInfoDataset), - InPlaceDataSet)) + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); @@ -222,19 +191,17 @@ TEST_SUITE_END() // QASYMM8 TEST_SUITE(QASYMM8_SIGNED) FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionQASYMM8SignedFixture, framework::DatasetMode::ALL, combine(combine(combine(combine( - datasets::SmallShapes(), - ArithmeticSubtractionQASYMM8SIGNEDDataset), + datasets::SmallShapes(), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), ArithmeticSubtractionQuantizationInfoSignedDataset), - InPlaceDataSet)) + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); } - FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEArithmeticSubtractionQASYMM8SignedBroadcastFixture, framework::DatasetMode::ALL, combine(combine(combine(combine( datasets::SmallShapesBroadcast(), - ArithmeticSubtractionQASYMM8SIGNEDDataset), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), ArithmeticSubtractionQuantizationInfoSignedDataset), OutOfPlaceDataSet)) @@ -242,12 +209,22 @@ FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEArithmeticSubtractionQASYMM8SignedBr // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); } +FIXTURE_DATA_TEST_CASE(RunTinyBroadcastInPlace, NEArithmeticSubtractionQASYMM8SignedBroadcastFixture, framework::DatasetMode::ALL, combine(combine(combine(combine( + datasets::TinyShapesBroadcastInplace(), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), + ArithmeticSubtractionQuantizationInfoSignedInPlaceDataset), + InPlaceDataSet)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} TEST_SUITE_END() // QASYMM8_SIGNED TEST_SUITE(QSYMM16) FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionQSYMM16Fixture, framework::DatasetMode::ALL, combine(combine(combine(combine( datasets::SmallShapes(), - ArithmeticSubtractionQSYMM16Dataset), + framework::dataset::make("DataType", DataType::QSYMM16)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), ArithmeticSubtractionQuantizationInfoSymmetric), OutOfPlaceDataSet)) @@ -259,7 +236,8 @@ TEST_SUITE_END() // QSYMM16 TEST_SUITE_END() // Quantized TEST_SUITE(S16) -FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticSubtractionS16Dataset), +FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", + DataType::S16)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), OutOfPlaceDataSet)) { @@ -267,7 +245,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<int16_t>, framew validate(Accessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticSubtractionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ArithmeticSubtractionS16Dataset), +FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticSubtractionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", + DataType::S16)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), OutOfPlaceDataSet)) { @@ -277,7 +256,8 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticSubtractionFixture<int16_t>, framew TEST_SUITE_END() // S16 TEST_SUITE(S32) -FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<int32_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticSubtractionS32Dataset), +FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<int32_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", + DataType::S32)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), OutOfPlaceDataSet)) { @@ -285,7 +265,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<int32_t>, framew validate(Accessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticSubtractionFixture<int32_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ArithmeticSubtractionS32Dataset), +FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticSubtractionFixture<int32_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", + DataType::S32)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), OutOfPlaceDataSet)) { @@ -297,7 +278,8 @@ TEST_SUITE_END() // S32 TEST_SUITE(Float) #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(F16) -FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ArithmeticSubtractionFP16Dataset), +FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", + DataType::F16)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), OutOfPlaceDataSet)) { @@ -308,7 +290,8 @@ TEST_SUITE_END() // F16 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE(F32) -FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticSubtractionFP32Dataset), +FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", + DataType::F32)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), InPlaceDataSet)) { @@ -316,7 +299,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<float>, framewor validate(Accessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticSubtractionFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ArithmeticSubtractionFP32Dataset), +FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticSubtractionFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", + DataType::F32)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), OutOfPlaceDataSet)) { @@ -328,7 +312,7 @@ template <typename T> using NEArithmeticSubtractionBroadcastFixture = ArithmeticSubtractionBroadcastValidationFixture<Tensor, Accessor, NEArithmeticSubtraction, T>; FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEArithmeticSubtractionBroadcastFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapesBroadcast(), - ArithmeticSubtractionFP32Dataset), + framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), OutOfPlaceDataSet)) { @@ -337,7 +321,7 @@ FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEArithmeticSubtractionBroadcastFixtur } FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, NEArithmeticSubtractionBroadcastFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapesBroadcast(), - ArithmeticSubtractionFP32Dataset), + framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), OutOfPlaceDataSet)) { diff --git a/tests/validation/NEON/BatchNormalizationLayer.cpp b/tests/validation/NEON/BatchNormalizationLayer.cpp index a1ae6971f4..50eaf0c667 100644 --- a/tests/validation/NEON/BatchNormalizationLayer.cpp +++ b/tests/validation/NEON/BatchNormalizationLayer.cpp @@ -51,7 +51,7 @@ namespace RelativeTolerance<float> rel_tolerance_f32(0.05f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ constexpr AbsoluteTolerance<float> abs_tolerance_f32(0.0001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -constexpr AbsoluteTolerance<float> abs_tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ +constexpr AbsoluteTolerance<float> abs_tolerance_f16(0.015f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC const auto act_infos = framework::dataset::make("ActivationInfo", diff --git a/tests/validation/NEON/BatchToSpaceLayer.cpp b/tests/validation/NEON/BatchToSpaceLayer.cpp index a305dcbcc4..8cf11b7b95 100644 --- a/tests/validation/NEON/BatchToSpaceLayer.cpp +++ b/tests/validation/NEON/BatchToSpaceLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -49,55 +49,38 @@ using NEBatchToSpaceLayerFixture = BatchToSpaceLayerValidationFixture<Tensor, Ac // *INDENT-OFF* // clang-format off -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // blockx != blocky && blockx > blocky - TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // blockx != blocky && blocky > blockx - TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32), // Mismatching data types - TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32), // Wrong data type block shape - TensorInfo(TensorShape(32U, 13U, 2U, 2U, 4U), 1, DataType::F32), // Wrong tensor shape - }), - framework::dataset::make("BlockShapeInfo",{ TensorInfo(TensorShape(2U, 2U), 1, DataType::S32), - TensorInfo(TensorShape(2U, 2U), 1, DataType::S32), - TensorInfo(TensorShape(2U, 4U), 1, DataType::S32), - TensorInfo(TensorShape(4U, 2U), 1, DataType::S32), - TensorInfo(TensorShape(2U, 2U), 1, DataType::F16), - TensorInfo(TensorShape(2U, 2U), 1, DataType::S32), - })), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 16U, 2U, 1U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 32U, 2U, 1U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F16), - TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32), - })), - framework::dataset::make("Expected", { true, true, true, false, false, false})), - input_info, block_shape_info, output_info, expected) -{ - bool has_error = bool(NEBatchToSpaceLayer::validate(&input_info.clone()->set_is_resizable(false), &block_shape_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))); - ARM_COMPUTE_EXPECT(has_error == expected, framework::LogLevel::ERRORS); -} -DATA_TEST_CASE(ValidateStatic, framework::DatasetMode::ALL, zip(zip(zip(zip( +DATA_TEST_CASE(ValidateStatic, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( framework::dataset::make("InputInfo", { TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32), - TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // blockx != blocky && blockx > blocky - TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // blockx != blocky && blocky > blockx - TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32), // Mismatching data types - TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32), // Negative block shapes - TensorInfo(TensorShape(32U, 16U, 2U, 4U, 4U), 1, DataType::F32), // Wrong tensor shape + TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // Supported: blockx != blocky && blockx > blocky + TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // Supported: blockx != blocky && blocky > blockx + TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32), // Invalid: Mismatching data types + TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32), // Invalid: Negative block shapes + TensorInfo(TensorShape(32U, 16U, 2U, 4U, 4U), 1, DataType::F32),// Unsupported tensor rank + TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // Invalid output tensor shape (invalid batch dimension) + TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // Invalid output tensor shape (invalid spatial dimension) + TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // Supported: correct tensor shape with cropping + TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32), // Invalid tensor shape with cropping }), - framework::dataset::make("BlockShapeX", { 2, 4, 2, 2, 2, 2 })), - framework::dataset::make("BlockShapeY", { 2, 2, 4, 2, -2, 2 })), + framework::dataset::make("BlockShapeX", { 2, 4, 2, 2, 2, 2, 2, 2, 2, 2 })), + framework::dataset::make("BlockShapeY", { 2, 2, 4, 2, -2, 2, 2, 2, 2, 2 })), + framework::dataset::make("CropInfo", { + CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{3, 2, 1, 3}, CropInfo{3, 2, 1, 3} + })), framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F32), - TensorInfo(TensorShape(64U, 16U, 2U, 1U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 32U, 2U, 1U), 1, DataType::F32), + TensorInfo(TensorShape(64U, 16U, 2U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 32U, 2U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F16), TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F32), TensorInfo(TensorShape(32U, 8U, 2U, 1U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F32), + TensorInfo(TensorShape(33U, 32U, 2U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(27, 12U, 2U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 16U, 2U, 4U), 1, DataType::F32), })), - framework::dataset::make("Expected", { true, true, true, false, false, false})), - input_info, block_shape_x, block_shape_y, output_info, expected) + framework::dataset::make("Expected", { true, true, true, false, false, false, false, false, true, false})), + input_info, block_shape_x, block_shape_y, crop_info, output_info, expected) { - bool has_error = bool(NEBatchToSpaceLayer::validate(&input_info.clone()->set_is_resizable(false), block_shape_x, block_shape_y, &output_info.clone()->set_is_resizable(false))); + bool has_error = bool(NEBatchToSpaceLayer::validate(&input_info.clone()->set_is_resizable(false), block_shape_x, block_shape_y, &output_info.clone()->set_is_resizable(false), crop_info)); ARM_COMPUTE_EXPECT(has_error == expected, framework::LogLevel::ERRORS); } // clang-format on @@ -112,6 +95,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchToSpaceLayerFixture<float>, framework::D // Validate output validate(Accessor(_target), _reference); } + +FIXTURE_DATA_TEST_CASE(RunSmallWithCropping, NEBatchToSpaceLayerFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(combine(datasets::SmallBatchToSpaceLayerWithCroppingDataset(), framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) +{ + // Validate output + validate(Accessor(_target), _reference); +} + FIXTURE_DATA_TEST_CASE(RunLarge, NEBatchToSpaceLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeBatchToSpaceLayerDataset(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) @@ -129,6 +122,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchToSpaceLayerFixture<half>, framework::Da // Validate output validate(Accessor(_target), _reference); } +FIXTURE_DATA_TEST_CASE(RunSmallWithCropping, NEBatchToSpaceLayerFixture<half>, framework::DatasetMode::PRECOMMIT, + combine(combine(datasets::SmallBatchToSpaceLayerWithCroppingDataset(), framework::dataset::make("DataType", + DataType::F16)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) +{ + // Validate output + validate(Accessor(_target), _reference); +} + FIXTURE_DATA_TEST_CASE(RunLarge, NEBatchToSpaceLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeBatchToSpaceLayerDataset(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) diff --git a/tests/validation/NEON/Cast.cpp b/tests/validation/NEON/Cast.cpp index db73bea9cb..b56594546b 100644 --- a/tests/validation/NEON/Cast.cpp +++ b/tests/validation/NEON/Cast.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,9 +22,12 @@ * SOFTWARE. */ #include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/StringUtils.h" #include "arm_compute/runtime/NEON/functions/NECast.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/common/cpuinfo/CpuIsaInfo.h" +#include "src/cpu/kernels/CpuCastKernel.h" #include "tests/NEON/Accessor.h" #include "tests/PaddingCalculator.h" #include "tests/datasets/ConvertPolicyDataset.h" @@ -34,7 +37,6 @@ #include "tests/framework/datasets/Datasets.h" #include "tests/validation/Validation.h" #include "tests/validation/fixtures/CastFixture.h" - namespace arm_compute { namespace test @@ -99,6 +101,11 @@ const auto CastF32toS32Dataset = combine(framework::dataset::make("Da const auto CastF32toQASYMM8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QASYMM8)); const auto CastF32toQASYMM8_SIGNEDDataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)); +// U64 +const auto CastU64toF32Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::F32)); + +// S64 +const auto CastS64toF32Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::F32)); } // namespace TEST_SUITE(NEON) @@ -106,6 +113,8 @@ TEST_SUITE(Cast) template <typename T> using NECastToU8Fixture = CastValidationFixture<Tensor, Accessor, NECast, T, uint8_t>; template <typename T> +using NECastToS8Fixture = CastValidationFixture<Tensor, Accessor, NECast, T, int8_t>; +template <typename T> using NECastToU16Fixture = CastValidationFixture<Tensor, Accessor, NECast, T, uint16_t>; template <typename T> using NECastToS16Fixture = CastValidationFixture<Tensor, Accessor, NECast, T, int16_t>; @@ -114,6 +123,10 @@ using NECastToU32Fixture = CastValidationFixture<Tensor, Accessor, NECast, T, ui template <typename T> using NECastToS32Fixture = CastValidationFixture<Tensor, Accessor, NECast, T, int32_t>; template <typename T> +using NECastToU64Fixture = CastValidationFixture<Tensor, Accessor, NECast, T, uint64_t>; +template <typename T> +using NECastToS64Fixture = CastValidationFixture<Tensor, Accessor, NECast, T, int64_t>; +template <typename T> using NECastToF16Fixture = CastValidationFixture<Tensor, Accessor, NECast, T, half>; template <typename T> using NECastToF32Fixture = CastValidationFixture<Tensor, Accessor, NECast, T, float>; @@ -187,6 +200,66 @@ CAST_SUITE(F32_to_F16, DataType::F32, DataType::F16, NECastToF16Fixture<float>, CAST_SUITE(F32_to_S32, DataType::F32, DataType::S32, NECastToS32Fixture<float>, CastF32toS32Dataset, one_tolerance) CAST_SUITE(F32_to_U8, DataType::F32, DataType::S32, NECastToS32Fixture<float>, CastF32toS32Dataset, one_tolerance) +#ifdef __aarch64__ +// S64 +CAST_SUITE(S64_to_F32, DataType::S64, DataType::F32, NECastToF32Fixture<int64_t>, CastS64toF32Dataset, zero_tolerance) + +// U64 +CAST_SUITE(U64_to_F32, DataType::U64, DataType::F32, NECastToF32Fixture<uint64_t>, CastU64toF32Dataset, zero_tolerance) +#endif // __aarch64__ + +DATA_TEST_CASE(KernelSelectionDstFP16, framework::DatasetMode::ALL, + combine(framework::dataset::make("CpuExt", std::string("NEON")), + framework::dataset::make("DataType", +{ + DataType::F16, + DataType::U8, + DataType::S32, + DataType::QASYMM8, + DataType::QASYMM8_SIGNED, +})), +cpu_ext, data_type) +{ + using namespace cpu::kernels; + const CpuCastKernel::CastKernel *selected_impl; + + cpuinfo::CpuIsaInfo cpu_isa{}; + cpu_isa.neon = (cpu_ext == "NEON"); + cpu_isa.fp16 = true; + + selected_impl = CpuCastKernel::get_implementation(CastDataTypeISASelectorData{ data_type, DataType::F16, cpu_isa }, cpu::KernelSelectionType::Preferred); + ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl); + + std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_cast"; + std::string actual = selected_impl->name; + + ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS); +} + +DATA_TEST_CASE(KernelSelectionSrcFP32, framework::DatasetMode::ALL, + combine(framework::dataset::make("CpuExt", std::string("NEON")), + framework::dataset::make("DataType", +{ + DataType::F16, +})), +cpu_ext, data_type) +{ + using namespace cpu::kernels; + + cpuinfo::CpuIsaInfo cpu_isa{}; + cpu_isa.neon = (cpu_ext == "NEON"); + cpu_isa.fp16 = (data_type == DataType::F16); + + const auto *selected_impl = CpuCastKernel::get_implementation(CastDataTypeISASelectorData{ DataType::F32, data_type, cpu_isa }, cpu::KernelSelectionType::Preferred); + + ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl); + + std::string expected = lower_string(cpu_ext) + "_fp32_to_" + cpu_impl_dt(data_type) + "_cast"; + std::string actual = selected_impl->name; + + ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS); +} + TEST_SUITE_END() // Cast TEST_SUITE_END() // Neon } // namespace validation diff --git a/tests/validation/NEON/Col2Im.cpp b/tests/validation/NEON/Col2Im.cpp index 9139f0cca8..7eb8cbf0f6 100644 --- a/tests/validation/NEON/Col2Im.cpp +++ b/tests/validation/NEON/Col2Im.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,7 +22,7 @@ * SOFTWARE. */ #include "arm_compute/core/Types.h" -#include "src/core/NEON/kernels/NECol2ImKernel.h" +#include "src/cpu/kernels/CpuCol2ImKernel.h" #include "tests/NEON/Helper.h" #include "tests/datasets/ShapeDatasets.h" #include "tests/framework/Asserts.h" @@ -39,7 +39,7 @@ namespace validation TEST_SUITE(NEON) TEST_SUITE(Col2Im) -using NECol2Im = NESynthetizeFunction<NECol2ImKernel>; +using CpuCol2Im = NESynthetizeFunction<cpu::kernels::CpuCol2ImKernel>; // *INDENT-OFF* // clang-format off @@ -59,7 +59,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( framework::dataset::make("Expected", { false, false, false, true })), input_info, output_info, convolved_width, convolved_height, expected) { - bool status = bool(NECol2Im::validate(&input_info, &output_info, Size2D(convolved_width, convolved_height))); + bool status = bool(CpuCol2Im::validate(&input_info, &output_info, Size2D(convolved_width, convolved_height))); ARM_COMPUTE_EXPECT(status == expected, framework::LogLevel::ERRORS); } // clang-format on diff --git a/tests/validation/NEON/Convolution3D.cpp b/tests/validation/NEON/Convolution3D.cpp new file mode 100644 index 0000000000..4185488742 --- /dev/null +++ b/tests/validation/NEON/Convolution3D.cpp @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEConv3D.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" +#include "tests/NEON/Accessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/DirectConvolution3DFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +const RelativeTolerance<half_float::half> rel_tolerance_f16(half_float::half(0.2f)); /**< Relative tolerance value for FP16 types */ +const AbsoluteTolerance<float> abs_tolerance_f16(0.2f); /**< Absolute tolerance for FP16 types */ +constexpr float tolerance_num = 0.07f; /**< Tolerance number for the FP16 implementation */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */ +constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance for quantized tests */ + +/** Activation function Dataset*/ +const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo", +{ + ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f) +}); + +const auto data_precommit = combine(combine(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip( + datasets::SmallDirectConv3DShapes(), + framework::dataset::make("StrideX", { 1, 5, 8 })), + framework::dataset::make("StrideY", { 1, 2, 3 })), + framework::dataset::make("StrideZ", { 1, 2, 1 })), + framework::dataset::make("PadX", { 0, 1, 2 })), + framework::dataset::make("PadY", { 0, 2, 1 })), + framework::dataset::make("PadZ", { 0, 3, 5 })), + framework::dataset::make("KernelWidth", { 3, 5, 9 })), + framework::dataset::make("KernelHeight", { 2, 1, 3 })), + framework::dataset::make("KernelDepth", { 1, 2, 3 })), + framework::dataset::make("NumKernels", { 2, 3, 8 })), + framework::dataset::make("HasBias", { true, false })), + ActivationFunctionsDataset); +} // namespace + +TEST_SUITE(NEON) +TEST_SUITE(Convolution3D) + +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U, 4U), 1U, DataType::F32, DataLayout::NDHWC), // Mismatching data type input/weights + TensorInfo(TensorShape(27U, 13U, 2U, 4U), 1U, DataType::F32, DataLayout::NDHWC), // Mismatching input feature maps + TensorInfo(TensorShape(27U, 13U, 2U, 4U), 1U, DataType::F32, DataLayout::NDHWC), // Invalid weights dimensions + TensorInfo(TensorShape(27U, 13U, 2U, 4U), 1U, DataType::F32, DataLayout::NHWC), // Invalid data layout + TensorInfo(TensorShape(27U, 13U, 2U, 4U), 1U, DataType::F32, DataLayout::NDHWC), // Invalid biases size + TensorInfo(TensorShape(27U, 13U, 2U, 4U), 1U, DataType::F32, DataLayout::NDHWC), // Invalid biases dimensions + TensorInfo(TensorShape(27U, 13U, 2U, 4U), 1U, DataType::F32, DataLayout::NDHWC), // Invalid output size + TensorInfo(TensorShape(27U, 13U, 2U, 4U), 1U, DataType::U32, DataLayout::NDHWC), // Invalid data type + }), + framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(4U, 3U, 3U, 3U, 2U), 1U, DataType::F16), + TensorInfo(TensorShape(4U, 3U, 3U, 3U, 3U), 1U, DataType::F32), + TensorInfo(TensorShape(4U, 3U, 3U, 3U, 2U, 3U), 1U, DataType::F32), + TensorInfo(TensorShape(4U, 3U, 3U, 3U, 2U), 1U, DataType::F32), + TensorInfo(TensorShape(4U, 3U, 3U, 3U, 2U), 1U, DataType::F32), + TensorInfo(TensorShape(4U, 3U, 3U, 3U, 2U), 1U, DataType::F32), + TensorInfo(TensorShape(4U, 3U, 3U, 3U, 2U), 1U, DataType::F32), + TensorInfo(TensorShape(4U, 3U, 3U, 3U, 2U), 1U, DataType::U32), + })), + framework::dataset::make("BiasesInfo",{ TensorInfo(TensorShape(4U), 1U, DataType::F32), + TensorInfo(TensorShape(4U), 1U, DataType::F32), + TensorInfo(TensorShape(4U), 1U, DataType::F32), + TensorInfo(TensorShape(4U), 1U, DataType::F32), + TensorInfo(TensorShape(3U), 1U, DataType::F32), + TensorInfo(TensorShape(4U, 2U), 1U, DataType::F32), + TensorInfo(TensorShape(4U), 1U, DataType::F32), + TensorInfo(TensorShape(4U), 1U, DataType::F32), + })), + framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 4U), 1U, DataType::F32), + TensorInfo(TensorShape(25U, 11U, 4U), 1U, DataType::F32), + TensorInfo(TensorShape(25U, 11U, 4U), 1U, DataType::F32), + TensorInfo(TensorShape(25U, 11U, 4U), 1U, DataType::F32), + TensorInfo(TensorShape(25U, 11U, 4U), 1U, DataType::F32), + TensorInfo(TensorShape(25U, 11U, 4U), 1U, DataType::F32), + TensorInfo(TensorShape(26U, 11U, 4U), 1U, DataType::F32), + TensorInfo(TensorShape(25U, 11U, 4U), 1U, DataType::U32), + })), + framework::dataset::make("Expected", { false, false, false, false, false, false, false, false})), + input_info, weights_info, biases_info, output_info, expected) +{ + const Conv3dInfo conv3d_info(Size3D(1, 1, 1), Padding3D(0, 0, 0), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false); + bool is_valid = bool(NEConv3D::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv3d_info)); + ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); +} +// clang-format on +// *INDENT-ON* + +template <typename T> +using NEDirectConvolution3DFixture = DirectConvolution3DValidationFixture<Tensor, Accessor, NEConv3D, T>; + +TEST_SUITE(Float) +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectConvolution3DFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(data_precommit, + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NDHWC }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp32); +} +TEST_SUITE_END() // FP32 + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectConvolution3DFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(data_precommit, + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", { DataLayout::NDHWC }))) +{ + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); +} +TEST_SUITE_END() // FP16 +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ + +TEST_SUITE_END() // Float + +template <typename T> +using NEDirectConvolution3DQuantizedFixture = DirectConvolution3DValidationQuantizedFixture<Tensor, Accessor, NEConv3D, T>; + +TEST_SUITE(Quantized) +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectConvolution3DQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(combine(combine(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(7U, 5U, 3U, 13U, 3U), + TensorShape(15U, 7U, 11U, 7U), + TensorShape(19U, 5U, 16U, 4U), + TensorShape(13U, 5U, 17U, 2U) + }), + framework::dataset::make("StrideX", { 1, 3, 2, 1 })), + framework::dataset::make("StrideY", { 2, 1, 3, 1 })), + framework::dataset::make("StrideZ", { 3, 2, 1, 1 })), + framework::dataset::make("PadX", { 0, 2, 1, 0 })), + framework::dataset::make("PadY", { 1, 0, 2, 0 })), + framework::dataset::make("PadZ", { 2, 1, 0, 0 })), + framework::dataset::make("KernelWidth", { 3, 7, 5, 1 })), + framework::dataset::make("KernelHeight", { 5, 3, 7, 1 })), + framework::dataset::make("KernelDepth", { 7, 5, 3, 1 })), + framework::dataset::make("NumKernels", { 5, 3, 1, 11 })), + framework::dataset::make("HasBias", { true, true, true, false })), + framework::dataset::make("Activation", ActivationLayerInfo())), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("DataLayout", DataLayout::NDHWC)), + framework::dataset::make("SrcQuantizationInfo", QuantizationInfo(0.1f, 10))), + framework::dataset::make("WeightsQuantizationInfo", QuantizationInfo(0.3f, 20))), + framework::dataset::make("DstQuantizationInfo", QuantizationInfo(0.2f, 5)))) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} + +TEST_SUITE_END() // QASYMM8 + +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectConvolution3DQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(combine(combine(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(7U, 5U, 3U, 13U, 3U), + TensorShape(15U, 7U, 11U, 7U), + TensorShape(19U, 5U, 16U, 4U), + TensorShape(13U, 5U, 17U, 2U) + }), + framework::dataset::make("StrideX", { 1, 3, 2, 1 })), + framework::dataset::make("StrideY", { 2, 1, 3, 1 })), + framework::dataset::make("StrideZ", { 3, 2, 1, 1 })), + framework::dataset::make("PadX", { 0, 2, 1, 0 })), + framework::dataset::make("PadY", { 1, 0, 2, 0 })), + framework::dataset::make("PadZ", { 2, 1, 0, 0 })), + framework::dataset::make("KernelWidth", { 3, 7, 5, 1 })), + framework::dataset::make("KernelHeight", { 5, 3, 7, 1 })), + framework::dataset::make("KernelDepth", { 7, 5, 3, 1 })), + framework::dataset::make("NumKernels", { 5, 3, 1, 11 })), + framework::dataset::make("HasBias", { true, true, true, false })), + framework::dataset::make("Activation", ActivationLayerInfo())), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + framework::dataset::make("DataLayout", DataLayout::NDHWC)), + framework::dataset::make("SrcQuantizationInfo", QuantizationInfo(0.1f, 10))), + framework::dataset::make("WeightsQuantizationInfo", QuantizationInfo(0.3f, 20))), + framework::dataset::make("DstQuantizationInfo", QuantizationInfo(0.2f, 5)))) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} + +TEST_SUITE_END() // QASYMM8_SIGNED +TEST_SUITE_END() // Quantized + +TEST_SUITE_END() // Convolution3D +TEST_SUITE_END() // Neon +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp index 6b152c9b68..d739d4e1a4 100644 --- a/tests/validation/NEON/ConvolutionLayer.cpp +++ b/tests/validation/NEON/ConvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -28,11 +28,16 @@ #include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" + +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/MemoryHelpers.h" +#include "src/cpu/operators/CpuGemmConv2d.h" +#include "src/cpu/operators/CpuGemmDirectConv2d.h" +#include "src/cpu/operators/CpuWinogradConv2d.h" + #include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" #include "tests/datasets/LargeConvolutionLayerDataset.h" #include "tests/datasets/SmallConvolutionLayerDataset.h" -#include "tests/datasets/TinyConvolutionLayerDataset.h" #include "tests/framework/Asserts.h" #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" @@ -46,6 +51,8 @@ namespace test { namespace validation { +using framework::dataset::make; + namespace detail { template <> @@ -77,10 +84,17 @@ const RelativeTolerance<half_float::half> rel_tolerance_f16(half_float::half(0.2 const AbsoluteTolerance<float> abs_tolerance_f16(0.2f); /**< Absolute tolerance for FP16 types */ constexpr float tolerance_num = 0.07f; /**< Tolerance number for the FP16 implementation */ #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -constexpr AbsoluteTolerance<float> tolerance_qasymm8(0.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ + +#ifdef ARM_COMPUTE_ENABLE_SME +// TODO(COMPMID-6011): SME kernels and the reference model use different rounding mode. +// Temporarily increase the tolerance for quantized data. +constexpr AbsoluteTolerance<float> tolerance_qasymm8(1.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ +#else // ARM_COMPUTE_ENABLE_SME +constexpr AbsoluteTolerance<float> tolerance_qasymm8(0.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ +#endif // ARM_COMPUTE_ENABLE_SME /** CNN data types */ -const auto CNNDataTypes = framework::dataset::make("DataType", +const auto CNNDataTypes = make("DataType", { #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC DataType::F16, @@ -88,14 +102,41 @@ const auto CNNDataTypes = framework::dataset::make("DataType", DataType::F32, DataType::QASYMM8, }); -const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo", +const auto ActivationFunctionsDataset = make("ActivationInfo", { ActivationLayerInfo(), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f) }); -const auto QuantizationData = framework::dataset::make("QuantizationInfo", +const auto NoActivation = make("ActivationInfo", +{ + ActivationLayerInfo(), +}); + +const auto ActivationFunctionsDatasetNightly = make("ActivationInfo", +{ + ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f), + + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f, -0.5f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SOFT_RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ABS), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SQUARE), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SWISH), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::HARD_SWISH), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 2.f, 1.f), +#ifdef __aarch64__ + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::GELU), +#endif // __aarch64__ +}); + +const auto QuantizationData = make("QuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(0.3f, 3), @@ -110,32 +151,32 @@ TEST_SUITE(ConvolutionLayer) // *INDENT-OFF* // clang-format off DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F32), + make("InputInfo", { TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F32), TensorInfo(TensorShape(23U, 27U, 32U, 4U), 1, DataType::F32), TensorInfo(TensorShape(3U, 3U, 2U, 1U), 1, DataType::F32), TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32) }), - framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F32), + make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F32), TensorInfo(TensorShape(5U, 5U, 32U, 21U), 1, DataType::F32), TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32), TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16) })), - framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F32), + make("OutputInfo", { TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F32), TensorInfo(TensorShape(19U, 23U, 21U, 4U), 1, DataType::F32), TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32), TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32) })), - framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0), + make("ConvInfo", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(3, 2, 1, 0) })), - framework::dataset::make("FastMath", { true, + make("FastMath", { true, true, false, false })), - framework::dataset::make("Expected", { ConvolutionMethod::WINOGRAD, ConvolutionMethod::WINOGRAD, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM })), + make("Expected", { ConvolutionMethod::WINOGRAD, ConvolutionMethod::WINOGRAD, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM })), input_info, weights_info, output_info, conv_info, fast_math, expected) { ConvolutionMethod is_valid = NEConvolutionLayer::get_convolution_method(&input_info.clone()->set_is_resizable(true), @@ -147,30 +188,267 @@ DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(z // *INDENT-ON* TEST_SUITE_END() // ConvolutionLayer +/* + Testing Strategy of Neon Winograd: + - There is no need to thoroughly test nchw cases because winograd kernels accept + nhwc and the tensors are permuted before and after if they're nchw. + - Except relu and bounded relu, testing activations for a single input + combination is enough because activation is not fused into winograd and called + separately. +*/ TEST_SUITE(WinogradLayer) template <typename T> using NEWinogradConvolutionLayerFixture = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, T>; +template <typename T> +using NEWinogradConvolutionLayerMixedDataLayoutFixture = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, T, T, true, true>; template <typename T> using NEWinogradConvolutionLayerNoBiasFixture = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, T, T, false>; +/** Test case for memory injection in @ref cpu::CpuWinogradConv2d. + * + * Configure the operator once and inject memory at run-time in multiple executions. + * + * Checks performed in order: + * - Both runs compute the same output + */ +TEST_CASE(MemoryInjection, framework::DatasetMode::ALL) +{ + auto winograd = std::make_unique<cpu::CpuWinogradConv2d>(); + const auto src_info = TensorInfo(TensorShape(8U, 8U, 32U), 1, DataType::F32); + const auto w_info = TensorInfo(TensorShape(1U), 1, DataType::F32); + const auto b_info = TensorInfo(TensorShape(1U, 3U, 32U, 1U), 1, DataType::F32); + auto dst_info = TensorInfo(TensorShape(8U, 6U, 1U), 1, DataType::F32); + const PadStrideInfo pad_info{}; + + winograd->configure(&src_info, &b_info, &w_info, &dst_info, pad_info); + + // telhs are newly created every call of this lambda function + auto a = create_tensor<Tensor>(src_info); + auto b = create_tensor<Tensor>(b_info); + auto c = create_tensor<Tensor>(w_info); + a.allocator()->allocate(); + b.allocator()->allocate(); + c.allocator()->allocate(); + + ITensorPack run_pack{ { TensorType::ACL_SRC_0, &a }, { TensorType::ACL_SRC_1, &b }, { TensorType::ACL_SRC_2, &c } }; + ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &b }, { TensorType::ACL_SRC_2, &c } }; + + auto mg = MemoryGroup{}; + auto ws = manage_workspace<Tensor>(winograd->workspace(), mg, run_pack, prep_pack); + auto run_conv = [&]() -> Tensor + { + auto dst = create_tensor<Tensor>(dst_info); + dst.allocator()->allocate(); + + run_pack.add_tensor(TensorType::ACL_DST, &dst); + library->fill_tensor_value(Accessor(a), 1.f); + library->fill_tensor_value(Accessor(b), 2.f); + library->fill_tensor_value(Accessor(c), 3.f); + + // This operator is configured once and captured by this lambda. + winograd->prepare(prep_pack); + winograd->run(run_pack); + return dst; + }; + + auto result_0 = run_conv(); + auto result_1 = run_conv(); + + for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) + { + ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS); + } +} + +/** Test case for memory injection in @ref NEWinogradConvolutionLayer. + * + * Make sure @ref NEWinogradConvolutionLayer still works through injecting the memory at configure time using the old API. + * + * Checks performed in order: + * - Both runs compute the same output + */ +TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL) +{ + auto gemm = std::make_unique<NEWinogradConvolutionLayer>(); + const auto src_info = TensorInfo(TensorShape(8U, 8U, 32U), 1, DataType::F32); + const auto w_info = TensorInfo(TensorShape(1U), 1, DataType::F32); + const auto b_info = TensorInfo(TensorShape(1U, 3U, 32U, 1U), 1, DataType::F32); + auto dst_info = TensorInfo(TensorShape(8U, 6U, 1U), 1, DataType::F32); + const PadStrideInfo pad_info{}; + + auto run_conv = [&]() + { + auto src = create_tensor<Tensor>(src_info); + auto w = create_tensor<Tensor>(w_info); + auto b = create_tensor<Tensor>(b_info); + auto dst = create_tensor<Tensor>(dst_info); + + gemm->configure(&src, &b, &w, &dst, pad_info); + + src.allocator()->allocate(); + b.allocator()->allocate(); + w.allocator()->allocate(); + dst.allocator()->allocate(); + + library->fill_tensor_value(Accessor(src), 1.f); + library->fill_tensor_value(Accessor(b), 2.f); + library->fill_tensor_value(Accessor(w), 3.f); + gemm->run(); + return dst; + }; + + auto result_0 = run_conv(); + auto result_1 = run_conv(); + + for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) + { + ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS); + } +} + +DATA_TEST_CASE(SupportedKernels, framework::DatasetMode::ALL, zip( + make("WeightsInfo", +{ + // Shapes are always in NCHW format. When layout is NHWC, the shape is permuted + + // Fp32, NCHW/NHWC (layout does not matter as it's ) + // 3x1, 1x3, 3x3 --> all TRUE + TensorInfo(TensorShape(3U, 3U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(1U, 3U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(3U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW), + + // 5x1, 1x5, 5x5 --> all TRUE + TensorInfo(TensorShape(5U, 5U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(1U, 5U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(5U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW), + + // 7x1, 1x7, 7x7 + // --> all FALSE + TensorInfo(TensorShape(7U, 7U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(1U, 7U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(7U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC), + + // unsupported kernel sizes + TensorInfo(TensorShape(2U, 2U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(5U, 2U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(3U, 6U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW), + + // Fp16 + TensorInfo(TensorShape(3U, 3U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC), + TensorInfo(TensorShape(1U, 3U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC), + TensorInfo(TensorShape(3U, 1U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW), + + // 5x1, 1x5, 5x5 --> all TRUE + TensorInfo(TensorShape(5U, 5U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW), + TensorInfo(TensorShape(1U, 5U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC), + TensorInfo(TensorShape(5U, 1U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW), + + // 7x1, 1x7, 7x7 + // --> all FALSE + TensorInfo(TensorShape(7U, 7U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW), + TensorInfo(TensorShape(1U, 7U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC), + TensorInfo(TensorShape(7U, 1U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC), + + // unsupported kernel sizes + TensorInfo(TensorShape(2U, 2U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC), + TensorInfo(TensorShape(5U, 2U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC), + TensorInfo(TensorShape(3U, 6U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW), + +}), +make("Expected", +{ + // fp32 + true, true, true, // 3x3, 1x3, 3x1 + true, true, true, // 5x5, 1x5, 5x1 + false, true, true, // 7x7, 1x7, 7x1 + false, false, false, // random unsupported kernels + + // fp16 + true, false, false, // 3x3, 1x3, 3x1 + false, false, false, // 5x5, 1x5, 5x1 + false, false, false, // 7x7, 1x7, 7x1 + false, false, false, // random unsupported kernels +})), +weights_info_const, expected_const) +{ + DataType data_type = weights_info_const.data_type(); + DataLayout data_layout = weights_info_const.data_layout(); + + TensorInfo input_info = TensorInfo(TensorShape(17U, 31U, 2U), 1, data_type); + TensorInfo bias_info = TensorInfo(TensorShape(8U), 1, data_type); + TensorInfo weights_info = weights_info_const; + + if(data_layout == DataLayout::NHWC) + { + // Convert to NHWC + PermutationVector perm = PermutationVector(2U, 0U, 1U); + + TensorShape input_shape = input_info.tensor_shape(); + TensorShape weights_shape = weights_info.tensor_shape(); + permute(input_shape, perm); + permute(weights_shape, perm); + + input_info.set_tensor_shape(input_shape); + weights_info.set_tensor_shape(weights_shape); + + input_info.set_data_layout(data_layout); + weights_info.set_data_layout(data_layout); + bias_info.set_data_layout(data_layout); + } + + PadStrideInfo conv_info(1, 1, 0, 0); + + TensorShape output_shape = compute_deep_convolution_shape(input_info, weights_info, conv_info); + TensorInfo output_info = TensorInfo(output_shape, 1, data_type, data_layout); + + Status status = NEWinogradConvolutionLayer::validate( + &input_info, + &weights_info, + &bias_info, + &output_info, + conv_info, + ActivationLayerInfo(), + true /* fast math */); + + Status fp16_supported = ::arm_compute::error_on_unsupported_cpu_fp16("N/A", "N/A", 0, &input_info); + bool expected = expected_const && static_cast<bool>(fp16_supported); + + ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); +} + TEST_SUITE(FP32) TEST_SUITE(Conv1x3) FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(), + make("DataType", { DataType::F32 }), + ActivationFunctionsDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) +{ + // Validate output + validate(Accessor(_target), _reference, abs_tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEWinogradConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, + combine( + make("Input", TensorShape(8U, 8U, 32U)), + make("Weight", TensorShape(1U, 3U, 32U, 1U)), + make("Bias", TensorShape(1U)), + make("Output", TensorShape(8U, 6U, 1U)), + make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)), + make("Dilation", Size2D(1U, 1U)), + make("DataType", { DataType::F32 }), + ActivationFunctionsDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(Accessor(_target), _reference, abs_tolerance_f32); } FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(), + make("DataType", { DataType::F32 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NHWC }))) { // Validate output validate(Accessor(_target), _reference, abs_tolerance_1xN_f32); @@ -180,19 +458,19 @@ TEST_SUITE_END() // Conv1x3 TEST_SUITE(Conv3x1) FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(), + make("DataType", { DataType::F32 }), + ActivationFunctionsDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(Accessor(_target), _reference, abs_tolerance_f32); } FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(), + make("DataType", { DataType::F32 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NHWC }))) { // Validate output validate(Accessor(_target), _reference, abs_tolerance_1xN_f32); @@ -202,19 +480,19 @@ TEST_SUITE_END() // Conv3x1 TEST_SUITE(Conv1x5) FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(), + make("DataType", { DataType::F32 }), + ActivationFunctionsDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(Accessor(_target), _reference, abs_tolerance_f32); } FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(), + make("DataType", { DataType::F32 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NHWC }))) { // Validate output validate(Accessor(_target), _reference, abs_tolerance_1xN_f32); @@ -224,19 +502,19 @@ TEST_SUITE_END() // Conv1x5 TEST_SUITE(Conv5x1) FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(), + make("DataType", { DataType::F32 }), + ActivationFunctionsDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(Accessor(_target), _reference, abs_tolerance_f32); } FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(), + make("DataType", { DataType::F32 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NHWC }))) { // Validate output validate(Accessor(_target), _reference, abs_tolerance_1xN_f32); @@ -246,10 +524,10 @@ TEST_SUITE_END() // Conv5x1 TEST_SUITE(Conv7x1) FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer7x1Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer7x1Dataset(), + make("DataType", { DataType::F32 }), + ActivationFunctionsDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(Accessor(_target), _reference, abs_tolerance_f32); @@ -257,9 +535,9 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, frame FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeWinogradConvolutionLayer7x1Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + make("DataType", { DataType::F32 })), + make("ActivationInfo", { ActivationLayerInfo() })), + make("DataLayout", { DataLayout::NHWC }))) { // Validate output validate(Accessor(_target), _reference, abs_tolerance_1xN_f32); @@ -268,20 +546,20 @@ TEST_SUITE_END() // Conv7x1 TEST_SUITE(Conv1x7) FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(), + make("DataType", { DataType::F32 }), + ActivationFunctionsDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(Accessor(_target), _reference, abs_tolerance_f32); } FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer7x1Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer7x1Dataset(), + make("DataType", { DataType::F32 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NHWC }))) { // Validate output validate(Accessor(_target), _reference, abs_tolerance_1xN_f32); @@ -290,20 +568,40 @@ TEST_SUITE_END() // Conv1x7 TEST_SUITE(Conv3x3) FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(), + make("DataType", { DataType::F32 }), + ActivationFunctionsDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(Accessor(_target), _reference, abs_tolerance_f32); } + +/// It's enough to run the activations for a single weight/input combination and data type because +/// activation function is called on top of the winograd output as a separate operator +/// TODO: Enable after COMPMID-6573 is resolved +FIXTURE_DATA_TEST_CASE(RunActivations, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::DISABLED, + combine( + make("Input", TensorShape(3U, 3U, 32U)), + make("Weight", TensorShape(3U, 3U, 32U, 4U)), + make("Bias", TensorShape(4U)), + make("Output", TensorShape(1U, 1U, 4U)), + make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)), + make("Dilation", Size2D(1U, 1U)), + make("DataType", { DataType::F32 }), + ActivationFunctionsDatasetNightly, + make("DataLayout", { DataLayout::NHWC }))) +{ + // Validate output + validate(Accessor(_target), _reference, abs_tolerance_f32); +} + FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(), + make("DataType", { DataType::F32 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NHWC }))) { // Validate output @@ -314,20 +612,20 @@ TEST_SUITE_END() // Conv3x3 TEST_SUITE(Conv5x5) FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(), + make("DataType", { DataType::F32 }), + ActivationFunctionsDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(Accessor(_target), _reference, abs_tolerance_f32); } FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(), + make("DataType", { DataType::F32 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NHWC }))) { // Validate output @@ -337,12 +635,12 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, frame TEST_SUITE_END() // Conv5x5 FIXTURE_DATA_TEST_CASE(RunSmallNoBias, NEWinogradConvolutionLayerNoBiasFixture<float>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(framework::dataset::concat(datasets::SmallWinogradConvolutionLayer3x3Dataset(), - datasets::SmallWinogradConvolutionLayer5x5Dataset()), - framework::dataset::make("DataType", { DataType::F32 })), - ActivationFunctionsDataset), - - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(framework::dataset::concat( + datasets::SmallWinogradConvolutionLayer3x3Dataset(), + datasets::SmallWinogradConvolutionLayer5x5Dataset()), + make("DataType", { DataType::F32 }), + ActivationFunctionsDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output validate(Accessor(_target), _reference, abs_tolerance_f32); @@ -354,12 +652,39 @@ TEST_SUITE_END() // FP32 TEST_SUITE(FP16) using CLWinogradConvolutionLayerFastMathFixture16 = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, half, float>; +DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip( + make("InputInfo", { TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F16), + TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F16) + }), + make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F16), + TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F16) + }), + make("OutputInfo", { TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F16) + }), + make("ConvInfo", { PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0) + }), + make("FastMath", +{ + false, // case fp16 and fast_math False then disable Winograd + true // case fp16 and fast_math True then enable Winograd +}), +make("Expected", { ConvolutionMethod::GEMM, ConvolutionMethod::WINOGRAD })), +input_info, weights_info, output_info, conv_info, fast_math, expected) +{ + ConvolutionMethod is_valid = NEConvolutionLayer::get_convolution_method(&input_info.clone()->set_is_resizable(true), + &weights_info.clone()->set_is_resizable(true), + &output_info.clone()->set_is_resizable(true), conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), fast_math); + ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); +} + TEST_SUITE(Conv3x3) FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(), - framework::dataset::make("DataType", { DataType::F16 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(), + make("DataType", { DataType::F16 }), + ActivationFunctionsDataset, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output @@ -367,10 +692,10 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr } FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(), - framework::dataset::make("DataType", { DataType::F16 })), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(), + make("DataType", { DataType::F16 }), + make("ActivationInfo", { ActivationLayerInfo() }), + make("DataLayout", { DataLayout::NHWC }))) { // Validate output @@ -381,16 +706,470 @@ TEST_SUITE_END() // FP16 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE_END() // WinogradLayer +#ifdef ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS +TEST_SUITE(FIXED_FORMAT_KERNELS) +TEST_SUITE(VariableWeightUtils) + +// UC2_1_* tests: the user requests a specific fixed format, but there is no kernel that supports it. + +template <typename ConvolutionClass> +using HasOptImplFixtureNoFastMath = HasOptImplFixture<ConvolutionClass, /*enable_fast_math*/ false>; + +template <typename ConvolutionClass> +using HasOptImplFixtureFastMath = HasOptImplFixture<ConvolutionClass, /*enable_fast_math*/ true>; + +// UC2_1 + +FIXTURE_DATA_TEST_CASE(UC2_1_CpuGemmConv2d, HasOptImplFixtureNoFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL, + combine(framework::dataset::make("DataType", { DataType::F32 }), + framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo2 }))) +{ + ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); +} +FIXTURE_DATA_TEST_CASE(UC2_1_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL, + combine(framework::dataset::make("DataType", { DataType::F32 }), + framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo2 }))) +{ + ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); +} + +FIXTURE_DATA_TEST_CASE(UC2_1_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL, + combine(framework::dataset::make("DataType", { DataType::F32 }), + framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo2 }))) +{ + ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); +} + +FIXTURE_DATA_TEST_CASE(UC2_1_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL, + combine(framework::dataset::make("DataType", { DataType::F32 }), + framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo2 }))) +{ + ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); +} + +// UC2_2_* tests: the user requests a specific fixed format, and a +// kernel that support that fixed format is found. + +FIXTURE_DATA_TEST_CASE(UC2_2_CpuGemmConv2d, HasOptImplFixtureNoFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL, + combine(framework::dataset::make("DataType", { DataType::F32 }), + framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo4 }))) +{ + ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format == arm_compute::WeightFormat::OHWIo4, framework::LogLevel::ERRORS); +} + +FIXTURE_DATA_TEST_CASE(UC2_2_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL, + combine(framework::dataset::make("DataType", { DataType::F32 }), + framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo4 }))) +{ + ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format == arm_compute::WeightFormat::OHWIo4, framework::LogLevel::ERRORS); +} + +#if defined(ARM_COMPUTE_ENABLE_BF16) +// These tests currently only works with SVE length 256 +// If other SVE length is used a kernel will fail to be found +// This needs to be addressed in order to ensure it doesn't revert to FP32 kernels for systems with SVE length other than 256 +FIXTURE_DATA_TEST_CASE(UC2_2_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL, + combine(framework::dataset::make("DataType", { DataType::F32 }), + framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo8i4_bf16 }))) +{ + if(Scheduler::get().cpu_info().has_bf16() && (arm_gemm::utils::get_vector_length<float>() == 8)){ + ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT_EQUAL(_computed_weight_format, arm_compute::WeightFormat::OHWIo8i4_bf16, framework::LogLevel::ERRORS); + } + else{ + ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); + } +} + +FIXTURE_DATA_TEST_CASE(UC2_2_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL, + combine(framework::dataset::make("DataType", { DataType::F32 }), + framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo8i4_bf16 }))) +{ + if(Scheduler::get().cpu_info().has_bf16() && (arm_gemm::utils::get_vector_length<float>() == 8)){ + ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format == arm_compute::WeightFormat::OHWIo8i4_bf16, framework::LogLevel::ERRORS); + } + else{ + ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); + } +} + +#endif // ARM_COMPUTE_ENABLE_BF16 + +// UC3_1_* tests: the user queries for ANY fixed format, but there is +// no kernel that support the use case specified by the user (for +// example, there is no fixed format kernel for the datatype of the +// problem). + +FIXTURE_DATA_TEST_CASE(UC3_1_CpuGemmConv2d, HasOptImplFixtureNoFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL, + combine(framework::dataset::make("DataType", { DataType::S32 }), + framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY }))) +{ + ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); +} + +FIXTURE_DATA_TEST_CASE(UC3_1_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL, + combine(framework::dataset::make("DataType", { DataType::S32 }), + framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY }))) +{ + ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); +} + +FIXTURE_DATA_TEST_CASE(UC3_1_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL, + combine(framework::dataset::make("DataType", { DataType::S32 }), + framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY }))) +{ + ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); +} + +FIXTURE_DATA_TEST_CASE(UC3_1_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL, + combine(framework::dataset::make("DataType", { DataType::S32 }), + framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY }))) +{ + ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS); +} + +// UC3_2_* tests: the user queries for ANY fixed format. The search +// succeeded and the fixed format found is prompted back for +// consumption by the user. Note that we just test the +// _computed_weight_format to be anything but not the formats that are +// not fixed formats (ANY and UNSPECIFIED). This is because the weight +// format that the runtime produces depends on the size of the vector +// units of the hardware where the tests is executed. For example, a +// format like OHWIo4 for FP32 data returned for 128-bit NEON hardware +// is replaced by OHWIo8 when running on 256-bit SVE. + +FIXTURE_DATA_TEST_CASE(UC3_2_CpuGemmConv2d, HasOptImplFixtureNoFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL, + combine(framework::dataset::make("DataType", { DataType::F32 }), + framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY }))) +{ + ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS); +} + +FIXTURE_DATA_TEST_CASE(UC3_2_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL, + combine(framework::dataset::make("DataType", { DataType::F32 }), + framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY }))) +{ + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS); +} + +#if defined(ARM_COMPUTE_ENABLE_BF16) + +FIXTURE_DATA_TEST_CASE(UC3_2_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL, + combine(framework::dataset::make("DataType", { DataType::F32 }), + framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY }))) +{ + if(Scheduler::get().cpu_info().has_bf16()){ + ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS); + } + else{ + ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS); + } +} + +FIXTURE_DATA_TEST_CASE(UC3_2_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL, + combine(framework::dataset::make("DataType", { DataType::F32 }), + framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY }))) +{ + if(Scheduler::get().cpu_info().has_bf16()){ + ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS); + } + else{ + ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS); + } +} + +#endif // ARM_COMPUTE_ENABLE_BF16 + +namespace +{ +using TestCaseType = std::tuple<TensorShape, TensorShape, arm_compute::WeightFormat>; +auto prepare_weights_shapes = framework::dataset::make("TensorShape", +{ + // OHWIo<interleave_by>i<block_by> + // + // OHWI --> O'HWI', where: + // + // O'= smallest multiple of <interleave_by> such that O<=O' + // I'= smallest multiple of <block_by> such that I<=I' + // + + // Change N for OHWIo4 + TestCaseType({ { 1U, 1U, 1U, 1U }, { 1U, 1U, 1U, 4U }, arm_compute::WeightFormat::OHWIo4 }), + TestCaseType({ { 1U, 1U, 1U, 2U }, { 1U, 1U, 1U, 4U }, arm_compute::WeightFormat::OHWIo4 }), + TestCaseType({ { 1U, 1U, 1U, 3U }, { 1U, 1U, 1U, 4U }, arm_compute::WeightFormat::OHWIo4 }), + TestCaseType({ { 1U, 1U, 1U, 4U }, { 1U, 1U, 1U, 4U }, arm_compute::WeightFormat::OHWIo4 }), + TestCaseType({ { 1U, 1U, 1U, 5U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo4 }), + TestCaseType({ { 1U, 1U, 1U, 6U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo4 }), + TestCaseType({ { 1U, 1U, 1U, 7U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo4 }), + TestCaseType({ { 1U, 1U, 1U, 8U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo4 }), + TestCaseType({ { 1U, 1U, 1U, 9U }, { 1U, 1U, 1U, 12U }, arm_compute::WeightFormat::OHWIo4 }), + // // Change N for OHWIo8 + TestCaseType({ { 1U, 1U, 1U, 1U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }), + TestCaseType({ { 1U, 1U, 1U, 2U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }), + TestCaseType({ { 1U, 1U, 1U, 3U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }), + TestCaseType({ { 1U, 1U, 1U, 4U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }), + TestCaseType({ { 1U, 1U, 1U, 5U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }), + TestCaseType({ { 1U, 1U, 1U, 6U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }), + TestCaseType({ { 1U, 1U, 1U, 7U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }), + TestCaseType({ { 1U, 1U, 1U, 8U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }), + TestCaseType({ { 1U, 1U, 1U, 9U }, { 1U, 1U, 1U, 16U }, arm_compute::WeightFormat::OHWIo8 }), + // // Change N for OHWIo4 when H, W and C are not 1 + TestCaseType({ { 3U, 4U, 2U, 1U }, { 3, 4, 2, 4 }, arm_compute::WeightFormat::OHWIo4 }), + TestCaseType({ { 3U, 4U, 2U, 2U }, { 3, 4, 2, 4 }, arm_compute::WeightFormat::OHWIo4 }), + TestCaseType({ { 3U, 4U, 2U, 3U }, { 3, 4, 2, 4 }, arm_compute::WeightFormat::OHWIo4 }), + TestCaseType({ { 3U, 4U, 2U, 4U }, { 3, 4, 2, 4 }, arm_compute::WeightFormat::OHWIo4 }), + TestCaseType({ { 3U, 4U, 2U, 5U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }), + TestCaseType({ { 3U, 4U, 2U, 6U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }), + TestCaseType({ { 3U, 4U, 2U, 7U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }), + TestCaseType({ { 3U, 4U, 2U, 8U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }), + TestCaseType({ { 3U, 4U, 2U, 9U }, { 3, 4, 2, 12 }, arm_compute::WeightFormat::OHWIo4 }), + + // // Fix N and move HWI around, with different data layouts and formats + TestCaseType({ { 2U, 4U, 3U, 5U }, { 2, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4 }), + TestCaseType({ { 3U, 4U, 2U, 5U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }), + TestCaseType({ { 2U, 4U, 3U, 9U }, { 2, 4, 3, 16 }, arm_compute::WeightFormat::OHWIo8 }), + TestCaseType({ { 3U, 4U, 2U, 9U }, { 3, 4, 2, 16 }, arm_compute::WeightFormat::OHWIo8 }), + TestCaseType({ { 1024U, 1U, 1U, 1001U }, { 1024, 1, 1, 1008 }, arm_compute::WeightFormat::OHWIo8 }), + + // // Adding <block_by> on I (=C) + TestCaseType({ { 1U, 4U, 3U, 5U }, { 2, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4i2 }), + TestCaseType({ { 2U, 4U, 3U, 5U }, { 2, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4i2 }), + TestCaseType({ { 3U, 4U, 3U, 5U }, { 4, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4i2 }), + + // --------- + TestCaseType({ { 2, 2, 1, 5 }, { 2, 2, 1, 8 }, arm_compute::WeightFormat::OHWIo4 }), + TestCaseType({ { 1, 2, 2, 5 }, { 1, 2, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }), + +}); +} // unnamed namespace + +DATA_TEST_CASE(PrepareWeightShape, framework::DatasetMode::ALL, + prepare_weights_shapes, shapes) +{ + const TensorShape input_shape = std::get<0>(shapes); + const TensorShape expected_shape = std::get<1>(shapes); + const arm_compute::WeightFormat wf = std::get<2>(shapes); + const DataType DT = DataType::F32; + const DataLayout DL = DataLayout::NHWC; + const auto TI = TensorInfo(input_shape, 1 /*num_channels, deprecated*/, DT, DL); + const TensorInfo computed_info = ::arm_compute::test::validation::prepare_weights(TI, wf); + ARM_COMPUTE_EXPECT_EQUAL(computed_info.tensor_shape(), expected_shape, framework::LogLevel::ERRORS); +} + +TEST_SUITE_END() // VariableWeightUtils + +TEST_SUITE(ExperimentalCpuAPIVariableWeightWithFixtures) + +template <typename ScalarType> +using VarWidth = VariableWeightsFixture<cpu::CpuGemmConv2d, Tensor, Accessor, ScalarType, /*enable_fast_math*/ false>; + +FIXTURE_DATA_TEST_CASE(RunSmallFloat, VarWidth<float>, framework::DatasetMode::ALL, + combine(combine(datasets::SmallConvolutionLayerDataset(), + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + framework::dataset::make("ACL Scalar type", { DataType::F32 }))) +{ + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32)); +} + +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +FIXTURE_DATA_TEST_CASE(RunSmallHalf, VarWidth<half>, framework::DatasetMode::ALL, + combine(combine(datasets::SmallConvolutionLayerDataset(), + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + framework::dataset::make("ACL Scalar type", { DataType::F16 }))) +{ + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, 0.f, half(abs_tolerance_f16)); +} +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + +#if defined(ARM_COMPUTE_ENABLE_BF16) +template <typename ScalarType> +using VarWidthFastMath = VariableWeightsFixture<cpu::CpuGemmConv2d, Tensor, Accessor, ScalarType, /*enable_fast_math*/ true>; + +FIXTURE_DATA_TEST_CASE(RunSmallFloatFastMath, VarWidthFastMath<float>, framework::DatasetMode::ALL, + combine(combine(datasets::SmallConvolutionLayerDataset(), + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + framework::dataset::make("ACL Scalar type", { DataType::F32 }))) +{ + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32)); +} +#endif // ARM_COMPUTE_ENABLE_BF16 + +TEST_SUITE_END() // ExperimentalCpuAPIVariableWeightWithFixtures + +TEST_SUITE(ExperimentalNEAPIVariableWeightWithFixtures) + +template <typename ScalarType> +using NEGEMMVarWidth = VariableWeightsFixtureNEInterface<NEGEMMConvolutionLayer, Tensor, Accessor, ScalarType, /*enable_fast_math*/ false>; + +FIXTURE_DATA_TEST_CASE(NEGEMMRunSmallFloat, NEGEMMVarWidth<float>, framework::DatasetMode::ALL, + combine(combine(datasets::SmallConvolutionLayerDataset(), + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + framework::dataset::make("ACL Scalar type", { DataType::F32 }))) +{ + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32)); +} + +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +FIXTURE_DATA_TEST_CASE(NEGEMMRunSmallHalf, NEGEMMVarWidth<half>, framework::DatasetMode::ALL, + combine(combine(datasets::SmallConvolutionLayerDataset(), + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + framework::dataset::make("ACL Scalar type", { DataType::F16 }))) +{ + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, 0.f, half(abs_tolerance_f16)); +} +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + +#if defined(ARM_COMPUTE_ENABLE_BF16) +template <typename ScalarType> +using NEGEMMVarWidthFastMath = VariableWeightsFixtureNEInterface<NEGEMMConvolutionLayer, Tensor, Accessor, ScalarType, /*enable_fast_math*/ true>; + +FIXTURE_DATA_TEST_CASE(NEGEMMRunSmallFloatFastMath, NEGEMMVarWidthFastMath<float>, framework::DatasetMode::ALL, + combine(combine(datasets::SmallConvolutionLayerDataset(), + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + framework::dataset::make("ACL Scalar type", { DataType::F32 }))) +{ + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32)); +} +#endif // ARM_COMPUTE_ENABLE_BF16 + +TEST_SUITE_END() // ExperimentalNEAPIVariableWeightWithFixtures +TEST_SUITE_END() // FIXED_FORMAT_KERNELS + +#endif // ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS + TEST_SUITE(GEMMConvolutionLayer) template <typename T> using NEGEMMConvolutionLayerFixture = ConvolutionValidationFixture<Tensor, Accessor, NEConvolutionLayer, T>; +template <typename T> +using NEGEMMConvolutionLayerPaddedWeightsFixture = ConvolutionValidationPaddedWeightsFixture<Tensor, Accessor, NEConvolutionLayer, T>; +template <typename T> +using NEGEMMConvolutionLayerMixedDataLayoutFixture = ConvolutionValidationFixture<Tensor, Accessor, NEConvolutionLayer, T, true>; + +/** Test case for memory injection in @ref cpu::CpuGemmConv2d. + * + * Configure the operator once and inject memory at run-time in multiple executions. + * + * Checks performed in order: + * - Both runs compute the same output + */ +TEST_CASE(MemoryInjection, framework::DatasetMode::ALL) +{ + auto conv = std::make_unique<cpu::CpuGemmConv2d>(); + const auto src_info = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NCHW); + const auto weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NCHW); + const auto bias_info = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NCHW); + auto dst_info = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NCHW); + const auto conv_info = PadStrideInfo(1, 1, 0, 0, 2, 2, DimensionRoundingType::FLOOR); + WeightsInfo weights_info(false, 3U, 3U, 1U); + conv->configure(&src_info, &weight_info, &bias_info, &dst_info, conv_info, weights_info); + + // tensors are newly created every call of this lambda function + auto src = create_tensor<Tensor>(src_info); + auto weight = create_tensor<Tensor>(weight_info); + auto bias = create_tensor<Tensor>(bias_info); + src.allocator()->allocate(); + weight.allocator()->allocate(); + bias.allocator()->allocate(); + + ITensorPack run_pack{ { TensorType::ACL_SRC_0, &src }, { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } }; + ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } }; + + auto mg = MemoryGroup{}; + auto ws = manage_workspace<Tensor>(conv->workspace(), mg, run_pack, prep_pack); + + auto run_conv = [&]() -> Tensor + { + auto dst = create_tensor<Tensor>(dst_info); + dst.allocator()->allocate(); + run_pack.add_tensor(TensorType::ACL_DST, &dst); + + library->fill_tensor_value(Accessor(src), 1.f); + library->fill_tensor_value(Accessor(weight), 2.f); + library->fill_tensor_value(Accessor(bias), 3.f); + // This operator is configured once and captured by this lambda. + conv->prepare(prep_pack); + conv->run(run_pack); + return dst; + }; + auto result_0 = run_conv(); + auto result_1 = run_conv(); + for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) + { + ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS); + } +} + +/** Test case for memory injection in @ref NEGEMMConvolutionLayer. + * + * Make sure @ref NEGEMMConvolutionLayer still works through injecting the memory at configure time using the old API. + * + * Checks performed in order: + * - Both runs compute the same output + */ +TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL) +{ + auto conv = std::make_unique<NEGEMMConvolutionLayer>(); + const auto src_info = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NCHW); + const auto weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NCHW); + const auto bias_info = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NCHW); + auto dst_info = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NCHW); + const auto conv_info = PadStrideInfo(1, 1, 0, 0, 2, 2, DimensionRoundingType::FLOOR); + WeightsInfo weights_info(false, 3U, 3U, 1U); + auto run_conv = [&]() + { + auto src = create_tensor<Tensor>(src_info); + auto weight = create_tensor<Tensor>(weight_info); + auto bias = create_tensor<Tensor>(bias_info); + auto dst = create_tensor<Tensor>(dst_info); + conv->configure(&src, &weight, &bias, &dst, conv_info, weights_info); + src.allocator()->allocate(); + weight.allocator()->allocate(); + bias.allocator()->allocate(); + dst.allocator()->allocate(); + library->fill_tensor_value(Accessor(src), 1.f); + library->fill_tensor_value(Accessor(weight), 2.f); + library->fill_tensor_value(Accessor(bias), 3.f); + conv->run(); + return dst; + }; + auto result_0 = run_conv(); + auto result_1 = run_conv(); + for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) + { + ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS); + } +} TEST_SUITE(Float) -#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) +#if defined(ARM_COMPUTE_ENABLE_BF16) TEST_SUITE(BFLOAT16) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), framework::dataset::make("ReshapeWeights", { true })), - framework::dataset::make("DataType", DataType::BFLOAT16)), + framework::dataset::make("DataType", Scheduler::get().cpu_info().has_bf16() ? DataType::BFLOAT16 : DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NHWC })), ActivationFunctionsDataset)) { @@ -398,7 +1177,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<float>, framework validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32)); } TEST_SUITE_END() // BFLOAT16 -#endif /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */ +#endif /* defined(ARM_COMPUTE_ENABLE_BF16) */ #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) @@ -424,11 +1203,59 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<float>, framework // Validate output validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32)); } +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEGEMMConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(combine(combine( + framework::dataset::make("Input", TensorShape(23U, 27U, 5U)), + framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))), + framework::dataset::make("Bias", TensorShape(2U))), + framework::dataset::make("Output", TensorShape(11U, 25U, 2U))), + framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))), + framework::dataset::make("Dilation", Size2D(1, 1))), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) +{ + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32)); +} +/** Padded weights + * CpuGemmConv2d uses two different paths for reshaping the weights based on if the weight tensor has holes (a common + * way to have "holes" in tensor is via extended paddings) + * + * We only need to test the padded weight path here on a single floating data type and a single layout, because the fallback path is agnostic of them + */ +FIXTURE_DATA_TEST_CASE(RunPaddedWeights, NEGEMMConvolutionLayerPaddedWeightsFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true }), + framework::dataset::make("DataType", DataType::F32), + framework::dataset::make("DataLayout", { DataLayout::NHWC }) + )) +{ + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32)); +} + +// This very large shape test is required to test heuristic paths where the tensor size is > 1e7 bytes +// and weight dimensions larger than 7 +FIXTURE_DATA_TEST_CASE(RunVeryLarge, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, + combine(datasets::VeryLargeConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true }), + framework::dataset::make("DataType", DataType::F32), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }), + NoActivation)) +{ + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32)); +} + TEST_SUITE_END() // FP32 TEST_SUITE_END() // Float +// TODO: COMPMID-6596 Extend quantized tests with at least one suite where the weight is padded (the legacy case, see floating point's RunPaddedWeights) template <typename T> using NEGEMMConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<Tensor, Accessor, NEConvolutionLayer, T>; +template <typename T> +using NEGEMMConvolutionLayerQuantizedMixedDataLayoutFixture = ConvolutionValidationQuantizedFixture<Tensor, Accessor, NEConvolutionLayer, T, true>; template <typename T> using NEGEMMConvolutionLayerQuantizedPerChannelFixture = ConvolutionValidationQuantizedPerChannelFixture<Tensor, Accessor, NEConvolutionLayer, T, int8_t>; @@ -440,17 +1267,39 @@ const auto QuantizedActivationFunctionsDataset = framework::dataset::make("Activ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f) }); TEST_SUITE(Quantized) +/// @note: Every asymmetric quantized test where there's no fused activation will have its quantization info ignored +/// This is because instead of using the same quantization information for all the tensors, the fixture generates +/// separate quantization info for each input and the output tensor. +/// When we can also support dynamic quantization with the presence of activation, these two versions should be merged +/// again, with the explicitly specified quantization info removed TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), + framework::dataset::make("QuantizationInfoIfActivationEnabled", { QuantizationInfo(2.f / 255.f, 10) })), QuantizedActivationFunctionsDataset)) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); } +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + framework::dataset::make("Input", TensorShape(23U, 27U, 5U)), + framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))), + framework::dataset::make("Bias", TensorShape(2U))), + framework::dataset::make("Output", TensorShape(11U, 25U, 2U))), + framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))), + framework::dataset::make("Dilation", Size2D(1, 1))), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + framework::dataset::make("QuantizationInfoIfActivationEnabled", { QuantizationInfo(2.f / 255.f, 10) })), + QuantizedActivationFunctionsDataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} TEST_SUITE_END() // QASYMM8 TEST_SUITE(QASYMM8_SIGNED) @@ -458,12 +1307,29 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedFixture<int8_t>, framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.01f, -10) })), + framework::dataset::make("QuantizationInfoIfActivationEnabled", { QuantizationInfo(0.01f, -10) })), QuantizedActivationFunctionsDataset)) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); } +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEGEMMConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + framework::dataset::make("Input", TensorShape(23U, 27U, 5U)), + framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))), + framework::dataset::make("Bias", TensorShape(2U))), + framework::dataset::make("Output", TensorShape(11U, 25U, 2U))), + framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))), + framework::dataset::make("Dilation", Size2D(1, 1))), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + framework::dataset::make("QuantizationInfoIfActivationEnabled", { QuantizationInfo(2.f / 255.f, 10) })), + QuantizedActivationFunctionsDataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} TEST_SUITE_END() // QASYMM8_SIGNED TEST_SUITE(QSYMM8_PER_CHANNEL) @@ -491,6 +1357,27 @@ FIXTURE_DATA_TEST_CASE(RunSmallSigned, NEGEMMConvolutionLayerQuantizedPerChannel // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); } + +FIXTURE_DATA_TEST_CASE(MemoryStressLargeChannels, NEGEMMConvolutionLayerQuantizedPerChannelFixture<int8_t>, + framework::DatasetMode::ALL, + combine( + make("In", TensorShape(1U)), + make("Weights", TensorShape(1U, 1U, 1U, 17000U)), + make("Biases", TensorShape(17000U)), + make("Out", TensorShape(1U, 1U, 17000U)), + make("Info", PadStrideInfo(1, 1, 0, 0)), + make("Dilation", Size2D(1, 1)), + make("ReshapeWeights", { true }), + make("DataType", { DataType::QASYMM8_SIGNED }), + make("DataLayout", { DataLayout::NHWC }), + make("QuantizationInfo", QuantizationInfo(0.5f, 10)), + make("ActivationInfo", ActivationLayerInfo()), + make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} + TEST_SUITE_END() // QSYMM8_PER_CHANNEL TEST_SUITE_END() // Quantized @@ -500,6 +1387,99 @@ TEST_SUITE(DirectGEMMConv2d) template <typename T> using NEDirectGEMMConv2dLayerFixture = ConvolutionValidationFixture<Tensor, Accessor, NEGEMMConv2d, T>; +/** Test case for memory injection in @ref cpu::CpuGemmDirectConv2d. + * + * Configure the operator once and inject memory at run-time in multiple executions. + * + * Checks performed in order: + * - Both runs compute the same output + */ +TEST_CASE(MemoryInjection, framework::DatasetMode::ALL) +{ + auto conv = std::make_unique<cpu::CpuGemmDirectConv2d>(); + const auto src_info = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NHWC); + const auto weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NHWC); + const auto bias_info = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NHWC); + auto dst_info = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NHWC); + const auto conv_info = Conv2dInfo{}; + conv->configure(&src_info, &weight_info, &bias_info, &dst_info, conv_info); + + // tensors are newly created every call of this lambda function + auto src = create_tensor<Tensor>(src_info); + auto weight = create_tensor<Tensor>(weight_info); + auto bias = create_tensor<Tensor>(bias_info); + src.allocator()->allocate(); + weight.allocator()->allocate(); + bias.allocator()->allocate(); + + ITensorPack run_pack{ { TensorType::ACL_SRC_0, &src }, { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } }; + ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } }; + + auto mg = MemoryGroup{}; + auto ws = manage_workspace<Tensor>(conv->workspace(), mg, run_pack, prep_pack); + + auto run_conv = [&]() -> Tensor + { + auto dst = create_tensor<Tensor>(dst_info); + dst.allocator()->allocate(); + run_pack.add_tensor(TensorType::ACL_DST, &dst); + + library->fill_tensor_value(Accessor(src), 1.f); + library->fill_tensor_value(Accessor(weight), 2.f); + library->fill_tensor_value(Accessor(bias), 3.f); + // This operator is configured once and captured by this lambda. + conv->prepare(prep_pack); + conv->run(run_pack); + return dst; + }; + auto result_0 = run_conv(); + auto result_1 = run_conv(); + for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) + { + ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS); + } +} + +/** Test case for memory injection in @ref NEGEMMConv2d. + * + * Make sure @ref NEGEMMConv2d still works through injecting the memory at configure time using the old API. + * + * Checks performed in order: + * - Both runs compute the same output + */ +TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL) +{ + auto conv = std::make_unique<NEGEMMConv2d>(); + const auto src_info = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NHWC); + const auto weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NHWC); + const auto bias_info = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NHWC); + auto dst_info = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NHWC); + const auto conv_info = Conv2dInfo{}; + auto run_conv = [&]() + { + auto src = create_tensor<Tensor>(src_info); + auto weight = create_tensor<Tensor>(weight_info); + auto bias = create_tensor<Tensor>(bias_info); + auto dst = create_tensor<Tensor>(dst_info); + conv->configure(&src, &weight, &bias, &dst, conv_info); + src.allocator()->allocate(); + weight.allocator()->allocate(); + bias.allocator()->allocate(); + dst.allocator()->allocate(); + library->fill_tensor_value(Accessor(src), 1.f); + library->fill_tensor_value(Accessor(weight), 2.f); + library->fill_tensor_value(Accessor(bias), 3.f); + conv->run(); + return dst; + }; + auto result_0 = run_conv(); + auto result_1 = run_conv(); + for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) + { + ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS); + } +} + TEST_SUITE(Float) TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(), diff --git a/tests/validation/NEON/DeconvolutionLayer.cpp b/tests/validation/NEON/DeconvolutionLayer.cpp index adb5d1709d..b4c049f6f9 100644 --- a/tests/validation/NEON/DeconvolutionLayer.cpp +++ b/tests/validation/NEON/DeconvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -47,54 +47,86 @@ constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for constexpr AbsoluteTolerance<float> tolerance_quantized(1.0f); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC const RelativeTolerance<half_float::half> tolerance_fp16(half_float::half(0.2f)); /**< Relative tolerance value for comparing reference's output against implementation's output for DataType::F16 */ +constexpr float tolerance_num_fp16 = 0.02f; /**< Tolerance number for FP16 tests -- follows a slightly stricter approach compared to ConvolutionLayer tests */ #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/ -constexpr float tolerance_num = 0.07f; /**< Tolerance number */ +constexpr float tolerance_num_quant = 0.07f; /**< Tolerance number for quantized types */ const auto data4x4 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 3) - * framework::dataset::make("PadY", 0, 3) * framework::dataset::make("NumKernels", { 3 }); + * framework::dataset::make("PadY", 0, 3) * framework::dataset::make("NumKernels", +{ + 3 +}); const auto data3x3 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 2) - * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("NumKernels", { 3 }); + * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("NumKernels", +{ + 3 +}); const auto data3x3_asymm = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 2) * framework::dataset::make("StrideY", 1, 2) * framework::dataset::make("PadLeft", 0, 1) - * framework::dataset::make("PadRight", 0, 1) * framework::dataset::make("PadTop", 0, 1) * framework::dataset::make("PadBottom", 0, 1) * framework::dataset::make("NumKernels", { 3 }); + * framework::dataset::make("PadRight", 0, 1) * framework::dataset::make("PadTop", 0, 1) * framework::dataset::make("PadBottom", 0, 1) * framework::dataset::make("NumKernels", +{ + 3 +}); -const auto data9x9_small_asymm = framework::dataset::make("InputShape", TensorShape{ 10U, 10U, 1U, 1U }) *framework::dataset::make("StrideX", 2) *framework::dataset::make("StrideY", - 2) - *framework::dataset::make("PadLeft", 3) - *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop", 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 }); +const auto data9x9_small_asymm = framework::dataset::make("InputShape", TensorShape +{ + 10U, 10U, 1U, 1U +}) +*framework::dataset::make("StrideX", 2) *framework::dataset::make("StrideY", 2) *framework::dataset::make("PadLeft", 3) *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop", + 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 }); -const auto data9x9_large_asymm = framework::dataset::make("InputShape", TensorShape{ 640U, 360U, 56U, 1U }) *framework::dataset::make("StrideX", 2) *framework::dataset::make("StrideY", - 2) - *framework::dataset::make("PadLeft", 3) - *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop", 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 }); +const auto data9x9_large_asymm = framework::dataset::make("InputShape", TensorShape +{ + 640U, 360U, 56U, 1U +}) +*framework::dataset::make("StrideX", 2) *framework::dataset::make("StrideY", 2) *framework::dataset::make("PadLeft", 3) *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop", + 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 }); const auto data3x3_precommit = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 2) * framework::dataset::make("StrideY", 1, 2) * framework::dataset::make("PadX", 0, 2) - * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("NumKernels", { 3 }); + * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("NumKernels", +{ + 3 +}); const auto data1x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 1) - * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("NumKernels", { 3 }); + * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("NumKernels", +{ + 3 +}); + +const auto data5x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 1) + * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("NumKernels", +{ + 3 +}); -const auto data_layouts_dataset = framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }); +const auto data_layouts_dataset = framework::dataset::make("DataLayout", +{ + DataLayout::NCHW, DataLayout::NHWC +}); -const auto add_bias_dataset = framework::dataset::make("AddBias", { true, false }); +const auto add_bias_dataset = framework::dataset::make("AddBias", +{ + true, false +}); const auto input_qinfo_dataset = framework::dataset::make("InputQInfo", { QuantizationInfo(1.f / 255.f, 0), - QuantizationInfo(2.f, 0), + QuantizationInfo(2.f, 0), }); const auto output_qinfo_dataset = framework::dataset::make("OutputQInfo", { QuantizationInfo(3.f / 255.f, 0), - QuantizationInfo(4.f, 0), + QuantizationInfo(4.f, 0), }); + } // namespace TEST_SUITE(NEON) TEST_SUITE(DeconvolutionLayer) - // *INDENT-OFF* // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( @@ -104,6 +136,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid bias shape TensorInfo(TensorShape(13U, 11U, 4U, 3U), 1, DataType::F32), // Window shrink TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(2U,2U,1U,1U), 1, DataType::F32), // Small shape no padding + TensorInfo(TensorShape(3U,26U,26U,1U), 1, DataType::F32), // Negative padding }), framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F16), TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32), @@ -111,6 +145,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( TensorInfo(TensorShape(3U, 2U, 2U, 2U), 1, DataType::F32), TensorInfo(TensorShape(3U, 3U, 4U), 1, DataType::F32), TensorInfo(TensorShape(1U, 1U, 2U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(3U,3U,1U,1U), 1, DataType::F32), + TensorInfo(TensorShape(1U,1U,26U,88U), 1, DataType::F32), })), framework::dataset::make("BiasInfo", { TensorInfo(TensorShape(1U), 1, DataType::F16), TensorInfo(TensorShape(1U), 1, DataType::F32), @@ -118,6 +154,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( TensorInfo(TensorShape(25U, 11U), 1, DataType::F32), TensorInfo(TensorShape(1U), 1, DataType::F32), TensorInfo(TensorShape(4U), 1, DataType::F32), + TensorInfo(TensorShape(1U), 1, DataType::F32), + TensorInfo(TensorShape(88U), 1, DataType::F32), })), framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16), TensorInfo(TensorShape(25U, 10U, 2U), 1, DataType::F32), @@ -125,6 +163,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( TensorInfo(TensorShape(13U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(11U, 9U, 1U, 3U), 1, DataType::F32), TensorInfo(TensorShape(32U, 16U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(4U,4U,1U,1U), 1, DataType::F32), + TensorInfo(TensorShape(1U,78U,88U,1U), 1, DataType::F32), })), framework::dataset::make("PadStrideInfo", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(1, 1, 0, 0), @@ -132,8 +172,10 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( PadStrideInfo(1, 1, 0, 0), PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(2, 3, 3, 1), })), - framework::dataset::make("Expected", { false, false, false, false, false, true })), + framework::dataset::make("Expected", { false, false, false, false, false, true,true, false })), input_info, weights_info, bias_info, output_info, pad_info, expected) { bool is_valid = bool(NEDeconvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pad_info)); @@ -157,6 +199,9 @@ using NEDeconvolutionLayerAsymmFixture9x9 = DeconvolutionValidationAsymmFixture< template <typename T> using NEDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 1, 1>; +template <typename T> +using NEDeconvolutionLayerFixture5x1 = DeconvolutionValidationFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 5, 1>; + TEST_SUITE(Float) TEST_SUITE(FP32) TEST_SUITE(W4x4) @@ -220,6 +265,15 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerAsymmFixture9x9<float>, fra validate(Accessor(_target), _reference, tolerance_fp32); } TEST_SUITE_END() // W9x9 +TEST_SUITE(W5x1) +FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture5x1<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data5x1, framework::dataset::make("DataType", DataType::F32)), + data_layouts_dataset), + add_bias_dataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp32); +} +TEST_SUITE_END() // W5x1 TEST_SUITE_END() // FP32 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC @@ -230,7 +284,7 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture4x4<half>, framework::Dat add_bias_dataset)) { // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16); } TEST_SUITE_END() // W4x4 TEST_SUITE(W3x3) @@ -240,14 +294,14 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDeconvolutionLayerFixture3x3<half>, framework add_bias_dataset)) { // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16); } FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data3x3, framework::dataset::make("DataType", DataType::F16)), data_layouts_dataset), add_bias_dataset)) { // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16); } TEST_SUITE_END() // W3x3 TEST_SUITE(W1x1) @@ -256,9 +310,18 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture1x1<half>, framework::Dat add_bias_dataset)) { // Validate output - validate(Accessor(_target), _reference, tolerance_fp16); + validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16); } TEST_SUITE_END() // W1x1 +TEST_SUITE(W5x1) +FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture5x1<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data5x1, framework::dataset::make("DataType", DataType::F16)), + data_layouts_dataset), + add_bias_dataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16); +} +TEST_SUITE_END() // W5x1 TEST_SUITE_END() // FP16 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ @@ -273,6 +336,21 @@ using NEDeconvolutionLayerQuantizedFixture3x3 = DeconvolutionValidationQuantized template <typename T> using NEDeconvolutionLayerQuantizedFixture1x1 = DeconvolutionValidationQuantizedFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 1, 1>; +template <typename T> +using NEDeconvolutionLayerQuantizedFixture5x1 = DeconvolutionValidationQuantizedFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 5, 1>; + +template <typename T> +using NEDeconvolutionLayerQuantizedPerChannelFixture4x4 = DeconvolutionValidationQuantizedPerChannelFixture<Tensor, Accessor, NEDeconvolutionLayer, T, int8_t, 4, 4>; + +template <typename T> +using NEDeconvolutionLayerQuantizedPerChannelFixture3x3 = DeconvolutionValidationQuantizedPerChannelFixture<Tensor, Accessor, NEDeconvolutionLayer, T, int8_t, 3, 3>; + +template <typename T> +using NEDeconvolutionLayerQuantizedPerChannelFixture1x1 = DeconvolutionValidationQuantizedPerChannelFixture<Tensor, Accessor, NEDeconvolutionLayer, T, int8_t, 1, 1>; + +template <typename T> +using NEDeconvolutionLayerQuantizedPerChannelFixture5x1 = DeconvolutionValidationQuantizedPerChannelFixture<Tensor, Accessor, NEDeconvolutionLayer, T, int8_t, 5, 1>; + TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) @@ -285,7 +363,7 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture4x4<uint8_t>, fr add_bias_dataset)) { // Validate output - validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num); + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant); } TEST_SUITE_END() // W4x4 @@ -299,7 +377,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDeconvolutionLayerQuantizedFixture3x3<uint8_t add_bias_dataset)) { // Validate output - validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num); + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant); } FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data3x3, framework::dataset::make("DataType", @@ -310,7 +388,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerQuantizedFixture3x3<uint8_t add_bias_dataset)) { // Validate output - validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num); + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant); } TEST_SUITE_END() // W3x3 @@ -323,10 +401,23 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture1x1<uint8_t>, fr add_bias_dataset)) { // Validate output - validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num); + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant); } TEST_SUITE_END() // W1x1 +TEST_SUITE(W5x1) +FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture5x1<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data5x1, framework::dataset::make("DataType", + DataType::QASYMM8)), + data_layouts_dataset), + input_qinfo_dataset), + output_qinfo_dataset), + add_bias_dataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant); +} +TEST_SUITE_END() // W5x1 + TEST_SUITE_END() // QASYMM8 TEST_SUITE(QASYMM8_SIGNED) @@ -340,7 +431,7 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture4x4<int8_t>, fra add_bias_dataset)) { // Validate output - validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num); + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant); } TEST_SUITE_END() // W4x4 @@ -354,7 +445,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDeconvolutionLayerQuantizedFixture3x3<int8_t> add_bias_dataset)) { // Validate output - validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num); + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant); } FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerQuantizedFixture3x3<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data3x3, framework::dataset::make("DataType", @@ -365,24 +456,160 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerQuantizedFixture3x3<int8_t> add_bias_dataset)) { // Validate output - validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num); + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant); } TEST_SUITE_END() // W3x3 TEST_SUITE(W1x1) -FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture1x1<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data1x1, framework::dataset::make("DataType", - DataType::QASYMM8_SIGNED)), +FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture1x1<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data1x1, + framework::dataset::make("DataType", + DataType::QASYMM8_SIGNED)), data_layouts_dataset), input_qinfo_dataset), output_qinfo_dataset), add_bias_dataset)) { // Validate output - validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num); + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant); } TEST_SUITE_END() // W1x1 +TEST_SUITE(W5x1) +FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture5x1<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data5x1, framework::dataset::make("DataType", + DataType::QASYMM8_SIGNED)), + data_layouts_dataset), + input_qinfo_dataset), + output_qinfo_dataset), + add_bias_dataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant); +} +TEST_SUITE_END() // W5x1 + TEST_SUITE_END() // QASYMM8_SIGNED + +const auto input_qinfo_per_channel_dataset = framework::dataset::make("InputQuantizationInfo", +{ + QuantizationInfo(1.f / 255.f, 10) +}); +const auto output_qinfo_per_channel_dataset = framework::dataset::make("OutputQuantizationInfo", +{ + QuantizationInfo(3.f / 255.f, 0) +}); +const auto input_signed_qinfo_per_channel_dataset = framework::dataset::make("InputQuantizationInfo", +{ + QuantizationInfo(1.f / 255.f, -10) +}); +const auto output_signed_qinfo_per_channel_dataset = framework::dataset::make("OutputQuantizationInfo", +{ + QuantizationInfo(3.f / 255.f, 10) +}); + +TEST_SUITE(QSYMM8_PER_CHANNEL) + +TEST_SUITE(W4x4) +FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedPerChannelFixture4x4<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data4x4, + framework::dataset::make("DataType", DataType::QASYMM8)), + data_layouts_dataset), + input_qinfo_per_channel_dataset), + output_qinfo_per_channel_dataset), + add_bias_dataset), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant); +} +FIXTURE_DATA_TEST_CASE(RunSigned, NEDeconvolutionLayerQuantizedPerChannelFixture4x4<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data4x4, + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + data_layouts_dataset), + input_signed_qinfo_per_channel_dataset), + output_signed_qinfo_per_channel_dataset), + add_bias_dataset), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant); +} +TEST_SUITE_END() // W4x4 + +TEST_SUITE(W3x3) +FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedPerChannelFixture3x3<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data3x3, + framework::dataset::make("DataType", DataType::QASYMM8)), + data_layouts_dataset), + input_qinfo_per_channel_dataset), + output_qinfo_per_channel_dataset), + add_bias_dataset), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant); +} +FIXTURE_DATA_TEST_CASE(RunSigned, NEDeconvolutionLayerQuantizedPerChannelFixture3x3<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data3x3, + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + data_layouts_dataset), + input_signed_qinfo_per_channel_dataset), + output_signed_qinfo_per_channel_dataset), + add_bias_dataset), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant); +} +TEST_SUITE_END() // W3x3 + +TEST_SUITE(W1x1) +FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedPerChannelFixture1x1<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data1x1, + framework::dataset::make("DataType", DataType::QASYMM8)), + data_layouts_dataset), + input_qinfo_per_channel_dataset), + output_qinfo_per_channel_dataset), + add_bias_dataset), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant); +} +FIXTURE_DATA_TEST_CASE(RunSigned, NEDeconvolutionLayerQuantizedPerChannelFixture1x1<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data1x1, + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + data_layouts_dataset), + input_signed_qinfo_per_channel_dataset), + output_signed_qinfo_per_channel_dataset), + add_bias_dataset), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant); +} +TEST_SUITE_END() // W1x1 + +TEST_SUITE(W5x1) +FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedPerChannelFixture5x1<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data5x1, + framework::dataset::make("DataType", DataType::QASYMM8)), + data_layouts_dataset), + input_qinfo_per_channel_dataset), + output_qinfo_per_channel_dataset), + add_bias_dataset), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant); +} +FIXTURE_DATA_TEST_CASE(RunSigned, NEDeconvolutionLayerQuantizedPerChannelFixture5x1<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data5x1, + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + data_layouts_dataset), + input_signed_qinfo_per_channel_dataset), + output_signed_qinfo_per_channel_dataset), + add_bias_dataset), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant); +} +TEST_SUITE_END() // W5x1 + +TEST_SUITE_END() // QSYMM8_PER_CHANNEL + TEST_SUITE_END() // Quantized TEST_SUITE_END() // DeconvolutionLayer diff --git a/tests/validation/NEON/DepthConvertLayer.cpp b/tests/validation/NEON/DepthConvertLayer.cpp index 60631181bf..4972708144 100644 --- a/tests/validation/NEON/DepthConvertLayer.cpp +++ b/tests/validation/NEON/DepthConvertLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -56,25 +56,21 @@ const auto DepthConvertLayerU16toU8Dataset = combine(framework::dataset::ma const auto DepthConvertLayerU16toU32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32)); const auto DepthConvertLayerS16toU8Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8)); const auto DepthConvertLayerS16toS32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32)); -const auto DepthConvertLayerBF16toF32Dataset = combine(framework::dataset::make("DataType", DataType::BFLOAT16), framework::dataset::make("DataType", DataType::F32)); const auto DepthConvertLayerF16toU8Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::U8)); const auto DepthConvertLayerF16toF32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F32)); const auto DepthConvertLayerF16toS32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::S32)); const auto DepthConvertLayerF32toF16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F16)); const auto DepthConvertLayerF32toS32Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::S32)); const auto DepthConvertLayerF32toU8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::U8)); -const auto DepthConvertLayerF32toBF16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::BFLOAT16)); const auto DepthConvertLayerS32toF32Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::F32)); const auto DepthConvertLayerS32toQASYMM8Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::QASYMM8)); const auto DepthConvertLayerS32toF16Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::F16)); const auto DepthConvertLayerS32toU8Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::U8)); -const auto DepthConvertLayerF16toQASYMM8Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::QASYMM8)); -const auto DepthConvertLayerF32toQASYMM8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QASYMM8)); -const auto DepthConvertLayerShiftDatasetNightly = framework::dataset::make("Shift", 0, 7); -const auto DepthConvertLayerShiftDatasetPrecommit = framework::dataset::make("Shift", { 0, 3, 6 }); -const auto DepthConvertLayerZeroShiftDataset = framework::dataset::make("Shift", 0); +const auto DepthConvertLayerF16toQASYMM8Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::QASYMM8)); +const auto DepthConvertLayerF32toQASYMM8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QASYMM8)); +const auto DepthConvertLayerZeroShiftDataset = framework::dataset::make("Shift", 0); constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); constexpr AbsoluteTolerance<int32_t> tolerance_one_int32(1); @@ -108,7 +104,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( ConvertPolicy::WRAP, ConvertPolicy::WRAP, })), - framework::dataset::make("Shift",{ 1, 1, 1, 1, 1, 1, 8, 1, + framework::dataset::make("Shift",{ 0, 0, 0, 1, 1, 1, 8, 1, })), framework::dataset::make("Expected", { false, false, false, false, true})), input_info, output_info, policy, shift, expected) @@ -129,8 +125,6 @@ using NEDepthConvertLayerToU8Fixture = DepthConvertLayerValidationFixture<Tensor template <typename T> using NEDepthConvertLayerToU32Fixture = DepthConvertLayerValidationFixture<Tensor, Accessor, NEDepthConvertLayer, T, uint32_t>; template <typename T> -using NEDepthConvertLayerToBF16Fixture = DepthConvertLayerValidationFixture<Tensor, Accessor, NEDepthConvertLayer, T, bfloat16>; -template <typename T> using NEDepthConvertLayerToF16Fixture = DepthConvertLayerValidationFixture<Tensor, Accessor, NEDepthConvertLayer, T, half>; template <typename T> using NEDepthConvertLayerToF32Fixture = DepthConvertLayerValidationFixture<Tensor, Accessor, NEDepthConvertLayer, T, float>; @@ -188,7 +182,7 @@ TEST_SUITE_END() // QASYMM8_to_S32 TEST_SUITE(U8_to_U16) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToU16Fixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toU16Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetPrecommit)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(Accessor(_target), _reference); @@ -196,7 +190,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToU16Fixture<uint8_t>, frame FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToU16Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toU16Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetNightly)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(Accessor(_target), _reference); @@ -206,7 +200,7 @@ TEST_SUITE_END() // U8_to_U16 TEST_SUITE(U8_to_S16) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToS16Fixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toS16Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetPrecommit)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(Accessor(_target), _reference); @@ -214,7 +208,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToS16Fixture<uint8_t>, frame FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToS16Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toS16Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetNightly)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(Accessor(_target), _reference); @@ -223,7 +217,7 @@ TEST_SUITE_END() // U8_to_S16 TEST_SUITE(U8_to_S32) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToS32Fixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toS32Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetPrecommit)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(Accessor(_target), _reference); @@ -231,7 +225,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToS32Fixture<uint8_t>, frame FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToS32Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toS32Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetNightly)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(Accessor(_target), _reference); @@ -241,7 +235,7 @@ TEST_SUITE_END() // U8_to_S32 TEST_SUITE(U8_to_F32) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToF32Fixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toF32Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetPrecommit)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(Accessor(_target), _reference); @@ -249,7 +243,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToF32Fixture<uint8_t>, frame FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToF32Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toF32Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetNightly)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(Accessor(_target), _reference); @@ -260,7 +254,7 @@ TEST_SUITE_END() // U8_to_F32 TEST_SUITE(U8_to_F16) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToF16Fixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toF16Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetPrecommit)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(Accessor(_target), _reference); @@ -268,7 +262,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToF16Fixture<uint8_t>, frame FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToF16Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toF16Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetNightly)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(Accessor(_target), _reference); @@ -279,14 +273,14 @@ TEST_SUITE_END() // U8_to_F36 TEST_SUITE(U16_to_U8) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToU8Fixture<uint16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU16toU8Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetPrecommit)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToU8Fixture<uint16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU16toU8Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetNightly)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(Accessor(_target), _reference); @@ -296,14 +290,14 @@ TEST_SUITE_END() // U16_to_U8 TEST_SUITE(U16_to_U32) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToU32Fixture<uint16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU16toU32Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetPrecommit)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToU32Fixture<uint16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU16toU32Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetNightly)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(Accessor(_target), _reference); @@ -313,14 +307,14 @@ TEST_SUITE_END() // U16_to_U32 TEST_SUITE(S16_to_U8) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToU8Fixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerS16toU8Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetPrecommit)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToU8Fixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerS16toU8Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetNightly)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(Accessor(_target), _reference); @@ -330,42 +324,20 @@ TEST_SUITE_END() // S16_to_U8 TEST_SUITE(S16_to_S32) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToS32Fixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerS16toS32Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetPrecommit)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToS32Fixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerS16toS32Dataset), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDatasetNightly)) + DepthConvertLayerZeroShiftDataset)) { // Validate output validate(Accessor(_target), _reference); } TEST_SUITE_END() // S16_to_S32 -#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) -TEST_SUITE(BFLOAT16_to_F32) -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToF32Fixture<bfloat16>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerBF16toF32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerZeroShiftDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -TEST_SUITE_END() // BFLOAT16_to_F32 - -TEST_SUITE(F32_to_BFLOAT16) -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToBF16Fixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerF32toBF16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerZeroShiftDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -TEST_SUITE_END() // F32_to_BFLOAT16 -#endif /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */ - #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(F16_to_QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToQASYMM8Fixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), diff --git a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp index 6bb40be036..e9609b7b72 100644 --- a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp +++ b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -42,32 +42,70 @@ namespace test { namespace validation { +using framework::dataset::make; using namespace arm_compute::misc::shape_calculator; namespace { -constexpr RelativeTolerance<float> tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ -constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */ +constexpr RelativeTolerance<float> tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ +constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */ +constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(1); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8_SIGNED */ #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -RelativeTolerance<half_float::half> tolerance_f16(half_float::half(0.01)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ +RelativeTolerance<half_float::half> tolerance_f16(half_float::half(0.02)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ constexpr float tolerance_num = 0.05f; /**< Tolerance number */ #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -const auto depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 2, 8 }); -const auto large_depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 2, 5, 32 }); +const auto depth_multipliers = make("DepthMultiplier", { 1, 2, 8 }); +const auto large_depth_multipliers = make("DepthMultiplier", { 5, 32 }); -//Activation Functions -const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo", +// Activation Functions +const auto NoActivation = make("ActivationInfo", ActivationLayerInfo()); + +const auto ActivationFunctionsDataset = make("ActivationInfo", { ActivationLayerInfo(), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }); -const auto input_qinfo_dataset = framework::dataset::make("InputQInfo", +const auto ActivationFunctionsDatasetNightly = make("ActivationInfo", +{ + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f, -0.5f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SOFT_RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ABS), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SQUARE), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SWISH), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::HARD_SWISH), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 2.f, 1.f), +#ifdef __aarch64__ + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::GELU), +#endif // __aarch64__ +}); + +const auto ActivationFunctionsQuantizedSmallDataset = make("ActivationInfo", +{ + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) +}); + +const auto ActivationFunctionsQuantizedDataset = make("ActivationInfo", +{ + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f, -0.5f), +}); + +// This is only used when there is fused activation +const auto input_qinfo_dataset = make("InputQInfo", { QuantizationInfo(0.3f, 10), QuantizationInfo(2.2f, 10), }); + +const auto IgnoredQuantizationInfo = make("IgnoredQuantizationInfo", QuantizationInfo()); + } // namespace TEST_SUITE(NEON) @@ -76,7 +114,7 @@ TEST_SUITE(DepthwiseConvolutionLayer) // *INDENT-OFF* // clang-format off DATA_TEST_CASE(Validate3x3, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), // Mismatching data type input/weights + make("InputInfo", { TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), // Mismatching data type input/weights TensorInfo(TensorShape(32U, 18U, 3U), 1, DataType::F32), // Mismatching input feature maps TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), // Unsupported weights dimensions TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32), // Mismatching depth multiplier @@ -88,7 +126,7 @@ DATA_TEST_CASE(Validate3x3, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // dilation < 1 TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), }), - framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F16), + make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F16), TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F32), TensorInfo(TensorShape(5U, 5U, 2U, 2U), 1, DataType::F32), TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F32), @@ -100,7 +138,7 @@ DATA_TEST_CASE(Validate3x3, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F32), TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F32), })), - framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(2U), 1, DataType::F32), + make("BiasesInfo", { TensorInfo(TensorShape(2U), 1, DataType::F32), TensorInfo(TensorShape(2U), 1, DataType::F32), TensorInfo(TensorShape(2U), 1, DataType::F32), TensorInfo(TensorShape(2U), 1, DataType::F32), @@ -112,7 +150,7 @@ DATA_TEST_CASE(Validate3x3, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip TensorInfo(TensorShape(2U), 1, DataType::F32), TensorInfo(TensorShape(2U), 1, DataType::F32), })), - framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32), + make("OutputInfo", { TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32), TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32), TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32), TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32), @@ -124,7 +162,7 @@ DATA_TEST_CASE(Validate3x3, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), })), - framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0), + make("ConvInfo", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(1, 1, 0, 0), PadStrideInfo(1, 1, 0, 0), PadStrideInfo(1, 1, 0, 0), @@ -136,7 +174,7 @@ DATA_TEST_CASE(Validate3x3, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip PadStrideInfo(1, 1, 0, 0), PadStrideInfo(1, 1, 0, 0), })), - framework::dataset::make("DepthMultiplier", { 1, + make("DepthMultiplier", { 1, 1, 1, 3, @@ -148,7 +186,7 @@ DATA_TEST_CASE(Validate3x3, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip 1, 1, })), - framework::dataset::make("Dilation", { Size2D(1U, 1U), + make("Dilation", { Size2D(1U, 1U), Size2D(1U, 1U), Size2D(1U, 1U), Size2D(1U, 1U), @@ -160,7 +198,7 @@ DATA_TEST_CASE(Validate3x3, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip Size2D(0U, 1U), Size2D(1U, 1U), })), - framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, false, false, true })), + make("Expected", { false, false, false, false, false, false, false, false, false, false, true })), input_info, weights_info, biases_info, output_info, conv_info, depth_multiplier,dilation, expected) { bool is_valid = bool(NEDepthwiseConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), @@ -169,7 +207,7 @@ DATA_TEST_CASE(Validate3x3, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip } DATA_TEST_CASE(ValidateGeneric, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type input/weights + make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type input/weights TensorInfo(TensorShape(27U, 13U, 3U), 1, DataType::F32), // Mismatching input feature maps TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching depth multiplier TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases size @@ -178,7 +216,7 @@ DATA_TEST_CASE(ValidateGeneric, framework::DatasetMode::ALL, zip(zip(zip(zip(zip TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32), // Patch size bigger than input width TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32), // Dilation < 1 }), - framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F16), + make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F16), TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32), TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32), TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32), @@ -187,7 +225,7 @@ DATA_TEST_CASE(ValidateGeneric, framework::DatasetMode::ALL, zip(zip(zip(zip(zip TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32), TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32), })), - framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(2U), 1, DataType::F32), + make("BiasesInfo", { TensorInfo(TensorShape(2U), 1, DataType::F32), TensorInfo(TensorShape(2U), 1, DataType::F32), TensorInfo(TensorShape(2U), 1, DataType::F32), TensorInfo(TensorShape(4U), 1, DataType::F32), @@ -196,7 +234,7 @@ DATA_TEST_CASE(ValidateGeneric, framework::DatasetMode::ALL, zip(zip(zip(zip(zip TensorInfo(TensorShape(16U), 1, DataType::F32), TensorInfo(TensorShape(16U), 1, DataType::F32), })), - framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), + make("OutputInfo", { TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), @@ -205,7 +243,7 @@ DATA_TEST_CASE(ValidateGeneric, framework::DatasetMode::ALL, zip(zip(zip(zip(zip TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32), TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32), })), - framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0), + make("ConvInfo", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(1, 1, 0, 0), PadStrideInfo(1, 1, 0, 0), PadStrideInfo(1, 1, 0, 0), @@ -214,7 +252,7 @@ DATA_TEST_CASE(ValidateGeneric, framework::DatasetMode::ALL, zip(zip(zip(zip(zip PadStrideInfo(1, 1, 0, 0), PadStrideInfo(1, 1, 0, 0), })), - framework::dataset::make("DepthMultiplier", { 1, + make("DepthMultiplier", { 1, 1, 3, 1, @@ -223,7 +261,7 @@ DATA_TEST_CASE(ValidateGeneric, framework::DatasetMode::ALL, zip(zip(zip(zip(zip 2, 2, })), - framework::dataset::make("Dilation", { Size2D(1U, 1U), + make("Dilation", { Size2D(1U, 1U), Size2D(1U, 1U), Size2D(1U, 1U), Size2D(1U, 1U), @@ -232,7 +270,7 @@ DATA_TEST_CASE(ValidateGeneric, framework::DatasetMode::ALL, zip(zip(zip(zip(zip Size2D(25U, 1U), Size2D(0U, 1U), })), - framework::dataset::make("Expected", { false, false, false, false, false, false, false, false})), + make("Expected", { false, false, false, false, false, false, false, false})), input_info, weights_info, biases_info, output_info, conv_info, depth_multiplier,dilation, expected) { bool is_valid = bool(NEDepthwiseConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, depth_multiplier, ActivationLayerInfo(), dilation)); @@ -242,36 +280,60 @@ DATA_TEST_CASE(ValidateGeneric, framework::DatasetMode::ALL, zip(zip(zip(zip(zip // *INDENT-ON* template <typename T> using NEDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T>; +template <typename T> +using NEDepthwiseConvolutionLayerMixedDataLayoutFixture = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T, true>; +template <typename T> +using NEDepthwiseConvolutionLayerVariableWeightsFixture = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T, false, false, true>; TEST_SUITE(Float) TEST_SUITE(F32) + +FIXTURE_DATA_TEST_CASE_NEW(RunActivations, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, + combine( + make("In", TensorShape(33U, 27U, 11U, 3U)), + make("Weights", Size2D(3U, 4U)), + make("Info", PadStrideInfo(1, 2, 0, 1)), + make("Dilation", Size2D(2U, 2U)), + make("DepthMultiplier", { 5 }), + make("DataType", DataType::F32), + make("DataLayout", { DataLayout::NHWC, DataLayout::NCHW }), + ActivationFunctionsDatasetNightly)) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} + TEST_SUITE(Generic) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), depth_multipliers), - framework::dataset::make("DataType", - DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + make("DataType", DataType::F32)), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f32); } +FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout, NEDepthwiseConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + make("DepthMultiplier", { 2 })), + make("DataType", DataType::F32)), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + make("ActivationInfo", ActivationLayerInfo()))) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), large_depth_multipliers), - framework::dataset::make("DataType", - DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("DataType", DataType::F32)), + make("DataLayout", { DataLayout::NHWC })), + make("ActivationInfo", { ActivationLayerInfo() }))) { validate(Accessor(_target), _reference, tolerance_f32); } - TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), depth_multipliers), - framework::dataset::make("DataType", + make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f32); @@ -279,10 +341,10 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<float>, FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), large_depth_multipliers), - framework::dataset::make("DataType", + make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("DataLayout", { DataLayout::NHWC })), + make("ActivationInfo", { ActivationLayerInfo() }))) { validate(Accessor(_target), _reference, tolerance_f32); } @@ -292,9 +354,9 @@ TEST_SUITE_END() // Generic TEST_SUITE(W3x3) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), depth_multipliers), - framework::dataset::make("DataType", + make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f32); @@ -302,10 +364,10 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<float>, FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), large_depth_multipliers), - framework::dataset::make("DataType", + make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("DataLayout", { DataLayout::NHWC })), + make("ActivationInfo", { ActivationLayerInfo() }))) { validate(Accessor(_target), _reference, tolerance_f32); } @@ -313,9 +375,9 @@ TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers), - framework::dataset::make("DataType", + make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f32); @@ -323,10 +385,10 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<float>, FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), large_depth_multipliers), - framework::dataset::make("DataType", + make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("DataLayout", { DataLayout::NHWC })), + make("ActivationInfo", { ActivationLayerInfo() }))) { validate(Accessor(_target), _reference, tolerance_f32); } @@ -337,31 +399,70 @@ TEST_SUITE_END() // W3x3 TEST_SUITE(Optimized) FIXTURE_DATA_TEST_CASE_NEW(RunSmall3x3, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), - framework::dataset::make("DepthMultiplier", 1)), - framework::dataset::make("DataType", + make("DepthMultiplier", 1)), + make("DataType", + DataType::F32)), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE_NEW(RunVariableWeightsSmall3x3, NEDepthwiseConvolutionLayerVariableWeightsFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), + make("DepthMultiplier", 1)), + make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + make("DataLayout", { DataLayout::NHWC })), ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f32); } +FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout3x3, NEDepthwiseConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), + make("DepthMultiplier", 1)), + make("DataType", DataType::F32)), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + make("ActivationInfo", ActivationLayerInfo()))) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} FIXTURE_DATA_TEST_CASE_NEW(RunSmall5x5, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(), - framework::dataset::make("DepthMultiplier", 1)), - framework::dataset::make("DataType", + make("DepthMultiplier", 1)), + make("DataType", + DataType::F32)), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE_NEW(RunVariableWeightsSmall5x5, NEDepthwiseConvolutionLayerVariableWeightsFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(), + make("DepthMultiplier", 1)), + make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + make("DataLayout", { DataLayout::NHWC })), ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f32); } FIXTURE_DATA_TEST_CASE_NEW(RunLarge3x3, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), - framework::dataset::make("DepthMultiplier", 1)), - framework::dataset::make("DataType", + make("DepthMultiplier", 1)), + make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("DataLayout", { DataLayout::NHWC })), + make("ActivationInfo", { ActivationLayerInfo() }))) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE_NEW(RunVariableWeightsLarge3x3, NEDepthwiseConvolutionLayerVariableWeightsFixture<float>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), + make("DepthMultiplier", 1)), + make("DataType", + DataType::F32)), + make("DataLayout", { DataLayout::NHWC })), + make("ActivationInfo", { ActivationLayerInfo() }))) { validate(Accessor(_target), _reference, tolerance_f32); } @@ -370,22 +471,37 @@ TEST_SUITE_END() // F32 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(F16) + +FIXTURE_DATA_TEST_CASE_NEW(RunActivations, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, + combine( + make("In", TensorShape(33U, 27U, 11U, 3U)), + make("Weights", Size2D(3U, 4U)), + make("Info", PadStrideInfo(1, 2, 0, 1)), + make("Dilation", Size2D(2U, 2U)), + make("DepthMultiplier", { 5 }), + make("DataType", DataType::F16), + make("DataLayout", { DataLayout::NHWC, DataLayout::NCHW }), + ActivationFunctionsDatasetNightly)) +{ + validate(Accessor(_target), _reference, tolerance_f16, tolerance_num); +} + TEST_SUITE(Generic) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), depth_multipliers), - framework::dataset::make("DataType", + make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f16, tolerance_num); } FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), large_depth_multipliers), - framework::dataset::make("DataType", + make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("DataLayout", { DataLayout::NHWC })), + make("ActivationInfo", { ActivationLayerInfo() }))) { validate(Accessor(_target), _reference, tolerance_f16, tolerance_num); } @@ -394,9 +510,8 @@ TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), depth_multipliers), - framework::dataset::make("DataType", - DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + make("DataType", DataType::F16)), + make("DataLayout", { DataLayout::NHWC })), ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f16, tolerance_num); @@ -404,10 +519,9 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<half>, f FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), large_depth_multipliers), - framework::dataset::make("DataType", - DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("DataType", DataType::F16)), + make("DataLayout", { DataLayout::NHWC })), + make("ActivationInfo", { ActivationLayerInfo() }))) { validate(Accessor(_target), _reference, tolerance_f16, tolerance_num); } @@ -419,9 +533,9 @@ using NEDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFi TEST_SUITE(W3x3) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), depth_multipliers), - framework::dataset::make("DataType", + make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f16); @@ -429,10 +543,10 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<half>, f FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), large_depth_multipliers), - framework::dataset::make("DataType", + make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("DataLayout", { DataLayout::NHWC })), + make("ActivationInfo", { ActivationLayerInfo() }))) { validate(Accessor(_target), _reference, tolerance_f16); } @@ -442,9 +556,9 @@ TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers), - framework::dataset::make("DataType", + make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f16); @@ -452,10 +566,10 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerFixture<half>, f FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), large_depth_multipliers), - framework::dataset::make("DataType", + make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("DataLayout", { DataLayout::NHWC })), + make("ActivationInfo", { ActivationLayerInfo() }))) { validate(Accessor(_target), _reference, tolerance_f16); } @@ -466,31 +580,31 @@ TEST_SUITE_END() // W3x3 TEST_SUITE(Optimized) FIXTURE_DATA_TEST_CASE_NEW(RunSmallW3x3, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), - framework::dataset::make("DepthMultiplier", 1)), - framework::dataset::make("DataType", + make("DepthMultiplier", 1)), + make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f16); } FIXTURE_DATA_TEST_CASE_NEW(RunSmallW5x5, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(), - framework::dataset::make("DepthMultiplier", 1)), - framework::dataset::make("DataType", + make("DepthMultiplier", 1)), + make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f16); } FIXTURE_DATA_TEST_CASE_NEW(RunLargeW3x3, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), - framework::dataset::make("DepthMultiplier", 1)), - framework::dataset::make("DataType", + make("DepthMultiplier", 1)), + make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("DataLayout", { DataLayout::NHWC })), + make("ActivationInfo", { ActivationLayerInfo() }))) { validate(Accessor(_target), _reference, tolerance_f16); } @@ -501,94 +615,162 @@ TEST_SUITE_END() // FP16 TEST_SUITE_END() // Float template <typename T> -using NEDepthwiseConvolutionLayerQuantizedFixtureOptimized = DepthwiseConvolutionLayerValidationQuantizedFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T>; -template <typename T> using NEDepthwiseConvolutionLayerQuantizedFixture = DepthwiseConvolutionLayerValidationQuantizedFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T>; +template <typename T> +using NEDepthwiseConvolutionLayerQuantizedMixedDataLayoutFixture = DepthwiseConvolutionLayerValidationQuantizedFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T, true>; using NEDepthwiseConvolutionLayerQuantizedSymmetricPerChannelFixture = DepthwiseConvolutionLayerValidationQuantizedPerChannelFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, uint8_t, int8_t>; TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) + +FIXTURE_DATA_TEST_CASE_NEW(RunActivations, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, + combine( + make("In", TensorShape(33U, 27U, 11U, 3U)), + make("Weights", Size2D(3U, 4U)), + make("Info", PadStrideInfo(1, 2, 0, 1)), + make("Dilation", Size2D(2U, 2U)), + make("DepthMultiplier", { 5 }), + make("DataType", DataType::QASYMM8), + make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10) }), + make("DstQuantizationInfo", { QuantizationInfo(0.05f, 4) }), + make("DataLayout", { DataLayout::NHWC, DataLayout::NCHW }), + ActivationFunctionsQuantizedDataset)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} + TEST_SUITE(Generic) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), - depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - input_qinfo_dataset), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })), - framework::dataset::make("DataLayout", { DataLayout::NHWC })), - ActivationFunctionsDataset)) + combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + depth_multipliers, + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NHWC }), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + depth_multipliers, + make("DataType", DataType::QASYMM8), + input_qinfo_dataset, + make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) }), + make("DataLayout", { DataLayout::NHWC }), + ActivationFunctionsQuantizedSmallDataset)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout, NEDepthwiseConvolutionLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + make("DepthMultiplier", { 2 }), + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NHWC }), + NoActivation)) { validate(Accessor(_target), _reference, tolerance_qasymm8); } - TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), - depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - input_qinfo_dataset), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.8f, 1) })), - framework::dataset::make("DataLayout", { DataLayout::NHWC })), - ActivationFunctionsDataset)) + combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), + depth_multipliers, + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NHWC }), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), + depth_multipliers, + make("DataType", DataType::QASYMM8), + input_qinfo_dataset, + make("DstQuantizationInfo", { QuantizationInfo(0.8f, 1) }), + make("DataLayout", { DataLayout::NHWC }), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_qasymm8); } FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), - large_depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - input_qinfo_dataset), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.9f, 11) })), - framework::dataset::make("DataLayout", { DataLayout::NHWC })), - ActivationFunctionsDataset)) + combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), + large_depth_multipliers, + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NHWC }), + NoActivation)) { validate(Accessor(_target), _reference, tolerance_qasymm8); } TEST_SUITE_END() // Dilation TEST_SUITE_END() // Generic TEST_SUITE(W3x3) -FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixtureOptimized<uint8_t>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - input_qinfo_dataset), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NHWC })), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), depth_multipliers, + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NHWC }), + NoActivation)) { validate(Accessor(_target), _reference, tolerance_qasymm8); } -FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixtureOptimized<uint8_t>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), - large_depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - input_qinfo_dataset), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NHWC })), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), depth_multipliers, + make("DataType", DataType::QASYMM8), + input_qinfo_dataset, + make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) }), + make("DataLayout", { DataLayout::NHWC }), + ActivationFunctionsQuantizedSmallDataset)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, + combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), + large_depth_multipliers, + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NHWC }), + NoActivation)) { validate(Accessor(_target), _reference, tolerance_qasymm8); } TEST_SUITE(Dilation) - -FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixtureOptimized<uint8_t>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - input_qinfo_dataset), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.7f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NHWC })), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers, + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NHWC }), + NoActivation)) { validate(Accessor(_target), _reference, tolerance_qasymm8); } -FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixtureOptimized<uint8_t>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), - large_depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - input_qinfo_dataset), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NHWC })), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers, + make("DataType", DataType::QASYMM8), + input_qinfo_dataset, + make("DstQuantizationInfo", { QuantizationInfo(0.7f, 10) }), + make("DataLayout", { DataLayout::NHWC }), + ActivationFunctionsQuantizedSmallDataset)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, + combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), + large_depth_multipliers, + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NHWC }), + NoActivation)) { validate(Accessor(_target), _reference, tolerance_qasymm8); } @@ -596,39 +778,69 @@ TEST_SUITE_END() // Dilation TEST_SUITE_END() // W3x3 TEST_SUITE(Optimized) -FIXTURE_DATA_TEST_CASE_NEW(RunSmall3x3, NEDepthwiseConvolutionLayerQuantizedFixtureOptimized<uint8_t>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), - framework::dataset::make("DepthMultiplier", 1)), - framework::dataset::make("DataType", - DataType::QASYMM8)), - input_qinfo_dataset), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NHWC })), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall3x3, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), + make("DepthMultiplier", 1), + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NHWC }), + NoActivation)) { validate(Accessor(_target), _reference, tolerance_qasymm8); } -FIXTURE_DATA_TEST_CASE_NEW(RunSmall5x5, NEDepthwiseConvolutionLayerQuantizedFixtureOptimized<uint8_t>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(), - framework::dataset::make("DepthMultiplier", 1)), - framework::dataset::make("DataType", - DataType::QASYMM8)), - input_qinfo_dataset), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NHWC })), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall3x3WithActivation, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), + make("DepthMultiplier", 1), + make("DataType", DataType::QASYMM8), + input_qinfo_dataset, + make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) }), + make("DataLayout", { DataLayout::NHWC }), + ActivationFunctionsQuantizedSmallDataset)) { validate(Accessor(_target), _reference, tolerance_qasymm8); } -FIXTURE_DATA_TEST_CASE_NEW(RunLarge3x3, NEDepthwiseConvolutionLayerQuantizedFixtureOptimized<uint8_t>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), - framework::dataset::make("DepthMultiplier", 1)), - framework::dataset::make("DataType", - DataType::QASYMM8)), - input_qinfo_dataset), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NHWC })), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout3x3, NEDepthwiseConvolutionLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), + make("DepthMultiplier", 1), + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NHWC }), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunSmall5x5, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(), + make("DepthMultiplier", 1), + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NHWC }), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunSmall5x5WithActivation, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(), + make("DepthMultiplier", 1), + make("DataType", DataType::QASYMM8), + input_qinfo_dataset, + make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) }), + make("DataLayout", { DataLayout::NHWC }), + ActivationFunctionsQuantizedSmallDataset)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE_NEW(RunLarge3x3, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, + combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), + make("DepthMultiplier", 1), + make("DataType", DataType::QASYMM8), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NHWC }), + NoActivation)) { validate(Accessor(_target), _reference, tolerance_qasymm8); } @@ -636,143 +848,242 @@ TEST_SUITE_END() // Optimized TEST_SUITE_END() // QASYMM8 TEST_SUITE(QASYMM8_SIGNED) -TEST_SUITE(Generic) -FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), - depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), - input_qinfo_dataset), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - ActivationFunctionsDataset)) + +FIXTURE_DATA_TEST_CASE_NEW(RunActivations, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::NIGHTLY, + combine( + make("In", TensorShape(33U, 27U, 11U, 3U)), + make("Weights", Size2D(3U, 4U)), + make("Info", PadStrideInfo(1, 2, 0, 1)), + make("Dilation", Size2D(2U, 2U)), + make("DepthMultiplier", { 5 }), + make("DataType", DataType::QASYMM8_SIGNED), + make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10) }), + make("DstQuantizationInfo", { QuantizationInfo(0.05f, 4) }), + make("DataLayout", { DataLayout::NHWC, DataLayout::NCHW }), + ActivationFunctionsQuantizedDataset)) { validate(Accessor(_target), _reference, tolerance_qasymm8); } +TEST_SUITE(Generic) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + depth_multipliers, + make("DataType", DataType::QASYMM8_SIGNED), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + depth_multipliers, + make("DataType", DataType::QASYMM8_SIGNED), + input_qinfo_dataset, + make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) }), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }), + ActivationFunctionsQuantizedSmallDataset)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), - depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), - input_qinfo_dataset), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.8f, 1) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - ActivationFunctionsDataset)) + combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), + depth_multipliers, + make("DataType", DataType::QASYMM8_SIGNED), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }), + NoActivation)) { - validate(Accessor(_target), _reference, tolerance_qasymm8); + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), + depth_multipliers, + make("DataType", DataType::QASYMM8_SIGNED), + input_qinfo_dataset, + make("DstQuantizationInfo", { QuantizationInfo(0.8f, 1) }), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }), + ActivationFunctionsQuantizedSmallDataset)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); } FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), - large_depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), - input_qinfo_dataset), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.9f, 11) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - ActivationFunctionsDataset)) + combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), + large_depth_multipliers, + make("DataType", DataType::QASYMM8_SIGNED), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NCHW }), + NoActivation)) { - validate(Accessor(_target), _reference, tolerance_qasymm8); + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); } TEST_SUITE_END() // Dilation TEST_SUITE_END() // Generic TEST_SUITE(W3x3) -FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixtureOptimized<int8_t>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), - input_qinfo_dataset), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers, + make("DataType", DataType::QASYMM8_SIGNED), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }), + NoActivation)) { - validate(Accessor(_target), _reference, tolerance_qasymm8); + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); } -FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixtureOptimized<int8_t>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), - large_depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), - input_qinfo_dataset), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers, + make("DataType", DataType::QASYMM8_SIGNED), + input_qinfo_dataset, + make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) }), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }), + ActivationFunctionsQuantizedSmallDataset)) { - validate(Accessor(_target), _reference, tolerance_qasymm8); + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::NIGHTLY, + combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), + large_depth_multipliers, + make("DataType", DataType::QASYMM8_SIGNED), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NCHW }), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); } TEST_SUITE(Dilation) -FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixtureOptimized<int8_t>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), - input_qinfo_dataset), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.7f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), + depth_multipliers, + make("DataType", DataType::QASYMM8_SIGNED), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }), + NoActivation)) { - validate(Accessor(_target), _reference, tolerance_qasymm8); + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); } -FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixtureOptimized<int8_t>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), - large_depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), - input_qinfo_dataset), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), + depth_multipliers, + make("DataType", DataType::QASYMM8_SIGNED), + input_qinfo_dataset, + make("DstQuantizationInfo", { QuantizationInfo(0.7f, 10) }), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }), + ActivationFunctionsQuantizedSmallDataset)) { - validate(Accessor(_target), _reference, tolerance_qasymm8); + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::NIGHTLY, + combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), + large_depth_multipliers, + make("DataType", DataType::QASYMM8_SIGNED), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NCHW }), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); } TEST_SUITE_END() // Dilation TEST_SUITE_END() // W3x3 TEST_SUITE(Optimized) -FIXTURE_DATA_TEST_CASE_NEW(RunSmall3x3, NEDepthwiseConvolutionLayerQuantizedFixtureOptimized<int8_t>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), - framework::dataset::make("DepthMultiplier", 1)), - framework::dataset::make("DataType", - DataType::QASYMM8_SIGNED)), - input_qinfo_dataset), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall3x3, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), + make("DepthMultiplier", 1), + make("DataType", DataType::QASYMM8_SIGNED), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }), + NoActivation)) { - validate(Accessor(_target), _reference, tolerance_qasymm8); + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); } -FIXTURE_DATA_TEST_CASE_NEW(RunSmall5x5, NEDepthwiseConvolutionLayerQuantizedFixtureOptimized<int8_t>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(), - framework::dataset::make("DepthMultiplier", 1)), - framework::dataset::make("DataType", - DataType::QASYMM8_SIGNED)), - input_qinfo_dataset), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall3x3WithActivation, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), + make("DepthMultiplier", 1), + make("DataType", DataType::QASYMM8_SIGNED), + input_qinfo_dataset, + make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) }), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }), + ActivationFunctionsQuantizedSmallDataset)) { - validate(Accessor(_target), _reference, tolerance_qasymm8); + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); } -FIXTURE_DATA_TEST_CASE_NEW(RunLarge3x3, NEDepthwiseConvolutionLayerQuantizedFixtureOptimized<int8_t>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), - framework::dataset::make("DepthMultiplier", 1)), - framework::dataset::make("DataType", - DataType::QASYMM8_SIGNED)), - input_qinfo_dataset), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - ActivationFunctionsDataset)) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall5x5, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(), + make("DepthMultiplier", 1), + make("DataType", DataType::QASYMM8_SIGNED), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }), + NoActivation)) { - validate(Accessor(_target), _reference, tolerance_qasymm8); + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +FIXTURE_DATA_TEST_CASE_NEW(RunSmall5x5WithActivation, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset5x5(), + make("DepthMultiplier", 1), + make("DataType", DataType::QASYMM8_SIGNED), + input_qinfo_dataset, + make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) }), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }), + ActivationFunctionsQuantizedSmallDataset)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +FIXTURE_DATA_TEST_CASE_NEW(RunLarge3x3, NEDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::NIGHTLY, + combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), + make("DepthMultiplier", 1), + make("DataType", DataType::QASYMM8_SIGNED), + IgnoredQuantizationInfo, + IgnoredQuantizationInfo, + make("DataLayout", { DataLayout::NCHW }), + NoActivation)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); } TEST_SUITE_END() // Optimized TEST_SUITE_END() // QASYMM8_SIGNED TEST_SUITE(QSYMM8_PER_CHANNEL) + +FIXTURE_DATA_TEST_CASE_NEW(RunActivations, NEDepthwiseConvolutionLayerQuantizedSymmetricPerChannelFixture, framework::DatasetMode::NIGHTLY, + combine( + make("In", TensorShape(33U, 27U, 11U, 3U)), + make("Weights", Size2D(3U, 4U)), + make("Info", PadStrideInfo(1, 2, 0, 1)), + make("Dilation", Size2D(2U, 2U)), + make("DepthMultiplier", { 5 }), + make("InputDataType", DataType::QASYMM8), + make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL), + make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10) }), + make("DstQuantizationInfo", { QuantizationInfo(0.05f, 4) }), + make("DataLayout", { DataLayout::NHWC, DataLayout::NCHW }), + ActivationFunctionsQuantizedDataset)) +{ + validate(Accessor(_target), _reference, tolerance_qasymm8); +} + TEST_SUITE(Generic) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerQuantizedSymmetricPerChannelFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), depth_multipliers), - framework::dataset::make("InputDataType", DataType::QASYMM8)), - framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)), + make("InputDataType", DataType::QASYMM8)), + make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)), input_qinfo_dataset), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_qasymm8); @@ -782,11 +1093,11 @@ TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerQuantizedSymmetricPerChannelFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), depth_multipliers), - framework::dataset::make("InputDataType", DataType::QASYMM8)), - framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)), + make("InputDataType", DataType::QASYMM8)), + make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)), input_qinfo_dataset), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })), + make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_qasymm8); @@ -794,12 +1105,12 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerQuantizedSymmetr FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerQuantizedSymmetricPerChannelFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), depth_multipliers), - framework::dataset::make("InputDataType", DataType::QASYMM8)), - framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)), + make("InputDataType", DataType::QASYMM8)), + make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)), input_qinfo_dataset), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })), + make("DataLayout", { DataLayout::NHWC })), + make("ActivationInfo", { ActivationLayerInfo() }))) { validate(Accessor(_target), _reference, tolerance_qasymm8); } @@ -809,25 +1120,25 @@ TEST_SUITE_END() // Generic TEST_SUITE(Optimized) FIXTURE_DATA_TEST_CASE_NEW(RunSmall3x3, NEDepthwiseConvolutionLayerQuantizedSymmetricPerChannelFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), - framework::dataset::make("DepthMultiplier", 1)), - framework::dataset::make("InputDataType", DataType::QASYMM8)), - framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)), + make("DepthMultiplier", 1)), + make("InputDataType", DataType::QASYMM8)), + make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)), input_qinfo_dataset), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })), - framework::dataset::make("DataLayout", { DataLayout::NHWC })), + make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })), + make("DataLayout", { DataLayout::NHWC })), ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_qasymm8); } FIXTURE_DATA_TEST_CASE_NEW(RunLarge3x3, NEDepthwiseConvolutionLayerQuantizedSymmetricPerChannelFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), - framework::dataset::make("DepthMultiplier", 1)), - framework::dataset::make("InputDataType", DataType::QASYMM8)), - framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)), + make("DepthMultiplier", 1)), + make("InputDataType", DataType::QASYMM8)), + make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)), input_qinfo_dataset), - framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })), - framework::dataset::make("DataLayout", { DataLayout::NHWC })), - ActivationFunctionsDataset)) + make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })), + make("DataLayout", { DataLayout::NHWC })), + make("ActivationInfo", { ActivationLayerInfo() }))) { validate(Accessor(_target), _reference, tolerance_qasymm8); } diff --git a/tests/validation/NEON/DepthwiseConvolutionLayerNative.cpp b/tests/validation/NEON/DepthwiseConvolutionLayerNative.cpp index 3314227bec..221fc5d249 100644 --- a/tests/validation/NEON/DepthwiseConvolutionLayerNative.cpp +++ b/tests/validation/NEON/DepthwiseConvolutionLayerNative.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,7 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h" +#include "arm_compute/core/utils/StringUtils.h" +#include "src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h" #include "tests/NEON/Accessor.h" #include "tests/NEON/Helper.h" #include "tests/framework/Macros.h" @@ -37,12 +38,12 @@ namespace validation { using namespace arm_compute::misc::shape_calculator; -// Create function for NEDepthwiseConvolutionLayerKernel -using NEDepthwiseConvolutionLayerNative = NESynthetizeFunctionWithZeroConstantKernelBorder<NEDepthwiseConvolutionLayerNativeKernel>; +// Create function for CpuDepthwiseConvolutionKernel +using CpuDepthwiseConvolutionNative = NESynthetizeFunctionWithZeroConstantKernelBorder<cpu::kernels::CpuDepthwiseConv2dNativeKernel>; // Fixture for NEDepthwiseConvolutionLayerKernel template <typename T> -using NEDepthwiseConvolutionLayerNativeFixture = DepthwiseConvolutionLayerNativeValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayerNative, T>; +using CpuDepthwiseConvolutionNativeFixture = DepthwiseConvolutionLayerNativeValidationFixture<Tensor, Accessor, CpuDepthwiseConvolutionNative, T>; namespace { @@ -124,8 +125,9 @@ TEST_CASE(ValidateNoPadding, framework::DatasetMode::ALL) auto biases = create_tensor<Tensor>(bias_shape, data_type, 1, QuantizationInfo(), data_layout); auto dst = create_tensor<Tensor>(TensorShape(), data_type, 1, QuantizationInfo(), data_layout); - NEDepthwiseConvolutionLayerNativeKernel dwc; - dwc.configure(&src, &weights, &biases, &dst, pad_stride_info); + cpu::kernels::CpuDepthwiseConv2dNativeKernel dwc; + const ConvolutionInfo info{pad_stride_info, 1, ActivationLayerInfo(), Size2D(1, 1)}; + dwc.configure(src.info(), weights.info(), biases.info(), dst.info(), info); ARM_COMPUTE_EXPECT(src.info()->padding().empty(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(weights.info()->padding().empty(), framework::LogLevel::ERRORS); @@ -133,9 +135,47 @@ TEST_CASE(ValidateNoPadding, framework::DatasetMode::ALL) ARM_COMPUTE_EXPECT(dst.info()->padding().empty(), framework::LogLevel::ERRORS); } +TEST_SUITE(KERNEL_SELECTION) +DATA_TEST_CASE(KernelSelection_mul_and_add, framework::DatasetMode::ALL, + combine(combine(framework::dataset::make("CpuExt", std::string("NEON")), + framework::dataset::make("DataType", { DataType::F32, + DataType::F16, + DataType::QASYMM8_SIGNED, + DataType::QASYMM8, + DataType::QSYMM8_PER_CHANNEL + })), + framework::dataset::make("DataType_per_channel", { DataType::QASYMM8, + DataType::QASYMM8_SIGNED + })), + cpu_ext, data_type, data_type_per_channel) +{ + using namespace cpu::kernels; + + cpuinfo::CpuIsaInfo cpu_isa{}; + cpu_isa.neon = (cpu_ext == "NEON"); + cpu_isa.fp16 = (data_type == DataType::F16); + + const auto *selected_impl = CpuDepthwiseConv2dNativeKernel::get_implementation( + DepthwiseConv2dNativeDataTypeISASelectorData{ data_type, data_type_per_channel,cpu_isa }, + cpu::KernelSelectionType::Preferred ); + + ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl); + + std::string per_channel_str = "_"; + if (data_type == DataType::QSYMM8_PER_CHANNEL) + { + per_channel_str = "_" + cpu_impl_dt(data_type_per_channel) + "_" ; + } + std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + per_channel_str + "deptwiseconv2dnative"; + std::string actual = selected_impl->name; + + ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS); +} +TEST_SUITE_END() // KERNEL_SELECTION + TEST_SUITE(Float) TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::ALL, +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CpuDepthwiseConvolutionNativeFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(width_values_precommit, height_values_precommit), channel_values_precommit), @@ -152,7 +192,7 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerNativeFixture<fl validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); } -FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::NIGHTLY, +FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CpuDepthwiseConvolutionNativeFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(width_values_nightly, height_values_nightly), channel_values_nightly), diff --git a/tests/validation/NEON/DetectionPostProcessLayer.cpp b/tests/validation/NEON/DetectionPostProcessLayer.cpp index a166402a79..7d725327b7 100644 --- a/tests/validation/NEON/DetectionPostProcessLayer.cpp +++ b/tests/validation/NEON/DetectionPostProcessLayer.cpp @@ -150,7 +150,7 @@ inline void base_test_case(DetectionPostProcessLayerInfo info, DataType data_typ quantize_and_fill_tensor(Accessor(anchors), anchors_vector); } - // Determine the output through the Neon kernel + // Determine the output through the Compute Library operator Tensor output_boxes; Tensor output_classes; Tensor output_scores; diff --git a/tests/validation/NEON/DilatedConvolutionLayer.cpp b/tests/validation/NEON/DilatedConvolutionLayer.cpp index 2f0fce2ce0..fbfe8b8a7a 100644 --- a/tests/validation/NEON/DilatedConvolutionLayer.cpp +++ b/tests/validation/NEON/DilatedConvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,6 +26,7 @@ #include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/cpu/operators/CpuConv2d.h" #include "tests/NEON/Accessor.h" #include "tests/PaddingCalculator.h" #include "tests/datasets/DilatedConvolutionLayerDataset.h" @@ -49,7 +50,7 @@ const AbsoluteTolerance<float> abs_tolerance_f16(0.3f); const RelativeTolerance<half_float::half> rel_tolerance_f16(half_float::half(0.2f)); /**< Relative tolerance value for comparing reference's output against implementation's output for DataType::F16 */ constexpr float tolerance_num_f16 = 0.07f; /**< Tolerance number for FP16 */ #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -constexpr AbsoluteTolerance<float> tolerance_qasymm8(0.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ +constexpr AbsoluteTolerance<int32_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ /** CNN data types */ const auto CNNDataTypes = framework::dataset::make("DataType", @@ -96,7 +97,7 @@ DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(z framework::dataset::make("Expected", { ConvolutionMethod::GEMM, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM })), input_info, weights_info, output_info, conv_info, dilation, expected) { - ConvolutionMethod is_valid = NEConvolutionLayer::get_convolution_method(&input_info.clone()->set_is_resizable(false), + ConvolutionMethod is_valid = cpu::CpuConv2d::get_convolution_method(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, WeightsInfo(), dilation); @@ -161,13 +162,18 @@ template <typename T> using NEGEMMDilatedConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<Tensor, Accessor, NEGEMMConvolutionLayer, T>; TEST_SUITE(Quantized) +/// @note: Every asymmetric quantized test where there's no fused activation will have its quantization info ignored +/// This is because instead of using the same quantization information for all the tensors, the fixture generates +/// separate quantization info for each input and the output tensor. +/// When we can also support dynamic quantization with the presence of activation, we can remove the explicit +/// quantization info. TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMDilatedConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallDilatedConvolutionLayerDataset(), framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("DataLayout", { DataLayout::NCHW })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), + framework::dataset::make("IgnoredQuantizationInfo", { QuantizationInfo() })), framework::dataset::make("ActivationLayerInfo", ActivationLayerInfo()))) { // Validate output @@ -178,7 +184,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMDilatedConvolutionLayerQuantizedFixture<u framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("DataLayout", { DataLayout::NCHW })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), + framework::dataset::make("IgnoredQuantizationInfo", { QuantizationInfo() })), framework::dataset::make("ActivationLayerInfo", ActivationLayerInfo()))) { // Validate output diff --git a/tests/validation/NEON/DirectConvolutionLayer.cpp b/tests/validation/NEON/DirectConvolutionLayer.cpp index 6c47fa1cf8..0779c9d388 100644 --- a/tests/validation/NEON/DirectConvolutionLayer.cpp +++ b/tests/validation/NEON/DirectConvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,9 +23,12 @@ */ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/StringUtils.h" #include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/common/cpuinfo/CpuIsaInfo.h" +#include "src/cpu/kernels/CpuDirectConv2dKernel.h" #include "tests/NEON/Accessor.h" #include "tests/PaddingCalculator.h" #include "tests/datasets/ShapeDatasets.h" @@ -70,8 +73,8 @@ const auto data_pad_f16 = concat(combine(framework::dataset::make("PadX", { 0, 1 framework::dataset::make("KernelSize", 1)))); const auto data_f32 = combine(datasets::SmallDirectConvolutionShapes(), - combine(framework::dataset::make("StrideX", { 1, 2, 3 }), - combine(framework::dataset::make("StrideY", { 1, 2, 3 }), + combine(framework::dataset::make("StrideX", { 1, 2, 3, 4 }), + combine(framework::dataset::make("StrideY", { 1, 2, 3, 4 }), data_pad_f32))); const auto data_f16 = combine(datasets::SmallDirectConvolutionShapes(), @@ -87,17 +90,25 @@ const auto data_prec = combine(datasets::SmallDirectConvolutionShapes(), framework::dataset::make("KernelSize", 3)))))); const auto data9x9 = combine(datasets::SmallDirectConvolutionShapes(), - combine(framework::dataset::make("StrideX", { 1 }), - combine(framework::dataset::make("StrideY", { 1 }), + combine(framework::dataset::make("StrideX", { 1, 2, 3 }), + combine(framework::dataset::make("StrideY", { 1, 2, 3 }), combine(framework::dataset::make("PadX", { 0, 2 }), combine(framework::dataset::make("PadY", { 0, 3 }), framework::dataset::make("KernelSize", 9)))))); -const auto data_f32_nightly = combine(data_f32, framework::dataset::make("NumKernels", { 1, 4 })); -const auto data_f16_nightly = combine(data_f16, framework::dataset::make("NumKernels", { 1, 4 })); +const auto data8x8 = combine(datasets::SmallDirectConvolutionShapes(), + combine(framework::dataset::make("StrideX", { 1, 2, 3 }), + combine(framework::dataset::make("StrideY", { 1, 2, 3 }), + combine(framework::dataset::make("PadX", { 0 }), + combine(framework::dataset::make("PadY", { 0 }), + framework::dataset::make("KernelSize", 8)))))); + +const auto data_f32_nightly = combine(data_f32, framework::dataset::make("NumKernels", { 1, 4, 5 })); +const auto data_f16_nightly = combine(data_f16, framework::dataset::make("NumKernels", { 1, 4, 5 })); const auto data_precommit = combine(data_prec, framework::dataset::make("NumKernels", { 1 })); const auto data_precommit9x9 = combine(data9x9, framework::dataset::make("NumKernels", { 4 })); +const auto data_precommit8x8 = combine(data8x8, framework::dataset::make("NumKernels", { 4 })); /* The following tests is from real use-case that made DirectConvolution * overflows in terms of its tensor indexing. This test case is using @@ -129,17 +140,95 @@ const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo TEST_SUITE(NEON) TEST_SUITE(DirectConvolutionLayer) +/** Check whether the configuration of a Direct Convolution layer with no + * bias leads to a successful execution. + */ +TEST_CASE(NoBias, framework::DatasetMode::PRECOMMIT) +{ + const auto src_shape = TensorShape(27U, 13U, 2U); + const auto weights_shape = TensorShape(3U, 3U, 2U, 4U); + const auto bias_shape = TensorShape(4U); + const auto dst_shape = TensorShape(25U, 11U, 4U); + constexpr auto dt = DataType::F32; + + auto src = create_tensor<Tensor>(src_shape, dt); + auto weights = create_tensor<Tensor>(weights_shape, dt); + auto dst = create_tensor<Tensor>(dst_shape, dt); + + const auto conv_info = PadStrideInfo(1, 1, 0, 0); + + // Create Direct Convolution function + NEDirectConvolutionLayer conv{}; + conv.configure(&src, &weights, nullptr, &dst, conv_info); + + src.allocator()->allocate(); + weights.allocator()->allocate(); + dst.allocator()->allocate(); + + library->fill_tensor_value(Accessor(src), 1.f); + library->fill_tensor_value(Accessor(weights), 1.f); + + conv.run(); + + // Compute reference to compare + SimpleTensor<float> ref_src{ src_shape, dt }; + SimpleTensor<float> ref_weights{ weights_shape, dt }; + SimpleTensor<float> ref_bias{ bias_shape, dt }; + library->fill_tensor_value(ref_src, 1.f); + library->fill_tensor_value(ref_weights, 1.f); + // No bias + library->fill_tensor_value(ref_bias, 0.f); + auto ref_dst = reference::convolution_layer<float>(ref_src, ref_weights, ref_bias, dst_shape, conv_info); + + validate(Accessor(dst), ref_dst); +} + +DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL, + concat(combine(combine(framework::dataset::make("CpuExt", std::string("NEON")), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + combine(combine(framework::dataset::make("CpuExt", std::string("NEON")), + framework::dataset::make("DataType", { DataType::F16 })), + framework::dataset::make("DataLayout", { DataLayout::NCHW }))), + cpu_ext, data_type, data_layout) +{ + using namespace cpu::kernels; + + cpuinfo::CpuIsaInfo cpu_isa{}; + cpu_isa.neon = (cpu_ext == "NEON"); + cpu_isa.fp16 = (data_type == DataType::F16); + + const auto *selected_impl = CpuDirectConv2dKernel::get_implementation(DataTypeDataLayoutISASelectorData{ data_type, data_layout, cpu_isa }, cpu::KernelSelectionType::Preferred); + + ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl); + + std::string data_layout_str; + if(data_layout == DataLayout::NCHW) + { + data_layout_str = "nchw"; + } + else + { + data_layout_str = "nhwc"; + } + + std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_" + data_layout_str + "_directconv2d"; + std::string actual = selected_impl->name; + + ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS); +} + // *INDENT-OFF* // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type input/weights - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching input feature maps + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid: Mismatching data type input/weights + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid: Mismatching input feature maps TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported kernel width - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Non-rectangular weights dimensions + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported non-rectangular weights dimensions TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid weights dimensions - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid stride - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases size - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases dimensions + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported stride + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported biases size + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported biases dimensions TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid output size }), framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F16), @@ -185,7 +274,14 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip( framework::dataset::make("ActivationInfo", { ActivationLayerInfo(), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(), + ActivationLayerInfo(), + ActivationLayerInfo(), + ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), })), framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, false })), input_info, weights_info, biases_info, output_info, conv_info, act_info, expected) @@ -235,6 +331,8 @@ DATA_TEST_CASE(NoPaddingNHWCKernel, framework::DatasetMode::ALL, combine(combine template <typename T> using NEDirectConvolutionLayerFixture = DirectConvolutionValidationFixture<Tensor, Accessor, NEDirectConvolutionLayer, T>; +template <typename T> +using NEDirectConvolutionLayerMixedDataLayoutFixture = DirectConvolutionValidationFixture<Tensor, Accessor, NEDirectConvolutionLayer, T, true>; TEST_SUITE(Float) #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC @@ -266,6 +364,24 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectConvolutionLayerFixture<float>, framewo // Validate output validate(Accessor(_target), _reference, tolerance_fp32); } +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEDirectConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit, + framework::dataset::make("DataType", DataType::F32)), + ActivationFunctionsDataset), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp32); +} + +FIXTURE_DATA_TEST_CASE(RunSmall8x8, NEDirectConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit8x8, framework::dataset::make("DataType", + DataType::F32)), + ActivationFunctionsDataset), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp32); +} + FIXTURE_DATA_TEST_CASE(RunSmall9x9, NEDirectConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit9x9, framework::dataset::make("DataType", DataType::F32)), ActivationFunctionsDataset), diff --git a/tests/validation/NEON/ElementwiseAbsoluteValue.cpp b/tests/validation/NEON/ElementwiseAbsoluteValue.cpp index 87f4c7f187..0667ac73f9 100644 --- a/tests/validation/NEON/ElementwiseAbsoluteValue.cpp +++ b/tests/validation/NEON/ElementwiseAbsoluteValue.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -32,7 +32,7 @@ #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" #include "tests/validation/Validation.h" -#include "tests/validation/fixtures/ElementWiseUnaryFixture.h" +#include "tests/validation/fixtures/ElementwiseUnaryFixture.h" namespace arm_compute { @@ -46,6 +46,13 @@ RelativeTolerance<float> tolerance_fp32(0.000001f); #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC RelativeTolerance<float> tolerance_fp16(0.01f); #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#if defined(__aarch64__) +constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0); +constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(0); +#else // #if !defined(__aarch64__) +constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); // There is difference of 1, because quantizing in reference uses round policy "TO_NEAREST_UP", where the armv7a neon kernel uses "TO_ZERO" +constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(1); +#endif // #if !defined(__aarch64__) } // namespace TEST_SUITE(NEON) @@ -53,6 +60,9 @@ TEST_SUITE(AbsLayer) template <typename T> using NEAbsLayerFixture = AbsValidationFixture<Tensor, Accessor, NEAbsLayer, T>; +template <typename T> +using NEAbsLayerQuantizedFixture = AbsQuantizedValidationFixture<Tensor, Accessor, NEAbsLayer, T>; + TEST_SUITE(Float) #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) @@ -107,6 +117,32 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEAbsLayerFixture<int32_t>, framework::DatasetM TEST_SUITE_END() // S32 TEST_SUITE_END() // Integer +TEST_SUITE(Quantized) +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, NEAbsLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine( + datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("InputQInfo", { QuantizationInfo(0.2, -3) })), + framework::dataset::make("OutputQInfo", { QuantizationInfo(0.5, 10) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() // QASYMM8 + +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunSmall, NEAbsLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine( + datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + framework::dataset::make("InputQInfo", { QuantizationInfo(0.075, 6) })), + framework::dataset::make("OutputQInfo", { QuantizationInfo(0.1, -7) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +TEST_SUITE_END() // QASYMM8_SIGNED +TEST_SUITE_END() // Quantized + TEST_SUITE_END() // AbsLayer TEST_SUITE_END() // Neon } // namespace validation diff --git a/tests/validation/NEON/ElementwiseDivision.cpp b/tests/validation/NEON/ElementwiseDivision.cpp index 3656560281..95db4ad5fd 100644 --- a/tests/validation/NEON/ElementwiseDivision.cpp +++ b/tests/validation/NEON/ElementwiseDivision.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,7 +43,7 @@ namespace validation namespace { RelativeTolerance<float> tolerance_fp32(0.000001f); -AbsoluteTolerance<int> tolerance_zero_s32(1); // Tolerance for S32 division +AbsoluteTolerance<int> tolerance_zero_s32(0); // Tolerance for S32 division /** Input data sets **/ const auto ElementwiseDivisionS32Dataset = combine(combine(framework::dataset::make("DataType", DataType::S32), @@ -56,6 +56,8 @@ const auto ElementwiseDivisionFP16Dataset = combine(combine(framewo #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ const auto ElementwiseDivisionFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataType", DataType::F32)); +const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true }); +const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false }); } // namespace TEST_SUITE(NEON) @@ -93,10 +95,41 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( // clang-format on // *INDENT-ON* +// Test test cases will execute the function with dynamic-stated shapes +// Since other elementwise operations share the same kernel, this tests are added only here. +// Also, only FP32 is tested since data type doesn't/shouldn't matter with dynamic shapes. +TEST_SUITE(DynamicShape) +template <typename T> +using CpuElementwiseDivisionDynamicShapeFixture = ArithmeticDivisionDynamicShapeValidationFixture<Tensor, Accessor, NEElementwiseDivision, T>; + +template <typename T> +using CpuElementwiseDivisionBroadcastDynamicShapeFixture = ArithmeticDivisionBroadcastDynamicShapeValidationFixture<Tensor, Accessor, NEElementwiseDivision, T>; + +TEST_SUITE(F32) + +FIXTURE_DATA_TEST_CASE(RunSmall, CpuElementwiseDivisionDynamicShapeFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseDivisionFP32Dataset), + InPlaceDataSet)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp32, 0.01); +} + +FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CpuElementwiseDivisionBroadcastDynamicShapeFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapesBroadcast(), + ElementwiseDivisionFP32Dataset), + OutOfPlaceDataSet)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp32, 0.01); +} + +TEST_SUITE_END() // F32 +TEST_SUITE_END() // DynamicShape + TEST_SUITE(Float) #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(F16) -FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseDivisionFixture<half>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseDivisionFP16Dataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseDivisionFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseDivisionFP16Dataset), + InPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_fp16, 0.01); @@ -105,7 +138,8 @@ TEST_SUITE_END() // F16 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE(F32) -FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseDivisionFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseDivisionFP32Dataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseDivisionFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseDivisionFP32Dataset), + InPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_fp32, 0.01); @@ -114,8 +148,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseDivisionFixture<float>, framework: template <typename T> using NEElementwiseDivisionBroadcastFixture = ArithmeticDivisionBroadcastValidationFixture<Tensor, Accessor, NEElementwiseDivision, T>; -FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseDivisionBroadcastFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapesBroadcast(), - ElementwiseDivisionFP32Dataset)) +FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseDivisionBroadcastFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapesBroadcast(), + ElementwiseDivisionFP32Dataset), + OutOfPlaceDataSet)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp32, 0.01); +} +FIXTURE_DATA_TEST_CASE(RunTinyBroadcastInPlace, NEElementwiseDivisionBroadcastFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapesBroadcastInplace(), + ElementwiseDivisionFP32Dataset), + InPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_fp32, 0.01); @@ -125,7 +167,8 @@ TEST_SUITE_END() // Float TEST_SUITE(Integer) TEST_SUITE(S32) -FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseDivisionFixture<int32_t>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseDivisionS32Dataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseDivisionFixture<int32_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseDivisionS32Dataset), + InPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_zero_s32); @@ -134,7 +177,7 @@ TEST_SUITE_END() // S32 TEST_SUITE_END() // Integer TEST_SUITE_END() // ElementwiseDivision -TEST_SUITE_END() // Neon +TEST_SUITE_END() // NEON } // namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/NEON/ElementwiseExpLayer.cpp b/tests/validation/NEON/ElementwiseExpLayer.cpp index 211e10fa45..31cd78626f 100644 --- a/tests/validation/NEON/ElementwiseExpLayer.cpp +++ b/tests/validation/NEON/ElementwiseExpLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -32,7 +32,7 @@ #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" #include "tests/validation/Validation.h" -#include "tests/validation/fixtures/ElementWiseUnaryFixture.h" +#include "tests/validation/fixtures/ElementwiseUnaryFixture.h" namespace arm_compute { @@ -46,6 +46,15 @@ RelativeTolerance<float> tolerance_fp32(0.000001f); #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC RelativeTolerance<float> tolerance_fp16(0.01f); #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + +#if defined(__aarch64__) +constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0); +constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(0); +#else // #if !defined(__aarch64__) +constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); // There is difference of 1, because quantizing in reference uses round policy "TO_NEAREST_UP", where the armv7a neon kernel uses "TO_ZERO" +constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(1); +#endif // #if !defined(__aarch64__) + } // namespace TEST_SUITE(NEON) TEST_SUITE(ExpLayer) @@ -53,6 +62,9 @@ TEST_SUITE(ExpLayer) template <typename T> using NEExpLayerFixture = ExpValidationFixture<Tensor, Accessor, NEExpLayer, T>; +template <typename T> +using NEExpLayerQuantizedFixture = ExpQuantizedValidationFixture<Tensor, Accessor, NEExpLayer, T>; + TEST_SUITE(Float) #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) @@ -82,6 +94,32 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEExpLayerFixture<float>, framework::DatasetMod TEST_SUITE_END() // FP32 TEST_SUITE_END() // Float +TEST_SUITE(Quantized) +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, NEExpLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine( + datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("InputQInfo", { QuantizationInfo(0.01, 0) })), + framework::dataset::make("OutputQInfo", { QuantizationInfo(0.003, 10) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() // QASYMM8 + +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunSmall, NEExpLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine( + datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + framework::dataset::make("InputQInfo", { QuantizationInfo(0.02, -1) })), + framework::dataset::make("OutputQInfo", { QuantizationInfo(0.002, -2) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +TEST_SUITE_END() // QASYMM8_SIGNED +TEST_SUITE_END() // Quantized + TEST_SUITE_END() // ExpLayer TEST_SUITE_END() // Neon } // namespace validation diff --git a/tests/validation/NEON/ElementwiseKernelSelection.cpp b/tests/validation/NEON/ElementwiseKernelSelection.cpp new file mode 100644 index 0000000000..7990a51936 --- /dev/null +++ b/tests/validation/NEON/ElementwiseKernelSelection.cpp @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2022-2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/StringUtils.h" +#include "src/common/cpuinfo/CpuIsaInfo.h" +#include "src/cpu/kernels/CpuElementwiseKernel.h" +#include "src/cpu/kernels/CpuElementwiseUnaryKernel.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +TEST_SUITE(NEON) +TEST_SUITE(KernelSelection) + +DATA_TEST_CASE(KernelSelection_elementwise_unary, framework::DatasetMode::ALL, concat( + combine(framework::dataset::make("CpuExt", std::string("NEON")), + framework::dataset::make("DataType", { DataType::F32, + DataType::F16, + DataType::S32 + })), + combine(framework::dataset::make("CpuExt", std::string("SVE")), + framework::dataset::make("DataType", { DataType::F32, + DataType::F16, + DataType::S32 + }))), + cpu_ext, data_type) +{ + using namespace cpu::kernels; + + cpuinfo::CpuIsaInfo cpu_isa{}; + cpu_isa.neon = (cpu_ext == "NEON"); + cpu_isa.sve = (cpu_ext == "SVE"); + cpu_isa.fp16 = (data_type == DataType::F16); + + const auto *selected_impl = CpuElementwiseUnaryKernel::get_implementation(DataTypeISASelectorData{ data_type, cpu_isa }, cpu::KernelSelectionType::Preferred); + + ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl); + + std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_elementwise_unary"; + std::string actual = selected_impl->name; + + ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS); +} + +DATA_TEST_CASE(KernelSelection_elementwise_arithmetic, framework::DatasetMode::ALL, concat(concat( + combine(framework::dataset::make("CpuExt", std::string("NEON")), + framework::dataset::make("DataType", { DataType::F32, + DataType::F16, + DataType::S32, + DataType::S16, + DataType::QASYMM8, + DataType::QASYMM8_SIGNED + })), + combine(framework::dataset::make("CpuExt", std::string("SVE")), + framework::dataset::make("DataType", { DataType::F32, + DataType::F16, + DataType::S32, + DataType::S16 + }))), + combine(framework::dataset::make("CpuExt", std::string("SVE2")), + framework::dataset::make("DataType", { DataType::QASYMM8, + DataType::QASYMM8_SIGNED + }))), + cpu_ext, data_type) +{ + using namespace cpu::kernels; + + cpuinfo::CpuIsaInfo cpu_isa{}; + cpu_isa.neon = (cpu_ext == "NEON"); + cpu_isa.sve = (cpu_ext == "SVE"); + cpu_isa.sve2 = (cpu_ext == "SVE2"); + cpu_isa.fp16 = (data_type == DataType::F16); + + const auto *selected_impl = CpuArithmeticKernel::get_implementation( + ElementwiseDataTypeISASelectorData{ data_type, cpu_isa, static_cast<int>(ArithmeticOperation::ADD) }, + cpu::KernelSelectionType::Preferred); + + ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl); + + std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_arithmetic"; + std::string actual = selected_impl->name; + + ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS); +} + +DATA_TEST_CASE(KernelSelection_elementwise_comparison, framework::DatasetMode::ALL, concat(concat( + combine(framework::dataset::make("CpuExt", std::string("NEON")), + framework::dataset::make("DataType", { DataType::F32, + DataType::F16, + DataType::S32, + DataType::S16, + DataType::U8, + DataType::QASYMM8, + DataType::QASYMM8_SIGNED + })), + combine(framework::dataset::make("CpuExt", std::string("SVE")), + framework::dataset::make("DataType", { DataType::F32, + DataType::F16, + DataType::S32, + DataType::S16, + DataType::U8 + }))), + combine(framework::dataset::make("CpuExt", std::string("SVE2")), + framework::dataset::make("DataType", { DataType::QASYMM8, + DataType::QASYMM8_SIGNED + }))), + cpu_ext, data_type) +{ + using namespace cpu::kernels; + + cpuinfo::CpuIsaInfo cpu_isa{}; + cpu_isa.neon = (cpu_ext == "NEON"); + cpu_isa.sve = (cpu_ext == "SVE"); + cpu_isa.sve2 = (cpu_ext == "SVE2"); + cpu_isa.fp16 = (data_type == DataType::F16); + + const auto *selected_impl = CpuComparisonKernel::get_implementation( + ElementwiseDataTypeISASelectorData{ data_type, cpu_isa, static_cast<int>(ComparisonOperation::Equal) }, + cpu::KernelSelectionType::Preferred); + + ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl); + + std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_comparison"; + std::string actual = selected_impl->name; + + ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS); +} + +TEST_SUITE_END() +TEST_SUITE_END() // Neon +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/NEON/ElementwiseLog.cpp b/tests/validation/NEON/ElementwiseLog.cpp index 3115ed6065..1175903dac 100644 --- a/tests/validation/NEON/ElementwiseLog.cpp +++ b/tests/validation/NEON/ElementwiseLog.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -32,7 +32,7 @@ #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" #include "tests/validation/Validation.h" -#include "tests/validation/fixtures/ElementWiseUnaryFixture.h" +#include "tests/validation/fixtures/ElementwiseUnaryFixture.h" namespace arm_compute { @@ -46,6 +46,15 @@ RelativeTolerance<float> tolerance_fp32(0.000001f); #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC RelativeTolerance<float> tolerance_fp16(0.01f); #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + +#if defined(__aarch64__) +constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0); +constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(0); +#else // #if !defined(__aarch64__) +constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); // There is difference of 1, because quantizing in reference uses round policy "TO_NEAREST_UP", where the armv7a neon kernel uses "TO_ZERO" +constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(1); +#endif // #if !defined(__aarch64__) + } // namespace TEST_SUITE(NEON) TEST_SUITE(LogLayer) @@ -53,6 +62,9 @@ TEST_SUITE(LogLayer) template <typename T> using NELogLayerFixture = LogValidationFixture<Tensor, Accessor, NELogLayer, T>; +template <typename T> +using NELogLayerQuantizedFixture = LogQuantizedValidationFixture<Tensor, Accessor, NELogLayer, T>; + TEST_SUITE(Float) #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) @@ -88,6 +100,33 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NELogLayerFixture<float>, framework::DatasetMod } TEST_SUITE_END() // FP32 TEST_SUITE_END() // Float + +TEST_SUITE(Quantized) +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, NELogLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine( + datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("InputQInfo", { QuantizationInfo(10.5, 0), QuantizationInfo(0.5, -10) })), + framework::dataset::make("OutputQInfo", { QuantizationInfo(5, 10) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() // QASYMM8 + +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunSmall, NELogLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine( + datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + framework::dataset::make("InputQInfo", { QuantizationInfo(0.75, -128) })), + framework::dataset::make("OutputQInfo", { QuantizationInfo(12.5, -2) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +TEST_SUITE_END() // QASYMM8_SIGNED +TEST_SUITE_END() // Quantized + TEST_SUITE_END() // LogLayer TEST_SUITE_END() // Neon } // namespace validation diff --git a/tests/validation/NEON/ElementwiseMax.cpp b/tests/validation/NEON/ElementwiseMax.cpp index 4bc263184e..61421ab3e5 100644 --- a/tests/validation/NEON/ElementwiseMax.cpp +++ b/tests/validation/NEON/ElementwiseMax.cpp @@ -62,6 +62,8 @@ const auto ElementwiseMaxFP16Dataset = combine(combine(framework::dataset::make( #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ const auto ElementwiseMaxFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataType", DataType::F32)); +const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true }); +const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false }); } // namespace TEST_SUITE(NEON) @@ -111,7 +113,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( // *INDENT-ON* TEST_SUITE(S32) -FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxFixture<int32_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), ElementwiseMaxS32Dataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxFixture<int32_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ElementwiseMaxS32Dataset), + InPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); @@ -119,7 +122,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxFixture<int32_t>, framework::Da TEST_SUITE_END() // S32 TEST_SUITE(S16) -FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxFixture<int16_t>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseMaxS16Dataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMaxS16Dataset), + InPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); @@ -131,11 +135,12 @@ using NEElementwiseMaxQuantizedFixture = ElementwiseMaxValidationQuantizedFixtur TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) -FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), +FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(), ElementwiseMaxQASYMM8Dataset), framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })), framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) }))) + framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) })), + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_fp32, 0.01); @@ -144,11 +149,13 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxQuantizedFixture<uint8_t>, fram template <typename T> using NEElementwiseMaxQuantizedBroadcastFixture = ElementwiseMaxQuantizedBroadcastValidationFixture<Tensor, Accessor, NEElementwiseMax, T>; -FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseMaxQuantizedBroadcastFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapesBroadcast(), - ElementwiseMaxQASYMM8Dataset), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) }))) +FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseMaxQuantizedBroadcastFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(combine(datasets::SmallShapesBroadcast(), + ElementwiseMaxQASYMM8Dataset), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) })), + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); @@ -156,16 +163,26 @@ FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseMaxQuantizedBroadcastFixt TEST_SUITE_END() TEST_SUITE(QASYMM8_SIGNED) -FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), +FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(), ElementwiseMaxQASYMM8SignedDataset), framework::dataset::make("QuantizationInfo", { QuantizationInfo(10.f, 20) })), framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f, 0) })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f, -27) }))) + framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f, -27) })), + OutOfPlaceDataSet)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +FIXTURE_DATA_TEST_CASE(RunSmallInPlace, NEElementwiseMaxQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(), + ElementwiseMaxQASYMM8SignedDataset), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(10.f, -20) })), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(10.f, -20) })), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(10.f, -20) })), + InPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8_signed); } - TEST_SUITE_END() TEST_SUITE_END() @@ -173,7 +190,8 @@ TEST_SUITE_END() TEST_SUITE(Float) #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(F16) -FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxFixture<half>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseMaxFP16Dataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMaxFP16Dataset), + InPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); @@ -182,7 +200,8 @@ TEST_SUITE_END() // F16 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE(F32) -FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseMaxFP32Dataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMaxFP32Dataset), + InPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); @@ -190,8 +209,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMaxFixture<float>, framework::Data template <typename T> using NEElementwiseMaxBroadcastFixture = ElementwiseMaxBroadcastValidationFixture<Tensor, Accessor, NEElementwiseMax, T>; -FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseMaxBroadcastFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapesBroadcast(), - ElementwiseMaxFP32Dataset)) +FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseMaxBroadcastFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapesBroadcast(), + ElementwiseMaxFP32Dataset), + OutOfPlaceDataSet)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunTinyBroadcastInPlace, NEElementwiseMaxBroadcastFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapesBroadcastInplace(), + ElementwiseMaxFP32Dataset), + InPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); diff --git a/tests/validation/NEON/ElementwiseMin.cpp b/tests/validation/NEON/ElementwiseMin.cpp index 3836b90308..a134eb354d 100644 --- a/tests/validation/NEON/ElementwiseMin.cpp +++ b/tests/validation/NEON/ElementwiseMin.cpp @@ -62,6 +62,8 @@ const auto ElementwiseMinFP16Dataset = combine(combine(framework::dataset::make( #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ const auto ElementwiseMinFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataType", DataType::F32)); +const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true }); +const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false }); } // namespace TEST_SUITE(NEON) @@ -110,7 +112,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( // *INDENT-ON* TEST_SUITE(S32) -FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinFixture<int32_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), ElementwiseMinS32Dataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinFixture<int32_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ElementwiseMinS32Dataset), + InPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); @@ -118,7 +121,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinFixture<int32_t>, framework::Da TEST_SUITE_END() // S32 TEST_SUITE(S16) -FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinFixture<int16_t>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseMinS16Dataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMinS16Dataset), + InPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); @@ -133,23 +137,34 @@ TEST_SUITE(QASYMM8) template <typename T> using NEElementwiseMinQuantizedBroadcastFixture = ElementwiseMinQuantizedBroadcastValidationFixture<Tensor, Accessor, NEElementwiseMin, T>; -FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseMinQuantizedBroadcastFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapesBroadcast(), - ElementwiseMinQASYMM8Dataset), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) }))) +FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseMinQuantizedBroadcastFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(combine(datasets::SmallShapesBroadcast(), + ElementwiseMinQASYMM8Dataset), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) })), + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); } - -FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), +FIXTURE_DATA_TEST_CASE(RunTinyBroadcastInPlace, NEElementwiseMinQuantizedBroadcastFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(combine(datasets::TinyShapesBroadcastInplace(), + ElementwiseMinQASYMM8Dataset), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 20) })), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 20) })), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 20) })), + InPlaceDataSet)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(), ElementwiseMinQASYMM8Dataset), framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })), framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) })) - - ) + framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) })), + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_fp32, 0.01); @@ -157,11 +172,12 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinQuantizedFixture<uint8_t>, fram TEST_SUITE_END() TEST_SUITE(QASYMM8_SIGNED) -FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), +FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(), ElementwiseMaxQASYMM8SignedDataset), framework::dataset::make("QuantizationInfo", { QuantizationInfo(10.f, 20) })), framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f, 0) })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f, -27) }))) + framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f, -27) })), + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8_signed); @@ -174,7 +190,8 @@ TEST_SUITE_END() TEST_SUITE(Float) #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(F16) -FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinFixture<half>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseMinFP16Dataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMinFP16Dataset), + InPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); @@ -183,7 +200,8 @@ TEST_SUITE_END() // F16 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE(F32) -FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseMinFP32Dataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMinFP32Dataset), + InPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); @@ -192,8 +210,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseMinFixture<float>, framework::Data template <typename T> using NEElementwiseMinBroadcastFixture = ElementwiseMinBroadcastValidationFixture<Tensor, Accessor, NEElementwiseMin, T>; -FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseMinBroadcastFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapesBroadcast(), - ElementwiseMinFP32Dataset)) +FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseMinBroadcastFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapesBroadcast(), + ElementwiseMinFP32Dataset), + OutOfPlaceDataSet)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunTinyBroadcastInPlace, NEElementwiseMinBroadcastFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapesBroadcastInplace(), + ElementwiseMinFP32Dataset), + InPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); diff --git a/tests/validation/NEON/ElementwiseNegation.cpp b/tests/validation/NEON/ElementwiseNegation.cpp index 629baa80e6..5b8ae8fc64 100644 --- a/tests/validation/NEON/ElementwiseNegation.cpp +++ b/tests/validation/NEON/ElementwiseNegation.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -32,7 +32,7 @@ #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" #include "tests/validation/Validation.h" -#include "tests/validation/fixtures/ElementWiseUnaryFixture.h" +#include "tests/validation/fixtures/ElementwiseUnaryFixture.h" namespace arm_compute { @@ -46,6 +46,13 @@ RelativeTolerance<float> tolerance_fp32(0.000001f); #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC RelativeTolerance<float> tolerance_fp16(0.01f); #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#if defined(__aarch64__) +constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0); +constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(0); +#else // #if !defined(__aarch64__) +constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); // There is difference of 1, because quantizing in reference uses round policy "TO_NEAREST_UP", where the armv7a neon kernel uses "TO_ZERO" +constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(1); +#endif // #if !defined(__aarch64__) } // namespace TEST_SUITE(NEON) TEST_SUITE(NegLayer) @@ -53,6 +60,9 @@ TEST_SUITE(NegLayer) template <typename T> using NENegLayerFixture = NegValidationInPlaceFixture<Tensor, Accessor, NENegLayer, T>; +template <typename T> +using NENegLayerQuantizedFixture = NegQuantizedValidationFixture<Tensor, Accessor, NENegLayer, T>; + TEST_SUITE(Float) #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) @@ -113,6 +123,32 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NENegLayerFixture<int32_t>, framework::DatasetM TEST_SUITE_END() // S32 TEST_SUITE_END() // Integer +TEST_SUITE(Quantized) +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, NENegLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine( + datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("InputQInfo", { QuantizationInfo(0.2, -3) })), + framework::dataset::make("OutputQInfo", { QuantizationInfo(0.5, 10) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() // QASYMM8 + +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunSmall, NENegLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine( + datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + framework::dataset::make("InputQInfo", { QuantizationInfo(0.075, 6) })), + framework::dataset::make("OutputQInfo", { QuantizationInfo(0.1, -7) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +TEST_SUITE_END() // QASYMM8_SIGNED +TEST_SUITE_END() // Quantized + TEST_SUITE_END() // NegLayer TEST_SUITE_END() // Neon } // namespace validation diff --git a/tests/validation/NEON/ElementwisePower.cpp b/tests/validation/NEON/ElementwisePower.cpp index 4305387c5f..9ac9eec280 100644 --- a/tests/validation/NEON/ElementwisePower.cpp +++ b/tests/validation/NEON/ElementwisePower.cpp @@ -51,6 +51,8 @@ const auto ElementwisePowerFP16Dataset = combine(combine(framework: #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ const auto ElementwisePowerFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataType", DataType::F32)); +const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true }); +const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false }); } // namespace TEST_SUITE(NEON) @@ -91,7 +93,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( TEST_SUITE(Float) #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(F16) -FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwisePowerFixture<half>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwisePowerFP16Dataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwisePowerFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwisePowerFP16Dataset), + InPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_fp16, 0.01); @@ -101,13 +104,15 @@ TEST_SUITE_END() // F16 TEST_SUITE(F32) -FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwisePowerFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwisePowerFP32Dataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwisePowerFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwisePowerFP32Dataset), + InPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_fp32, 0.01); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEElementwisePowerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), ElementwisePowerFP32Dataset)) +FIXTURE_DATA_TEST_CASE(RunLarge, NEElementwisePowerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ElementwisePowerFP32Dataset), + InPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_fp32, 0.01); @@ -116,15 +121,23 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEElementwisePowerFixture<float>, framework::Da template <typename T> using NEElementwisePowerBroadcastFixture = ElementwisePowerBroadcastValidationFixture<Tensor, Accessor, NEElementwisePower, T>; -FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwisePowerBroadcastFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapesBroadcast(), - ElementwisePowerFP32Dataset)) +FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwisePowerBroadcastFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapesBroadcast(), + ElementwisePowerFP32Dataset), + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_fp32, 0.01); } - -FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, NEElementwisePowerBroadcastFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapesBroadcast(), - ElementwisePowerFP32Dataset)) +FIXTURE_DATA_TEST_CASE(RunTinyBroadcastInPlace, NEElementwisePowerBroadcastFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapesBroadcastInplace(), + ElementwisePowerFP32Dataset), + InPlaceDataSet)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp32, 0.01); +} +FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, NEElementwisePowerBroadcastFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapesBroadcast(), + ElementwisePowerFP32Dataset), + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_fp32, 0.01); diff --git a/tests/validation/NEON/ElementwiseRound.cpp b/tests/validation/NEON/ElementwiseRound.cpp index 5ff81a5d8a..620618cb0b 100644 --- a/tests/validation/NEON/ElementwiseRound.cpp +++ b/tests/validation/NEON/ElementwiseRound.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -32,7 +32,7 @@ #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" #include "tests/validation/Validation.h" -#include "tests/validation/fixtures/ElementWiseUnaryFixture.h" +#include "tests/validation/fixtures/ElementwiseUnaryFixture.h" namespace arm_compute { @@ -40,12 +40,20 @@ namespace test { namespace validation { +namespace +{ +constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0); +constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(0); +} // namespace TEST_SUITE(NEON) TEST_SUITE(RoundLayer) template <typename T> using NERoundLayerFixture = RoundValidationFixture<Tensor, Accessor, NERoundLayer, T>; +template <typename T> +using NERoundLayerQuantizedFixture = RoundQuantizedValidationFixture<Tensor, Accessor, NERoundLayer, T>; + TEST_SUITE(Float) #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) @@ -81,6 +89,33 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NERoundLayerFixture<float>, framework::DatasetM } TEST_SUITE_END() // FP32 TEST_SUITE_END() // Float + +TEST_SUITE(Quantized) +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, NERoundLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine( + datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("InputQInfo", { QuantizationInfo(0.2, -3) })), + framework::dataset::make("OutputQInfo", { QuantizationInfo(0.5, 10) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() // QASYMM8 + +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunSmall, NERoundLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine( + datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + framework::dataset::make("InputQInfo", { QuantizationInfo(0.075, 6) })), + framework::dataset::make("OutputQInfo", { QuantizationInfo(0.1, -7) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +TEST_SUITE_END() // QASYMM8_SIGNED +TEST_SUITE_END() // Quantized + TEST_SUITE_END() // RoundLayer TEST_SUITE_END() // Neon } // namespace validation diff --git a/tests/validation/NEON/ElementwiseRsqrtLayer.cpp b/tests/validation/NEON/ElementwiseRsqrtLayer.cpp index f41500cc0b..80788c893f 100644 --- a/tests/validation/NEON/ElementwiseRsqrtLayer.cpp +++ b/tests/validation/NEON/ElementwiseRsqrtLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -32,7 +32,7 @@ #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" #include "tests/validation/Validation.h" -#include "tests/validation/fixtures/ElementWiseUnaryFixture.h" +#include "tests/validation/fixtures/ElementwiseUnaryFixture.h" namespace arm_compute { @@ -46,13 +46,42 @@ RelativeTolerance<float> tolerance_fp32(0.000001f); #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC RelativeTolerance<float> tolerance_fp16(0.01f); #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#if defined(__aarch64__) +constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0); +constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(0); +#else // #if !defined(__aarch64__) +constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); // There is difference of 1, because quantizing in reference uses round policy "TO_NEAREST_UP", where the armv7a neon kernel uses "TO_ZERO" +constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(1); +#endif // #if !defined(__aarch64__) } // namespace TEST_SUITE(NEON) TEST_SUITE(RsqrtLayer) +// Test test cases will execute the function with dynamic-stated shapes +// Since other elementwise unary operations share the same kernel, this tests are added only here. +// Also, only FP32 is tested since data type doesn't/shouldn't matter with dynamic shapes. +TEST_SUITE(DynamicShape) +TEST_SUITE(FP32) + +template <typename T> +using CpuRsqrtDynamicShapeFixture = RsqrtDynamicShapeValidationFixture<Tensor, Accessor, NERsqrtLayer, T>; + +FIXTURE_DATA_TEST_CASE(RunSmall, CpuRsqrtDynamicShapeFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("DataType", + DataType::F32))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp32); +} + +TEST_SUITE_END() // FP32 +TEST_SUITE_END() // DynamicShape + template <typename T> using NERsqrtLayerFixture = RsqrtValidationFixture<Tensor, Accessor, NERsqrtLayer, T>; +template <typename T> +using NERsqrtLayerQuantizedFixture = RsqrtQuantizedValidationFixture<Tensor, Accessor, NERsqrtLayer, T>; + TEST_SUITE(Float) #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) @@ -83,6 +112,32 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NERsqrtLayerFixture<float>, framework::DatasetM TEST_SUITE_END() // FP32 TEST_SUITE_END() // Float +TEST_SUITE(Quantized) +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, NERsqrtLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine( + datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("InputQInfo", { QuantizationInfo(20, 0) })), + framework::dataset::make("OutputQInfo", { QuantizationInfo(0.5, 10) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() // QASYMM8 + +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunSmall, NERsqrtLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine( + datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + framework::dataset::make("InputQInfo", { QuantizationInfo(25, -128) })), + framework::dataset::make("OutputQInfo", { QuantizationInfo(0.1, -7) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +TEST_SUITE_END() // QASYMM8_SIGNED +TEST_SUITE_END() // Quantized + TEST_SUITE_END() // RsqrtLayer TEST_SUITE_END() // Neon } // namespace validation diff --git a/tests/validation/NEON/ElementwiseSin.cpp b/tests/validation/NEON/ElementwiseSin.cpp index 9b212e264f..9c2d7ae268 100644 --- a/tests/validation/NEON/ElementwiseSin.cpp +++ b/tests/validation/NEON/ElementwiseSin.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -32,7 +32,7 @@ #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" #include "tests/validation/Validation.h" -#include "tests/validation/fixtures/ElementWiseUnaryFixture.h" +#include "tests/validation/fixtures/ElementwiseUnaryFixture.h" namespace arm_compute { @@ -46,6 +46,8 @@ AbsoluteTolerance<float> tolerance_fp32(0.00001f); #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC AbsoluteTolerance<float> tolerance_fp16(0.0005f); #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0); +constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(0); } // namespace TEST_SUITE(NEON) TEST_SUITE(SinLayer) @@ -53,6 +55,9 @@ TEST_SUITE(SinLayer) template <typename T> using NESinLayerFixture = SinValidationFixture<Tensor, Accessor, NESinLayer, T>; +template <typename T> +using NESinLayerQuantizedFixture = SinQuantizedValidationFixture<Tensor, Accessor, NESinLayer, T>; + TEST_SUITE(Float) #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) @@ -89,6 +94,32 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NESinLayerFixture<float>, framework::DatasetMod TEST_SUITE_END() // FP32 TEST_SUITE_END() // Float +TEST_SUITE(Quantized) +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, NESinLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine( + datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("InputQInfo", { QuantizationInfo(0.2, -3) })), + framework::dataset::make("OutputQInfo", { QuantizationInfo(200, 10) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() // QASYMM8 + +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunSmall, NESinLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine( + datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + framework::dataset::make("InputQInfo", { QuantizationInfo(0.07, 6) })), + framework::dataset::make("OutputQInfo", { QuantizationInfo(123, -7) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +TEST_SUITE_END() // QASYMM8_SIGNED + +TEST_SUITE_END() // Quantized TEST_SUITE_END() // SinLayer TEST_SUITE_END() // Neon } // namespace validation diff --git a/tests/validation/NEON/ElementwiseSquareDiff.cpp b/tests/validation/NEON/ElementwiseSquareDiff.cpp index 069cbbd7fa..9a86b541de 100644 --- a/tests/validation/NEON/ElementwiseSquareDiff.cpp +++ b/tests/validation/NEON/ElementwiseSquareDiff.cpp @@ -68,6 +68,8 @@ const auto ElementwiseSquaredDiffFP16Dataset = combine(combine(framework::datase #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ const auto ElementwiseSquaredDiffFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataType", DataType::F32)); +const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true }); +const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false }); } // namespace TEST_SUITE(NEON) @@ -109,7 +111,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( // *INDENT-ON* TEST_SUITE(S32) -FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffFixture<int32_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), ElementwiseSquaredDiffS32Dataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffFixture<int32_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffS32Dataset), + InPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); @@ -117,7 +120,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffFixture<int32_t>, frame TEST_SUITE_END() // S32 TEST_SUITE(S16) -FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffFixture<int16_t>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseSquaredDiffS16Dataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffS16Dataset), + InPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); @@ -129,13 +133,12 @@ using NEElementwiseSquaredDiffQuantizedFixture = ElementwiseSquaredDiffValidatio TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) -FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), +FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffQASYMM8Dataset), framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })), framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) })) - - ) + framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) })), + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_fp32, 0.01); @@ -144,11 +147,23 @@ template <typename T> using NEElementwiseSquaredDiffQuantizedBroadcastFixture = ElementwiseSquaredDiffQuantizedBroadcastValidationFixture<Tensor, Accessor, NEElementwiseSquaredDiff, T>; FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseSquaredDiffQuantizedBroadcastFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(datasets::SmallShapesBroadcast(), - ElementwiseSquaredDiffQASYMM8Dataset), + combine(combine(combine(combine(combine(datasets::SmallShapesBroadcast(), + ElementwiseSquaredDiffQASYMM8Dataset), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) })), + OutOfPlaceDataSet)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunTinyBroadcastInPlace, NEElementwiseSquaredDiffQuantizedBroadcastFixture<uint8_t>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(datasets::TinyShapesBroadcastInplace(), + ElementwiseSquaredDiffQASYMM8Dataset), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })), framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) }))) + framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })), + InPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); @@ -156,11 +171,12 @@ FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseSquaredDiffQuantizedBroad TEST_SUITE_END() TEST_SUITE(QASYMM8_SIGNED) -FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), +FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffQASYMM8SignedDataset), framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f, 5) })), framework::dataset::make("QuantizationInfo", { QuantizationInfo(.5f, 5) })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(.2f, 5) }))) + framework::dataset::make("QuantizationInfo", { QuantizationInfo(.2f, 5) })), + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); @@ -171,7 +187,8 @@ TEST_SUITE_END() TEST_SUITE(Float) #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(F16) -FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffFixture<half>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseSquaredDiffFP16Dataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffFP16Dataset), + InPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_fp16, 0.01); @@ -180,7 +197,8 @@ TEST_SUITE_END() // F16 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE(F32) -FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseSquaredDiffFP32Dataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffFP32Dataset), + InPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); @@ -188,15 +206,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseSquaredDiffFixture<float>, framewo template <typename T> using NEElementwiseSquaredDiffBroadcastFixture = ElementwiseSquaredDiffBroadcastValidationFixture<Tensor, Accessor, NEElementwiseSquaredDiff, T>; -FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseSquaredDiffBroadcastFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapesBroadcast(), - ElementwiseSquaredDiffFP32Dataset)) +FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseSquaredDiffBroadcastFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapesBroadcast(), + ElementwiseSquaredDiffFP32Dataset), + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, NEElementwiseSquaredDiffBroadcastFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapesBroadcast(), - ElementwiseSquaredDiffFP32Dataset)) +FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, NEElementwiseSquaredDiffBroadcastFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapesBroadcast(), + ElementwiseSquaredDiffFP32Dataset), + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); diff --git a/tests/validation/NEON/FFT.cpp b/tests/validation/NEON/FFT.cpp index 7125158a21..f7ef0a314e 100644 --- a/tests/validation/NEON/FFT.cpp +++ b/tests/validation/NEON/FFT.cpp @@ -158,6 +158,8 @@ TEST_SUITE(FFTConvolutionLayer) template <typename T> using NEFFTConvolutionLayerFixture = FFTConvolutionValidationFixture<Tensor, Accessor, NEFFTConvolutionLayer, T>; +template <typename T> +using NEFFTConvolutionLayerMixedDataLayoutFixture = FFTConvolutionValidationFixture<Tensor, Accessor, NEFFTConvolutionLayer, T, true>; TEST_SUITE(Float) TEST_SUITE(FP32) @@ -169,10 +171,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEFFTConvolutionLayerFixture<float>, framework: // Validate output validate(Accessor(_target), _reference, tolerance_f32, tolerance_num); } +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEFFTConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFFTConvolutionLayerDataset(), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsSmallDataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f32, tolerance_num); +} TEST_SUITE_END() // FP32 TEST_SUITE_END() // Float TEST_SUITE_END() // FFTConvolutionLayer - TEST_SUITE_END() // Neon } // namespace validation } // namespace test diff --git a/tests/validation/NEON/FillBorder.cpp b/tests/validation/NEON/FillBorder.cpp index 343ad831e4..928990b2b4 100644 --- a/tests/validation/NEON/FillBorder.cpp +++ b/tests/validation/NEON/FillBorder.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -60,10 +60,10 @@ DATA_TEST_CASE(FillBorder, framework::DatasetMode::ALL, combine(combine(combine( { BorderSize border_size{ static_cast<unsigned int>(size) }; - std::mt19937 generator(library->seed()); - std::uniform_int_distribution<uint8_t> distribution_u8(0, 255); - const uint8_t border_value = distribution_u8(generator); - const uint8_t tensor_value = distribution_u8(generator); + std::mt19937 generator(library->seed()); + std::uniform_int_distribution<uint32_t> distribution_u8(0, 255); + const uint8_t border_value = distribution_u8(generator); + const uint8_t tensor_value = distribution_u8(generator); // Create tensors Tensor src = create_tensor<Tensor>(shape, data_type); @@ -77,7 +77,7 @@ DATA_TEST_CASE(FillBorder, framework::DatasetMode::ALL, combine(combine(combine( validate(src.info()->padding(), padding); // Fill tensor with constant value - std::uniform_int_distribution<uint8_t> distribution{ tensor_value, tensor_value }; + std::uniform_int_distribution<uint32_t> distribution{ tensor_value, tensor_value }; library->fill(Accessor(src), distribution, 0); // Create and configure kernel diff --git a/tests/validation/NEON/Floor.cpp b/tests/validation/NEON/Floor.cpp index 419ce56e44..3cd1033ef9 100644 --- a/tests/validation/NEON/Floor.cpp +++ b/tests/validation/NEON/Floor.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,9 +22,12 @@ * SOFTWARE. */ #include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/StringUtils.h" #include "arm_compute/runtime/NEON/functions/NEFloor.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/common/cpuinfo/CpuIsaInfo.h" +#include "src/cpu/kernels/CpuFloorKernel.h" #include "tests/NEON/Accessor.h" #include "tests/PaddingCalculator.h" #include "tests/datasets/ShapeDatasets.h" @@ -62,6 +65,30 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip( const Status status = NEFloor::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false)); ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); } + + +DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL, + combine(framework::dataset::make("CpuExt", std::string("NEON")), + framework::dataset::make("DataType", { DataType::F32, + DataType::F16, + })), + cpu_ext, data_type) +{ + using namespace cpu::kernels; + + cpuinfo::CpuIsaInfo cpu_isa{}; + cpu_isa.neon = (cpu_ext == "NEON"); + cpu_isa.fp16 = (data_type == DataType::F16); + + const auto *selected_impl = CpuFloorKernel::get_implementation(DataTypeISASelectorData{data_type, cpu_isa}, cpu::KernelSelectionType::Preferred); + + ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl); + + std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_floor"; + std::string actual = selected_impl->name; + + ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS); +} // clang-format on // *INDENT-ON* diff --git a/tests/validation/NEON/FullyConnectedLayer.cpp b/tests/validation/NEON/FullyConnectedLayer.cpp index d8c2203802..ee7e56227d 100644 --- a/tests/validation/NEON/FullyConnectedLayer.cpp +++ b/tests/validation/NEON/FullyConnectedLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,6 +25,8 @@ #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/helpers/MemoryHelpers.h" +#include "src/cpu/operators/CpuFullyConnected.h" #include "tests/NEON/Accessor.h" #include "tests/PaddingCalculator.h" #include "tests/datasets/FullyConnectedLayerDataset.h" @@ -40,6 +42,7 @@ namespace test { namespace validation { +using framework::dataset::make; namespace { /** Tolerance for float operations */ @@ -56,7 +59,7 @@ constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(1); /** CNN data types */ -const auto CNNDataTypes = framework::dataset::make("DataType", +const auto CNNDataTypes = make("DataType", { #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC DataType::F16, @@ -64,18 +67,25 @@ const auto CNNDataTypes = framework::dataset::make("DataType", DataType::F32, }); -const auto FullyConnectedParameters = combine(framework::dataset::make("TransposeWeights", { false, true }), framework::dataset::make("ReshapeWeights", { false, true })); +const auto FullyConnectedParameters = combine(make("TransposeWeights", { false, true }), make("ReshapeWeights", { false, true })); -const auto QuantizationData = framework::dataset::make("QuantizationInfo", +const auto QuantizationData = make("QuantizationInfo", { QuantizationInfo(1.f / 256.f, 10), QuantizationInfo(1.1f, 10), }); -const auto EmptyActivationFunctionDataset = framework::dataset::make("ActivationInfo", + +const auto IgnoredQuantizationData = make("IgnoredQuantizationInfo", +{ + QuantizationInfo(), +}); + +const auto NoActivationFunctionDataset = make("ActivationInfo", { ActivationLayerInfo(), }); -const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo", + +const auto ActivationFunctionsDataset = make("ActivationInfo", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f), @@ -83,7 +93,7 @@ const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH), }); -const auto ActivationFunctionsQuantizedDataset = framework::dataset::make("ActivationInfo", +const auto ActivationFunctionsQuantizedDataset = make("ActivationInfo", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f), @@ -94,40 +104,183 @@ const auto ActivationFunctionsQuantizedDataset = framework::dataset::make("Activ TEST_SUITE(NEON) TEST_SUITE(FullyConnectedLayer) +/** Test case for memory injection in @ref cpu::CpuFullyConnected. + * + * Configure the operator once and inject memory at run-time in multiple executions. + * + * Checks performed in order: + * - Both runs compute the same output + */ +TEST_CASE(MemoryInjection, framework::DatasetMode::ALL) +{ + auto fc = std::make_unique<cpu::CpuFullyConnected>(); + const auto src_info = TensorInfo(TensorShape(8U), 1, DataType::F32, DataLayout::NHWC); + const auto weight_info = TensorInfo(TensorShape(8U, 4U), 1, DataType::F32, DataLayout::NHWC); + const auto bias_info = TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC); + auto dst_info = TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC); + const auto fc_info = FullyConnectedLayerInfo{}; + fc->configure(&src_info, &weight_info, &bias_info, &dst_info, fc_info); + + // telhs are newly created every call of this lambda function + auto src = create_tensor<Tensor>(src_info); + auto weight = create_tensor<Tensor>(weight_info); + auto bias = create_tensor<Tensor>(bias_info); + src.allocator()->allocate(); + weight.allocator()->allocate(); + bias.allocator()->allocate(); + + ITensorPack run_pack{ { TensorType::ACL_SRC_0, &src }, { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } }; + ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } }; + + auto mg = MemoryGroup{}; + auto ws = manage_workspace<Tensor>(fc->workspace(), mg, run_pack, prep_pack); + + auto run_conv = [&]() -> Tensor + { + auto dst = create_tensor<Tensor>(dst_info); + dst.allocator()->allocate(); + run_pack.add_tensor(TensorType::ACL_DST, &dst); + + library->fill_tensor_value(Accessor(src), 1.f); + library->fill_tensor_value(Accessor(weight), 2.f); + library->fill_tensor_value(Accessor(bias), 3.f); + // This operator is configured once and captured by this lambda. + fc->prepare(prep_pack); + fc->run(run_pack); + return dst; + }; + auto result_0 = run_conv(); + auto result_1 = run_conv(); + for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) + { + ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS); + } +} + +/** Test case for memory injection in @ref NEFullyConnectedLayer. + * + * Make sure @ref NEFullyConnectedLayer still works through injecting the memory at configure time using the old API. + * + * Checks performed in order: + * - Both runs compute the same output + */ +TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL) +{ + auto fc = std::make_unique<NEFullyConnectedLayer>(); + const auto src_info = TensorInfo(TensorShape(8U), 1, DataType::F32, DataLayout::NHWC); + const auto weight_info = TensorInfo(TensorShape(8U, 4U), 1, DataType::F32, DataLayout::NHWC); + const auto bias_info = TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC); + auto dst_info = TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC); + const auto fc_info = FullyConnectedLayerInfo{}; + auto run_conv = [&]() + { + auto src = create_tensor<Tensor>(src_info); + auto weight = create_tensor<Tensor>(weight_info); + auto bias = create_tensor<Tensor>(bias_info); + auto dst = create_tensor<Tensor>(dst_info); + fc->configure(&src, &weight, &bias, &dst, fc_info); + src.allocator()->allocate(); + weight.allocator()->allocate(); + bias.allocator()->allocate(); + dst.allocator()->allocate(); + library->fill_tensor_value(Accessor(src), 1.f); + library->fill_tensor_value(Accessor(weight), 2.f); + library->fill_tensor_value(Accessor(bias), 3.f); + fc->run(); + return dst; + }; + auto result_0 = run_conv(); + auto result_1 = run_conv(); + for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) + { + ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS); + } +} + +/** Unit test for @ref cpu::CpuFullyConnected with quantized multipler > 1 + * + * Tests output correctness. + */ +TEST_CASE(Quant8_Signed_Mult_gt_1, framework::DatasetMode::ALL) +{ + auto fc = std::make_unique<cpu::CpuFullyConnected>(); + const auto src_info = TensorInfo(TensorShape(1U, 3U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.5f, -1)); + const auto weight_info = TensorInfo(TensorShape(1U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.5, -8)); + const auto bias_info = TensorInfo(TensorShape(1U), 1, DataType::S32); + auto dst_info = TensorInfo(TensorShape(1U, 3U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.1f, 0)); + const auto fc_info = FullyConnectedLayerInfo{}; + fc->configure(&src_info, &weight_info, &bias_info, &dst_info, fc_info); + + // telhs are newly created every call of this lambda function + auto src = create_tensor<Tensor>(src_info); + auto weight = create_tensor<Tensor>(weight_info); + auto bias = create_tensor<Tensor>(bias_info); + auto dst = create_tensor<Tensor>(dst_info); + src.allocator()->allocate(); + weight.allocator()->allocate(); + bias.allocator()->allocate(); + dst.allocator()->allocate(); + + ITensorPack run_pack{ { TensorType::ACL_SRC_0, &src }, { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias }, { TensorType::ACL_DST, &dst } }; + ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } }; + + auto mg = MemoryGroup{}; + auto ws = manage_workspace<Tensor>(fc->workspace(), mg, run_pack, prep_pack); + + // Initialize input values + const std::vector<int8_t> src_values = { 3, 63, 31 }; + const std::vector<int8_t> weight_values = { -4 }; + const std::vector<int32_t> bias_values = { 16 }; + const std::vector<int32_t> expected = { 80, 127, 127 }; + library->fill_static_values(Accessor(src), src_values); + library->fill_static_values(Accessor(weight), weight_values); + library->fill_static_values(Accessor(bias), bias_values); + + // Run FC layer + fc->prepare(prep_pack); + fc->run(run_pack); + + auto dst_ptr = reinterpret_cast<int8_t *>(dst.buffer()); + for(size_t i = 0; i < dst.info()->tensor_shape().total_size(); ++i) + { + ARM_COMPUTE_EXPECT(dst_ptr[i] == expected[i], framework::LogLevel::ERRORS); + } +} + // *INDENT-OFF* // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32), // Mismatching data types + make("InputInfo", { TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32), // Mismatching data types TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32), TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32), TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32), // Invalid weights dimensions TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32), // Wrongly reshaped weights TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32), }), - framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(315U, 271U), 1, DataType::F16), + make("WeightsInfo",{ TensorInfo(TensorShape(315U, 271U), 1, DataType::F16), TensorInfo(TensorShape(192U, 192U), 1, DataType::F32), TensorInfo(TensorShape(192U, 192U), 1, DataType::F32), TensorInfo(TensorShape(217U, 315U), 1, DataType::F32), TensorInfo(TensorShape(217U, 315U), 1, DataType::F32), TensorInfo(TensorShape(192U, 192U), 1, DataType::F32), })), - framework::dataset::make("BiasInfo",{ TensorInfo(TensorShape(271U), 1, DataType::F32), + make("BiasInfo",{ TensorInfo(TensorShape(271U), 1, DataType::F32), TensorInfo(TensorShape(192U), 1, DataType::F32), TensorInfo(TensorShape(192U), 1, DataType::F32), TensorInfo(TensorShape(271U), 1, DataType::F32), TensorInfo(TensorShape(271U), 1, DataType::F32), TensorInfo(TensorShape(192U), 1, DataType::F32), })), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(271U, 3U), 1, DataType::F32), + make("OutputInfo",{ TensorInfo(TensorShape(271U, 3U), 1, DataType::F32), TensorInfo(TensorShape(192U, 4U), 1, DataType::F32), TensorInfo(TensorShape(192U, 4U), 1, DataType::F32), TensorInfo(TensorShape(271U, 3U), 1, DataType::F32), TensorInfo(TensorShape(271U, 3U), 1, DataType::F32), TensorInfo(TensorShape(192U, 4U), 1, DataType::F32), })), - framework::dataset::make("TransposeWeights",{ true, true, false, true, true, true })), - framework::dataset::make("ReshapedWeights",{ false, false, false, false, false , false})), - framework::dataset::make("Expected", { false, true, true, false, false, true })), + make("TransposeWeights",{ true, true, false, true, true, true })), + make("ReshapedWeights",{ false, false, false, false, false , false})), + make("Expected", { false, true, true, false, false, true })), input_info, weights_info, bias_info, output_info, transpose_weights, reshaped_weights, expected) { // Create Fully Connected layer info @@ -143,130 +296,251 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip( template <typename T> using NEFullyConnectedLayerFixture = FullyConnectedLayerValidationFixture<Tensor, Accessor, NEFullyConnectedLayer, T>; +template <typename T> +using NEFullyConnectedLayerMixedDataLayoutFixture = FullyConnectedLayerValidationFixture<Tensor, Accessor, NEFullyConnectedLayer, T, true>; +template <typename T> +using NEFullyConnectedLayerDynamicWeightsFixture = FullyConnectedWithDynamicWeightsFixture<Tensor, Accessor, NEFullyConnectedLayer, T>; +template <typename T> +using NEFullyConnectedLayerDynamicBiasFixture = FullyConnectedWithDynamicBiasFixture<Tensor, Accessor, NEFullyConnectedLayer, T>; TEST_SUITE(Float) #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), - FullyConnectedParameters), - framework::dataset::make("DataType", DataType::F16)), - EmptyActivationFunctionDataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(), + FullyConnectedParameters, + make("DataType", DataType::F16), + NoActivationFunctionDataset)) { // Validate output validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16); } -FIXTURE_DATA_TEST_CASE(RunWithActivation, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine( +FIXTURE_DATA_TEST_CASE(RunWithActivation, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::FullyConnectedLayerWithActivationDataset(), - FullyConnectedParameters), - framework::dataset::make("DataType", DataType::F16)), + FullyConnectedParameters, + make("DataType", DataType::F16), ActivationFunctionsDataset)) { // Validate output validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), - FullyConnectedParameters), - framework::dataset::make("DataType", DataType::F16)), - EmptyActivationFunctionDataset)) +FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeFullyConnectedLayerDataset(), + FullyConnectedParameters, + make("DataType", DataType::F16), + NoActivationFunctionDataset)) { // Validate output validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16); } +FIXTURE_DATA_TEST_CASE(RunDynamicWeights, NEFullyConnectedLayerDynamicWeightsFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(), + make("DataType", DataType::F16), + make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)), + make("WeightsReshaped", { false, true }))) +{ +} TEST_SUITE_END() #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), - framework::dataset::make("DataType", DataType::F32)), - EmptyActivationFunctionDataset)) +FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters, + make("DataType", DataType::F32), + NoActivationFunctionDataset)) { // Validate output validate(Accessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunWithActivation, NEFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine( - combine(datasets::FullyConnectedLayerWithActivationDataset(), - FullyConnectedParameters), - framework::dataset::make("DataType", DataType::F32)), +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEFullyConnectedLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, combine( + make("Input", TensorShape(9U, 5U, 7U)), + make("Weights", TensorShape(315U, 271U)), + make("Biases", TensorShape(271U)), + make("Output", TensorShape(271U)), + FullyConnectedParameters, + make("DataType", DataType::F32), + make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)))) +{ + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunWithActivation, NEFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::FullyConnectedLayerWithActivationDataset(), + FullyConnectedParameters, + make("DataType", DataType::F32), ActivationFunctionsDataset)) { // Validate output validate(Accessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters), - framework::dataset::make("DataType", DataType::F32)), - EmptyActivationFunctionDataset)) +FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters, + make("DataType", DataType::F32), + NoActivationFunctionDataset)) { // Validate output validate(Accessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32); } +FIXTURE_DATA_TEST_CASE(RunDynamicWeights, NEFullyConnectedLayerDynamicWeightsFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(), + make("DataType", DataType::F32), + make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)), + make("WeightsReshaped", { false, true }))) +{ +} TEST_SUITE_END() TEST_SUITE_END() template <typename T> using NEFullyConnectedLayerQuantizedFixture = FullyConnectedLayerValidationQuantizedFixture<Tensor, Accessor, NEFullyConnectedLayer, T>; +template <typename T> +using NEFullyConnectedLayerQuantizedMixedDataLayoutFixture = FullyConnectedLayerValidationQuantizedFixture<Tensor, Accessor, NEFullyConnectedLayer, T, true>; TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) -FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine( - combine(datasets::SmallFullyConnectedLayerDataset(), - FullyConnectedParameters), - framework::dataset::make("DataType", DataType::QASYMM8)), - QuantizationData), - EmptyActivationFunctionDataset)) +FIXTURE_DATA_TEST_CASE(RunMixedDataLayoutWithActivation, NEFullyConnectedLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine( + make("Input", TensorShape(9U, 5U, 7U)), + make("Weights", TensorShape(315U, 271U)), + make("Biases", TensorShape(271U)), + make("Output", TensorShape(271U)), + FullyConnectedParameters, + make("DataType", DataType::QASYMM8), + QuantizationData, + make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)))) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); } - -FIXTURE_DATA_TEST_CASE(RunWithActivation, NEFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine( +FIXTURE_DATA_TEST_CASE(RunSmallWithActivation, NEFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::FullyConnectedLayerWithActivationDataset(), - FullyConnectedParameters), - framework::dataset::make("DataType", DataType::QASYMM8)), - QuantizationData), + FullyConnectedParameters, + make("DataType", DataType::QASYMM8), + QuantizationData, ActivationFunctionsQuantizedDataset)) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); } +FIXTURE_DATA_TEST_CASE(RunDynamicWeightsWithActivation, NEFullyConnectedLayerDynamicWeightsFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(), + make("DataType", DataType::QASYMM8), + make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)), + make("WeightsReshaped", { false }))) +{ +} +FIXTURE_DATA_TEST_CASE(RunDynamicBiasWithActivation, NEFullyConnectedLayerDynamicBiasFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(), + make("DataType", DataType::QASYMM8), + make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)))) +{ +} -FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine( - combine(datasets::LargeFullyConnectedLayerDataset(), - FullyConnectedParameters), - framework::dataset::make("DataType", DataType::QASYMM8)), - QuantizationData), - EmptyActivationFunctionDataset)) +// Dynamic Quantization Tests here +FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallFullyConnectedLayerDataset(), + FullyConnectedParameters, + make("DataType", DataType::QASYMM8), + IgnoredQuantizationData, + NoActivationFunctionDataset)) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); } -TEST_SUITE_END() +FIXTURE_DATA_TEST_CASE(RunLarge, NEFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine( + datasets::LargeFullyConnectedLayerDataset(), + FullyConnectedParameters, + framework::dataset::make("DataType", DataType::QASYMM8), + QuantizationData, + NoActivationFunctionDataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunDynamicBias, NEFullyConnectedLayerDynamicBiasFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(), + make("DataType", DataType::QASYMM8), + NoActivationFunctionDataset)) +{ +} +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEFullyConnectedLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, + combine( + make("Input", TensorShape(9U, 5U, 7U)), + make("Weights", TensorShape(315U, 271U)), + make("Biases", TensorShape(271U)), + make("Output", TensorShape(271U)), + FullyConnectedParameters, + make("DataType", DataType::QASYMM8), + IgnoredQuantizationData, + NoActivationFunctionDataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunDynamicWeights, NEFullyConnectedLayerDynamicWeightsFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(), + make("DataType", DataType::QASYMM8), + NoActivationFunctionDataset, + make("WeightsReshaped", { false }))) +{ +} +TEST_SUITE_END() // QASYMM8 TEST_SUITE(QASYMM8_SIGNED) -FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine( - combine(datasets::SmallFullyConnectedLayerDataset(), - FullyConnectedParameters), - framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), - QuantizationData), - EmptyActivationFunctionDataset)) +FIXTURE_DATA_TEST_CASE(RunMixedDataLayoutWithActivation, NEFullyConnectedLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::PRECOMMIT, + combine( + make("Input", TensorShape(9U, 5U, 7U)), + make("Weights", TensorShape(315U, 271U)), + make("Biases", TensorShape(271U)), + make("Output", TensorShape(271U)), + FullyConnectedParameters, + make("DataType", DataType::QASYMM8_SIGNED), + QuantizationData, + make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)))) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8_signed); } - -FIXTURE_DATA_TEST_CASE(RunWithActivation, NEFullyConnectedLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine( +FIXTURE_DATA_TEST_CASE(RunWithActivation, NEFullyConnectedLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::FullyConnectedLayerWithActivationDataset(), - FullyConnectedParameters), - framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), - QuantizationData), + FullyConnectedParameters, + make("DataType", DataType::QASYMM8_SIGNED), + QuantizationData, ActivationFunctionsQuantizedDataset)) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8_signed); } -TEST_SUITE_END() -TEST_SUITE_END() +FIXTURE_DATA_TEST_CASE(RunDynamicWeightsWithActivation, NEFullyConnectedLayerDynamicWeightsFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(), + make("DataType", DataType::QASYMM8_SIGNED), + make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)), + make("WeightsReshaped", { false }))) +{ +} -TEST_SUITE_END() -TEST_SUITE_END() +// Dynamic Quantization tests +FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine( + datasets::SmallFullyConnectedLayerDataset(), + FullyConnectedParameters, + make("DataType", DataType::QASYMM8_SIGNED), + IgnoredQuantizationData, + NoActivationFunctionDataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEFullyConnectedLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::PRECOMMIT, + combine( + make("Input", TensorShape(9U, 5U, 7U)), + make("Weights", TensorShape(315U, 271U)), + make("Biases", TensorShape(271U)), + make("Output", TensorShape(271U)), + FullyConnectedParameters, + make("DataType", DataType::QASYMM8_SIGNED), + QuantizationData, + NoActivationFunctionDataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} +FIXTURE_DATA_TEST_CASE(RunDynamicWeights, NEFullyConnectedLayerDynamicWeightsFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(), + make("DataType", DataType::QASYMM8_SIGNED), + NoActivationFunctionDataset, + make("WeightsReshaped", { false }))) +{ +} +TEST_SUITE_END() // QASYMM8_SIGNED +TEST_SUITE_END() // Quantized +TEST_SUITE_END() // FullyConnectedLayer +TEST_SUITE_END() // NEON } // namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/NEON/GEMM.cpp b/tests/validation/NEON/GEMM.cpp index 2d8c61164b..5f6a402204 100644 --- a/tests/validation/NEON/GEMM.cpp +++ b/tests/validation/NEON/GEMM.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,12 +22,15 @@ * SOFTWARE. */ #include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/StringUtils.h" #include "arm_compute/runtime/NEON/functions/NEGEMM.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" -#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" -#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" -#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" +#include "src/core/helpers/MemoryHelpers.h" +#include "src/cpu/kernels/CpuGemmInterleave4x4Kernel.h" +#include "src/cpu/kernels/CpuGemmMatrixMultiplyKernel.h" +#include "src/cpu/kernels/CpuGemmTranspose1xWKernel.h" +#include "src/cpu/operators/CpuGemm.h" #include "tests/NEON/Accessor.h" #include "tests/NEON/Helper.h" #include "tests/PaddingCalculator.h" @@ -48,6 +51,8 @@ namespace test { namespace validation { +using framework::dataset::make; + namespace { constexpr AbsoluteTolerance<float> tolerance_f(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for FP32 data types */ @@ -57,7 +62,7 @@ const AbsoluteTolerance<float> abs_tolerance_f16(0.2f); /**< Absolute constexpr float tolerance_num = 0.07f; /**< Tolerance number for FP16 data types */ #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ /** CNN data types */ -const auto CNNDataTypes = framework::dataset::make("DataType", +const auto CNNDataTypes = make("DataType", { #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC DataType::F16, @@ -65,62 +70,210 @@ const auto CNNDataTypes = framework::dataset::make("DataType", DataType::F32, }); -const auto data_interleave = framework::dataset::make("M", 8, 12) * framework::dataset::make("N", 8, 12); -const auto data_transpose = framework::dataset::make("M", 8, 14) * framework::dataset::make("N", 7, 14); +const auto data_interleave = make("M", 8, 12) * make("N", 8, 12); +const auto data_transpose = make("M", 8, 14) * make("N", 7, 14); /** Zero padding test */ template <typename FunctionType> bool validate_zero_padding(unsigned int dim0_value, unsigned int dim1_value) { const TensorShape in_shape(dim0_value, dim1_value); + TensorInfo in(in_shape, 1, DataType::U32); + TensorInfo dst; - // Create tensors - Tensor in = create_tensor<Tensor>(in_shape, DataType::U32); - Tensor dst; - - ARM_COMPUTE_EXPECT(in.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(in.is_resizable(), framework::LogLevel::ERRORS); // Validate zero-padding FunctionType func; func.configure(&in, &dst); - return in.info()->padding().empty(); + return in.padding().empty(); } /* Zero padding test for GEMM kernels */ bool validate_gemm_zero_padding(const TensorShape shape0, const TensorShape shape1) { // Create tensors - Tensor in0 = create_tensor<Tensor>(shape0, DataType::F32); - Tensor in1 = create_tensor<Tensor>(shape1, DataType::F32); - Tensor dst; + TensorInfo in0(shape0, 1, DataType::F32); + TensorInfo in1(shape1, 1, DataType::F32); + TensorInfo dst; // Validate zero-padding - NEGEMMMatrixMultiplyKernel gemm; + cpu::kernels::CpuGemmMatrixMultiplyKernel gemm; gemm.configure(&in0, &in1, &dst, 1.0, false); - return in0.info()->padding().empty() && in1.info()->padding().empty() && dst.info()->padding().empty(); + return in0.padding().empty() && in1.padding().empty() && dst.padding().empty(); } } // namespace TEST_SUITE(NEON) TEST_SUITE(GEMM) +/** Test case for memory injection in @ref cpu::CpuGemm. + * + * Configure the operator once and inject memory at run-time in multiple executions. + * + * Checks performed in order: + * - Both runs compute the same output + */ +TEST_CASE(MemoryInjection, framework::DatasetMode::ALL) +{ + auto gemm = std::make_unique<cpu::CpuGemm>(); + const auto lhs_info = TensorInfo(TensorShape(3U, 3U), 1, DataType::F32); + const auto rhs_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32); + const auto c_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32); + auto dst_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32); + const auto gemm_info = GEMMInfo{}; + gemm->configure(&lhs_info, &rhs_info, &c_info, &dst_info, 1.f, 1.f, gemm_info); + + // telhs are newly created every call of this lambda function + auto lhs = create_tensor<Tensor>(lhs_info); + auto rhs = create_tensor<Tensor>(rhs_info); + auto c = create_tensor<Tensor>(c_info); + lhs.allocator()->allocate(); + rhs.allocator()->allocate(); + c.allocator()->allocate(); + + ITensorPack run_pack{ { TensorType::ACL_SRC_0, &lhs }, { TensorType::ACL_SRC_1, &rhs }, { TensorType::ACL_SRC_2, &c } }; + ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &rhs }, { TensorType::ACL_SRC_2, &c } }; + + auto mg = MemoryGroup{}; + auto ws = manage_workspace<Tensor>(gemm->workspace(), mg, run_pack, prep_pack); + + auto run_conv = [&]() -> Tensor + { + auto dst = create_tensor<Tensor>(dst_info); + dst.allocator()->allocate(); + run_pack.add_tensor(TensorType::ACL_DST, &dst); + + library->fill_tensor_value(Accessor(lhs), 1.f); + library->fill_tensor_value(Accessor(rhs), 2.f); + library->fill_tensor_value(Accessor(c), 3.f); + // This operator is configured once and captured by this lambda. + gemm->prepare(prep_pack); + gemm->run(run_pack); + return dst; + }; + auto result_0 = run_conv(); + auto result_1 = run_conv(); + for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) + { + ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS); + } +} + +/** Test case for memory injection in @ref NEGEMM. + * + * Make sure @ref NEGEMM still works through injecting the memory at configure time using the old API. + * + * Checks performed in order: + * - Both runs compute the same output + */ +TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL) +{ + auto gemm = std::make_unique<NEGEMM>(); + const auto lhs_info = TensorInfo(TensorShape(3U, 3U), 1, DataType::F32); + const auto rhs_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32); + const auto c_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32); + auto dst_info = TensorInfo(TensorShape(4U, 3U), 1, DataType::F32); + const auto gemm_info = GEMMInfo{}; + auto run_conv = [&]() + { + auto lhs = create_tensor<Tensor>(lhs_info); + auto rhs = create_tensor<Tensor>(rhs_info); + auto c = create_tensor<Tensor>(c_info); + auto dst = create_tensor<Tensor>(dst_info); + gemm->configure(&lhs, &rhs, &c, &dst, 1.f, 1.f, gemm_info); + lhs.allocator()->allocate(); + rhs.allocator()->allocate(); + c.allocator()->allocate(); + dst.allocator()->allocate(); + library->fill_tensor_value(Accessor(lhs), 1.f); + library->fill_tensor_value(Accessor(rhs), 2.f); + library->fill_tensor_value(Accessor(c), 3.f); + gemm->run(); + return dst; + }; + auto result_0 = run_conv(); + auto result_1 = run_conv(); + for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) + { + ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS); + } +} + +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( + make("LhsInfo", { TensorInfo(TensorShape(27U, 13U), 1, DataType::S32), // Unsupported data type + TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), + }), + make("RhsInfo",{ TensorInfo(TensorShape(8U, 27U), 1, DataType::S32), + TensorInfo(TensorShape(8U, 27U), 1, DataType::F32), + })), + make("OutputInfo",{ TensorInfo(TensorShape(8U, 13U), 1, DataType::S32), + TensorInfo(TensorShape(8U, 13U), 1, DataType::F32), + })), + make("Expected", { false, true })), + lhs_info, rhs_info, output_info, expected) +{ + constexpr float alpha = 1.0; + constexpr float beta = 0.0; + const auto gemm_info = GEMMInfo(); + bool is_valid = bool(NEGEMM::validate(&lhs_info.clone()->set_is_resizable(true), &rhs_info.clone()->set_is_resizable(true), nullptr, &output_info.clone()->set_is_resizable(true), alpha, beta, gemm_info)); + ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); +} +// clang-format on +// *INDENT-ON* +TEST_SUITE(KERNEL_SELECTION) +DATA_TEST_CASE(KernelSelection_mul_and_add, framework::DatasetMode::ALL, + combine(make("CpuExt", std::string("NEON")), + make("DataType", { DataType::F32, + DataType::F16 + })), + cpu_ext, data_type) +{ + using namespace cpu::kernels; + + cpuinfo::CpuIsaInfo cpu_isa{}; + cpu_isa.neon = (cpu_ext == "NEON"); + cpu_isa.fp16 = (data_type == DataType::F16); + + const auto *selected_impl_mul = CpuGemmMatrixMultiplyKernel::get_implementation(DataTypeISASelectorData{ data_type, cpu_isa }, cpu::KernelSelectionType::Preferred); + + ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl_mul); + + std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_gemm_matrix_mul"; + std::string actual = selected_impl_mul->name; + + ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS); + + const auto *selected_impl_add = CpuGemmMatrixAdditionKernel::get_implementation(DataTypeISASelectorData{ data_type, cpu_isa }, cpu::KernelSelectionType::Preferred); + + ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl_add); + + expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_gemm_matrix_add"; + actual = selected_impl_add->name; + + ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS); +} +TEST_SUITE_END() // KERNEL_SELECTION + TEST_SUITE(TRANSPOSE_1XW) -using NEGEMMTranspose1xW = NESynthetizeFunctionWithZeroConstantBorder<NEGEMMTranspose1xWKernel, 4>; +using CpuGemmTranspose1xW = NESynthetizeFunctionWithZeroConstantKernelBorder<cpu::kernels::CpuGemmTranspose1xWKernel>; DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip( - framework::dataset::make("N", { 1, 23, 63, 101 }), - framework::dataset::make("K", { 1, 47, 29, 27 })), + make("N", { 1, 23, 63, 101 }), + make("K", { 1, 47, 29, 27 })), n_value, k_value) { - bool status = validate_zero_padding<NEGEMMTranspose1xWKernel>(n_value, k_value); + bool status = validate_zero_padding<CpuGemmTranspose1xW>(n_value, k_value); ARM_COMPUTE_EXPECT(status, framework::LogLevel::ERRORS); } TEST_SUITE(U32) -using NEGEMMTranspose1xWFixture = GEMMTranspose1xWValidationFixture<Tensor, Accessor, NEGEMMTranspose1xW, uint32_t>; -FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * framework::dataset::make("DataType", DataType::U32)) +using CpuGemmTranspose1xWFixture = GEMMTranspose1xWValidationFixture<Tensor, Accessor, CpuGemmTranspose1xW, uint32_t>; +FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * make("DataType", DataType::U32)) { // Validate output validate(Accessor(_target), _reference); @@ -128,8 +281,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMTranspose1xWFixture, framework::DatasetMo TEST_SUITE_END() // U32 TEST_SUITE(U16) -using NEGEMMTranspose1xWFixture = GEMMTranspose1xWValidationFixture<Tensor, Accessor, NEGEMMTranspose1xW, uint16_t>; -FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * framework::dataset::make("DataType", DataType::U16)) +using CpuGemmTranspose1xWFixture = GEMMTranspose1xWValidationFixture<Tensor, Accessor, CpuGemmTranspose1xW, uint16_t>; +FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * make("DataType", DataType::U16)) { // Validate output validate(Accessor(_target), _reference); @@ -137,8 +290,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMTranspose1xWFixture, framework::DatasetMo TEST_SUITE_END() // U16 TEST_SUITE(U8) -using NEGEMMTranspose1xWFixture = GEMMTranspose1xWValidationFixture<Tensor, Accessor, NEGEMMTranspose1xW, uint8_t>; -FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * framework::dataset::make("DataType", DataType::U8)) +using CpuGemmTranspose1xWFixture = GEMMTranspose1xWValidationFixture<Tensor, Accessor, CpuGemmTranspose1xW, uint8_t>; +FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * make("DataType", DataType::U8)) { // Validate output validate(Accessor(_target), _reference); @@ -148,20 +301,20 @@ TEST_SUITE_END() // U8 TEST_SUITE_END() // TRANSPOSE_1XW TEST_SUITE(INTERLEAVE_4X4) -using NEGEMMInterleave4x4 = NESynthetizeFunctionWithZeroConstantBorder<NEGEMMInterleave4x4Kernel, 4>; +using CpuGemmInterleave4x4 = NESynthetizeFunctionWithZeroConstantKernelBorder<cpu::kernels::CpuGemmInterleave4x4Kernel>; DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip( - framework::dataset::make("M", { 1, 23, 63, 101 }), - framework::dataset::make("K", { 1, 47, 29, 27 })), + make("M", { 1, 23, 63, 101 }), + make("K", { 1, 47, 29, 27 })), m_value, k_value) { - bool status = validate_zero_padding<NEGEMMInterleave4x4Kernel>(m_value, k_value); + bool status = validate_zero_padding<cpu::kernels::CpuGemmInterleave4x4Kernel>(m_value, k_value); ARM_COMPUTE_EXPECT(status, framework::LogLevel::ERRORS); } TEST_SUITE(U32) -using NEGEMMInterleave4x4Fixture = GEMMInterleave4x4ValidationFixture<Tensor, Accessor, NEGEMMInterleave4x4, uint32_t>; -FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * framework::dataset::make("DataType", DataType::U32)) +using CpuGemmInterleave4x4Fixture = GEMMInterleave4x4ValidationFixture<Tensor, Accessor, CpuGemmInterleave4x4, uint32_t>; +FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * make("DataType", DataType::U32)) { // Validate output validate(Accessor(_target), _reference); @@ -169,8 +322,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMInterleave4x4Fixture, framework::DatasetM TEST_SUITE_END() // U32 TEST_SUITE(U16) -using NEGEMMInterleave4x4Fixture = GEMMInterleave4x4ValidationFixture<Tensor, Accessor, NEGEMMInterleave4x4, uint16_t>; -FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * framework::dataset::make("DataType", DataType::U16)) +using CpuGemmInterleave4x4Fixture = GEMMInterleave4x4ValidationFixture<Tensor, Accessor, CpuGemmInterleave4x4, uint16_t>; +FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * make("DataType", DataType::U16)) { // Validate output validate(Accessor(_target), _reference); @@ -178,8 +331,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMInterleave4x4Fixture, framework::DatasetM TEST_SUITE_END() // U16 TEST_SUITE(U8) -using NEGEMMInterleave4x4Fixture = GEMMInterleave4x4ValidationFixture<Tensor, Accessor, NEGEMMInterleave4x4, uint8_t>; -FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * framework::dataset::make("DataType", DataType::QASYMM8)) +using CpuGemmInterleave4x4Fixture = GEMMInterleave4x4ValidationFixture<Tensor, Accessor, CpuGemmInterleave4x4, uint8_t>; +FIXTURE_DATA_TEST_CASE(RunSmall, CpuGemmInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * make("DataType", DataType::QASYMM8)) { // Validate output validate(Accessor(_target), _reference); @@ -188,23 +341,24 @@ TEST_SUITE_END() // U8 TEST_SUITE_END() // INTERLEAVE_4X4 -//TODO(COMPMID-415): Validate valid region - template <typename T> using NEGEMMFixture = GEMMValidationFixture<Tensor, Accessor, NEGEMM, T>; template <typename T> -using NEGEMMFixtureDisabledC = GEMMValidationFixture<Tensor, Accessor, NEGEMM, T, true>; +using NEBatchedMatMulFixture = GEMMValidationFixture<Tensor, Accessor, NEGEMM, T, true, false, false, false, false, true>; + +template <typename T> +using NEGEMMAccumulateFixture = GEMMAccumulateValidationFixture<Tensor, Accessor, NEGEMM, T>; TEST_SUITE(Float) -DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip(framework::dataset::make("In0", { TensorShape(21U, 13U), +DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip(make("In0", { TensorShape(21U, 13U), TensorShape(31U, 1U), TensorShape(31U, 1U), TensorShape(8U, 2U), TensorShape(38U, 12U), TensorShape(32U, 1U) }), - framework::dataset::make("In1", { TensorShape(33U, 21U), + make("In1", { TensorShape(33U, 21U), TensorShape(23U, 31U), TensorShape(23U, 31U), TensorShape(16U, 8U), @@ -217,59 +371,111 @@ DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, zip(framework:: ARM_COMPUTE_EXPECT(status, framework::LogLevel::ERRORS); } +DATA_TEST_CASE(ValidateAccumulate, framework::DatasetMode::ALL, combine( + zip(make("In0",{ TensorShape(21U, 13U) }), + make("In1", { TensorShape(33U, 21U) }), + make("Dst", { TensorShape(33U, 13U) })), + zip( + make("alpha", { 1.0, 100.0, 1.0, 1.0 }), + make("beta", { 0.0, 0.0, 1.0, 1.0 }), + make("is_c_null", { false, false, false, true }), + make("Expected", { true, false, false, true }))), + shape_a, shape_b, shape_dst, alpha, beta, is_c_null, expected) +{ + /* Accumulation test for GEMM kernels */ + // Create tensors + TensorInfo in_a(shape_a, 1, DataType::F32); + TensorInfo in_b(shape_b, 1, DataType::F32); + TensorInfo in_c(shape_dst, 1, DataType::F32); + TensorInfo dst(shape_dst, 1, DataType::F32); + + GEMMInfo gemm_info = GEMMInfo(); + gemm_info.set_accumulate(true); + + // Validate accumulation + cpu::CpuGemm gemm; + Status status = gemm.validate(&in_a, &in_b, (is_c_null ? nullptr : &in_c), &dst, alpha, beta, gemm_info); + ARM_COMPUTE_EXPECT((expected == bool(status)), framework::LogLevel::ERRORS); +} + #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallGEMMDataset(), - framework::dataset::make("ReshapeWeights", { true, false })), - framework::dataset::make("DataType", DataType::F16))) + make("ReshapeWeights", { true, false })), + make("DataType", DataType::F16))) { // Validate output validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); } FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeGEMMDataset(), - framework::dataset::make("ReshapeWeights", { true, false })), + make("ReshapeWeights", { true, false })), + make("DataType", DataType::F16))) +{ + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); +} - framework::dataset::make("DataType", DataType::F16))) +TEST_SUITE(BATCHED_MATMUL) +FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchedMatMulFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallBatchedMatMulDataset(), + make("ReshapeWeights", { false })), + make("DataType", DataType::F16))) { // Validate output validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16); } -TEST_SUITE_END() +TEST_SUITE_END() // BATCHED_MATMUL + +TEST_SUITE_END() // FP16 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallGEMMDataset(), - framework::dataset::make("ReshapeWeights", { true, false })), - - framework::dataset::make("DataType", DataType::F32))) + make("ReshapeWeights", { true, false })), + make("DataType", DataType::F32))) { // Validate output validate(Accessor(_target), _reference, tolerance_f); } FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeGEMMDataset(), - framework::dataset::make("ReshapeWeights", { true, false })), + make("ReshapeWeights", { true, false })), + make("DataType", DataType::F32))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f); +} - framework::dataset::make("DataType", DataType::F32))) +TEST_SUITE(BATCHED_MATMUL) +FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchedMatMulFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallBatchedMatMulDataset(), + make("ReshapeWeights", { false })), + make("DataType", DataType::F32))) { // Validate output validate(Accessor(_target), _reference, tolerance_f); } -TEST_SUITE(DisabledC) -FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMFixtureDisabledC<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallGEMMDataset(), - framework::dataset::make("ReshapeWeights", { true, false })), +TEST_SUITE_END() // BATCHED_MATMUL - framework::dataset::make("DataType", DataType::F32))) +TEST_SUITE(ACCUMULATE) +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMAccumulateFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallAccumulateGEMMDataset(), + make("ReshapeWeights", { false }), + make("DataType", DataType::F32))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMAccumulateFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeAccumulateGEMMDataset(), + make("ReshapeWeights", { false }), + make("DataType", DataType::F32))) { // Validate output validate(Accessor(_target), _reference, tolerance_f); } -TEST_SUITE_END() +TEST_SUITE_END() // ACCUMULATE -TEST_SUITE_END() -TEST_SUITE_END() +TEST_SUITE_END() // FP32 -TEST_SUITE_END() -TEST_SUITE_END() +TEST_SUITE_END() // Float +TEST_SUITE_END() // GEMM +TEST_SUITE_END() // NEON } // namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/NEON/GEMMLowp.cpp b/tests/validation/NEON/GEMMLowp.cpp index 518f4804a0..d25f43a330 100644 --- a/tests/validation/NEON/GEMMLowp.cpp +++ b/tests/validation/NEON/GEMMLowp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,6 +26,8 @@ #include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/helpers/MemoryHelpers.h" +#include "src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.h" #include "tests/NEON/Accessor.h" #include "tests/NEON/Helper.h" #include "tests/PaddingCalculator.h" @@ -37,7 +39,6 @@ #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" #include "tests/validation/Validation.h" -#include "tests/validation/fixtures/GEMMLowpAssemblyFixture.h" #include "tests/validation/fixtures/GEMMLowpFixture.h" namespace arm_compute @@ -46,10 +47,26 @@ namespace test { namespace validation { +using framework::dataset::make; + +namespace +{ + constexpr AbsoluteTolerance<float> tolerance_batched(1); + constexpr AbsoluteTolerance<float> tolerance_quant(1); +} // namespace + + TEST_SUITE(NEON) TEST_SUITE(GEMMLowp) TEST_SUITE(MatrixMultiplyCore) + using NEGEMMLowpMatrixMultiplyCoreFixture = GEMMLowpMatrixMultiplyCoreValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>; +using NEGEMMLowpMatrixMultiplyCoreAccumulateFixture = GEMMLowpMatrixMultiplyAccumulateValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>; +using NEGEMMLowpBatchedMatMulFixture = GEMMLowpMatrixMultiplyCoreValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, true>; +using NEGEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture = GEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>; +using NEGEMMLowpDequantizedMatrixMultiplyValidationFixture = GEMMLowpDequantizedMatrixMultiplyValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>; + +using framework::dataset::make; DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, framework::dataset::concat(datasets::SmallGEMMLowpDataset(), datasets::LargeGEMMLowpDataset()), shape_a, shape_b, shape_c, a_offset, b_offset) @@ -75,29 +92,69 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, framework::dataset::c validate(b.info()->padding(), PaddingSize()); validate(c.info()->padding(), PaddingSize()); } +// accumulation is not supported for Int8/UInt8 in aarch32 +#ifdef __aarch64__ +DATA_TEST_CASE(ValidateAccumulate, framework::DatasetMode::ALL, combine( + zip( + make("In0",{ TensorShape(21U, 1U) }), + make("In1", { TensorShape(1U, 21U) }), + make("Dst", { TensorShape(1U, 1U) }), + make("a_offset", { -2 }), + make("a_offset", { 13 }) + ), + zip( + make("OutputDataType", { DataType::S32, DataType::QASYMM8, DataType::QASYMM8_SIGNED}), + make("Expected", { true, false, false }) + )), + shape_a, shape_b, shape_dst, a_offset, b_offset, output_data_type, expected) +{ + DataType input_data_type = (output_data_type == DataType::S32 ? DataType::QASYMM8 : output_data_type); + // Accumulation test for GEMM kernels + TensorInfo a(shape_a, 1, input_data_type, QuantizationInfo(1.0f / 255, a_offset)); + TensorInfo b(shape_b, 1, input_data_type, QuantizationInfo(1.0f / 255, b_offset)); + TensorInfo dst(shape_dst, 1, output_data_type, QuantizationInfo()); + + // Create and configure function + GEMMInfo gemm_info = GEMMInfo(); + gemm_info.set_accumulate(true); + + if (is_data_type_quantized(output_data_type)) + { + GEMMLowpOutputStageInfo gemmLowpOutputStageInfo = GEMMLowpOutputStageInfo(); + gemmLowpOutputStageInfo.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT; + + gemm_info.set_gemmlowp_output_stage(gemmLowpOutputStageInfo); + } + + cpu::CpuGemmLowpMatrixMultiplyCore gemmlowp_mm; + Status status = gemmlowp_mm.validate(&a, &b, nullptr, &dst, gemm_info); + + ARM_COMPUTE_EXPECT((expected == bool(status)), framework::LogLevel::ERRORS); +} +#endif // __arch64__ // *INDENT-OFF* // clang-format off -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( - framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Input not a multiple of 4 +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip( + make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Input not a multiple of 4 TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Mismatching data type TensorInfo(TensorShape(20U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Invalid dimensions TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Invalid dimensions TensorInfo(TensorShape(16U, 32U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), }), - framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), + make("InputBInfo",{ TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), TensorInfo(TensorShape(64U, 16U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), - })), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(33U, 13U), 1, DataType::S32), + }), + make("OutputInfo",{ TensorInfo(TensorShape(33U, 13U), 1, DataType::S32), TensorInfo(TensorShape(33U, 13U), 1, DataType::S32), TensorInfo(TensorShape(33U, 13U), 1, DataType::S32), TensorInfo(TensorShape(8U, 11U), 1, DataType::S32), TensorInfo(TensorShape(64U, 32U), 1, DataType::S32), - })), - framework::dataset::make("Expected", { true, false, false, false, true })), + }), + make("Expected", { true, false, false, false, true })), a_info, b_info, output_info, expected) { // Lock tensors @@ -110,444 +167,224 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( // clang-format on // *INDENT-ON* -FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpDataset()) +/** Test case for memory injection in @ref cpu::CpuGemmLowpMatrixMultiplyCore. + * + * Configure the operator once and inject memory at run-time in multiple executions. + * + * Checks performed in order: + * - Both runs compute the same output + */ +TEST_CASE(MemoryInjection, framework::DatasetMode::ALL) { - // Validate output - validate(Accessor(_target), _reference); + auto gemm = std::make_unique<cpu::CpuGemmLowpMatrixMultiplyCore>(); + auto a_info = TensorInfo(TensorShape(32U, 72U), 1, DataType::QASYMM8); + auto b_info = TensorInfo(TensorShape(17U, 32U), 1, DataType::QASYMM8); + auto dst_info = TensorInfo(TensorShape(17U, 72U), 1, DataType::S32); + a_info.set_quantization_info(QuantizationInfo(1.0f / 255, -9)); + b_info.set_quantization_info(QuantizationInfo(1.0f / 255, 1)); + const auto gemm_info = GEMMInfo{}; + gemm->configure(&a_info, &b_info, nullptr, &dst_info, gemm_info); + + // telhs are newly created every call of this lambda function + auto a = create_tensor<Tensor>(a_info); + auto b = create_tensor<Tensor>(b_info); + auto dst = create_tensor<Tensor>(dst_info); + a.allocator()->allocate(); + b.allocator()->allocate(); + dst.allocator()->allocate(); + + ITensorPack run_pack = + { + { TensorType::ACL_SRC_0, &a }, + { TensorType::ACL_SRC_1, &b }, + { TensorType::ACL_DST, &dst } + }; + ITensorPack prep_pack = + { + { TensorType::ACL_SRC_1, &b }, + }; + + auto mg = MemoryGroup{}; + auto ws = manage_workspace<Tensor>(gemm->workspace(), mg, run_pack, prep_pack); + + auto run_conv = [&]() -> Tensor + { + auto dst = create_tensor<Tensor>(dst_info); + dst.allocator()->allocate(); + run_pack.add_tensor(TensorType::ACL_DST, &dst); + + library->fill_tensor_value(Accessor(a), static_cast<uint8_t>(1)); + library->fill_tensor_value(Accessor(b), static_cast<uint8_t>(2)); + // This operator is configured once and captured by this lambda. + gemm->prepare(prep_pack); + gemm->run(run_pack); + return dst; + }; + auto result_0 = run_conv(); + auto result_1 = run_conv(); + for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) + { + ARM_COMPUTE_EXPECT(((uint8_t *)result_0.buffer())[i] == ((uint8_t *)result_1.buffer())[i], framework::LogLevel::ERRORS); + } } -FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpDataset()) +/** Test case for memory injection in @ref NEGEMMLowpMatrixMultiplyCore. + * + * Make sure @ref NEGEMMLowpMatrixMultiplyCore still works through injecting the memory at configure time using the old API. + * + * Checks performed in order: + * - Both runs compute the same output + */ +TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL) { - // Validate output - validate(Accessor(_target), _reference); + auto gemm = std::make_unique<NEGEMMLowpMatrixMultiplyCore>(); + auto a_info = TensorInfo(TensorShape(32U, 72U), 1, DataType::QASYMM8); + auto b_info = TensorInfo(TensorShape(17U, 32U), 1, DataType::QASYMM8); + auto dst_info = TensorInfo(TensorShape(17U, 72U), 1, DataType::S32); + a_info.set_quantization_info(QuantizationInfo(1.0f / 255, -9)); + b_info.set_quantization_info(QuantizationInfo(1.0f / 255, 1)); + const auto gemm_info = GEMMInfo{}; + auto run_conv = [&]() + { + auto a = create_tensor<Tensor>(a_info); + auto b = create_tensor<Tensor>(b_info); + auto dst = create_tensor<Tensor>(dst_info); + gemm->configure(&a, &b, nullptr, &dst, gemm_info); + a.allocator()->allocate(); + b.allocator()->allocate(); + dst.allocator()->allocate(); + library->fill_tensor_value(Accessor(a), static_cast<uint8_t>(1)); + library->fill_tensor_value(Accessor(b), static_cast<uint8_t>(2)); + gemm->run(); + return dst; + }; + auto result_0 = run_conv(); + auto result_1 = run_conv(); + for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i) + { + ARM_COMPUTE_EXPECT(((uint8_t *)result_0.buffer())[i] == ((uint8_t *)result_1.buffer())[i], framework::LogLevel::ERRORS); + } } -using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>; -TEST_SUITE(FusedOffsetOutput) -FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(), - framework::dataset::make("DataType", { DataType::QASYMM8 }))) +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpDataset()) { // Validate output validate(Accessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(), - framework::dataset::make("DataType", { DataType::QASYMM8 }))) +FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpDataset()) { // Validate output validate(Accessor(_target), _reference); } -TEST_SUITE_END() // FusedOffsetOutput -TEST_SUITE_END() // MatrixMultiplyCore - -TEST_SUITE(OutputStage) - -TEST_SUITE(QuantizeDownInt32Scale) +TEST_SUITE(BatchedMatMul) TEST_SUITE(QASYMM8) - -const auto quantize_down_int32_to_uint8_scale_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) * framework::dataset::make("result_shift", 2, - 3) - * framework::dataset::make("min", 0) * framework::dataset::make("max", 255) * framework::dataset::make("addBias", { false, true }); - -const auto quantize_down_int32_to_uint8_scale_relu_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, - 2) - * framework::dataset::make("result_shift", 2, 3) * framework::dataset::make("min", 0, 2) * framework::dataset::make("max", 171, 174) * framework::dataset::make("addBias", { false, true }); - -using NEGEMMLowpQuantizeDownInt32ScaleFixture = GEMMLowpQuantizeDownInt32ToUint8ScaleValidationFixture<Tensor, Accessor, NEGEMMLowpOutputStage>; - -// *INDENT-OFF* -// clang-format off -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( - framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Input not a multiple of 16 - TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), // Wrong output data type - }), - framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(21U), 1, DataType::S32), - TensorInfo(TensorShape(20U), 1, DataType::S32), - })), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8), - TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), - })), - framework::dataset::make("Min",{ 0, - 13, - })), - framework::dataset::make("Max",{ 205, - 180, - })), - framework::dataset::make("Expected", { true, false })), - a_info, b_info, output_info, min, max, expected) -{ - - GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo(); - output_stage.type = GEMMLowpOutputStageType::QUANTIZE_DOWN; - output_stage.gemmlowp_min_bound = min; - output_stage.gemmlowp_max_bound = max; - output_stage.output_data_type = DataType::QASYMM8; - - // Lock tensors - Status status = NEGEMMLowpOutputStage::validate(&a_info.clone()->set_is_resizable(false), - &b_info.clone()->set_is_resizable(false), - &output_info.clone()->set_is_resizable(false), - output_stage); - ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); -} -// clang-format on -// *INDENT-ON* - -TEST_CASE(NoPaddingAdded, framework::DatasetMode::PRECOMMIT) +using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned = + GEMMLowpBatchedMatrixMultiplyCoreFusedOffsetOutputFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, uint8_t, uint8_t, true>; +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned, framework::DatasetMode::ALL, + combine(datasets::SmallGEMMLowpFusedBatchedMatMulDataset(), + make("DataType", { DataType::QASYMM8 }), + make("reshape_b_only_on_first_run", { false }))) { - Tensor input1 = create_tensor<Tensor>(TensorShape(21U, 13U), DataType::S32); - Tensor input2 = create_tensor<Tensor>(TensorShape(21U, 1U), DataType::S32); - Tensor output = create_tensor<Tensor>(TensorShape(21U, 13U), DataType::QASYMM8); - - GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo(); - output_stage.type = GEMMLowpOutputStageType::QUANTIZE_DOWN; - output_stage.gemmlowp_min_bound = 0; - output_stage.gemmlowp_max_bound = 205; - output_stage.output_data_type = DataType::QASYMM8; - - NEGEMMLowpOutputStage f; - f.configure(&input1, &input2, &output, output_stage); - - // Validate padding is zero - validate(input1.info()->padding(), PaddingSize()); - validate(input2.info()->padding(), PaddingSize()); - validate(output.info()->padding(), PaddingSize()); + validate(Accessor(_target), _reference, tolerance_batched); } - -FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_cases)) -{ - // Validate output - validate(Accessor(_target), _reference); -} - -TEST_SUITE(BoundedReLu) -FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_relu_cases)) -{ - // Validate output - validate(Accessor(_target), _reference); -} - -TEST_SUITE_END() // BoundedReLu - TEST_SUITE_END() // QASYMM8 TEST_SUITE(QASYMM8_SIGNED) - -const auto quantize_down_int32_to_int8_scale_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) * framework::dataset::make("result_shift", 2, - 3) - * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true }); - -const auto quantize_down_int32_to_int8_scale_relu_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, - 2) - * framework::dataset::make("result_shift", 2, 3) * framework::dataset::make("min", -100, -98) * framework::dataset::make("max", 71, 74) * framework::dataset::make("addBias", { false, true }); - -using NEGEMMLowpQuantizeDownInt32ScaleFixture = GEMMLowpQuantizeDownInt32ToInt8ScaleValidationFixture<Tensor, Accessor, NEGEMMLowpOutputStage>; - -// *INDENT-OFF* -// clang-format off -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( - framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Input not a multiple of 16 - TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Invalid min and max - TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), // Wrong output data type - }), - framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(21U), 1, DataType::S32), - TensorInfo(TensorShape(21U), 1, DataType::S32), - TensorInfo(TensorShape(20U), 1, DataType::S32), - })), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8_SIGNED), - TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8_SIGNED), - TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), - })), - framework::dataset::make("Min",{ -10, - -200, - -113, - })), - framework::dataset::make("Max",{ 105, - 300, - -18, - })), - framework::dataset::make("Expected", { true, false, false })), - a_info, b_info, output_info, min, max, expected) +using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned = + GEMMLowpBatchedMatrixMultiplyCoreFusedOffsetOutputFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, int8_t, int8_t, true>; +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned, framework::DatasetMode::ALL, + combine(datasets::SmallGEMMLowpFusedBatchedMatMulDataset(), + make("DataType", { DataType::QASYMM8_SIGNED }), + make("reshape_b_only_on_first_run", { false }))) { - GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo(); - output_stage.type = GEMMLowpOutputStageType::QUANTIZE_DOWN; - output_stage.gemmlowp_min_bound = min; - output_stage.gemmlowp_max_bound = max; - output_stage.output_data_type = DataType::QASYMM8_SIGNED; - - // Lock tensors - Status status = NEGEMMLowpOutputStage::validate(&a_info.clone()->set_is_resizable(false), - &b_info.clone()->set_is_resizable(false), - &output_info.clone()->set_is_resizable(false), - output_stage); - ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); + validate(Accessor(_target), _reference, tolerance_batched); } -// clang-format on -// *INDENT-ON* +TEST_SUITE_END() // QASYMM8_SIGNED +TEST_SUITE_END() // BatchedMatMul -FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_cases)) +TEST_SUITE(FusedOffsetOutput) +using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>; +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::ALL, + combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(), + make("DataType", { DataType::QASYMM8 }), + make("reshape_b_only_on_first_run", { false }))) { // Validate output - validate(Accessor(_target), _reference); + validate(Accessor(_target), _reference, tolerance_quant); } - -TEST_SUITE(BoundedReLu) -FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_relu_cases)) +FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::NIGHTLY, + combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(), + make("DataType", { DataType::QASYMM8 }), + make("reshape_b_only_on_first_run", { false }))) { // Validate output - validate(Accessor(_target), _reference); + validate(Accessor(_target), _reference, tolerance_quant); } +TEST_SUITE_END() // FusedOffsetOutput -TEST_SUITE_END() // BoundedReLu - -TEST_SUITE_END() // QASYMM8_SIGNED - -TEST_SUITE_END() // QuantizeDownInt32Scale - -TEST_SUITE(QuantizeDownInt32ToUint8ScaleByFixedPoint) - -const auto quantize_down_int32_to_uint8_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, - 2) - * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0) * framework::dataset::make("max", 255) * framework::dataset::make("addBias", { false, true }); - -const auto quantize_down_int32_to_uint8_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, - 2) - * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0, 2) * framework::dataset::make("max", 171, 174) * framework::dataset::make("addBias", { false, true }); - -using NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture = - GEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointValidationFixture<Tensor, Accessor, NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint>; - -using NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture = - GEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointValidationFixture<Tensor, Accessor, NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint>; - -// *INDENT-OFF* -// clang-format off -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( - framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Input not a multiple of 16 - TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), // Wrong output data type - }), - framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(21U), 1, DataType::S32), - TensorInfo(TensorShape(20U), 1, DataType::S32), - })), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8), - TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), - })), - framework::dataset::make("Min",{ 0, - 13, - })), - framework::dataset::make("Max",{ 205, - 180, - })), - framework::dataset::make("Expected", { true, false })), - a_info, b_info, output_info, min, max, expected) -{ - // Lock tensors - Status status = NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::validate(&a_info.clone()->set_is_resizable(false), - &b_info.clone()->set_is_resizable(false), - &output_info.clone()->set_is_resizable(false), - min, - max); - ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); -} -// clang-format on -// *INDENT-ON* - -FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), - quantize_down_int32_to_uint8_scale_by_fixedpoint_cases)) +// accumulation is not supported for Int8/UInt8 in aarch32 +#ifdef __aarch64__ +TEST_SUITE(ACCUMULATION) +TEST_SUITE(S32) +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreAccumulateFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpDataset()) { // Validate output validate(Accessor(_target), _reference); } - -FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), - quantize_down_int32_to_uint8_scale_by_fixedpoint_cases)) +FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreAccumulateFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpDataset()) { // Validate output validate(Accessor(_target), _reference); } +TEST_SUITE_END() // S32 +TEST_SUITE_END() // ACCUMULATION +#endif // __arch64__ -TEST_SUITE(BoundedReLu) -FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), - quantize_down_int32_to_uint8_scale_by_fixedpoint_relu_cases)) +TEST_SUITE(DynamicQuantization) +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpDataset()) { // Validate output validate(Accessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), - quantize_down_int32_to_uint8_scale_by_fixedpoint_relu_cases)) +FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpDataset()) { // Validate output validate(Accessor(_target), _reference); } -TEST_SUITE_END() // BoundedReLu - -TEST_SUITE_END() // QuantizeDownInt32ToUint8ScaleByFixedPoint - -TEST_SUITE(QuantizeDownInt32ToInt8ScaleByFixedPoint) - -const auto quantize_down_int32_to_int8_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, - 2) - * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", -128) * framework::dataset::make("max", 128) * framework::dataset::make("addBias", { false, true }); - -const auto quantize_down_int32_to_int8_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, - 2) - * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", -2, 0) * framework::dataset::make("max", 1, 3) * framework::dataset::make("addBias", { false, true }); - -using NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointFixture = - GEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointValidationFixture<Tensor, Accessor, NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint>; - -// *INDENT-OFF* -// clang-format off -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( - framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::F32), // Invalid input data type - TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), // Wrong output data type - TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), - }), - framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(21U), 1, DataType::S32), - TensorInfo(TensorShape(20U), 1, DataType::S32), - TensorInfo(TensorShape(21U), 1, DataType::S32), - })), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8_SIGNED), - TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), - TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8_SIGNED), - })), - framework::dataset::make("Min",{ -110, - -113, - -113, - })), - framework::dataset::make("Max",{ 87, - 97, - 97, - })), - framework::dataset::make("Expected", { false, false, true })), - a_info, b_info, output_info, min, max, expected) -{ - // Lock tensors - Status status = NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint::validate(&a_info.clone()->set_is_resizable(false), - &b_info.clone()->set_is_resizable(false), - &output_info.clone()->set_is_resizable(false), - min, - max); - ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); -} -// clang-format on -// *INDENT-ON* - -FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), - quantize_down_int32_to_int8_scale_by_fixedpoint_cases)) +TEST_SUITE_END() // DynamicQuantization + +#ifdef __aarch64__ +// Deqaunt tests involve returning F32 from the MatrixMultiplyCore kernels and is only implemented in aarch64 +TEST_SUITE(Dequant) +constexpr AbsoluteTolerance<float> tolerance_dequantized(0.01f); +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpDequantizedMatrixMultiplyValidationFixture, framework::DatasetMode::ALL, + combine( + datasets::SmallGEMMLowpDataset(), + make("accumulate", {true, false}) + )) { // Validate output - validate(Accessor(_target), _reference); + validate(Accessor(_target), _reference, tolerance_dequantized); } -TEST_SUITE(BoundedReLu) -FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), - quantize_down_int32_to_int8_scale_by_fixedpoint_relu_cases)) +FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpDequantizedMatrixMultiplyValidationFixture, framework::DatasetMode::NIGHTLY, + combine( + datasets::LargeGEMMLowpDataset(), + make("accumulate", {false}) + )) { // Validate output - validate(Accessor(_target), _reference); -} -TEST_SUITE_END() // BoundedReLu -TEST_SUITE_END() // QuantizeDownInt32ToInt8ScaleByFixedPoint - -TEST_SUITE(QuantizeDownInt32ToInt16ScaleByFixedPoint) - -const auto quantize_down_int32_to_int16_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, - 2) - * framework::dataset::make("min", -32768) * framework::dataset::make("max", 32767) * framework::dataset::make("addBias", { false, true }); - -const auto quantize_down_int32_to_int16_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, - 2) - * framework::dataset::make("min", -2, 0) * framework::dataset::make("max", 1, 3) * framework::dataset::make("addBias", { false, true }); -const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_cases = framework::dataset::make("result_fixedpoint_multiplier", 1073741823, - 1073741825) - * framework::dataset::make("result_shift", -3, - -2) - * framework::dataset::make("min", -32768) * framework::dataset::make("max", 32767) * framework::dataset::make("addBias", { false, true }); - -const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, - 254601602) - * framework::dataset::make("result_shift", -3, - -1) - * framework::dataset::make("min", -2, 0) * framework::dataset::make("max", 1, 3) * framework::dataset::make("addBias", { false, true }); - -using NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture = - GEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointValidationFixture<Tensor, Accessor, NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint>; - -// *INDENT-OFF* -// clang-format off -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( - framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Input not a multiple of 16 - TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), // Wrong output data type - }), - framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(21U), 1, DataType::S32), - TensorInfo(TensorShape(20U), 1, DataType::S32), - })), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(21U, 13U), 1, DataType::QSYMM16), - TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), - })), - framework::dataset::make("Min",{ -205, - -180, - })), - framework::dataset::make("Max",{ 205, - 180, - })), - framework::dataset::make("Expected", { true, false })), - a_info, b_info, output_info, min, max, expected) -{ - // Lock tensors - Status status = NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint::validate(&a_info.clone()->set_is_resizable(false), - &b_info.clone()->set_is_resizable(false), - &output_info.clone()->set_is_resizable(false), - min, - max); - ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); + validate(Accessor(_target), _reference, tolerance_dequantized); } -// clang-format on -// *INDENT-ON* +TEST_SUITE_END() // Dequant +#endif // __aarch64__ -TEST_SUITE(NoRelu) -TEST_SUITE(MultSmallerEq1) -FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), - quantize_down_int32_to_int16_scale_by_fixedpoint_cases)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -TEST_SUITE_END() // MultSmallerEq1 -TEST_SUITE(MultGreater1) -FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), - quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_cases)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -TEST_SUITE_END() // MultGreater1 -TEST_SUITE_END() // NoRelu -TEST_SUITE(BoundedReLu) -TEST_SUITE(MultSmallerEq1) -FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), - quantize_down_int32_to_int16_scale_by_fixedpoint_relu_cases)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -TEST_SUITE_END() // MultSmallerEq1 -TEST_SUITE(MultGreater1) -FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), - quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_relu_cases)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -TEST_SUITE_END() // MultGreater1 -TEST_SUITE_END() // BoundedReLu -TEST_SUITE_END() // QuantizeDownInt32ToInt16ScaleByFixedPoint -TEST_SUITE_END() // OutputStage +TEST_SUITE_END() // MatrixMultiplyCore TEST_SUITE_END() // GEMMLowp -TEST_SUITE_END() // Neon +TEST_SUITE_END() // NEON } // namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/NEON/Gather.cpp b/tests/validation/NEON/Gather.cpp index ca1e166bd1..0aea19939e 100644 --- a/tests/validation/NEON/Gather.cpp +++ b/tests/validation/NEON/Gather.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -100,12 +100,14 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( template <typename T> using NEGatherFixture = GatherFixture<Tensor, Accessor, NEGather, T>; +const auto gather_small_shapes = arm_compute::test::framework::dataset::concat(datasets::SmallGatherDataset(), datasets::SmallGatherMultiDimIndicesDataset()); + TEST_SUITE(Float) TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEGatherFixture<half>, framework::DatasetMode::PRECOMMIT, - combine(datasets::SmallGatherDataset(), framework::dataset::make("DataType", DataType::F16))) + combine(gather_small_shapes, framework::dataset::make("DataType", DataType::F16))) { // Validate output validate(Accessor(_target), _reference); @@ -125,7 +127,7 @@ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEGatherFixture<float>, framework::DatasetMode::PRECOMMIT, - combine(datasets::SmallGatherDataset(), framework::dataset::make("DataType", DataType::F32))) + combine(gather_small_shapes, framework::dataset::make("DataType", DataType::F32))) { // Validate output validate(Accessor(_target), _reference); @@ -146,7 +148,7 @@ TEST_SUITE(U8) FIXTURE_DATA_TEST_CASE(RunSmall, NEGatherFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, - combine(datasets::SmallGatherDataset(), framework::dataset::make("DataType", DataType::U8))) + combine(gather_small_shapes, framework::dataset::make("DataType", DataType::U8))) { // Validate output validate(Accessor(_target), _reference); @@ -166,7 +168,7 @@ TEST_SUITE(U16) FIXTURE_DATA_TEST_CASE(RunSmall, NEGatherFixture<uint16_t>, framework::DatasetMode::PRECOMMIT, - combine(datasets::SmallGatherDataset(), framework::dataset::make("DataType", DataType::U16))) + combine(gather_small_shapes, framework::dataset::make("DataType", DataType::U16))) { // Validate output validate(Accessor(_target), _reference); diff --git a/tests/validation/NEON/Im2Col.cpp b/tests/validation/NEON/Im2Col.cpp index 156957a601..ef5e75c5db 100644 --- a/tests/validation/NEON/Im2Col.cpp +++ b/tests/validation/NEON/Im2Col.cpp @@ -22,7 +22,7 @@ * SOFTWARE. */ #include "arm_compute/core/Types.h" -#include "src/core/NEON/kernels/NEIm2ColKernel.h" +#include "src/cpu/kernels/CpuIm2ColKernel.h" #include "tests/NEON/Accessor.h" #include "tests/NEON/Helper.h" #include "tests/datasets/ShapeDatasets.h" @@ -57,7 +57,7 @@ const auto conv_args_small = combine(combine(combine(combine(conv_filter TEST_SUITE(NEON) TEST_SUITE(Im2Col) -using NEIm2Col = NESynthetizeFunction<NEIm2ColKernel>; +using CpuIm2Col = NESynthetizeFunctionWithZeroConstantKernelBorder<cpu::kernels::CpuIm2ColKernel>; // *INDENT-OFF* // clang-format off @@ -78,26 +78,26 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( framework::dataset::make("Expected", { false, false, false, false, true })), input_info, output_info, has_bias, expected) { - bool status = bool(NEIm2Col::validate(&input_info, &output_info, Size2D(3U, 3U), PadStrideInfo(), has_bias)); + bool status = bool(cpu::kernels::CpuIm2ColKernel::validate(&input_info, &output_info, Size2D(3U, 3U), PadStrideInfo(), has_bias)); ARM_COMPUTE_EXPECT(status == expected, framework::LogLevel::ERRORS); } // clang-format on // *INDENT-ON* template <typename T> -using NEIm2ColFixture = Im2ColValidationFixture<Tensor, Accessor, NEIm2Col, T, false>; +using CpuIm2ColFixture = Im2ColOpValidationFixture<Tensor, Accessor, CpuIm2Col, T, false>; TEST_SUITE(Float) TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, NEIm2ColFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(im2col_shapes, framework::dataset::make("DataType", DataType::F32)), - conv_args_small)) +FIXTURE_DATA_TEST_CASE(RunSmall, CpuIm2ColFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(im2col_shapes, framework::dataset::make("DataType", DataType::F32)), + conv_args_small)) { // Validate output validate(Accessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEIm2ColFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(concat(im2col_shapes, datasets::LargeShapes()), framework::dataset::make("DataType", - DataType::F32)), - conv_args)) +FIXTURE_DATA_TEST_CASE(RunLarge, CpuIm2ColFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(concat(im2col_shapes, datasets::LargeShapes()), framework::dataset::make("DataType", + DataType::F32)), + conv_args)) { // Validate output validate(Accessor(_target), _reference); @@ -107,15 +107,15 @@ TEST_SUITE_END() // FP32 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, NEIm2ColFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(im2col_shapes, framework::dataset::make("DataType", DataType::F16)), - conv_args_small)) +FIXTURE_DATA_TEST_CASE(RunSmall, CpuIm2ColFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(im2col_shapes, framework::dataset::make("DataType", DataType::F16)), + conv_args_small)) { // Validate output validate(Accessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEIm2ColFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(concat(im2col_shapes, datasets::LargeShapes()), framework::dataset::make("DataType", - DataType::F16)), - conv_args)) +FIXTURE_DATA_TEST_CASE(RunLarge, CpuIm2ColFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(concat(im2col_shapes, datasets::LargeShapes()), framework::dataset::make("DataType", + DataType::F16)), + conv_args)) { // Validate output validate(Accessor(_target), _reference); @@ -127,15 +127,15 @@ TEST_SUITE_END() // FP16 TEST_SUITE_END() // Float TEST_SUITE(QASYMM8) -FIXTURE_DATA_TEST_CASE(RunSmall, NEIm2ColFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(im2col_shapes, framework::dataset::make("DataType", DataType::QASYMM8)), - conv_args_small)) +FIXTURE_DATA_TEST_CASE(RunSmall, CpuIm2ColFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(im2col_shapes, framework::dataset::make("DataType", DataType::QASYMM8)), + conv_args_small)) { // Validate output validate(Accessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEIm2ColFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(concat(im2col_shapes, datasets::LargeShapes()), - framework::dataset::make("DataType", DataType::QASYMM8)), - conv_args)) +FIXTURE_DATA_TEST_CASE(RunLarge, CpuIm2ColFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(concat(im2col_shapes, datasets::LargeShapes()), + framework::dataset::make("DataType", DataType::QASYMM8)), + conv_args)) { // Validate output validate(Accessor(_target), _reference); @@ -165,8 +165,8 @@ TEST_CASE(PaddedChannelNHWC, framework::DatasetMode::PRECOMMIT) Tensor dst_target = create_tensor<Tensor>(dst_shape, data_type, 1, qinfo); // Configure target function - NEIm2Col im2col_func; - im2col_func.configure(&src_target, &dst_target, spatial_kernel, conv_info, has_bias); + CpuIm2Col im2col_func; + im2col_func.configure(src_target.info(), dst_target.info(), spatial_kernel, conv_info, has_bias); // Extend padding src_target.info()->extend_padding(PaddingSize(3, 5, 9, 1)); @@ -185,8 +185,13 @@ TEST_CASE(PaddedChannelNHWC, framework::DatasetMode::PRECOMMIT) // Fill target source library->fill_tensor_uniform(Accessor(src_target), 0); + ITensorPack pack = + { + { TensorType::ACL_SRC, &src_target }, + { TensorType::ACL_DST, &dst_target } + }; // Run target function - im2col_func.run(); + im2col_func.run(pack); // Calculate Reference SimpleTensor<float> src_ref{ src_shape, data_type, 1, qinfo, data_layout }; diff --git a/tests/validation/NEON/LSTMLayerQuantized.cpp b/tests/validation/NEON/LSTMLayerQuantized.cpp index d391267e3e..6b98ee2b67 100644 --- a/tests/validation/NEON/LSTMLayerQuantized.cpp +++ b/tests/validation/NEON/LSTMLayerQuantized.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -64,11 +64,7 @@ inline void fill_tensor(SimpleTensor<T> &tensor, const std::vector<T> &v) } /** Tolerance for quantized asymmetric operations */ -#if defined(__aarch64__) -constexpr AbsoluteTolerance<int16_t> tolerance_qsymm16(0); -#else // defined(__aarch64__) constexpr AbsoluteTolerance<int16_t> tolerance_qsymm16(1); -#endif // defined(__aarch64__) } // namespace diff --git a/tests/validation/NEON/MatMul.cpp b/tests/validation/NEON/MatMul.cpp new file mode 100644 index 0000000000..f22bd9e86a --- /dev/null +++ b/tests/validation/NEON/MatMul.cpp @@ -0,0 +1,467 @@ +/* + * Copyright (c) 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEMatMul.h" + +#include "tests/datasets/LargeMatMulDataset.h" +#include "tests/datasets/SmallMatMulDataset.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/framework/Macros.h" +#include "tests/NEON/Accessor.h" +#include "tests/validation/fixtures/MatMulFixture.h" +#include "tests/validation/Validation.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +using framework::dataset::make; + +TEST_SUITE(NEON) +TEST_SUITE(MatMul) + +constexpr AbsoluteTolerance<float> tolerance_fp32( + 0.001f); /**< Tolerance value for comparing reference's output against implementation's output for FP32 data types */ +const AbsoluteTolerance<half> tolerance_fp16(half(0.1f)); +#ifdef __aarch64__ +constexpr AbsoluteTolerance<int32_t> tolerance_qasymm8(1); +constexpr AbsoluteTolerance<int32_t> tolerance_qasymm8_signed(1); +#endif // __aarch64__ + +// clang-format off +// *INDENT-OFF* +// Validation Tests +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, + zip( + make("InputAInfo", { + TensorInfo(TensorShape(9U, 6U), 1, DataType::F32), // Mismatching datatype + TensorInfo(TensorShape(9U, 6U), 1, DataType::S32), // Unsupported datatypes + TensorInfo(TensorShape(9U, 6U, 2U), 1, DataType::F32), // Broadcasting in batch dimension not supported + TensorInfo(TensorShape(9U, 6U), 1, DataType::F32), // Invalid shape for multiplication + TensorInfo(TensorShape(9U, 6U), 1, DataType::F32), + TensorInfo(TensorShape(9U, 6U , 12U) , 1 , DataType::F32), + TensorInfo(TensorShape(9U, 6U , 12U) , 1 , DataType::F32), // Tensors are not dynamic + TensorInfo(TensorShape(9U, 6U), 1, DataType::QASYMM8), + TensorInfo(TensorShape(9U, 6U), 1, DataType::QASYMM8_SIGNED), + TensorInfo(TensorShape(9U, 6U), 1, DataType::QASYMM8_SIGNED), // Mismatching data type + }), + make("InputBInfo", { + TensorInfo(TensorShape(5U, 9U), 1, DataType::QASYMM8), + TensorInfo(TensorShape(5U, 9U), 1, DataType::S32), + TensorInfo(TensorShape(5U, 9U, 1U), 1, DataType::F32), + TensorInfo(TensorShape(5U, 12U), 1, DataType::F32), + TensorInfo(TensorShape(5U, 9U), 1, DataType::F32), + TensorInfo(TensorShape(5U, 9U, 12U), 1, DataType::F32), + TensorInfo(TensorShape(5U, 9U, 12U), 1, DataType::F32), + TensorInfo(TensorShape(5U, 9U), 1, DataType::QASYMM8), + TensorInfo(TensorShape(5U, 9U), 1, DataType::QASYMM8_SIGNED), + TensorInfo(TensorShape(5U, 9U), 1, DataType::QASYMM8_SIGNED), + }), + make("OutputInfo", { + TensorInfo(TensorShape(5U, 6U), 1, DataType::F32), + TensorInfo(TensorShape(5U, 6U), 1, DataType::S32), + TensorInfo(TensorShape(5U, 6U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(5U, 6U), 1, DataType::F32), + TensorInfo(TensorShape(5U, 6U), 1, DataType::F32), + TensorInfo(TensorShape(5U, 6U, 12U) , 1, DataType::F32), + TensorInfo(TensorShape(5U, 6U, 12U) , 1, DataType::F32), + TensorInfo(TensorShape(5U, 6U), 1, DataType::QASYMM8), + TensorInfo(TensorShape(5U, 6U), 1, DataType::QASYMM8_SIGNED), + TensorInfo(TensorShape(5U, 6U), 1, DataType::QASYMM8), + }), + make("TensorIsConst", {false, false, false, false, false , false, true, false, false, false}), + make("Expected", { false, false, false, false, true, true, false, true, true, false })), + a_info, b_info, output_info, are_tensors_const, expected) +{ + TensorInfo a{a_info}; + TensorInfo b{b_info}; + a.set_are_values_constant(are_tensors_const); + b.set_are_values_constant(are_tensors_const); + Status status = NEMatMul::validate(&a, + &b, + &output_info, + MatMulInfo(), + CpuMatMulSettings()); + ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); +} +// *INDENT-ON* +// clang-format on + +// Generic Template +template <typename T> +using NEMatMulFixture = MatMulValidationWithActivationFixture<Tensor, Accessor, NEMatMul, CpuMatMulSettings, T>; + +// Fast math Template +template <typename T> +using NEMatMulFastMathFixture = MatMulGenericValidationFixture<Tensor, Accessor, NEMatMul, CpuMatMulSettings, T>; + +template <typename T> +using NEMatMulFixedFormatFixture = MatMulFixedFormatFixture<Tensor, Accessor, NEMatMul, CpuMatMulSettings, T>; + +template <typename T> +using NEMatMulDynamicTensorsFixture = + MatMulValidationWithDynamicTensorsFixture<Tensor, Accessor, NEMatMul, CpuMatMulSettings, T>; + +template <typename T> +using NEQuantizedMatMulFixture = QuantizedMatMulValidationFixture<Tensor, Accessor, NEMatMul, CpuMatMulSettings, T>; + +TEST_SUITE(Float) +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, + NEMatMulFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallMatMulDataset(), + make("TransposeA", {false, true}), + make("TransposeB", {false, true}), + make("DataType", DataType::F32), + make("ActivationInfo", +{ + ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) +}))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, + NEMatMulFixture<float>, + framework::DatasetMode::NIGHTLY, + combine(datasets::LargeMatMulDataset(), + make("TransposeA", {false, true}), + make("TransposeB", {false, true}), + make("DataType", DataType::F32), + make("ActivationInfo", +{ + ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) +}))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp32); +} +FIXTURE_DATA_TEST_CASE(RunHighDimensions, + NEMatMulFixture<float>, + framework::DatasetMode::NIGHTLY, + combine(datasets::HighDimensionalMatMulDataset(), + make("TransposeA", {false, true}), + make("TransposeB", {false, true}), + make("DataType", DataType::F32), + make("ActivationInfo", +{ + ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) +}))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp32); +} + +FIXTURE_DATA_TEST_CASE(RunStressDynamicTensors, + NEMatMulDynamicTensorsFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallMatMulDataset(), + make("TransposeA", {false, true}), + make("TransposeB", {false, true}), + make("DataType", DataType::F32), + make("ActivationInfo", +{ + ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) +}), +make("NumberOfRuns", 5))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp32); +} +TEST_SUITE_END() // FP32 + +#ifdef ARM_COMPUTE_ENABLE_BF16 +/* Note : MatMul BF16 is enabled by specifying FP32 datatype and enabling the fast math setting */ +constexpr AbsoluteTolerance<float> tolerance_bf16(0.02f); +TEST_SUITE(BF16) +FIXTURE_DATA_TEST_CASE(RunSmall, + NEMatMulFastMathFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallMatMulDataset(), + make("TransposeA", {false, true}), + make("TransposeB", {false, true}), + make("DataType", DataType::F32), + make("ActivationInfo", {ActivationLayerInfo()}), + make("RunTimes", {0}), + make("Settings", {CpuMatMulSettings().fast_math(true)}), + make("LhsQInfo", {QuantizationInfo()}), + make("RhsQInfo", {QuantizationInfo()}), + make("OutQInfo", {QuantizationInfo()}))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_bf16); +} + +#ifdef ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS +FIXTURE_DATA_TEST_CASE(RunTinyFixedFormat, + NEMatMulFixedFormatFixture<bfloat16>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::TinyMatMulDataset(), + make("TransposeA", {false}), + make("TransposeB", {false}), + make("DataType", DataType::BFLOAT16), + make("ActivationInfo", {ActivationLayerInfo()}), + make("RunTimes", {0}), + make("Settings", {CpuMatMulSettings().fast_math(true).fixed_format(true)}), + make("LhsQInfo", {QuantizationInfo()}), + make("RhsQInfo", {QuantizationInfo()}), + make("OutQInfo", {QuantizationInfo()}))) +{ + if (CPUInfo::get().has_bf16()) + { + // Validate output + validate(Accessor(_target), _reference, tolerance_bf16); + } +} +#endif /* ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS */ + +FIXTURE_DATA_TEST_CASE(RunLarge, + NEMatMulFastMathFixture<float>, + framework::DatasetMode::NIGHTLY, + combine(datasets::LargeMatMulDataset(), + make("TransposeA", {false, true}), + make("TransposeB", {false, true}), + make("DataType", DataType::F32), + make("ActivationInfo", {ActivationLayerInfo()}), + make("RunTimes", {0}), + make("Settings", {CpuMatMulSettings().fast_math(true)}), + make("LhsQInfo", {QuantizationInfo()}), + make("RhsQInfo", {QuantizationInfo()}), + make("OutQInfo", {QuantizationInfo()}))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_bf16, 0.01 /* tolerance_num */); +} +TEST_SUITE_END() // BF16 +#endif /* ARM_COMPUTE_ENABLE_BF16 */ + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, + NEMatMulFixture<half>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallMatMulDataset(), + make("TransposeA", {false, true}), + make("TransposeB", {false, true}), + make("DataType", DataType::F16), + make("ActivationInfo", +{ + ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) +}))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); +} +FIXTURE_DATA_TEST_CASE(RunLarge, + NEMatMulFixture<half>, + framework::DatasetMode::NIGHTLY, + combine(datasets::LargeMatMulDataset(), + make("TransposeA", {false, true}), + make("TransposeB", {false, true}), + make("DataType", DataType::F16), + make("ActivationInfo", +{ + ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) +}))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); +} +FIXTURE_DATA_TEST_CASE(RunStressDynamicTensors, + NEMatMulDynamicTensorsFixture<half>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallMatMulDataset(), + make("TransposeA", {false, true}), + make("TransposeB", {false, true}), + make("DataType", DataType::F16), + make("ActivationInfo", +{ + ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) +}), +make("NumberOfRuns", 5))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); +} +TEST_SUITE_END() // FP16 +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ + +TEST_SUITE_END() // Float + +#ifdef __aarch64__ // All the GeMM CPU assembly kernels for integer datatypes require aarch64 +TEST_SUITE(Quantized) + +TEST_SUITE(QASYMM8) + +FIXTURE_DATA_TEST_CASE(RunSmall, + NEQuantizedMatMulFixture<uint8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallMatMulDataset(), + make("TransposeA", {false, true}), + make("TransposeB", {false, true}), + make("DataType", DataType::QASYMM8), + make("ActivationInfo", +{ + ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) +}), +make("NumberOfExtraRuns", {0, 1}), +make("LhsQInfo", {QuantizationInfo(1.f / 50, 1)}), +make("RhsQInfo", {QuantizationInfo(1.f / 30, -1)}), +make("OutQInfo", {QuantizationInfo(1.f, 2)}))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} + +FIXTURE_DATA_TEST_CASE(RunSmallExtraActivation, + NEQuantizedMatMulFixture<uint8_t>, + framework::DatasetMode::NIGHTLY, + combine(datasets::SmallerMatMulDataset(), + make("TransposeA", {false, true}), + make("TransposeB", {false, true}), + make("DataType", DataType::QASYMM8), + make("ActivationInfo", +{ + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU) +}), +make("NumberOfExtraRuns", {0, 1}), +make("LhsQInfo", {QuantizationInfo(1.f / 50, 1)}), +make("RhsQInfo", {QuantizationInfo(1.f / 30, -1)}), +make("OutQInfo", {QuantizationInfo(1.f, 2)}))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, + NEQuantizedMatMulFixture<uint8_t>, + framework::DatasetMode::NIGHTLY, + combine(datasets::LargeMatMulDataset(), + make("TransposeA", {false, true}), + make("TransposeB", {false, true}), + make("DataType", DataType::QASYMM8), + make("ActivationInfo", +{ + ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) +}), +make("NumberOfExtraRuns", {0, 1}), +make("LhsQInfo", {QuantizationInfo(1.f / 100, 1)}), +make("RhsQInfo", {QuantizationInfo(1.f / 200, -1)}), +make("OutQInfo", {QuantizationInfo(1.f, 2)}))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} + +TEST_SUITE_END() // QASYMM8 + +TEST_SUITE(QASYMM8_SIGNED) + +FIXTURE_DATA_TEST_CASE(RunSmall, + NEQuantizedMatMulFixture<int8_t>, + framework::DatasetMode::PRECOMMIT, + combine(datasets::SmallMatMulDataset(), + make("TransposeA", {false, true}), + make("TransposeB", {false, true}), + make("DataType", DataType::QASYMM8_SIGNED), + make("ActivationInfo", +{ + ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) +}), +make("NumberOfExtraRuns", {0, 1}), +make("LhsQInfo", {QuantizationInfo(1.f / 40, -2)}), +make("RhsQInfo", {QuantizationInfo(1.f / 50, 1)}), +make("OutQInfo", {QuantizationInfo(1.f, 1)}))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} + +FIXTURE_DATA_TEST_CASE(RunSmallExtraActivation, + NEQuantizedMatMulFixture<int8_t>, + framework::DatasetMode::NIGHTLY, + combine(datasets::SmallerMatMulDataset(), + make("TransposeA", {false, true}), + make("TransposeB", {false, true}), + make("DataType", DataType::QASYMM8_SIGNED), + make("ActivationInfo", +{ + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU) +}), +make("NumberOfExtraRuns", {0, 1}), +make("LhsQInfo", {QuantizationInfo(1.f / 40, -2)}), +make("RhsQInfo", {QuantizationInfo(1.f / 50, 1)}), +make("OutQInfo", {QuantizationInfo(1.f, 1)}))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, + NEQuantizedMatMulFixture<int8_t>, + framework::DatasetMode::NIGHTLY, + combine(datasets::LargeMatMulDataset(), + make("TransposeA", {false, true}), + make("TransposeB", {false, true}), + make("DataType", DataType::QASYMM8_SIGNED), + make("ActivationInfo", +{ + ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) +}), +make("NumberOfExtraRuns", {0, 1}), +make("LhsQInfo", {QuantizationInfo(1.f / 150, -2)}), +make("RhsQInfo", {QuantizationInfo(1.f / 250, 1)}), +make("OutQInfo", {QuantizationInfo(1.f, 1)}))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} + +TEST_SUITE_END() // QASYMM8_SIGNED + +TEST_SUITE_END() // Quantized +#endif // __aarch64__ + +TEST_SUITE_END() // MatMul +TEST_SUITE_END() // NEON +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/NEON/MaxUnpoolingLayer.cpp b/tests/validation/NEON/MaxUnpoolingLayer.cpp index 27f131fa51..0eb021fe71 100644 --- a/tests/validation/NEON/MaxUnpoolingLayer.cpp +++ b/tests/validation/NEON/MaxUnpoolingLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021 Arm Limited. + * Copyright (c) 2020-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,10 +22,12 @@ * SOFTWARE. */ #include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/StringUtils.h" #include "arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h" #include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/cpu/kernels/CpuMaxUnpoolingLayerKernel.h" #include "tests/NEON/Accessor.h" #include "tests/datasets/ShapeDatasets.h" #include "tests/framework/Asserts.h" @@ -33,7 +35,6 @@ #include "tests/framework/datasets/Datasets.h" #include "tests/validation/Validation.h" #include "tests/validation/fixtures/MaxUnpoolingLayerFixture.h" - namespace arm_compute { namespace test @@ -51,7 +52,7 @@ const auto PoolingLayerIndicesDatasetFPSmall = combine(combine(framework::datase TEST_SUITE(Float) TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(MaxUnpooling, NEMaxUnpoolingLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerIndicesDatasetFPSmall, +FIXTURE_DATA_TEST_CASE(MaxUnpooling, NEMaxUnpoolingLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerIndicesDatasetFPSmall, framework::dataset::make("DataType", DataType::F32))), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }) @@ -63,7 +64,7 @@ FIXTURE_DATA_TEST_CASE(MaxUnpooling, NEMaxUnpoolingLayerFixture<float>, framewor TEST_SUITE_END() // FP32 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(MaxUnpooling, NEMaxUnpoolingLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerIndicesDatasetFPSmall, +FIXTURE_DATA_TEST_CASE(MaxUnpooling, NEMaxUnpoolingLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerIndicesDatasetFPSmall, framework::dataset::make("DataType", DataType::F16))), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }) @@ -74,7 +75,37 @@ FIXTURE_DATA_TEST_CASE(MaxUnpooling, NEMaxUnpoolingLayerFixture<half>, framework } TEST_SUITE_END() // FP16 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ + TEST_SUITE_END() // Float + +TEST_SUITE(KernelSelection) + +DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL, + combine(framework::dataset::make("CpuExt", std::string("NEON")), + framework::dataset::make("DataType", { DataType::F32, + DataType::F16, + DataType::QASYMM8, + DataType::QASYMM8_SIGNED + })), + cpu_ext, data_type) +{ + using namespace cpu::kernels; + + cpuinfo::CpuIsaInfo cpu_isa{}; + cpu_isa.neon = (cpu_ext == "NEON"); + cpu_isa.sve = (cpu_ext == "SVE"); + cpu_isa.fp16 = (data_type == DataType::F16); + + const auto *selected_impl = CpuMaxUnpoolingLayerKernel::get_implementation(DataTypeISASelectorData{ data_type, cpu_isa }, cpu::KernelSelectionType::Preferred); + + ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl); + + std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_maxunpooling"; + std::string actual = selected_impl->name; + + ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS); +} +TEST_SUITE_END() // KernelSelection TEST_SUITE_END() // PoolingLayer TEST_SUITE_END() // Neon } // namespace validation diff --git a/tests/validation/NEON/MeanStdDevNormalizationLayer.cpp b/tests/validation/NEON/MeanStdDevNormalizationLayer.cpp index 20e3bd5325..085f3608a0 100644 --- a/tests/validation/NEON/MeanStdDevNormalizationLayer.cpp +++ b/tests/validation/NEON/MeanStdDevNormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -47,7 +47,8 @@ namespace #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC RelativeTolerance<half> tolerance_f16(half(0.2f)); #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -RelativeTolerance<float> tolerance_f32(1e-8f); +RelativeTolerance<float> tolerance_f32(1e-4f); +RelativeTolerance<uint8_t> tolerance_qasymm8(1); } // namespace TEST_SUITE(NEON) @@ -81,7 +82,7 @@ TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEMeanStdDevNormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("InPlace", { false, true })), - framework::dataset::make("Epsilon", { 1e-8 }))) + framework::dataset::make("Epsilon", { 1e-3 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f16); @@ -101,7 +102,7 @@ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEMeanStdDevNormalizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("InPlace", { false, true })), - framework::dataset::make("Epsilon", { 1e-8 }))) + framework::dataset::make("Epsilon", { 1e-7 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f32); @@ -114,9 +115,23 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEMeanStdDevNormalizationLayerFixture<float>, f // Validate output validate(Accessor(_target), _reference, tolerance_f32); } + TEST_SUITE_END() // FP32 TEST_SUITE_END() // Float +TEST_SUITE(Quantized) +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, NEMeanStdDevNormalizationLayerFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("InPlace", { false, true })), + framework::dataset::make("Epsilon", { 1e-7 }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() // Quantized +TEST_SUITE_END() // QASYMM8 + TEST_SUITE_END() // MeanStdNormalizationLayer TEST_SUITE_END() // Neon } // namespace validation diff --git a/tests/validation/NEON/PixelWiseMultiplication.cpp b/tests/validation/NEON/PixelWiseMultiplication.cpp index 1bb0588919..964d1c5deb 100644 --- a/tests/validation/NEON/PixelWiseMultiplication.cpp +++ b/tests/validation/NEON/PixelWiseMultiplication.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -56,6 +56,11 @@ const auto PixelWiseMultiplicationQASYMM8QuantDataset = combine(combine( framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 32768.f, 0) })), framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 32768.f, 0) })); +const auto PixelWiseMultiplicationQASYMM8QuantInPlaceDataset = combine(combine( + framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 32768.f, 10) }), + framework::dataset::make("Src1QInfo", { QuantizationInfo(5.f / 32768.f, 10) })), + framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 10) })); + const auto PixelWiseMultiplicationPolicySTNUDataset = combine( framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE }), framework::dataset::make("RoundingPolicy", { RoundingPolicy::TO_NEAREST_UP })); @@ -75,7 +80,8 @@ const auto PixelWiseMultiplicationPolicySTZDataset = combine( * expected to have either different quantization information, data type * or different shape we are not testing in-place computation. */ -const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true }); +const auto InPlaceDataSet = framework::dataset::make("InPlace", { false, true }); +const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false }); #define DEFAULT_VALIDATE validate(Accessor(_target), _reference); #define VALIDATE(TYPE, TOLERANCE) validate(Accessor(_target), _reference, AbsoluteTolerance<TYPE>(TOLERANCE), 0.f); @@ -275,7 +281,19 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationQASYMM8SignedFixture, framework::dataset::make("Scale", { scale_unity })), PixelWiseMultiplicationPolicySTZDataset), PixelWiseMultiplicationQASYMM8QuantDataset), - InPlaceDataSet)) + OutOfPlaceDataSet)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunSmallInPlace, NEPixelWiseMultiplicationQASYMM8SignedFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallShapes(), + framework::dataset::make("DataTypeIn1", DataType::QASYMM8_SIGNED)), + framework::dataset::make("DataTypeIn2", DataType::QASYMM8_SIGNED)), + framework::dataset::make("DataTypeOut", DataType::QASYMM8_SIGNED)), + framework::dataset::make("Scale", { scale_unity })), + PixelWiseMultiplicationPolicySTZDataset), + PixelWiseMultiplicationQASYMM8QuantInPlaceDataset), + InPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); @@ -292,7 +310,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationQASYMM8Fixture, framew framework::dataset::make("Scale", { scale_255 })), PixelWiseMultiplicationPolicySTNUDataset), PixelWiseMultiplicationQASYMM8QuantDataset), - InPlaceDataSet)) + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); @@ -306,7 +324,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationQASYMM8Fixture, framew framework::dataset::make("Scale", { scale_unity })), PixelWiseMultiplicationPolicySTZDataset), PixelWiseMultiplicationQASYMM8QuantDataset), - InPlaceDataSet)) + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); @@ -320,7 +338,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationQASYMM8Fixture, framew framework::dataset::make("Scale", { scale_other })), PixelWiseMultiplicationPolicySTZDataset), PixelWiseMultiplicationQASYMM8QuantDataset), - InPlaceDataSet)) + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); @@ -335,7 +353,20 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationBroadcastQASYMM8Fixtur framework::dataset::make("Scale", { scale_other })), PixelWiseMultiplicationPolicySTZDataset), PixelWiseMultiplicationQASYMM8QuantDataset), - framework::dataset::make("InPlace", { false }))) + OutOfPlaceDataSet)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunTinyInPlace, NEPixelWiseMultiplicationBroadcastQASYMM8Fixture, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(datasets::TinyShapesBroadcastInplace(), + framework::dataset::make("DataTypeIn1", DataType::QASYMM8)), + framework::dataset::make("DataTypeIn2", DataType::QASYMM8)), + framework::dataset::make("DataTypeOut", DataType::QASYMM8)), + framework::dataset::make("Scale", { scale_other })), + PixelWiseMultiplicationPolicySTZDataset), + PixelWiseMultiplicationQASYMM8QuantInPlaceDataset), + InPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); @@ -351,7 +382,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationQSYMM16Fixture, framew framework::dataset::make("Scale", { scale_255 })), PixelWiseMultiplicationPolicySTNUDataset), PixelWiseMultiplicationQSYMM16QuantDataset), - InPlaceDataSet)) + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_qsymm16); @@ -365,7 +396,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationQSYMM16Fixture, framew framework::dataset::make("Scale", { scale_unity })), PixelWiseMultiplicationPolicySTZDataset), PixelWiseMultiplicationQSYMM16QuantDataset), - InPlaceDataSet)) + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_qsymm16); @@ -379,7 +410,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationQSYMM16Fixture, framew framework::dataset::make("Scale", { scale_other })), PixelWiseMultiplicationPolicySTZDataset), PixelWiseMultiplicationQSYMM16QuantDataset), - InPlaceDataSet)) + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference, tolerance_qsymm16); @@ -394,7 +425,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationQSYMM16ToS32Fixture, f framework::dataset::make("Scale", { scale_unity })), PixelWiseMultiplicationPolicySTZDataset), PixelWiseMultiplicationQSYMM16QuantDataset), - framework::dataset::make("InPlace", { false }))) + OutOfPlaceDataSet)) { // Validate output validate(Accessor(_target), _reference); @@ -411,7 +442,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationU8U8ToS16Fixture, fram framework::dataset::make("Scale", { scale_255 })), datasets::ConvertPolicies()), framework::dataset::make("RoundingPolicy", RoundingPolicy::TO_NEAREST_UP)), - framework::dataset::make("InPlace", { false }))) + OutOfPlaceDataSet)) { // Validate output validate_wrap(Accessor(_target), _reference, AbsoluteTolerance<int16_t>(1), 0.f); @@ -451,17 +482,17 @@ TEST_SUITE_END() // U8toU8 TEST_SUITE(U8toS16) TEST_SUITE(Scale255) -PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToS16Fixture<uint8_t>, ALL, SmallShapes(), U8, S16, S16, scale_255, TO_NEAREST_UP, framework::dataset::make("InPlace", { false }), +PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToS16Fixture<uint8_t>, ALL, SmallShapes(), U8, S16, S16, scale_255, TO_NEAREST_UP, OutOfPlaceDataSet, WRAP_VALIDATE(int16_t, 2)) TEST_SUITE_END() // Scale255 TEST_SUITE(ScaleUnity) -PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToS16Fixture<uint8_t>, ALL, SmallShapes(), U8, S16, S16, scale_unity, TO_ZERO, framework::dataset::make("InPlace", { false }), +PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToS16Fixture<uint8_t>, ALL, SmallShapes(), U8, S16, S16, scale_unity, TO_ZERO, OutOfPlaceDataSet, DEFAULT_VALIDATE) TEST_SUITE_END() // ScaleUnity TEST_SUITE(ScaleOther) -PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToS16Fixture<uint8_t>, ALL, SmallShapes(), U8, S16, S16, scale_other, TO_ZERO, framework::dataset::make("InPlace", { false }), +PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToS16Fixture<uint8_t>, ALL, SmallShapes(), U8, S16, S16, scale_other, TO_ZERO, OutOfPlaceDataSet, DEFAULT_VALIDATE) TEST_SUITE_END() // ScaleOther diff --git a/tests/validation/NEON/Pooling3dLayer.cpp b/tests/validation/NEON/Pooling3dLayer.cpp new file mode 100644 index 0000000000..07054462f5 --- /dev/null +++ b/tests/validation/NEON/Pooling3dLayer.cpp @@ -0,0 +1,361 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEPooling3dLayer.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" +#include "tests/NEON/Accessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/Pooling3dLayerDataset.h" +#include "tests/datasets/PoolingTypesDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/Pooling3dLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +/** Input data sets for floating-point data types */ +const auto Pooling3dLayerDatasetFP = combine(combine(combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { Size3D(2, 3, 2) })), + framework::dataset::make("Stride", { Size3D(1, 1, 1), Size3D(2, 1, 1), Size3D(1, 2, 1), Size3D(2, 2, 1) })), + framework::dataset::make("Padding", { Padding3D(0, 1, 0), Padding3D(1, 1, 1) })), + framework::dataset::make("ExcludePadding", { true, false })); + +const auto Pooling3dLayerDatasetFPSmall = combine(combine(combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { Size3D(2, 2, 2), Size3D(3, 3, 3) })), + framework::dataset::make("Stride", { Size3D(2, 2, 2), Size3D(2, 1, 1) })), + framework::dataset::make("Padding", { Padding3D(0, 0, 0), Padding3D(1, 1, 1), Padding3D(1, 0, 0) })), + framework::dataset::make("ExcludePadding", { true, false })); + +const auto Pooling3dLayerDatasetQASYMM8Small = combine(combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }), + framework::dataset::make("PoolingSize", { Size3D(3, 3, 3) })), + framework::dataset::make("Stride", { Size3D(1, 1, 1), Size3D(2, 1, 1), Size3D(1, 2, 1), Size3D(2, 2, 1) })), + framework::dataset::make("Padding", { Padding3D(0, 0, 0), Padding3D(1, 1, 1), Padding3D(1, 0, 0) })), + framework::dataset::make("ExcludePadding", { true })); + +const auto Pooling3dLayerDatasetQASYMM8Large = combine(combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }), + framework::dataset::make("PoolingSize", { Size3D(3, 3, 3) })), + framework::dataset::make("Stride", { Size3D(1, 1, 1), Size3D(2, 2, 1) })), + framework::dataset::make("Padding", { Padding3D(0, 0, 0), Padding3D(1, 1, 0) })), + framework::dataset::make("ExcludePadding", { true })); + +using ShapeDataset = framework::dataset::ContainerDataset<std::vector<TensorShape>>; + +constexpr AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for 32-bit floating-point type */ +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +constexpr AbsoluteTolerance<float> tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for unsigned 8-bit asymmetric type */ +constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_s(1); /**< Tolerance value for comparing reference's output against implementation's output for signed 8-bit asymmetric type */ + +const auto qasymm8_in_qinfo_dataset = framework::dataset::make("InputQuantInfo", { QuantizationInfo(.2f, 10) }); +const auto qasymm8_out_qinfo_dataset = framework::dataset::make("OutputQuantInfo", +{ + QuantizationInfo(.2f, 10), // Same qinfo + QuantizationInfo(.1f, 5), // Multiplier <= 1 + QuantizationInfo(2.f, 3) // Multiplier > 1 +}); + +const auto qasymm8_signed_in_qinfo_dataset = framework::dataset::make("InputQuantInfo", { QuantizationInfo(.2f, -10) }); +const auto qasymm8_signed_out_qinfo_dataset = framework::dataset::make("OutputQuantInfo", +{ + QuantizationInfo(.2f, -10), // Same qinfo + QuantizationInfo(.1f, -5), // Multiplier <= 1 + QuantizationInfo(2.f, -3) // Multiplier > 1 +}); + +} //namespace + +TEST_SUITE(NEON) +TEST_SUITE(Pooling3dLayer) + +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(2U, 27U, 13U, 4U, 3U), 1, DataType::F32, DataLayout::NDHWC), // Mismatching data type + TensorInfo(TensorShape(2U, 27U, 13U, 4U, 2U), 1, DataType::F32, DataLayout::NDHWC), // Invalid pad/size combination + TensorInfo(TensorShape(2U, 27U, 13U, 4U, 2U), 1, DataType::F32, DataLayout::NDHWC), // Invalid pad/size combination + TensorInfo(TensorShape(2U, 27U, 13U, 4U, 3U), 1, DataType::F32, DataLayout::NDHWC), // Invalid output shape + TensorInfo(TensorShape(5U, 13U, 15U, 2U, 3U), 1, DataType::F32, DataLayout::NDHWC), // Global Pooling + TensorInfo(TensorShape(13U,13U, 5U, 1U, 2U), 1, DataType::F32, DataLayout::NDHWC), // Invalid output Global Pooling + TensorInfo(TensorShape(5U, 13U, 13U, 4U, 4U), 1, DataType::F32, DataLayout::NDHWC), + TensorInfo(TensorShape(5U, 13U, 13U, 4U, 4U), 1, DataType::F32, DataLayout::NDHWC), // Invalid data type + TensorInfo(TensorShape(5U, 13U, 13U, 4U, 4U), 1, DataType::F32, DataLayout::NHWC), // Invalid data layout + TensorInfo(TensorShape(5U, 13U, 13U, 5U, 4U), 1, DataType::F32, DataLayout::NDHWC), + TensorInfo(TensorShape(1U, 16U, 1U, 3U, 4U), 1, DataType::F32, DataLayout::NDHWC), + TensorInfo(TensorShape(5U, 13U, 13U, 4U, 3U), 1, DataType::F32, DataLayout::NDHWC), + TensorInfo(TensorShape(5U, 13U, 13U, 4U, 2U), 1, DataType::F32, DataLayout::NDHWC), + TensorInfo(TensorShape(5U, 13U, 13U, 4U, 3U), 1, DataType::F32, DataLayout::NDHWC), + }), + framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(2U, 25U, 11U, 3U, 3U), 1, DataType::F16, DataLayout::NDHWC), + TensorInfo(TensorShape(2U, 30U, 11U, 3U, 2U), 1, DataType::F32, DataLayout::NDHWC), + TensorInfo(TensorShape(2U, 25U, 16U, 3U, 2U), 1, DataType::F32, DataLayout::NDHWC), + TensorInfo(TensorShape(2U, 27U, 13U, 3U, 3U), 1, DataType::F32, DataLayout::NDHWC), + TensorInfo(TensorShape(5U, 1U, 1U, 1U, 3U), 1, DataType::F32, DataLayout::NDHWC), // Global pooling applied + TensorInfo(TensorShape(5U, 2U, 2U, 2U, 2U), 1, DataType::F32, DataLayout::NDHWC), // Invalid output Global Pooling + TensorInfo(TensorShape(5U, 12U, 12U, 3U, 4U), 1, DataType::F32, DataLayout::NDHWC), + TensorInfo(TensorShape(5U, 12U, 12U, 3U, 4U), 1, DataType::QASYMM8, DataLayout::NDHWC), // Invalid data type + TensorInfo(TensorShape(5U, 12U, 12U, 3U, 4U), 1, DataType::F32, DataLayout::NDHWC), // Invalid data layout + TensorInfo(TensorShape(5U, 1U, 1U, 1U, 4U), 1, DataType::F32, DataLayout::NDHWC), + TensorInfo(TensorShape(1U, 15U, 1U, 2U, 4U), 1, DataType::F32, DataLayout::NDHWC), // size larger than height + TensorInfo(TensorShape(5U, 6U, 6U, 2U, 3U), 1, DataType::F32, DataLayout::NDHWC), + TensorInfo(TensorShape(5U, 6U, 6U, 2U, 2U), 1, DataType::F32, DataLayout::NDHWC), + TensorInfo(TensorShape(5U, 6U, 6U, 2U, 3U), 1, DataType::F32, DataLayout::NDHWC), + })), + framework::dataset::make("PoolInfo", { Pooling3dLayerInfo(PoolingType::AVG, 3, Size3D(1, 1, 1), Padding3D(0, 0, 0)), + Pooling3dLayerInfo(PoolingType::AVG, 2, Size3D(1, 1, 1), Padding3D(2, 0, 0)), + Pooling3dLayerInfo(PoolingType::AVG, 2, Size3D(1, 1, 1), Padding3D(0, 0, 0)), + Pooling3dLayerInfo(PoolingType::L2, 3, Size3D(1, 1, 1), Padding3D(0, 0, 0)), + Pooling3dLayerInfo(PoolingType::AVG), + Pooling3dLayerInfo(PoolingType::MAX), + Pooling3dLayerInfo(PoolingType::AVG, 2, Size3D(), Padding3D(), false), + Pooling3dLayerInfo(PoolingType::AVG, 2, Size3D(1U, 1U, 1U), Padding3D(), false), + Pooling3dLayerInfo(PoolingType::AVG, 2, Size3D(1U, 1U, 1U), Padding3D(), false), + Pooling3dLayerInfo(PoolingType::AVG), + Pooling3dLayerInfo(PoolingType::MAX, 2, Size3D(1, 1, 2), Padding3D(0, 0, 0), false), + Pooling3dLayerInfo(PoolingType::AVG, 2, Size3D(2U, 2U, 2U), Padding3D(), false), + Pooling3dLayerInfo(PoolingType::AVG, 1, Size3D(2U, 2U, 2U), Padding3D(2, 2, 2), true), // pool size is equal to the padding size + Pooling3dLayerInfo(PoolingType::AVG, 1, Size3D(2U, 2U, 2U), Padding3D(2, 2, 2), false), // pool size is equal to the padding size + Pooling3dLayerInfo(PoolingType::AVG, 3, Size3D(2U, 2U, 2U), Padding3D(2,1,2,2,1,2), false, false, DimensionRoundingType::CEIL), // CEIL with asymmetric Padding + })), + framework::dataset::make("Expected", { false, false, false, false, true, false, false, false, false, true , false, true, false, false, false})), + input_info, output_info, pool_info, expected) +{ + bool is_valid = bool(NEPooling3dLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pool_info)); + ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); +} +// clang-format on +// *INDENT-ON* + +template <typename T> +using NEPoolingLayer3dFixture = Pooling3dLayerValidationFixture<Tensor, Accessor, NEPooling3dLayer, T>; + +template <typename T> +using NESpecial3dPoolingLayerFixture = SpecialPooling3dLayerValidationFixture<Tensor, Accessor, NEPooling3dLayer, T>; + +template <typename T> +using NEPooling3dLayerGlobalFixture = Pooling3dLayerGlobalValidationFixture<Tensor, Accessor, NEPooling3dLayer, T>; + +// clang-format on +// *INDENT-ON* +TEST_SUITE(Float) +TEST_SUITE(FP32) + +FIXTURE_DATA_TEST_CASE(RunSpecial, NESpecial3dPoolingLayerFixture<float>, framework::DatasetMode::ALL, datasets::Pooling3dLayerDatasetSpecial() * framework::dataset::make("DataType", DataType::F32)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayer3dFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small5dShapes(), combine(Pooling3dLayerDatasetFPSmall, + framework::dataset::make("DataType", DataType::F32)))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayer3dFixture<float>, framework::DatasetMode::NIGHTLY, + combine(datasets::Large5dShapes(), combine(Pooling3dLayerDatasetFPSmall, framework::dataset::make("DataType", DataType::F32)))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f32); +} + +TEST_SUITE(GlobalPooling) +// *INDENT-OFF* +// clang-format off +FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayer3dFixture<float>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine( + framework::dataset::make("InputShape", { TensorShape(3U, 27U, 13U, 4U), + TensorShape(4U, 27U, 13U, 4U, 2U) + }), + framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })), + framework::dataset::make("PoolingSize", { Size3D(27, 13, 4) })), + framework::dataset::make("Strides", Size3D(1, 1, 1))), + framework::dataset::make("Paddings", Padding3D(0, 0, 0))), + framework::dataset::make("ExcludePadding", {false, true})), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunGlobalSmall, NEPooling3dLayerGlobalFixture<float>, framework::DatasetMode::ALL, + combine(combine( + framework::dataset::make("InputShape", { TensorShape(27U, 13U, 4U, 3U), + TensorShape(27U, 13U, 4U, 4U, 2U) + }), + framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayer3dFixture<float>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine( + framework::dataset::make("InputShape", { TensorShape(4U, 79U, 37U, 11U), + TensorShape(4U, 79U, 37U, 11U, 2U) + }), + framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })), + framework::dataset::make("PoolingSize", { Size3D(79, 37, 11) })), + framework::dataset::make("Strides", Size3D(1, 1, 1))), + framework::dataset::make("Paddings", Padding3D(0, 0, 0))), + framework::dataset::make("ExcludePadding", {false, true})), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f32); +} + +TEST_SUITE_END() // GlobalPooling +TEST_SUITE_END() // FP32 + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +TEST_SUITE(FP16) + +FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayer3dFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small5x5Shapes(), combine(Pooling3dLayerDatasetFPSmall, + framework::dataset::make("DataType", DataType::F16)))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); +} + + +FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayer3dFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::Large5dShapes(), combine(Pooling3dLayerDatasetFP, + framework::dataset::make("DataType", + DataType::F16)))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); +} + +TEST_SUITE(GlobalPooling) +// *INDENT-OFF* +// clang-format off +FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayer3dFixture<half>, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine( + framework::dataset::make("InputShape", { TensorShape(3U, 27U, 13U, 4U), + TensorShape(4U, 27U, 13U, 4U, 2U) + }), + framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })), + framework::dataset::make("PoolingSize", { Size3D(27, 13, 4) })), + framework::dataset::make("Strides", Size3D(1, 1, 1))), + framework::dataset::make("Paddings", Padding3D(0, 0, 0))), + framework::dataset::make("ExcludePadding", {false, true})), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); +} + + +FIXTURE_DATA_TEST_CASE(RunSmallGlobal, NEPooling3dLayerGlobalFixture<half>, framework::DatasetMode::ALL, + combine(combine( + framework::dataset::make("InputShape", { TensorShape(27U, 13U, 4U, 3U), + TensorShape(27U, 13U, 4U, 4U, 2U) + }), + framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayer3dFixture<half>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine( + framework::dataset::make("InputShape", { TensorShape(4U, 79U, 37U, 11U), + TensorShape(4U, 79U, 37U, 11U, 2U) + }), + framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })), + framework::dataset::make("PoolingSize", { Size3D(79, 37, 11) })), + framework::dataset::make("Strides", Size3D(1, 1, 1))), + framework::dataset::make("Paddings", Padding3D(0, 0, 0))), + framework::dataset::make("ExcludePadding", false)), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); +} + +// clang-format on +// *INDENT-ON* +TEST_SUITE_END() // GlobalPooling +TEST_SUITE_END() // FP16 +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +TEST_SUITE_END() // Float +TEST_SUITE(Quantized) + +template <typename T> +using NEPooling3dLayerQuantizedFixture = Pooling3dLayerValidationQuantizedFixture<Tensor, Accessor, NEPooling3dLayer, T>; + +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, NEPooling3dLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small5dShapes(), + combine(Pooling3dLayerDatasetQASYMM8Small, + framework::dataset::make("DataType", DataType::QASYMM8))), + qasymm8_in_qinfo_dataset), + qasymm8_out_qinfo_dataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, NEPooling3dLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large5dShapes(), + combine(Pooling3dLayerDatasetQASYMM8Large, + framework::dataset::make("DataType", DataType::QASYMM8))), + qasymm8_in_qinfo_dataset), + qasymm8_out_qinfo_dataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} + +TEST_SUITE_END() // QASYMM8 + +TEST_SUITE(QASYMM8_SIGNED) + +FIXTURE_DATA_TEST_CASE(RunSmall, NEPooling3dLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small5dShapes(), + combine(Pooling3dLayerDatasetQASYMM8Small, + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))), + qasymm8_signed_in_qinfo_dataset), + qasymm8_signed_out_qinfo_dataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8_s); +} + +TEST_SUITE_END() // QASYMM8_SIGNED +TEST_SUITE_END() // Quantized +TEST_SUITE_END() // Pooling3dLayer +TEST_SUITE_END() // NEON +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/NEON/PoolingLayer.cpp b/tests/validation/NEON/PoolingLayer.cpp index 24e552ed0c..161fe627cc 100644 --- a/tests/validation/NEON/PoolingLayer.cpp +++ b/tests/validation/NEON/PoolingLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,6 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h" #include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" #include "tests/NEON/Accessor.h" #include "tests/PaddingCalculator.h" #include "tests/datasets/PoolingLayerDataset.h" @@ -81,6 +80,14 @@ const auto qasymm8_signed_out_qinfo_dataset = framework::dataset::make("OutputQu QuantizationInfo(.1f, -5), // Multiplier <= 1 QuantizationInfo(2.f, -3) // Multiplier > 1 }); + +// Cases where pooling region is completely outside the input tensor (excluding global pooling) +const auto pool_outside_input_dataset = zip(zip(zip(zip( + framework::dataset::make("Shape", { TensorShape{ 2U, 2U, 1U }, TensorShape{ 2U, 2U, 4U }, TensorShape{ 3U, 5U, 2U }, TensorShape{ 10U, 20U, 3U } }), + framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG, PoolingType::L2, PoolingType::MAX })), + framework::dataset::make("PoolingSize", { Size2D{ 2, 2 }, Size2D{ 3, 3 }, Size2D{ 2, 2 }, Size2D{ 3, 6 } })), + framework::dataset::make("PadStride", { PadStrideInfo{ 1, 1, 2, 2 }, PadStrideInfo{ 1, 1, 4, 4 }, PadStrideInfo{ 1, 1, 3, 3 }, PadStrideInfo{ 1, 1, 2, 5 } })), + framework::dataset::make("ExcludePadding", { false, false, false, false })); } // namespace TEST_SUITE(NEON) @@ -97,7 +104,10 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32), // Invalid output Global Pooling TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::QASYMM8), // Invalid exclude_padding = false with quantized type, no actual padding and NHWC TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32), - }), + TensorInfo(TensorShape(1U, 16U, 1U), 1, DataType::F32), + TensorInfo(TensorShape(112, 112, 64,1), 1, DataType::F32, DataLayout::NHWC), // Mismatching number of channels + TensorInfo(TensorShape(112, 112, 64,1), 1, DataType::F32, DataLayout::NHWC), // Mismatching width + }), framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16), TensorInfo(TensorShape(25U, 10U, 2U), 1, DataType::F32), TensorInfo(TensorShape(30U, 11U, 2U), 1, DataType::F32), @@ -106,7 +116,11 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( TensorInfo(TensorShape(2U, 2U, 5U), 1, DataType::F32), TensorInfo(TensorShape(12U, 12U, 5U), 1, DataType::QASYMM8), TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32), - })), + TensorInfo(TensorShape(1U, 15U, 1U), 1, DataType::F32), + TensorInfo(TensorShape(56, 56, 64,1), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(56, 51, 64,1), 1, DataType::F32, DataLayout::NHWC), + + })), framework::dataset::make("PoolInfo", { PoolingLayerInfo(PoolingType::AVG, 3, DataLayout::NCHW, PadStrideInfo(1, 1, 0, 0)), PoolingLayerInfo(PoolingType::AVG, 3, DataLayout::NCHW, PadStrideInfo(1, 1, 0, 0)), PoolingLayerInfo(PoolingType::AVG, 2, DataLayout::NCHW, PadStrideInfo(1, 1, 2, 0)), @@ -115,8 +129,12 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( PoolingLayerInfo(PoolingType::MAX, DataLayout::NCHW), PoolingLayerInfo(PoolingType::AVG, 2, DataLayout::NHWC, PadStrideInfo(), false), PoolingLayerInfo(PoolingType::AVG, DataLayout::NCHW), + PoolingLayerInfo(PoolingType::MAX, 2, DataLayout::NHWC, PadStrideInfo(1, 1, 0, 0), false), + PoolingLayerInfo(PoolingType::MAX,3,DataLayout::NHWC,PadStrideInfo(2,2,1,1)), + PoolingLayerInfo(PoolingType::MAX,3,DataLayout::NHWC,PadStrideInfo(2,2,1,1)), + })), - framework::dataset::make("Expected", { false, false, false, false, true, false, false, false, true })), + framework::dataset::make("Expected", { false, false, false, false, true, false, true, false, false, false, false})), input_info, output_info, pool_info, expected) { bool is_valid = bool(NEPoolingLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pool_info)); @@ -130,6 +148,8 @@ using NEPoolingLayerIndicesFixture = PoolingLayerIndicesValidationFixture<Tensor template <typename T> using NEPoolingLayerFixture = PoolingLayerValidationFixture<Tensor, Accessor, NEPoolingLayer, T>; +template <typename T> +using NEPoolingLayerMixedDataLayoutFixture = PoolingLayerValidationFixture<Tensor, Accessor, NEPoolingLayer, T, true>; template <typename T> using NESpecialPoolingLayerFixture = SpecialPoolingLayerValidationFixture<Tensor, Accessor, NEPoolingLayer, T>; @@ -137,27 +157,37 @@ using NESpecialPoolingLayerFixture = SpecialPoolingLayerValidationFixture<Tensor const auto PoolingLayerIndicesDatasetFPSmall = combine(combine(combine(framework::dataset::make("PoolType", { PoolingType::MAX }), framework::dataset::make("PoolingSize", { Size2D(2, 2) })), framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0) })), framework::dataset::make("ExcludePadding", { true, false })); - +const auto PoolingLayerKernelIndicesDatasetFPSmall = combine(combine(combine(framework::dataset::make("PoolType", { PoolingType::MAX }), framework::dataset::make("PoolingSize", { Size2D(2, 2), Size2D(3, 3), Size2D(7, 7) })), + framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 1, 1, 1) })), + framework::dataset::make("ExcludePadding", { false })); TEST_SUITE(Float) TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunIndices, NEPoolingLayerIndicesFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerIndicesDatasetFPSmall, - framework::dataset::make("DataType", - DataType::F32))), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }) - - )) +FIXTURE_DATA_TEST_CASE(RunIndices, NEPoolingLayerIndicesFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallNoneUnitShapes(), + combine(PoolingLayerIndicesDatasetFPSmall, + framework::dataset::make("DataType", DataType::F32))), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + framework::dataset::make("UseKernelIndices", { false }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f32); + validate(Accessor(_target_indices), _ref_indices); +} +FIXTURE_DATA_TEST_CASE(RunKernelIndices, NEPoolingLayerIndicesFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallNoneUnitShapes(), + combine(PoolingLayerKernelIndicesDatasetFPSmall, + framework::dataset::make("DataType", DataType::F32))), + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + framework::dataset::make("UseKernelIndices", { true }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f32); validate(Accessor(_target_indices), _ref_indices); } - FIXTURE_DATA_TEST_CASE(RunSpecial, NESpecialPoolingLayerFixture<float>, framework::DatasetMode::ALL, datasets::PoolingLayerDatasetSpecial() * framework::dataset::make("DataType", DataType::F32)) { // Validate output validate(Accessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFPSmall, +FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerDatasetFPSmall, framework::dataset::make("DataType", DataType::F32))), pool_data_layout_dataset)) @@ -165,6 +195,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerFixture<float>, framework::Datase // Validate output validate(Accessor(_target), _reference, tolerance_f32); } +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEPoolingLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(), + combine(combine(combine(combine(datasets::PoolingTypes(), + framework::dataset::make("PoolingSize", { Size2D(2, 2) })), + framework::dataset::make("PadStride", { PadStrideInfo(2, 1, 0, 0) })), + framework::dataset::make("ExcludePadding", { false })), + framework::dataset::make("DataType", DataType::F32))), + pool_data_layout_dataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f32); +} FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), combine(PoolingLayerDatasetFP, framework::dataset::make("DataType", DataType::F32))), @@ -173,11 +214,32 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayerFixture<float>, framework::Datase // Validate output validate(Accessor(_target), _reference, tolerance_f32); } +TEST_SUITE(CornerCases) +FIXTURE_DATA_TEST_CASE(PoolRegionCompletelyOutsideInput, NEPoolingLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(pool_outside_input_dataset, + framework::dataset::make("DataType", + DataType::F32)), + pool_data_layout_dataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() // CornerCases TEST_SUITE_END() // FP32 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFPSmall, +FIXTURE_DATA_TEST_CASE(RunIndices, NEPoolingLayerIndicesFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallNoneUnitShapes(), + combine(PoolingLayerIndicesDatasetFPSmall, + framework::dataset::make("DataType", + DataType::F16))), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + framework::dataset::make("UseKernelIndices", { false }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); + validate(Accessor(_target_indices), _ref_indices); +} +FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerDatasetFPSmall, framework::dataset::make("DataType", DataType::F16))), pool_data_layout_dataset)) { @@ -191,6 +253,16 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayerFixture<half>, framework::Dataset // Validate output validate(Accessor(_target), _reference, tolerance_f16); } +TEST_SUITE(CornerCases) +FIXTURE_DATA_TEST_CASE(PoolRegionCompletelyOutsideInput, NEPoolingLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(pool_outside_input_dataset, + framework::dataset::make("DataType", + DataType::F16)), + pool_data_layout_dataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); +} +TEST_SUITE_END() // CornerCases TEST_SUITE_END() // FP16 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE_END() // Float @@ -199,9 +271,11 @@ TEST_SUITE(Quantized) template <typename T> using NEPoolingLayerQuantizedFixture = PoolingLayerValidationQuantizedFixture<Tensor, Accessor, NEPoolingLayer, T>; +template <typename T> +using NEPoolingLayerQuantizedMixedDataLayoutFixture = PoolingLayerValidationQuantizedFixture<Tensor, Accessor, NEPoolingLayer, T, true>; TEST_SUITE(QASYMM8) -FIXTURE_DATA_TEST_CASE(RunSmallNCHW, NEPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), +FIXTURE_DATA_TEST_CASE(RunSmallNCHW, NEPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerDatasetQASYMM8Small, framework::dataset::make("DataType", DataType::QASYMM8))), framework::dataset::make("DataLayout", { DataLayout::NCHW })), @@ -211,7 +285,7 @@ FIXTURE_DATA_TEST_CASE(RunSmallNCHW, NEPoolingLayerQuantizedFixture<uint8_t>, fr // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); } -FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), +FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerDatasetQASYMM8Small, framework::dataset::make("DataType", DataType::QASYMM8))), framework::dataset::make("DataLayout", { DataLayout::NHWC })), @@ -221,24 +295,40 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerQuantizedFixture<uint8_t>, framew // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); } -TEST_SUITE_END() // QASYMM8 -TEST_SUITE(QASYMM8_SIGNED) -FIXTURE_DATA_TEST_CASE(RunSmallNCHW, NEPoolingLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), - combine(PoolingLayerDatasetQASYMM8Small, - framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))), - framework::dataset::make("DataLayout", { DataLayout::NCHW })), - qasymm8_signed_in_qinfo_dataset), - qasymm8_signed_in_qinfo_dataset)) +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEPoolingLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallNoneUnitShapes(), + combine(combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }), + framework::dataset::make("PoolingSize", { Size2D(2, 2) })), + framework::dataset::make("PadStride", { PadStrideInfo(1, 2, 1, 1) })), + framework::dataset::make("ExcludePadding", { true })), + framework::dataset::make("DataType", DataType::QASYMM8))), + framework::dataset::make("DataLayout", { DataLayout::NHWC, DataLayout::NCHW })), + framework::dataset::make("InputQuantInfo", { QuantizationInfo(1.f / 255.f, 10) })), + framework::dataset::make("OutputQuantInfo", { QuantizationInfo(1.f / 255.f, 5) }))) { // Validate output - validate(Accessor(_target), _reference, tolerance_qasymm8_s); + validate(Accessor(_target), _reference, tolerance_qasymm8); } -FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), +TEST_SUITE_END() // QASYMM8 +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerDatasetQASYMM8Small, framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))), - framework::dataset::make("DataLayout", { DataLayout::NHWC })), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), qasymm8_signed_in_qinfo_dataset), - qasymm8_signed_out_qinfo_dataset)) + qasymm8_signed_in_qinfo_dataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8_s); +} +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEPoolingLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallNoneUnitShapes(), + combine(combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }), + framework::dataset::make("PoolingSize", { Size2D(2, 2) })), + framework::dataset::make("PadStride", { PadStrideInfo(1, 2, 1, 1) })), + framework::dataset::make("ExcludePadding", { true })), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))), + framework::dataset::make("DataLayout", { DataLayout::NHWC, DataLayout::NCHW })), + framework::dataset::make("InputQuantInfo", { QuantizationInfo(1.f / 127.f, -10) })), + framework::dataset::make("OutputQuantInfo", { QuantizationInfo(1.f / 127.f, -10) }))) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8_s); diff --git a/tests/validation/NEON/QLSTMLayerNormalization.cpp b/tests/validation/NEON/QLSTMLayerNormalization.cpp index 617f64ce1d..9738213114 100644 --- a/tests/validation/NEON/QLSTMLayerNormalization.cpp +++ b/tests/validation/NEON/QLSTMLayerNormalization.cpp @@ -167,7 +167,7 @@ TEST_SUITE(Quantized) TEST_SUITE(QSYMM16) /** Tests will be targetting - * - Comparison between Neon kernel and the exact same but scalar version of reference kernel + * - Comparison between optimized kernel and the exact same but scalar version of reference kernel * - Input shapes of 1D and 2D with the first dimension covers boundary values of 128-bit vector size (0~3 iterations) * - Weight and bias 1D shape that have same size as that of input shapes * - Quantization scale is greater and smaller than one. @@ -179,7 +179,7 @@ TEST_SUITE(QSYMM16) * - The algorithm has been sensitive to quantization scale but it is hard to fully test * the sensitivity due to aforementioned reason. * - Again, it is hard to fully test corner values due to the exact same algorithm of the - * reference kernel and the Neon kernel. + * reference kernel and the optimized kernel. */ constexpr uint32_t qsymm16_per_vector = vector_size_byte / sizeof(int16_t); diff --git a/tests/validation/NEON/QuantizationLayer.cpp b/tests/validation/NEON/QuantizationLayer.cpp index aeee54c835..bab7490762 100644 --- a/tests/validation/NEON/QuantizationLayer.cpp +++ b/tests/validation/NEON/QuantizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -34,6 +34,7 @@ #include "tests/validation/Validation.h" #include "tests/validation/fixtures/QuantizationLayerFixture.h" + namespace arm_compute { namespace test @@ -182,7 +183,16 @@ FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8, NEQuantizationLayerQASYMM8GenFixture<uin framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("DataTypeOut", { DataType::QASYMM8 })), framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(2.0f, 15) }))) + framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(2.0f, 15), QuantizationInfo(0.5f, 25) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_u8); +} +FIXTURE_DATA_TEST_CASE(ConvertUint8toInt8, NEQuantizationLayerQASYMM8GenFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(QuantizationSmallShapes, + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("DataTypeOut", { DataType::QASYMM8_SIGNED })), + framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(2.0f, -1) })), + framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(2.0f, 127) }))) { // Validate output validate(Accessor(_target), _reference, tolerance_u8); @@ -191,7 +201,7 @@ FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8_SIGNED, NEQuantizationLayerQASYMM8_SIGNED framework::dataset::make("DataTypeIn", DataType::QASYMM8)), framework::dataset::make("DataTypeOut", { DataType::QASYMM8_SIGNED })), framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(1.0f, 10), QuantizationInfo(2.0f, -25) })), - framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.0f, 15) }))) + framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.0f, 15), QuantizationInfo(1.0f, 127) }))) { // Validate output validate(Accessor(_target), _reference, tolerance_s8); @@ -211,7 +221,7 @@ FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8_SIGNED, NEQuantizationLayerQASYMM8_SIGNED framework::dataset::make("DataTypeIn", DataType::QASYMM8_SIGNED)), framework::dataset::make("DataTypeOut", { DataType::QASYMM8_SIGNED })), framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(1.0f, 10) })), - framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(2.0f, -5) }))) + framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(2.0f, -5), QuantizationInfo(1.0f, 43) }))) { // Validate output validate(Accessor(_target), _reference, tolerance_s8); @@ -220,11 +230,21 @@ FIXTURE_DATA_TEST_CASE(RunSmallQASYMM8, NEQuantizationLayerQASYMM8GenFixture<int framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("DataTypeOut", { DataType::QASYMM8 })), framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(2.0f, 10), QuantizationInfo(2.0f, -25) })), - framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.0f, 30) }))) + framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.0f, 30), QuantizationInfo(2.0f, -128) }))) { // Validate output validate(Accessor(_target), _reference, tolerance_u8); } +FIXTURE_DATA_TEST_CASE(ConvertInt8toUint8, NEQuantizationLayerQASYMM8_SIGNEDGenFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(QuantizationSmallShapes, + framework::dataset::make("DataTypeIn", DataType::QASYMM8_SIGNED)), + framework::dataset::make("DataTypeOut", { DataType::QASYMM8 })), + framework::dataset::make("QuantizationInfoOutput", { QuantizationInfo(1.0f, 0) })), + framework::dataset::make("QuantizationInfoInput", { QuantizationInfo(1.0f, -128) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_s8); +} + TEST_SUITE_END() // QASYMM8_SIGNED TEST_SUITE_END() // Quantized diff --git a/tests/validation/NEON/RNNLayer.cpp b/tests/validation/NEON/RNNLayer.cpp index 14d9a5d14e..979aa0f2c5 100644 --- a/tests/validation/NEON/RNNLayer.cpp +++ b/tests/validation/NEON/RNNLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -139,7 +139,7 @@ TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NERNNLayerFixture<half>, framework::DatasetMode::ALL, combine(datasets::SmallRNNLayerDataset(), framework::dataset::make("DataType", DataType::F16))) { // Validate output - validate(Accessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); + validate(Accessor(_target), _reference, tolerance_f16, 0.02f, abs_tolerance_f16); } TEST_SUITE_END() // FP16 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ diff --git a/tests/validation/NEON/ROIAlignLayer.cpp b/tests/validation/NEON/ROIAlignLayer.cpp index 7bfdddf10d..98c92a0b20 100644 --- a/tests/validation/NEON/ROIAlignLayer.cpp +++ b/tests/validation/NEON/ROIAlignLayer.cpp @@ -53,6 +53,7 @@ AbsoluteTolerance<float> absolute_tolerance_f16(0.001f); #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); +constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_s(1); } // namespace TEST_SUITE(NEON) @@ -154,7 +155,7 @@ FIXTURE_DATA_TEST_CASE(Small, NEROIAlignLayerQuantizedFixture<int8_t>, framework framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(2.f / 255.f, 120) }))) { // Validate output - validate(Accessor(_target), _reference, tolerance_qasymm8); + validate(Accessor(_target), _reference, tolerance_qasymm8_s); } TEST_SUITE_END() // QASYMM8_SIGNED TEST_SUITE_END() // Quantized diff --git a/tests/validation/NEON/ROIPoolingLayer.cpp b/tests/validation/NEON/ROIPoolingLayer.cpp new file mode 100644 index 0000000000..8b5147e57f --- /dev/null +++ b/tests/validation/NEON/ROIPoolingLayer.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" +#include "tests/Globals.h" +#include "tests/NEON/Accessor.h" +#include "tests/datasets/ROIDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/ROIPoolingLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +RelativeTolerance<float> relative_tolerance_f32(0.01f); +AbsoluteTolerance<float> absolute_tolerance_f32(0.001f); + +constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); +} // end namespace + +TEST_SUITE(NEON) +TEST_SUITE(RoiPooling) + +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Successful test + TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::QASYMM8), // Successful test (quantized) + TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Incorrect rois type + TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Mismatching data type input/output + TensorInfo(TensorShape(250U, 128U, 2U), 1, DataType::F32), // Mismatching depth size input/output + TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Mismatching number of rois and output batch size + TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Invalid number of values per ROIS + TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Mismatching height and width input/output + + }), + framework::dataset::make("RoisInfo", { TensorInfo(TensorShape(5, 4U), 1, DataType::U16), + TensorInfo(TensorShape(5, 4U), 1, DataType::U16), + TensorInfo(TensorShape(5, 4U), 1, DataType::F16), + TensorInfo(TensorShape(5, 4U), 1, DataType::U16), + TensorInfo(TensorShape(5, 4U), 1, DataType::U16), + TensorInfo(TensorShape(5, 10U), 1, DataType::U16), + TensorInfo(TensorShape(4, 4U), 1, DataType::U16), + TensorInfo(TensorShape(5, 4U), 1, DataType::U16), + })), + framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::QASYMM8), + TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F16), + TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(5U, 5U, 3U, 4U), 1, DataType::F32), + })), + framework::dataset::make("PoolInfo", { ROIPoolingLayerInfo(7U, 7U, 1./8), + ROIPoolingLayerInfo(7U, 7U, 1./8), + ROIPoolingLayerInfo(7U, 7U, 1./8), + ROIPoolingLayerInfo(7U, 7U, 1./8), + ROIPoolingLayerInfo(7U, 7U, 1./8), + ROIPoolingLayerInfo(7U, 7U, 1./8), + ROIPoolingLayerInfo(7U, 7U, 1./8), + ROIPoolingLayerInfo(7U, 7U, 1./8), + })), + framework::dataset::make("Expected", { true, true, false, false, false, false, false })), + input_info, rois_info, output_info, pool_info, expected) +{ + ARM_COMPUTE_EXPECT(bool(NEROIPoolingLayer::validate(&input_info.clone()->set_is_resizable(true), &rois_info.clone()->set_is_resizable(true), &output_info.clone()->set_is_resizable(true), pool_info)) == expected, framework::LogLevel::ERRORS); +} +// clang-format on +// *INDENT-ON* + +using NEROIPoolingLayerFloatFixture = ROIPoolingLayerFixture<Tensor, Accessor, NEROIPoolingLayer, float>; + +TEST_SUITE(Float) +FIXTURE_DATA_TEST_CASE(SmallROIPoolingLayerFloat, NEROIPoolingLayerFloatFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::SmallROIDataset(), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("DataLayout", { DataLayout::NCHW }))) +{ + // Validate output + validate(Accessor(_target), _reference, relative_tolerance_f32, .02f, absolute_tolerance_f32); +} + +TEST_SUITE_END() // Float test suite end + +// Begin quantized tests +TEST_SUITE(Quantized) +template <typename T> +using NEROIPoolingLayerQuantizedFixture = ROIPoolingLayerQuantizedFixture<Tensor, Accessor, NEROIPoolingLayer, T>; + +TEST_SUITE(QASYMM8) + +FIXTURE_DATA_TEST_CASE(Small, NEROIPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, + combine(combine(combine(combine(datasets::SmallROIDataset(), + framework::dataset::make("DataType", { DataType::QASYMM8 })), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), + framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 127) })), + framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(2.f / 255.f, 120) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} + +TEST_SUITE_END() // end qasymm8 tests +TEST_SUITE_END() // end quantized tests + +TEST_SUITE_END() // RoiPooling +TEST_SUITE_END() // NEON + +} // validation end +} // test namespace end +} // arm_compute namespace end diff --git a/tests/validation/NEON/ReduceMean.cpp b/tests/validation/NEON/ReduceMean.cpp index b4a3f0d399..8ca0bb53a7 100644 --- a/tests/validation/NEON/ReduceMean.cpp +++ b/tests/validation/NEON/ReduceMean.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -46,10 +46,15 @@ constexpr AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Tolerance value f #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC constexpr AbsoluteTolerance<float> tolerance_f16(0.03f); /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */ #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#ifdef __aarch64__ constexpr AbsoluteTolerance<uint8_t> tolerance_u8(1); /**< Tolerance value for comparing reference's output against implementation's output for unsigned 8-bit asymmetric quantized type */ +constexpr AbsoluteTolerance<int8_t> tolerance_s8(1); /**< Tolerance value for comparing reference's output against implementation's output for signed 8-bit asymmetric quantized type */ +#else // __aarch64__ +constexpr AbsoluteTolerance<uint8_t> tolerance_u8(2); /**< Tolerance value for comparing reference's output against implementation's output for unsigned 8-bit asymmetric quantized type */ constexpr AbsoluteTolerance<int8_t> tolerance_s8(2); /**< Tolerance value for comparing reference's output against implementation's output for signed 8-bit asymmetric quantized type */ +#endif // __aarch64__ -const auto axis_keep = combine(framework::dataset::make("Axis", { Coordinates(0), Coordinates(1, 0), Coordinates(1, 2), Coordinates(0, 2), Coordinates(1, 3), Coordinates(0, 1, 2, 3) }), +const auto axis_keep = combine(framework::dataset::make("Axis", { Coordinates(0), Coordinates(1, 0), Coordinates(1, 2), Coordinates(0, 2), Coordinates(1, 3), Coordinates(2, 3), Coordinates(0, 1, 2, 3) }), framework::dataset::make("KeepDims", { true })); const auto axis_drop = combine(framework::dataset::make("Axis", { Coordinates(0), Coordinates(1), Coordinates(3) }), framework::dataset::make("KeepDims", { false })); } // namespace diff --git a/tests/validation/NEON/Remap.cpp b/tests/validation/NEON/Remap.cpp deleted file mode 100644 index 3c02f8eece..0000000000 --- a/tests/validation/NEON/Remap.cpp +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2017-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NERemap.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/BorderModeDataset.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/RemapFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -constexpr AbsoluteTolerance<uint8_t> tolerance_value(0); -constexpr float tolerance_number = 0.f; -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(Remap) - -template <typename T> -using NERemapFixture = RemapValidationFixture<Tensor, Accessor, NERemap, T>; - -FIXTURE_DATA_TEST_CASE(RunSmall, NERemapFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), - framework::dataset::make("DataType", - DataType::U8)), - framework::dataset::make("BorderModes", { BorderMode::UNDEFINED, BorderMode::CONSTANT }))) -{ - // Validate output - validate(Accessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NERemapFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), - framework::dataset::make("DataType", - DataType::U8)), - framework::dataset::make("BorderModes", { BorderMode::UNDEFINED, BorderMode::CONSTANT }))) -{ - // Validate output - validate(Accessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number); -} -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/ReorderLayer.cpp b/tests/validation/NEON/ReorderLayer.cpp new file mode 100644 index 0000000000..839ad0ac92 --- /dev/null +++ b/tests/validation/NEON/ReorderLayer.cpp @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#if defined(__aarch64__) + +#include "arm_compute/runtime/NEON/functions/NEReorderLayer.h" +#include "arm_compute/runtime/Tensor.h" +#include "tests/NEON/Accessor.h" +#include "tests/datasets/ReorderLayerDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/ReorderFixture.h" +#include "src/core/NEON/kernels/NEReorderKernel.h" +#include "src/core/NEON/kernels/arm_gemm/utils.hpp" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +using framework::dataset::make; + +TEST_SUITE(NEON) +TEST_SUITE(ReorderLayer) + +template <typename T> +using NEReorderLayerAlias = ReorderValidationFixture<Tensor, Accessor, NEReorderLayer, T>; + +TEST_SUITE(FP32) +#if defined(ARM_COMPUTE_ENABLE_SVE) +DATA_TEST_CASE(ValidateReorderOHWIo8, framework::DatasetMode::ALL, combine( + zip( + make("InShape",{ TensorShape(10U, 9U), TensorShape(234U, 301U) }), + make("OutShape", { TensorShape(10U, 16U), TensorShape(234U, 304U) }) + ), + zip( + make("InputWeightFormat", {WeightFormat::OHWI}), + make("OutputWeightFormat", {WeightFormat::OHWIo8}) + )), + input_shape, output_shape, input_wf, output_wf) +{ + if(Scheduler::get().cpu_info().has_sve()){ + arm_compute::NEReorderLayer reorder_layer; + int vector_length = arm_gemm::utils::get_vector_length<float>(); + bool expected_bool_status = false; + if (vector_length == 8) + { + expected_bool_status = true; + } + + TensorInfo input_tensor_info(input_shape, 1, DataType::F32); + TensorInfo output_tensor_info(output_shape, 1, DataType::F32); + + Status status = reorder_layer.validate(&input_tensor_info, &output_tensor_info, input_wf, output_wf); + + ARM_COMPUTE_EXPECT((expected_bool_status == bool(status)), framework::LogLevel::ERRORS); + } +} + +FIXTURE_DATA_TEST_CASE(RunBlock8, NEReorderLayerAlias<float>, framework::DatasetMode::ALL, combine(datasets::ReorderLayerDatasetBlock8(), make("DataType", DataType::F32))) +{ + // Validate output + if (_hardware_supports) + { + validate(Accessor(_target), _reference); + } +} +#endif // ARM_COMPUTE_ENABLE_SVE + +FIXTURE_DATA_TEST_CASE(RunBlock4, NEReorderLayerAlias<float>, framework::DatasetMode::ALL, combine(datasets::ReorderLayerDatasetBlock4(), make("DataType", DataType::F32))) +{ + // Validate output + validate(Accessor(_target), _reference); +} + +TEST_SUITE_END() // FP32 + +TEST_SUITE_END() // ReorderLayer +TEST_SUITE_END() // NEON +} // namespace validation +} // namespace test +} // namespace arm_compute + +#endif // defined(__aarch64__) diff --git a/tests/validation/NEON/ReshapeLayer.cpp b/tests/validation/NEON/ReshapeLayer.cpp index bf39c399a5..e9f114d491 100644 --- a/tests/validation/NEON/ReshapeLayer.cpp +++ b/tests/validation/NEON/ReshapeLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2018, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -77,6 +77,9 @@ input_info, output_info, expected) template <typename T> using NEReshapeLayerFixture = ReshapeLayerValidationFixture<Tensor, Accessor, NEReshapeLayer, T>; +template <typename T> +using NEReshapeLayerPaddedFixture = ReshapeLayerPaddedValidationFixture<Tensor, Accessor, NEReshapeLayer, T>; + TEST_SUITE(Float) TEST_SUITE(F32) FIXTURE_DATA_TEST_CASE(RunSmall, NEReshapeLayerFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallReshapeLayerDataset(), framework::dataset::make("DataType", DataType::F32))) @@ -84,8 +87,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEReshapeLayerFixture<float>, framework::Datase // Validate output validate(Accessor(_target), _reference); } -TEST_SUITE_END() -TEST_SUITE_END() +TEST_SUITE_END() //F32 +TEST_SUITE_END() //Float TEST_SUITE(Integer) TEST_SUITE(S8) @@ -94,7 +97,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEReshapeLayerFixture<int8_t>, framework::Datas // Validate output validate(Accessor(_target), _reference); } -TEST_SUITE_END() +TEST_SUITE_END() //S8 TEST_SUITE(S16) FIXTURE_DATA_TEST_CASE(RunSmall, NEReshapeLayerFixture<int16_t>, framework::DatasetMode::ALL, combine(datasets::SmallReshapeLayerDataset(), framework::dataset::make("DataType", DataType::S16))) @@ -102,11 +105,41 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEReshapeLayerFixture<int16_t>, framework::Data // Validate output validate(Accessor(_target), _reference); } -TEST_SUITE_END() -TEST_SUITE_END() +TEST_SUITE_END() //S16 +TEST_SUITE_END() //Integer + +TEST_SUITE(Padded) +TEST_SUITE(Float) +TEST_SUITE(F32) +FIXTURE_DATA_TEST_CASE(RunSmall, NEReshapeLayerPaddedFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallReshapeLayerDataset(), framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() //S32 +TEST_SUITE_END() //Float + +TEST_SUITE(Integer) +TEST_SUITE(S8) +FIXTURE_DATA_TEST_CASE(RunSmall, NEReshapeLayerPaddedFixture<int8_t>, framework::DatasetMode::ALL, combine(datasets::SmallReshapeLayerDataset(), framework::dataset::make("DataType", DataType::S8))) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() //S8 + +TEST_SUITE(S16) +FIXTURE_DATA_TEST_CASE(RunSmall, NEReshapeLayerPaddedFixture<int16_t>, framework::DatasetMode::ALL, combine(datasets::SmallReshapeLayerDataset(), framework::dataset::make("DataType", DataType::S16))) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() //S16 +TEST_SUITE_END() //Integer +TEST_SUITE_END() //Padded -TEST_SUITE_END() -TEST_SUITE_END() +TEST_SUITE_END() //ReshapeLayer +TEST_SUITE_END() //NEON } // namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/NEON/Reverse.cpp b/tests/validation/NEON/Reverse.cpp index 3dc3eeee80..7b5337f14b 100644 --- a/tests/validation/NEON/Reverse.cpp +++ b/tests/validation/NEON/Reverse.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,7 +43,8 @@ namespace validation { namespace { -auto run_small_dataset = combine(datasets::SmallShapes(), datasets::Tiny1DShapes()); +using framework::dataset::make; +auto run_small_dataset = combine(datasets::Small3DShapes(), datasets::Tiny1DShapes()); auto run_large_dataset = combine(datasets::LargeShapes(), datasets::Tiny1DShapes()); } // namespace @@ -53,28 +54,31 @@ TEST_SUITE(Reverse) // *INDENT-OFF* // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S8), // Invalid axis datatype + make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S8), // Invalid axis datatype TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid axis shape TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid axis length (> 4) TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Mismatching shapes + TensorInfo(TensorShape(32U, 13U, 17U, 3U, 2U), 1, DataType::U8), // Unsupported source dimensions (>4) TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(2U), 1, DataType::U8), }), - framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S8), + make("OutputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(2U, 13U, 2U), 1, DataType::U8), + TensorInfo(TensorShape(32U, 13U, 17U, 3U, 2U), 1, DataType::U8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(2U), 1, DataType::U8), })), - framework::dataset::make("AxisInfo", { TensorInfo(TensorShape(3U), 1, DataType::U8), + make("AxisInfo", { TensorInfo(TensorShape(3U), 1, DataType::U8), TensorInfo(TensorShape(2U, 10U), 1, DataType::U32), TensorInfo(TensorShape(8U), 1, DataType::U32), TensorInfo(TensorShape(2U), 1, DataType::U32), TensorInfo(TensorShape(2U), 1, DataType::U32), TensorInfo(TensorShape(2U), 1, DataType::U32), + TensorInfo(TensorShape(2U), 1, DataType::U32), })), - framework::dataset::make("Expected", { false, false, false, false, true, true})), + make("Expected", { false, false, false, false, false, true, true})), src_info, dst_info, axis_info, expected) { Status s = NEReverse::validate(&src_info.clone()->set_is_resizable(false), @@ -95,7 +99,11 @@ TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, NEReverseFixture<half>, framework::DatasetMode::PRECOMMIT, - combine(run_small_dataset, framework::dataset::make("DataType", DataType::F16))) + combine( + run_small_dataset, + make("DataType", DataType::F16), + make("use_negative_axis", { true, false }), + make("use_inverted_axis", { true, false }))) { // Validate output validate(Accessor(_target), _reference); @@ -104,7 +112,11 @@ FIXTURE_DATA_TEST_CASE(RunSmall, FIXTURE_DATA_TEST_CASE(RunLarge, NEReverseFixture<half>, framework::DatasetMode::NIGHTLY, - combine(run_large_dataset, framework::dataset::make("DataType", DataType::F16))) + combine( + run_large_dataset, + make("DataType", DataType::F16), + make("use_negative_axis", { true, false }), + make("use_inverted_axis", { true, false }))) { // Validate output validate(Accessor(_target), _reference); @@ -116,7 +128,11 @@ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEReverseFixture<float>, framework::DatasetMode::PRECOMMIT, - combine(run_small_dataset, framework::dataset::make("DataType", DataType::F32))) + combine( + run_small_dataset, + make("DataType", DataType::F32), + make("use_negative_axis", { true, false }), + make("use_inverted_axis", { true, false }))) { // Validate output validate(Accessor(_target), _reference); @@ -125,7 +141,11 @@ FIXTURE_DATA_TEST_CASE(RunSmall, FIXTURE_DATA_TEST_CASE(RunLarge, NEReverseFixture<float>, framework::DatasetMode::NIGHTLY, - combine(run_large_dataset, framework::dataset::make("DataType", DataType::F32))) + combine( + run_large_dataset, + make("DataType", DataType::F32), + make("use_negative_axis", { true, false }), + make("use_inverted_axis", { true, false }))) { // Validate output validate(Accessor(_target), _reference); @@ -138,7 +158,11 @@ TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, NEReverseFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, - combine(run_small_dataset, framework::dataset::make("DataType", DataType::QASYMM8))) + combine( + run_small_dataset, + make("DataType", DataType::QASYMM8), + make("use_negative_axis", { true, false }), + make("use_inverted_axis", { true, false }))) { // Validate output validate(Accessor(_target), _reference); @@ -147,7 +171,11 @@ FIXTURE_DATA_TEST_CASE(RunSmall, FIXTURE_DATA_TEST_CASE(RunLarge, NEReverseFixture<uint8_t>, framework::DatasetMode::NIGHTLY, - combine(run_large_dataset, framework::dataset::make("DataType", DataType::QASYMM8))) + combine( + run_large_dataset, + make("DataType", DataType::QASYMM8), + make("use_negative_axis", { true, false }), + make("use_inverted_axis", { true, false }))) { // Validate output validate(Accessor(_target), _reference); diff --git a/tests/validation/NEON/Scale.cpp b/tests/validation/NEON/Scale.cpp index bb1ab936d1..f1209a21ac 100644 --- a/tests/validation/NEON/Scale.cpp +++ b/tests/validation/NEON/Scale.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,16 +22,10 @@ * SOFTWARE. */ #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/functions/NEScale.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" #include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" #include "tests/datasets/ScaleValidationDataset.h" -#include "tests/framework/Asserts.h" #include "tests/framework/Macros.h" -#include "tests/validation/Helpers.h" #include "tests/validation/Validation.h" #include "tests/validation/fixtures/ScaleFixture.h" @@ -50,8 +44,8 @@ using datasets::ScaleSamplingPolicySet; using datasets::ScaleAlignCornersSamplingPolicySet; /** We consider vector size in byte 64 since the maximum size of - * a vector used by @ref NEScaleKernel is currently 64-byte (float32x4x4). - * There are possibility to reduce test time further by using + * a vector used by the kernel is currently 64-byte (float32x4x4). + * There is possibility to reduce test time further by using * smaller vector sizes for different data types where applicable. */ constexpr uint32_t vector_byte = 64; @@ -62,25 +56,31 @@ constexpr uint32_t num_elements_per_vector() return vector_byte / sizeof(T); } -/** Scale data types */ -const auto ScaleDataTypes = framework::dataset::make("DataType", +/** Quantization information data set */ +const auto QuantizationInfoSet = framework::dataset::make("QuantizationInfo", { - DataType::U8, - DataType::S16, - DataType::F32, + QuantizationInfo(0.5f, -10), }); /** Quantization information data set */ -const auto QuantizationInfoSet = framework::dataset::make("QuantizationInfo", +const auto InputQuantizationInfoSet = framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(0.5f, -10), }); +/** Quantization information data set */ +const auto OutputQuantizationInfoSet = framework::dataset::make("OutputQuantizationInfo", +{ + QuantizationInfo(0.2f, 20), +}); + /** Tolerance */ constexpr AbsoluteTolerance<uint8_t> tolerance_u8(1); +constexpr AbsoluteTolerance<int8_t> tolerance_s8(1); constexpr AbsoluteTolerance<int16_t> tolerance_s16(1); RelativeTolerance<float> tolerance_f32(0.05); #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +constexpr float abs_tolerance_f16(0.01f); RelativeTolerance<half> tolerance_f16(half(0.1)); #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ @@ -94,9 +94,8 @@ TEST_SUITE(Validate) /** Validate test suite is to test ARM_COMPUTE_RETURN_ON_* macros * we use to check the validity of given arguments in @ref NEScale - * and subsequent call to @ref NEScaleKernel. * Since this is using validate() of @ref NEScale, which pre-adjust - * arguments for @ref NEScaleKernel, the following conditions in + * arguments for the kernel, the following conditions in * the kernel are not currently tested. * - The same input and output * - Data type of offset, dx and dy @@ -156,8 +155,6 @@ TEST_CASE(SupportDataType, framework::DatasetMode::ALL) { DataType::BFLOAT16, false }, #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC { DataType::F16, true }, -#else // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - { DataType::F16, false }, #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC { DataType::F32, true }, { DataType::F64, false }, @@ -317,11 +314,18 @@ DATA_TEST_CASE(CheckNoPaddingInterpAREA, framework::DatasetMode::ALL, combine(co template <typename T> using NEScaleFixture = ScaleValidationFixture<Tensor, Accessor, NEScale, T>; template <typename T> +using NEScaleMixedDataLayoutFixture = ScaleValidationFixture<Tensor, Accessor, NEScale, T, true>; +template <typename T> using NEScaleQuantizedFixture = ScaleValidationQuantizedFixture<Tensor, Accessor, NEScale, T>; +template <typename T> +using NEScaleDifferentOutputQuantizedFixture = ScaleValidationDifferentOutputQuantizedFixture<Tensor, Accessor, NEScale, T>; +template <typename T> +using NEScaleQuantizedMixedDataLayoutFixture = ScaleValidationQuantizedFixture<Tensor, Accessor, NEScale, T, true>; TEST_SUITE(Float) TEST_SUITE(FP32) -const auto f32_shape = combine((SCALE_SHAPE_DATASET(num_elements_per_vector<float>())), framework::dataset::make("DataType", DataType::F32)); +const auto f32_shape = combine((SCALE_SHAPE_DATASET(num_elements_per_vector<float>())), framework::dataset::make("DataType", DataType::F32)); +const auto f32_shape_nhwc = combine(datasets::Small3DShapes(), framework::dataset::make("DataType", DataType::F32)); FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture<float>, framework::DatasetMode::ALL, ASSEMBLE_DATASET(f32_shape, ScaleSamplingPolicySet)) { //Create valid region @@ -331,6 +335,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture<float>, framework::DatasetMode:: // Validate output validate(Accessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32); } +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEScaleMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, ASSEMBLE_DATASET(f32_shape, ScaleSamplingPolicySet)) +{ + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); + + // Validate output + validate(Accessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32); +} FIXTURE_DATA_TEST_CASE(RunSmallAlignCorners, NEScaleFixture<float>, framework::DatasetMode::ALL, ASSEMBLE_DATASET(f32_shape, ScaleAlignCornersSamplingPolicySet)) { //Create valid region @@ -340,10 +353,38 @@ FIXTURE_DATA_TEST_CASE(RunSmallAlignCorners, NEScaleFixture<float>, framework::D // Validate output validate(Accessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32); } +FIXTURE_DATA_TEST_CASE(RunMediumNHWC, NEScaleFixture<float>, framework::DatasetMode::ALL, ASSEMBLE_NHWC_DATASET(f32_shape_nhwc, ScaleSamplingPolicySet)) +{ + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); + + // Validate output + validate(Accessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32); +} +FIXTURE_DATA_TEST_CASE(RunMediumMixedDataLayoutNHWC, NEScaleMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, ASSEMBLE_NHWC_DATASET(f32_shape_nhwc, ScaleSamplingPolicySet)) +{ + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); + + // Validate output + validate(Accessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32); +} +FIXTURE_DATA_TEST_CASE(RunMediumAlignCornersNHWC, NEScaleFixture<float>, framework::DatasetMode::ALL, ASSEMBLE_NHWC_DATASET(f32_shape_nhwc, ScaleAlignCornersSamplingPolicySet)) +{ + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); + + // Validate output + validate(Accessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32); +} TEST_SUITE_END() // FP32 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) -const auto f16_shape = combine((SCALE_SHAPE_DATASET(num_elements_per_vector<half>())), framework::dataset::make("DataType", DataType::F16)); +const auto f16_shape = combine((SCALE_SHAPE_DATASET(num_elements_per_vector<half>())), framework::dataset::make("DataType", DataType::F16)); +const auto f16_shape_nhwc = combine(datasets::Small3DShapes(), framework::dataset::make("DataType", DataType::F16)); FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture<half>, framework::DatasetMode::ALL, ASSEMBLE_DATASET(f16_shape, ScaleSamplingPolicySet)) { //Create valid region @@ -351,7 +392,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture<half>, framework::DatasetMode::A const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); // Validate output - validate(Accessor(_target), _reference, valid_region, tolerance_f16); + validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); } FIXTURE_DATA_TEST_CASE(RunSmallAlignCorners, NEScaleFixture<half>, framework::DatasetMode::ALL, ASSEMBLE_DATASET(f16_shape, ScaleAlignCornersSamplingPolicySet)) { @@ -360,7 +401,34 @@ FIXTURE_DATA_TEST_CASE(RunSmallAlignCorners, NEScaleFixture<half>, framework::Da const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); // Validate output - validate(Accessor(_target), _reference, valid_region, tolerance_f16); + validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunMediumNHWC, NEScaleFixture<half>, framework::DatasetMode::ALL, ASSEMBLE_NHWC_DATASET(f16_shape_nhwc, ScaleSamplingPolicySet)) +{ + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); + + // Validate output + validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunMediumMixedDataLayoutNHWC, NEScaleMixedDataLayoutFixture<half>, framework::DatasetMode::PRECOMMIT, ASSEMBLE_NHWC_DATASET(f16_shape_nhwc, ScaleSamplingPolicySet)) +{ + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); + + // Validate output + validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunMediumAlignCornersNHWC, NEScaleFixture<half>, framework::DatasetMode::ALL, ASSEMBLE_NHWC_DATASET(f16_shape_nhwc, ScaleAlignCornersSamplingPolicySet)) +{ + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); + + // Validate output + validate(Accessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); } TEST_SUITE_END() // FP16 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ @@ -388,6 +456,27 @@ FIXTURE_DATA_TEST_CASE(RunSmallAlignCorners, NEScaleFixture<uint8_t>, framework: validate(Accessor(_target), _reference, valid_region, tolerance_u8); } TEST_SUITE_END() // U8 +TEST_SUITE(S8) +const auto s8_shape = combine((SCALE_SHAPE_DATASET(num_elements_per_vector<int8_t>())), framework::dataset::make("DataType", DataType::S8)); +FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture<int8_t>, framework::DatasetMode::ALL, ASSEMBLE_S8_DATASET(s8_shape, ScaleSamplingPolicySet)) +{ + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); + + // Validate output + validate(Accessor(_target), _reference, valid_region, tolerance_s8); +} +FIXTURE_DATA_TEST_CASE(RunSmallAlignCorners, NEScaleFixture<int8_t>, framework::DatasetMode::ALL, ASSEMBLE_S8_DATASET(s8_shape, ScaleAlignCornersSamplingPolicySet)) +{ + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); + + // Validate output + validate(Accessor(_target), _reference, valid_region, tolerance_s8); +} +TEST_SUITE_END() // S8 TEST_SUITE(S16) const auto s16_shape = combine((SCALE_SHAPE_DATASET(num_elements_per_vector<int16_t>())), framework::dataset::make("DataType", DataType::S16)); FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture<int16_t>, framework::DatasetMode::ALL, ASSEMBLE_DATASET(s16_shape, ScaleSamplingPolicySet)) @@ -423,6 +512,26 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleQuantizedFixture<uint8_t>, framework::Da // Validate output validate(Accessor(_target), _reference, valid_region, tolerance_u8); } +FIXTURE_DATA_TEST_CASE(RunSmallDifferentOutputQuantization, NEScaleDifferentOutputQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, + ASSEMBLE_DIFFERENTLY_QUANTIZED_DATASET(qasymm8_shape, ScaleSamplingPolicySet, InputQuantizationInfoSet, OutputQuantizationInfoSet)) +{ + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); + + // Validate output + validate(Accessor(_target), _reference, valid_region, tolerance_u8); +} +FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEScaleQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::ALL, ASSEMBLE_QUANTIZED_DATASET(qasymm8_shape, ScaleSamplingPolicySet, + QuantizationInfoSet)) +{ + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); + + // Validate output + validate(Accessor(_target), _reference, valid_region, tolerance_u8); +} FIXTURE_DATA_TEST_CASE(RunSmallAlignCorners, NEScaleQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, ASSEMBLE_QUANTIZED_DATASET(qasymm8_shape, ScaleAlignCornersSamplingPolicySet, QuantizationInfoSet)) { @@ -446,6 +555,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleQuantizedFixture<int8_t>, framework::Dat // Validate output validate(Accessor(_target), _reference, valid_region, tolerance_qasymm8_signed); } +FIXTURE_DATA_TEST_CASE(RunSmallDifferentOutputQuantization, NEScaleDifferentOutputQuantizedFixture<int8_t>, framework::DatasetMode::ALL, + ASSEMBLE_DIFFERENTLY_QUANTIZED_DATASET(qasymm8_signed_shape, ScaleSamplingPolicySet, InputQuantizationInfoSet, OutputQuantizationInfoSet)) +{ + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); + + // Validate output + validate(Accessor(_target), _reference, valid_region, tolerance_qasymm8_signed); +} FIXTURE_DATA_TEST_CASE(RunSmallAlignCorners, NEScaleQuantizedFixture<int8_t>, framework::DatasetMode::ALL, ASSEMBLE_QUANTIZED_DATASET(qasymm8_signed_shape, ScaleAlignCornersSamplingPolicySet, QuantizationInfoSet)) { diff --git a/tests/validation/NEON/SoftmaxLayer.cpp b/tests/validation/NEON/SoftmaxLayer.cpp index 2a9e30604e..94d0866c38 100644 --- a/tests/validation/NEON/SoftmaxLayer.cpp +++ b/tests/validation/NEON/SoftmaxLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,21 +25,22 @@ #include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/common/cpuinfo/CpuIsaInfo.h" +#include "src/cpu/kernels/CpuSoftmaxKernel.h" #include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" #include "tests/datasets/ShapeDatasets.h" #include "tests/framework/Asserts.h" #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" #include "tests/validation/Validation.h" #include "tests/validation/fixtures/SoftmaxLayerFixture.h" - namespace arm_compute { namespace test { namespace validation { +using framework::dataset::make; namespace { /** Tolerance for float operations */ @@ -51,7 +52,7 @@ constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(1); /** CNN data types */ -const auto CNNDataTypes = framework::dataset::make("DataType", +const auto CNNDataTypes = make("DataType", { #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC DataType::F16, @@ -62,56 +63,55 @@ const auto CNNDataTypes = framework::dataset::make("DataType", TEST_SUITE(NEON) TEST_SUITE(SoftmaxLayer) - // *INDENT-OFF* // clang-format off -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), // Mismatching data types - TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), // Mismatching shapes - TensorInfo(TensorShape(27U, 13U), 1, DataType::QASYMM8, // Invalid output quantization info - QuantizationInfo(1.f/256, 12)), - TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8, - QuantizationInfo(1.f/256, 12)), - TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8, //Invalid axis high - QuantizationInfo(1.f/256, 12)), - TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8, //Invalid axis low - QuantizationInfo(1.f/256, 12)), - }), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U), 1, DataType::F16), - TensorInfo(TensorShape(27U, 11U), 1, DataType::F32), - TensorInfo(TensorShape(27U, 13U), 1, DataType::QASYMM8, - QuantizationInfo(1.f/256, 12)), - TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8, - QuantizationInfo(1.f/256, 0)), - TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8, - QuantizationInfo(1.f/256, 0)), - TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8, - QuantizationInfo(1.f/256, 0)), - })), - framework::dataset::make("beta", { 1.0, - 2.0, - 1.0, - 2.0, - 1.0, - 1.0, - 2.0, - 1.0, - })), - framework::dataset::make("axis", { 0, - 0, - 0, - 1, - 0, - -1, - 2, - -3, - })), - framework::dataset::make("Expected", { false, false, false, true, true, true, false, false })), - input_info, output_info, beta, axis, expected) +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip( + make("InputInfo", { TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), // Mismatching data types + TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), // Mismatching shapes + TensorInfo(TensorShape(27U, 13U), 1, DataType::QASYMM8, // Invalid output quantization info + QuantizationInfo(1.f/256, 12)), + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8, + QuantizationInfo(1.f/256, 12)), + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8, //Invalid axis high + QuantizationInfo(1.f/256, 12)), + TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8, //Invalid axis low + QuantizationInfo(1.f/256, 12)), + }), + make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U), 1, DataType::F16), + TensorInfo(TensorShape(27U, 11U), 1, DataType::F32), + TensorInfo(TensorShape(27U, 13U), 1, DataType::QASYMM8, + QuantizationInfo(1.f/256, 12)), + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8, + QuantizationInfo(1.f/256, 0)), + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8, + QuantizationInfo(1.f/256, 0)), + TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8, + QuantizationInfo(1.f/256, 0)), + }), + make("beta", { 1.0, + 2.0, + 1.0, + 2.0, + 1.0, + 1.0, + 2.0, + 1.0, + }), + make("axis", { 0, + 0, + 0, + 1, + 0, + -1, + 2, + -3, + }), + make("Expected", { false, false, false, true, true, true, false, false })), + input_info, output_info, beta, axis, expected) { ARM_COMPUTE_EXPECT(bool(NESoftmaxLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), beta, axis)) == expected, framework::LogLevel::ERRORS); } @@ -121,29 +121,80 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( template <typename T> using NESoftmaxLayerFixture = SoftmaxValidationFixture<Tensor, Accessor, NESoftmaxLayer, T>; +DATA_TEST_CASE(KernelSelection, framework::DatasetMode::ALL, + concat( + combine( + make("CpuExt", std::string("neon")), + make("DataType", { DataType::F32, + DataType::F16, + DataType::QASYMM8, + DataType::QASYMM8_SIGNED}) + ), + combine( + make("CpuExt", std::string("sme2")), + make("DataType", { DataType::F32, + DataType::F16})) + ), + cpu_ext, data_type) +{ + using namespace cpu::kernels; + + cpuinfo::CpuIsaInfo cpu_isa{}; + cpu_isa.neon = (cpu_ext == "neon"); + cpu_isa.sme2 = (cpu_ext == "sme2"); + cpu_isa.fp16 = (data_type == DataType::F16); + + const auto *selected_impl = CpuSoftmaxKernel::get_implementation( + SoftmaxKernelDataTypeISASelectorData{ data_type, cpu_isa, false /* is_log */, 0 /* axis */, CPUInfo::get().get_sme2_vector_length()}, + cpu::KernelSelectionType::Preferred); + + ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl); + + std::string expected = cpu_ext + "_" + cpu_impl_dt(data_type) + "_softmax"; + std::string actual = selected_impl->name; + + ARM_COMPUTE_EXPECT_EQUAL(expected, actual, framework::LogLevel::ERRORS); +} + TEST_SUITE(Float) #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small4DShapes(), - framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 0, 1 }))) +FIXTURE_DATA_TEST_CASE(RunSmall2D, NESoftmaxLayerFixture<half>, framework::DatasetMode::PRECOMMIT, + combine( + datasets::SoftmaxLayerSmallShapes(), + make("DataType", DataType::F16), + make("Beta", { 1.0f, 2.0f }), + make("Axis", { 0, -1 }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixture<half>, framework::DatasetMode::PRECOMMIT, + combine( + datasets::SmallShapes(), + make("DataType", DataType::F16), + make("Beta", { 1.0f, 2.0f }), + make("Axis", { 0, 1 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f16); } -FIXTURE_DATA_TEST_CASE(RunSmall4D, NESoftmaxLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small4DShapes(), - framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 0, 2, -1 }))) +FIXTURE_DATA_TEST_CASE(RunSmall4D, NESoftmaxLayerFixture<half>, framework::DatasetMode::PRECOMMIT, + combine( + datasets::Small4DShapes(), + make("DataType", DataType::F16), + make("Beta", { 1.0f }), + make("Axis", { 0, 2, -1 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f16); } -FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SoftmaxLayerLargeShapes(), - framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 0 }))) +FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixture<half>, framework::DatasetMode::NIGHTLY, + combine( + datasets::SoftmaxLayerLargeShapes(), + make("DataType", DataType::F16), + make("Beta", { 1.0f, 2.0f }), + make("Axis", { 0 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f16); @@ -152,26 +203,30 @@ TEST_SUITE_END() //FP16 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall2D, NESoftmaxLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SoftmaxLayerSmallShapes(), - framework::dataset::make("DataType", DataType::F32)), - framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 0, -1 }))) +FIXTURE_DATA_TEST_CASE(RunSmall2D, NESoftmaxLayerFixture<float>, framework::DatasetMode::PRECOMMIT, + combine( + datasets::SoftmaxLayerSmallShapes(), + make("DataType", DataType::F32), + make("Beta", { 1.0f, 2.0f }), + make("Axis", { 0, -1 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunSmall4D, NESoftmaxLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small4DShapes(), - framework::dataset::make("DataType", DataType::F32)), - framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 0, -2, 3 }))) +FIXTURE_DATA_TEST_CASE(RunSmall4D, NESoftmaxLayerFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(datasets::Small4DShapes(), + make("DataType", DataType::F32), + make("Beta", { 1.0f, 2.0f }), + make("Axis", { 0, -2, 3 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SoftmaxLayerLargeShapes(), - framework::dataset::make("DataType", DataType::F32)), - framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 0 }))) +FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixture<float>, framework::DatasetMode::NIGHTLY, + combine(datasets::SoftmaxLayerLargeShapes(), + make("DataType", DataType::F32), + make("Beta", { 1.0f, 2.0f }), + make("Axis", { 0 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f32); @@ -184,29 +239,40 @@ using NESoftmaxLayerQuantizedFixture = SoftmaxValidationQuantizedFixture<Tensor, TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) -FIXTURE_DATA_TEST_CASE(RunSmall2D, NESoftmaxLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SoftmaxLayerSmallShapes(), - framework::dataset::make("DataType", DataType::QASYMM8)), - combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }), - framework::dataset::make("Beta", { 1.0f, 2.f }))), - framework::dataset::make("Axis", { 0, -1 }))) +FIXTURE_DATA_TEST_CASE(RunSmall2D, NESoftmaxLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, + combine( + datasets::SoftmaxLayerSmallShapes(), + make("DataType", DataType::QASYMM8), + combine( + make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }), + make("Beta", { 1.0f, 2.f }) + ), + make("Axis", { 0, -1 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); } -FIXTURE_DATA_TEST_CASE(RunSmall4D, NESoftmaxLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small4DShapes(), - framework::dataset::make("DataType", DataType::QASYMM8)), - combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }), - framework::dataset::make("Beta", { 1.0f, 2.f }))), - framework::dataset::make("Axis", { 0, 1, -2 }))) +FIXTURE_DATA_TEST_CASE(RunSmall4D, NESoftmaxLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, + combine( + datasets::Small4DShapes(), + make("DataType", DataType::QASYMM8), + combine( + make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }), + make("Beta", { 1.0f, 2.f })), + make("Axis", { 0, 1, -2 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); } -FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SoftmaxLayerLargeShapes(), - framework::dataset::make("DataType", DataType::QASYMM8)), - combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }), - framework::dataset::make("Beta", { 1.0f, 2.0f }))), - framework::dataset::make("Axis", { 0 }))) +FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, + combine( + datasets::SoftmaxLayerLargeShapes(), + make("DataType", DataType::QASYMM8), + combine( + make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }), + make("Beta", { 1.0f, 2.0f }) + ), + make("Axis", { 0 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); @@ -214,20 +280,28 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerQuantizedFixture<uint8_t>, framew TEST_SUITE_END() //QASYMM8 TEST_SUITE(QASYMM8_SIGNED) -FIXTURE_DATA_TEST_CASE(RunSmall2D, NESoftmaxLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SoftmaxLayerSmallShapes(), - framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), - combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }), - framework::dataset::make("Beta", { 1.0f, 2.f }))), - framework::dataset::make("Axis", { 0, -1 }))) +FIXTURE_DATA_TEST_CASE(RunSmall2D, NESoftmaxLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, + combine( + datasets::SoftmaxLayerSmallShapes(), + make("DataType", DataType::QASYMM8_SIGNED), + combine( + make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }), + make("Beta", { 1.0f, 2.f }) + ), + make("Axis", { 0, -1 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8_signed); } -FIXTURE_DATA_TEST_CASE(RunSmall4D, NESoftmaxLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small4DShapes(), - framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), - combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }), - framework::dataset::make("Beta", { 1.0f, 2.f }))), - framework::dataset::make("Axis", { 0, 1, -1 }))) +FIXTURE_DATA_TEST_CASE(RunSmall4D, NESoftmaxLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, + combine( + datasets::Small4DShapes(), + make("DataType", DataType::QASYMM8_SIGNED), + combine( + make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }), + make("Beta", { 1.0f, 2.f }) + ), + make("Axis", { 0, 1, -1 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8_signed); diff --git a/tests/validation/NEON/StackLayer.cpp b/tests/validation/NEON/StackLayer.cpp index d88f713ccd..3828010c7b 100644 --- a/tests/validation/NEON/StackLayer.cpp +++ b/tests/validation/NEON/StackLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,69 +44,74 @@ namespace test { namespace validation { + +using framework::dataset::make; namespace { // *INDENT-OFF* // clang-format off /** Data types */ -const auto data_types = framework::dataset::make("DataType", { DataType::QASYMM8, DataType::F16, DataType::F32 }); +const auto data_types = make("DataType", { DataType::QASYMM8, DataType::F16, DataType::F32 }); /** Num tensors values to test */ -const auto n_values = framework::dataset::make("NumTensors", { 3, 4 }); +const auto n_values = make("NumTensors", { 3, 4 }); /** Shapes 1D to test */ -const auto shapes_1d_small = combine(datasets::Small1DShapes(), framework::dataset::make("Axis", -1, 2)); +const auto shapes_1d_small = combine(datasets::Small1DShapes(), make("Axis", -1, 2)); /** Shapes 2D to test */ -const auto shapes_2d_small = combine(datasets::Small2DShapes(), framework::dataset::make("Axis", -2, 3)); +const auto shapes_2d_small = combine(datasets::Small2DShapes(), make("Axis", -2, 3)); /** Shapes 3D to test */ -const auto shapes_3d_small = combine(datasets::Small3DShapes(), framework::dataset::make("Axis", -3, 4)); +const auto shapes_3d_small = combine(datasets::Small3DShapes(), make("Axis", -3, 4)); /** Shapes 4D to test */ -const auto shapes_4d_small = combine(datasets::Small4DShapes(), framework::dataset::make("Axis", -4, 5)); +const auto shapes_4d_small = combine(datasets::Small4DShapes(), make("Axis", -4, 5)); /** Shapes 1D to test */ -const auto shapes_1d_large = combine(datasets::Large1DShapes(), framework::dataset::make("Axis", -1, 2)); +const auto shapes_1d_large = combine(datasets::Large1DShapes(), make("Axis", -1, 2)); /** Shapes 2D to test */ -const auto shapes_2d_large = combine(datasets::Medium2DShapes(), framework::dataset::make("Axis", -2, 3)); +const auto shapes_2d_large = combine(datasets::Medium2DShapes(), make("Axis", -2, 3)); /** Shapes 3D to test */ -const auto shapes_3d_large = combine(datasets::Medium3DShapes(), framework::dataset::make("Axis", -3, 4)); +const auto shapes_3d_large = combine(datasets::Medium3DShapes(), make("Axis", -3, 4)); /** Shapes 4D to test */ -const auto shapes_4d_large = combine(datasets::Medium4DShapes(), framework::dataset::make("Axis", -4, 5)); +const auto shapes_4d_large = combine(datasets::Medium4DShapes(), make("Axis", -4, 5)); } // namespace /** Fixture to use */ template<typename T> using NEStackLayerFixture = StackLayerValidationFixture<Tensor, ITensor, Accessor, NEStackLayer, T>; +template<typename T> +using NEStackLayerWithPaddingFixture = StackLayerWithPaddingValidationFixture<Tensor, ITensor, Accessor, NEStackLayer, T>; + using namespace arm_compute::misc::shape_calculator; TEST_SUITE(NEON) TEST_SUITE(StackLayer) -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( - framework::dataset::make("InputInfo", +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip( +make("InputInfo", { std::vector<TensorInfo>{ TensorInfo(TensorShape(9U, 8U), 1, DataType::U8) }, - std::vector<TensorInfo>{ TensorInfo(TensorShape(1U, 2U), 1, DataType::U8) , TensorInfo(TensorShape(1U, 2U), 1, DataType::U8), TensorInfo(TensorShape(1U, 2U), 1, DataType::U8)}, + std::vector<TensorInfo>{ TensorInfo(TensorShape(1U, 2U), 1, DataType::U8) , TensorInfo(TensorShape(1U, 2U), 1, DataType::U8), TensorInfo(TensorShape(1U, 2U), 1, DataType::U8)}, std::vector<TensorInfo>{ TensorInfo(TensorShape(2U, 3U), 1, DataType::S32) }, - std::vector<TensorInfo>{ TensorInfo(TensorShape(7U, 5U, 3U, 8U, 2U), 1, DataType::S32), TensorInfo(TensorShape(7U, 5U, 3U, 8U, 2U), 1, DataType::S32)}, + std::vector<TensorInfo>{ TensorInfo(TensorShape(7U, 5U, 3U, 8U, 2U), 1, DataType::S32), TensorInfo(TensorShape(7U, 5U, 3U, 8U, 2U), 1, DataType::S32)}, std::vector<TensorInfo>{ TensorInfo(TensorShape(9U, 8U), 1, DataType::S32) }, }), -framework::dataset::make("OutputInfo", +make("OutputInfo", { TensorInfo(TensorShape(1U, 9U, 8U), 1, DataType::U8), // Passes, stack 1 tensor on x axis TensorInfo(TensorShape(1U, 3U, 2U), 1, DataType::U8), // Passes, stack 3 tensors on y axis TensorInfo(TensorShape(1U, 2U, 3U), 1, DataType::S32), // fails axis < (- input's rank) TensorInfo(TensorShape(3U, 7U, 5U), 1, DataType::S32), // fails, input dimensions > 4 TensorInfo(TensorShape(1U, 2U, 3U), 1, DataType::U8), // fails mismatching data types -})), -framework::dataset::make("Axis", { -3, 1, -4, -3, 1 })), -framework::dataset::make("Expected", { true, true, false, false, false })), +}), +make("Axis", { -3, 1, -4, -3, 1 }), +make("Expected", { true, true, false, false, false })), input_info, output_info, axis, expected) { std::vector<TensorInfo> ti(input_info); @@ -121,18 +126,18 @@ input_info, output_info, axis, expected) TEST_SUITE(Shapes1D) TEST_SUITE(S32) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture<int>, framework::DatasetMode::ALL, - combine(combine(shapes_1d_small, - framework::dataset::make("DataType", { DataType::S32 })), - n_values)) + combine(shapes_1d_small, + make("DataType", { DataType::S32 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture<int>, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_1d_large, - framework::dataset::make("DataType", { DataType::S32 })), - n_values)) + combine(shapes_1d_large, + make("DataType", { DataType::S32 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -141,18 +146,18 @@ TEST_SUITE_END() // S32 TEST_SUITE(S16) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture<short>, framework::DatasetMode::ALL, - combine(combine(shapes_1d_small, - framework::dataset::make("DataType", { DataType::S16 })), - n_values)) + combine(shapes_1d_small, + make("DataType", { DataType::S16 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture<short>, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_1d_large, - framework::dataset::make("DataType", { DataType::S16 })), - n_values)) + combine(shapes_1d_large, + make("DataType", { DataType::S16 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -161,18 +166,18 @@ TEST_SUITE_END() // S16 TEST_SUITE(S8) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture<char>, framework::DatasetMode::ALL, - combine(combine(shapes_1d_small, - framework::dataset::make("DataType", { DataType::S8 })), - n_values)) + combine(shapes_1d_small, + make("DataType", { DataType::S8 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture<char>, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_1d_large, - framework::dataset::make("DataType", { DataType::S8 })), - n_values)) + combine(shapes_1d_large, + make("DataType", { DataType::S8 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -183,18 +188,18 @@ TEST_SUITE_END() // Shapes1D TEST_SUITE(Shapes2D) TEST_SUITE(S32) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture<int>, framework::DatasetMode::ALL, - combine(combine(shapes_2d_small, - framework::dataset::make("DataType", { DataType::S32 })), - n_values)) + combine(shapes_2d_small, + make("DataType", { DataType::S32 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture<int>, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_2d_large, - framework::dataset::make("DataType", { DataType::S32 })), - n_values)) + combine(shapes_2d_large, + make("DataType", { DataType::S32 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -203,18 +208,18 @@ TEST_SUITE_END() // S32 TEST_SUITE(S16) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture<short>, framework::DatasetMode::ALL, - combine(combine(shapes_2d_small, - framework::dataset::make("DataType", { DataType::S16 })), - n_values)) + combine(shapes_2d_small, + make("DataType", { DataType::S16 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture<short>, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_2d_large, - framework::dataset::make("DataType", { DataType::S16 })), - n_values)) + combine(shapes_2d_large, + make("DataType", { DataType::S16 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -223,18 +228,18 @@ TEST_SUITE_END() // S16 TEST_SUITE(S8) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture<char>, framework::DatasetMode::ALL, - combine(combine(shapes_2d_small, - framework::dataset::make("DataType", { DataType::S8 })), - n_values)) + combine(shapes_2d_small, + make("DataType", { DataType::S8 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture<char>, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_2d_large, - framework::dataset::make("DataType", { DataType::S8 })), - n_values)) + combine(shapes_2d_large, + make("DataType", { DataType::S8 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -245,18 +250,18 @@ TEST_SUITE_END() // Shapes2D TEST_SUITE(Shapes3D) TEST_SUITE(S32) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture<int>, framework::DatasetMode::ALL, - combine(combine(shapes_3d_small, - framework::dataset::make("DataType", { DataType::S32 })), - n_values)) + combine(shapes_3d_small, + make("DataType", { DataType::S32 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture<int>, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_3d_large, - framework::dataset::make("DataType", { DataType::S32 })), - n_values)) + combine(shapes_3d_large, + make("DataType", { DataType::S32 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -265,18 +270,18 @@ TEST_SUITE_END() // S32 TEST_SUITE(S16) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture<short>, framework::DatasetMode::ALL, - combine(combine(shapes_3d_small, - framework::dataset::make("DataType", { DataType::S16 })), - n_values)) + combine(shapes_3d_small, + make("DataType", { DataType::S16 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture<short>, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_3d_large, - framework::dataset::make("DataType", { DataType::S16 })), - n_values)) + combine(shapes_3d_large, + make("DataType", { DataType::S16 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -285,18 +290,18 @@ TEST_SUITE_END() // S16 TEST_SUITE(S8) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture<char>, framework::DatasetMode::ALL, - combine(combine(shapes_3d_small, - framework::dataset::make("DataType", { DataType::S8 })), - n_values)) + combine(shapes_3d_small, + make("DataType", { DataType::S8 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture<char>, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_3d_large, - framework::dataset::make("DataType", { DataType::S8 })), - n_values)) + combine(shapes_3d_large, + make("DataType", { DataType::S8 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -307,18 +312,29 @@ TEST_SUITE_END() // Shapes3D TEST_SUITE(Shapes4D) TEST_SUITE(S32) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture<int>, framework::DatasetMode::ALL, - combine(combine(shapes_4d_small, - framework::dataset::make("DataType", { DataType::S32 })), - n_values)) + combine(shapes_4d_small, + make("DataType", { DataType::S32 }), + n_values)) +{ + // Validate output + validate(Accessor(_target), _reference); +} + +// Testing the case with padding for only 4d shapes and for one data type. This is because the underlying code +// path depends only on the padding, which isn't affected by the shapes or data types. +FIXTURE_DATA_TEST_CASE(RunSmallWithPadding, NEStackLayerWithPaddingFixture<int>, framework::DatasetMode::ALL, + combine(shapes_4d_small, + make("DataType", { DataType::S32 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture<int>, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_4d_large, - framework::dataset::make("DataType", { DataType::S32 })), - n_values)) + combine(shapes_4d_large, + make("DataType", { DataType::S32 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -327,18 +343,18 @@ TEST_SUITE_END() // S32 TEST_SUITE(S16) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture<short>, framework::DatasetMode::ALL, - combine(combine(shapes_4d_small, - framework::dataset::make("DataType", { DataType::S16 })), - n_values)) + combine(shapes_4d_small, + make("DataType", { DataType::S16 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture<short>, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_4d_large, - framework::dataset::make("DataType", { DataType::S16 })), - n_values)) + combine(shapes_4d_large, + make("DataType", { DataType::S16 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); @@ -347,24 +363,37 @@ TEST_SUITE_END() // S16 TEST_SUITE(S8) FIXTURE_DATA_TEST_CASE(RunSmall, NEStackLayerFixture<char>, framework::DatasetMode::ALL, - combine(combine(shapes_4d_small, - framework::dataset::make("DataType", { DataType::S8 })), - n_values)) + combine(shapes_4d_small, + make("DataType", { DataType::S8 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } FIXTURE_DATA_TEST_CASE(RunLarge, NEStackLayerFixture<char>, framework::DatasetMode::NIGHTLY, - combine(combine(shapes_4d_large, - framework::dataset::make("DataType", { DataType::S8 })), - n_values)) + combine(shapes_4d_large, + make("DataType", { DataType::S8 }), + n_values)) { // Validate output validate(Accessor(_target), _reference); } TEST_SUITE_END() // S8 TEST_SUITE_END() // Shapes4D + +TEST_SUITE(HighDimensional) +// The Cpu implementation supports tensors of size 4D+, but reference implementation does not. +FIXTURE_DATA_TEST_CASE(RunHighDimensional, NEStackLayerFixture<char>, framework::DatasetMode::DISABLED, + combine(make("Shape", { TensorShape{2U, 3U, 4U, 5U, 3U} }), + make("Axis", { 5, 0, -3, 2 }), + make("DataType", { DataType::S8 }), + make("NumTensors", { 3 }))) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() // HighDimensional TEST_SUITE_END() // StackLayer TEST_SUITE_END() // Neon } // namespace validation diff --git a/tests/validation/NEON/UNIT/MemoryManager.cpp b/tests/validation/NEON/UNIT/MemoryManager.cpp index 83a9fcb332..2c57b534fe 100644 --- a/tests/validation/NEON/UNIT/MemoryManager.cpp +++ b/tests/validation/NEON/UNIT/MemoryManager.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -62,15 +62,15 @@ TEST_CASE(BlobMemoryManagerSimpleWithinFunctionLevel, framework::DatasetMode::AL norm_layer_1.configure(&src, &dst, NormalizationLayerInfo(NormType::CROSS_MAP, 3)); norm_layer_2.configure(&src, &dst, NormalizationLayerInfo(NormType::IN_MAP_1D, 3)); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Finalize memory manager mm->populate(allocator, 1 /* num_pools */); diff --git a/tests/validation/NEON/UNIT/RuntimeContext.cpp b/tests/validation/NEON/UNIT/RuntimeContext.cpp index f64d380423..e0d45c639a 100644 --- a/tests/validation/NEON/UNIT/RuntimeContext.cpp +++ b/tests/validation/NEON/UNIT/RuntimeContext.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -48,6 +48,19 @@ namespace validation { TEST_SUITE(NEON) TEST_SUITE(UNIT) +#if defined(ARM_COMPUTE_OPENMP_SCHEDULER) && !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__) +TEST_CASE(CpuCapacity, framework::DatasetMode::ALL) +{ + CPUInfo& ci = arm_compute::Scheduler::get().cpu_info(); + const uint32_t nonlittle_num_cpus = ci.get_cpu_num_excluding_little(); + const uint32_t num_threads = arm_compute::Scheduler::get().num_threads(); + + ARM_COMPUTE_EXPECT(num_threads<=nonlittle_num_cpus , framework::LogLevel::ERRORS); +} +#endif /* defined(ARM_COMPUTE_OPENMP_SCHEDULER) && !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ + (defined(__arm__) || defined(__aarch64__)) && defined(__ANDROID__)*/ + TEST_SUITE(RuntimeContext) TEST_CASE(Scheduler, framework::DatasetMode::ALL) @@ -57,14 +70,14 @@ TEST_CASE(Scheduler, framework::DatasetMode::ALL) RuntimeContext ctx; // Check if it's been initialised properly - ARM_COMPUTE_EXPECT(ctx.scheduler() != nullptr, framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(ctx.asset_manager() == nullptr, framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(ctx.scheduler() != nullptr); + ARM_COMPUTE_ASSERT(ctx.asset_manager() == nullptr); // Create a Scheduler auto scheduler = SchedulerFactory::create(); ctx.set_scheduler(scheduler.get()); // Check if the scheduler has been properly setup - ARM_COMPUTE_EXPECT(ctx.scheduler() != nullptr, framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(ctx.scheduler() != nullptr); // Create a new activation function NEActivationLayer act_layer(&ctx); @@ -74,14 +87,14 @@ TEST_CASE(Scheduler, framework::DatasetMode::ALL) act_layer.configure(&src, &dst, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR)); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); float min_bound = 0; float max_bound = 0; @@ -117,10 +130,10 @@ TEST_CASE(MultipleThreadedScheduller, framework::DatasetMode::ALL) act_layer_thread0.configure(&src_t0, &dst_t0, activation_info); act_layer_thread1.configure(&src_t1, &dst_t1, activation_info); - ARM_COMPUTE_EXPECT(src_t0.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst_t0.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(src_t1.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst_t1.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src_t0.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst_t0.info()->is_resizable()); + ARM_COMPUTE_ASSERT(src_t1.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst_t1.info()->is_resizable()); // Allocate tensors src_t0.allocator()->allocate(); @@ -128,8 +141,8 @@ TEST_CASE(MultipleThreadedScheduller, framework::DatasetMode::ALL) src_t1.allocator()->allocate(); dst_t1.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src_t0.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!src_t1.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src_t0.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!src_t1.info()->is_resizable()); float min_bound = 0; float max_bound = 0; diff --git a/tests/validation/NEON/UNIT/TensorAllocator.cpp b/tests/validation/NEON/UNIT/TensorAllocator.cpp index ef19524d1c..0aab9ef9b5 100644 --- a/tests/validation/NEON/UNIT/TensorAllocator.cpp +++ b/tests/validation/NEON/UNIT/TensorAllocator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -61,32 +61,32 @@ TEST_CASE(ImportMemory, framework::DatasetMode::ALL) // Negative case : Import nullptr Tensor t1; t1.allocator()->init(info); - ARM_COMPUTE_EXPECT(!bool(t1.allocator()->import_memory(nullptr)), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(t1.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!bool(t1.allocator()->import_memory(nullptr))); + ARM_COMPUTE_ASSERT(t1.info()->is_resizable()); // Negative case : Import misaligned pointer Tensor t2; const size_t required_alignment = 339; t2.allocator()->init(info, required_alignment); - ARM_COMPUTE_EXPECT(!bool(t2.allocator()->import_memory(data.get())), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(t2.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!bool(t2.allocator()->import_memory(data.get()))); + ARM_COMPUTE_ASSERT(t2.info()->is_resizable()); // Negative case : Import memory to a tensor that is memory managed Tensor t3; MemoryGroup mg; t3.allocator()->set_associated_memory_group(&mg); - ARM_COMPUTE_EXPECT(!bool(t3.allocator()->import_memory(data.get())), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(t3.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!bool(t3.allocator()->import_memory(data.get()))); + ARM_COMPUTE_ASSERT(t3.info()->is_resizable()); // Positive case : Set raw pointer Tensor t4; t4.allocator()->init(info); - ARM_COMPUTE_EXPECT(bool(t4.allocator()->import_memory(data.get())), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!t4.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(t4.buffer() == reinterpret_cast<uint8_t *>(data.get()), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(bool(t4.allocator()->import_memory(data.get()))); + ARM_COMPUTE_ASSERT(!t4.info()->is_resizable()); + ARM_COMPUTE_ASSERT(t4.buffer() == reinterpret_cast<uint8_t *>(data.get())); t4.allocator()->free(); - ARM_COMPUTE_EXPECT(t4.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(t4.buffer() == nullptr, framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(t4.info()->is_resizable()); + ARM_COMPUTE_ASSERT(t4.buffer() == nullptr); } TEST_CASE(ImportMemoryMalloc, framework::DatasetMode::ALL) @@ -114,8 +114,8 @@ TEST_CASE(ImportMemoryMalloc, framework::DatasetMode::ALL) void *aligned_ptr = raw_data.get(); std::align(required_alignment, total_size_in_bytes, aligned_ptr, space); - ARM_COMPUTE_EXPECT(bool(tensor.allocator()->import_memory(aligned_ptr)), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!tensor.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(bool(tensor.allocator()->import_memory(aligned_ptr))); + ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable()); // Fill tensor std::uniform_real_distribution<float> distribution(-5.f, 5.f); @@ -137,7 +137,7 @@ TEST_CASE(ImportMemoryMalloc, framework::DatasetMode::ALL) // Release resources tensor.allocator()->free(); - ARM_COMPUTE_EXPECT(tensor.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(tensor.info()->is_resizable()); } TEST_CASE(ImportMemoryMallocPadded, framework::DatasetMode::ALL) @@ -160,8 +160,8 @@ TEST_CASE(ImportMemoryMallocPadded, framework::DatasetMode::ALL) const size_t total_size_in_bytes = tensor.info()->total_size(); auto raw_data = std::make_unique<uint8_t[]>(total_size_in_bytes); - ARM_COMPUTE_EXPECT(bool(tensor.allocator()->import_memory(raw_data.get())), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!tensor.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(bool(tensor.allocator()->import_memory(raw_data.get()))); + ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable()); // Fill tensor while accounting padding std::uniform_real_distribution<float> distribution(-5.f, 5.f); @@ -190,10 +190,10 @@ TEST_CASE(ImportMemoryMallocPadded, framework::DatasetMode::ALL) // Release resources tensor.allocator()->free(); - ARM_COMPUTE_EXPECT(tensor.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(tensor.info()->is_resizable()); } -#if !defined(BARE_METAL) +#if !defined(_WIN64) && !defined(BARE_METAL) TEST_CASE(ImportMemoryMappedFile, framework::DatasetMode::ALL) { const ActivationLayerInfo act_info(ActivationLayerInfo::ActivationFunction::RELU); @@ -221,12 +221,12 @@ TEST_CASE(ImportMemoryMappedFile, framework::DatasetMode::ALL) // Map file utils::mmap_io::MMappedFile mmapped_file("test_mmap_import.bin", 0 /** Whole file */, 0); - ARM_COMPUTE_EXPECT(mmapped_file.is_mapped(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(mmapped_file.is_mapped()); unsigned char *data = mmapped_file.data(); // Import memory mapped memory - ARM_COMPUTE_EXPECT(bool(tensor.allocator()->import_memory(data)), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!tensor.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(bool(tensor.allocator()->import_memory(data))); + ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable()); // Fill tensor std::uniform_real_distribution<float> distribution(-5.f, 5.f); @@ -248,9 +248,9 @@ TEST_CASE(ImportMemoryMappedFile, framework::DatasetMode::ALL) // Release resources tensor.allocator()->free(); - ARM_COMPUTE_EXPECT(tensor.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(tensor.info()->is_resizable()); } -#endif // !defined(BARE_METAL) +#endif // !defined(_WIN64) && !defined(BARE_METAL) TEST_CASE(AlignedAlloc, framework::DatasetMode::ALL) { @@ -262,7 +262,7 @@ TEST_CASE(AlignedAlloc, framework::DatasetMode::ALL) t.allocator()->init(info, requested_alignment); t.allocator()->allocate(); - ARM_COMPUTE_EXPECT(t.buffer() != nullptr, framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(t.buffer() != nullptr); ARM_COMPUTE_EXPECT(t.allocator()->alignment() == requested_alignment, framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(arm_compute::utility::check_aligned(reinterpret_cast<void *>(t.buffer()), requested_alignment), framework::LogLevel::ERRORS); diff --git a/tests/validation/UNIT/CPPScheduler.cpp b/tests/validation/UNIT/CPPScheduler.cpp new file mode 100644 index 0000000000..6a3f6819fc --- /dev/null +++ b/tests/validation/UNIT/CPPScheduler.cpp @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CPP/CPPScheduler.h" + +#include "arm_compute/core/CPP/ICPPKernel.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" + +#include <stdexcept> + +using namespace arm_compute; +using namespace arm_compute::test; + +namespace +{ +class TestException: public std::exception +{ +public: + const char* what() const noexcept override + { + return "Expected test exception"; + } +}; + +class TestKernel: public ICPPKernel +{ +public: + TestKernel() + { + Window window; + window.set(0, Window::Dimension(0, 2)); + configure(window); + } + + const char* name() const override + { + return "TestKernel"; + } + + void run(const Window &, const ThreadInfo &) override + { + throw TestException(); + } + +}; +} + +TEST_SUITE(UNIT) +TEST_SUITE(CPPScheduler) +#if defined(ARM_COMPUTE_CPP_SCHEDULER) && !defined(BARE_METAL) +TEST_CASE(RethrowException, framework::DatasetMode::ALL) +{ + CPPScheduler scheduler; + CPPScheduler::Hints hints(0); + TestKernel kernel; + + scheduler.set_num_threads(2); + try + { + scheduler.schedule(&kernel, hints); + } + catch(const TestException&) + { + return; + } + ARM_COMPUTE_EXPECT_FAIL("Expected exception not caught", framework::LogLevel::ERRORS); +} +#endif // defined(ARM_COMPUTE_CPP_SCHEDULER) && !defined(BARE_METAL) +TEST_SUITE_END() +TEST_SUITE_END() diff --git a/tests/validation/UNIT/GPUTarget.cpp b/tests/validation/UNIT/GPUTarget.cpp index e1b7e1fe3f..2e64635b7a 100644 --- a/tests/validation/UNIT/GPUTarget.cpp +++ b/tests/validation/UNIT/GPUTarget.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,7 +22,6 @@ * SOFTWARE. */ #include "arm_compute/core/GPUTarget.h" -#include "tests/AssetsLibrary.h" #include "tests/Globals.h" #include "tests/Utils.h" #include "tests/framework/Asserts.h" @@ -38,6 +37,7 @@ TEST_SUITE(GPUTarget) TEST_CASE(GetGPUTargetFromName, framework::DatasetMode::ALL) { + ARM_COMPUTE_EXPECT(get_target_from_name("Mali-T000") == GPUTarget::MIDGARD, framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(get_target_from_name("Mali-T600") == GPUTarget::T600, framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(get_target_from_name("Mali-T700") == GPUTarget::T700, framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(get_target_from_name("Mali-T800") == GPUTarget::T800, framework::LogLevel::ERRORS); @@ -46,15 +46,24 @@ TEST_CASE(GetGPUTargetFromName, framework::DatasetMode::ALL) ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G51") == GPUTarget::G51, framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G51BIG") == GPUTarget::G51BIG, framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G51LIT") == GPUTarget::G51LIT, framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G52") == GPUTarget::G52, framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G52LIT") == GPUTarget::G52LIT, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G31") == GPUTarget::G31, framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G76") == GPUTarget::G76, framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G76 r0p0") == GPUTarget::G76, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G52") == GPUTarget::G52, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G52LIT") == GPUTarget::G52LIT, framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G77") == GPUTarget::G77, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G57") == GPUTarget::G57, framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G78") == GPUTarget::G78, framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G78AE") == GPUTarget::G78, framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(get_target_from_name("Mali-TODX") == GPUTarget::TODX, framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(get_target_from_name("Mali-T000") == GPUTarget::MIDGARD, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G68") == GPUTarget::G68, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G78AE") == GPUTarget::G78AE, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G710") == GPUTarget::G710, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G610") == GPUTarget::G610, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G510") == GPUTarget::G510, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G310") == GPUTarget::G310, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G715") == GPUTarget::G715, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G615") == GPUTarget::G615, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G720") == GPUTarget::G720, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(get_target_from_name("Mali-G620") == GPUTarget::G620, framework::LogLevel::ERRORS); } TEST_CASE(GPUTargetIsIn, framework::DatasetMode::ALL) diff --git a/tests/validation/UNIT/SafeIntegerOps.cpp b/tests/validation/UNIT/SafeIntegerOps.cpp index 62f70414f1..13e4ef5125 100644 --- a/tests/validation/UNIT/SafeIntegerOps.cpp +++ b/tests/validation/UNIT/SafeIntegerOps.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,7 +23,6 @@ */ #include "arm_compute/core/GPUTarget.h" #include "arm_compute/core/utils/math/SafeOps.h" -#include "tests/AssetsLibrary.h" #include "tests/Globals.h" #include "tests/Utils.h" #include "tests/framework/Asserts.h" diff --git a/tests/validation/UNIT/SubTensorInfo.cpp b/tests/validation/UNIT/SubTensorInfo.cpp index 5a930620ce..ca5e46550c 100644 --- a/tests/validation/UNIT/SubTensorInfo.cpp +++ b/tests/validation/UNIT/SubTensorInfo.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Arm Limited. + * Copyright (c) 2020-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -69,6 +69,7 @@ TEST_CASE(SubTensorCreation, framework::DatasetMode::ALL) * - A) Extend padding when SubTensor XY does not match parent tensor should fail * B) Extend with zero padding when SubTensor XY does not match parent tensor should succeed * - C) Extend padding when SubTensor XY matches parent tensor should succeed + * - D) Set lock padding to true, so that extend padding would fail */ TEST_CASE(SubTensorPaddingExpansion, framework::DatasetMode::ALL) { @@ -95,6 +96,14 @@ TEST_CASE(SubTensorPaddingExpansion, framework::DatasetMode::ALL) ARM_COMPUTE_EXPECT(tensor_info.padding().top == 2, framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(tensor_info.padding().right == 1, framework::LogLevel::ERRORS); } + + // Test D + { + TensorInfo tensor_info(TensorShape(23U, 17U, 3U), 1, DataType::F32); + SubTensorInfo sub_tensor_info(&tensor_info, TensorShape(4U, 3U, 1U), Coordinates(5, 2, 1)); + sub_tensor_info.set_lock_paddings(true); + ARM_COMPUTE_EXPECT_THROW(sub_tensor_info.extend_padding(PaddingSize(2, 1)), framework::LogLevel::ERRORS); + } } TEST_SUITE_END() // SubTensorInfo diff --git a/tests/validation/UNIT/TensorInfo.cpp b/tests/validation/UNIT/TensorInfo.cpp index cf9dfeabe9..b79c1e9253 100644 --- a/tests/validation/UNIT/TensorInfo.cpp +++ b/tests/validation/UNIT/TensorInfo.cpp @@ -93,7 +93,7 @@ TEST_CASE(Clone, framework::DatasetMode::ALL) // Get clone of current tensor info std::unique_ptr<ITensorInfo> info_clone = info.clone(); - ARM_COMPUTE_EXPECT(info_clone != nullptr, framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(info_clone != nullptr); ARM_COMPUTE_EXPECT(info_clone->total_size() == info.total_size(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(info_clone->num_channels() == info.num_channels(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(info_clone->data_type() == info.data_type(), framework::LogLevel::ERRORS); @@ -184,8 +184,17 @@ TEST_CASE(SymmPerChannelQuantizationInfo, framework::DatasetMode::ALL) ARM_COMPUTE_EXPECT(info.quantization_info().offset().empty(), framework::LogLevel::ERRORS); } -TEST_SUITE_END() // TensorInfoValidation -TEST_SUITE_END() +/** Validates lock paddings flag*/ +TEST_CASE(SubTensorPaddingExpansion, framework::DatasetMode::ALL) +{ + TensorInfo tensor_info(TensorShape(23U, 17U, 3U), 1, DataType::F32); + tensor_info.set_lock_paddings(true); + + // Now lock padding is set to true, therefore the extend padding would fail + ARM_COMPUTE_EXPECT_THROW(tensor_info.extend_padding(PaddingSize(2, 1)), framework::LogLevel::ERRORS); +} +TEST_SUITE_END() // TensorInfo +TEST_SUITE_END() // UNIT } // namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/Validation.h b/tests/validation/Validation.h index d356f05b70..289aca4d08 100644 --- a/tests/validation/Validation.h +++ b/tests/validation/Validation.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -45,6 +45,17 @@ namespace test { namespace validation { +namespace +{ +// Compare if 2 values are both infinities and if they are "equal" (has the same sign) +template <typename T> +inline bool are_equal_infs(T val0, T val1) +{ + const auto same_sign = support::cpp11::signbit(val0) == support::cpp11::signbit(val1); + return (!support::cpp11::isfinite(val0)) && (!support::cpp11::isfinite(val1)) && same_sign; +} +} // namespace + /** Class reprensenting an absolute tolerance value. */ template <typename T> class AbsoluteTolerance @@ -140,7 +151,7 @@ bool compare_dimensions(const Dimensions<T> &dimensions1, const Dimensions<T> &d { ARM_COMPUTE_ERROR_ON(data_layout == DataLayout::UNKNOWN); - if(data_layout == DataLayout::NCHW) + if(data_layout != DataLayout::NHWC) { if(dimensions1.num_dimensions() != dimensions2.num_dimensions()) { @@ -157,24 +168,22 @@ bool compare_dimensions(const Dimensions<T> &dimensions1, const Dimensions<T> &d } else { - // In case a 2D shape becomes 3D after permutation, the permuted tensor will have one dimension more and the first value will be 1 - if((dimensions1.num_dimensions() != dimensions2.num_dimensions()) && ((dimensions1.num_dimensions() != (dimensions2.num_dimensions() + 1)) || (dimensions1.x() != 1))) + // In case a 1D/2D shape becomes 3D after permutation, the permuted tensor will have two/one dimension(s) more and the first (two) value(s) will be 1 + // clang-format off + const auto max_dims = std::max(dimensions1.num_dimensions(), dimensions2.num_dimensions()); + for(unsigned int i = 3; i < max_dims; ++i) { - return false; + if(dimensions1[i] != dimensions2[i]) + { + return false; + } } + // clang-format on if((dimensions1[0] != dimensions2[2]) || (dimensions1[1] != dimensions2[0]) || (dimensions1[2] != dimensions2[1])) { return false; } - - for(unsigned int i = 3; i < dimensions1.num_dimensions(); ++i) - { - if(dimensions1[i] != dimensions2[i]) - { - return false; - } - } } return true; @@ -267,16 +276,6 @@ void validate(std::vector<unsigned int> classified_labels, std::vector<unsigned template <typename T, typename U = AbsoluteTolerance<T>> bool validate(T target, T reference, U tolerance = AbsoluteTolerance<T>()); -/** Validate key points. */ -template <typename T, typename U, typename V = AbsoluteTolerance<float>> -void validate_keypoints(T target_first, T target_last, U reference_first, U reference_last, V tolerance = AbsoluteTolerance<float>(), - float allowed_missing_percentage = 5.f, float allowed_mismatch_percentage = 5.f); - -/** Validate detection windows. */ -template <typename T, typename U, typename V = AbsoluteTolerance<float>> -void validate_detection_windows(T target_first, T target_last, U reference_first, U reference_last, V tolerance = AbsoluteTolerance<float>(), - float allowed_missing_percentage = 5.f, float allowed_mismatch_percentage = 5.f); - template <typename T> struct compare_base { @@ -308,9 +307,9 @@ struct compare<AbsoluteTolerance<U>> : public compare_base<AbsoluteTolerance<U>> /** Perform comparison */ operator bool() const { - if(!support::cpp11::isfinite(this->_target) || !support::cpp11::isfinite(this->_reference)) + if(are_equal_infs(this->_target, this->_reference)) { - return false; + return true; } else if(this->_target == this->_reference) { @@ -334,9 +333,9 @@ struct compare<RelativeTolerance<U>> : public compare_base<RelativeTolerance<U>> /** Perform comparison */ operator bool() const { - if(!support::cpp11::isfinite(this->_target) || !support::cpp11::isfinite(this->_reference)) + if(are_equal_infs(this->_target, this->_reference)) { - return false; + return true; } else if(this->_target == this->_reference) { @@ -506,9 +505,9 @@ void validate_wrap(const IAccessor &tensor, const SimpleTensor<T> &reference, co // check for wrapping if(!equal) { - if(!support::cpp11::isfinite(target_value) || !support::cpp11::isfinite(reference_value)) + if(are_equal_infs(target_value, reference_value)) { - equal = false; + equal = true; } else { @@ -687,210 +686,6 @@ void validate_min_max_loc(const MinMaxLocationValues<T> &target, const MinMaxLoc ARM_COMPUTE_EXPECT(same_coords != reference.max_loc.end(), framework::LogLevel::ERRORS); } } - -/** Check which keypoints from [first1, last1) are missing in [first2, last2) */ -template <typename T, typename U, typename V> -std::pair<int64_t, int64_t> compare_keypoints(T first1, T last1, U first2, U last2, V tolerance, bool check_mismatches = true) -{ - /* Keypoint (x,y) should have similar strength (within tolerance) and other properties in both reference and target */ - const auto compare_props_eq = [&](const KeyPoint & lhs, const KeyPoint & rhs) - { - return compare<V>(lhs.strength, rhs.strength, tolerance) - && lhs.tracking_status == rhs.tracking_status - && lhs.scale == rhs.scale - && lhs.orientation == rhs.orientation - && lhs.error == rhs.error; - }; - - /* Used to sort KeyPoints by coordinates (x, y) */ - const auto compare_coords_lt = [](const KeyPoint & lhs, const KeyPoint & rhs) - { - return std::tie(lhs.x, lhs.y) < std::tie(rhs.x, rhs.y); - }; - - std::sort(first1, last1, compare_coords_lt); - std::sort(first2, last2, compare_coords_lt); - - if(check_mismatches) - { - ARM_COMPUTE_TEST_INFO("Checking for mismatches: ref count = " << std::distance(first1, last1) << " target count = " << std::distance(first2, last2)); - } - - int64_t num_missing = 0; - int64_t num_mismatches = 0; - bool rest_missing = false; - - while(first1 != last1) - { - if(first2 == last2) - { - rest_missing = true; - break; - } - - if(compare_coords_lt(*first1, *first2)) - { - ++num_missing; - ARM_COMPUTE_TEST_INFO("Key point not found"); - ARM_COMPUTE_TEST_INFO("keypoint1 = " << *first1++); - framework::ARM_COMPUTE_PRINT_INFO(); - } - else - { - if(!compare_coords_lt(*first2, *first1)) // Equal coordinates - { - if(check_mismatches && !compare_props_eq(*first1, *first2)) // Check other properties - { - ++num_mismatches; - ARM_COMPUTE_TEST_INFO("Mismatching keypoint"); - ARM_COMPUTE_TEST_INFO("keypoint1 [ref] = " << *first1); - ARM_COMPUTE_TEST_INFO("keypoint2 [tgt] = " << *first2); - framework::ARM_COMPUTE_PRINT_INFO(); - } - ++first1; - } - ++first2; - } - } - - if(rest_missing) - { - while(first1 != last1) - { - ++num_missing; - ARM_COMPUTE_TEST_INFO("Key point not found"); - ARM_COMPUTE_TEST_INFO("keypoint1 = " << *first1++); - framework::ARM_COMPUTE_PRINT_INFO(); - } - } - - return std::make_pair(num_missing, num_mismatches); -} - -template <typename T, typename U, typename V> -void validate_keypoints(T target_first, T target_last, U reference_first, U reference_last, V tolerance, float allowed_missing_percentage, float allowed_mismatch_percentage) -{ - if(framework::Framework::get().configure_only() && framework::Framework::get().new_fixture_call()) - { - return; - } - - const int64_t num_elements_target = std::distance(target_first, target_last); - const int64_t num_elements_reference = std::distance(reference_first, reference_last); - - int64_t num_missing = 0; - int64_t num_mismatches = 0; - - if(num_elements_reference > 0) - { - std::tie(num_missing, num_mismatches) = compare_keypoints(reference_first, reference_last, target_first, target_last, tolerance); - - const float percent_missing = static_cast<float>(num_missing) / num_elements_reference * 100.f; - const float percent_mismatches = static_cast<float>(num_mismatches) / num_elements_reference * 100.f; - - ARM_COMPUTE_TEST_INFO(num_missing << " keypoints (" << std::fixed << std::setprecision(2) << percent_missing << "%) in ref are missing from target"); - ARM_COMPUTE_TEST_INFO("Missing (not in tgt): " << num_missing << "/" << num_elements_reference << " = " << std::fixed << std::setprecision(2) << percent_missing - << "% \tMax allowed: " << allowed_missing_percentage << "%"); - ARM_COMPUTE_EXPECT(percent_missing <= allowed_missing_percentage, framework::LogLevel::ERRORS); - - ARM_COMPUTE_TEST_INFO(num_mismatches << " keypoints (" << std::fixed << std::setprecision(2) << percent_mismatches << "%) mismatched"); - ARM_COMPUTE_TEST_INFO("Mismatched keypoints: " << num_mismatches << "/" << num_elements_reference << " = " << std::fixed << std::setprecision(2) << percent_mismatches - << "% \tMax allowed: " << allowed_mismatch_percentage << "%"); - ARM_COMPUTE_EXPECT(percent_mismatches <= allowed_mismatch_percentage, framework::LogLevel::ERRORS); - } - - if(num_elements_target > 0) - { - // Note: no need to check for mismatches a second time (last argument is 'false') - std::tie(num_missing, num_mismatches) = compare_keypoints(target_first, target_last, reference_first, reference_last, tolerance, false); - - const float percent_missing = static_cast<float>(num_missing) / num_elements_target * 100.f; - - ARM_COMPUTE_TEST_INFO(num_missing << " keypoints (" << std::fixed << std::setprecision(2) << percent_missing << "%) in target are missing from ref"); - ARM_COMPUTE_TEST_INFO("Missing (not in ref): " << num_missing << "/" << num_elements_target << " = " << std::fixed << std::setprecision(2) << percent_missing - << "% \tMax allowed: " << allowed_missing_percentage << "%"); - ARM_COMPUTE_EXPECT(percent_missing <= allowed_missing_percentage, framework::LogLevel::ERRORS); - } -} - -/** Check which detection windows from [first1, last1) are missing in [first2, last2) */ -template <typename T, typename U, typename V> -std::pair<int64_t, int64_t> compare_detection_windows(T first1, T last1, U first2, U last2, V tolerance) -{ - int64_t num_missing = 0; - int64_t num_mismatches = 0; - - while(first1 != last1) - { - const auto window = std::find_if(first2, last2, [&](DetectionWindow window) - { - return window.x == first1->x && window.y == first1->y && window.width == first1->width && window.height == first1->height && window.idx_class == first1->idx_class; - }); - - if(window == last2) - { - ++num_missing; - ARM_COMPUTE_TEST_INFO("Detection window not found " << *first1) - framework::ARM_COMPUTE_PRINT_INFO(); - } - else - { - if(!compare<V>(window->score, first1->score, tolerance)) - { - ++num_mismatches; - ARM_COMPUTE_TEST_INFO("Mismatching detection window") - ARM_COMPUTE_TEST_INFO("detection window 1= " << *first1) - ARM_COMPUTE_TEST_INFO("detection window 2= " << *window) - framework::ARM_COMPUTE_PRINT_INFO(); - } - } - - ++first1; - } - - return std::make_pair(num_missing, num_mismatches); -} - -template <typename T, typename U, typename V> -void validate_detection_windows(T target_first, T target_last, U reference_first, U reference_last, V tolerance, - float allowed_missing_percentage, float allowed_mismatch_percentage) -{ - if(framework::Framework::get().configure_only() && framework::Framework::get().new_fixture_call()) - { - return; - } - - const int64_t num_elements_target = std::distance(target_first, target_last); - const int64_t num_elements_reference = std::distance(reference_first, reference_last); - - int64_t num_missing = 0; - int64_t num_mismatches = 0; - - if(num_elements_reference > 0) - { - std::tie(num_missing, num_mismatches) = compare_detection_windows(reference_first, reference_last, target_first, target_last, tolerance); - - const float percent_missing = static_cast<float>(num_missing) / num_elements_reference * 100.f; - const float percent_mismatches = static_cast<float>(num_mismatches) / num_elements_reference * 100.f; - - ARM_COMPUTE_TEST_INFO(num_missing << " detection windows (" << std::fixed << std::setprecision(2) << percent_missing << "%) are missing in target"); - ARM_COMPUTE_EXPECT(percent_missing <= allowed_missing_percentage, framework::LogLevel::ERRORS); - - ARM_COMPUTE_TEST_INFO(num_mismatches << " detection windows (" << std::fixed << std::setprecision(2) << percent_mismatches << "%) mismatched"); - ARM_COMPUTE_EXPECT(percent_mismatches <= allowed_mismatch_percentage, framework::LogLevel::ERRORS); - } - - if(num_elements_target > 0) - { - std::tie(num_missing, num_mismatches) = compare_detection_windows(target_first, target_last, reference_first, reference_last, tolerance); - - const float percent_missing = static_cast<float>(num_missing) / num_elements_target * 100.f; - - ARM_COMPUTE_TEST_INFO(num_missing << " detection windows (" << std::fixed << std::setprecision(2) << percent_missing << "%) are not part of target"); - ARM_COMPUTE_EXPECT(percent_missing <= allowed_missing_percentage, framework::LogLevel::ERRORS); - } -} - } // namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/cpu/unit/Context.cpp b/tests/validation/cpu/unit/Context.cpp index bf2a02df5d..42247ba1da 100644 --- a/tests/validation/cpu/unit/Context.cpp +++ b/tests/validation/cpu/unit/Context.cpp @@ -21,11 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/validation/Validation.h" - -#include "arm_compute/Acl.hpp" +#include "tests/validation/fixtures/UNIT/ContextFixture.h" #include "src/cpu/CpuContext.h" @@ -78,92 +74,10 @@ TEST_CASE(CreateContextWithInvalidOptions, framework::DatasetMode::ALL) ARM_COMPUTE_ASSERT(ctx == nullptr); } -/** Test-case for AclDestroyContext - * - * Validate that AclDestroyContext behaves as expected when invalid inputs as context are given - * - * Test Steps: - * - Call AclDestroyContext with null context - * - Confirm that AclInvalidArgument is reported - * - Call AclDestroyContext on empty array - * - Confirm that AclInvalidArgument is reported - * - Call AclDestroyContext on an ACL object other than AclContext - * - Confirm that AclInvalidArgument is reported - * - Confirm that context is still nullptr - */ -TEST_CASE(DestroyInvalidContext, framework::DatasetMode::ALL) -{ - AclContext ctx = nullptr; - std::array<char, 256> empty_array{}; - AclContext valid_ctx = nullptr; - ARM_COMPUTE_ASSERT(AclCreateContext(&valid_ctx, AclCpu, nullptr) == AclStatus::AclSuccess); - ARM_COMPUTE_ASSERT(AclDestroyContext(ctx) == AclStatus::AclInvalidArgument); - ARM_COMPUTE_ASSERT(AclDestroyContext(reinterpret_cast<AclContext>(empty_array.data())) == AclStatus::AclInvalidArgument); - ARM_COMPUTE_ASSERT(ctx == nullptr); - ARM_COMPUTE_ASSERT(AclDestroyContext(valid_ctx) == AclStatus::AclSuccess); -} - -/** Test-case for AclCreateContext and AclDestroy Context - * - * Validate that AclCreateContext can create and destroy a context - * - * Test Steps: - * - Call AclCreateContext with valid target - * - Confirm that context is not nullptr and error code is AclSuccess - * - Destroy context - * - Confirm that AclSuccess is reported - */ -TEST_CASE(SimpleContextCApi, framework::DatasetMode::ALL) -{ - AclContext ctx = nullptr; - ARM_COMPUTE_ASSERT(AclCreateContext(&ctx, AclCpu, nullptr) == AclStatus::AclSuccess); - ARM_COMPUTE_ASSERT(ctx != nullptr); - ARM_COMPUTE_ASSERT(AclDestroyContext(ctx) == AclStatus::AclSuccess); -} - -/** Test-case for Context from the C++ interface - * - * Test Steps: - * - Create a Context obejct - * - Confirm that StatusCode::Success is reported - * - Confirm that equality operator works - * - Confirm that inequality operator works - */ -TEST_CASE(SimpleContextCppApi, framework::DatasetMode::ALL) -{ - acl::StatusCode status = acl::StatusCode::Success; - acl::Context ctx(acl::Target::Cpu, &status); - ARM_COMPUTE_ASSERT(status == acl::StatusCode::Success); - - auto ctx_eq = ctx; - ARM_COMPUTE_ASSERT(ctx_eq == ctx); - - acl::Context ctx_ienq(acl::Target::Cpu, &status); - ARM_COMPUTE_ASSERT(status == acl::StatusCode::Success); - ARM_COMPUTE_ASSERT(ctx_ienq != ctx); -} - -/** Test-case for CpuCapabilities - * - * Validate that AclCreateContext can create/destroy multiple contexts with different options - * - * Test Steps: - * - Call AclCreateContext with different targets - * - Confirm that AclSuccess is reported - * - Destroy all contexts - * - Confirm that AclSuccess is reported - */ -TEST_CASE(MultipleContexts, framework::DatasetMode::ALL) -{ - const unsigned int num_tests = 5; - std::array<AclContext, num_tests> ctxs{}; - for(unsigned int i = 0; i < num_tests; ++i) - { - ARM_COMPUTE_ASSERT(AclCreateContext(&ctxs[i], AclTarget::AclCpu, nullptr) == AclStatus::AclSuccess); - ARM_COMPUTE_ASSERT(ctxs[i] != nullptr); - ARM_COMPUTE_ASSERT(AclDestroyContext(ctxs[i]) == AclStatus::AclSuccess); - } -} +EMPTY_BODY_FIXTURE_TEST_CASE(DestroyInvalidContext, DestroyInvalidContextFixture<AclTarget::AclCpu>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(SimpleContextCApi, SimpleContextCApiFixture<AclTarget::AclCpu>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(SimpleContextCppApi, SimpleContextCppApiFixture<acl::Target::Cpu>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(MultipleContexts, MultipleContextsFixture<AclTarget::AclCpu>, framework::DatasetMode::ALL) /** Test-case for CpuCapabilities * @@ -176,17 +90,17 @@ TEST_CASE(MultipleContexts, framework::DatasetMode::ALL) */ TEST_CASE(CpuCapabilities, framework::DatasetMode::ALL) { - AclContextOptions opts = acl_default_ctx_options; - opts.capabilities = AclCpuCapabilitiesDot | AclCpuCapabilitiesMmlaInt8 | AclCpuCapabilitiesSve2; - arm_compute::cpu::CpuContext ctx(&opts); + acl::Context::Options opts; + opts.copts.capabilities = AclCpuCapabilitiesDot | AclCpuCapabilitiesMmlaInt8 | AclCpuCapabilitiesSve2; + arm_compute::cpu::CpuContext ctx(&opts.copts); - ARM_COMPUTE_ASSERT(ctx.capabilities().dot == true); - ARM_COMPUTE_ASSERT(ctx.capabilities().mmla_int8 == true); - ARM_COMPUTE_ASSERT(ctx.capabilities().sve2 == true); - ARM_COMPUTE_ASSERT(ctx.capabilities().fp16 == false); + ARM_COMPUTE_ASSERT(ctx.capabilities().cpu_info.has_dotprod() == true); + ARM_COMPUTE_ASSERT(ctx.capabilities().cpu_info.has_i8mm() == true); + ARM_COMPUTE_ASSERT(ctx.capabilities().cpu_info.has_sve2() == true); + ARM_COMPUTE_ASSERT(ctx.capabilities().cpu_info.has_fp16() == false); arm_compute::cpu::CpuContext ctx_legacy(nullptr); - ARM_COMPUTE_ASSERT(ctx_legacy.capabilities().neon == true); + ARM_COMPUTE_ASSERT(ctx_legacy.capabilities().cpu_info.has_neon() == true); } TEST_SUITE_END() // Context diff --git a/tests/validation/reference/HOGDetector.h b/tests/validation/cpu/unit/Queue.cpp index 9809ae384f..7d977cc48e 100644 --- a/tests/validation/reference/HOGDetector.h +++ b/tests/validation/cpu/unit/Queue.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,14 +21,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_HOG_DETECTOR_H -#define ARM_COMPUTE_TEST_HOG_DETECTOR_H +#include "tests/validation/fixtures/UNIT/QueueFixture.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Utils.h" -#include "tests/SimpleTensor.h" - -#include <vector> +#include "src/cpu/CpuQueue.h" namespace arm_compute { @@ -36,13 +31,18 @@ namespace test { namespace validation { -namespace reference -{ -template <typename T> -std::vector<DetectionWindow> hog_detector(const SimpleTensor<T> &src, const std::vector<T> &descriptor, unsigned int max_num_detection_windows, - const HOGInfo &hog_info, const Size2D &detection_window_stride, float threshold = 0.0f, uint16_t idx_class = 0); -} // namespace reference +TEST_SUITE(CPU) +TEST_SUITE(UNIT) +TEST_SUITE(Queue) + +EMPTY_BODY_FIXTURE_TEST_CASE(CreateQueueWithInvalidContext, CreateQueueWithInvalidContextFixture, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(CreateQueuerWithInvalidOptions, CreateQueuerWithInvalidOptionsFixture<acl::Target::Cpu>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(DestroyInvalidQueue, DestroyInvalidQueueFixture<acl::Target::Cpu>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(SimpleQueue, SimpleQueueFixture<acl::Target::Cpu>, framework::DatasetMode::ALL) + +TEST_SUITE_END() // Queue +TEST_SUITE_END() // UNIT +TEST_SUITE_END() // CPU } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_HOG_DETECTOR_H */ diff --git a/tests/validation/cpu/unit/Tensor.cpp b/tests/validation/cpu/unit/Tensor.cpp new file mode 100644 index 0000000000..cc0c55758f --- /dev/null +++ b/tests/validation/cpu/unit/Tensor.cpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "tests/validation/fixtures/UNIT/TensorFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +TEST_SUITE(CPU) +TEST_SUITE(UNIT) +TEST_SUITE(Tensor) + +EMPTY_BODY_FIXTURE_TEST_CASE(CreateTensorWithInvalidContext, CreateTensorWithInvalidContextFixture, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(CreateTensorWithInvalidDescriptor, CreateTensorWithInvalidDescriptorFixture<acl::Target::Cpu>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(DestroyInvalidTensor, DestroyInvalidTensorFixture<acl::Target::Cpu>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(SimpleTensor, SimpleTensorFixture<acl::Target::Cpu>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(TensorStress, TensorStressFixture<acl::Target::Cpu>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(MapInvalidTensor, MapInvalidTensorFixture<acl::Target::Cpu>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(MapNotAllocatedTensor, MapNotAllocatedTensorFixture<acl::Target::Cpu>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(MapAllocatedTensor, MapAllocatedTensorFixture<acl::Target::Cpu>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(ImportMemory, ImportMemoryFixture<acl::Target::Cpu>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(GetSize, TensorSizeFixture<acl::Target::Cpu>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(GetInvalidSize, InvalidTensorSizeFixture<acl::Target::Cpu>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(GetDescriptor, DescriptorConversionFixture<acl::Target::Cpu>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(GetInvalidDescriptor, InvalidDescriptorConversionFixture<acl::Target::Cpu>, framework::DatasetMode::ALL) + +TEST_SUITE_END() // Tensor +TEST_SUITE_END() // UNIT +TEST_SUITE_END() // CPU +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/reference/HOGMultiDetection.h b/tests/validation/cpu/unit/TensorPack.cpp index 7194af70c7..f019e8e3c4 100644 --- a/tests/validation/reference/HOGMultiDetection.h +++ b/tests/validation/cpu/unit/TensorPack.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,13 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_HOG_MULTI_DETECTION_H -#define ARM_COMPUTE_TEST_HOG_MULTI_DETECTION_H - -#include "arm_compute/core/Types.h" -#include "tests/SimpleTensor.h" - -#include <vector> +#include "tests/validation/fixtures/UNIT/TensorPackFixture.h" namespace arm_compute { @@ -35,14 +29,19 @@ namespace test { namespace validation { -namespace reference -{ -template <typename T> -std::vector<DetectionWindow> hog_multi_detection(const SimpleTensor<T> &src, BorderMode border_mode, T constant_border_value, - const std::vector<HOGInfo> &models, std::vector<std::vector<float>> descriptors, - unsigned int max_num_detection_windows, float threshold = 0.0f, bool non_maxima_suppression = false, float min_distance = 1.0f); -} // namespace reference +TEST_SUITE(CPU) +TEST_SUITE(UNIT) +TEST_SUITE(TensorPack) + +EMPTY_BODY_FIXTURE_TEST_CASE(CreateTensorPackWithInvalidContext, CreateTensorPackWithInvalidContextFixture, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(DestroyInvalidTensorPack, DestroyInvalidTensorPackFixture<acl::Target::Cpu>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(AddInvalidObjectToTensorPack, AddInvalidObjectToTensorPackFixture<acl::Target::Cpu>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(SimpleTensorPack, SimpleTensorPackFixture<acl::Target::Cpu>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(MultipleTensorsInPack, MultipleTensorsInPackFixture<acl::Target::Cpu>, framework::DatasetMode::ALL) + +TEST_SUITE_END() // Tensor +TEST_SUITE_END() // UNIT +TEST_SUITE_END() // CPU } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_HOG_MULTI_DETECTION_H */ diff --git a/tests/validation/dynamic_fusion/Utils.h b/tests/validation/dynamic_fusion/Utils.h new file mode 100644 index 0000000000..72e9ec5955 --- /dev/null +++ b/tests/validation/dynamic_fusion/Utils.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef TESTS_VALIDATION_DYNAMIC_FUSION_UTILS +#define TESTS_VALIDATION_DYNAMIC_FUSION_UTILS + +#include "tests/AssetsLibrary.h" +#include "utils/Utils.h" + +#include <chrono> +#include <limits> +#include <type_traits> + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace utils +{ +/** A pair of macros which measures the wall clock time, and records it into a map measurement_map with name clock_name + * + */ +#define TICK(clock_name) \ + auto clock_name##_tick = std::chrono::high_resolution_clock::now(); +#define TOCK(clock_name, measurement_map) \ + auto clock_name##_tock = std::chrono::high_resolution_clock::now(); \ + measurement_map["\"" #clock_name "\""] = duration_cast<microseconds>(clock_name##_tock - clock_name##_tick); +#define TOCK_AVG(clock_name, measurement_map, num_iterations) \ + auto clock_name##_tock = std::chrono::high_resolution_clock::now(); \ + measurement_map["\"" #clock_name "\""] = duration_cast<microseconds>((clock_name##_tock - clock_name##_tick) / (num_iterations)); + +template <typename T, typename U> +void fill(U &&tensor, int seed, AssetsLibrary *library) +{ + static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported."); + using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type; + + DistributionType distribution{ T(-1.0f), T(1.0f) }; + library->fill(tensor, distribution, seed); + + // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0) + DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) }; + library->fill_borders_with_garbage(tensor, distribution_inf, seed); +} +} // namespace utils +} // namespace validation +} // namespace test +} // namespace arm_compute + +#endif /* TESTS_VALIDATION_DYNAMIC_FUSION_UTILS */ diff --git a/tests/validation/dynamic_fusion/gpu/Integration.cpp b/tests/validation/dynamic_fusion/gpu/Integration.cpp new file mode 100644 index 0000000000..453983c077 --- /dev/null +++ b/tests/validation/dynamic_fusion/gpu/Integration.cpp @@ -0,0 +1,642 @@ +/* + * Copyright (c) 2022-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/QuantizationInfo.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/CastAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMul.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/framework/Macros.h" +#include "tests/validation/dynamic_fusion/Utils.h" +#include "tests/validation/reference/ActivationLayer.h" +#include "tests/validation/reference/ConvolutionLayer.h" +#include "tests/validation/reference/DepthConvertLayer.h" +#include "tests/validation/reference/DepthwiseConvolutionLayer.h" +#include "tests/validation/reference/ElementwiseOperations.h" +#include "tests/validation/reference/Permute.h" +#include "tests/validation/reference/PixelWiseMultiplication.h" +#include "tests/validation/Validation.h" + +using namespace arm_compute::experimental::dynamic_fusion; +using namespace arm_compute::test::validation::utils; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +TEST_SUITE(CL) +TEST_SUITE(INTEGRATION) +TEST_SUITE(DYNAMIC_FUSION) + +TEST_CASE(Conv2d, framework::DatasetMode::ALL) +{ + /* Computation: + * out = conv2d1x1(direct_conv)(input, weights, bias) + */ + CLScheduler::get().default_reinit(); + + const auto data_type = DataType::F32; + const auto data_layout = DataLayout::NHWC; + const auto t_input_shape = TensorShape(384, 12, 12); + const auto t_weight_shape = TensorShape(384, 1, 1, 16); + const auto t_dst_shape = TensorShape(16, 12, 12); + + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Fuse conv2d + Conv2dAttributes conv2d_attr{}; + ITensorInfo *input_info = context.create_tensor_info(t_input_shape, 1, data_type, data_layout); + ITensorInfo *weight_info = context.create_tensor_info(TensorInfo(t_weight_shape, 1, data_type, data_layout)); + + ITensorInfo *conv_out_info = GpuConv2d::create_op(sketch, input_info, weight_info, nullptr, conv2d_attr); + + ITensorInfo *dst_info = context.create_tensor_info(); + GpuOutput::create_op(sketch, conv_out_info, dst_info); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + // (Important) Allocate auxiliary tensor memory if there are any + // Instead of using ACL allocated memory, the user can choose to import memory into the tensors + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + // auto buf = cl::Buffer(); + // tensor->allocator()->import_memory(buf); // Or, import external memory + } + + // Construct user tensors + CLTensor t_input{}; + CLTensor t_weight{}; + CLTensor t_dst{}; + + // Initialize user tensors + t_input.allocator()->init(*input_info); + t_weight.allocator()->init(*weight_info); + t_dst.allocator()->init(*dst_info); + + // Allocate and fill user tensors + // Instead of using ACL allocator, the user can choose to import memory into the tensors + t_input.allocator()->allocate(); + t_weight.allocator()->allocate(); + t_dst.allocator()->allocate(); + fill<float>(CLAccessor(t_input), 0, library.get()); + fill<float>(CLAccessor(t_weight), 1, library.get()); + + // Run runtime + runtime.run({&t_input, &t_weight, &t_dst}); + + // Create reference + SimpleTensor<float> ref_t_input{t_input_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC}; + SimpleTensor<float> ref_t_weight{t_weight_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC}; + SimpleTensor<float> ref_t_bias_placeholder{t_dst_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC}; + + // Fill reference + fill<float>(ref_t_input, 0, library.get()); + fill<float>(ref_t_weight, 1, library.get()); + + auto ref_t_input_nchw = reference::permute(ref_t_input, PermutationVector(1U, 2U, 0U)); + auto ref_t_weight_nchw = reference::permute(ref_t_weight, PermutationVector(1U, 2U, 0U)); + auto ref_t_bias_placeholder_nchw = reference::permute(ref_t_bias_placeholder, PermutationVector(1U, 2U, 0U)); + auto t_dst_shape_nchw = t_dst_shape; + permute(t_dst_shape_nchw, PermutationVector(1U, 2U, 0U)); + + PadStrideInfo legacy_pad_stride(conv2d_attr.stride().x(), conv2d_attr.stride().y(), conv2d_attr.pad().left, + conv2d_attr.pad().right, conv2d_attr.pad().top, conv2d_attr.pad().bottom, + DimensionRoundingType{}); + auto ref_t_dst_nchw = reference::convolution_layer(ref_t_input_nchw, ref_t_weight_nchw, ref_t_bias_placeholder_nchw, + t_dst_shape_nchw, legacy_pad_stride, conv2d_attr.dilation()); + const auto ref_t_dst = reference::permute(ref_t_dst_nchw, PermutationVector(2U, 0U, 1U)); + + RelativeTolerance<float> tolerance_f32( + 0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */ + validate(CLAccessor(t_dst), ref_t_dst_nchw, tolerance_f32); +} + +TEST_CASE(Add_Output_Add_Output, framework::DatasetMode::ALL) +{ + /* Computation: + * out_0 = in_0 + in_1 + * out_1 = out_0 + in_2 + */ + CLScheduler::get().default_reinit(); + + const auto data_type = DataType::F32; + const auto t_input_shape = TensorShape(33, 3, 2); + + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + ITensorInfo *in_0_info = context.create_tensor_info(t_input_shape, 1, data_type); + ITensorInfo *in_1_info = context.create_tensor_info(t_input_shape, 1, data_type); + ITensorInfo *in_2_info = context.create_tensor_info(t_input_shape, 1, data_type); + + ITensorInfo *out_0_info = context.create_tensor_info(); + ITensorInfo *out_1_info = context.create_tensor_info(); + + ITensorInfo *ans_0_info = GpuAdd::create_op(sketch, in_0_info, in_1_info); + GpuOutput::create_op(sketch, ans_0_info, out_0_info); + ITensorInfo *ans_1_info = GpuAdd::create_op(sketch, ans_0_info, in_2_info); + GpuOutput::create_op(sketch, ans_1_info, out_1_info); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + // (Important) Allocate auxiliary tensor memory if there are any + // Instead of using ACL allocated memory, the user can choose to import memory into the tensors + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + // auto buf = cl::Buffer(); + // tensor->allocator()->import_memory(buf); // Or, import external memory + } + + // Construct user tensors + CLTensor t_in_0{}; + CLTensor t_in_1{}; + CLTensor t_in_2{}; + + CLTensor t_out_0{}; + CLTensor t_out_1{}; + + // Initialize user tensors + t_in_0.allocator()->init(*in_0_info); + t_in_1.allocator()->init(*in_1_info); + t_in_2.allocator()->init(*in_2_info); + + t_out_0.allocator()->init(*out_0_info); + t_out_1.allocator()->init(*out_1_info); + + // Allocate and fill user tensors + // Instead of using ACL allocator, the user can choose to import memory into the tensors + t_in_0.allocator()->allocate(); + t_in_1.allocator()->allocate(); + t_in_2.allocator()->allocate(); + + t_out_0.allocator()->allocate(); + t_out_1.allocator()->allocate(); + + fill<float>(CLAccessor(t_in_0), 0, library.get()); + fill<float>(CLAccessor(t_in_1), 1, library.get()); + fill<float>(CLAccessor(t_in_2), 2, library.get()); + + // Run runtime + runtime.run({&t_in_0, &t_in_1, &t_in_2, &t_out_0, &t_out_1}); + + // Create reference + SimpleTensor<float> ref_t_in_0{t_input_shape, data_type, 1, QuantizationInfo()}; + SimpleTensor<float> ref_t_in_1{t_input_shape, data_type, 1, QuantizationInfo()}; + SimpleTensor<float> ref_t_in_2{t_input_shape, data_type, 1, QuantizationInfo()}; + + SimpleTensor<float> ref_t_out_0{t_input_shape, data_type, 1, QuantizationInfo()}; + SimpleTensor<float> ref_t_out_1{t_input_shape, data_type, 1, QuantizationInfo()}; + + // Fill reference + fill<float>(ref_t_in_0, 0, library.get()); + fill<float>(ref_t_in_1, 1, library.get()); + fill<float>(ref_t_in_2, 2, library.get()); + + reference::arithmetic_operation(ArithmeticOperation::ADD, ref_t_in_0, ref_t_in_1, ref_t_out_0, ConvertPolicy::WRAP); + reference::arithmetic_operation(ArithmeticOperation::ADD, ref_t_out_0, ref_t_in_2, ref_t_out_1, + ConvertPolicy::WRAP); + + RelativeTolerance<float> tolerance_f32( + 0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */ + validate(CLAccessor(t_out_0), ref_t_out_0, tolerance_f32); + validate(CLAccessor(t_out_1), ref_t_out_1, tolerance_f32); +} +TEST_CASE(Add_Output_Add_Cast_Cast_Output, framework::DatasetMode::ALL) +{ + /* Computation: + * out_0 = in_0 + in_1 + * out_1 = float(int32_t(out_0 + in_2)) + */ + CLScheduler::get().default_reinit(); + + const auto data_type = DataType::F32; + const auto t_input_shape = TensorShape(3, 8, 5); + + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + ITensorInfo *in_0_info = context.create_tensor_info(t_input_shape, 1, data_type); + ITensorInfo *in_1_info = context.create_tensor_info(t_input_shape, 1, data_type); + ITensorInfo *in_2_info = context.create_tensor_info(t_input_shape, 1, data_type); + + ITensorInfo *out_0_info = context.create_tensor_info(); + ITensorInfo *out_1_info = context.create_tensor_info(); + + CastAttributes cast_0_attr; + cast_0_attr.data_type(DataType::F16); + + CastAttributes cast_1_attr; + cast_1_attr.data_type(DataType::F32); + + ITensorInfo *ans_0_info = GpuAdd::create_op(sketch, in_0_info, in_1_info); + GpuOutput::create_op(sketch, ans_0_info, out_0_info); + ITensorInfo *ans_1_info = GpuAdd::create_op(sketch, ans_0_info, in_2_info); + ITensorInfo *ans_2_info = GpuCast::create_op(sketch, ans_1_info, cast_0_attr); + ITensorInfo *ans_3_info = GpuCast::create_op(sketch, ans_2_info, cast_1_attr); + GpuOutput::create_op(sketch, ans_3_info, out_1_info); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + // (Important) Allocate auxiliary tensor memory if there are any + // Instead of using ACL allocated memory, the user can choose to import memory into the tensors + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + // auto buf = cl::Buffer(); + // tensor->allocator()->import_memory(buf); // Or, import external memory + } + + // Construct user tensors + CLTensor t_in_0{}; + CLTensor t_in_1{}; + CLTensor t_in_2{}; + + CLTensor t_out_0{}; + CLTensor t_out_1{}; + + // Initialize user tensors + t_in_0.allocator()->init(*in_0_info); + t_in_1.allocator()->init(*in_1_info); + t_in_2.allocator()->init(*in_2_info); + + t_out_0.allocator()->init(*out_0_info); + t_out_1.allocator()->init(*out_1_info); + + // Allocate and fill user tensors + // Instead of using ACL allocator, the user can choose to import memory into the tensors + t_in_0.allocator()->allocate(); + t_in_1.allocator()->allocate(); + t_in_2.allocator()->allocate(); + + t_out_0.allocator()->allocate(); + t_out_1.allocator()->allocate(); + + fill<float>(CLAccessor(t_in_0), 0, library.get()); + fill<float>(CLAccessor(t_in_1), 1, library.get()); + fill<float>(CLAccessor(t_in_2), 2, library.get()); + + // Run runtime + runtime.run({&t_in_0, &t_in_1, &t_in_2, &t_out_0, &t_out_1}); + + // Create reference + SimpleTensor<float> ref_t_in_0{t_input_shape, data_type, 1, QuantizationInfo()}; + SimpleTensor<float> ref_t_in_1{t_input_shape, data_type, 1, QuantizationInfo()}; + SimpleTensor<float> ref_t_in_2{t_input_shape, data_type, 1, QuantizationInfo()}; + + SimpleTensor<float> ref_t_out_0{t_input_shape, data_type, 1, QuantizationInfo()}; + SimpleTensor<float> ref_t_ans_1{t_input_shape, data_type, 1, QuantizationInfo()}; + + // Fill reference + fill<float>(ref_t_in_0, 0, library.get()); + fill<float>(ref_t_in_1, 1, library.get()); + fill<float>(ref_t_in_2, 2, library.get()); + + reference::arithmetic_operation(ArithmeticOperation::ADD, ref_t_in_0, ref_t_in_1, ref_t_out_0, ConvertPolicy::WRAP); + reference::arithmetic_operation(ArithmeticOperation::ADD, ref_t_out_0, ref_t_in_2, ref_t_ans_1, + ConvertPolicy::WRAP); + const auto ref_t_ans_2 = + reference::depth_convert<float, int32_t>(ref_t_ans_1, DataType::S32, ConvertPolicy::SATURATE, 0); + const auto ref_t_out_1 = + reference::depth_convert<int32_t, float>(ref_t_ans_2, DataType::F32, ConvertPolicy::SATURATE, 0); + + RelativeTolerance<float> tolerance_add_f32(0.001f); + AbsoluteTolerance<float> tolerance_cast_f32(1.0f); + validate(CLAccessor(t_out_0), ref_t_out_0, tolerance_add_f32); + validate(CLAccessor(t_out_1), ref_t_out_1, tolerance_cast_f32); +} + +/// TODO: COMPMID-6593 : This integration test fails with CKW backend. +/// It was not enabled for CKW before, therefore went unnoticed. +TEST_CASE(Conv2d_Sigmoid_DepthwiseConv2d_Mul, framework::DatasetMode::DISABLED) +{ + // (tensor0) + // | + // ======|============================================== Sketch 0 + // | (tensor1) +---- (tensor2) + // | | | | + // +-- input -- weights -- biases --+ | + // | | | + // | Conv2d | | + // | | | + // +----------- output -------------+ | + // | | + // +-- input ---+ | + // | | | + // | Sigmoid | | + // | | | + // +-- output --+ | + // | | + // +-- input ---+ | + // | | | + // | Output | | + // | | | + // +-- output --+ | + // | | + // (tensor5) | + // | | + // +--------+ | + // ======|=============================|================ Sketch 1 + // | (tensor3) (tensor4) | + // | | | | + // +-- input -- weights -- biases --+ | + // | | | + // | DepthwiseConv2d | | + // | | | + // +----------- output -------------+ | + // | | + // +--+ +----------------+ + // | | + // +-- lhs -- rhs --+ + // | | + // | Multiply | + // | | + // +---- output ----+ + // | + // +-- input ---+ + // | | + // | Output | + // | | + // +-- output --+ + // | + // (tensor6) + + TensorShape conv2d_src_shape(10, 20, 30); + TensorShape conv2d_wei_shape(10, 3, 3, 5); + TensorShape conv2d_bia_shape(5); + TensorShape conv2d_dst_shape(5, 18, 28); + TensorShape dwc_wei_shape(5, 3, 3); + TensorShape dwc_bia_shape(5); + TensorShape dwc_dst_shape(5, 16, 26); + + // Initialize the context. + CLScheduler::get().default_reinit(); + + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + GpuWorkloadContext context(&cl_compile_ctx); + + auto tensor0_info = context.create_tensor_info(conv2d_src_shape, 1, DataType::F32, DataLayout::NHWC); + + // Create the first sketch: conv2d + cast + output. + GpuWorkloadSketch sketch0(&context); + + Conv2dAttributes conv2d_attr; + auto tensor1_info = context.create_tensor_info(conv2d_wei_shape, 1, DataType::F32, DataLayout::NHWC); + auto tensor2_info = context.create_tensor_info(conv2d_bia_shape, 1, DataType::F32, DataLayout::NHWC); + ARM_COMPUTE_EXPECT(GpuConv2d::validate_op(sketch0, tensor0_info, tensor1_info, tensor2_info, conv2d_attr), + framework::LogLevel::ERRORS); + auto ans_info = GpuConv2d::create_op(sketch0, tensor0_info, tensor1_info, tensor2_info, conv2d_attr); + + ARM_COMPUTE_EXPECT(GpuSigmoid::validate_op(sketch0, ans_info), framework::LogLevel::ERRORS); + ans_info = GpuSigmoid::create_op(sketch0, ans_info); + + DepthwiseConv2dAttributes dwc_attr; + auto tensor3_info = context.create_tensor_info(dwc_wei_shape, 1, DataType::F32, DataLayout::NHWC); + auto tensor4_info = context.create_tensor_info(dwc_bia_shape, 1, DataType::F32, DataLayout::NHWC); + ARM_COMPUTE_EXPECT(!GpuDepthwiseConv2d::validate_op(sketch0, ans_info, tensor3_info, tensor4_info, dwc_attr), + framework::LogLevel::ERRORS); + + auto tensor5_info = context.create_tensor_info(); + ARM_COMPUTE_EXPECT(GpuOutput::validate_op(sketch0, ans_info, tensor5_info), framework::LogLevel::ERRORS); + GpuOutput::create_op(sketch0, ans_info, tensor5_info); + + // Create the first workload runtime. + ClWorkloadRuntime runtime0; + runtime0.configure(sketch0); + + // Create the second sketch: dwc + sigmoid + output. + GpuWorkloadSketch sketch1(&context); + + ARM_COMPUTE_EXPECT(GpuDepthwiseConv2d::validate_op(sketch1, tensor5_info, tensor3_info, tensor4_info, dwc_attr), + framework::LogLevel::ERRORS); + ans_info = GpuDepthwiseConv2d::create_op(sketch1, tensor5_info, tensor3_info, tensor4_info, dwc_attr); + + ARM_COMPUTE_EXPECT(GpuMul::validate_op(sketch1, ans_info, tensor2_info), framework::LogLevel::ERRORS); + ans_info = GpuMul::create_op(sketch1, ans_info, tensor2_info); + + auto tensor6_info = context.create_tensor_info(); + ARM_COMPUTE_EXPECT(GpuOutput::validate_op(sketch1, ans_info, tensor6_info), framework::LogLevel::ERRORS); + GpuOutput::create_op(sketch1, ans_info, tensor6_info); + + // Create the second workload runtime. + ClWorkloadRuntime runtime1; + runtime1.configure(sketch1); + + // Create the user tensors. + CLTensor tensor0; + CLTensor tensor1; + CLTensor tensor2; + CLTensor tensor3; + CLTensor tensor4; + CLTensor tensor5; + CLTensor tensor6; + + tensor0.allocator()->init(*tensor0_info); + tensor1.allocator()->init(*tensor1_info); + tensor2.allocator()->init(*tensor2_info); + tensor3.allocator()->init(*tensor3_info); + tensor4.allocator()->init(*tensor4_info); + tensor5.allocator()->init(*tensor5_info); + tensor6.allocator()->init(*tensor6_info); + + tensor0.allocator()->allocate(); + tensor1.allocator()->allocate(); + tensor2.allocator()->allocate(); + tensor3.allocator()->allocate(); + tensor4.allocator()->allocate(); + tensor5.allocator()->allocate(); + tensor6.allocator()->allocate(); + + // Allocate the auxiliary tensors. + for (auto &data : runtime0.get_auxiliary_tensors()) + { + auto tensor = std::get<0>(data); + auto &tensor_info = std::get<1>(data); + auto mem_req = std::get<2>(data); + + tensor->allocator()->init(tensor_info, mem_req.alignment); + tensor->allocator()->allocate(); + } + + for (auto &data : runtime1.get_auxiliary_tensors()) + { + auto tensor = std::get<0>(data); + auto &tensor_info = std::get<1>(data); + auto mem_req = std::get<2>(data); + + tensor->allocator()->init(tensor_info, mem_req.alignment); + tensor->allocator()->allocate(); + } + + // Fill the input tensors with random data. + fill<float>(CLAccessor(tensor0), 0, library.get()); + fill<float>(CLAccessor(tensor1), 1, library.get()); + fill<float>(CLAccessor(tensor2), 2, library.get()); + fill<float>(CLAccessor(tensor3), 3, library.get()); + fill<float>(CLAccessor(tensor4), 4, library.get()); + + // Run each runtime. + runtime0.run({&tensor0, &tensor1, &tensor2, &tensor5}); + runtime1.run({&tensor5, &tensor3, &tensor4, &tensor2, &tensor6}); + + // Compute the reference result. + SimpleTensor<float> ref_conv2d_src(conv2d_src_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC); + SimpleTensor<float> ref_conv2d_wei(conv2d_wei_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC); + SimpleTensor<float> ref_conv2d_bia(conv2d_bia_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC); + SimpleTensor<float> ref_dwc_wei(dwc_wei_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC); + SimpleTensor<float> ref_dwc_bia(dwc_bia_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC); + + fill<float>(ref_conv2d_src, 0, library.get()); + fill<float>(ref_conv2d_wei, 1, library.get()); + fill<float>(ref_conv2d_bia, 2, library.get()); + fill<float>(ref_dwc_wei, 3, library.get()); + fill<float>(ref_dwc_bia, 4, library.get()); + + PermutationVector nhwc_to_nchw(1, 2, 0); + + auto conv2d_dst_shape_nchw = conv2d_dst_shape; + permute(conv2d_dst_shape_nchw, nhwc_to_nchw); + const auto ref_conv2d_src_nchw = reference::permute(ref_conv2d_src, nhwc_to_nchw); + const auto ref_conv2d_wei_nchw = reference::permute(ref_conv2d_wei, nhwc_to_nchw); + const auto ref_conv2d_bia_nchw = reference::permute(ref_conv2d_bia, nhwc_to_nchw); + const auto ref_conv2d_dst_nchw = reference::convolution_layer( + ref_conv2d_src_nchw, ref_conv2d_wei_nchw, ref_conv2d_bia_nchw, conv2d_dst_shape_nchw, PadStrideInfo()); + + const auto ref_sigmoid_dst_nchw = reference::activation_layer( + ref_conv2d_dst_nchw, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)); + + auto dwc_dst_shape_nchw = dwc_dst_shape; + permute(dwc_dst_shape_nchw, nhwc_to_nchw); + const auto ref_dwc_wei_nchw = reference::permute(ref_dwc_wei, nhwc_to_nchw); + const auto ref_dwc_bia_nchw = reference::permute(ref_dwc_bia, nhwc_to_nchw); + const auto ref_dwc_dst_nchw = reference::depthwise_convolution( + ref_sigmoid_dst_nchw, ref_dwc_wei_nchw, ref_dwc_bia_nchw, dwc_dst_shape_nchw, PadStrideInfo(), 1); + + const auto ref_mul_dst_nchw = reference::pixel_wise_multiplication<float, float, float>( + ref_dwc_dst_nchw, ref_conv2d_bia_nchw, 1.0, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_UP, + DataType::F32); + + constexpr RelativeTolerance<float> tolerance(0.001f); + validate(CLAccessor(tensor6), ref_mul_dst_nchw, tolerance); +} + +TEST_SUITE(Invalid_Fusion_Should_Fail) +TEST_CASE(Multiple_Complex_Ops_0, framework::DatasetMode::ALL) +{ + /* Computation: + * out = conv2d(conv2d(l0_input, l0_weight), l1_weight) + */ + CLScheduler::get().default_reinit(); + + const auto data_type = DataType::F32; + const auto data_layout = DataLayout::NHWC; + const auto t_input_shape = TensorShape(384, 12, 12); + const auto t_weight_shape = TensorShape(384, 1, 1, 16); + auto t_input_info = TensorInfo(t_input_shape, 1, data_type, data_layout); + auto t_weight_info = TensorInfo(t_weight_shape, 1, data_type, data_layout); + auto t_dst_info = TensorInfo(); + + Conv2dAttributes conv2d_attr{}; + + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Create tensor infos + ITensorInfo *input_info = context.create_tensor_info(t_input_shape, 1, data_type, data_layout); + ITensorInfo *weight_info = context.create_tensor_info(TensorInfo(t_weight_shape, 1, data_type, data_layout)); + ITensorInfo *dst_info; + + // Fuse conv2d into the workload + { + // Validate operator + const Status success = GpuConv2d::validate_op(sketch, input_info, weight_info, nullptr, conv2d_attr); + ARM_COMPUTE_EXPECT(bool(success), framework::LogLevel::ERRORS); + + dst_info = GpuConv2d::create_op(sketch, input_info, weight_info, nullptr, conv2d_attr); + } + + // Create tensor infos + ITensorInfo *weight_info_2 = context.create_tensor_info(t_weight_info); + + // Fuse conv2d into the workload + { + // Validate operator, should fail + const Status success = GpuConv2d::validate_op(sketch, dst_info, weight_info_2, nullptr, conv2d_attr); + const auto expected_error_str = "Operator fusion test failed. This operator cannot be fused into the workload"; + + ARM_COMPUTE_EXPECT(!bool(success), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT((success.error_description().find(expected_error_str) != std::string::npos), + framework::LogLevel::ERRORS); + } +} +TEST_SUITE_END() // Invalid_Fusion_Should_Fail +TEST_SUITE_END() // DYNAMIC_FUSION +TEST_SUITE_END() // INTEGRATION +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/dynamic_fusion/gpu/cl/Add.cpp b/tests/validation/dynamic_fusion/gpu/cl/Add.cpp new file mode 100644 index 0000000000..9bfdc961fe --- /dev/null +++ b/tests/validation/dynamic_fusion/gpu/cl/Add.cpp @@ -0,0 +1,264 @@ +/* + * Copyright (c) 2022-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/datasets/DynamicFusionDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h" +#include "tests/validation/Validation.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +/* Synced with tests/validation/CL/ArithmeticAddition.cpp from the standard interface. + * + * Difference | Why the difference + * No quantized tests | Not supported yet + * No in place tests | Not supported yet + * No activation tests | Not needed in dynamic fusion interface + * + */ +TEST_SUITE(CL) +TEST_SUITE(DYNAMIC_FUSION) +TEST_SUITE(ADD) + +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip( + framework::dataset::make("LhsInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Invalid data type combination + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), // S16 is valid data type for Add + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), // S32 is valid data type for Add + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes + TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for lhs + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Unsupported data type QASYMM8 + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), // Unsupported data type QASYMM8 + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(15U, 23U, 3U), 1, DataType::F32), // Broadcast Y dimension is not allowed + TensorInfo(TensorShape( 3U, 8U, 9U), 1, DataType::S16), // Broadcast Z dimension is not allowed + TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32), // Batching is allowed + }), + framework::dataset::make("RhsInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), + TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Unsupported data type QASYMM8 + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), // Unsupported data type QASYMM8 + TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for rhs + TensorInfo(TensorShape(15U, 1U, 3U), 1, DataType::F32), + TensorInfo(TensorShape( 3U, 8U, 1U), 1, DataType::S16), + TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32), + })), + framework::dataset::make("Expected", { true, false, true, true, false, true, false, false, true, false, false, true})), + input1_info, input2_info, expected) +{ + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &context }; + + // Validate Elementwise Add + auto lhs_info = context.create_tensor_info(input1_info); + auto rhs_info = context.create_tensor_info(input2_info); + + bool res = bool(GpuAdd::validate_op(sketch, lhs_info, rhs_info)); + ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS); +} +// clang-format on +// *INDENT-ON* + +constexpr AbsoluteTolerance<float> tolerance_f( + 0.0001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 and DataType::F16 */ +constexpr float tolerance_num = 0.0001f; /**< Tolerance number */ + +template <typename T> +using DynamicFusionCLAddFixture = + DynamicFusionGpuElementwiseBinaryOneOpValidationFixture<CLTensor, CLAccessor, GpuAdd, T>; + +template <typename T> +using DynamicFusionCLAddBroadcastFixture = + DynamicFusionGpuElementwiseBinaryBroadcastOneOpValidationFixture<CLTensor, CLAccessor, GpuAdd, T>; + +template <typename T> +using DynamicFusionCLAddTwoOpsFixture = + DynamicFusionGpuElementwiseBinaryTwoOpsValidationFixture<CLTensor, CLAccessor, GpuAdd, T>; + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmallOneOp, + DynamicFusionCLAddFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::ADD}), + datasets::SmallShapes()), + framework::dataset::make("DataType", {DataType::F32})), + framework::dataset::make("InPlace", {false}))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f); +} +FIXTURE_DATA_TEST_CASE(RunLargeOneOp, + DynamicFusionCLAddFixture<float>, + framework::DatasetMode::NIGHTLY, + combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::ADD}), + datasets::LargeShapes()), + framework::dataset::make("DataType", {DataType::F32})), + framework::dataset::make("InPlace", {false}))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f); +} +FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp, + DynamicFusionCLAddBroadcastFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::ADD}), + datasets::TemporaryLimitedSmallShapesBroadcast()), + framework::dataset::make("DataType", {DataType::F32})), + framework::dataset::make("InPlace", {false}))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f); +} + +FIXTURE_DATA_TEST_CASE(RunLargeBroadcastOneOp, + DynamicFusionCLAddBroadcastFixture<float>, + framework::DatasetMode::NIGHTLY, + combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::ADD}), + datasets::TemporaryLimitedLargeShapesBroadcast()), + framework::dataset::make("DataType", {DataType::F32})), + framework::dataset::make("InPlace", {false}))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f); +} +FIXTURE_DATA_TEST_CASE( + RunSmallTwoOps, + DynamicFusionCLAddTwoOpsFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::ADD}), + datasets::DynamicFusionElementwiseBinaryTwoOpsSmallShapes()), + framework::dataset::make("DataType", {DataType::F32})), + framework::dataset::make("InPlace", {false})), + framework::dataset::make("FuseTwoOps", {true}))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f); +} +TEST_SUITE_END() // FP32 + +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmallOneOp, + DynamicFusionCLAddFixture<half>, + framework::DatasetMode::ALL, + combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::ADD}), + datasets::SmallShapes()), + framework::dataset::make("DataType", {DataType::F16})), + framework::dataset::make("InPlace", {false}))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f, tolerance_num); +} + +FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp, + DynamicFusionCLAddBroadcastFixture<half>, + framework::DatasetMode::ALL, + combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::ADD}), + datasets::TemporaryLimitedSmallShapesBroadcast()), + framework::dataset::make("DataType", {DataType::F16})), + framework::dataset::make("InPlace", {false}))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f, tolerance_num); +} + +TEST_SUITE_END() // FP16 + +TEST_SUITE(S32) +FIXTURE_DATA_TEST_CASE(RunSmall, + DynamicFusionCLAddFixture<int32_t>, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::ADD}), + datasets::SmallShapes()), + framework::dataset::make("DataType", {DataType::S32})), + framework::dataset::make("InPlace", {false}))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() // S32 + +TEST_SUITE(S16) +FIXTURE_DATA_TEST_CASE(RunSmall, + DynamicFusionCLAddFixture<int16_t>, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::ADD}), + datasets::SmallShapes()), + framework::dataset::make("DataType", {DataType::S16})), + framework::dataset::make("InPlace", {false}))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, + DynamicFusionCLAddFixture<int16_t>, + framework::DatasetMode::NIGHTLY, + combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::ADD}), + datasets::LargeShapes()), + framework::dataset::make("DataType", {DataType::S16})), + framework::dataset::make("InPlace", {false}))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() // S16 + +TEST_SUITE(U8) +FIXTURE_DATA_TEST_CASE(RunSmall, + DynamicFusionCLAddFixture<uint8_t>, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::ADD}), + datasets::SmallShapes()), + framework::dataset::make("DataType", {DataType::U8})), + framework::dataset::make("InPlace", {false}))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() // U8 + +TEST_SUITE_END() // ADD +TEST_SUITE_END() // DYNAMIC_FUSION +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/dynamic_fusion/gpu/cl/Cast.cpp b/tests/validation/dynamic_fusion/gpu/cl/Cast.cpp new file mode 100644 index 0000000000..4ef359e74d --- /dev/null +++ b/tests/validation/dynamic_fusion/gpu/cl/Cast.cpp @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2022-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/Types.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h" +#include "arm_compute/runtime/CL/CLTensor.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/datasets/ConvertPolicyDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +// Tolerance +constexpr AbsoluteTolerance<float> zero_tolerance(0); + +/** Input data sets **/ + +// F16 +const auto CastF16toF32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F32)); + +// F32 +const auto CastF32toF16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F16)); + +class DFConvertPolicies final : public framework::dataset::ContainerDataset<std::vector<ConvertPolicy>> +{ +public: + DFConvertPolicies() + : ContainerDataset("ConvertPolicy", + { + ConvertPolicy::WRAP + }) + { + } +}; +} // namespace + +TEST_SUITE(CL) +TEST_SUITE(DYNAMIC_FUSION) +TEST_SUITE(CAST) + +template <typename T> +using DynamicFusionCLCastToF16Fixture = DynamicFusionCastValidationFixture<CLTensor, CLAccessor, GpuCast, T, half>; +template <typename T> +using DynamicFusionCLCastToF32Fixture = DynamicFusionCastValidationFixture<CLTensor, CLAccessor, GpuCast, T, float>; + +#define CAST_SUITE(NAME, idt, odt, type, dataset, tolerance) \ + TEST_SUITE(NAME) \ + FIXTURE_DATA_TEST_CASE(RunSmall, type, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), dataset), \ + DFConvertPolicies())) \ + { \ + validate(CLAccessor(_target), _reference, tolerance); \ + } \ + TEST_SUITE_END() + +// F16 +CAST_SUITE(F16_to_F32, DataType::F16, DataType::F32, DynamicFusionCLCastToF32Fixture<half>, CastF16toF32Dataset, zero_tolerance) + +// F32 +CAST_SUITE(F32_to_F16, DataType::F32, DataType::F16, DynamicFusionCLCastToF16Fixture<float>, CastF32toF16Dataset, zero_tolerance) + +TEST_SUITE_END() // CAST +TEST_SUITE_END() // DYNAMIC_FUSION +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/dynamic_fusion/gpu/cl/Clamp.cpp b/tests/validation/dynamic_fusion/gpu/cl/Clamp.cpp new file mode 100644 index 0000000000..cef8b87c3f --- /dev/null +++ b/tests/validation/dynamic_fusion/gpu/cl/Clamp.cpp @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2022-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/Types.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/ClampAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuClamp.h" +#include "arm_compute/runtime/CL/CLTensor.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/framework/Macros.h" +#include "tests/validation/fixtures/dynamic_fusion/operators/ClampFixture.h" +#include "tests/validation/Validation.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +constexpr float epsilon = 1e-6f; +constexpr AbsoluteTolerance<float> tolerance(epsilon); +} // namespace + +TEST_SUITE(CL) +TEST_SUITE(DYNAMIC_FUSION) +TEST_SUITE(CLAMP) +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16), + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Minimum value larger than maximum value + }), + framework::dataset::make("MinVal", { 0.2f, + 1.5f, + 9.0f, + })), + framework::dataset::make("MaxVal", { 0.5f, + 2.0f, + 1.0f, + })), + framework::dataset::make("Expected", { true, true, false })), + input_info, min_val, max_val, expected) +{ + // Create a new workload sketch + CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + GpuWorkloadContext context{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &context }; + + // Fuse Clamp + const ITensorInfo* src_info = context.create_tensor_info(input_info); + + ClampAttributes attributes {}; + attributes.min_val(min_val) + .max_val(max_val); + + const bool res = static_cast<bool>(GpuClamp::validate_op(sketch, src_info, attributes)); + ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS); +} +// clang-format on +// *INDENT-ON* + +template <typename T> +using DynamicFusionClampOpFixture = DynamicFusionClampValidationFixture<CLTensor, CLAccessor, GpuClamp, T>; + +TEST_SUITE(Float) +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmallOneOp, + DynamicFusionClampOpFixture<half>, + framework::DatasetMode::ALL, + combine(combine(combine(datasets::SmallShapes(), + framework::dataset::make( + "ClampAttributes", {ClampAttributes().min_val(0.1f).max_val(0.6f)})), + framework::dataset::make("Fuse", {false})), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance); +} + +FIXTURE_DATA_TEST_CASE(RunSmall5dOneOp, + DynamicFusionClampOpFixture<half>, + framework::DatasetMode::ALL, + combine(combine(combine(datasets::Small5dShapes(), + framework::dataset::make( + "ClampAttributes", {ClampAttributes().min_val(0.1f).max_val(0.6f)})), + framework::dataset::make("Fuse", {false})), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + ARM_COMPUTE_TEST_INFO("Currently 5D+ tensors are unsupported for this operation."); + framework::ARM_COMPUTE_PRINT_INFO(); +} + +FIXTURE_DATA_TEST_CASE(RunSmallTwoOps, + DynamicFusionClampOpFixture<half>, + framework::DatasetMode::ALL, + combine(combine(combine(datasets::SmallShapes(), + framework::dataset::make( + "ClampAttributes", {ClampAttributes().min_val(0.2f).max_val(0.4f)})), + framework::dataset::make("Fuse", {true})), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance); +} + +TEST_SUITE_END() // FP16 + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmallOneOp, + DynamicFusionClampOpFixture<float>, + framework::DatasetMode::ALL, + combine(combine(combine(datasets::SmallShapes(), + framework::dataset::make( + "ClampAttributes", {ClampAttributes().min_val(0.3f).max_val(0.7f)})), + framework::dataset::make("Fuse", {false})), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance); +} + +FIXTURE_DATA_TEST_CASE(RunSmall5dOneOp, + DynamicFusionClampOpFixture<float>, + framework::DatasetMode::ALL, + combine(combine(combine(datasets::Small5dShapes(), + framework::dataset::make( + "ClampAttributes", {ClampAttributes().min_val(0.3f).max_val(0.7f)})), + framework::dataset::make("Fuse", {false})), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + ARM_COMPUTE_TEST_INFO("Currently 5D+ tensors are unsupported for this operation."); + framework::ARM_COMPUTE_PRINT_INFO(); +} + +FIXTURE_DATA_TEST_CASE(RunSmallTwoOps, + DynamicFusionClampOpFixture<float>, + framework::DatasetMode::ALL, + combine(combine(combine(datasets::SmallShapes(), + framework::dataset::make( + "ClampAttributes", {ClampAttributes().min_val(0.1f).max_val(0.9f)})), + framework::dataset::make("Fuse", {true})), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance); +} + +TEST_SUITE_END() // FP32 +TEST_SUITE_END() // Float + +TEST_SUITE_END() // CLAMP +TEST_SUITE_END() // DYNAMIC_FUSION +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp b/tests/validation/dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp new file mode 100644 index 0000000000..2f8c639cea --- /dev/null +++ b/tests/validation/dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp @@ -0,0 +1,474 @@ +/* + * Copyright (c) 2022-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/datasets/DepthwiseConvolutionLayerDataset.h" +#include "tests/datasets/DilatedDepthwiseConvolutionLayerDataset.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h" +#include "tests/validation/Validation.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +const auto depth_multipliers = framework::dataset::make("DepthMultiplier", {1U, 4U}); +const auto large_depth_multipliers = framework::dataset::make("DepthMultiplier", {1, 2, 5, 8}); + +TEST_SUITE(CL) +TEST_SUITE(DYNAMIC_FUSION) +TEST_SUITE(DEPTHWISE_CONV2D) + +RelativeTolerance<float> tolerance_f32( + 0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ +RelativeTolerance<half_float::half> tolerance_f16(half_float::half( + 0.1)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ +constexpr float tolerance_num = 0.02f; /**< Tolerance number */ + +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip( // Explanations of failing tests + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Mismatching data type input/weights + TensorInfo(TensorShape(3U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Mismatching input feature maps + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Mismatching depth multiplier + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Invalid biases size + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Invalid biases dimensions + TensorInfo(TensorShape(8U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // dilation < 1 + TensorInfo(TensorShape(8U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QASYMM8, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QASYMM8_SIGNED, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QSYMM16, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QSYMM8, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QSYMM8_PER_CHANNEL, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QASYMM16, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::U8, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::S8, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::U16, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::S16, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::U32, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::S32, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(32U, 13U, 8U), 1, DataType::F32, DataLayout::NCHW), // Unsupported data layout + TensorInfo(TensorShape(8U, 32U, 13U, 4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(8U, 32U, 13U, 4U), 1, DataType::F32, DataLayout::NHWC), // weight dimension > 3 + TensorInfo(TensorShape(8U, 32U, 13U, 4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(8U, 32U, 13U, 4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(8U, 32U, 13U, 4U), 1, DataType::F32, DataLayout::NHWC), + }), + framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(2U, 3U, 3U, 2U), 1, DataType::F16, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 3U, 3U, 2U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 3U, 3U, 2U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 3U, 3U, 2U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 3U, 3U, 2U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(16U, 3U, 3U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(16U, 3U, 3U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::QASYMM8, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::QASYMM8_SIGNED, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::QSYMM16, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::QSYMM8, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::QSYMM8_PER_CHANNEL, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::QASYMM16, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::U8, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::S8, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::U16, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::S16, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::U32, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::S32, DataLayout::NHWC), + TensorInfo(TensorShape(3U, 3U, 24U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U, 5U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(24U, 4U, 3U), 1, DataType::F32, DataLayout::NHWC), + })), + framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(2U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 2U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(16U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(16U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NCHW), + TensorInfo(TensorShape(24U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(24U), 1, DataType::F32, DataLayout::NHWC), + })), + framework::dataset::make("Padding", { Padding2D(0, 0, 0, 0), + Padding2D(0, 0, 0, 0), + Padding2D(0, 0, 0, 0), + Padding2D(0, 0, 0, 0), + Padding2D(0, 0, 0, 0), + Padding2D(0, 0, 0, 0), + Padding2D(0, 0, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(1, 1, 0, 0), + Padding2D(2, 1, 2, 1), + Padding2D(2, 1, 2, 1), + Padding2D(2, 1, 2, 1), + })), + framework::dataset::make("Stride", { Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(1, 1), + Size2D(2, 3), + Size2D(2, 3), + })), + framework::dataset::make("DepthMultiplier", { 1, + 1, + 3, + 1, + 1, + 2, + 2, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + })), + framework::dataset::make("Dilation", { Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(0U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(2U, 3U), + })), + framework::dataset::make("Expected", { false, false, false, false, false, false, true, false, + false, false, false, false, false, false, false, false, false, false, + false, false, true, false, true, true, true })), + input_info, weights_info, biases_info, padding, stride, depth_multiplier, dilation, expected) +{ + CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + GpuWorkloadContext context = GpuWorkloadContext{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &context }; + + const ITensorInfo* sketch_input_info = context.create_tensor_info(input_info); + const ITensorInfo* sketch_weights_info = context.create_tensor_info(weights_info); + const ITensorInfo* sketch_biases_info = context.create_tensor_info(biases_info); + + DepthwiseConv2dAttributes attributes {}; + attributes.pad(padding) + .stride(stride) + .dilation(dilation) + .depth_multiplier(depth_multiplier); + + const Status status = GpuDepthwiseConv2d::validate_op(sketch, sketch_input_info, sketch_weights_info, sketch_biases_info, attributes); + const bool res = bool(status); + ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS); +} +// clang-format on +// *INDENT-ON* + +template <typename T> +using DynamicFusionGpuDepthwiseConv2dFixture = + DynamicFusionGpuDepthwiseConv2dValidationFixture<CLTensor, CLAccessor, GpuDepthwiseConv2d, T>; + +TEST_SUITE(Float) +TEST_SUITE(FP16) +TEST_SUITE(W3x3) +FIXTURE_DATA_TEST_CASE(RunSmall, + DynamicFusionGpuDepthwiseConv2dFixture<half>, + framework::DatasetMode::ALL, + combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), depth_multipliers), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunLarge, + DynamicFusionGpuDepthwiseConv2dFixture<half>, + framework::DatasetMode::NIGHTLY, + combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), + large_depth_multipliers), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_f16); +} + +TEST_SUITE(Dilation) +FIXTURE_DATA_TEST_CASE(RunSmall, + DynamicFusionGpuDepthwiseConv2dFixture<half>, + framework::DatasetMode::ALL, + combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), + depth_multipliers), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", {DataLayout::NHWC}))) +{ + validate(CLAccessor(_target), _reference, tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunLarge, + DynamicFusionGpuDepthwiseConv2dFixture<half>, + framework::DatasetMode::NIGHTLY, + combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), + large_depth_multipliers), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", {DataLayout::NHWC}))) +{ + validate(CLAccessor(_target), _reference, tolerance_f16); +} +TEST_SUITE_END() // Dilation +TEST_SUITE_END() // W3x3 + +TEST_SUITE(Generic) +FIXTURE_DATA_TEST_CASE(RunSmall, + DynamicFusionGpuDepthwiseConv2dFixture<half>, + framework::DatasetMode::ALL, + combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), depth_multipliers), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", {DataLayout::NHWC}))) +{ + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); +} +FIXTURE_DATA_TEST_CASE(RunLarge, + DynamicFusionGpuDepthwiseConv2dFixture<half>, + framework::DatasetMode::NIGHTLY, + combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), + large_depth_multipliers), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", {DataLayout::NHWC}))) +{ + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); +} + +TEST_SUITE(Dilation) +FIXTURE_DATA_TEST_CASE(RunSmall, + DynamicFusionGpuDepthwiseConv2dFixture<half>, + framework::DatasetMode::ALL, + combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), + depth_multipliers), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", {DataLayout::NHWC}))) +{ + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); +} +FIXTURE_DATA_TEST_CASE(RunLarge, + DynamicFusionGpuDepthwiseConv2dFixture<half>, + framework::DatasetMode::NIGHTLY, + combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), + large_depth_multipliers), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", {DataLayout::NHWC}))) +{ + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); +} +TEST_SUITE_END() // Dilation +TEST_SUITE_END() // Generic +TEST_SUITE_END() // FP16 + +TEST_SUITE(FP32) +TEST_SUITE(W3x3) +FIXTURE_DATA_TEST_CASE(RunSmall, + DynamicFusionGpuDepthwiseConv2dFixture<float>, + framework::DatasetMode::ALL, + combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), depth_multipliers), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, + DynamicFusionGpuDepthwiseConv2dFixture<float>, + framework::DatasetMode::NIGHTLY, + combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), + large_depth_multipliers), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +TEST_SUITE(Dilation) + +FIXTURE_DATA_TEST_CASE(RunSmall, + DynamicFusionGpuDepthwiseConv2dFixture<float>, + framework::DatasetMode::ALL, + combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), + depth_multipliers), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, + DynamicFusionGpuDepthwiseConv2dFixture<float>, + framework::DatasetMode::NIGHTLY, + combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), + large_depth_multipliers), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() // Dilation +TEST_SUITE_END() // W3x3 + +TEST_SUITE(Generic) +FIXTURE_DATA_TEST_CASE(RunSmall, + DynamicFusionGpuDepthwiseConv2dFixture<float>, + framework::DatasetMode::ALL, + combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), depth_multipliers), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", {DataLayout::NHWC}))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, + DynamicFusionGpuDepthwiseConv2dFixture<float>, + framework::DatasetMode::NIGHTLY, + combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), + large_depth_multipliers), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", {DataLayout::NHWC}))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunLargeKernelSize, + DynamicFusionGpuDepthwiseConv2dFixture<float>, + framework::DatasetMode::ALL, + combine(combine(combine(datasets::LargeKernelSizeDepthwiseConvolutionLayerNHWCDataset(), + framework::dataset::make("DepthMultiplier", {1})), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", {DataLayout::NHWC}))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +TEST_SUITE(Dilation) +FIXTURE_DATA_TEST_CASE(RunSmall, + DynamicFusionGpuDepthwiseConv2dFixture<float>, + framework::DatasetMode::ALL, + combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), + depth_multipliers), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", {DataLayout::NHWC}))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, + DynamicFusionGpuDepthwiseConv2dFixture<float>, + framework::DatasetMode::NIGHTLY, + combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), + large_depth_multipliers), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", {DataLayout::NHWC}))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() // Dilation +TEST_SUITE_END() // Generic +TEST_SUITE_END() // FP32 +TEST_SUITE_END() // Float +TEST_SUITE_END() // DEPTHWISE_CONV2D +TEST_SUITE_END() // DYNAMIC_FUSION +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp b/tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp new file mode 100644 index 0000000000..b843764786 --- /dev/null +++ b/tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2022-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "tests/AssetsLibrary.h" +#include "tests/CL/CLAccessor.h" +#include "tests/datasets/SmallConvolutionLayerDataset.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h" +#include "tests/validation/reference/ConvolutionLayer.h" +#include "tests/validation/Validation.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +/** Tolerances from tests/validation/CL/DirectConvolutionLayer.cpp + */ +RelativeTolerance<float> tolerance_f32( + 0.05f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ +RelativeTolerance<half_float::half> tolerance_f16(half_float::half( + 0.2)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ +constexpr float abs_tolerance_f32(0.0001f); /**< Absolute tolerance for FP32 tests*/ +constexpr float tolerance_num = 0.07f; /**< Tolerance number */ +} // namespace + +TEST_SUITE(CL) +TEST_SUITE(DYNAMIC_FUSION) +/** Synced with tests/validation/CL/ConvolutionLayer.cpp + * + * Difference | Why the difference + * f32 tolerance here is smaller | To use the same tolerance as that of DirectConv2d; lowering tolerance is safe + * No quantized tests | Not supported yet + * No grouped CNN tests | Not supported yet + * No mixed layout tests | Not needed; only NHWC is supported + * No activation | Not needed in fusion + * No ValidateConvolutionMethod | Only a single method (direct conv2d) is supported + * No ReshapeWeights = true tests | Not applicable yet. This parameter only concerns gemm-based conv2d + * No RunSmallWithPadding tests | Padding is removed + * + */ +TEST_SUITE(CONV2D) + +template <typename T> +using DynamicFusionGpuConv2dFixture = DynamicFusionGpuConv2dValidationFixture<CLTensor, CLAccessor, GpuConv2d, T>; +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, + DynamicFusionGpuConv2dFixture<float>, + framework::DatasetMode::ALL, + combine(combine(combine(datasets::SmallConvolutionLayerDataset(), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", {DataLayout::NHWC})), + framework::dataset::make("QuantizationInfo", QuantizationInfo()))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() // FP32 + +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, + DynamicFusionGpuConv2dFixture<half>, + framework::DatasetMode::ALL, + combine(combine(combine(datasets::SmallConvolutionLayerDataset(), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", {DataLayout::NHWC})), + framework::dataset::make("QuantizationInfo", QuantizationInfo()))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); +} +TEST_SUITE_END() // FP16 + +// Tests for specific conv2d methods +/** Synced with tests/validation/CL/DirectConvolutionLayer.cpp + * + * Difference | Why the difference + * No quantized tests | Not supported yet + * No Invalid output size test | Not applicable. Output is removed from the interface + * No mixed layout/NCHW tests | Not needed; only NHWC is supported + * No activation tests | Not needed in fusion + */ +TEST_SUITE(DIRECT_CONV2D) + +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Invalid: Mismatching data type input/weights + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Invalid: Mismatching input feature maps + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Invalid weights dimensions + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Unsupported biases size + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Unsupported biases dimensions + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, DataLayout::NCHW), // Unsupported data layout: NCHW + TensorInfo(TensorShape(2U, 32U, 16U), 1, DataType::QASYMM8, DataLayout::NHWC), // Unsupported data type: quantized + TensorInfo(TensorShape(2U, 32U, 16U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Arbitrary weight sizes for NHWC are supported + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Non-rectangular weights dimensions for NHWC are supported + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Strides > 2 for any kernel sizes for NHWC are supported + }), + framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(2U, 3U, 3U, 4U), 1, DataType::F16, DataLayout::NHWC), + TensorInfo(TensorShape(3U, 3U, 3U, 4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 3U, 3U, 4U, 3U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 3U, 3U, 4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 3U, 3U, 4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(2U, 1U, 1U, 4U), 1, DataType::QASYMM8, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 1U, 1U, 4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 13U, 13U, 4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 5U, 3U, 4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 3U, 3U, 4U), 1, DataType::F32, DataLayout::NHWC), + })), + framework::dataset::make("BiasesInfo",{ TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(4U, 2U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(25U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(4U), 1, DataType::QASYMM8, DataLayout::NHWC), + TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC), + })), + framework::dataset::make("Conv2dAttributes", { + Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}), + Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}), + Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}), + Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}), + Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}), + Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}), + Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}), + Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}), + Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}), + Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}), + Conv2dAttributes().stride({3, 3}).pad({0, 0, 0, 0}), + })), + framework::dataset::make("Expected", { false, false, false, false, false, false, false, true, true, true, true })), + input_info, weights_info, biases_info, conv2d_attrs, expected) +{ + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &context }; + + const ITensorInfo* sketch_input_info = context.create_tensor_info(input_info); + const ITensorInfo* sketch_weights_info = context.create_tensor_info(weights_info); + const ITensorInfo* sketch_biases_info = context.create_tensor_info(biases_info); + bool is_valid = bool(GpuConv2d::validate_op(sketch, sketch_input_info, sketch_weights_info, sketch_biases_info, conv2d_attrs)); + ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); +} +template <typename T> +using DynamicFusionGpuDirectConv2dFixture = DynamicFusionDirectConv2dValidationFixture<CLTensor, CLAccessor, GpuConv2d, T>; + +TEST_SUITE(FP16) +/// TODO: COMPMID-6877: Once the issue in Conv2d is resolved, re-enable these +FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuDirectConv2dFixture<half>, framework::DatasetMode::DISABLED, + combine(combine(combine(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U), + TensorShape(19U, 5U, 16U, 4U), + TensorShape(13U, 5U, 17U, 2U), + TensorShape(32U, 37U, 13U) } ), + framework::dataset::make("StrideX", { 1, 3, 1, 1 })), + framework::dataset::make("StrideY", { 1, 3, 2, 1 })), + framework::dataset::make("PadX", { 1, 3, 0, 4 })), + framework::dataset::make("PadY", { 1, 3, 0, 4 })), + framework::dataset::make("KernelSize", { 3, 8, 1, 9 })), + framework::dataset::make("NumKernels", { 17, 3, 1, 19 })), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionGpuDirectConv2dFixture<half>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(800U, 800U, 3U) } ), + framework::dataset::make("StrideX", { 1 })), + framework::dataset::make("StrideY", { 1 })), + framework::dataset::make("PadX", { 1 })), + framework::dataset::make("PadY", { 1 })), + framework::dataset::make("KernelSize", { 9 })), + framework::dataset::make("NumKernels", { 3 })), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); +} + +TEST_SUITE_END() // FP16 + +TEST_SUITE(FP32) +/// TODO: COMPMID-6877: Once the issue in Conv2d is resolved, re-enable these +FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuDirectConv2dFixture<float>, framework::DatasetMode::DISABLED, + combine(combine(combine(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U), + TensorShape(19U, 5U, 16U, 4U), + TensorShape(13U, 5U, 17U, 2U), + TensorShape(32U, 37U, 13U) } ), + framework::dataset::make("StrideX", { 1, 3, 1, 1 })), + framework::dataset::make("StrideY", { 1, 3, 2, 1 })), + framework::dataset::make("PadX", { 1, 3, 0, 4 })), + framework::dataset::make("PadY", { 1, 3, 0, 4 })), + framework::dataset::make("KernelSize", { 3, 8, 1, 9 })), + framework::dataset::make("NumKernels", { 17, 3, 1, 19 })), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32, 0.0, abs_tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionGpuDirectConv2dFixture<float>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(800U, 800U, 3U) } ), + framework::dataset::make("StrideX", { 1 })), + framework::dataset::make("StrideY", { 1 })), + framework::dataset::make("PadX", { 1 })), + framework::dataset::make("PadY", { 1 })), + framework::dataset::make("KernelSize", { 9 })), + framework::dataset::make("NumKernels", { 3 })), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32, 0.0, abs_tolerance_f32); +} +// clang-format on +// *INDENT-ON* + +TEST_SUITE_END() // FP32 +TEST_SUITE_END() // DIRECT_CONV2D +TEST_SUITE_END() // CONV2D +TEST_SUITE_END() // DYNAMIC_FUSION +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/dynamic_fusion/gpu/cl/MatMul.cpp b/tests/validation/dynamic_fusion/gpu/cl/MatMul.cpp new file mode 100644 index 0000000000..82d66ca6ce --- /dev/null +++ b/tests/validation/dynamic_fusion/gpu/cl/MatMul.cpp @@ -0,0 +1,335 @@ +/* + * Copyright (c) 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "tests/AssetsLibrary.h" +#include "tests/CL/CLAccessor.h" +#include "tests/datasets/LargeMatMulDataset.h" +#include "tests/datasets/MatMulDataset.h" +#include "tests/datasets/SmallMatMulDataset.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h" +#include "tests/validation/reference/GEMM.h" +#include "tests/validation/reference/Permute.h" +#include "tests/validation/Validation.h" + +#include <tuple> + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +RelativeTolerance<float> tolerance_f32( + 0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */ +constexpr float abs_tolerance_f32( + 0.0001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for floating point data types in case using relative tolerance fails because of small values */ +constexpr float abs_tolerance_f16( + 0.001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for fp16 data types in case using relative tolerance fails because of small values */ +RelativeTolerance<half_float::half> tolerance_f16(half( + 0.02)); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */ +} // namespace + +/** M0 values to test - precommit */ +const auto m0_values_lhs_nt_precommit = framework::dataset::make("M0", {1, 2, 3}); + +/** N0 values to test - precommit */ +const auto n0_values_rhs_t_precommit = framework::dataset::make("N0", {1, 2, 4}); + +/** K0 values to test - precommit */ +const auto k0_values_rhs_t_precommit = framework::dataset::make("K0", {1, 2, 4}); + +/** M0 values to test - nightly */ +const auto m0_values_lhs_nt_nightly = framework::dataset::make("M0", {1, 2, 3, 4}); + +/** N0 values to test - nightly */ +const auto n0_values_rhs_t_nightly = framework::dataset::make("N0", {1, 2, 3, 4, 8}); + +/** K0 values to test - nightly */ +const auto k0_values_rhs_t_nightly = framework::dataset::make("K0", {1, 2, 3, 4, 8}); + +class DFMatMulDataset final : public datasets::MatMulDataset +{ +public: + DFMatMulDataset() + { + // LHS = [K, M], RHS = [N, K], DST = [N, M] + add_config(TensorShape(1U, 1U), TensorShape(1U, 1U), TensorShape(1U, 1U)); + add_config(TensorShape(1U, 2U), TensorShape(2U, 1U), TensorShape(2U, 2U)); + add_config(TensorShape(9U, 6U), TensorShape(5U, 9U), TensorShape(5U, 6U)); + add_config(TensorShape(32U, 37U), TensorShape(17U, 32U), TensorShape(17U, 37U)); + } +}; + +TEST_SUITE(CL) +TEST_SUITE(DYNAMIC_FUSION) + +TEST_SUITE(MatMul) + +TEST_SUITE(Validate) +TEST_CASE(SupportedBlockSizes, framework::DatasetMode::ALL) +{ + using MatMulConfigurationPair = std::pair<MatMulKernelInfo, bool>; + + const std::vector<MatMulConfigurationPair> supported_block_sizes = { + // MatMulKernelInfo(adj_lhs, adj_rhs, M0, N0, K0, export_rhs_to_cl_image = false) + + // Lhs not-transposed, Rhs transposed + {MatMulKernelInfo(false, true, 0, 1, 1), false}, // M0 should be > 0 + {MatMulKernelInfo(false, true, 3, 11, 1), false}, // N0 not in {1, 2, 3, 4, 8, 16} + {MatMulKernelInfo(false, true, 3, 7, 1), false}, // N0 not in {1, 2, 3, 4, 8, 16} + {MatMulKernelInfo(false, true, 3, 3, 12), false}, // K0 not in {1, 2, 3, 4, 8, 16} + {MatMulKernelInfo(false, true, 3, 3, 6), false}, // K0 not in {1, 2, 3, 4, 8, 16} + {MatMulKernelInfo(false, true, 5, 1, 2), true}, {MatMulKernelInfo(false, true, 3, 3, 3), true}, + {MatMulKernelInfo(false, true, 2, 4, 8), true}, + + }; + + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Set big enough shapes so that block sizes are not truncated. Also, set all dimensions equal + // so that it doesn't fail for different NT/T configurations. We aim to test the block sizes here, + // not the shapes themselves. + const ITensorInfo *lhs_info = context.create_tensor_info(TensorInfo(TensorShape(100U, 100U), 1, DataType::F32)); + const ITensorInfo *rhs_info = context.create_tensor_info(TensorInfo(TensorShape(100U, 100U), 1, DataType::F32)); + + for (auto &pair : supported_block_sizes) + { + MatMulAttributes matmul_attr{}; + matmul_attr.adj_lhs(pair.first.adj_lhs); + matmul_attr.adj_rhs(pair.first.adj_rhs); + + GpuMatMulSettings matmul_settings{}; + matmul_settings.m0(pair.first.m0); + matmul_settings.n0(pair.first.n0); + matmul_settings.k0(pair.first.k0); + + Status status = GpuMatMul::validate_op(sketch, lhs_info, rhs_info, matmul_attr, matmul_settings); + ARM_COMPUTE_EXPECT(bool(status) == pair.second, framework::LogLevel::ERRORS); + } +} + +TEST_CASE(ValidateInputShapes, framework::DatasetMode::ALL) +{ + // Create a sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Configurations are assumed to be Nt/Nt, but will be transposed inside the test to test other configurations + using ShapeConfigurationTuple = std::tuple<TensorShape, TensorShape, bool>; + const std::vector<ShapeConfigurationTuple> shape_configurations = { + {TensorShape(5U, 1U), TensorShape(3U, 5U), true}, + {TensorShape(10U, 12U), TensorShape(3U, 10U), true}, + {TensorShape(8U, 4U), TensorShape(2U, 8U), true}, + {TensorShape(8U, 4U), TensorShape(2U, 5U), false}, // Mismatch in the K dimension + {TensorShape(5U, 0U), TensorShape(2U, 5U), false}, // Invalid dimension + {TensorShape(5U, 4U, 3U, 4U, 5U, 6U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), true}, + {TensorShape(5U, 4U, 3U, 4U, 5U, 1U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), false}, // no batch broadcasting + {TensorShape(5U, 4U, 3U, 4U, 9U, 6U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), + false}, // mismatch in batch dimension + }; + + for (auto &tuple : shape_configurations) + { + const bool expected = std::get<2>(tuple); + + for (bool adj_lhs : {false}) + { + for (bool adj_rhs : {true}) + { + TensorShape lhs_shape = std::get<0>(tuple); + TensorShape rhs_shape = std::get<1>(tuple); + + if (adj_lhs) + { + permute(lhs_shape, PermutationVector(1U, 0U)); + } + + if (adj_rhs) + { + permute(rhs_shape, PermutationVector(1U, 0U)); + } + + const ITensorInfo *lhs_info = context.create_tensor_info(TensorInfo(lhs_shape, 1, DataType::F32)); + const ITensorInfo *rhs_info = context.create_tensor_info(TensorInfo(rhs_shape, 1, DataType::F32)); + + MatMulAttributes matmul_attr{}; + matmul_attr.adj_lhs(adj_lhs); + matmul_attr.adj_rhs(adj_rhs); + + GpuMatMulSettings matmul_settings{}; + matmul_settings.m0(1); + matmul_settings.n0(1); + matmul_settings.k0(1); + + Status status = GpuMatMul::validate_op(sketch, lhs_info, rhs_info, matmul_attr, matmul_settings); + ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); + } + } + } +} + +TEST_CASE(ValidateDataTypes, framework::DatasetMode::ALL) +{ + // Configurations are assumed to be Nt/Nt, but will be transposed inside the test to test other configurations + using DataTypeConfigurationTuple = std::tuple<DataType, DataType, DataType, bool>; + const std::vector<DataTypeConfigurationTuple> data_type_configurations = { + {DataType::F32, DataType::F32, DataType::F32, true}, + {DataType::F16, DataType::F16, DataType::F16, true}, + {DataType::F16, DataType::F32, DataType::F32, false}, // no mixed precision + {DataType::F64, DataType::F64, DataType::F64, false}, // no double precision + {DataType::QASYMM8, DataType::QASYMM8, DataType::QASYMM8, false}, // no quantized types + {DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, false}, // no quantized types + {DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8_PER_CHANNEL, + false}, // no quantized types + {DataType::QASYMM16, DataType::QASYMM16, DataType::QASYMM16, false}, // no quantized types + {DataType::QSYMM16, DataType::QSYMM16, DataType::QSYMM16, false}, // no quantized types + {DataType::QSYMM8, DataType::QSYMM8, DataType::QSYMM8, false}, // no quantized types + {DataType::S64, DataType::S64, DataType::S64, false}, // no integral types + {DataType::S32, DataType::S32, DataType::S32, false}, // no integral types + {DataType::S16, DataType::S16, DataType::S16, false}, // no integral types + {DataType::S8, DataType::S8, DataType::S8, false}, // no integral types + {DataType::U64, DataType::U64, DataType::U64, false}, // no integral types + {DataType::U32, DataType::U32, DataType::U32, false}, // no integral types + {DataType::U16, DataType::U16, DataType::U16, false}, // no integral types + {DataType::U8, DataType::U8, DataType::U8, false}, // no integral types + }; + // Create a sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + const TensorShape shape = TensorShape(10U, 10U); + MatMulAttributes matmul_attr{}; + matmul_attr.adj_lhs(false); + matmul_attr.adj_rhs(false); + GpuMatMulSettings matmul_settings{}; + matmul_settings.m0(1); + matmul_settings.n0(1); + matmul_settings.k0(1); + + for (auto &tuple : data_type_configurations) + { + const bool expected = std::get<3>(tuple); + + const ITensorInfo *lhs_info = context.create_tensor_info(TensorInfo(shape, 1, std::get<0>(tuple))); + const ITensorInfo *rhs_info = context.create_tensor_info(TensorInfo(shape, 1, std::get<1>(tuple))); + + Status status = GpuMatMul::validate_op(sketch, lhs_info, rhs_info, matmul_attr, matmul_settings); + ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); + } +} + +TEST_SUITE_END() // Validate + +template <typename T> +using DynamicFusionGpuMatmulFixture = DynamicFusionGpuMatMulValidationFixture<CLTensor, CLAccessor, GpuMatMul, T>; + +TEST_SUITE(Float) +TEST_SUITE(FP32) + +FIXTURE_DATA_TEST_CASE(RunPrecommit, + DynamicFusionGpuMatmulFixture<float>, + framework::DatasetMode::ALL, + combine(DFMatMulDataset(), + framework::dataset::make("TransposeA", {false}), + framework::dataset::make("TransposeB", {true}), + m0_values_lhs_nt_precommit, + n0_values_rhs_t_precommit, + k0_values_rhs_t_precommit, + framework::dataset::make("ExportRhsToCLImage", {false}), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunNightly, + DynamicFusionGpuMatmulFixture<float>, + framework::DatasetMode::NIGHTLY, + combine(DFMatMulDataset(), + framework::dataset::make("TransposeA", {false}), + framework::dataset::make("TransposeB", {true}), + m0_values_lhs_nt_nightly, + n0_values_rhs_t_nightly, + k0_values_rhs_t_nightly, + framework::dataset::make("ExportRhsToCLImage", {false}), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); +} +TEST_SUITE_END() // FP32 + +TEST_SUITE(FP16) + +FIXTURE_DATA_TEST_CASE(RunPrecommit, + DynamicFusionGpuMatmulFixture<half>, + framework::DatasetMode::ALL, + combine(DFMatMulDataset(), + framework::dataset::make("TransposeA", {false}), + framework::dataset::make("TransposeB", {true}), + m0_values_lhs_nt_precommit, + n0_values_rhs_t_precommit, + k0_values_rhs_t_precommit, + framework::dataset::make("ExportRhsToCLImage", {false}), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); +} + +FIXTURE_DATA_TEST_CASE(RunNightly, + DynamicFusionGpuMatmulFixture<half>, + framework::DatasetMode::NIGHTLY, + combine(DFMatMulDataset(), + framework::dataset::make("TransposeA", {false}), + framework::dataset::make("TransposeB", {true}), + m0_values_lhs_nt_nightly, + n0_values_rhs_t_nightly, + k0_values_rhs_t_nightly, + framework::dataset::make("ExportRhsToCLImage", {false}), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); +} + +TEST_SUITE_END() // FP16 + +TEST_SUITE_END() // Float +TEST_SUITE_END() // MatMul +TEST_SUITE_END() // DYNAMIC_FUSION +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/dynamic_fusion/gpu/cl/Mul.cpp b/tests/validation/dynamic_fusion/gpu/cl/Mul.cpp new file mode 100644 index 0000000000..af02ce3eaa --- /dev/null +++ b/tests/validation/dynamic_fusion/gpu/cl/Mul.cpp @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMul.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/datasets/DynamicFusionDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/validation/fixtures/dynamic_fusion/operators/MulFixture.h" +#include "tests/validation/Validation.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +/* Synced with tests/validation/CL/PixelwiseMultiplication.cpp from the standard interface. + * + * Difference | Why the difference + * No integer tests | Not supported yet + * No quantized tests | Not supported yet + * No convert policy tests | Not needed as convert policy is ignored by floating types + * No scale tests | Not supported yet + * No rounding modes tests | Not supported yet + * No in place tests | Not supported yet + * No activation tests | Not needed in dynamic fusion interface + * + */ +namespace +{ +constexpr AbsoluteTolerance<float> tolerance_f16( + 0.0001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ +constexpr AbsoluteTolerance<float> tolerance_f32( + 0.0001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ +} // namespace +TEST_SUITE(CL) +TEST_SUITE(DYNAMIC_FUSION) +TEST_SUITE(MUL) + +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip( + framework::dataset::make("LhsInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Invalid data type combination + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Unsupported data type U8 + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S8), // Unsupported data type S8 + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), // Unsupported data type S16 + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), // Unsupported data type S32 + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Unsupported data type QASYMM8 + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), // Unsupported data type QASYMM8_SIGNED + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes + TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for lhs + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(15U, 23U, 3U), 1, DataType::F32), // Broadcast Y dimension is not allowed + TensorInfo(TensorShape( 3U, 8U, 9U), 1, DataType::F32), // Broadcast Z dimension is not allowed + TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32), // Batching is allowed + }), + framework::dataset::make("RhsInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S8), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), + TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for rhs + TensorInfo(TensorShape(15U, 1U, 3U), 1, DataType::F32), + TensorInfo(TensorShape( 3U, 8U, 1U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32), + })), + framework::dataset::make("Expected", { true, true, false, false, false, false, false, false, false, false, true, true, false, false, true })), + input1_info, input2_info, expected) +{ + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &context }; + + // Validate Elementwise Mul + auto lhs_info = context.create_tensor_info(input1_info); + auto rhs_info = context.create_tensor_info(input2_info); + + bool res = bool(GpuMul::validate_op(sketch, lhs_info, rhs_info)); + ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS); +} +// clang-format on +// *INDENT-ON* + +template <typename T> +using DynamicFusionCLMulFixture = DynamicFusionMulOneOpValidationFixture<CLTensor, CLAccessor, GpuMul, T>; +template <typename T> +using DynamicFusionCLMulBroadcastFixture = DynamicFusionMulBroadcastValidationFixture<CLTensor, CLAccessor, GpuMul, T>; +template <typename T> +using DynamicFusionCLMulTwoOpsFixture = DynamicFusionMulTwoOpsValidationFixture<CLTensor, CLAccessor, GpuMul, T>; + +TEST_SUITE(F16) +FIXTURE_DATA_TEST_CASE(RunSmallOneOp, + DynamicFusionCLMulFixture<half>, + framework::DatasetMode::ALL, + combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", {DataType::F16})), + framework::dataset::make("InPlace", {false}))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} + +FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp, + DynamicFusionCLMulBroadcastFixture<half>, + framework::DatasetMode::PRECOMMIT, + combine(combine(datasets::TemporaryLimitedSmallShapesBroadcast(), + framework::dataset::make("DataType", {DataType::F16})), + framework::dataset::make("InPlace", {false}))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} + +FIXTURE_DATA_TEST_CASE(RunLargeBroadcastOneOp, + DynamicFusionCLMulBroadcastFixture<half>, + framework::DatasetMode::NIGHTLY, + combine(combine(datasets::TemporaryLimitedLargeShapesBroadcast(), + framework::dataset::make("DataType", {DataType::F16})), + framework::dataset::make("InPlace", {false}))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} +TEST_SUITE_END() // F16 + +TEST_SUITE(F32) +FIXTURE_DATA_TEST_CASE(RunSmallOneOp, + DynamicFusionCLMulFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", {DataType::F32})), + framework::dataset::make("InPlace", {false}))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunLargeOneOp, + DynamicFusionCLMulFixture<float>, + framework::DatasetMode::NIGHTLY, + combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", {DataType::F32})), + framework::dataset::make("InPlace", {false}))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp, + DynamicFusionCLMulBroadcastFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(combine(datasets::TemporaryLimitedSmallShapesBroadcast(), + framework::dataset::make("DataType", {DataType::F32})), + framework::dataset::make("InPlace", {false}))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunLargeBroadcastOneOp, + DynamicFusionCLMulBroadcastFixture<float>, + framework::DatasetMode::NIGHTLY, + combine(combine(datasets::TemporaryLimitedLargeShapesBroadcast(), + framework::dataset::make("DataType", {DataType::F32})), + framework::dataset::make("InPlace", {false}))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunSmallTwoOps, + DynamicFusionCLMulTwoOpsFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(datasets::DynamicFusionElementwiseBinaryTwoOpsSmallShapes(), + framework::dataset::make("DataType", {DataType::F32})), + framework::dataset::make("InPlace", {false})), + framework::dataset::make("FuseTwoOps", {true}))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() // F32 + +TEST_SUITE_END() // MUL +TEST_SUITE_END() // DYNAMIC_FUSION +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp b/tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp new file mode 100644 index 0000000000..be816b32b3 --- /dev/null +++ b/tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp @@ -0,0 +1,219 @@ +/* + * Copyright (c) 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/datasets/dynamic_fusion/PoolingLayerDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h" +#include "tests/validation/Validation.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +TEST_SUITE(CL) +TEST_SUITE(DYNAMIC_FUSION) +TEST_SUITE(POOL2D) + +constexpr AbsoluteTolerance<float> tolerance_f32( + 0.001f); /**< Tolerance value for comparing reference's output against implementation's output for 32-bit floating-point type */ +constexpr AbsoluteTolerance<float> tolerance_f16( + 0.01f); /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */ + +const auto PoolingLayerDatasetFP = + combine(combine(combine(combine(framework::dataset::make("PoolingType", {PoolingType::MAX, PoolingType::AVG}), + framework::dataset::make("PoolingSize", {Size2D(2, 2), Size2D(3, 3)})), + framework::dataset::make("Pad", {Padding2D()})), + framework::dataset::make("Stride", {Size2D(1, 1), Size2D(2, 1), Size2D(5, 7)})), + framework::dataset::make("ExcludePadding", {true})); + +template <typename T> +using DynamicFusionGpuPool2dFixture = DynamicFusionGpuPool2dValidationFixture<CLTensor, CLAccessor, GpuPool2d, T>; + +template <typename T> +using DFSpecialGpuPool2dFixture = DynamicFusionGpuPool2dSpecialValidationFixture<CLTensor, CLAccessor, GpuPool2d, T>; +// *INDENT-OFF* +// clang-format off + +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip( + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::QASYMM8, DataLayout::NHWC), // Invalid parameters, unsupported pooling + TensorInfo(TensorShape(5U, 15U, 13U), 1, DataType::F32, DataLayout::NHWC), // Valid Non-rectangular Global Pooling + TensorInfo(TensorShape(5U, 13U, 13U), 1, DataType::QASYMM8, DataLayout::NHWC), // Invalid - Quantized not supported. + TensorInfo(TensorShape(5U, 13U, 13U), 1, DataType::F32, DataLayout::NHWC), // Valid global pooling + TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32, DataLayout::NCHW), // Unsupported data layout + }), + framework::dataset::make("Pool2dAttributes", { + Pool2dAttributes().pool_type(PoolingType::L2).pool_size(Size2D(3,3)).pad(Padding2D(0,0,0,0)).stride(Size2D(1,1)), + Pool2dAttributes().pool_type(PoolingType::AVG).pool_size(Size2D(15U, 13U)), + Pool2dAttributes().pool_type(PoolingType::AVG).pool_size(Size2D(2,2)).pad(Padding2D()).stride(Size2D(1,1)), + Pool2dAttributes().pool_type(PoolingType::AVG).pool_size(Size2D(13U,13U)), + Pool2dAttributes().pool_type(PoolingType::AVG).pool_size(Size2D(13U,13U)), + })), + framework::dataset::make("Expected", { false, true, false, true, false })), + input_info, pool2d_attr, expected) +{ + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &context }; + + // Declare GpuPool2d settings + const GpuPool2dSettings &settings = GpuPool2dSettings(); + + // Validate Pool2d Configuration + auto src_info = context.create_tensor_info(input_info); + bool res = bool(GpuPool2d::validate_op(sketch, src_info, pool2d_attr, settings)); + ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS); +} + +// clang-format on +// *INDENT-ON* + +TEST_SUITE(Float) +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, + DynamicFusionGpuPool2dFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(combine(datasets::SmallNoneUnitShapes(), PoolingLayerDatasetFP), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, + DynamicFusionGpuPool2dFixture<float>, + framework::DatasetMode::NIGHTLY, + combine(combine(datasets::LargeShapes(), PoolingLayerDatasetFP), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunSpecial, + DFSpecialGpuPool2dFixture<float>, + framework::DatasetMode::ALL, + combine(datasets::PoolingLayerDatasetSpecialDynamicFusion(), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +TEST_SUITE(GlobalPooling) +FIXTURE_DATA_TEST_CASE( + RunSmall, + DynamicFusionGpuPool2dFixture<float>, + framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(framework::dataset::make("InputShape", + {TensorShape(27U, 13U, 2U), + TensorShape(27U, 13U, 2U, 4U)}), + framework::dataset::make("PoolingType", + {PoolingType::AVG, PoolingType::MAX})), + framework::dataset::make("PoolingSize", {Size2D(27, 13)})), + framework::dataset::make("Pad", {Padding2D()})), + framework::dataset::make("Stride", {Size2D(1, 1)})), + framework::dataset::make("ExcludePadding", true)), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE( + RunLarge, + DynamicFusionGpuPool2dFixture<float>, + framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(framework::dataset::make("InputShape", + {TensorShape(79U, 37U, 11U), + TensorShape(79U, 37U, 11U, 4U)}), + framework::dataset::make("PoolingType", + {PoolingType::AVG, PoolingType::MAX})), + framework::dataset::make("PoolingSize", {Size2D(79, 37)})), + framework::dataset::make("Pad", {Padding2D()})), + framework::dataset::make("Stride", {Size2D(1, 1)})), + framework::dataset::make("ExcludePadding", true)), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() // GlobalPooling +TEST_SUITE_END() // FP32 + +TEST_SUITE(FP16) +TEST_SUITE(GlobalPooling) +FIXTURE_DATA_TEST_CASE( + RunSmall, + DynamicFusionGpuPool2dFixture<half>, + framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(framework::dataset::make("InputShape", + {TensorShape(27U, 13U, 2U), + TensorShape(27U, 13U, 2U, 4U)}), + framework::dataset::make("PoolingType", + {PoolingType::AVG, PoolingType::MAX})), + framework::dataset::make("PoolingSize", {Size2D(27, 13)})), + framework::dataset::make("Pad", {Padding2D()})), + framework::dataset::make("Stride", {Size2D(1, 1)})), + framework::dataset::make("ExcludePadding", true)), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} + +FIXTURE_DATA_TEST_CASE( + RunLarge, + DynamicFusionGpuPool2dFixture<half>, + framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(framework::dataset::make("InputShape", + {TensorShape(79U, 37U, 11U), + TensorShape(79U, 37U, 11U, 4U)}), + framework::dataset::make("PoolingType", + {PoolingType::AVG, PoolingType::MAX})), + framework::dataset::make("PoolingSize", {Size2D(79, 37)})), + framework::dataset::make("Pad", {Padding2D()})), + framework::dataset::make("Stride", {Size2D(1, 1)})), + framework::dataset::make("ExcludePadding", true)), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} +TEST_SUITE_END() // GlobalPooling +TEST_SUITE_END() // FP16 +TEST_SUITE_END() // FLOAT + +TEST_SUITE_END() // POOL2D +TEST_SUITE_END() // DYNAMIC_FUSION +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/dynamic_fusion/gpu/cl/Reshape.cpp b/tests/validation/dynamic_fusion/gpu/cl/Reshape.cpp new file mode 100644 index 0000000000..d46754ccca --- /dev/null +++ b/tests/validation/dynamic_fusion/gpu/cl/Reshape.cpp @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "tests/CL/CLAccessor.h" +#include "tests/datasets/ReshapeLayerDataset.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/framework/Macros.h" +#include "tests/validation/fixtures/dynamic_fusion/operators/ReshapeFixture.h" +#include "tests/validation/Validation.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +TEST_SUITE(CL) +TEST_SUITE(DYNAMIC_FUSION) +TEST_SUITE(RESHAPE) + +DATA_TEST_CASE(Validate, + framework::DatasetMode::DISABLED, + zip(zip(framework::dataset::make( + "InputInfo", + { + TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32), + TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32) /*mismatching dimensions*/, + }), + framework::dataset::make("OutputShape", + { + TensorShape(9U, 5U, 21U), + TensorShape(8U, 24U, 4U), + TensorShape(192U, 192U), + })), + framework::dataset::make("Expected", {true, true, false})), + input_info, + output_shape, + expected) +{ + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Create sketch tensors + TensorShape input_shape = input_info.tensor_shape(); + ARM_COMPUTE_UNUSED(input_shape); + ITensorInfo *src_info = context.create_tensor_info(input_info); + + ReshapeAttributes attributes; + attributes.shape(output_shape); + Status status = GpuReshape::validate_op(sketch, src_info, attributes); + ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); +} + +template <typename T> +using DynamicFusionGpuReshapeLayerFixture = + DynamicFusionGpuReshapeLayerValidationFixture<CLTensor, CLAccessor, GpuReshape, T>; + +TEST_SUITE(F32) +FIXTURE_DATA_TEST_CASE(RunSmall, + DynamicFusionGpuReshapeLayerFixture<float>, + framework::DatasetMode::DISABLED, + combine(datasets::SmallReshapeLayerDataset(), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() // F32 + +TEST_SUITE(F16) +FIXTURE_DATA_TEST_CASE(RunSmall, + DynamicFusionGpuReshapeLayerFixture<half>, + framework::DatasetMode::DISABLED, + combine(datasets::SmallReshapeLayerDataset(), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() // F16 + +TEST_SUITE(U8) +FIXTURE_DATA_TEST_CASE(RunSmall, + DynamicFusionGpuReshapeLayerFixture<uint8_t>, + framework::DatasetMode::DISABLED, + combine(datasets::SmallReshapeLayerDataset(), + framework::dataset::make("DataType", DataType::U8))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() // U8 + +TEST_SUITE(S8) +FIXTURE_DATA_TEST_CASE(RunSmall, + DynamicFusionGpuReshapeLayerFixture<int8_t>, + framework::DatasetMode::DISABLED, + combine(datasets::SmallReshapeLayerDataset(), + framework::dataset::make("DataType", DataType::S8))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() // S8 + +TEST_SUITE(S16) +FIXTURE_DATA_TEST_CASE(RunSmall, + DynamicFusionGpuReshapeLayerFixture<int16_t>, + framework::DatasetMode::DISABLED, + combine(datasets::SmallReshapeLayerDataset(), + framework::dataset::make("DataType", DataType::S16))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() // S16 + +TEST_SUITE_END() // RESHAPE +TEST_SUITE_END() // DYNAMIC_FUSION +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/dynamic_fusion/gpu/cl/Resize.cpp b/tests/validation/dynamic_fusion/gpu/cl/Resize.cpp new file mode 100644 index 0000000000..a6bcf4ae26 --- /dev/null +++ b/tests/validation/dynamic_fusion/gpu/cl/Resize.cpp @@ -0,0 +1,359 @@ +/* +* Copyright (c) 2022-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuResize.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/datasets/ScaleValidationDataset.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/validation/fixtures/dynamic_fusion/operators/ResizeFixture.h" +#include "tests/validation/Validation.h" + +using namespace arm_compute::experimental::dynamic_fusion; +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +using datasets::ScaleAlignCornersSamplingPolicySet; +using datasets::ScaleInterpolationPolicySet; +using datasets::ScaleSamplingPolicySet; +using datasets::ScaleShapesBaseDataSet; + +/** We consider vector size in byte 16 since the maximum size of + * a vector used by @ref CLScaleKernel is currently 16-byte (float4). + */ +constexpr uint32_t vector_byte = 16; + +template <typename T> +constexpr uint32_t num_elements_per_vector() +{ + return vector_byte / sizeof(T); +} + +/** Quantization information data set */ +const auto QuantizationInfoSet = framework::dataset::make("QuantizationInfo", + { + QuantizationInfo(0.5f, -1), + }); + +/** Tolerance */ +constexpr float tolerance_f32_absolute(0.001f); + +RelativeTolerance<float> tolerance_f32(0.05); +constexpr float abs_tolerance_f16(0.1f); +RelativeTolerance<half> tolerance_f16(half(0.1)); + +constexpr float tolerance_num_f32(0.01f); + +} // namespace + +TEST_SUITE(CL) +TEST_SUITE(DYNAMIC_FUSION) +TEST_SUITE(RESIZE) + +TEST_SUITE(Validate) + +const auto default_input_shape = TensorShape{2, 3, 3, 2}; +const auto default_output_shape = TensorShape{4, 6, 3, 2}; + +constexpr auto default_data_type = DataType::U8; +constexpr auto default_data_layout = DataLayout::NHWC; + +TEST_CASE(NullPtr, framework::DatasetMode::ALL) +{ + const TensorInfo input_info = TensorInfo{default_input_shape, 1, default_data_type, default_data_layout}; + const TensorInfo output_info = TensorInfo{default_output_shape, 1, default_data_type, default_data_layout}; + + CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + GpuWorkloadContext context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // nullptr is given as input + Status status = GpuResize::validate_op(sketch, nullptr, ResizeAttributes()); + ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); +} + +TEST_CASE(SupportDataType, framework::DatasetMode::ALL) +{ + const std::map<DataType, bool> supported_data_types = + { + { DataType::U8, false }, + { DataType::S8, false }, + { DataType::QSYMM8, false }, + { DataType::QASYMM8, false }, + { DataType::QASYMM8_SIGNED, false }, + { DataType::QSYMM8_PER_CHANNEL, false }, + { DataType::U16, false }, + { DataType::S16, false }, + { DataType::QSYMM16, false }, + { DataType::QASYMM16, false }, + { DataType::U32, false }, + { DataType::S32, false }, + { DataType::U64, false }, + { DataType::S64, false }, + { DataType::BFLOAT16, false }, + { DataType::F16, true }, + { DataType::F32, true }, + { DataType::F64, false }, + { DataType::SIZET, false }, + }; + + for (auto &kv : supported_data_types) + { + const TensorInfo input_info = TensorInfo{default_input_shape, 1, kv.first, default_data_layout}; + + CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + GpuWorkloadContext context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + const ITensorInfo *sketch_input_info = context.create_tensor_info(input_info); + + ResizeAttributes attributes; + attributes.output_width(default_output_shape[0]); // shape is not important unless it's empty + attributes.output_height(default_output_shape[1]); + + Status status = GpuResize::validate_op(sketch, sketch_input_info, attributes); + ARM_COMPUTE_EXPECT(bool(status) == kv.second, framework::LogLevel::ERRORS); + } +} + +TEST_CASE(MismatchingDataType, framework::DatasetMode::ALL) +{ + constexpr DataType non_default_data_type = DataType::F32; + + const TensorInfo input_info = TensorInfo{default_input_shape, 1, default_data_type, default_data_layout}; + const TensorInfo output_info = TensorInfo{default_output_shape, 1, non_default_data_type, default_data_layout}; + + CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + GpuWorkloadContext context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + const ITensorInfo *sketch_input_info = context.create_tensor_info(input_info); + + Status status = GpuResize::validate_op(sketch, sketch_input_info, ResizeAttributes()); + ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); +} + +TEST_CASE(AlignedCornerNotSupported, framework::DatasetMode::ALL) +{ + // Aligned corners require sampling policy to be TOP_LEFT. + constexpr InterpolationPolicy interpolation_policy = InterpolationPolicy::BILINEAR; + constexpr bool align_corners = true; + constexpr SamplingPolicy sampling_policy = SamplingPolicy::CENTER; + + const TensorInfo input_info = TensorInfo{default_input_shape, 1, default_data_type, default_data_layout}; + const TensorInfo output_info = TensorInfo{default_output_shape, 1, default_data_type, default_data_layout}; + + CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + GpuWorkloadContext context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + const ITensorInfo *sketch_input_info = context.create_tensor_info(input_info); + + ResizeAttributes attributes{}; + attributes.interpolation_policy(interpolation_policy).sampling_policy(sampling_policy).align_corners(align_corners); + + Status status = GpuResize::validate_op(sketch, sketch_input_info, attributes); + ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); +} + +TEST_CASE(UnsupportedInterpolationPolicy, framework::DatasetMode::ALL) +{ + const TensorInfo input_info = TensorInfo{TensorShape(28U, 33U, 2U), 1, DataType::F32, default_data_layout}; + const TensorInfo output_info = TensorInfo{TensorShape(26U, 21U, 2U), 1, DataType::F32, default_data_layout}; + constexpr auto interpolation_policy = InterpolationPolicy::AREA; + + CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + GpuWorkloadContext context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + const ITensorInfo *sketch_input_info = context.create_tensor_info(input_info); + + ResizeAttributes attributes{}; + attributes.interpolation_policy(interpolation_policy); + + Status status = GpuResize::validate_op(sketch, sketch_input_info, attributes); + ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); +} + +TEST_CASE(UnsupportedLayout, framework::DatasetMode::ALL) +{ + const TensorInfo input_info = TensorInfo{default_input_shape, 1, default_data_type, DataLayout::NCHW}; + const TensorInfo output_info = TensorInfo{default_output_shape, 1, default_data_type, DataLayout::NCHW}; + constexpr auto interpolation_policy = InterpolationPolicy::BILINEAR; + + CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + GpuWorkloadContext context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + const ITensorInfo *sketch_input_info = context.create_tensor_info(input_info); + + ResizeAttributes attributes{}; + attributes.interpolation_policy(interpolation_policy); + + Status status = GpuResize::validate_op(sketch, sketch_input_info, attributes); + ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); +} + +TEST_SUITE_END() // Validate + +template <typename T> +using DynamicFusionResizeFixture = DynamicFusionResizeValidationFixture<CLTensor, CLAccessor, GpuResize, T>; + +TEST_SUITE(Float) +TEST_SUITE(FP32) + +const auto f32_shape = combine((SCALE_PRECOMMIT_SHAPE_DATASET(num_elements_per_vector<float>())), + framework::dataset::make("DataType", DataType::F32)); + +FIXTURE_DATA_TEST_CASE(Run, + DynamicFusionResizeFixture<float>, + framework::DatasetMode::ALL, + ASSEMBLE_DATASET_DYNAMIC_FUSION(f32_shape, ScaleSamplingPolicySet)) +{ + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + const ValidRegion valid_region = + calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false); + + // Validate output + validate(CLAccessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32, tolerance_f32_absolute); +} + +FIXTURE_DATA_TEST_CASE(RunAlignCorners, + DynamicFusionResizeFixture<float>, + framework::DatasetMode::ALL, + ASSEMBLE_DATASET_DYNAMIC_FUSION(f32_shape, ScaleAlignCornersSamplingPolicySet)) +{ + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + const ValidRegion valid_region = + calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false); + + // Validate output + validate(CLAccessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32, tolerance_f32_absolute); +} +const auto f32_nightly_shape = combine((SCALE_NIGHTLY_SHAPE_DATASET(num_elements_per_vector<float>())), + framework::dataset::make("DataType", DataType::F32)); +FIXTURE_DATA_TEST_CASE(RunNightly, + DynamicFusionResizeFixture<float>, + framework::DatasetMode::NIGHTLY, + ASSEMBLE_DATASET_DYNAMIC_FUSION(f32_nightly_shape, ScaleSamplingPolicySet)) +{ + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + const ValidRegion valid_region = + calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false); + + // Validate output + validate(CLAccessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32, tolerance_f32_absolute); +} +FIXTURE_DATA_TEST_CASE(RunNightlyAlignCorners, + DynamicFusionResizeFixture<float>, + framework::DatasetMode::NIGHTLY, + ASSEMBLE_DATASET_DYNAMIC_FUSION(f32_nightly_shape, ScaleAlignCornersSamplingPolicySet)) +{ + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + const ValidRegion valid_region = + calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false); + + // Validate output + validate(CLAccessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32, tolerance_f32_absolute); +} +TEST_SUITE_END() // FP32 + +TEST_SUITE(FP16) +const auto f16_shape = combine((SCALE_PRECOMMIT_SHAPE_DATASET(num_elements_per_vector<half>())), + framework::dataset::make("DataType", DataType::F16)); +FIXTURE_DATA_TEST_CASE(Run, + DynamicFusionResizeFixture<half>, + framework::DatasetMode::ALL, + ASSEMBLE_DATASET_DYNAMIC_FUSION(f16_shape, ScaleSamplingPolicySet)) +{ + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + const ValidRegion valid_region = + calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false); + + // Validate output + validate(CLAccessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunAlignCorners, + DynamicFusionResizeFixture<half>, + framework::DatasetMode::ALL, + ASSEMBLE_DATASET_DYNAMIC_FUSION(f16_shape, ScaleAlignCornersSamplingPolicySet)) +{ + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + const ValidRegion valid_region = + calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false); + + // Validate output + validate(CLAccessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); +} +const auto f16_nightly_shape = combine((SCALE_NIGHTLY_SHAPE_DATASET(num_elements_per_vector<half>())), + framework::dataset::make("DataType", DataType::F16)); +FIXTURE_DATA_TEST_CASE(RunNightly, + DynamicFusionResizeFixture<half>, + framework::DatasetMode::NIGHTLY, + ASSEMBLE_DATASET_DYNAMIC_FUSION(f16_nightly_shape, ScaleSamplingPolicySet)) +{ + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + const ValidRegion valid_region = + calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false); + + // Validate output + validate(CLAccessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunNightlyAlignCorners, + DynamicFusionResizeFixture<half>, + framework::DatasetMode::NIGHTLY, + ASSEMBLE_DATASET_DYNAMIC_FUSION(f16_nightly_shape, ScaleAlignCornersSamplingPolicySet)) +{ + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + const ValidRegion valid_region = + calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false); + + // Validate output + validate(CLAccessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16); +} +TEST_SUITE_END() // FP16 +TEST_SUITE_END() // Float + +TEST_SUITE_END() // RESIZE +TEST_SUITE_END() // DYNAMIC_FUSION +TEST_SUITE_END() // CL + +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/dynamic_fusion/gpu/cl/Sigmoid.cpp b/tests/validation/dynamic_fusion/gpu/cl/Sigmoid.cpp new file mode 100644 index 0000000000..0134a7c11b --- /dev/null +++ b/tests/validation/dynamic_fusion/gpu/cl/Sigmoid.cpp @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/core/Types.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.h" +#include "arm_compute/runtime/CL/CLTensor.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/framework/Macros.h" +#include "tests/validation/fixtures/dynamic_fusion/operators/ActivationFixture.h" +#include "tests/validation/Validation.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +constexpr AbsoluteTolerance<float> tolerance_f32(1e-6f); +constexpr AbsoluteTolerance<float> tolerance_f16(0.001f); +} // namespace + +TEST_SUITE(CL) +TEST_SUITE(DYNAMIC_FUSION) +TEST_SUITE(SIGMOID) +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip( + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16), + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8), // Unsupported data type + }), + framework::dataset::make("Expected", { true, true, false })), + input_info, expected) +{ + // Create a new workload sketch + CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + GpuWorkloadContext context{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &context }; + + // Fuse sigmoid + const ITensorInfo *src_info = context.create_tensor_info(input_info); + + const bool res = static_cast<bool>(GpuSigmoid::validate_op(sketch, src_info)); + ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS); +} +// clang-format on +// *INDENT-ON* + +template <typename T> +using DynamicFusionSigmoidOpFixture = DynamicFusionSigmoidValidationFixture<CLTensor, CLAccessor, GpuSigmoid, T>; + +TEST_SUITE(Float) +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmallOneOp, + DynamicFusionSigmoidOpFixture<half>, + framework::DatasetMode::ALL, + combine(combine(datasets::SmallShapes(), framework::dataset::make("Fuse", {false})), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} + +FIXTURE_DATA_TEST_CASE(RunSmall5dOneOp, + DynamicFusionSigmoidOpFixture<half>, + framework::DatasetMode::ALL, + combine(combine(datasets::Small5dShapes(), framework::dataset::make("Fuse", {false})), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + ARM_COMPUTE_TEST_INFO("Currently 5D+ tensors are unsupported for this operation."); + framework::ARM_COMPUTE_PRINT_INFO(); +} + +FIXTURE_DATA_TEST_CASE(RunSmallTwoOps, + DynamicFusionSigmoidOpFixture<half>, + framework::DatasetMode::ALL, + combine(combine(datasets::SmallShapes(), framework::dataset::make("Fuse", {true})), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} + +TEST_SUITE_END() // FP16 + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmallOneOp, + DynamicFusionSigmoidOpFixture<float>, + framework::DatasetMode::ALL, + combine(combine(datasets::SmallShapes(), framework::dataset::make("Fuse", {false})), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunSmall5dOneOp, + DynamicFusionSigmoidOpFixture<float>, + framework::DatasetMode::ALL, + combine(combine(datasets::Small5dShapes(), framework::dataset::make("Fuse", {false})), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + ARM_COMPUTE_TEST_INFO("Currently 5D+ tensors are unsupported for this operation."); + framework::ARM_COMPUTE_PRINT_INFO(); +} + +FIXTURE_DATA_TEST_CASE(RunSmallTwoOps, + DynamicFusionSigmoidOpFixture<float>, + framework::DatasetMode::ALL, + combine(combine(datasets::SmallShapes(), framework::dataset::make("Fuse", {true})), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +TEST_SUITE_END() // FP32 +TEST_SUITE_END() // Float + +TEST_SUITE_END() // SIGMOID +TEST_SUITE_END() // DYNAMIC_FUSION +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/dynamic_fusion/gpu/cl/Softmax.cpp b/tests/validation/dynamic_fusion/gpu/cl/Softmax.cpp new file mode 100644 index 0000000000..8f5a1ed14a --- /dev/null +++ b/tests/validation/dynamic_fusion/gpu/cl/Softmax.cpp @@ -0,0 +1,219 @@ +/* + * Copyright (c) 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/core/Types.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/validation/fixtures/dynamic_fusion/operators/SoftmaxFixture.h" +#include "tests/validation/Validation.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +/** Tolerance for float operations */ +RelativeTolerance<half> tolerance_f16(half(0.2)); +RelativeTolerance<float> tolerance_f32(0.001f); + +using framework::dataset::make; + +/// TODO: COMPMID-6713 +/// Softmax is not implemented in CKW. Therefore, the tests are DISABLED. +/// Enable the tests when Softmax is implemented in CKW. + +TEST_SUITE(CL) +TEST_SUITE(DYNAMIC_FUSION) +TEST_SUITE(SOFTMAX) + +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::DISABLED, + zip( + make("InputInfo", { + TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), // Mismatching data types + TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), // Mismatching shapes + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U), 1, DataType::S32), // Unsupported data type + TensorInfo(TensorShape(32U, 13U), 1, DataType::F16), + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + }), + make("OutputInfo",{ + TensorInfo(TensorShape(27U, 13U), 1, DataType::F16), + TensorInfo(TensorShape(27U, 11U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM16), // Unsupported data type + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + }), + make("beta", { + 1.0, + 2.0, + 2.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + }), + make("axis", { + 0, + 0, + 1, // Invalid as axis != 0 + 0, + 0, + 0, + -3, // Invalid as axis != 0 + 2, // Invalid as axis != 0 + 1, // Invalid as axis != 0 + -1, // Invalid as axis != 0 + }), + make("Expected", { false, false, false, true, false, false, false, false, false, false})), + input_info, output_info, beta, axis, expected) +{ + // Create a new workload sketch + CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + GpuWorkloadContext context = GpuWorkloadContext{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &context }; + + SoftmaxAttributes softmax_attr{}; + softmax_attr.axis(axis).beta(beta).is_log_softmax(false); + ITensorInfo* src_info = context.create_tensor_info(input_info); + ITensorInfo* dst_info = context.create_tensor_info(output_info); + const bool res = static_cast<bool>(GpuSoftmax::validate_op(sketch, src_info, dst_info, softmax_attr)); + ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS); +} + +template <typename T> +using DynamicFusionSoftmaxLayerFixture = DynamicFusionSoftmaxValidationFixture<CLTensor, CLAccessor, GpuSoftmax, T>; + +TEST_SUITE(FLOAT) +TEST_SUITE(FP32) + +FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionSoftmaxLayerFixture<float>, framework::DatasetMode::DISABLED, + combine( + datasets::SoftmaxLayerSmallShapes(), + make("DataType", DataType::F32), + make("Beta", { 1.0f, 2.0f }), + make("Axis", { 0 }), + make("is_log", {false, true}))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + + +FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionSoftmaxLayerFixture<float>, framework::DatasetMode::DISABLED, + combine( + datasets::SoftmaxLayerLargeShapes(), + make("DataType", DataType::F32), + make("Beta", { 1.0f, 2.0f }), + make("Axis", { 0 }), + make("is_log", {false, true}))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + + +FIXTURE_DATA_TEST_CASE(Run4D, DynamicFusionSoftmaxLayerFixture<float>, framework::DatasetMode::DISABLED, + combine( + datasets::SoftmaxLayer4DShapes(), + make("DataType", DataType::F32), + make("Beta", { 1.0f, 2.0f }), + make("Axis", { 0 }), + make("is_log", {false, true}))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() // FP32 +TEST_SUITE(FP16) + +FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionSoftmaxLayerFixture<half>, framework::DatasetMode::DISABLED, + combine( + datasets::SoftmaxLayerSmallShapes(), + make("DataType", DataType::F16), + make("Beta", { 1.0f, 2.0f }), + make("Axis", { 0 }), + make("is_log", {false, true}))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} + + +FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionSoftmaxLayerFixture<half>, framework::DatasetMode::DISABLED, + combine( + datasets::SoftmaxLayerLargeShapes(), + make("DataType", DataType::F16), + make("Beta", { 1.0f, 2.0f }), + make("Axis", { 0 }), + make("is_log", {false, true}))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} + + +FIXTURE_DATA_TEST_CASE(Run4D, DynamicFusionSoftmaxLayerFixture<half>, framework::DatasetMode::DISABLED, + combine( + datasets::SoftmaxLayer4DShapes(), + make("DataType", DataType::F16), + make("Beta", { 1.0f, 2.0f }), + make("Axis", { 0 }), + make("is_log", {false, true}))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} +TEST_SUITE_END() // FP16 +TEST_SUITE_END() // FLOAT + +TEST_SUITE_END() // SOFTMAX +TEST_SUITE_END() // DYNAMIC_FUSION +TEST_SUITE_END() // CL + +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/dynamic_fusion/gpu/cl/Sub.cpp b/tests/validation/dynamic_fusion/gpu/cl/Sub.cpp new file mode 100644 index 0000000000..c7ab1e717c --- /dev/null +++ b/tests/validation/dynamic_fusion/gpu/cl/Sub.cpp @@ -0,0 +1,262 @@ +/* + * Copyright (c) 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSub.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/datasets/DynamicFusionDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h" +#include "tests/validation/Validation.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +/* Synced with tests/validation/CL/ArithmeticSubtraction.cpp from the standard interface. + * + * Difference | Why the difference + * No quantized tests | Not supported yet + * No in place tests | Not supported yet + * No activation tests | Not needed in dynamic fusion interface + * + */ +TEST_SUITE(CL) +TEST_SUITE(DYNAMIC_FUSION) +TEST_SUITE(SUB) + +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip( + framework::dataset::make("LhsInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U32), // Unsupported data type U32 + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Unsupported data type QASYMM8 + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), // Unsupported data type QASYMM8 + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Invalid data type combination + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), // Invalid data type combination + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), // Invalid data type combination + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes + TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for lhs + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(15U, 23U, 3U), 1, DataType::F32), // Broadcast Y dimension is not allowed + TensorInfo(TensorShape( 3U, 8U, 9U), 1, DataType::S16), // Invalid data type combination + TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32), // Batching is allowed + }), + framework::dataset::make("RhsInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), + TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for rhs + TensorInfo(TensorShape(15U, 1U, 3U), 1, DataType::F32), + TensorInfo(TensorShape( 3U, 8U, 1U), 1, DataType::S16), + TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32), + })), + framework::dataset::make("Expected", { true, false, false, false, false, false, false, false, true, true, false, false, true })), + input1_info, input2_info, expected) +{ + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &context }; + + // Validate Elementwise Sub + auto lhs_info = context.create_tensor_info(input1_info); + auto rhs_info = context.create_tensor_info(input2_info); + + bool res = bool(GpuSub::validate_op(sketch, lhs_info, rhs_info)); + ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS); +} +// clang-format on +// *INDENT-ON* + +template <typename T> +using DynamicFusionCLSubFixture = + DynamicFusionGpuElementwiseBinaryOneOpValidationFixture<CLTensor, CLAccessor, GpuSub, T>; + +template <typename T> +using DynamicFusionCLSubBroadcastFixture = + DynamicFusionGpuElementwiseBinaryBroadcastOneOpValidationFixture<CLTensor, CLAccessor, GpuSub, T>; + +template <typename T> +using DynamicFusionCLSubTwoOpsFixture = + DynamicFusionGpuElementwiseBinaryTwoOpsValidationFixture<CLTensor, CLAccessor, GpuSub, T>; + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmallOneOp, + DynamicFusionCLSubFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::SUB}), + datasets::SmallShapes()), + framework::dataset::make("DataType", {DataType::F32})), + framework::dataset::make("InPlace", {false}))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLargeOneOp, + DynamicFusionCLSubFixture<float>, + framework::DatasetMode::NIGHTLY, + combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::SUB}), + datasets::LargeShapes()), + framework::dataset::make("DataType", {DataType::F32})), + framework::dataset::make("InPlace", {false}))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp, + DynamicFusionCLSubBroadcastFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::SUB}), + datasets::TemporaryLimitedSmallShapesBroadcast()), + framework::dataset::make("DataType", {DataType::F32})), + framework::dataset::make("InPlace", {false}))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} + +FIXTURE_DATA_TEST_CASE(RunLargeBroadcastOneOp, + DynamicFusionCLSubBroadcastFixture<float>, + framework::DatasetMode::NIGHTLY, + combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::SUB}), + datasets::TemporaryLimitedLargeShapesBroadcast()), + framework::dataset::make("DataType", {DataType::F32})), + framework::dataset::make("InPlace", {false}))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE( + RunSmallTwoOps, + DynamicFusionCLSubTwoOpsFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::SUB}), + datasets::DynamicFusionElementwiseBinaryTwoOpsSmallShapes()), + framework::dataset::make("DataType", {DataType::F32})), + framework::dataset::make("InPlace", {false})), + framework::dataset::make("FuseTwoOps", {true}))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() // FP32 + +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmallOneOp, + DynamicFusionCLSubFixture<half>, + framework::DatasetMode::ALL, + combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::SUB}), + datasets::SmallShapes()), + framework::dataset::make("DataType", {DataType::F16})), + framework::dataset::make("InPlace", {false}))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} + +FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp, + DynamicFusionCLSubBroadcastFixture<half>, + framework::DatasetMode::ALL, + combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::SUB}), + datasets::TemporaryLimitedSmallShapesBroadcast()), + framework::dataset::make("DataType", {DataType::F16})), + framework::dataset::make("InPlace", {false}))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} + +TEST_SUITE_END() // FP16 + +TEST_SUITE(S32) +FIXTURE_DATA_TEST_CASE(RunSmall, + DynamicFusionCLSubFixture<int32_t>, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::SUB}), + datasets::SmallShapes()), + framework::dataset::make("DataType", {DataType::S32})), + framework::dataset::make("InPlace", {false}))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() // S32 + +TEST_SUITE(S16) +FIXTURE_DATA_TEST_CASE(RunSmall, + DynamicFusionCLSubFixture<int16_t>, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::SUB}), + datasets::SmallShapes()), + framework::dataset::make("DataType", {DataType::S16})), + framework::dataset::make("InPlace", {false}))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, + DynamicFusionCLSubFixture<int16_t>, + framework::DatasetMode::NIGHTLY, + combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::SUB}), + datasets::LargeShapes()), + framework::dataset::make("DataType", {DataType::S16})), + framework::dataset::make("InPlace", {false}))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() // S16 + +TEST_SUITE(U8) +FIXTURE_DATA_TEST_CASE(RunSmall, + DynamicFusionCLSubFixture<uint8_t>, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(framework::dataset::make("ElementwiseOp", {ArithmeticOperation::SUB}), + datasets::SmallShapes()), + framework::dataset::make("DataType", {DataType::U8})), + framework::dataset::make("InPlace", {false}))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() // U8 + +TEST_SUITE_END() // SUB +TEST_SUITE_END() // DYNAMIC_FUSION +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/dynamic_fusion/gpu/cl/Tanh.cpp b/tests/validation/dynamic_fusion/gpu/cl/Tanh.cpp new file mode 100644 index 0000000000..2560f3aab1 --- /dev/null +++ b/tests/validation/dynamic_fusion/gpu/cl/Tanh.cpp @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/core/Types.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuTanh.h" +#include "arm_compute/runtime/CL/CLTensor.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/framework/Macros.h" +#include "tests/validation/fixtures/dynamic_fusion/operators/ActivationFixture.h" +#include "tests/validation/Validation.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +constexpr AbsoluteTolerance<float> tolerance_f32(0.00001f); +constexpr AbsoluteTolerance<float> tolerance_f16(0.001f); +} // namespace + +TEST_SUITE(CL) +TEST_SUITE(DYNAMIC_FUSION) +TEST_SUITE(TANH) +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip( + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16), + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8), // Unsupported data type + }), + framework::dataset::make("Expected", { true, true, false })), + input_info, expected) +{ + // Create a new workload sketch + CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + GpuWorkloadContext context{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &context }; + + // Fuse tanh + const ITensorInfo* src_info = context.create_tensor_info(input_info); + + const bool res = static_cast<bool>(GpuTanh::validate_op(sketch, src_info)); + ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS); +} +// clang-format on +// *INDENT-ON* + +template <typename T> +using DynamicFusionTanhOpFixture = DynamicFusionTanhValidationFixture<CLTensor, CLAccessor, GpuTanh, T>; + +TEST_SUITE(Float) +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmallOneOp, + DynamicFusionTanhOpFixture<half>, + framework::DatasetMode::ALL, + combine(combine(datasets::SmallShapes(), framework::dataset::make("Fuse", {false})), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} + +FIXTURE_DATA_TEST_CASE(RunSmall5dOneOp, + DynamicFusionTanhOpFixture<half>, + framework::DatasetMode::ALL, + combine(combine(datasets::Small5dShapes(), framework::dataset::make("Fuse", {false})), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + ARM_COMPUTE_TEST_INFO("Currently 5D+ tensors are unsupported for this operation."); + framework::ARM_COMPUTE_PRINT_INFO(); +} + +FIXTURE_DATA_TEST_CASE(RunSmallTwoOps, + DynamicFusionTanhOpFixture<half>, + framework::DatasetMode::ALL, + combine(combine(datasets::SmallShapes(), framework::dataset::make("Fuse", {true})), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} + +TEST_SUITE_END() // FP16 + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmallOneOp, + DynamicFusionTanhOpFixture<float>, + framework::DatasetMode::ALL, + combine(combine(datasets::SmallShapes(), framework::dataset::make("Fuse", {false})), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunSmall5dOneOp, + DynamicFusionTanhOpFixture<float>, + framework::DatasetMode::ALL, + combine(combine(datasets::Small5dShapes(), framework::dataset::make("Fuse", {false})), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + ARM_COMPUTE_TEST_INFO("Currently 5D+ tensors are unsupported for this operation."); + framework::ARM_COMPUTE_PRINT_INFO(); +} + +FIXTURE_DATA_TEST_CASE(RunSmallTwoOps, + DynamicFusionTanhOpFixture<float>, + framework::DatasetMode::ALL, + combine(combine(datasets::SmallShapes(), framework::dataset::make("Fuse", {true})), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +TEST_SUITE_END() // FP32 +TEST_SUITE_END() // Float + +TEST_SUITE_END() // TANH +TEST_SUITE_END() // DYNAMIC_FUSION +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/fixtures/ActivationLayerFixture.h b/tests/validation/fixtures/ActivationLayerFixture.h index 91b43f0f24..a24ba8913e 100644 --- a/tests/validation/fixtures/ActivationLayerFixture.h +++ b/tests/validation/fixtures/ActivationLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_ACTIVATION_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_ACTIVATION_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_ACTIVATIONLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_ACTIVATIONLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -47,12 +47,7 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ActivationValidationGenericFixture : public framework::Fixture { public: - ActivationValidationGenericFixture() - : _target(parameters->get_ctx<TensorType>()) - { - } - template <typename...> void setup(TensorShape shape, bool in_place, ActivationLayerInfo::ActivationFunction function, float alpha_beta, DataType data_type, QuantizationInfo quantization_info) { ActivationLayerInfo info(function, alpha_beta, alpha_beta); @@ -120,29 +115,28 @@ protected: TensorType compute_target(const TensorShape &shape, ActivationLayerInfo info) { - auto ctx = parameters->get_ctx<TensorType>(); // Create tensors - TensorType src = create_tensor<TensorType>(shape, _data_type, 1, _input_quantization_info, DataLayout::NCHW, ctx); - TensorType dst = create_tensor<TensorType>(shape, _data_type, 1, _output_quantization_info, DataLayout::NCHW, ctx); + TensorType src = create_tensor<TensorType>(shape, _data_type, 1, _input_quantization_info, DataLayout::NCHW); + TensorType dst = create_tensor<TensorType>(shape, _data_type, 1, _output_quantization_info, DataLayout::NCHW); // Create and configure function - FunctionType act_layer(ctx); + FunctionType act_layer; TensorType *dst_ptr = _in_place ? nullptr : &dst; act_layer.configure(&src, dst_ptr, info); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); if(!_in_place) { dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); } // Fill tensors @@ -234,7 +228,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ActivationValidationFixture : public ActivationValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(TensorShape shape, bool in_place, ActivationLayerInfo::ActivationFunction function, float alpha_beta, DataType data_type) { ActivationValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, in_place, function, alpha_beta, data_type, QuantizationInfo()); @@ -245,7 +238,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ActivationValidationQuantizedFixture : public ActivationValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(TensorShape shape, bool in_place, ActivationLayerInfo::ActivationFunction function, float alpha_beta, DataType data_type, QuantizationInfo quantization_info) { ActivationValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, in_place, function, alpha_beta, data_type, quantization_info); @@ -255,4 +247,4 @@ public: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_ACTIVATION_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_ACTIVATIONLAYERFIXTURE_H diff --git a/tests/validation/fixtures/AddMulAddFixture.h b/tests/validation/fixtures/AddMulAddFixture.h new file mode 100644 index 0000000000..d13fef2f02 --- /dev/null +++ b/tests/validation/fixtures/AddMulAddFixture.h @@ -0,0 +1,270 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ACL_TESTS_VALIDATION_FIXTURES_ADDMULADDFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_ADDMULADDFIXTURE_H + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "tests/AssetsLibrary.h" +#include "tests/Globals.h" +#include "tests/IAccessor.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/validation/Helpers.h" +#include "tests/validation/reference/ActivationLayer.h" +#include "tests/validation/reference/ArithmeticOperations.h" +#include "tests/validation/reference/DequantizationLayer.h" +#include "tests/validation/reference/PixelWiseMultiplication.h" +#include "tests/validation/reference/QuantizationLayer.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class AddMulAddGenericFixture : public framework::Fixture +{ +public: + void setup(const TensorShape &shape, DataType data_type, ActivationLayerInfo &act_info, bool interm_out) + { + compute_target(shape, data_type, act_info, interm_out); + } + +protected: + template <typename U> + void fill(U &&tensor, int i, DataType data_type) + { + switch(data_type) + { + case DataType::F32: + library->fill_tensor_uniform(tensor, i, -10.f, 10.f); + break; + case DataType::F16: + library->fill_tensor_uniform(tensor, i, -1.f, 1.f); + break; + default: + library->fill_tensor_uniform(tensor, i); + break; + } + } + + void compute_target(const TensorShape &shape, DataType data_type, ActivationLayerInfo &act_info, bool interm_out) + { + TensorShape b_shape(shape.x()); + + // Create tensors + TensorType input1 = create_tensor<TensorType>(shape, data_type, 1, _input1_qinfo); + TensorType input2 = create_tensor<TensorType>(shape, data_type, 1, _input2_qinfo); + TensorType bn_mul = create_tensor<TensorType>(b_shape, data_type, 1, _bn_mul_qinfo); + TensorType bn_add = create_tensor<TensorType>(b_shape, data_type, 1, _bn_add_qinfo); + TensorType add_output = create_tensor<TensorType>(shape, data_type, 1, _add_output_qinfo); + TensorType final_output = create_tensor<TensorType>(shape, data_type, 1, _final_output_qinfo); + + // Create and configure function + FunctionType add_mul_add; + ARM_COMPUTE_ERROR_THROW_ON(add_mul_add.validate(input1.info(), input2.info(), bn_mul.info(), + bn_add.info(), interm_out ? add_output.info() : nullptr, final_output.info(), + ConvertPolicy::SATURATE, act_info)); + + add_mul_add.configure(&input1, &input2, &bn_mul, &bn_add, interm_out ? &add_output : nullptr, + &final_output, ConvertPolicy::SATURATE, act_info); + + // Allocate tensors + input1.allocator()->allocate(); + input2.allocator()->allocate(); + bn_mul.allocator()->allocate(); + bn_add.allocator()->allocate(); + + if(interm_out) + { + add_output.allocator()->allocate(); + } + + final_output.allocator()->allocate(); + + // Fill tensors + fill(AccessorType(input1), 0, data_type); + fill(AccessorType(input2), 1, data_type); + fill(AccessorType(bn_mul), 2, data_type); + fill(AccessorType(bn_add), 3, data_type); + + // // Compute function + add_mul_add.run(); + + _target = std::move(final_output); + + if(interm_out) + { + _interm_target = std::move(add_output); + } + } + + TensorType _target{}; + TensorType _interm_target{}; + SimpleTensor<T> _reference{}; + SimpleTensor<T> _interm_reference{}; + + QuantizationInfo _input1_qinfo{}; + QuantizationInfo _input2_qinfo{}; + QuantizationInfo _bn_mul_qinfo{}; + QuantizationInfo _bn_add_qinfo{}; + QuantizationInfo _add_output_qinfo{}; + QuantizationInfo _final_output_qinfo{}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool interm_out> +class AddMulAddFloatValidationFixture : public AddMulAddGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + using Parent = AddMulAddGenericFixture<TensorType, AccessorType, FunctionType, T>; + + void setup(const TensorShape &shape, DataType data_type, ActivationLayerInfo act_info) + { + Parent::setup(shape, data_type, act_info, interm_out); + compute_reference(shape, data_type, act_info); + } + + // Compute Reference is moved outside of the generic fixture because with the quantized data types, + // it becomes a very different implementation with intermediate tensors' data types being always float. + // This way the reference calculations are more readable and the size of the classes will be smaller + // due to unrepeated fill() and target() methods. + void compute_reference(const TensorShape &shape, DataType data_type, ActivationLayerInfo &act_info) + { + TensorShape b_shape(shape.x()); + + // Create reference + SimpleTensor<T> input1{ shape, data_type }; + SimpleTensor<T> input2{ shape, data_type }; + SimpleTensor<T> bn_mul{ b_shape, data_type }; + SimpleTensor<T> bn_add{ b_shape, data_type }; + SimpleTensor<T> add_output{ shape, data_type, 1 }; + + SimpleTensor<T> bn_mul_out{ shape, data_type }; + SimpleTensor<T> bn_add_out{ shape, data_type }; + + // Fill reference + Parent::fill(input1, 0, data_type); + Parent::fill(input2, 1, data_type); + Parent::fill(bn_mul, 2, data_type); + Parent::fill(bn_add, 3, data_type); + + reference::arithmetic_operation<T>(reference::ArithmeticOperation::ADD, input1, input2, add_output, ConvertPolicy::SATURATE); + bn_mul_out = reference::pixel_wise_multiplication<T, T, T>(add_output, bn_mul, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_UP, data_type); + reference::arithmetic_operation<T>(reference::ArithmeticOperation::ADD, bn_mul_out, bn_add, bn_add_out, ConvertPolicy::SATURATE); + + if(interm_out) + { + Parent::_interm_reference = std::move(add_output); + } + + if(act_info.enabled() && act_info.activation() != ActivationLayerInfo::ActivationFunction::IDENTITY) + { + Parent::_reference = reference::activation_layer(bn_add_out, act_info); + } + else + { + Parent::_reference = std::move(bn_add_out); + } + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool interm_out> +class AddMulAddQuantizedValidationFixture : public AddMulAddGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + using Parent = AddMulAddGenericFixture<TensorType, AccessorType, FunctionType, T>; + + void setup(const TensorShape &shape, DataType data_type, ActivationLayerInfo act_info, + QuantizationInfo input1_qinfo, QuantizationInfo input2_qinfo, QuantizationInfo bn_mul_qinfo, + QuantizationInfo bn_add_qinfo, QuantizationInfo add_output_qinfo, QuantizationInfo final_output_qinfo) + { + // Quantization arguments moved to class attributes to prevent long function declerations + Parent::_input1_qinfo = input1_qinfo; + Parent::_input2_qinfo = input2_qinfo; + Parent::_bn_mul_qinfo = bn_mul_qinfo; + Parent::_bn_add_qinfo = bn_add_qinfo; + Parent::_add_output_qinfo = add_output_qinfo; + Parent::_final_output_qinfo = final_output_qinfo; + + Parent::setup(shape, data_type, act_info, interm_out); + compute_reference(shape, data_type, act_info); + } + + // Compute Reference is moved outside of the generic fixture because with the quantized data types, + // it becomes a very different implementation with intermediate tensors' data types being always float. + // This way the reference calculations are more readable and the size of the classes will be smaller + // due to unrepeated fill() and target() methods. + void compute_reference(const TensorShape &shape, DataType data_type, ActivationLayerInfo &act_info) + { + TensorShape b_shape(shape.x()); + + // Create reference + SimpleTensor<T> input1{ shape, data_type, 1, Parent::_input1_qinfo }; + SimpleTensor<T> input2{ shape, data_type, 1, Parent::_input2_qinfo }; + SimpleTensor<T> bn_mul{ b_shape, data_type, 1, Parent::_bn_mul_qinfo }; + SimpleTensor<T> bn_add{ b_shape, data_type, 1, Parent::_bn_add_qinfo }; + + // Fill input tensors + Parent::fill(input1, 0, data_type); + Parent::fill(input2, 1, data_type); + Parent::fill(bn_mul, 2, data_type); + Parent::fill(bn_add, 3, data_type); + + SimpleTensor<float> input1_dequantized = reference::dequantization_layer<float>(input1); + SimpleTensor<float> input2_dequantized = reference::dequantization_layer<float>(input2); + SimpleTensor<float> bn_mul_dequantized = reference::dequantization_layer<float>(bn_mul); + SimpleTensor<float> bn_add_dequantized = reference::dequantization_layer<float>(bn_add); + + SimpleTensor<float> add_output_dequantized{ shape, DataType::F32 }; + SimpleTensor<float> bn_add_out_dequantized{ shape, DataType::F32 }; + + reference::arithmetic_operation<float>(reference::ArithmeticOperation::ADD, input1_dequantized, input2_dequantized, add_output_dequantized, ConvertPolicy::SATURATE); + SimpleTensor<float> bn_mul_out_dequantized = reference::pixel_wise_multiplication<float, float, float>(add_output_dequantized, bn_mul_dequantized, 1.f, ConvertPolicy::SATURATE, + RoundingPolicy::TO_NEAREST_UP, DataType::F32); + reference::arithmetic_operation<float>(reference::ArithmeticOperation::ADD, bn_mul_out_dequantized, bn_add_dequantized, bn_add_out_dequantized, ConvertPolicy::SATURATE); + + if(interm_out) + { + Parent::_interm_reference = reference::quantization_layer<float, T>(add_output_dequantized, data_type, Parent::_add_output_qinfo); + } + + if(act_info.enabled() && act_info.activation() != ActivationLayerInfo::ActivationFunction::IDENTITY) + { + SimpleTensor<T> ref = reference::quantization_layer<float, T>(bn_add_out_dequantized, data_type, Parent::_final_output_qinfo); + Parent::_reference = reference::activation_layer(ref, act_info); + } + else + { + Parent::_reference = reference::quantization_layer<float, T>(bn_add_out_dequantized, data_type, Parent::_final_output_qinfo); + } + } +}; +} // namespace validation +} // namespace test +} // namespace arm_compute + +#endif // ACL_TESTS_VALIDATION_FIXTURES_ADDMULADDFIXTURE_H diff --git a/tests/validation/fixtures/ArgMinMaxFixture.h b/tests/validation/fixtures/ArgMinMaxFixture.h index f140c9846b..7a823568a8 100644 --- a/tests/validation/fixtures/ArgMinMaxFixture.h +++ b/tests/validation/fixtures/ArgMinMaxFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -42,15 +42,14 @@ namespace test { namespace validation { -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2> class ArgMinMaxValidationBaseFixture : public framework::Fixture { public: - template <typename...> - void setup(TensorShape shape, DataType data_type, int axis, ReductionOperation op, QuantizationInfo q_info) + void setup(TensorShape shape, DataType input_type, DataType output_type, int axis, ReductionOperation op, QuantizationInfo q_info) { - _target = compute_target(shape, data_type, axis, op, q_info); - _reference = compute_reference(shape, data_type, axis, op, q_info); + _target = compute_target(shape, input_type, output_type, axis, op, q_info); + _reference = compute_reference(shape, input_type, output_type, axis, op, q_info); } protected: @@ -80,7 +79,7 @@ protected: case DataType::QASYMM8: { std::pair<int, int> bounds = get_quantized_bounds(tensor.quantization_info(), -1.0f, 1.0f); - std::uniform_int_distribution<uint8_t> distribution(bounds.first, bounds.second); + std::uniform_int_distribution<uint32_t> distribution(bounds.first, bounds.second); library->fill(tensor, distribution, 0); break; @@ -88,7 +87,7 @@ protected: case DataType::QASYMM8_SIGNED: { std::pair<int, int> bounds = get_quantized_qasymm8_signed_bounds(tensor.quantization_info(), -1.0f, 1.0f); - std::uniform_int_distribution<int8_t> distribution(bounds.first, bounds.second); + std::uniform_int_distribution<int32_t> distribution(bounds.first, bounds.second); library->fill(tensor, distribution, 0); break; @@ -98,25 +97,25 @@ protected: } } - TensorType compute_target(TensorShape &src_shape, DataType data_type, int axis, ReductionOperation op, QuantizationInfo q_info) + TensorType compute_target(TensorShape &src_shape, DataType input_type, DataType output_type, int axis, ReductionOperation op, QuantizationInfo q_info) { // Create tensors - TensorType src = create_tensor<TensorType>(src_shape, data_type, 1, q_info); - TensorType dst; + TensorType src = create_tensor<TensorType>(src_shape, input_type, 1, q_info); + TensorType dst = create_tensor<TensorType>(compute_output_shape(src_shape, axis), output_type, 1, q_info); // Create and configure function FunctionType arg_min_max_layer; arg_min_max_layer.configure(&src, axis, &dst, op); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src)); @@ -127,41 +126,43 @@ protected: return dst; } - SimpleTensor<int32_t> compute_reference(TensorShape &src_shape, DataType data_type, int axis, ReductionOperation op, QuantizationInfo q_info) + TensorShape compute_output_shape(const TensorShape &src_shape, int axis) + { + return arm_compute::misc::shape_calculator::compute_reduced_shape(src_shape, axis, false); + } + + SimpleTensor<T2> compute_reference(TensorShape &src_shape, DataType input_type, DataType output_type, int axis, ReductionOperation op, QuantizationInfo q_info) { // Create reference - SimpleTensor<T> src{ src_shape, data_type, 1, q_info }; + SimpleTensor<T1> src{ src_shape, input_type, 1, q_info }; // Fill reference fill(src); - TensorShape output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(src_shape, axis, false); - return reference::reduction_operation<T, int32_t>(src, output_shape, axis, op); + return reference::reduction_operation<T1, T2>(src, compute_output_shape(src_shape, axis), axis, op, output_type); } - TensorType _target{}; - SimpleTensor<int32_t> _reference{}; + TensorType _target{}; + SimpleTensor<T2> _reference{}; }; -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class ArgMinMaxValidationQuantizedFixture : public ArgMinMaxValidationBaseFixture<TensorType, AccessorType, FunctionType, T> +template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2> +class ArgMinMaxValidationQuantizedFixture : public ArgMinMaxValidationBaseFixture<TensorType, AccessorType, FunctionType, T1, T2> { public: - template <typename...> - void setup(const TensorShape &shape, DataType data_type, int axis, ReductionOperation op, QuantizationInfo quantization_info) + void setup(const TensorShape &shape, DataType input_type, DataType output_type, int axis, ReductionOperation op, QuantizationInfo quantization_info) { - ArgMinMaxValidationBaseFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, axis, op, quantization_info); + ArgMinMaxValidationBaseFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, input_type, output_type, axis, op, quantization_info); } }; -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class ArgMinMaxValidationFixture : public ArgMinMaxValidationBaseFixture<TensorType, AccessorType, FunctionType, T> +template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2> +class ArgMinMaxValidationFixture : public ArgMinMaxValidationBaseFixture<TensorType, AccessorType, FunctionType, T1, T2> { public: - template <typename...> - void setup(const TensorShape &shape, DataType data_type, int axis, ReductionOperation op) + void setup(const TensorShape &shape, DataType input_type, DataType output_type, int axis, ReductionOperation op) { - ArgMinMaxValidationBaseFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, axis, op, QuantizationInfo()); + ArgMinMaxValidationBaseFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, input_type, output_type, axis, op, QuantizationInfo()); } }; } // namespace validation diff --git a/tests/validation/fixtures/ArithmeticDivisionFixture.h b/tests/validation/fixtures/ArithmeticDivisionFixture.h index 782939a960..e11a386130 100644 --- a/tests/validation/fixtures/ArithmeticDivisionFixture.h +++ b/tests/validation/fixtures/ArithmeticDivisionFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,7 +44,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ArithmeticDivisionBroadcastValidationFixture : public framework::Fixture { public: - template <typename...> void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type) { _target = compute_target(shape0, shape1, data_type); @@ -73,18 +72,18 @@ protected: FunctionType add; add.configure(&ref_src1, &ref_src2, &dst); - ARM_COMPUTE_EXPECT(ref_src1.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(ref_src2.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(ref_src1.info()->is_resizable()); + ARM_COMPUTE_ASSERT(ref_src2.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors ref_src1.allocator()->allocate(); ref_src2.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!ref_src1.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!ref_src2.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!ref_src1.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!ref_src2.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(ref_src1), 0); @@ -117,7 +116,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ArithmeticDivisionValidationFixture : public ArithmeticDivisionBroadcastValidationFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(const TensorShape &shape, DataType data_type) { ArithmeticDivisionBroadcastValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, shape, data_type); diff --git a/tests/validation/fixtures/ArithmeticOperationsFixture.h b/tests/validation/fixtures/ArithmeticOperationsFixture.h index 9ba7bd3ef7..0785af1151 100644 --- a/tests/validation/fixtures/ArithmeticOperationsFixture.h +++ b/tests/validation/fixtures/ArithmeticOperationsFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -45,16 +45,14 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ArithmeticOperationGenericFixture : public framework::Fixture { public: - template <typename...> - void setup(reference::ArithmeticOperation op, const TensorShape &shape0, const TensorShape &shape1, - DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy, - QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, ActivationLayerInfo act_info, bool in_place) + void setup(reference::ArithmeticOperation op, const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy, + QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, ActivationLayerInfo act_info, bool is_inplace) { - _op = op; - _act_info = act_info; - _in_place = in_place; - _target = compute_target(shape0, shape1, data_type0, data_type1, output_data_type, convert_policy, qinfo0, qinfo1, qinfo_out); - _reference = compute_reference(shape0, shape1, data_type0, data_type1, output_data_type, convert_policy, qinfo0, qinfo1, qinfo_out); + _op = op; + _act_info = act_info; + _is_inplace = is_inplace; + _target = compute_target(shape0, shape1, data_type, convert_policy, qinfo0, qinfo1, qinfo_out); + _reference = compute_reference(shape0, shape1, data_type, convert_policy, qinfo0, qinfo1, qinfo_out); } protected: @@ -64,31 +62,55 @@ protected: library->fill_tensor_uniform(tensor, i); } - TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy, + TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy, QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out) { // Create tensors - TensorType ref_src1 = create_tensor<TensorType>(shape0, data_type0, 1, qinfo0); - TensorType ref_src2 = create_tensor<TensorType>(shape1, data_type1, 1, qinfo1); - TensorType dst = create_tensor<TensorType>(TensorShape::broadcast_shape(shape0, shape1), output_data_type, 1, qinfo_out); - TensorType *dst_to_use = _in_place ? &ref_src1 : &dst; + const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1); + TensorType ref_src1 = create_tensor<TensorType>(shape0, data_type, 1, qinfo0); + TensorType ref_src2 = create_tensor<TensorType>(shape1, data_type, 1, qinfo1); + TensorType dst = create_tensor<TensorType>(out_shape, data_type, 1, qinfo_out); + + // Check whether do in-place computation and whether inputs are broadcast compatible + TensorType *actual_dst = &dst; + if(_is_inplace) + { + bool src1_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) && (qinfo0 == qinfo_out); + bool src2_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) && (qinfo1 == qinfo_out); + bool do_in_place = out_shape.total_size() != 0 && (src1_is_inplace || src2_is_inplace); + ARM_COMPUTE_ASSERT(do_in_place); + + if(src1_is_inplace) + { + actual_dst = &ref_src1; + } + else + { + actual_dst = &ref_src2; + } + } // Create and configure function FunctionType arith_op; - arith_op.configure(&ref_src1, &ref_src2, dst_to_use, convert_policy, _act_info); + arith_op.configure(&ref_src1, &ref_src2, actual_dst, convert_policy, _act_info); - ARM_COMPUTE_EXPECT(ref_src1.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(ref_src2.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst_to_use->info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(ref_src1.info()->is_resizable()); + ARM_COMPUTE_ASSERT(ref_src2.info()->is_resizable()); // Allocate tensors ref_src1.allocator()->allocate(); ref_src2.allocator()->allocate(); - dst_to_use->allocator()->allocate(); - ARM_COMPUTE_EXPECT(!ref_src1.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!ref_src2.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst_to_use->info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!ref_src1.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!ref_src2.info()->is_resizable()); + + // If don't do in-place computation, still need to allocate original dst + if(!_is_inplace) + { + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + dst.allocator()->allocate(); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + } // Fill tensors fill(AccessorType(ref_src1), 0); @@ -97,50 +119,40 @@ protected: // Compute function arith_op.run(); - if(_in_place) - { - return ref_src1; - } - return dst; + return std::move(*actual_dst); } - SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1, - DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy, + SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy, QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out) { - // current in-place implementation only supports same metadata of input and output tensors. - // By ignoring output quantization information here, we can make test cases implementation much simpler. - QuantizationInfo output_qinfo = _in_place ? qinfo0 : qinfo_out; - // Create reference - SimpleTensor<T> ref_src1{ shape0, data_type0, 1, qinfo0 }; - SimpleTensor<T> ref_src2{ shape1, data_type1, 1, qinfo1 }; - SimpleTensor<T> ref_dst{ TensorShape::broadcast_shape(shape0, shape1), output_data_type, 1, output_qinfo }; + SimpleTensor<T> ref_src1{ shape0, data_type, 1, qinfo0 }; + SimpleTensor<T> ref_src2{ shape1, data_type, 1, qinfo1 }; + SimpleTensor<T> ref_dst{ TensorShape::broadcast_shape(shape0, shape1), data_type, 1, qinfo_out }; // Fill reference fill(ref_src1, 0); fill(ref_src2, 1); auto result = reference::arithmetic_operation<T>(_op, ref_src1, ref_src2, ref_dst, convert_policy); - return _act_info.enabled() ? reference::activation_layer(result, _act_info, output_qinfo) : result; + return _act_info.enabled() ? reference::activation_layer(result, _act_info, qinfo_out) : result; } TensorType _target{}; SimpleTensor<T> _reference{}; reference::ArithmeticOperation _op{ reference::ArithmeticOperation::ADD }; ActivationLayerInfo _act_info{}; - bool _in_place{}; + bool _is_inplace{}; }; template <typename TensorType, typename AccessorType, typename FunctionType, typename T> class ArithmeticAdditionBroadcastValidationFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy) + void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy, bool is_inplace) { - ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape0, shape1, data_type0, data_type1, - output_data_type, convert_policy, QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), false); + ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape0, shape1, data_type, convert_policy, + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), is_inplace); } }; @@ -148,11 +160,10 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ArithmeticAdditionValidationFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy) + void setup(const TensorShape &shape, DataType data_type, ConvertPolicy convert_policy, bool is_inplace) { - ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape, shape, data_type0, data_type1, - output_data_type, convert_policy, QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), false); + ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape, shape, data_type, convert_policy, + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), is_inplace); } }; @@ -160,11 +171,10 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ArithmeticAdditionBroadcastValidationFloatFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy, ActivationLayerInfo act_info) + void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy, ActivationLayerInfo act_info, bool is_inplace) { - ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape0, shape1, data_type0, data_type1, - output_data_type, convert_policy, QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, false); + ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape0, shape1, data_type, convert_policy, + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace); } }; @@ -172,11 +182,10 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ArithmeticAdditionValidationFloatFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy, ActivationLayerInfo act_info) + void setup(const TensorShape &shape, DataType data_type, ConvertPolicy convert_policy, ActivationLayerInfo act_info, bool is_inplace) { - ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape, shape, data_type0, data_type1, - output_data_type, convert_policy, QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, false); + ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape, shape, data_type, convert_policy, + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace); } }; @@ -184,13 +193,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ArithmeticAdditionValidationQuantizedFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy, - QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out) + void setup(const TensorShape &shape, DataType data_type, ConvertPolicy convert_policy, QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace) { - ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape, shape, data_type0, data_type1, - output_data_type, convert_policy, qinfo0, qinfo1, qinfo_out, ActivationLayerInfo(), false); + ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape, shape, data_type, convert_policy, + qinfo0, qinfo1, qinfo_out, ActivationLayerInfo(), is_inplace); } }; @@ -198,13 +205,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ArithmeticAdditionValidationQuantizedBroadcastFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, - ConvertPolicy convert_policy, QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out) + void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy, QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, + bool is_inplace) { - ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape0, shape1, - data_type0, data_type1, output_data_type, convert_policy, - qinfo0, qinfo1, qinfo_out, ActivationLayerInfo(), false); + ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape0, shape1, data_type, convert_policy, + qinfo0, qinfo1, qinfo_out, ActivationLayerInfo(), is_inplace); } }; @@ -212,12 +217,10 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ArithmeticSubtractionBroadcastValidationFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy, bool in_place) + void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy, bool is_inplace) { - ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape0, shape1, - data_type0, data_type1, output_data_type, convert_policy, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), in_place); + ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape0, shape1, data_type, convert_policy, + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), is_inplace); } }; @@ -225,13 +228,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ArithmeticSubtractionBroadcastValidationFloatFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy, ActivationLayerInfo act_info, - bool in_place) + void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy, ActivationLayerInfo act_info, + bool is_inplace) { - ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape0, shape1, - data_type0, data_type1, output_data_type, convert_policy, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, in_place); + ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape0, shape1, data_type, convert_policy, + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace); } }; @@ -239,12 +240,10 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ArithmeticSubtractionValidationFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy, bool in_place) + void setup(const TensorShape &shape, DataType data_type, ConvertPolicy convert_policy, bool is_inplace) { - ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape, shape, - data_type0, data_type1, output_data_type, convert_policy, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), in_place); + ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape, shape, data_type, convert_policy, + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), is_inplace); } }; @@ -252,12 +251,10 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ArithmeticSubtractionValidationFloatFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy, ActivationLayerInfo act_info, bool in_place) + void setup(const TensorShape &shape, DataType data_type, ConvertPolicy convert_policy, ActivationLayerInfo act_info, bool is_inplace) { - ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape, shape, - data_type0, data_type1, output_data_type, convert_policy, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, in_place); + ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape, shape, data_type, convert_policy, + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace); } }; @@ -265,14 +262,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ArithmeticSubtractionValidationQuantizedFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy, - QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool in_place) + void setup(const TensorShape &shape, DataType data_type, ConvertPolicy convert_policy, QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace) { - ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape, shape, - data_type0, data_type1, output_data_type, - convert_policy, qinfo0, qinfo1, qinfo_out, ActivationLayerInfo(), in_place); + ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape, shape, data_type, convert_policy, + qinfo0, qinfo1, qinfo_out, ActivationLayerInfo(), is_inplace); } }; @@ -280,13 +274,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ArithmeticSubtractionValidationQuantizedBroadcastFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, - ConvertPolicy convert_policy, QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool in_place) + void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy, QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, + bool is_inplace) { - ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape0, shape1, - data_type0, data_type1, output_data_type, convert_policy, - qinfo0, qinfo1, qinfo_out, ActivationLayerInfo(), in_place); + ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape0, shape1, data_type, convert_policy, + qinfo0, qinfo1, qinfo_out, ActivationLayerInfo(), is_inplace); } }; } // namespace validation diff --git a/tests/validation/fixtures/BatchNormalizationLayerFixture.h b/tests/validation/fixtures/BatchNormalizationLayerFixture.h index f5a5420df3..54a0ed9e09 100644 --- a/tests/validation/fixtures/BatchNormalizationLayerFixture.h +++ b/tests/validation/fixtures/BatchNormalizationLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,7 +44,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class BatchNormalizationLayerValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape0, TensorShape shape1, float epsilon, bool use_beta, bool use_gamma, ActivationLayerInfo act_info, DataType dt, DataLayout data_layout) { _data_type = dt; @@ -111,12 +110,12 @@ protected: TensorType *gamma_ptr = _use_gamma ? &gamma : nullptr; norm.configure(&src, &dst, &mean, &var, beta_ptr, gamma_ptr, epsilon, act_info); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(mean.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(var.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(beta.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(gamma.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + ARM_COMPUTE_ASSERT(mean.info()->is_resizable()); + ARM_COMPUTE_ASSERT(var.info()->is_resizable()); + ARM_COMPUTE_ASSERT(beta.info()->is_resizable()); + ARM_COMPUTE_ASSERT(gamma.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); @@ -126,12 +125,12 @@ protected: beta.allocator()->allocate(); gamma.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!mean.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!var.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!beta.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!gamma.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!mean.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!var.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!beta.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!gamma.info()->is_resizable()); // Fill tensors fill(AccessorType(src), AccessorType(mean), AccessorType(var), AccessorType(beta), AccessorType(gamma)); diff --git a/tests/validation/fixtures/BatchNormalizationLayerFusionFixture.h b/tests/validation/fixtures/BatchNormalizationLayerFusionFixture.h index ccacfeb062..161eeb0ef4 100644 --- a/tests/validation/fixtures/BatchNormalizationLayerFusionFixture.h +++ b/tests/validation/fixtures/BatchNormalizationLayerFusionFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -45,7 +45,6 @@ template <typename TensorType, typename AccessorType, typename ConvolutionFuncti class BatchNormalizationLayerFusionValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape src_shape, TensorShape w_shape, TensorShape b_shape, TensorShape dst_shape, PadStrideInfo info, Size2D dilation, bool use_conv_b, bool use_beta, bool use_gamma, float epsilon, DataType dt, DataLayout data_layout) { @@ -110,16 +109,16 @@ protected: fuse_fn.configure(&conv_w, &bn_mean, &bn_var, &fused_w, &fused_b, conv_b_ptr, beta_ptr, gamma_ptr, epsilon); conv_fn.configure(&src, &fused_w, &fused_b, &dst, info); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(conv_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(conv_b.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(bn_mean.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(bn_var.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(bn_beta.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(bn_gamma.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(fused_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(fused_b.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(conv_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(conv_b.info()->is_resizable()); + ARM_COMPUTE_ASSERT(bn_mean.info()->is_resizable()); + ARM_COMPUTE_ASSERT(bn_var.info()->is_resizable()); + ARM_COMPUTE_ASSERT(bn_beta.info()->is_resizable()); + ARM_COMPUTE_ASSERT(bn_gamma.info()->is_resizable()); + ARM_COMPUTE_ASSERT(fused_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(fused_b.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); @@ -133,16 +132,16 @@ protected: fused_b.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!conv_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!conv_b.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bn_mean.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bn_var.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bn_beta.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bn_gamma.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!fused_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!fused_b.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!conv_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!conv_b.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!bn_mean.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!bn_var.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!bn_beta.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!bn_gamma.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!fused_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!fused_b.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src), diff --git a/tests/validation/fixtures/BatchToSpaceLayerFixture.h b/tests/validation/fixtures/BatchToSpaceLayerFixture.h index 796afd1c5d..56a6109dbc 100644 --- a/tests/validation/fixtures/BatchToSpaceLayerFixture.h +++ b/tests/validation/fixtures/BatchToSpaceLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_TEST_BATCH_TO_SPACE_LAYER_FIXTURE #define ARM_COMPUTE_TEST_BATCH_TO_SPACE_LAYER_FIXTURE +#include "arm_compute/core/Helpers.h" #include "tests/Globals.h" #include "tests/framework/Asserts.h" #include "tests/framework/Fixture.h" @@ -39,11 +40,10 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class BatchToSpaceLayerValidationFixture : public framework::Fixture { public: - template <typename...> - void setup(TensorShape input_shape, TensorShape block_shape_shape, TensorShape output_shape, DataType data_type, DataLayout data_layout) + void setup(const TensorShape &input_shape, const std::vector<int32_t> &block_shape, const CropInfo &crop_info, const TensorShape &output_shape, DataType data_type, DataLayout data_layout) { - _target = compute_target(input_shape, block_shape_shape, output_shape, data_type, data_layout); - _reference = compute_reference(input_shape, block_shape_shape, output_shape, data_type); + _target = compute_target(input_shape, block_shape, crop_info, output_shape, data_type, data_layout); + _reference = compute_reference(input_shape, block_shape, crop_info, output_shape, data_type); } protected: @@ -56,9 +56,10 @@ protected: DistributionType distribution{ T(-1.0f), T(1.0f) }; library->fill(tensor, distribution, i); } - TensorType compute_target(TensorShape input_shape, TensorShape block_shape_shape, TensorShape output_shape, + TensorType compute_target(TensorShape input_shape, const std::vector<int32_t> &block_shape, const CropInfo &crop_info, TensorShape output_shape, DataType data_type, DataLayout data_layout) { + ARM_COMPUTE_ERROR_ON(block_shape.size() != 2U); // Only support batch to 2D space (x, y) for now if(data_layout == DataLayout::NHWC) { permute(input_shape, PermutationVector(2U, 0U, 1U)); @@ -66,64 +67,49 @@ protected: } // Create tensors - TensorType input = create_tensor<TensorType>(input_shape, data_type, 1, QuantizationInfo(), data_layout); - TensorType block_shape = create_tensor<TensorType>(block_shape_shape, DataType::S32); - TensorType output = create_tensor<TensorType>(output_shape, data_type, 1, QuantizationInfo(), data_layout); + TensorType input = create_tensor<TensorType>(input_shape, data_type, 1, QuantizationInfo(), data_layout); + TensorType output = create_tensor<TensorType>(output_shape, data_type, 1, QuantizationInfo(), data_layout); // Create and configure function FunctionType batch_to_space; - batch_to_space.configure(&input, &block_shape, &output); + batch_to_space.configure(&input, block_shape.at(0), block_shape.at(1), &output, crop_info); - ARM_COMPUTE_EXPECT(input.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(block_shape.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(output.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(input.info()->is_resizable()); + ARM_COMPUTE_ASSERT(output.info()->is_resizable()); // Allocate tensors input.allocator()->allocate(); - block_shape.allocator()->allocate(); output.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!input.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!block_shape.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!output.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!input.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!output.info()->is_resizable()); // Fill tensors fill(AccessorType(input), 0); - { - auto block_shape_data = AccessorType(block_shape); - const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - for(unsigned int i = 0; i < block_shape_shape.x(); ++i) - { - static_cast<int32_t *>(block_shape_data.data())[i] = output_shape[i + idx_width] / input_shape[i + idx_width]; - } - } // Compute function batch_to_space.run(); return output; } - SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &block_shape_shape, - const TensorShape &output_shape, DataType data_type) + SimpleTensor<T> compute_reference(const TensorShape &input_shape, const std::vector<int32_t> &block_shape, + const CropInfo &crop_info, const TensorShape &output_shape, DataType data_type) { + ARM_COMPUTE_ERROR_ON(block_shape.size() != 2U); // Only support batch to 2D space (x, y) for now // Create reference - SimpleTensor<T> input{ input_shape, data_type }; - SimpleTensor<int32_t> block_shape{ block_shape_shape, DataType::S32 }; + SimpleTensor<T> input{ input_shape, data_type }; // Fill reference fill(input, 0); - for(unsigned int i = 0; i < block_shape_shape.x(); ++i) - { - block_shape[i] = output_shape[i] / input_shape[i]; - } // Compute reference - return reference::batch_to_space(input, block_shape, output_shape); + return reference::batch_to_space(input, block_shape, crop_info, output_shape); } TensorType _target{}; SimpleTensor<T> _reference{}; }; + } // namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/fixtures/BitwiseAndFixture.h b/tests/validation/fixtures/BitwiseAndFixture.h index 6c8e1b1f6e..745a34058e 100644 --- a/tests/validation/fixtures/BitwiseAndFixture.h +++ b/tests/validation/fixtures/BitwiseAndFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,7 +43,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class BitwiseAndValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, DataType data_type) { _target = compute_target(shape, data_type); @@ -69,17 +68,17 @@ protected: bitwise_and.configure(&src1, &src2, &dst); - ARM_COMPUTE_EXPECT(src1.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(src2.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src1.info()->is_resizable()); + ARM_COMPUTE_ASSERT(src2.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src1.allocator()->allocate(); src2.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src1.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!src2.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src1.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!src2.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src1), 0); diff --git a/tests/validation/fixtures/BitwiseNotFixture.h b/tests/validation/fixtures/BitwiseNotFixture.h index c6affcfad6..bdfd255156 100644 --- a/tests/validation/fixtures/BitwiseNotFixture.h +++ b/tests/validation/fixtures/BitwiseNotFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,7 +43,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class BitwiseNotValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, DataType data_type) { _target = compute_target(shape, data_type); @@ -68,14 +67,14 @@ protected: bitwise_not.configure(&src, &dst); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src)); diff --git a/tests/validation/fixtures/BitwiseOrFixture.h b/tests/validation/fixtures/BitwiseOrFixture.h index a40f635b9e..03560e0171 100644 --- a/tests/validation/fixtures/BitwiseOrFixture.h +++ b/tests/validation/fixtures/BitwiseOrFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,7 +43,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class BitwiseOrValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, DataType data_type) { _target = compute_target(shape, data_type); @@ -69,17 +68,17 @@ protected: bitwise_or.configure(&src1, &src2, &dst); - ARM_COMPUTE_EXPECT(src1.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(src2.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src1.info()->is_resizable()); + ARM_COMPUTE_ASSERT(src2.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src1.allocator()->allocate(); src2.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src1.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!src2.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src1.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!src2.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src1), 0); diff --git a/tests/validation/fixtures/BitwiseXorFixture.h b/tests/validation/fixtures/BitwiseXorFixture.h index c103033fe6..4872b231a5 100644 --- a/tests/validation/fixtures/BitwiseXorFixture.h +++ b/tests/validation/fixtures/BitwiseXorFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,7 +43,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class BitwiseXorValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, DataType data_type) { _target = compute_target(shape, data_type); @@ -69,17 +68,17 @@ protected: bitwise_xor.configure(&src1, &src2, &dst); - ARM_COMPUTE_EXPECT(src1.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(src2.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src1.info()->is_resizable()); + ARM_COMPUTE_ASSERT(src2.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src1.allocator()->allocate(); src2.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src1.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!src2.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src1.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!src2.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src1), 0); diff --git a/tests/validation/fixtures/BoundingBoxTransformFixture.h b/tests/validation/fixtures/BoundingBoxTransformFixture.h index 7155848b4d..03edaeab16 100644 --- a/tests/validation/fixtures/BoundingBoxTransformFixture.h +++ b/tests/validation/fixtures/BoundingBoxTransformFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -102,7 +102,6 @@ class BoundingBoxTransformGenericFixture : public framework::Fixture public: using TDeltas = typename std::conditional<std::is_same<typename std::decay<T>::type, uint16_t>::value, uint8_t, T>::type; - template <typename...> void setup(TensorShape deltas_shape, const BoundingBoxTransformInfo &info, DataType data_type, QuantizationInfo deltas_qinfo) { const bool is_qasymm16 = data_type == DataType::QASYMM16; @@ -157,17 +156,17 @@ protected: FunctionType bbox_transform; bbox_transform.configure(&boxes, &pred_boxes, &deltas, bbox_info); - ARM_COMPUTE_EXPECT(deltas.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(boxes.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(pred_boxes.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(deltas.info()->is_resizable()); + ARM_COMPUTE_ASSERT(boxes.info()->is_resizable()); + ARM_COMPUTE_ASSERT(pred_boxes.info()->is_resizable()); // Allocate tensors deltas.allocator()->allocate(); boxes.allocator()->allocate(); pred_boxes.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!deltas.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!boxes.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!deltas.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!boxes.info()->is_resizable()); // Fill tensors TensorShape img_shape(bbox_info.scale() * bbox_info.img_width(), bbox_info.scale() * bbox_info.img_height()); @@ -215,7 +214,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class BoundingBoxTransformFixture : public BoundingBoxTransformGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(TensorShape deltas_shape, const BoundingBoxTransformInfo &info, DataType data_type) { BoundingBoxTransformGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(deltas_shape, info, data_type, QuantizationInfo()); @@ -228,7 +226,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class BoundingBoxTransformQuantizedFixture : public BoundingBoxTransformGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(TensorShape deltas_shape, const BoundingBoxTransformInfo &info, DataType data_type, QuantizationInfo deltas_qinfo) { BoundingBoxTransformGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(deltas_shape, info, data_type, deltas_qinfo); diff --git a/tests/validation/fixtures/CastFixture.h b/tests/validation/fixtures/CastFixture.h index c9764af37c..e9d624e6f3 100644 --- a/tests/validation/fixtures/CastFixture.h +++ b/tests/validation/fixtures/CastFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -36,7 +36,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class CastValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy) { _target = compute_target(shape, dt_in, dt_out, policy); @@ -86,6 +85,16 @@ protected: library->fill_tensor_uniform(tensor, i, static_cast<int32_t>(signed_min), static_cast<int32_t>(signed_max)); break; } + case DataType::U64: + { + library->fill_tensor_uniform(tensor, i, static_cast<uint64_t>(unsigned_min), static_cast<uint64_t>(unsigned_max)); + break; + } + case DataType::S64: + { + library->fill_tensor_uniform(tensor, i, static_cast<int64_t>(signed_min), static_cast<int64_t>(signed_max)); + break; + } default: ARM_COMPUTE_ERROR("NOT SUPPORTED!"); } @@ -106,15 +115,15 @@ protected: FunctionType cast; cast.configure(&src, &dst, policy); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src), 0, dt_in, dt_out); diff --git a/tests/validation/fixtures/ChannelShuffleLayerFixture.h b/tests/validation/fixtures/ChannelShuffleLayerFixture.h index de718fbbaa..530dba3893 100644 --- a/tests/validation/fixtures/ChannelShuffleLayerFixture.h +++ b/tests/validation/fixtures/ChannelShuffleLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -45,7 +45,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ChannelShuffleLayerValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, unsigned int num_groups, DataType data_type, DataLayout data_layout) { _target = compute_target(shape, data_type, num_groups, data_layout); @@ -75,15 +74,15 @@ protected: FunctionType channel_shuffle_func; channel_shuffle_func.configure(&src, &dst, num_groups); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src)); diff --git a/tests/validation/fixtures/Col2ImFixture.h b/tests/validation/fixtures/Col2ImFixture.h index f8673af38a..4d56d607b7 100644 --- a/tests/validation/fixtures/Col2ImFixture.h +++ b/tests/validation/fixtures/Col2ImFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -45,10 +45,9 @@ namespace validation using namespace arm_compute::misc::shape_calculator; template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool batch_size_on_z> -class Col2ImValidationFixture : public framework::Fixture +class Col2ImOpValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape input_shape, const unsigned int convolved_width, unsigned int convolved_height, unsigned int num_groups, DataType data_type) { const Size2D convolved_dims(convolved_width, convolved_height); @@ -74,23 +73,28 @@ protected: // Create and configure function FunctionType col2im_func; - col2im_func.configure(&src, &dst, convolved_dims, num_groups); + col2im_func.configure(src.info(), dst.info(), convolved_dims, num_groups); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src), 0); + arm_compute::ITensorPack pack = + { + { arm_compute::TensorType::ACL_SRC, &src }, + { arm_compute::TensorType::ACL_DST, &dst } + }; // Compute function - col2im_func.run(); + col2im_func.run(pack); return dst; } diff --git a/tests/validation/fixtures/ComparisonFixture.h b/tests/validation/fixtures/ComparisonFixture.h index 43da0ae49c..f25d5abb73 100644 --- a/tests/validation/fixtures/ComparisonFixture.h +++ b/tests/validation/fixtures/ComparisonFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,7 +44,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ComparisonValidationGenericFixture : public framework::Fixture { public: - template <typename...> void setup(ComparisonOperation op, const TensorShape &shape0, const TensorShape &shape1, DataType data_type, QuantizationInfo qinfo0, QuantizationInfo qinfo1) { _target = compute_target(op, shape0, shape1, data_type, qinfo0, qinfo1); @@ -71,18 +70,18 @@ protected: FunctionType comp_op; comp_op.configure(&ref_src1, &ref_src2, &dst, op); - ARM_COMPUTE_EXPECT(ref_src1.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(ref_src2.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(ref_src1.info()->is_resizable()); + ARM_COMPUTE_ASSERT(ref_src2.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors ref_src1.allocator()->allocate(); ref_src2.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!ref_src1.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!ref_src2.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!ref_src1.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!ref_src2.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(ref_src1), 0); @@ -117,7 +116,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ComparisonBroadcastValidationFixture : public ComparisonValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(ComparisonOperation op, const TensorShape &shape0, const TensorShape &shape1, DataType data_type) { ComparisonValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(op, shape0, shape1, data_type, QuantizationInfo(), QuantizationInfo()); @@ -128,7 +126,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ComparisonValidationFixture : public ComparisonValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(ComparisonOperation op, const TensorShape &shape, DataType data_type) { ComparisonValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(op, shape, shape, data_type, QuantizationInfo(), QuantizationInfo()); @@ -139,7 +136,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ComparisonValidationQuantizedFixture : public ComparisonValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(ComparisonOperation op, const TensorShape &shape, DataType data_type, QuantizationInfo qinfo0, QuantizationInfo qinfo1) { @@ -151,7 +147,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ComparisonQuantizedBroadcastValidationFixture : public ComparisonValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(ComparisonOperation op, const TensorShape &shape0, const TensorShape &shape1, DataType data_type, QuantizationInfo qinfo0, QuantizationInfo qinfo1) { ComparisonValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(op, shape0, shape1, data_type, qinfo0, qinfo1); diff --git a/tests/validation/fixtures/ComputeAllAnchorsFixture.h b/tests/validation/fixtures/ComputeAllAnchorsFixture.h index f385cb81f0..620f1b53fa 100644 --- a/tests/validation/fixtures/ComputeAllAnchorsFixture.h +++ b/tests/validation/fixtures/ComputeAllAnchorsFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,7 +44,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ComputeAllAnchorsGenericFixture : public framework::Fixture { public: - template <typename...> void setup(size_t num_anchors, const ComputeAnchorsInfo &info, DataType data_type, QuantizationInfo qinfo) { _target = compute_target(num_anchors, data_type, info, qinfo); @@ -69,13 +68,13 @@ protected: FunctionType compute_all_anchors; compute_all_anchors.configure(&anchors, &all_anchors, info); - ARM_COMPUTE_EXPECT(all_anchors.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(all_anchors.info()->is_resizable()); // Allocate tensors all_anchors.allocator()->allocate(); anchors.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!all_anchors.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!all_anchors.info()->is_resizable()); // Fill tensors fill(AccessorType(anchors)); @@ -107,7 +106,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ComputeAllAnchorsFixture : public ComputeAllAnchorsGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(size_t num_anchors, const ComputeAnchorsInfo &info, DataType data_type) { ComputeAllAnchorsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(num_anchors, info, data_type, QuantizationInfo()); @@ -118,7 +116,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ComputeAllAnchorsQuantizedFixture : public ComputeAllAnchorsGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(size_t num_anchors, const ComputeAnchorsInfo &info, DataType data_type, QuantizationInfo qinfo) { ComputeAllAnchorsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(num_anchors, info, data_type, qinfo); diff --git a/tests/validation/fixtures/ConcatenateLayerFixture.h b/tests/validation/fixtures/ConcatenateLayerFixture.h index d9615ff8b5..3a021661ac 100644 --- a/tests/validation/fixtures/ConcatenateLayerFixture.h +++ b/tests/validation/fixtures/ConcatenateLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -50,7 +50,6 @@ private: using SrcITensorType = typename std::conditional<CI, const ITensorType, ITensorType>::type; public: - template <typename...> void setup(TensorShape shape, DataType data_type, unsigned int axis) { // Create input shapes @@ -119,20 +118,20 @@ protected: for(auto &src : srcs) { - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); } - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors for(auto &src : srcs) { src.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); } dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors int i = 0; diff --git a/tests/validation/fixtures/ConvertFullyConnectedWeightsFixture.h b/tests/validation/fixtures/ConvertFullyConnectedWeightsFixture.h index d7984830f5..7ad14e1b40 100644 --- a/tests/validation/fixtures/ConvertFullyConnectedWeightsFixture.h +++ b/tests/validation/fixtures/ConvertFullyConnectedWeightsFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,7 +43,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ConvertFullyConnectedWeightsValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape input_shape, unsigned int weights_w, DataLayout training_data_layout, DataType data_type) { const unsigned int height = input_shape.x() * input_shape.y() * input_shape.z(); @@ -61,7 +60,7 @@ protected: { case DataType::QASYMM8: { - std::uniform_int_distribution<uint8_t> distribution(0, 10); + std::uniform_int_distribution<uint32_t> distribution(0, 10); library->fill(tensor, distribution, i); break; } @@ -93,15 +92,15 @@ protected: convert_weights.configure(&src, &dst, input_shape, training_data_layout); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src), 0); diff --git a/tests/validation/fixtures/ConvolutionLayerFixture.h b/tests/validation/fixtures/ConvolutionLayerFixture.h index a4db49fc8e..0622e5e6f0 100644 --- a/tests/validation/fixtures/ConvolutionLayerFixture.h +++ b/tests/validation/fixtures/ConvolutionLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,12 +21,19 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_CONVOLUTION_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_CONVOLUTION_LAYER_FIXTURE + +#ifndef ACL_TESTS_VALIDATION_FIXTURES_CONVOLUTIONLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_CONVOLUTIONLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" +#include "arm_compute/graph/Utils.h" +#ifdef ARM_COMPUTE_OPENCL_ENABLED +#include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h" +#endif // ARM_COMPUTE_OPENCL_ENABLED #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/NEON/kernels/arm_gemm/utils.hpp" +#include "src/graph/mutators/MutatorUtils.h" #include "tests/AssetsLibrary.h" #include "tests/Globals.h" #include "tests/IAccessor.h" @@ -35,10 +42,12 @@ #include "tests/validation/Helpers.h" #include "tests/validation/reference/ActivationLayer.h" #include "tests/validation/reference/ConvolutionLayer.h" +#include "tests/validation/reference/PadLayer.h" #include "tests/validation/reference/Permute.h" #include "tests/validation/reference/Utils.h" #include <random> +#include <type_traits> namespace arm_compute { @@ -49,13 +58,30 @@ namespace validation namespace detail { template <typename ConvolutionFunction, typename TensorType> -void configure_conv_function(ConvolutionFunction &func, +#ifdef ARM_COMPUTE_OPENCL_ENABLED +std::enable_if_t<!std::is_same<ConvolutionFunction, CLGEMMConvolutionLayer>::value, void> +#else // ARM_COMPUTE_OPENCL_ENABLED +void +#endif // ARM_COMPUTE_OPENCL_ENABLED +configure_conv_function(ConvolutionFunction &func, + TensorType *src, const TensorType *weights, const TensorType *bias, TensorType *dst, + const PadStrideInfo &info, const WeightsInfo &weights_info, + const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups) +{ + func.configure(src, weights, bias, dst, info, weights_info, dilation, act_info, false /* enable_fast_math */, num_groups); +} + +#ifdef ARM_COMPUTE_OPENCL_ENABLED +template <typename ConvolutionFunction, typename TensorType> +std::enable_if_t<std::is_same<ConvolutionFunction, CLGEMMConvolutionLayer>::value, void> +configure_conv_function(ConvolutionFunction &func, TensorType *src, const TensorType *weights, const TensorType *bias, TensorType *dst, const PadStrideInfo &info, const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups) { func.configure(src, weights, bias, dst, info, weights_info, dilation, act_info, num_groups); } +#endif // ARM_COMPUTE_OPENCL_ENABLED } // namespace detail template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename TW> @@ -66,26 +92,84 @@ public: || std::is_same<typename std::decay<T>::type, int8_t>::value, int32_t, T >::type; + void setup_quantization(TensorShape input_shape, TensorShape weights_shape, QuantizationInfo &input_q_info, + QuantizationInfo &weights_q_info, DataType data_type) + { + const int32_t t_max = static_cast<int32_t>(std::numeric_limits<T>::max()); + const int32_t t_min = static_cast<int32_t>(std::numeric_limits<T>::min()); + + std::mt19937 generator(library->seed() + _hash); + std::uniform_real_distribution<float> distribution_float(-5.0f, 3.0f); + std::uniform_int_distribution<int32_t> distribution_t(t_min, t_max); + + const float scale_lhs = pow(2, distribution_float(generator)); // [2^-5, 2^3] + const float scale_rhs = pow(2, distribution_float(generator)); // [2^-5, 2^3] + + const int32_t offset_lhs = distribution_t(generator); + const int32_t offset_rhs = distribution_t(generator); + + _quantization_info = QuantizationInfo(scale_lhs, offset_lhs); + _weight_quantization_info = QuantizationInfo(scale_rhs, offset_rhs); + + QuantizationHint q_hint = suggest_conv_dst_q_info_and_bias(input_q_info, weights_q_info, + weights_shape.y() /* heights */, weights_shape.x() /* width */, input_shape.z() /* channels */, + data_type, 0.5f /* bias_fraction */); + + _dst_q_info = q_hint.q_info; + _min_bias = q_hint.bias_min; + _max_bias = q_hint.bias_max; + } + public: - template <typename...> void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool reshape_weights, - DataType data_type, DataType weights_data_type, DataLayout data_layout, QuantizationInfo quantization_info, QuantizationInfo weight_quantization_info, ActivationLayerInfo act_info) + DataType data_type, DataType weights_data_type, DataLayout data_layout, QuantizationInfo quantization_info, QuantizationInfo weight_quantization_info, ActivationLayerInfo act_info, + bool mixed_layout = false, PaddingList pre_pad_layer = PaddingList({}), bool padded_weights = false) { + // This hash is used by random generators. There may be hash collisions but + // this is intentional as it's a very easy way to make the the current + // random generation process almost different for many test configurations, + // which were using the same set of values before. + _hash = input_shape[0] + input_shape[1] + input_shape[2] + input_shape[3] + + + weights_shape[0] + weights_shape[1] + weights_shape[2] + weights_shape[3] + + mixed_layout + (data_type == DataType::QASYMM8_SIGNED) + (data_layout == DataLayout::NHWC); + + _mixed_layout = mixed_layout; _data_type = data_type; _weights_data_type = weights_data_type; - _is_quantized = is_data_type_quantized_asymmetric(data_type); + const bool is_quantized = is_data_type_quantized(weights_data_type); _is_bfloat16 = data_type == DataType::BFLOAT16; - _bias_data_type = _is_quantized ? DataType::S32 : (_is_bfloat16 ? DataType::F32 : data_type); + _bias_data_type = is_quantized ? DataType::S32 : (_is_bfloat16 ? DataType::F32 : data_type); _output_data_type = _is_bfloat16 ? DataType::F32 : data_type; _quantization_info = quantization_info; _weight_quantization_info = weight_quantization_info; _data_layout = data_layout; + _dst_q_info = quantization_info; + + if(is_quantized && !is_data_type_quantized_symmetric(weights_data_type) && (!act_info.enabled() || act_info.activation() == ActivationFunction::IDENTITY)) + { + setup_quantization(input_shape, weights_shape, _quantization_info, _weight_quantization_info, data_type); + _use_dynamic_output_quant = true; + } - _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, reshape_weights, dilation, act_info); - _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, dilation, act_info); + _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, reshape_weights, dilation, act_info, pre_pad_layer, padded_weights); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, dilation, act_info, pre_pad_layer); } protected: + void mix_layout(FunctionType &layer, TensorType &src, TensorType &dst) + { + // Test Multi DataLayout graph cases, when the data layout changes after configure + src.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); + dst.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); + + // Compute Convolution function + layer.run(); + + // Reinstating original data layout for the test suite to properly check the values + src.info()->set_data_layout(_data_layout); + dst.info()->set_data_layout(_data_layout); + } + void regularize_values(void *values, size_t size) { float *fvalues = static_cast<float *>(values); @@ -102,16 +186,34 @@ protected: { case DataType::QASYMM8: { - std::pair<int, int> bounds = get_quantized_bounds(tensor.quantization_info(), -1.0f, 1.0f); - std::uniform_int_distribution<uint8_t> distribution(bounds.first, bounds.second); - library->fill(tensor, distribution, i); + if(_use_dynamic_output_quant) + { + std::uniform_int_distribution<int32_t> distribution(0, 255); + library->fill(tensor, distribution, i); + } + else + { + // Legacy initialization in case the output quantization info can't be reliably estimated + std::pair<int, int> bounds = get_quantized_bounds(tensor.quantization_info(), -1.0f, 1.0f); + std::uniform_int_distribution<uint32_t> distribution(bounds.first, bounds.second); + library->fill(tensor, distribution, i); + } break; } case DataType::QASYMM8_SIGNED: { - std::pair<int, int> bounds = get_quantized_qasymm8_signed_bounds(tensor.quantization_info(), -1.0f, 1.0f); - std::uniform_int_distribution<int8_t> distribution(bounds.first, bounds.second); - library->fill(tensor, distribution, i); + if(_use_dynamic_output_quant) + { + std::uniform_int_distribution<int32_t> distribution(-128, 127); + library->fill(tensor, distribution, i); + } + else + { + // Legacy initialization in case the output quantization info can't be reliably estimated + std::pair<int, int> bounds = get_quantized_qasymm8_signed_bounds(tensor.quantization_info(), -1.0f, 1.0f); + std::uniform_int_distribution<int32_t> distribution(bounds.first, bounds.second); + library->fill(tensor, distribution, i); + } break; } case DataType::QSYMM8_PER_CHANNEL: @@ -130,13 +232,13 @@ protected: max_bound = bounds.second; } } - std::uniform_int_distribution<int8_t> distribution(min_bound, max_bound); + std::uniform_int_distribution<int32_t> distribution(min_bound, max_bound); library->fill(tensor, distribution, i); break; } case DataType::S32: { - std::uniform_int_distribution<int32_t> distribution(-100, 100); + std::uniform_int_distribution<int32_t> distribution(_min_bias, _max_bias); library->fill(tensor, distribution, i); break; } @@ -163,8 +265,9 @@ protected: } } + // given input is IN nchw format TensorType compute_target(TensorShape input_shape, TensorShape weights_shape, const TensorShape &bias_shape, TensorShape output_shape, const PadStrideInfo &info, - bool reshape_weights, const Size2D &dilation, const ActivationLayerInfo act_info) + bool reshape_weights, const Size2D &dilation, const ActivationLayerInfo act_info, PaddingList pre_pad_layer = PaddingList({}), bool padded_weights = false) { ARM_COMPUTE_ERROR_ON((input_shape[2] % weights_shape[2]) != 0); @@ -175,6 +278,18 @@ protected: permute(input_shape, PermutationVector(2U, 0U, 1U)); permute(weights_shape, PermutationVector(2U, 0U, 1U)); permute(output_shape, PermutationVector(2U, 0U, 1U)); + + if(pre_pad_layer.size() > 0) + { + // make sure paddings exist for each c,h,w dimensions + for(unsigned int i = 0; i < 3 - pre_pad_layer.size(); ++i) + { + pre_pad_layer.push_back({ 0, 0 }); + } + + // rotate padding info from nchw to nhwc + std::rotate(pre_pad_layer.begin(), pre_pad_layer.begin() + 2, pre_pad_layer.begin() + 3); + } } const int idx_width = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH); @@ -186,17 +301,47 @@ protected: // Create tensors TensorType src = create_tensor<TensorType>(input_shape, _data_type, 1, _quantization_info, _data_layout); TensorType weights = create_tensor<TensorType>(reshaped_weights_shape, _weights_data_type, 1, _weight_quantization_info, _data_layout); - TensorType bias = create_tensor<TensorType>(bias_shape, _bias_data_type, 1, _quantization_info, _data_layout); - TensorType dst = create_tensor<TensorType>(output_shape, _output_data_type, 1, _quantization_info, _data_layout); + TensorType bias = create_tensor<TensorType>(bias_shape, _bias_data_type, 1, QuantizationInfo() /*bias is not a quantized type*/, _data_layout); + TensorType dst = create_tensor<TensorType>(output_shape, _output_data_type, 1, _dst_q_info, _data_layout); // Create and configure function FunctionType conv; - detail::configure_conv_function(conv, &src, &weights, &bias, &dst, info, weights_info, dilation, act_info, num_groups); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + const unsigned int height_index = arm_compute::graph::get_dimension_idx(_data_layout, DataLayoutDimension::HEIGHT); + const unsigned int width_index = arm_compute::graph::get_dimension_idx(_data_layout, DataLayoutDimension::WIDTH); + + const PaddingInfo pad_w = width_index < pre_pad_layer.size() ? pre_pad_layer[width_index] : PaddingInfo(0, 0); + const PaddingInfo pad_h = height_index < pre_pad_layer.size() ? pre_pad_layer[height_index] : PaddingInfo(0, 0); + + if(pre_pad_layer.size() > 0 && arm_compute::graph::is_padding_in_height_or_width(_data_layout, pre_pad_layer)) + { + // this is the logic implemented in NodeFusionMutator -> fuse_pad_with_convolution + const PadStrideInfo new_conv_info( + info.stride().first, + info.stride().second, + info.pad_left() + pad_w.first, + info.pad_right() + pad_w.second, + info.pad_top() + pad_h.first, + info.pad_bottom() + pad_h.second, + info.round()); + detail::configure_conv_function(conv, &src, &weights, &bias, &dst, new_conv_info, weights_info, dilation, act_info, num_groups); + } + else + { + detail::configure_conv_function(conv, &src, &weights, &bias, &dst, info, weights_info, dilation, act_info, num_groups); + } + + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + // Test "add padding after configure" behavior. This behavior should not affect the correctness + add_padding_x({ &src, &bias, &dst }, _data_layout); + // Padding weights may affect code path in some backends + if (padded_weights) + { + add_padding_x({ &weights }, _data_layout); + } // Allocate tensors src.allocator()->allocate(); @@ -204,24 +349,31 @@ protected: bias.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!weights.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors - fill(AccessorType(src), 0); - fill(AccessorType(weights), 1); - fill(AccessorType(bias), 2); + fill(AccessorType(src), 0 + _hash); + fill(AccessorType(weights), 1 + _hash); + fill(AccessorType(bias), 2 + _hash); - // Compute NEConvolutionLayer function - conv.run(); + if(_mixed_layout) + { + mix_layout(conv, src, dst); + } + else + { + // Compute Convolution function + conv.run(); + } return dst; } SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info, - const Size2D &dilation, const ActivationLayerInfo act_info) + const Size2D &dilation, const ActivationLayerInfo act_info, PaddingList pre_pad_layer = PaddingList({})) { ARM_COMPUTE_ERROR_ON((input_shape[2] % weights_shape[2]) != 0); @@ -237,9 +389,9 @@ protected: SimpleTensor<TW> weights{ weights_shape, weights_dt, 1, _weight_quantization_info }; SimpleTensor<TBias> bias{ bias_shape, bias_dt, 1, _quantization_info }; - fill(src, 0); - fill(weights, 1); - fill(bias, 2); + fill(src, 0 + _hash); + fill(weights, 1 + _hash); + fill(bias, 2 + _hash); // Fill with bfloat16 to perform the conversion and reduce the mismatches in the output if(_is_bfloat16) @@ -248,9 +400,14 @@ protected: regularize_values(static_cast<void *>(weights.data()), weights.num_elements()); } - return (act_info.enabled()) ? reference::activation_layer<T>(reference::convolution_layer<T>(src, weights, bias, output_shape, info, dilation, num_groups), + if(pre_pad_layer.size() > 0) + { + src = reference::pad_layer<T>(src, pre_pad_layer, PixelValue(0), PaddingMode::CONSTANT); + } + + return (act_info.enabled()) ? reference::activation_layer<T>(reference::convolution_layer<T>(src, weights, bias, output_shape, info, dilation, num_groups, _dst_q_info), act_info) : - reference::convolution_layer<T>(src, weights, bias, output_shape, info, dilation, num_groups); + reference::convolution_layer<T>(src, weights, bias, output_shape, info, dilation, num_groups, _dst_q_info); } TensorType _target{}; @@ -262,34 +419,63 @@ protected: DataLayout _data_layout{}; QuantizationInfo _quantization_info{}; QuantizationInfo _weight_quantization_info{}; - bool _is_quantized = false; + QuantizationInfo _dst_q_info{}; bool _is_bfloat16 = false; + bool _mixed_layout = false; + bool _use_dynamic_output_quant{false}; + int32_t _hash{0}; + int32_t _min_bias{-100}; + int32_t _max_bias{100}; }; -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false> class ConvolutionValidationFixture : public ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T> { public: - template <typename...> void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool reshape_weights, DataType data_type, DataLayout data_layout, ActivationLayerInfo act_info) { ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation, reshape_weights, data_type, data_type, data_layout, - QuantizationInfo(), QuantizationInfo(), act_info); + QuantizationInfo(), QuantizationInfo(), act_info, mixed_layout); } }; -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false> +class ConvolutionValidationPaddedWeightsFixture : public ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T> +{ +public: + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool reshape_weights, DataType data_type, + DataLayout data_layout) + { + ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation, reshape_weights, + data_type, data_type, data_layout, + QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), mixed_layout, PaddingList({}), true); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false> +class ConvolutionValidationWithPaddingFixture : public ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T> +{ +public: + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool reshape_weights, DataType data_type, + DataLayout data_layout, ActivationLayerInfo act_info, PaddingList pre_pad_layer = PaddingList({})) + { + ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation, reshape_weights, + data_type, data_type, data_layout, + QuantizationInfo(), QuantizationInfo(), act_info, mixed_layout, pre_pad_layer); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false> class ConvolutionValidationQuantizedFixture : public ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T> { public: - template <typename...> void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool reshape_weights, DataType data_type, DataLayout data_layout, QuantizationInfo quantization_info, ActivationLayerInfo act_info) { ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation, reshape_weights, - data_type, data_type, data_layout, quantization_info, quantization_info, act_info); + data_type, data_type, data_layout, quantization_info, quantization_info, act_info, mixed_layout); } }; @@ -297,7 +483,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ConvolutionValidationQuantizedPerChannelFixture : public ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, TW> { public: - template <typename...> void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool reshape_weights, DataType data_type, DataLayout data_layout, QuantizationInfo quantization_info, ActivationLayerInfo act_info, DataType weights_data_type) { @@ -313,7 +498,311 @@ public: quantization_info, QuantizationInfo(weights_scales), act_info); } }; + + +#ifdef ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS +inline TensorInfo prepare_weights(const TensorInfo tensor_info, const arm_compute::WeightFormat weight_format) +{ + const DataLayout data_layout = tensor_info.data_layout(); + ARM_COMPUTE_EXPECT(data_layout == DataLayout::NHWC, framework::LogLevel::ERRORS); + const DataType data_type = tensor_info.data_type(); + const TensorShape tensor_shape = tensor_info.tensor_shape(); + const int N = tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES)]; // N=O + const int H = tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT)]; + const int W = tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH)]; + const int C = tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL)]; // C=I + + const int interleave_by = arm_compute::interleave_by(weight_format); + const int block_by = arm_compute::block_by(weight_format); + const int Ip = arm_gemm::roundup<unsigned int>(C, block_by); // C'=I' + const int Op = arm_gemm::roundup<unsigned int>(N, interleave_by); // O'=N' + + arm_compute::Strides strides_in_bytes = tensor_info.strides_in_bytes(); + strides_in_bytes.set(1, Ip * interleave_by * H * W * tensor_info.element_size()); + strides_in_bytes.set(2, Ip * Op * tensor_info.element_size()); + + const size_t offset_first_element_in_bytes = tensor_info.offset_first_element_in_bytes(); + + // Total size needs to include padded dimensions + const size_t total_size_in_bytes = Op * H * W * Ip * tensor_info.element_size(); + + const TensorShape TS(Ip, W, H, Op); + + TensorInfo new_tensor_info = tensor_info; + new_tensor_info.init(TS, 1 /*num_channels, deprecated*/, data_type, strides_in_bytes, + offset_first_element_in_bytes, total_size_in_bytes); + return new_tensor_info; +} + +template <typename ScalarType, typename AccessorType> +inline void rearrange_data(const AccessorType src, AccessorType dst, const arm_compute::WeightFormat weight_format) +{ + ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format(weight_format), framework::LogLevel::ERRORS); + // Data Layout: OHWIo<interleave_by>i<block_by> + const int interleave_by = arm_compute::interleave_by(weight_format); + const int block_by = arm_compute::block_by(weight_format); + const TensorShape src_tensor_shape = src.shape(); + const DataLayout data_layout = src.data_layout(); + ARM_COMPUTE_EXPECT(data_layout == DataLayout::NHWC, framework::LogLevel::ERRORS); + const unsigned int O = src_tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES)]; // N=O + const unsigned int H = src_tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT)]; + const unsigned int W = src_tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH)]; + const unsigned int I = src_tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL)]; // C=I + const unsigned int Ip = arm_gemm::roundup<unsigned int>(I, block_by); // C'=I' + const unsigned int Op = arm_gemm::roundup<unsigned int>(O, interleave_by); // N'=O' + + ARM_COMPUTE_EXPECT_EQUAL(Op * H * W * Ip, (unsigned)dst.num_elements(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(src.num_elements() <= dst.num_elements(), framework::LogLevel::ERRORS); + + const ScalarType *src_ptr = reinterpret_cast<const ScalarType *>(src.data()); + ScalarType *dst_ptr = reinterpret_cast<ScalarType *>(dst.data()); + for(unsigned i = 0; i < I; ++i) + for(unsigned w = 0; w < W; ++w) + for(unsigned h = 0; h < H; ++h) + for(unsigned o = 0; o < O; ++o) + { + ScalarType src_element; + switch(data_layout) + { + case DataLayout::NHWC: + { + src_element = src_ptr[o * H * W * I + h * W * I + w * I + i]; + } + break; + default: + { + ARM_COMPUTE_ERROR("Unsupported memory layout."); + } + } + const int x5 = std::floor(((float)o) / interleave_by); + const int x4 = h; + const int x3 = w; + const int x2 = std::floor((float)i / block_by); + const int x1 = o % interleave_by; + const int x0 = i % block_by; + unsigned dst_idx = x5 * H * W * Ip * interleave_by + + x4 * W * Ip * interleave_by + + x3 * Ip * interleave_by + + x2 * interleave_by * block_by + + x1 * block_by + + x0; + dst_ptr[dst_idx] = src_element; + } +} + +template <typename ConvolutionFunction, typename TensorClass, typename AccessorType, typename ScalarType, bool enable_fast_math> +class VariableWeightsFixtureBaseClass : public framework::Fixture +{ +public: + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, DataLayout data_layout, + const DataType data_type) + { + conv = std::make_unique<ConvolutionFunction>(); + // prepare data + _data_layout = data_layout; + // Fixed format kernels for variable weights can work only with NHWC format. + ARM_COMPUTE_EXPECT_EQUAL(_data_layout, DataLayout::NHWC, framework::LogLevel::ERRORS); + _data_type = data_type; + // run the code + compute_target(input_shape, weights_shape, bias_shape, output_shape, info, dilation); + compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, dilation); + } + void teardown() + { + _target.allocator()->free(); + } + +protected: + template <typename U> + void fill(U &&tensor, int i) + { + switch(tensor.data_type()) + { + case DataType::F16: + { + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f }; + library->fill(tensor, distribution, i); + break; + } + case DataType::F32: + { + std::uniform_real_distribution<float> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + break; + } + default: + library->fill_tensor_uniform(tensor, i); + } + } + +private: + virtual void configure_and_execute_kernel(TensorInfo src_tensor_info, TensorInfo weight_tensor_info, TensorInfo bias_tensor_info, TensorInfo dst_tensor_info, const WeightsInfo weights_info, + const PadStrideInfo &conv_info, + const Size2D &dilation) = 0; + + void compute_target(TensorShape input_shape, TensorShape weights_shape, const TensorShape &bias_shape, TensorShape output_shape, const PadStrideInfo &conv_info, + const Size2D &dilation) + { + // The dataset is always in NCHW format - we need to make C the + // innermost dimension because the fixed-format kernel work only + // with NHWC layout. + permute(input_shape, PermutationVector(2U, 0U, 1U)); + permute(weights_shape, PermutationVector(2U, 0U, 1U)); + permute(output_shape, PermutationVector(2U, 0U, 1U)); + const auto src_tensor_info = TensorInfo(input_shape, 1, _data_type, _data_layout); + const auto weight_tensor_info = TensorInfo(weights_shape, 1, _data_type, _data_layout); + const auto bias_tensor_info = TensorInfo(bias_shape, 1, _data_type, _data_layout); + auto dst_tensor_info = TensorInfo(output_shape, 1, _data_type, _data_layout); + + const int kernel_height = weights_shape[get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT)]; + const int kernel_width = weights_shape[get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH)]; + const int num_kernels = weights_shape[get_data_layout_dimension_index(_data_layout, DataLayoutDimension::BATCHES)]; + + const WeightsInfo query_weights_info(/*reshape_weights*/ false, kernel_width, kernel_height, num_kernels, false, arm_compute::WeightFormat::ANY); + const bool kernel_found = bool(ConvolutionFunction::has_opt_impl(_computed_weight_format, &src_tensor_info, &weight_tensor_info, + &bias_tensor_info, &dst_tensor_info, conv_info, query_weights_info)); + // Make surethat the setup founds a fixed-format kernel as requested by the test case. + ARM_COMPUTE_EXPECT(kernel_found, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format(_computed_weight_format), framework::LogLevel::ERRORS); + + const WeightsInfo weights_info(/*reshape_weights*/ false, kernel_width, kernel_height, num_kernels, false, _computed_weight_format); + configure_and_execute_kernel(src_tensor_info, weight_tensor_info, bias_tensor_info, dst_tensor_info, weights_info, conv_info, + dilation); + } + void compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info, + const Size2D &dilation) + { + ARM_COMPUTE_UNUSED(input_shape, weights_shape, bias_shape, output_shape, info, + dilation); + + // Create reference + SimpleTensor<ScalarType> src{ input_shape, _data_type }; + SimpleTensor<ScalarType> weights{ weights_shape, _data_type }; + SimpleTensor<ScalarType> bias{ bias_shape, _data_type }; + fill(src, 0); + fill(bias, 1); + fill(weights, 3); + _reference = reference::convolution_layer<ScalarType>(src, weights, bias, output_shape, info, dilation, 1 /*num_groups*/); + } + DataLayout _data_layout{}; + DataType _data_type{}; + +protected: + std::unique_ptr<ConvolutionFunction> conv{}; + arm_compute::WeightFormat _computed_weight_format{ arm_compute::WeightFormat::UNSPECIFIED }; + TensorClass _target{}; + SimpleTensor<ScalarType> _reference{}; +}; + +template <typename ConvolutionFunction, typename TensorClass, typename AccessorType, typename ScalarType, bool enable_fast_math> +class VariableWeightsFixture : public VariableWeightsFixtureBaseClass<ConvolutionFunction, TensorClass, AccessorType, ScalarType, enable_fast_math> +{ + void configure_and_execute_kernel(TensorInfo src_tensor_info, TensorInfo weight_tensor_info, TensorInfo bias_tensor_info, TensorInfo dst_tensor_info, const WeightsInfo weights_info, + const PadStrideInfo &conv_info, + const Size2D &dilation) + { + this->conv->configure(&src_tensor_info, &weight_tensor_info, &bias_tensor_info, &dst_tensor_info, conv_info, weights_info, dilation, ActivationLayerInfo(), enable_fast_math); + + // Allocate input tensors + auto src = create_tensor<TensorClass>(src_tensor_info); + auto weights_original = create_tensor<TensorClass>(weight_tensor_info); + const TensorInfo new_tensor_info = prepare_weights(weight_tensor_info, this->_computed_weight_format); + auto weights_transformed = create_tensor<TensorClass>(new_tensor_info); + auto bias = create_tensor<TensorClass>(bias_tensor_info); + src.allocator()->allocate(); + weights_original.allocator()->allocate(); + weights_transformed.allocator()->allocate(); + bias.allocator()->allocate(); + // Allocate destination tensor + this->_target = create_tensor<TensorClass>(dst_tensor_info); + this->_target.allocator()->allocate(); + + // Prepare source and biases that are left unchanged. + this->fill(AccessorType(src), 0); + this->fill(AccessorType(bias), 1); + + // First run + this->fill(AccessorType(weights_original), 2); + rearrange_data<ScalarType, AccessorType>(AccessorType(weights_original), AccessorType(weights_transformed), this->_computed_weight_format); + ITensorPack run_pack{ { TensorType::ACL_SRC_0, &src }, { TensorType::ACL_SRC_1, &weights_transformed }, { TensorType::ACL_SRC_2, &bias }, { TensorType::ACL_DST, &(this->_target) } }; + this->conv->run(run_pack); + // Second run, with new weights + this->fill(AccessorType(weights_original), 3); + rearrange_data<ScalarType, AccessorType>(AccessorType(weights_original), AccessorType(weights_transformed), this->_computed_weight_format); + this->conv->run(run_pack); + src.allocator()->free(); + weights_original.allocator()->free(); + weights_transformed.allocator()->free(); + bias.allocator()->free(); + } +}; + +template <typename ConvolutionFunction, typename TensorClass, typename AccessorType, typename ScalarType, bool enable_fast_math> +class VariableWeightsFixtureNEInterface : public VariableWeightsFixtureBaseClass<ConvolutionFunction, TensorClass, AccessorType, ScalarType, enable_fast_math> +{ + void configure_and_execute_kernel(TensorInfo src_tensor_info, TensorInfo weight_tensor_info, TensorInfo bias_tensor_info, TensorInfo dst_tensor_info, const WeightsInfo weights_info, + const PadStrideInfo &conv_info, + const Size2D &dilation) + { + // Allocate input tensors + auto src = create_tensor<TensorClass>(src_tensor_info); + auto weights_original = create_tensor<TensorClass>(weight_tensor_info); + const TensorInfo new_tensor_info = prepare_weights(weight_tensor_info, this->_computed_weight_format); + auto weights_transformed = create_tensor<TensorClass>(new_tensor_info); + auto bias = create_tensor<TensorClass>(bias_tensor_info); + src.allocator()->allocate(); + weights_original.allocator()->allocate(); + weights_transformed.allocator()->allocate(); + bias.allocator()->allocate(); + // Allocate destination tensor + this->_target = create_tensor<TensorClass>(dst_tensor_info); + this->_target.allocator()->allocate(); + this->conv->configure(&src, &weights_transformed, &bias, &(this->_target), conv_info, weights_info, dilation, ActivationLayerInfo(), enable_fast_math); + // Prepare source and biases that are left unchanged. + this->fill(AccessorType(src), 0); + this->fill(AccessorType(bias), 1); + + // First run + this->fill(AccessorType(weights_original), 2); + rearrange_data<ScalarType, AccessorType>(AccessorType(weights_original), AccessorType(weights_transformed), this->_computed_weight_format); + this->conv->run(); + // Second run, with new weights + this->fill(AccessorType(weights_original), 3); + rearrange_data<ScalarType, AccessorType>(AccessorType(weights_original), AccessorType(weights_transformed), this->_computed_weight_format); + this->conv->run(); + src.allocator()->free(); + weights_original.allocator()->free(); + weights_transformed.allocator()->free(); + bias.allocator()->free(); + } +}; + +template <typename ConvolutionClass, bool enable_fast_math> +class HasOptImplFixture : public framework::Fixture +{ +public: + void setup(DataType data_type, arm_compute::WeightFormat query_weight_format) + { + auto conv = std::make_unique<ConvolutionClass>(); + const auto src_info = TensorInfo(TensorShape(56U, 56U, 64U), 1, data_type, DataLayout::NHWC); + const auto weight_info = TensorInfo(TensorShape(64, 3U, 3U, 64U), 1, enable_fast_math ? DataType::BFLOAT16 : data_type, DataLayout::NHWC); + const auto bias_info = TensorInfo(TensorShape(64U), 1, data_type, DataLayout::NHWC); + auto dst_info = TensorInfo(TensorShape(56U, 56U, 64U), 1, data_type, DataLayout::NHWC); + const auto conv_info = PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR); + const WeightsInfo weights_info(false, 3U, 3U, 64U, false, query_weight_format); + _kernel_found = bool(ConvolutionClass::has_opt_impl(_computed_weight_format, &src_info, &weight_info, + &bias_info, &dst_info, conv_info, weights_info, + Size2D(1U, 1U) /*dilation*/, ActivationLayerInfo() /*act_info*/, enable_fast_math)); + } + +protected: + bool _kernel_found{ false }; + arm_compute::WeightFormat _computed_weight_format{ arm_compute::WeightFormat::UNSPECIFIED }; +}; +#endif // ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS + } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_CONVOLUTION_LAYER_FIXTURE */ + +#endif // ACL_TESTS_VALIDATION_FIXTURES_CONVOLUTIONLAYERFIXTURE_H diff --git a/tests/validation/fixtures/CopyFixture.h b/tests/validation/fixtures/CopyFixture.h index feb1d7dfb5..f5e711a500 100644 --- a/tests/validation/fixtures/CopyFixture.h +++ b/tests/validation/fixtures/CopyFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,7 +43,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class CopyFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape input_shape, TensorShape output_shape, DataType data_type) { _target = compute_target(input_shape, output_shape, data_type); @@ -61,7 +60,7 @@ protected: TensorType compute_target(const TensorShape &input_shape, const TensorShape &output_shape, DataType data_type) { // Check if indeed the input shape can be reshape to the output one - ARM_COMPUTE_EXPECT(input_shape.total_size() == output_shape.total_size(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(input_shape.total_size() == output_shape.total_size()); // Create tensors TensorType src = create_tensor<TensorType>(input_shape, data_type); @@ -72,15 +71,15 @@ protected: copy.configure(&src, &dst); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src), 0); diff --git a/tests/validation/fixtures/CropResizeFixture.h b/tests/validation/fixtures/CropResizeFixture.h index 4f6389155a..30a3fd8569 100644 --- a/tests/validation/fixtures/CropResizeFixture.h +++ b/tests/validation/fixtures/CropResizeFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,7 +30,6 @@ #include "tests/AssetsLibrary.h" #include "tests/Globals.h" #include "tests/IAccessor.h" -#include "tests/RawLutAccessor.h" #include "tests/framework/Asserts.h" #include "tests/framework/Fixture.h" #include "tests/validation/Helpers.h" @@ -47,7 +46,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class CropResizeFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape src_shape, TensorShape boxes_shape, Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value, bool is_outside_bounds, DataType data_type) { @@ -88,15 +86,15 @@ protected: FunctionType crop; crop.configure(&src, &boxes, &boxes_ind, &dst, crop_size, method, extrapolation_value); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src), 0); diff --git a/tests/validation/fixtures/DeconvolutionLayerFixture.h b/tests/validation/fixtures/DeconvolutionLayerFixture.h index 6ea2335ae9..83170c413c 100644 --- a/tests/validation/fixtures/DeconvolutionLayerFixture.h +++ b/tests/validation/fixtures/DeconvolutionLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -42,22 +42,24 @@ namespace validation { using namespace arm_compute::misc::shape_calculator; -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename TW> class DeconvolutionLayerFixtureBase : public framework::Fixture { public: using TBias = typename std::conditional < std::is_same<typename std::decay<T>::type, uint8_t>::value || std::is_same<typename std::decay<T>::type, int8_t>::value, int32_t, T >::type; public: - template <typename...> void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, - DataType data_type, DataLayout data_layout, QuantizationInfo input_quantization_info, QuantizationInfo output_quantization_info, bool add_bias) + DataType data_type, DataType weights_data_type, DataLayout data_layout, + QuantizationInfo input_quantization_info, QuantizationInfo output_quantization_info, QuantizationInfo weights_quantization_info, bool add_bias) { - _data_type = data_type; - _bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type; - _data_layout = data_layout; - _input_quantization_info = input_quantization_info; - _output_quantization_info = output_quantization_info; + _data_type = data_type; + _weights_data_type = weights_data_type; + _bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type; + _data_layout = data_layout; + _input_quantization_info = input_quantization_info; + _output_quantization_info = output_quantization_info; + _weights_quantization_info = weights_quantization_info; _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, add_bias); _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, add_bias); @@ -72,14 +74,34 @@ protected: case DataType::QASYMM8: { std::pair<int, int> bounds = get_quantized_bounds(tensor.quantization_info(), -1.0f, 1.0f); - std::uniform_int_distribution<uint8_t> distribution(bounds.first, bounds.second); + std::uniform_int_distribution<uint32_t> distribution(bounds.first, bounds.second); library->fill(tensor, distribution, i); break; } case DataType::QASYMM8_SIGNED: { std::pair<int, int> bounds = get_quantized_qasymm8_signed_bounds(tensor.quantization_info(), -1.0f, 1.0f); - std::uniform_int_distribution<int8_t> distribution(bounds.first, bounds.second); + std::uniform_int_distribution<int32_t> distribution(bounds.first, bounds.second); + library->fill(tensor, distribution, i); + break; + } + case DataType::QSYMM8_PER_CHANNEL: + { + int min_bound = 128; + int max_bound = -127; + for(size_t i = 0; i < _input_quantization_info.scale().size(); i++) + { + std::pair<int, int> bounds = get_symm_quantized_per_channel_bounds(tensor.quantization_info(), -1.0f, 1.0f); + if(bounds.first < min_bound) + { + min_bound = bounds.first; + } + if(bounds.second > max_bound) + { + max_bound = bounds.second; + } + } + std::uniform_int_distribution<int32_t> distribution(min_bound, max_bound); library->fill(tensor, distribution, i); break; } @@ -113,8 +135,7 @@ protected: { case DataType::S32: { - const int32_t value = static_cast<int32_t>(tensor.quantization_info().uniform().offset); - library->fill_tensor_value(tensor, value); + library->fill_tensor_value(tensor, 0); break; } case DataType::F16: @@ -140,7 +161,7 @@ protected: // Create tensors TensorType src = create_tensor<TensorType>(input_shape, _data_type, 1, _input_quantization_info, _data_layout); - TensorType weights = create_tensor<TensorType>(weights_shape, _data_type, 1, _input_quantization_info, _data_layout); + TensorType weights = create_tensor<TensorType>(weights_shape, _weights_data_type, 1, _weights_quantization_info, _data_layout); TensorType bias = create_tensor<TensorType>(bias_shape, _bias_data_type, 1, _input_quantization_info, _data_layout); TensorType dst = create_tensor<TensorType>(output_shape, _data_type, 1, _output_quantization_info, _data_layout); @@ -148,13 +169,13 @@ protected: FunctionType conv; conv.configure(&src, &weights, add_bias ? &bias : nullptr, &dst, info); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(weights.info()->is_resizable()); if(add_bias) { - ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); } - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); @@ -165,13 +186,13 @@ protected: } dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!weights.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!weights.info()->is_resizable()); if(add_bias) { - ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); } - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src), 0); @@ -183,7 +204,6 @@ protected: // Compute DeconvolutionLayer function conv.run(); - return dst; } @@ -192,7 +212,7 @@ protected: { // Create reference SimpleTensor<T> src{ input_shape, _data_type, 1, _input_quantization_info }; - SimpleTensor<T> weights{ weights_shape, _data_type, 1, _input_quantization_info }; + SimpleTensor<TW> weights{ weights_shape, _weights_data_type, 1, _weights_quantization_info }; SimpleTensor<TBias> bias{ bias_shape, _bias_data_type, 1, _input_quantization_info }; // Fill reference @@ -207,28 +227,27 @@ protected: { fill_zeros(bias); } - - return reference::deconvolution_layer<T>(src, weights, bias, output_shape, info, _output_quantization_info); + return reference::deconvolution_layer<T, TW>(src, weights, bias, output_shape, info, _output_quantization_info); } TensorType _target{}; SimpleTensor<T> _reference{}; DataType _data_type{}; + DataType _weights_data_type{}; DataType _bias_data_type{}; DataLayout _data_layout{}; QuantizationInfo _input_quantization_info{}; QuantizationInfo _output_quantization_info{}; + QuantizationInfo _weights_quantization_info{}; }; template <typename TensorType, typename AccessorType, typename FunctionType, typename T, unsigned int kernel_size_x, unsigned int kernel_size_y> -class DeconvolutionValidationFixture : public DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T> +class DeconvolutionValidationFixture : public DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T, T> { public: - template <typename...> void setup(TensorShape input_shape, unsigned int sx, unsigned int sy, unsigned int padx, unsigned int pady, unsigned int num_kernels, DataType data_type, DataLayout data_layout, bool add_bias) { - ARM_COMPUTE_ERROR_ON_MSG(kernel_size_x != kernel_size_y, "Only square kernels supported"); const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels); const TensorShape bias_shape(num_kernels); const PadStrideInfo info(sx, sy, padx, pady, DimensionRoundingType::CEIL); @@ -236,20 +255,18 @@ public: TensorInfo input_info(input_shape, 1, data_type); TensorInfo weights_info(weights_shape, 1, data_type); TensorShape output_shape = compute_deconvolution_output_shape(out_dim, input_info, weights_info); - DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, data_layout, QuantizationInfo(), - QuantizationInfo(), add_bias); + DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, data_type, data_layout, QuantizationInfo(), + QuantizationInfo(), QuantizationInfo(), add_bias); } }; template <typename TensorType, typename AccessorType, typename FunctionType, typename T, unsigned int kernel_size_x, unsigned int kernel_size_y> -class DeconvolutionValidationAsymmFixture : public DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T> +class DeconvolutionValidationAsymmFixture : public DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T, T> { public: - template <typename...> void setup(TensorShape input_shape, unsigned int sx, unsigned int sy, unsigned int pad_left, unsigned int pad_right, unsigned int pad_top, unsigned int pad_bottom, unsigned int num_kernels, DataType data_type, DataLayout data_layout, bool add_bias) { - ARM_COMPUTE_ERROR_ON_MSG(kernel_size_x != kernel_size_y, "Only square kernels supported"); const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels); const TensorShape bias_shape(num_kernels); const PadStrideInfo info(sx, sy, pad_left, pad_right, pad_top, pad_bottom, DimensionRoundingType::CEIL); @@ -257,20 +274,18 @@ public: TensorInfo input_info(input_shape, 1, data_type); TensorInfo weights_info(weights_shape, 1, data_type); TensorShape output_shape = compute_deconvolution_output_shape(out_dim, input_info, weights_info); - DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, data_layout, QuantizationInfo(), - QuantizationInfo(), add_bias); + DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, data_type, data_layout, QuantizationInfo(), + QuantizationInfo(), QuantizationInfo(), add_bias); } }; template <typename TensorType, typename AccessorType, typename FunctionType, typename T, unsigned int kernel_size_x, unsigned int kernel_size_y> -class DeconvolutionValidationQuantizedFixture : public DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T> +class DeconvolutionValidationQuantizedFixture : public DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T, T> { public: - template <typename...> void setup(TensorShape input_shape, unsigned int sx, unsigned int sy, unsigned int padx, unsigned int pady, unsigned int num_kernels, DataType data_type, DataLayout data_layout, QuantizationInfo input_quantization_info, QuantizationInfo output_quantization_info, bool add_bias) { - ARM_COMPUTE_ERROR_ON_MSG(kernel_size_x != kernel_size_y, "Only square kernels supported"); const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels); const TensorShape bias_shape(num_kernels); const PadStrideInfo info(sx, sy, padx, pady, DimensionRoundingType::CEIL); @@ -278,8 +293,38 @@ public: TensorInfo input_info(input_shape, 1, data_type, input_quantization_info); TensorInfo weights_info(weights_shape, 1, data_type, input_quantization_info); TensorShape output_shape = compute_deconvolution_output_shape(out_dim, input_info, weights_info); - DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, data_layout, input_quantization_info, - output_quantization_info, add_bias); + DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, data_type, data_layout, + input_quantization_info, + output_quantization_info, input_quantization_info, add_bias); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename TW, unsigned int kernel_size_x, unsigned int kernel_size_y> +class DeconvolutionValidationQuantizedPerChannelFixture : public DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T, TW> +{ +public: + void setup(TensorShape input_shape, unsigned int sx, unsigned int sy, unsigned int padx, unsigned int pady, + unsigned int num_kernels, DataType data_type, DataLayout data_layout, QuantizationInfo input_quantization_info, QuantizationInfo output_quantization_info, bool add_bias, + DataType weights_data_type) + { + const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels); + const TensorShape bias_shape(num_kernels); + const PadStrideInfo info(sx, sy, padx, pady, DimensionRoundingType::CEIL); + auto out_dim = deconvolution_output_dimensions(input_shape.x(), input_shape.y(), kernel_size_x, kernel_size_y, info); + TensorInfo input_info(input_shape, 1, data_type, input_quantization_info); + TensorInfo weights_info(weights_shape, 1, weights_data_type, input_quantization_info); + TensorShape output_shape = compute_deconvolution_output_shape(out_dim, input_info, weights_info); + + std::vector<float> weights_scales{}; + std::mt19937 gen(library->seed()); + std::uniform_real_distribution<float> dis(0.01f, 1.f); + for(size_t i = 0; i < output_shape[2]; ++i) + { + weights_scales.push_back(dis(gen)); + } + DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T, TW>::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, weights_data_type, data_layout, + input_quantization_info, + output_quantization_info, QuantizationInfo(weights_scales), add_bias); } }; diff --git a/tests/validation/fixtures/DepthConvertLayerFixture.h b/tests/validation/fixtures/DepthConvertLayerFixture.h index 937a1a06a9..f55d20bf3e 100644 --- a/tests/validation/fixtures/DepthConvertLayerFixture.h +++ b/tests/validation/fixtures/DepthConvertLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -45,7 +45,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class DepthConvertLayerValidationBaseFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift, QuantizationInfo quantization_info) { _shift = shift; @@ -61,13 +60,13 @@ protected: if(is_data_type_quantized(tensor.data_type())) { std::pair<int, int> bounds = get_quantized_bounds(tensor.quantization_info(), -1.0f, 1.0f); - std::uniform_int_distribution<uint8_t> distribution(bounds.first, bounds.second); + std::uniform_int_distribution<uint32_t> distribution(bounds.first, bounds.second); library->fill(tensor, distribution, i); } else { - // When converting S32 to F16, both reference and Neon implementations are + or - infinity outside the F16 range. + // When converting S32 to F16, both reference and Compute Library implementations are + or - infinity outside the F16 range. if(dt_in == DataType::S32 && dt_out == DataType::F16) { std::uniform_int_distribution<int32_t> distribution_s32(-65504, 65504); @@ -90,15 +89,15 @@ protected: FunctionType depth_convert; depth_convert.configure(&src, &dst, policy, shift); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src), 0, dt_in, dt_out); @@ -130,7 +129,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class DepthConvertLayerValidationFixture : public DepthConvertLayerValidationBaseFixture<TensorType, AccessorType, FunctionType, T1, T2> { public: - template <typename...> void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift) { DepthConvertLayerValidationBaseFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, dt_in, dt_out, policy, @@ -142,7 +140,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class DepthConvertLayerValidationQuantizedFixture : public DepthConvertLayerValidationBaseFixture<TensorType, AccessorType, FunctionType, T1, T2> { public: - template <typename...> void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift, QuantizationInfo quantization_info) { DepthConvertLayerValidationBaseFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, dt_in, dt_out, policy, diff --git a/tests/validation/fixtures/DepthToSpaceLayerFixture.h b/tests/validation/fixtures/DepthToSpaceLayerFixture.h index a254ba4322..abe3d8b22f 100644 --- a/tests/validation/fixtures/DepthToSpaceLayerFixture.h +++ b/tests/validation/fixtures/DepthToSpaceLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -39,7 +39,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class DepthToSpaceLayerValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape input_shape, int32_t block_shape, TensorShape output_shape, DataType data_type, DataLayout data_layout) { _target = compute_target(input_shape, block_shape, output_shape, data_type, data_layout); @@ -73,15 +72,15 @@ protected: FunctionType depth_to_space; depth_to_space.configure(&input, &output, block_shape); - ARM_COMPUTE_EXPECT(input.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(output.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(input.info()->is_resizable()); + ARM_COMPUTE_ASSERT(output.info()->is_resizable()); // Allocate tensors input.allocator()->allocate(); output.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!input.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!output.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!input.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!output.info()->is_resizable()); // Fill tensors fill(AccessorType(input), 0); diff --git a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h index d9806b5c84..6e2e3a3846 100644 --- a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h +++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_FIXTURE -#define ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DEPTHWISECONVOLUTIONLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DEPTHWISECONVOLUTIONLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -38,6 +38,7 @@ #include "utils/Utils.h" +#include <cstdint> #include <random> namespace arm_compute @@ -54,35 +55,83 @@ class DepthwiseConvolutionLayerValidationGenericFixture : public framework::Fixt public: using TBias = typename std::conditional < std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int32_t, T >::type; + void setup_quantization(TensorShape input_shape, TensorShape weights_shape, QuantizationInfo &input_q_info, + QuantizationInfo &weights_q_info, DataType data_type) + { + ARM_COMPUTE_UNUSED(input_shape); + const int32_t t_max = static_cast<int32_t>(std::numeric_limits<T>::max()); + const int32_t t_min = static_cast<int32_t>(std::numeric_limits<T>::min()); + + std::mt19937 generator(library->seed() + _hash); + std::uniform_real_distribution<float> distribution_float(-5.0f, 3.0f); + std::uniform_int_distribution<int32_t> distribution_t(t_min, t_max); + + const float scale_lhs = pow(2, distribution_float(generator)); // [2^-5, 2^3] + const float scale_rhs = pow(2, distribution_float(generator)); // [2^-5, 2^3] + + const int32_t offset_lhs = distribution_t(generator); + const int32_t offset_rhs = distribution_t(generator); + + _input_quantization_info = QuantizationInfo(scale_lhs, offset_lhs); + _weights_quantization_info = QuantizationInfo(scale_rhs, offset_rhs); + + QuantizationHint q_hint = suggest_conv_dst_q_info_and_bias(input_q_info, weights_q_info, + weights_shape.y() /* heights */, weights_shape.x() /* width */, 1 /* channels */, + data_type, 0.5f /* bias_fraction */); + + _output_quantization_info = q_hint.q_info; + _min_bias = q_hint.bias_min; + _max_bias = q_hint.bias_max; + } + public: - template <typename...> void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, Size2D dilation, unsigned int depth_multiplier, DataType input_data_type, DataType weights_data_type, QuantizationInfo input_quantization_info, QuantizationInfo weights_quantization_info, QuantizationInfo output_quantization_info, - DataLayout data_layout, ActivationLayerInfo act_info) + DataLayout data_layout, ActivationLayerInfo act_info, bool mixed_layout = false, bool in_place = false, bool run_twice = false) { + ARM_COMPUTE_ERROR_ON(mixed_layout && in_place); + // This hash is used by random generators. There may be hash collisions but + // this is intentional as it's a very easy way to make the the current + // random generation process almost different for many test configurations, + // which were using the same set of values before. + _hash = in_shape[0] + in_shape[1] + in_shape[2] + in_shape[3] + + kernel_size.width + kernel_size.height + dilation.x() + + dilation.y() + pad_stride_info.pad_bottom() + pad_stride_info.pad_left() + pad_stride_info.pad_right() + pad_stride_info.pad_top(); + + _mixed_layout = mixed_layout; _input_shape = in_shape; _input_data_type = input_data_type; _weights_data_type = weights_data_type; - _input_quantization_info = input_quantization_info; - _weights_quantization_info = weights_quantization_info; - _output_quantization_info = output_quantization_info; _data_layout = data_layout; _pad_stride_info = pad_stride_info; _act_info = act_info; _depth_multiplier = depth_multiplier; _dilation = dilation; + _in_place = in_place; + _run_twice = run_twice; _bias_data_type = is_data_type_quantized(_input_data_type) ? DataType::S32 : _input_data_type; _weights_shape = TensorShape(kernel_size.width, kernel_size.height); - const TensorInfo in_info(_input_shape, 1, _input_data_type); - const TensorInfo we_info(_weights_shape, 1, _weights_data_type); - _output_shape = compute_depthwise_convolution_shape(in_info, we_info, _pad_stride_info, _depth_multiplier, _dilation); + const TensorInfo in_info(_input_shape, 1, _input_data_type); + const TensorInfo we_info(_weights_shape, 1, _weights_data_type); + const ConvolutionInfo info{ _pad_stride_info, _depth_multiplier, _act_info, _dilation }; + _output_shape = compute_depthwise_convolution_shape(in_info, we_info, info); _weights_shape.set(2, _output_shape.z()); _biases_shape = TensorShape(_weights_shape[2]); + + _input_quantization_info = input_quantization_info; + _weights_quantization_info = weights_quantization_info; + _output_quantization_info = output_quantization_info; + + if(is_data_type_quantized(_input_data_type) && !is_data_type_quantized_symmetric(weights_data_type) && (!act_info.enabled() || act_info.activation() == ActivationFunction::IDENTITY)) + { + setup_quantization(in_shape, _weights_shape, _input_quantization_info, _weights_quantization_info, _input_data_type); + _use_dynamic_output_quant = true; + } } void configure_target() @@ -99,18 +148,33 @@ public: } // Create tensors - _src = create_tensor<TensorType>(input_shape, _input_data_type, 1, _input_quantization_info, _data_layout); - _weights = create_tensor<TensorType>(weights_shape, _weights_data_type, 1, _weights_quantization_info, _data_layout); - _biases = create_tensor<TensorType>(_biases_shape, _bias_data_type, 1, _input_quantization_info, _data_layout); - _target = create_tensor<TensorType>(output_shape, _input_data_type, 1, _output_quantization_info, _data_layout); + _src = create_tensor<TensorType>(input_shape, _input_data_type, 1, _input_quantization_info, _data_layout); + _weights = create_tensor<TensorType>(weights_shape, _weights_data_type, 1, _weights_quantization_info, _data_layout); + if(_run_twice) { + _weights.info()->set_are_values_constant(false); + } + _biases = create_tensor<TensorType>(_biases_shape, _bias_data_type, 1, _input_quantization_info, _data_layout); + TensorType *target_to_use = nullptr; + if(!_in_place) + { + _target = create_tensor<TensorType>(output_shape, _input_data_type, 1, _output_quantization_info, _data_layout); + target_to_use = &_target; + } + + add_padding_x({ &_src, &_biases }, _data_layout); + add_padding_x({ &_weights }, _data_layout, true); + if(!_in_place) + { + add_padding_x({ &_target }, _data_layout); + } // Create Depthwise Convolution configure function - _dwc.configure(&_src, &_weights, &_biases, &_target, _pad_stride_info, _depth_multiplier, _act_info, _dilation); + _dwc.configure(&_src, &_weights, &_biases, target_to_use, _pad_stride_info, _depth_multiplier, _act_info, _dilation); - ARM_COMPUTE_EXPECT(_src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(_weights.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(_biases.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(_target.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(_src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(_weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(_biases.info()->is_resizable()); + ARM_COMPUTE_ASSERT(_target.info()->is_resizable()); } void allocate_and_run_target() @@ -119,20 +183,41 @@ public: _src.allocator()->allocate(); _weights.allocator()->allocate(); _biases.allocator()->allocate(); - _target.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!_src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!_weights.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!_biases.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!_target.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!_src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!_weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!_biases.info()->is_resizable()); + + if(!_in_place) + { + _target.allocator()->allocate(); + ARM_COMPUTE_ASSERT(!_target.info()->is_resizable()); + } // Fill tensors - fill(AccessorType(_src), 0); - fill(AccessorType(_weights), 1); - fill(AccessorType(_biases), 2); + fill(AccessorType(_src), 0 + _hash); + fill(AccessorType(_weights), 1 + _hash); + fill(AccessorType(_biases), 2 + _hash); + + // Run with variable input + if(_run_twice) { + _dwc.run(); + + // Fill tensors with a new seed + fill(AccessorType(_src), 3 + _hash); + fill(AccessorType(_weights), 4 + _hash); + fill(AccessorType(_biases), 5 + _hash); + } - // Compute function - _dwc.run(); + if(_mixed_layout) + { + mix_layout(_dwc, _src, _target); + } + else + { + // Compute function + _dwc.run(); + } } void compute_reference() @@ -141,15 +226,41 @@ public: SimpleTensor<TW> weights{ _weights_shape, _weights_data_type, 1, _weights_quantization_info }; SimpleTensor<TBias> biases{ _biases_shape, _bias_data_type, 1, _input_quantization_info }; - fill(src, 0); - fill(weights, 1); - fill(biases, 2); + fill(src, 0 + _hash); + fill(weights, 1 + _hash); + fill(biases, 2 + _hash); + + if(_run_twice) { + SimpleTensor<T> depth_out = reference::depthwise_convolution(src, weights, biases, _output_shape, _pad_stride_info, _depth_multiplier, _dilation, _output_quantization_info); + if(_act_info.enabled()) { + reference::activation_layer<T>(depth_out, _act_info); + } + + fill(src, 3 + _hash); + fill(weights, 4 + _hash); + fill(biases, 5 + _hash); + } SimpleTensor<T> depth_out = reference::depthwise_convolution(src, weights, biases, _output_shape, _pad_stride_info, _depth_multiplier, _dilation, _output_quantization_info); _reference = (_act_info.enabled()) ? reference::activation_layer<T>(depth_out, _act_info) : depth_out; } protected: + void mix_layout(FunctionType &layer, TensorType &src, TensorType &dst) + { + ARM_COMPUTE_ERROR_ON(_in_place); + // Test Multi DataLayout graph cases, when the data layout changes after configure + src.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); + dst.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); + + // Compute Convolution function + layer.run(); + + // Reinstating original data layout for the test suite to properly check the values + src.info()->set_data_layout(_data_layout); + dst.info()->set_data_layout(_data_layout); + } + template <typename U> void fill(U &&tensor, int i) { @@ -157,32 +268,77 @@ protected: { case DataType::QASYMM8: { - std::uniform_int_distribution<uint8_t> distribution(0, 10); - library->fill(tensor, distribution, i); + if(_use_dynamic_output_quant) + { + std::uniform_int_distribution<int32_t> distribution(0, 255); + library->fill(tensor, distribution, i); + } + else + { + // Legacy initialization in case the output quantization info can't be reliably estimated + std::pair<int, int> bounds = get_quantized_bounds(tensor.quantization_info(), -1.0f, 1.0f); + std::uniform_int_distribution<uint32_t> distribution(bounds.first, bounds.second); + library->fill(tensor, distribution, i); + } break; } case DataType::QASYMM8_SIGNED: + { + if(_use_dynamic_output_quant) + { + std::uniform_int_distribution<int32_t> distribution(-128, 127); + library->fill(tensor, distribution, i); + } + else + { + // Legacy initialization in case the output quantization info can't be reliably estimated + std::pair<int, int> bounds = get_quantized_qasymm8_signed_bounds(tensor.quantization_info(), -1.0f, 1.0f); + std::uniform_int_distribution<int32_t> distribution(bounds.first, bounds.second); + library->fill(tensor, distribution, i); + } + break; + } case DataType::QSYMM8_PER_CHANNEL: { - std::uniform_int_distribution<int8_t> distribution(-10, 10); + int min_bound = 128; + int max_bound = -127; + for(size_t i = 0; i < _weights_quantization_info.scale().size(); i++) + { + std::pair<int, int> bounds = get_symm_quantized_per_channel_bounds(tensor.quantization_info(), -1.0f, 1.0f, i); + if(bounds.first < min_bound) + { + min_bound = bounds.first; + } + if(bounds.second > max_bound) + { + max_bound = bounds.second; + } + } + std::uniform_int_distribution<int32_t> distribution(min_bound, max_bound); library->fill(tensor, distribution, i); break; } - case DataType::F16: + case DataType::S32: { - arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f }; + std::uniform_int_distribution<int32_t> distribution(_min_bias, _max_bias); library->fill(tensor, distribution, i); break; } - case DataType::F32: + case DataType::BFLOAT16: { - std::uniform_real_distribution<float> distribution(-1.0f, 1.0f); + arm_compute::utils::uniform_real_distribution_16bit<bfloat16> distribution{ -1.0f, 1.0f }; library->fill(tensor, distribution, i); break; } - case DataType::S32: + case DataType::F16: + { + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f }; + library->fill(tensor, distribution, i); + break; + } + case DataType::F32: { - std::uniform_int_distribution<int32_t> distribution(-100, 100); + std::uniform_real_distribution<float> distribution(-1.0f, 1.0f); library->fill(tensor, distribution, i); break; } @@ -214,19 +370,33 @@ protected: ActivationLayerInfo _act_info{}; unsigned int _depth_multiplier{}; Size2D _dilation{}; + bool _mixed_layout{ false }; + bool _in_place{ false }; + bool _run_twice{ false }; + bool _use_dynamic_output_quant{false}; + + int32_t _hash{0}; + // Random initialization limits + // Default values are previously handcrafted limits + // that sould be used when we don't use dynamic quantization + int32_t _min_bias{-100}; + int32_t _max_bias{100}; + int32_t _min_u8{0}; + int32_t _max_u8{50}; + int32_t _min_s8{-25}; + int32_t _max_s8{25}; }; -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false, bool in_place = false, bool run_twice = false> class DepthwiseConvolutionLayerValidationFixture : public DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T> { public: - template <typename...> void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, Size2D dilation, unsigned int depth_multiplier, DataType data_type, DataLayout data_layout, ActivationLayerInfo act_info) { DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>::setup(in_shape, kernel_size, pad_stride_info, dilation, depth_multiplier, data_type, data_type, QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), - data_layout, act_info); + data_layout, act_info, mixed_layout, in_place, run_twice); } }; @@ -234,7 +404,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class DepthwiseConvolutionLayerNativeValidationFixture : public DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T> { public: - template <typename...> void setup(size_t width, size_t height, size_t channel, size_t batch, Size2D kernel_size, size_t depth_multiplier, Size2D dilation, Size2D stride, bool padding_valid, DataType data_type, DataLayout data_layout) { @@ -249,7 +418,7 @@ public: if(padding_valid) { - _conv_info = PadStrideInfo(); + _conv_info = PadStrideInfo(stride.width, stride.height); } else { @@ -274,13 +443,20 @@ public: _biases = create_tensor<TensorType>(_biases_shape, _data_type, 1, QuantizationInfo(), _data_layout); _target = create_tensor<TensorType>(TensorShape(), _data_type, 1, QuantizationInfo(), _data_layout); - // Create Depthwise Convolution configure function - _dwc.configure(&_src, &_weights, &_biases, &_target, _conv_info, _depth_multiplier, _dilation); + add_padding_x({ &_src, &_biases, &_target }, _data_layout); + add_padding_x({ &_weights }, _data_layout, true); - ARM_COMPUTE_EXPECT(_src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(_weights.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(_biases.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(_target.info()->is_resizable(), framework::LogLevel::ERRORS); + // Create Depthwise Convolution configure function + const ConvolutionInfo info + { + _conv_info, _depth_multiplier, ActivationLayerInfo(), _dilation + }; + _dwc.configure(_src.info(), _weights.info(), _biases.info(), _target.info(), info); + + ARM_COMPUTE_ASSERT(_src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(_weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(_biases.info()->is_resizable()); + ARM_COMPUTE_ASSERT(_target.info()->is_resizable()); } void allocate_and_run_target() @@ -291,18 +467,24 @@ public: _biases.allocator()->allocate(); _target.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!_src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!_weights.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!_biases.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!_target.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!_src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!_weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!_biases.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!_target.info()->is_resizable()); // Fill tensors fill(AccessorType(_src), 0); fill(AccessorType(_weights), 1); fill(AccessorType(_biases), 2); + arm_compute::ITensorPack pack; + pack.add_const_tensor(arm_compute::TensorType::ACL_SRC_0, &_src); + pack.add_const_tensor(arm_compute::TensorType::ACL_SRC_1, &_weights); + pack.add_const_tensor(arm_compute::TensorType::ACL_SRC_2, &_biases); + pack.add_tensor(arm_compute::TensorType::ACL_DST, &_target); + // Compute function - _dwc.run(); + _dwc.run(pack); } void compute_reference() @@ -315,9 +497,9 @@ public: fill(weights, 1); fill(biases, 2); - const TensorShape dst_shape = compute_depthwise_convolution_shape(TensorInfo(_input_shape, 1, _data_type), TensorInfo(_weights_shape, 1, _data_type), _conv_info, - _depth_multiplier, _dilation); - _reference = reference::depthwise_convolution(src, weights, biases, dst_shape, _conv_info, _depth_multiplier, _dilation); + const ConvolutionInfo info{ _conv_info, _depth_multiplier, ActivationLayerInfo(), _dilation }; + const TensorShape dst_shape = compute_depthwise_convolution_shape(TensorInfo(_input_shape, 1, _data_type), TensorInfo(_weights_shape, 1, _data_type), info); + _reference = reference::depthwise_convolution(src, weights, biases, dst_shape, _conv_info, _depth_multiplier, _dilation); } protected: @@ -355,20 +537,21 @@ protected: unsigned int _depth_multiplier{}; }; -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool in_place = false> class DepthwiseConvolutionLayerNativeConfigurableValidationFixture : public DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T> { public: - template <typename...> void setup(size_t width, size_t height, size_t channel, size_t batch, Size2D kernel_size, size_t depth_multiplier, Size2D dilation, Size2D stride, bool padding_valid, DataType data_type, - DataLayout data_layout, const ActivationLayerInfo &act_info, unsigned int n0) + DataLayout data_layout, const ActivationLayerInfo &act_info, unsigned int n0, bool export_to_cl_image) { - _dilation = dilation; - _depth_multiplier = depth_multiplier; - _data_type = data_type; - _data_layout = data_layout; - _act_info = act_info; - _n0 = n0; + _dilation = dilation; + _depth_multiplier = depth_multiplier; + _data_type = data_type; + _data_layout = data_layout; + _act_info = act_info; + _n0 = n0; + _export_to_cl_image = export_to_cl_image; + _in_place = in_place; _input_shape = TensorShape(width, height, channel, batch); _weights_shape = TensorShape(kernel_size.width, kernel_size.height, channel * _depth_multiplier); @@ -376,16 +559,29 @@ public: if(padding_valid) { - _conv_info = PadStrideInfo(); + _conv_info = calculate_same_pad(_input_shape, _weights_shape, PadStrideInfo(stride.width, stride.height), DataLayout::NCHW, _dilation); } else { - _conv_info = calculate_same_pad(_input_shape, _weights_shape, PadStrideInfo(stride.width, stride.height), DataLayout::NCHW, _dilation); + _conv_info = PadStrideInfo(stride.width, stride.height); } } void configure_target() { +#if defined(ARM_COMPUTE_OPENCL_ENABLED) + if(_export_to_cl_image) + { + _validate_output &= image2d_from_buffer_supported(CLKernelLibrary::get().get_device()); + _validate_output &= (get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device()) != 0); + } +#endif // ARM_COMPUTE_OPENCL_ENABLED + + if(!_validate_output) + { + return; + } + TensorShape input_shape = _input_shape; TensorShape weights_shape = _weights_shape; @@ -396,50 +592,89 @@ public: } // Create tensors - _src = create_tensor<TensorType>(input_shape, _data_type, 1, QuantizationInfo(), _data_layout); - _weights = create_tensor<TensorType>(weights_shape, _data_type, 1, QuantizationInfo(), _data_layout); - _biases = create_tensor<TensorType>(_biases_shape, _data_type, 1, QuantizationInfo(), _data_layout); - _target = create_tensor<TensorType>(TensorShape(), _data_type, 1, QuantizationInfo(), _data_layout); + _src = create_tensor<TensorType>(input_shape, _data_type, 1, QuantizationInfo(), _data_layout); + _weights = create_tensor<TensorType>(weights_shape, _data_type, 1, QuantizationInfo(), _data_layout); + _biases = create_tensor<TensorType>(_biases_shape, _data_type, 1, QuantizationInfo(), _data_layout); + TensorType *target_to_use = nullptr; + if(!_in_place) + { + _target = create_tensor<TensorType>(TensorShape(), _data_type, 1, QuantizationInfo(), _data_layout); + target_to_use = &_target; + } + + DWCComputeKernelInfo dwc_info; + dwc_info.n0 = _n0; + dwc_info.m0 = _conv_info.stride().first == 1 && _dilation.x() == 1 ? 8 : 1; + dwc_info.export_input_to_cl_image = false; + dwc_info.export_weights_to_cl_image = _export_to_cl_image; - DWCWeightsKernelInfo dwc_weights_info; - dwc_weights_info.n0 = _n0; + const ConvolutionInfo conv_kernel_info + { + _conv_info, _depth_multiplier, _act_info, _dilation + }; - DWCKernelInfo dwc_info; - dwc_info.activation_info = _act_info; + add_padding_x({ &_src, &_biases, &_target }, _data_layout); + add_padding_x({ &_weights }, _data_layout, _export_to_cl_image); // Don't add left padding if cl image will be used // Create Depthwise Convolution configure function - _dwc.configure(&_src, &_weights, &_biases, &_target, dwc_weights_info, dwc_info, _conv_info, _depth_multiplier, _dilation); + _dwc.configure(&_src, &_weights, &_biases, target_to_use, dwc_info, conv_kernel_info); - ARM_COMPUTE_EXPECT(_src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(_weights.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(_biases.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(_target.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(_src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(_weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(_biases.info()->is_resizable()); + ARM_COMPUTE_ASSERT(_target.info()->is_resizable()); } void allocate_and_run_target() { + if(!_validate_output) + { + return; + } + // Allocate tensors _src.allocator()->allocate(); _weights.allocator()->allocate(); _biases.allocator()->allocate(); - _target.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!_src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!_weights.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!_biases.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!_target.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!_src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!_weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!_biases.info()->is_resizable()); + if(!_in_place) + { + _target.allocator()->allocate(); + ARM_COMPUTE_ASSERT(!_target.info()->is_resizable()); + } // Fill tensors fill(AccessorType(_src), 0); fill(AccessorType(_weights), 1); fill(AccessorType(_biases), 2); + // Test Multi DataLayout graph cases, when the data layout changes after configure + _src.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); + if(!_in_place) + { + _target.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); + } + // Compute function _dwc.run(); + + // Reinstating original data layout for the test suite to properly check the values + if(!_in_place) + { + _target.info()->set_data_layout(_data_layout); + } } void compute_reference() { + if(!_validate_output) + { + return; + } + SimpleTensor<T> src{ _input_shape, _data_type }; SimpleTensor<T> weights{ _weights_shape, _data_type }; SimpleTensor<T> biases{ _biases_shape, _data_type }; @@ -448,9 +683,9 @@ public: fill(weights, 1); fill(biases, 2); - const TensorShape dst_shape = compute_depthwise_convolution_shape(TensorInfo(_input_shape, 1, _data_type), TensorInfo(_weights_shape, 1, _data_type), _conv_info, - _depth_multiplier, _dilation); - _reference = reference::activation_layer(reference::depthwise_convolution(src, weights, biases, dst_shape, _conv_info, _depth_multiplier, _dilation), _act_info); + const ConvolutionInfo info{ _conv_info, _depth_multiplier, _act_info, _dilation }; + const TensorShape dst_shape = compute_depthwise_convolution_shape(TensorInfo(_input_shape, 1, _data_type), TensorInfo(_weights_shape, 1, _data_type), info); + _reference = reference::activation_layer(reference::depthwise_convolution(src, weights, biases, dst_shape, _conv_info, _depth_multiplier, _dilation), _act_info); } protected: @@ -494,27 +729,28 @@ protected: Size2D _dilation{}; unsigned int _depth_multiplier{}; unsigned int _n0{}; + bool _export_to_cl_image{}; + bool _validate_output{ true }; + bool _in_place{ false }; }; -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false, bool in_place = false> class DepthwiseConvolutionLayerValidationQuantizedFixture : public DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T> { public: - template <typename...> void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, Size2D dilation, unsigned int depth_multiplier, DataType data_type, QuantizationInfo input_quantization_info, QuantizationInfo output_quantization_info, DataLayout data_layout, ActivationLayerInfo act_info) { DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>::setup(in_shape, kernel_size, pad_stride_info, dilation, depth_multiplier, data_type, data_type, input_quantization_info, input_quantization_info, output_quantization_info, - data_layout, act_info); + data_layout, act_info, mixed_layout, in_place); } }; -template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename TW> +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename TW, bool in_place = false> class DepthwiseConvolutionLayerValidationQuantizedPerChannelFixture : public DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, TW> { public: - template <typename...> void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, Size2D dilation, unsigned int depth_multiplier, DataType input_data_type, DataType weights_data_type, QuantizationInfo input_quantization_info, QuantizationInfo output_quantization_info, DataLayout data_layout, ActivationLayerInfo act_info) { @@ -532,10 +768,10 @@ public: DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, TW>::setup(in_shape, kernel_size, pad_stride_info, dilation, depth_multiplier, input_data_type, weights_data_type, input_quantization_info, QuantizationInfo(weights_scales), output_quantization_info, - data_layout, act_info); + data_layout, act_info, false, in_place); } }; } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_DEPTHWISECONVOLUTIONLAYERFIXTURE_H diff --git a/tests/validation/fixtures/DequantizationLayerFixture.h b/tests/validation/fixtures/DequantizationLayerFixture.h index 1c1f46a64c..4eb25a5bc5 100644 --- a/tests/validation/fixtures/DequantizationLayerFixture.h +++ b/tests/validation/fixtures/DequantizationLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -47,7 +47,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class DequantizationValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, DataType src_data_type, DataType dst_datatype, DataLayout data_layout) { _quantization_info = generate_quantization_info(src_data_type, shape.z()); @@ -77,15 +76,15 @@ protected: FunctionType dequantization_layer; dequantization_layer.configure(&src, &dst); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src)); diff --git a/tests/validation/fixtures/DirectConvolution3DFixture.h b/tests/validation/fixtures/DirectConvolution3DFixture.h new file mode 100644 index 0000000000..e80ad2f54f --- /dev/null +++ b/tests/validation/fixtures/DirectConvolution3DFixture.h @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2021, 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DIRECTCONVOLUTION3DFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DIRECTCONVOLUTION3DFIXTURE_H + +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "tests/framework/Asserts.h" // Required for ARM_COMPUTE_ASSERT +#include "tests/framework/Fixture.h" +#include "tests/validation/reference/ActivationLayer.h" +#include "tests/validation/reference/Conv3D.h" + +#include <random> + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +using namespace arm_compute::misc::shape_calculator; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DirectConvolution3DValidationGenericFixture : public framework::Fixture +{ +public: + using TBias = typename std::conditional < std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int32_t, T >::type; + + void setup(const TensorShape &input_shape, int stride_x, int stride_y, int stride_z, int pad_x, int pad_y, int pad_z, unsigned int kernel_width, int kernel_height, int kernel_depth, + unsigned int num_kernels, bool has_bias, const ActivationLayerInfo &act_info, const DataType &data_type, const DataLayout &data_layout, + const QuantizationInfo &src_qinfo = QuantizationInfo(), const QuantizationInfo &weights_qinfo = QuantizationInfo(), const QuantizationInfo &dst_qinfo = QuantizationInfo()) + { + ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NDHWC); + + const TensorShape weights_shape(num_kernels, input_shape[0], kernel_width, kernel_height, kernel_depth); + const TensorShape bias_shape(num_kernels); + const DataType bias_data_type = is_data_type_quantized(data_type) ? DataType::S32 : data_type; + const Conv3dInfo conv3d_info(Size3D(stride_x, stride_y, stride_z), Padding3D(pad_x, pad_y, pad_z), act_info, Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false); + const TensorShape output_shape = compute_conv3d_shape(input_shape, weights_shape, conv3d_info); + + _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, conv3d_info, has_bias, data_type, bias_data_type, data_layout, src_qinfo, weights_qinfo, dst_qinfo); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, conv3d_info, has_bias, data_type, bias_data_type, src_qinfo, weights_qinfo, dst_qinfo); + } + +protected: + template <typename U> + void fill(U &&tensor, int i) + { + switch(tensor.data_type()) + { + case DataType::F16: + { + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f }; + library->fill(tensor, distribution, i); + break; + } + case DataType::F32: + { + std::uniform_real_distribution<float> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + break; + } + default: + library->fill_tensor_uniform(tensor, i); + } + } + + TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const Conv3dInfo &conv3d_info, + bool has_bias, const DataType &data_type, const DataType &bias_data_type, const DataLayout &data_layout, const QuantizationInfo &src_qinfo, + const QuantizationInfo &weights_qinfo, const QuantizationInfo &dst_qinfo) + { + // Create tensors + TensorType src = create_tensor<TensorType>(input_shape, data_type, 1, src_qinfo, data_layout); + TensorType weights = create_tensor<TensorType>(weights_shape, data_type, 1, weights_qinfo, data_layout); + TensorType bias = has_bias ? create_tensor<TensorType>(bias_shape, bias_data_type, 1, QuantizationInfo()) : TensorType(); + TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1, dst_qinfo, data_layout); + + // Create and configure function + FunctionType conv{}; + conv.configure(&src, &weights, has_bias ? &bias : nullptr, &dst, conv3d_info); + + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + + // Allocate tensors + src.allocator()->allocate(); + weights.allocator()->allocate(); + dst.allocator()->allocate(); + + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + + // Fill tensors + fill(AccessorType(src), 0); + fill(AccessorType(weights), 1); + + if(has_bias) + { + ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); + bias.allocator()->allocate(); + ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); + fill(AccessorType(bias), 2); + } + + // Compute Direct Convolution 3D function + conv.run(); + + return dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, + const Conv3dInfo &conv3d_info, bool has_bias, const DataType &data_type, const DataType &bias_data_type, const QuantizationInfo &src_qinfo, + const QuantizationInfo &weights_qinfo, const QuantizationInfo &dst_qinfo) + { + // Create reference + SimpleTensor<T> src{ input_shape, data_type, 1, src_qinfo }; + SimpleTensor<T> weights{ weights_shape, data_type, 1, weights_qinfo }; + SimpleTensor<TBias> bias{ bias_shape, bias_data_type }; + SimpleTensor<T> dst{ output_shape, data_type, 1, dst_qinfo }; + + // Fill reference + fill(src, 0); + fill(weights, 1); + + if(has_bias) + { + fill(bias, 2); + } + + return reference::activation_layer(reference::conv3d<T, TBias>(src, weights, bias, dst, conv3d_info), conv3d_info.act_info); + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DirectConvolution3DValidationFixture : public DirectConvolution3DValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape input_shape, int stride_x, int stride_y, int stride_z, int pad_x, int pad_y, int pad_z, unsigned int kernel_width, int kernel_height, int kernel_depth, + unsigned int num_kernels, bool has_bias, ActivationLayerInfo act_info, DataType data_type, DataLayout data_layout) + { + DirectConvolution3DValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, stride_x, stride_y, stride_z, pad_x, pad_y, pad_z, kernel_width, kernel_height, + kernel_depth, num_kernels, has_bias, act_info, data_type, data_layout); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DirectConvolution3DValidationQuantizedFixture : public DirectConvolution3DValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape input_shape, int stride_x, int stride_y, int stride_z, int pad_x, int pad_y, int pad_z, unsigned int kernel_width, int kernel_height, int kernel_depth, + unsigned int num_kernels, bool has_bias, ActivationLayerInfo act_info, DataType data_type, DataLayout data_layout, QuantizationInfo src_qinfo, QuantizationInfo weights_qinfo, + QuantizationInfo dst_qinfo) + { + DirectConvolution3DValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, stride_x, stride_y, stride_z, pad_x, pad_y, pad_z, kernel_width, kernel_height, + kernel_depth, num_kernels, has_bias, act_info, data_type, data_layout, src_qinfo, + weights_qinfo, dst_qinfo); + } +}; +} // namespace validation +} // namespace test +} // namespace arm_compute + +#endif // ACL_TESTS_VALIDATION_FIXTURES_DIRECTCONVOLUTION3DFIXTURE_H diff --git a/tests/validation/fixtures/DirectConvolutionLayerFixture.h b/tests/validation/fixtures/DirectConvolutionLayerFixture.h index 8e4de77535..6f204642ca 100644 --- a/tests/validation/fixtures/DirectConvolutionLayerFixture.h +++ b/tests/validation/fixtures/DirectConvolutionLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,6 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DIRECTCONVOLUTIONLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DIRECTCONVOLUTIONLAYERFIXTURE_H + #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -51,12 +55,54 @@ class DirectConvolutionValidationGenericFixture : public framework::Fixture public: using TBias = typename std::conditional < std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int32_t, T >::type; - template <typename...> + void setup_quantization(const TensorShape &input_shape, const TensorShape &weights_shape, QuantizationInfo &input_q_info, + QuantizationInfo &weights_q_info, DataType data_type) + { + const int32_t t_max = static_cast<int32_t>(std::numeric_limits<T>::max()); + const int32_t t_min = static_cast<int32_t>(std::numeric_limits<T>::min()); + + std::mt19937 generator(library->seed() + _hash); + std::uniform_real_distribution<float> distribution_float(-5.0f, 3.0f); + std::uniform_int_distribution<int32_t> distribution_t(t_min, t_max); + + const float scale_lhs = pow(2, distribution_float(generator)); // [2^-5, 2^3] + const float scale_rhs = pow(2, distribution_float(generator)); // [2^-5, 2^3] + + const int32_t offset_lhs = distribution_t(generator); + const int32_t offset_rhs = distribution_t(generator); + + input_q_info = QuantizationInfo(scale_lhs, offset_lhs); + weights_q_info = QuantizationInfo(scale_rhs, offset_rhs); + + QuantizationHint q_hint = suggest_conv_dst_q_info_and_bias(input_q_info, weights_q_info, + weights_shape.y() /* heights */, weights_shape.x() /* width */, input_shape.z() /* channels */, + data_type, 0.5f /* bias_fraction */); + + _dst_q_info = q_hint.q_info; + _min_bias = q_hint.bias_min; + _max_bias = q_hint.bias_max; + + // Do not change here as these limits are the natural limits of the associated data types and + // are embeded in the computation of the dst quantization info. + _min_u8 = 0; + _max_u8 = 255; + _min_s8 = -128; + _max_s8 = 127; + } + void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned int num_kernels, - DataType data_type, QuantizationInfo quantization_info, ActivationLayerInfo act_info, DataLayout data_layout) + DataType data_type, QuantizationInfo quantization_info, ActivationLayerInfo act_info, DataLayout data_layout, bool mixed_layout = false) { - _quantization_info = quantization_info; + // This hash is used by random generators. There may be hash collisions but + // this is intentional as it's a very easy way to make the the current + // random generation process almost different for many test configurations, + // which were using the same set of values before. + _hash = input_shape[0] + input_shape[1] + input_shape[2] + input_shape[3] + + stride_x + stride_y + pad_x + pad_y + kernel_size + num_kernels + mixed_layout + + (data_layout == DataLayout::NHWC); + _data_type = data_type; + _mixed_layout = mixed_layout; TensorShape weights_shape(kernel_size, kernel_size, input_shape.z(), num_kernels); const TensorShape bias_shape(num_kernels); @@ -68,27 +114,66 @@ public: const TensorShape output_shape = compute_deep_convolution_shape(input_info, weights_info, info); - _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, quantization_info, act_info, data_layout); - _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, quantization_info, act_info); + QuantizationInfo input_q_info = quantization_info; + QuantizationInfo weights_q_info = quantization_info; + _dst_q_info = quantization_info; + + if(is_data_type_quantized(data_type) && (!act_info.enabled() || act_info.activation() == ActivationFunction::IDENTITY)) + { + setup_quantization(input_shape, weights_shape, input_q_info, weights_q_info, data_type); + } + + _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, input_q_info, weights_q_info, act_info, data_layout); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, input_q_info, weights_q_info, act_info); } - template <typename...> void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, DataType data_type, QuantizationInfo quantization_info, ActivationLayerInfo act_info, DataLayout data_layout) { ARM_COMPUTE_ERROR_ON(data_layout == DataLayout::UNKNOWN); ARM_COMPUTE_UNUSED(dilation); - _quantization_info = quantization_info; + // This hash is used by random generators. There may be hash collisions but + // this is intentional as it's a very easy way to make the the current + // random generation process almost different for many test configurations, + // which were using the same set of values before. + _hash = input_shape[0] + input_shape[1] + input_shape[2] + input_shape[3] + + weights_shape[0] + weights_shape[1] + weights_shape[2] + weights_shape[3] + dilation.x() + + dilation.y() + info.pad_bottom() + info.pad_left() + info.pad_right() + info.pad_top(); + _data_type = data_type; const DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type; - _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, quantization_info, act_info, data_layout); - _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, quantization_info, act_info); + QuantizationInfo input_q_info = quantization_info; + QuantizationInfo weights_q_info = quantization_info; + _dst_q_info = quantization_info; + + if(is_data_type_quantized(data_type) && (!act_info.enabled() || act_info.activation() == ActivationFunction::IDENTITY)) + { + setup_quantization(input_shape, weights_shape, input_q_info, weights_q_info, data_type); + } + + _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, input_q_info, weights_q_info, act_info, data_layout); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, input_q_info, weights_q_info, act_info); } protected: + void mix_layout(FunctionType &layer, TensorType &src, TensorType &dst) + { + DataLayout data_layout = src.info()->data_layout(); + // Test Multi DataLayout graph cases, when the data layout changes after configure + src.info()->set_data_layout(data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); + dst.info()->set_data_layout(data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); + + // Compute Convolution function + layer.run(); + + // Reinstating original data layout for the test suite to properly check the values + src.info()->set_data_layout(data_layout); + dst.info()->set_data_layout(data_layout); + } + template <typename U> void fill(U &&tensor, int i) { @@ -96,14 +181,14 @@ protected: { case DataType::QASYMM8: { - std::uniform_int_distribution<uint8_t> distribution(0, 50); + std::uniform_int_distribution<uint32_t> distribution(_min_u8, _max_u8); library->fill(tensor, distribution, i); break; } case DataType::QASYMM8_SIGNED: { // Use small input range to avoid all the test results being saturated at the end. - std::uniform_int_distribution<int8_t> distribution(-25, 25); + std::uniform_int_distribution<int32_t> distribution(_min_s8, _max_s8); library->fill(tensor, distribution, i); break; } @@ -121,7 +206,7 @@ protected: } case DataType::S32: { - std::uniform_int_distribution<int32_t> distribution(-5, 5); + std::uniform_int_distribution<int32_t> distribution(_min_bias, _max_bias); library->fill(tensor, distribution, i); break; } @@ -131,7 +216,7 @@ protected: } TensorType compute_target(TensorShape input_shape, TensorShape weights_shape, const TensorShape &bias_shape, TensorShape output_shape, const PadStrideInfo &info, - DataType data_type, DataType bias_data_type, QuantizationInfo quantization_info, ActivationLayerInfo act_info, const DataLayout &data_layout) + DataType data_type, DataType bias_data_type, QuantizationInfo input_q_info, QuantizationInfo weights_q_info, ActivationLayerInfo act_info, const DataLayout &data_layout) { if(data_layout == DataLayout::NHWC) { @@ -141,19 +226,22 @@ protected: } // Create tensors - TensorType src = create_tensor<TensorType>(input_shape, data_type, 1, quantization_info, data_layout); - TensorType weights = create_tensor<TensorType>(weights_shape, data_type, 1, quantization_info, data_layout); - TensorType bias = create_tensor<TensorType>(bias_shape, bias_data_type, 1, quantization_info); - TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1, quantization_info, data_layout); + TensorType src = create_tensor<TensorType>(input_shape, data_type, 1, input_q_info, data_layout); + TensorType weights = create_tensor<TensorType>(weights_shape, data_type, 1, weights_q_info, data_layout); + TensorType bias = create_tensor<TensorType>(bias_shape, bias_data_type, 1, QuantizationInfo()); + TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1, _dst_q_info, data_layout); + + add_padding_x({ &src, &bias, &dst }, data_layout); + add_padding_x({ &weights }, data_layout, input_shape[0] % 4 == 0); // Don't add left padding if cl image will be used // Create and configure function FunctionType conv; conv.configure(&src, &weights, &bias, &dst, info, act_info); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); @@ -161,67 +249,86 @@ protected: bias.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!weights.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors - fill(AccessorType(src), 0); - fill(AccessorType(weights), 1); - fill(AccessorType(bias), 2); + fill(AccessorType(src), 0 + _hash); + fill(AccessorType(weights), 1 + _hash); + fill(AccessorType(bias), 2 + _hash); - // Compute NEConvolutionLayer function - conv.run(); + if(_mixed_layout) + { + mix_layout(conv, src, dst); + } + else + { + // Compute Convolution function + conv.run(); + } return dst; } SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info, - DataType data_type, DataType bias_data_type, QuantizationInfo quantization_info, ActivationLayerInfo act_info) + DataType data_type, DataType bias_data_type, QuantizationInfo input_q_info, QuantizationInfo weights_q_info, ActivationLayerInfo act_info) { // Create reference - SimpleTensor<T> src{ input_shape, data_type, 1, quantization_info }; - SimpleTensor<T> weights{ weights_shape, data_type, 1, quantization_info }; - SimpleTensor<TBias> bias{ bias_shape, bias_data_type, 1, quantization_info }; + SimpleTensor<T> src{ input_shape, data_type, 1, input_q_info }; + SimpleTensor<T> weights{ weights_shape, data_type, 1, weights_q_info }; + SimpleTensor<TBias> bias{ bias_shape, bias_data_type, 1, QuantizationInfo() }; // Fill reference - fill(src, 0); - fill(weights, 1); - fill(bias, 2); + fill(src, 0 + _hash); + fill(weights, 1 + _hash); + fill(bias, 2 + _hash); - SimpleTensor<T> dst = reference::convolution_layer<T>(src, weights, bias, output_shape, info); - return (act_info.enabled()) ? reference::activation_layer<T>(dst, act_info) : dst; + SimpleTensor<T> dst = reference::convolution_layer<T>(src, weights, bias, output_shape, info, + Size2D(1U, 1U) /* dilation */, 1 /* num_groups */, _dst_q_info); + SimpleTensor<T> dst2 = (act_info.enabled()) ? reference::activation_layer<T>(dst, act_info) : dst; + return dst2; } TensorType _target{}; SimpleTensor<T> _reference{}; - QuantizationInfo _quantization_info{}; + QuantizationInfo _dst_q_info{}; DataType _data_type{}; + bool _mixed_layout{ false }; + int32_t _hash{0}; + + // Random initialization limits + // Default values are previously handcrafted limits + // that sould be used when we don't use dynamic quantization + int32_t _min_bias{-5}; + int32_t _max_bias{5}; + int32_t _min_u8{0}; + int32_t _max_u8{50}; + int32_t _min_s8{-25}; + int32_t _max_s8{25}; }; -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false> class DirectConvolutionValidationFixture : public DirectConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned int num_kernels, DataType data_type, ActivationLayerInfo act_info, DataLayout data_layout) { DirectConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, stride_x, stride_y, pad_x, pad_y, kernel_size, num_kernels, data_type, QuantizationInfo(), - act_info, data_layout); + act_info, data_layout, mixed_layout); } }; -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false> class DirectConvolutionValidationQuantizedFixture : public DirectConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned int num_kernels, DataType data_type, QuantizationInfo quantization_info, ActivationLayerInfo act_info, DataLayout data_layout) { DirectConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, stride_x, stride_y, pad_x, pad_y, kernel_size, num_kernels, data_type, quantization_info, - act_info, data_layout); + act_info, data_layout, mixed_layout); } }; @@ -229,7 +336,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class DirectConvolutionValidationWithTensorShapesQuantizedFixture : public DirectConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, DataType data_type, QuantizationInfo quantization_info, ActivationLayerInfo act_info, DataLayout data_layout) { @@ -242,7 +348,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class DirectConvolutionValidationWithTensorShapesFixture : public DirectConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, DataType data_type, ActivationLayerInfo act_info) { @@ -254,3 +359,5 @@ public: } // namespace validation } // namespace test } // namespace arm_compute + +#endif // ACL_TESTS_VALIDATION_FIXTURES_DIRECTCONVOLUTIONLAYERFIXTURE_H diff --git a/tests/validation/fixtures/DirectConvolutionLayerTensorShiftFixture.h b/tests/validation/fixtures/DirectConvolutionLayerTensorShiftFixture.h deleted file mode 100644 index 6ef30d3c21..0000000000 --- a/tests/validation/fixtures/DirectConvolutionLayerTensorShiftFixture.h +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright (c) 2017-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/Types.h" -#include "tests/AssetsLibrary.h" -#include "tests/Globals.h" -#include "tests/IAccessor.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Fixture.h" -#include "tests/validation/Helpers.h" -#include "tests/validation/fixtures/ConvolutionLayerFixture.h" -#include "tests/validation/reference/ConvolutionLayer.h" - -#include <random> - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class DirectConvolutionValidationGenericTensorShiftFixture : public framework::Fixture -{ -public: - using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value, int32_t, T>::type; - -public: - template <typename...> - void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned int num_kernels, - DataType data_type, QuantizationInfo quantization_info) - { - _quantization_info = quantization_info; - _data_type = data_type; - - const TensorShape weights_shape(kernel_size, kernel_size, input_shape.z(), num_kernels); - const TensorShape bias_shape(num_kernels); - const PadStrideInfo info(stride_x, stride_y, pad_x, pad_y, DimensionRoundingType::FLOOR); - const TensorShape output_shape = get_output_shape(input_shape, weights_shape, info); - const DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type; - - _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, quantization_info); - _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, quantization_info); - } - - template <typename...> - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, unsigned int dilation_x, unsigned int dilation_y, - DataType data_type, QuantizationInfo quantization_info) - { - ARM_COMPUTE_UNUSED(dilation_x, dilation_y); - - _quantization_info = quantization_info; - _data_type = data_type; - - const DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type; - - _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, quantization_info); - _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, quantization_info); - } - -protected: - template <typename U> - void fill(U &&tensor, int i) - { - switch(tensor.data_type()) - { - case DataType::QASYMM8: - { - std::uniform_int_distribution<uint8_t> distribution(0, 50); - library->fill(tensor, distribution, i); - break; - } - case DataType::F16: - { - arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f }; - library->fill(tensor, distribution, i); - break; - } - case DataType::F32: - { - std::uniform_real_distribution<float> distribution(-1.0f, 1.0f); - library->fill(tensor, distribution, i); - break; - } - case DataType::S32: - { - std::uniform_int_distribution<int32_t> distribution(-5, 5); - library->fill(tensor, distribution, i); - break; - } - default: - library->fill_tensor_uniform(tensor, i); - } - } - - TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info, - DataType data_type, DataType bias_data_type, QuantizationInfo quantization_info) - { - // Create tensors - TensorType src = create_tensor<TensorType>(input_shape, data_type, 1, quantization_info); - TensorType weights = create_tensor<TensorType>(weights_shape, data_type, 1, quantization_info); - TensorType bias = create_tensor<TensorType>(bias_shape, bias_data_type, 1, quantization_info); - TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1, quantization_info); - - TensorShape output_shape1 = get_output_shape(output_shape, weights_shape, info); - TensorType dst1 = create_tensor<TensorType>(output_shape1, data_type, 1, quantization_info); - - // Create and configure function - FunctionType conv; - conv.configure(&src, &weights, &bias, &dst, info); - FunctionType conv1; - conv1.configure(&dst, &weights, &bias, &dst1, info); - - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst1.info()->is_resizable(), framework::LogLevel::ERRORS); - - // Allocate tensors - src.allocator()->allocate(); - weights.allocator()->allocate(); - bias.allocator()->allocate(); - dst.allocator()->allocate(); - dst1.allocator()->allocate(); - - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!weights.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst1.info()->is_resizable(), framework::LogLevel::ERRORS); - - // Fill tensors - fill(AccessorType(src), 0); - fill(AccessorType(weights), 1); - fill(AccessorType(bias), 2); - - // Compute NEConvolutionLayer function - GCScheduler::get().memory_barrier(); - conv.run(); - GCScheduler::get().memory_barrier(); - conv1.run(); - - return dst1; - } - - SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info, - DataType data_type, DataType bias_data_type, QuantizationInfo quantization_info) - { - // Create reference - SimpleTensor<T> src{ input_shape, data_type, 1, quantization_info }; - SimpleTensor<T> weights{ weights_shape, data_type, 1, quantization_info }; - SimpleTensor<TBias> bias{ bias_shape, bias_data_type, 1, quantization_info }; - - SimpleTensor<T> dst{ output_shape, data_type, 1, quantization_info }; - TensorShape output_shape1 = get_output_shape(output_shape, weights_shape, info); - - // Fill reference - fill(src, 0); - fill(weights, 1); - fill(bias, 2); - - dst = reference::convolution_layer<T>(src, weights, bias, output_shape, info); - return reference::convolution_layer<T>(dst, weights, bias, output_shape1, info); - } - - TensorType _target{}; - SimpleTensor<T> _reference{}; - QuantizationInfo _quantization_info{}; - DataType _data_type{}; - -private: - TensorShape get_output_shape(TensorShape in_shape, TensorShape kernel_shape, const PadStrideInfo &info) - { - TensorShape out_shape(in_shape); - const std::pair<unsigned int, unsigned int> scaled_dims = scaled_dimensions(in_shape.x(), - in_shape.y(), - kernel_shape.x(), - kernel_shape.y(), - info); - out_shape.set(0, scaled_dims.first); - out_shape.set(1, scaled_dims.second); - out_shape.set(2, kernel_shape[3]); - return out_shape; - } -}; - -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class DirectConvolutionValidationTensorShiftFixture : public DirectConvolutionValidationGenericTensorShiftFixture<TensorType, AccessorType, FunctionType, T> -{ -public: - template <typename...> - void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned int num_kernels, DataType data_type) - { - DirectConvolutionValidationGenericTensorShiftFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, stride_x, stride_y, pad_x, pad_y, kernel_size, num_kernels, data_type, - QuantizationInfo()); - } -}; - -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class DirectConvolutionValidationQuantizedTensorShiftFixture : public DirectConvolutionValidationGenericTensorShiftFixture<TensorType, AccessorType, FunctionType, T> -{ -public: - template <typename...> - void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned int num_kernels, DataType data_type, QuantizationInfo quantization_info) - { - DirectConvolutionValidationGenericTensorShiftFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, stride_x, stride_y, pad_x, pad_y, kernel_size, num_kernels, data_type, - quantization_info); - } -}; - -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class DirectConvolutionValidationWithTensorShapesQuantizedTensorShiftFixture : public DirectConvolutionValidationGenericTensorShiftFixture<TensorType, AccessorType, FunctionType, T> -{ -public: - template <typename...> - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, unsigned int dilation_x, unsigned int dilation_y, - DataType data_type, QuantizationInfo quantization_info) - { - DirectConvolutionValidationGenericTensorShiftFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation_x, dilation_y, data_type, - quantization_info); - } -}; - -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class DirectConvolutionValidationWithTensorShapesTensorShiftFixture : public DirectConvolutionValidationGenericTensorShiftFixture<TensorType, AccessorType, FunctionType, T> -{ -public: - template <typename...> - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, unsigned int dilation_x, unsigned int dilation_y, - DataType data_type) - { - DirectConvolutionValidationGenericTensorShiftFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation_x, dilation_y, data_type, - QuantizationInfo()); - } -}; - -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/fixtures/DropoutLayerFixture.h b/tests/validation/fixtures/DropoutLayerFixture.h index 63df936032..a84f2a6407 100644 --- a/tests/validation/fixtures/DropoutLayerFixture.h +++ b/tests/validation/fixtures/DropoutLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -46,7 +46,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class DropoutLayerValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, float ratio, bool forward, DataType data_type) { _target = compute_target(shape, ratio, forward, data_type); @@ -70,17 +69,17 @@ protected: FunctionType dropout_layer; dropout_layer.configure(&src, &mask, &dst, ratio, forward); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); mask.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!mask.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!mask.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src)); diff --git a/tests/validation/fixtures/ElementWiseUnaryFixture.h b/tests/validation/fixtures/ElementWiseUnaryFixture.h deleted file mode 100644 index 8cffef48f6..0000000000 --- a/tests/validation/fixtures/ElementWiseUnaryFixture.h +++ /dev/null @@ -1,270 +0,0 @@ -/* - * Copyright (c) 2018-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_ELEMENTWISE_UNARY_FIXTURE -#define ARM_COMPUTE_TEST_ELEMENTWISE_UNARY_FIXTURE - -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/Types.h" -#include "tests/AssetsLibrary.h" -#include "tests/Globals.h" -#include "tests/IAccessor.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Fixture.h" -#include "tests/validation/reference/ElementWiseUnary.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class ElementWiseUnaryValidationFixture : public framework::Fixture -{ -public: - template <typename...> - void setup(TensorShape input_shape, DataType input_data_type, bool in_place, ElementWiseUnary op) - { - _op = op; - _target = compute_target(input_shape, input_data_type, in_place); - _reference = compute_reference(input_shape, input_data_type); - } - -protected: - template <typename U> - void fill(U &&tensor, int i, DataType data_type) - { - using FloatType = typename std::conditional < std::is_same<T, half>::value || std::is_floating_point<T>::value, T, float >::type; - using FloatDistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<FloatType>>::type; - - switch(_op) - { - case ElementWiseUnary::EXP: - { - FloatDistributionType distribution{ FloatType(-1.0f), FloatType(1.0f) }; - library->fill(tensor, distribution, i); - break; - } - case ElementWiseUnary::RSQRT: - { - FloatDistributionType distribution{ FloatType(1.0f), FloatType(2.0f) }; - library->fill(tensor, distribution, i); - break; - } - case ElementWiseUnary::ABS: - case ElementWiseUnary::NEG: - { - switch(data_type) - { - case DataType::F16: - { - arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -2.0f, 2.0f }; - library->fill(tensor, distribution, i); - break; - } - case DataType::F32: - { - FloatDistributionType distribution{ FloatType(-2.0f), FloatType(2.0f) }; - library->fill(tensor, distribution, i); - break; - } - case DataType::S32: - { - std::uniform_int_distribution<int32_t> distribution(-100, 100); - library->fill(tensor, distribution, i); - break; - } - default: - ARM_COMPUTE_ERROR("DataType for Elementwise Negation Not implemented"); - } - break; - } - case ElementWiseUnary::LOG: - { - FloatDistributionType distribution{ FloatType(0.0000001f), FloatType(100.0f) }; - library->fill(tensor, distribution, i); - break; - } - case ElementWiseUnary::SIN: - { - FloatDistributionType distribution{ FloatType(-100.00f), FloatType(100.00f) }; - library->fill(tensor, distribution, i); - break; - } - case ElementWiseUnary::ROUND: - { - FloatDistributionType distribution{ FloatType(100.0f), FloatType(-100.0f) }; - library->fill(tensor, distribution, i); - break; - } - default: - ARM_COMPUTE_ERROR("Not implemented"); - } - } - - TensorType compute_target(const TensorShape &shape, DataType data_type, bool in_place) - { - // Create tensors - TensorType src = create_tensor<TensorType>(shape, data_type); - TensorType dst = create_tensor<TensorType>(shape, data_type); - - TensorType *actual_dst = in_place ? &src : &dst; - - // Create and configure function - FunctionType elwiseunary_layer; - elwiseunary_layer.configure(&src, actual_dst); - - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - src.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - if(!in_place) - { - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); - dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); - } - - // Fill tensors - fill(AccessorType(src), 0, data_type); - - // Compute function - elwiseunary_layer.run(); - - if(in_place) - { - return src; - } - else - { - return dst; - } - } - - SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type) - { - // Create reference - SimpleTensor<T> src{ shape, data_type }; - - // Fill reference - fill(src, 0, data_type); - - return reference::elementwise_unary<T>(src, _op); - } - - TensorType _target{}; - SimpleTensor<T> _reference{}; - ElementWiseUnary _op{}; -}; - -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class RsqrtValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T> -{ -public: - template <typename...> - void setup(const TensorShape &shape, DataType data_type) - { - ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::RSQRT); - } -}; - -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class ExpValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T> -{ -public: - template <typename...> - void setup(const TensorShape &shape, DataType data_type) - { - ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::EXP); - } -}; - -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class NegValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T> -{ -public: - template <typename...> - void setup(const TensorShape &shape, DataType data_type) - { - ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::NEG); - } -}; - -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class NegValidationInPlaceFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T> -{ -public: - template <typename...> - void setup(const TensorShape &shape, DataType data_type, bool in_place) - { - ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, in_place, ElementWiseUnary::NEG); - } -}; - -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class LogValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T> -{ -public: - template <typename...> - void setup(const TensorShape &shape, DataType data_type) - { - ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::LOG); - } -}; - -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class AbsValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T> -{ -public: - template <typename...> - void setup(const TensorShape &shape, DataType data_type) - { - ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::ABS); - } -}; - -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class SinValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T> -{ -public: - template <typename...> - void setup(const TensorShape &shape, DataType data_type) - { - ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::SIN); - } -}; - -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class RoundValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T> -{ -public: - template <typename...> - void setup(const TensorShape &shape, DataType data_type) - { - ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::ROUND); - } -}; -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_ELEMENTWISE_UNARY_FIXTURE */ diff --git a/tests/validation/fixtures/ElementwiseOperationsFixture.h b/tests/validation/fixtures/ElementwiseOperationsFixture.h index dcb408c801..f36a1f75b7 100644 --- a/tests/validation/fixtures/ElementwiseOperationsFixture.h +++ b/tests/validation/fixtures/ElementwiseOperationsFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,11 +21,12 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_ELEMENTWISE_OPERATIONS_FIXTURE -#define ARM_COMPUTE_TEST_ELEMENTWISE_OPERATIONS_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_ELEMENTWISEOPERATIONSFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_ELEMENTWISEOPERATIONSFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" #include "tests/AssetsLibrary.h" #include "tests/Globals.h" #include "tests/IAccessor.h" @@ -45,12 +46,14 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ArithmeticOperationsGenericFixture : public framework::Fixture { public: - template <typename...> void setup(ArithmeticOperation op, const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, - QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out) + QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace = false, bool use_dynamic_shape = false) { - _op = op; + _op = op; + _use_dynamic_shape = use_dynamic_shape; + _is_inplace = is_inplace; + _target = compute_target(shape0, shape1, data_type0, data_type1, output_data_type, qinfo0, qinfo1, qinfo_out); _reference = compute_reference(shape0, shape1, data_type0, data_type1, output_data_type, qinfo0, qinfo1, qinfo_out); } @@ -83,26 +86,67 @@ protected: QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out) { // Create tensors - TensorType ref_src1 = create_tensor<TensorType>(shape0, data_type0, 1, qinfo0); - TensorType ref_src2 = create_tensor<TensorType>(shape1, data_type1, 1, qinfo1); - TensorType dst = create_tensor<TensorType>(TensorShape::broadcast_shape(shape0, shape1), output_data_type, 1, qinfo_out); + const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1); + TensorType ref_src1 = create_tensor<TensorType>(shape0, data_type0, 1, qinfo0); + TensorType ref_src2 = create_tensor<TensorType>(shape1, data_type1, 1, qinfo1); + TensorType dst = create_tensor<TensorType>(out_shape, output_data_type, 1, qinfo_out); + + // Check whether do in-place computation and whether inputs are broadcast compatible + TensorType *actual_dst = &dst; + if(_is_inplace) + { + bool src1_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) && (qinfo0 == qinfo_out) && (data_type0 == output_data_type); + bool src2_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) && (qinfo1 == qinfo_out) && (data_type1 == output_data_type); + bool do_in_place = out_shape.total_size() != 0 && (src1_is_inplace || src2_is_inplace); + ARM_COMPUTE_ASSERT(do_in_place); + + if(src1_is_inplace) + { + actual_dst = &ref_src1; + } + else + { + actual_dst = &ref_src2; + } + } + + // if _use_dynamic_shape is true, this fixture will test scenario for dynamic shapes. + // - At configure time, all input tensors are marked as dynamic using set_tensor_dynamic() + // - After configure, tensors are marked as static for run using set_tensor_static() + // - The tensors with static shape are given to run() + if(_use_dynamic_shape) + { + set_tensor_dynamic(ref_src1); + set_tensor_dynamic(ref_src2); + } // Create and configure function FunctionType elem_op; - elem_op.configure(&ref_src1, &ref_src2, &dst); + elem_op.configure(&ref_src1, &ref_src2, actual_dst); - ARM_COMPUTE_EXPECT(ref_src1.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(ref_src2.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + if(_use_dynamic_shape) + { + set_tensor_static(ref_src1); + set_tensor_static(ref_src2); + } + + ARM_COMPUTE_ASSERT(ref_src1.info()->is_resizable()); + ARM_COMPUTE_ASSERT(ref_src2.info()->is_resizable()); // Allocate tensors ref_src1.allocator()->allocate(); ref_src2.allocator()->allocate(); - dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!ref_src1.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!ref_src2.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + // If don't do in-place computation, still need to allocate original dst + if(!_is_inplace) + { + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + dst.allocator()->allocate(); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + } + + ARM_COMPUTE_ASSERT(!ref_src1.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!ref_src2.info()->is_resizable()); // Fill tensors fill(AccessorType(ref_src1), 0); @@ -111,7 +155,7 @@ protected: // Compute function elem_op.run(); - return dst; + return std::move(*actual_dst); } SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1, @@ -133,6 +177,8 @@ protected: TensorType _target{}; SimpleTensor<T> _reference{}; ArithmeticOperation _op{ ArithmeticOperation::ADD }; + bool _use_dynamic_shape{ false }; + bool _is_inplace{ false }; }; // Arithmetic operation fused with activation function @@ -140,15 +186,15 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ArithmeticOperationsFuseActivationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(ArithmeticOperation op, const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, - QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, ActivationLayerInfo act_info) + QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, ActivationLayerInfo act_info, bool is_inplace = true) { ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(op, shape0, shape1, data_type0, data_type1, output_data_type, - qinfo0, qinfo1, qinfo_out); - _act_info = act_info; + qinfo0, qinfo1, qinfo_out, is_inplace); + _act_info = act_info; + _is_inplace = is_inplace; } protected: @@ -156,26 +202,51 @@ protected: QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out) { // Create tensors - TensorType ref_src1 = create_tensor<TensorType>(shape0, data_type0, 1, qinfo0); - TensorType ref_src2 = create_tensor<TensorType>(shape1, data_type1, 1, qinfo1); - TensorType dst = create_tensor<TensorType>(TensorShape::broadcast_shape(shape0, shape1), output_data_type, 1, qinfo_out); + const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1); + TensorType ref_src1 = create_tensor<TensorType>(shape0, data_type0, 1, qinfo0); + TensorType ref_src2 = create_tensor<TensorType>(shape1, data_type1, 1, qinfo1); + TensorType dst = create_tensor<TensorType>(out_shape, output_data_type, 1, qinfo_out); + + // Check whether do in-place computation and whether inputs are broadcast compatible + TensorType *actual_dst = &dst; + if(_is_inplace) + { + bool src1_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) && (qinfo0 == qinfo_out) && (data_type0 == output_data_type); + bool src2_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) && (qinfo1 == qinfo_out) && (data_type1 == output_data_type); + bool do_in_place = out_shape.total_size() != 0 && (src1_is_inplace || src2_is_inplace); + ARM_COMPUTE_ASSERT(do_in_place); + + if(src1_is_inplace) + { + actual_dst = &ref_src1; + } + else + { + actual_dst = &ref_src2; + } + } // Create and configure function FunctionType elem_op; - elem_op.configure(&ref_src1, &ref_src2, &dst, _act_info); + elem_op.configure(&ref_src1, &ref_src2, actual_dst, _act_info); - ARM_COMPUTE_EXPECT(ref_src1.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(ref_src2.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(ref_src1.info()->is_resizable()); + ARM_COMPUTE_ASSERT(ref_src2.info()->is_resizable()); // Allocate tensors ref_src1.allocator()->allocate(); ref_src2.allocator()->allocate(); - dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!ref_src1.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!ref_src2.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + // If don't do in-place computation, still need to allocate original dst + if(!_is_inplace) + { + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + dst.allocator()->allocate(); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + } + + ARM_COMPUTE_ASSERT(!ref_src1.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!ref_src2.info()->is_resizable()); // Fill tensors fill(AccessorType(ref_src1), 0); @@ -184,7 +255,7 @@ protected: // Compute function elem_op.run(); - return dst; + return std::move(*actual_dst); } SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1, @@ -197,18 +268,18 @@ protected: } ActivationLayerInfo _act_info{}; + bool _is_inplace{ false }; }; template <typename TensorType, typename AccessorType, typename FunctionType, typename T> class ArithmeticDivisionBroadcastValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type) + void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace) { ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::DIV, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo()); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace); } }; @@ -216,12 +287,35 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ArithmeticDivisionValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type) + void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace) { ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::DIV, shape, shape, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo()); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class ArithmeticDivisionBroadcastDynamicShapeValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace) + { + ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::DIV, shape0, shape1, + data_type0, data_type1, output_data_type, + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace, true /* use_dynamic_shape */); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class ArithmeticDivisionDynamicShapeValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace) + { + ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::DIV, shape, shape, + data_type0, data_type1, output_data_type, + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace, true /* use_dynamic_shape */); } }; @@ -229,12 +323,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ArithmeticDivisionBroadcastValidationFloatFixture : public ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info) + void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace) { ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::DIV, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace); } }; @@ -242,12 +335,23 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ArithmeticDivisionValidationFloatFixture : public ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info) + void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace) { ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::DIV, shape, shape, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class ArithmeticDivisionValidationIntegerFixture : public ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace) + { + ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::DIV, shape, shape, + data_type0, data_type1, output_data_type, + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace); } }; @@ -255,14 +359,13 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ArithmeticDivisionValidationQuantizedFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, - QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out) + QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace) { ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::DIV, shape, shape, data_type0, data_type1, output_data_type, - qinfo0, qinfo1, qinfo_out); + qinfo0, qinfo1, qinfo_out, is_inplace); } }; @@ -270,12 +373,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ElementwiseMaxBroadcastValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type) + void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace) { ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MAX, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo()); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace); } }; @@ -283,12 +385,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ElementwiseMaxValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type) + void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace) { ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MAX, shape, shape, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo()); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace); } }; @@ -296,12 +397,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ElementwiseMaxBroadcastValidationFloatFixture : public ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info) + void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace) { ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MAX, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace); } }; @@ -309,12 +409,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ElementwiseMaxValidationFloatFixture : public ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info) + void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace) { ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MAX, shape, shape, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace); } }; @@ -322,14 +421,13 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ElementwiseMaxValidationQuantizedFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, - QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out) + QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace) { ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MAX, shape, shape, data_type0, data_type1, output_data_type, - qinfo0, qinfo1, qinfo_out); + qinfo0, qinfo1, qinfo_out, is_inplace); } }; @@ -337,14 +435,13 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ElementwiseMaxQuantizedBroadcastValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, - QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out) + QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace) { ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MAX, shape0, shape1, data_type0, data_type1, output_data_type, - qinfo0, qinfo1, qinfo_out); + qinfo0, qinfo1, qinfo_out, is_inplace); } }; @@ -352,12 +449,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ElementwiseMinBroadcastValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type) + void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace) { ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MIN, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo()); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace); } }; @@ -365,12 +461,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ElementwiseMinValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type) + void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace) { ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MIN, shape, shape, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo()); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace); } }; @@ -378,12 +473,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ElementwiseMinBroadcastValidationFloatFixture : public ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info) + void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace) { ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MIN, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace); } }; @@ -391,12 +485,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ElementwiseMinValidationFloatFixture : public ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info) + void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace) { ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MIN, shape, shape, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace); } }; @@ -404,14 +497,13 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ElementwiseMinValidationQuantizedFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, - QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out) + QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace) { ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MIN, shape, shape, data_type0, data_type1, output_data_type, - qinfo0, qinfo1, qinfo_out); + qinfo0, qinfo1, qinfo_out, is_inplace); } }; @@ -419,14 +511,13 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ElementwiseMinQuantizedBroadcastValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, - QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out) + QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace) { ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MIN, shape0, shape1, data_type0, data_type1, output_data_type, - qinfo0, qinfo1, qinfo_out); + qinfo0, qinfo1, qinfo_out, is_inplace); } }; @@ -434,12 +525,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ElementwiseSquaredDiffBroadcastValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type) + void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace) { ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::SQUARED_DIFF, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo()); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace); } }; @@ -447,12 +537,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ElementwiseSquaredDiffValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type) + void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace) { ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::SQUARED_DIFF, shape, shape, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo()); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace); } }; @@ -460,12 +549,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ElementwiseSquaredDiffBroadcastValidationFloatFixture : public ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info) + void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace) { ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::SQUARED_DIFF, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace); } }; @@ -473,12 +561,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ElementwiseSquaredDiffValidationFloatFixture : public ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info) + void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace) { ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::SQUARED_DIFF, shape, shape, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace); } }; @@ -486,14 +573,13 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ElementwiseSquaredDiffValidationQuantizedFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, - QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out) + QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace) { ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::SQUARED_DIFF, shape, shape, data_type0, data_type1, output_data_type, - qinfo0, qinfo1, qinfo_out); + qinfo0, qinfo1, qinfo_out, is_inplace); } }; @@ -501,14 +587,13 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ElementwiseSquaredDiffQuantizedBroadcastValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, - QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out) + QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace) { ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::SQUARED_DIFF, shape0, shape1, data_type0, data_type1, output_data_type, - qinfo0, qinfo1, qinfo_out); + qinfo0, qinfo1, qinfo_out, is_inplace); } }; @@ -516,7 +601,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class PReluLayerBroadcastValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type) { ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::PRELU, shape0, shape1, @@ -529,7 +613,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class PReluLayerValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type) { ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::PRELU, shape, shape, @@ -542,7 +625,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class PReluLayerValidationQuantizedFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out) @@ -557,7 +639,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class PReluLayerQuantizedBroadcastValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out) @@ -572,12 +653,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ElementwisePowerBroadcastValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type) + void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace) { ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::POWER, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo()); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace); } }; @@ -585,12 +665,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ElementwisePowerValidationFixture : public ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type) + void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace) { ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::POWER, shape, shape, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo()); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace); } }; @@ -598,12 +677,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ElementwisePowerBroadcastValidationFloatFixture : public ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info) + void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace) { ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::POWER, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace); } }; @@ -611,16 +689,15 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ElementwisePowerValidationFloatFixture : public ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info) + void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace) { ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::POWER, shape, shape, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace); } }; } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_ARITHMETIC_OPERATIONS_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_ELEMENTWISEOPERATIONSFIXTURE_H diff --git a/tests/validation/fixtures/ElementwiseUnaryFixture.h b/tests/validation/fixtures/ElementwiseUnaryFixture.h new file mode 100644 index 0000000000..15344288db --- /dev/null +++ b/tests/validation/fixtures/ElementwiseUnaryFixture.h @@ -0,0 +1,447 @@ +/* + * Copyright (c) 2018-2021, 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_TEST_ELEMENTWISE_UNARY_FIXTURE +#define ARM_COMPUTE_TEST_ELEMENTWISE_UNARY_FIXTURE + +#include "arm_compute/core/QuantizationInfo.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Utils.h" +#include "tests/AssetsLibrary.h" +#include "tests/Globals.h" +#include "tests/IAccessor.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/validation/reference/ElementwiseUnary.h" + +#include <tuple> +#include <limits> +#include <type_traits> +#include <vector> + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class ElementWiseUnaryValidationFixture : public framework::Fixture +{ +public: + void setup(TensorShape input_shape, DataType input_data_type, bool in_place, ElementWiseUnary op, + bool use_dynamic_shape = false, QuantizationInfo qinfo = QuantizationInfo(), QuantizationInfo qinfo_out = QuantizationInfo()) + { + _op = op; + _target = compute_target(input_shape, input_data_type, in_place, qinfo, qinfo_out); + _reference = compute_reference(input_shape, input_data_type, qinfo, qinfo_out); + _use_dynamic_shape = use_dynamic_shape; + } + +protected: + template <typename U> + void fill(U &&tensor, int i, DataType data_type) + { + using FloatType = typename std::conditional < std::is_same<T, half>::value || std::is_floating_point<T>::value, T, float >::type; + using FloatDistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<FloatType>>::type; + + switch(_op) + { + case ElementWiseUnary::EXP: + { + switch(data_type) + { + case DataType::F32: + { + FloatDistributionType distribution{ FloatType(-86.63f), FloatType(88.36f) }; + library->fill(tensor, distribution, i); + break; + } + + case DataType::F16: + { + FloatDistributionType distribution{ FloatType(-9.00f), FloatType(10.73f) }; + library->fill(tensor, distribution, i); + break; + } + + case DataType::QASYMM8: + case DataType::QASYMM8_SIGNED: + library->fill_tensor_uniform(tensor, i); + break; + + default: + ARM_COMPUTE_ERROR("Not implemented"); + } + + break; + } + case ElementWiseUnary::RSQRT: + case ElementWiseUnary::LOG: + { + // For floating-point data type, the chosen input range is all positive numbers + // (i.e. positive and negative zeros are excluded). + switch(data_type) + { + case DataType::F32: + { + FloatDistributionType distribution{ std::numeric_limits<float>::min(), std::numeric_limits<float>::max() }; + library->fill(tensor, distribution, i); + break; + } + + case DataType::F16: + { + FloatDistributionType distribution{ FloatType(0.00006103515625f), FloatType(65504.0f) }; + library->fill(tensor, distribution, i); + break; + } + + case DataType::QASYMM8: + case DataType::QASYMM8_SIGNED: + library->fill_tensor_uniform(tensor, i); + break; + + default: + ARM_COMPUTE_ERROR("Not implemented"); + } + + break; + } + case ElementWiseUnary::SIN: + { + switch(data_type) + { + case DataType::F32: + case DataType::F16: + { + FloatDistributionType distribution{ FloatType(-100.0f), FloatType(100.0f) }; + library->fill(tensor, distribution, i); + break; + } + + case DataType::S32: + { + std::uniform_int_distribution<int32_t> distribution(std::numeric_limits<int32_t>::lowest(), std::numeric_limits<int32_t>::max()); + library->fill(tensor, distribution, i); + break; + } + + case DataType::QASYMM8: + case DataType::QASYMM8_SIGNED: + library->fill_tensor_uniform(tensor, i); + break; + + default: + ARM_COMPUTE_ERROR("Not implemented"); + } + + break; + } + case ElementWiseUnary::ABS: + case ElementWiseUnary::NEG: + case ElementWiseUnary::ROUND: + { + switch(data_type) + { + case DataType::F32: + { + FloatDistributionType distribution{ std::numeric_limits<float>::lowest() / 2, std::numeric_limits<float>::max() / 2 }; + library->fill(tensor, distribution, i); + break; + } + + case DataType::F16: + { + FloatDistributionType distribution{ FloatType(-65504.0f), FloatType(65504.0f) }; + library->fill(tensor, distribution, i); + break; + } + + case DataType::S32: + { + std::uniform_int_distribution<int32_t> distribution(std::numeric_limits<int32_t>::lowest(), std::numeric_limits<int32_t>::max()); + library->fill(tensor, distribution, i); + break; + } + + case DataType::QASYMM8: + case DataType::QASYMM8_SIGNED: + library->fill_tensor_uniform(tensor, i); + break; + + default: + ARM_COMPUTE_ERROR("Not implemented"); + } + + break; + } + default: + ARM_COMPUTE_ERROR("Not implemented"); + } + } + + TensorType compute_target(const TensorShape &shape, DataType data_type, bool in_place, QuantizationInfo qinfo, QuantizationInfo qinfo_out) + { + // Create tensors + TensorType src = create_tensor<TensorType>(shape, data_type, 1, qinfo); + TensorType dst = create_tensor<TensorType>(shape, data_type, 1, qinfo_out); + TensorType *actual_dst = in_place ? &src : &dst; + + // if _use_dynamic_shape is true, this fixture will test scenario for dynamic shapes. + // - At configure time, all input tensors are marked as dynamic using set_tensor_dynamic() + // - After configure, tensors are marked as static for run using set_tensor_static() + // - The tensors with static shape are given to run() + if(_use_dynamic_shape) + { + set_tensor_dynamic(src); + } + + // Create and configure function + FunctionType elwiseunary_layer; + elwiseunary_layer.configure(&src, actual_dst); + + if(_use_dynamic_shape) + { + set_tensor_static(src); + } + + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + src.allocator()->allocate(); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + if(!in_place) + { + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + dst.allocator()->allocate(); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + } + + // Fill tensors + fill(AccessorType(src), 0, data_type); + + // Compute function + elwiseunary_layer.run(); + + if(in_place) + { + return src; + } + else + { + return dst; + } + } + + SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, QuantizationInfo qinfo, QuantizationInfo qinfo_out) + { + // Create reference + SimpleTensor<T> src{ shape, data_type, 1, qinfo }; + SimpleTensor<T> dst{ shape, data_type, 1, qinfo_out }; + + // Fill reference + fill(src, 0, data_type); + + return reference::elementwise_unary<T>(src, dst, _op); + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; + ElementWiseUnary _op{}; + bool _use_dynamic_shape{ false }; + QuantizationInfo _input_qinfo{}; + QuantizationInfo _output_qinfo{}; +}; +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class RsqrtQuantizedValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(const TensorShape &shape, DataType data_type, QuantizationInfo qinfo, QuantizationInfo qinfo_out) + { + ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::RSQRT, false, qinfo, qinfo_out); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class RsqrtValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(const TensorShape &shape, DataType data_type) + { + ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::RSQRT); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class RsqrtDynamicShapeValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(const TensorShape &shape, DataType data_type) + { + ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::RSQRT, true); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class ExpValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(const TensorShape &shape, DataType data_type) + { + ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::EXP); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class ExpQuantizedValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(const TensorShape &shape, DataType data_type, QuantizationInfo iq, QuantizationInfo oq) + { + ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::EXP, false, iq, oq); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class NegValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(const TensorShape &shape, DataType data_type) + { + ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::NEG); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class NegQuantizedValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(const TensorShape &shape, DataType data_type, QuantizationInfo iq, QuantizationInfo oq) + { + ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::NEG, false, iq, oq); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class NegValidationInPlaceFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(const TensorShape &shape, DataType data_type, bool in_place) + { + ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, in_place, ElementWiseUnary::NEG); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class NegQuantizedValidationInPlaceFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(const TensorShape &shape, DataType data_type, bool in_place, QuantizationInfo iq, QuantizationInfo oq) + { + ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, in_place, ElementWiseUnary::NEG, false, iq, oq); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class LogValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(const TensorShape &shape, DataType data_type) + { + ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::LOG); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class LogQuantizedValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(const TensorShape &shape, DataType data_type, QuantizationInfo iq, QuantizationInfo oq) + { + ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::LOG, false, iq, oq); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class AbsValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(const TensorShape &shape, DataType data_type) + { + ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::ABS); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class AbsQuantizedValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(const TensorShape &shape, DataType data_type, QuantizationInfo iq, QuantizationInfo oq) + { + ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::ABS, false, iq, oq); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class SinValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(const TensorShape &shape, DataType data_type) + { + ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::SIN); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class SinQuantizedValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(const TensorShape &shape, DataType data_type, QuantizationInfo iq, QuantizationInfo oq) + { + ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::SIN, false, iq, oq); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class RoundValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(const TensorShape &shape, DataType data_type) + { + ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::ROUND); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class RoundQuantizedValidationFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(const TensorShape &shape, DataType data_type, QuantizationInfo iq, QuantizationInfo oq) + { + ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::ROUND, false, iq, oq); + } +}; +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_ELEMENTWISE_UNARY_FIXTURE */ diff --git a/tests/validation/fixtures/FFTFixture.h b/tests/validation/fixtures/FFTFixture.h index 86a97272a0..024227b22a 100644 --- a/tests/validation/fixtures/FFTFixture.h +++ b/tests/validation/fixtures/FFTFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -47,7 +47,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class FFTValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, DataType data_type) { _target = compute_target(shape, data_type); @@ -88,15 +87,17 @@ protected: FunctionType fft; fft.configure(&src, &dst, InfoType()); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + + add_padding_x({ &src, &dst }); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src)); @@ -132,18 +133,32 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class FFTConvolutionValidationGenericFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, - DataType data_type, DataLayout data_layout, ActivationLayerInfo act_info) + DataType data_type, DataLayout data_layout, ActivationLayerInfo act_info, bool mixed_layout = false) { - _data_type = data_type; - _data_layout = data_layout; + _mixed_layout = mixed_layout; + _data_type = data_type; + _data_layout = data_layout; _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, dilation, act_info); _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, dilation, act_info); } protected: + void mix_layout(FunctionType &layer, TensorType &src, TensorType &dst) + { + // Test Multi DataLayout graph cases, when the data layout changes after configure + src.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); + dst.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); + + // Compute Convolution function + layer.run(); + + // Reinstating original data layout for the test suite to properly check the values + src.info()->set_data_layout(_data_layout); + dst.info()->set_data_layout(_data_layout); + } + template <typename U> void fill(U &&tensor, int i) { @@ -185,14 +200,16 @@ protected: TensorType bias = create_tensor<TensorType>(bias_shape, _data_type, 1, QuantizationInfo(), _data_layout); TensorType dst = create_tensor<TensorType>(output_shape, _data_type, 1, QuantizationInfo(), _data_layout); + add_padding_x({ &src, &weights, &bias, &dst }, _data_layout); + // Create and configure function FunctionType conv; conv.configure(&src, &weights, &bias, &dst, info, act_info, _data_type == DataType::F16); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); @@ -200,19 +217,25 @@ protected: bias.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!weights.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src), 0); fill(AccessorType(weights), 1); fill(AccessorType(bias), 2); - // Compute convolution function - conv.run(); - + if(_mixed_layout) + { + mix_layout(conv, src, dst); + } + else + { + // Compute Convolution function + conv.run(); + } return dst; } @@ -239,18 +262,18 @@ protected: SimpleTensor<T> _reference{}; DataType _data_type{}; DataLayout _data_layout{}; + bool _mixed_layout{ false }; }; -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false> class FFTConvolutionValidationFixture : public FFTConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, DataType data_type, DataLayout data_layout, ActivationLayerInfo act_info) { FFTConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation, - data_type, data_layout, act_info); + data_type, data_layout, act_info, mixed_layout); } }; } // namespace validation diff --git a/tests/validation/fixtures/FillFixture.h b/tests/validation/fixtures/FillFixture.h index 706c13565d..0239a68903 100644 --- a/tests/validation/fixtures/FillFixture.h +++ b/tests/validation/fixtures/FillFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -42,7 +42,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class FillFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape input_shape, DataType data_type) { _target = compute_target(input_shape, data_type); diff --git a/tests/validation/fixtures/FlattenLayerFixture.h b/tests/validation/fixtures/FlattenLayerFixture.h index 67c4d2a2b1..e72487c7cf 100644 --- a/tests/validation/fixtures/FlattenLayerFixture.h +++ b/tests/validation/fixtures/FlattenLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -50,7 +50,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class FlattenLayerValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, DataType data_type) { TensorShape shape_flatten; @@ -83,15 +82,15 @@ protected: FunctionType flatten_layer; flatten_layer.configure(&src, &dst); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src)); diff --git a/tests/validation/fixtures/FloorFixture.h b/tests/validation/fixtures/FloorFixture.h index 9388486983..7d38666f47 100644 --- a/tests/validation/fixtures/FloorFixture.h +++ b/tests/validation/fixtures/FloorFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,7 +44,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class FloorValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, DataType data_type) { _target = compute_target(shape, data_type); @@ -68,15 +67,15 @@ protected: FunctionType floor_func; floor_func.configure(&src, &dst); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src)); diff --git a/tests/validation/fixtures/FullyConnectedLayerFixture.h b/tests/validation/fixtures/FullyConnectedLayerFixture.h index 3760cfb8b7..344187868f 100644 --- a/tests/validation/fixtures/FullyConnectedLayerFixture.h +++ b/tests/validation/fixtures/FullyConnectedLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_FULLY_CONNECTED_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_FULLY_CONNECTED_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_FULLYCONNECTEDLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_FULLYCONNECTEDLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -34,6 +34,7 @@ #include "tests/framework/Asserts.h" #include "tests/framework/Fixture.h" #include "tests/validation/Helpers.h" +#include "tests/validation/Validation.h" #include "tests/validation/reference/ActivationLayer.h" #include "tests/validation/reference/FullyConnectedLayer.h" #include "tests/validation/reference/Utils.h" @@ -54,16 +55,63 @@ public: using TBias = typename std::conditional < (std::is_same<TDecay, uint8_t>::value || std::is_same<TDecay, int8_t>::value), int32_t, T >::type; public: - template <typename...> + void setup_quantization(TensorShape weights_shape, TensorShape output_shape, QuantizationInfo &input_q_info, QuantizationInfo &weights_q_info, DataType data_type) + { + _hash = weights_shape[0] + weights_shape[1] + output_shape[0] + output_shape[1]; + const int32_t t_max = static_cast<int32_t>(std::numeric_limits<T>::max()); + const int32_t t_min = static_cast<int32_t>(std::numeric_limits<T>::min()); + + std::mt19937 generator(library->seed() + _hash); + std::uniform_real_distribution<float> distribution_float(-5.0f, 3.0f); + std::uniform_int_distribution<int32_t> distribution_t(t_min, t_max); + + const float scale_lhs = pow(2, distribution_float(generator)); // [2^-5, 2^3] + const float scale_rhs = pow(2, distribution_float(generator)); // [2^-5, 2^3] + const int32_t offset_lhs = distribution_t(generator); + const int32_t offset_rhs = distribution_t(generator); + + input_q_info = QuantizationInfo(scale_lhs, offset_lhs); + weights_q_info = QuantizationInfo(scale_rhs, offset_rhs); + + + const int k = weights_shape.x(); + QuantizationHint q_hint = suggest_mac_dst_q_info_and_bias(input_q_info, weights_q_info, k, data_type, 0.1f /* bias_fraction */, 4 /* number of standard deviations*/); + + _dst_q_info = q_hint.q_info; + _min_bias = q_hint.bias_min; + _max_bias = q_hint.bias_max; + + // Do not change here as these limits are the natural limits of the associated data types and + // are embedded in the computation of the dst quantization info. + _min_u8 = 0; + _max_u8 = 255; + _min_s8 = -128; + _max_s8 = 127; + } + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, bool transpose_weights, bool reshape_weights, - DataType data_type, QuantizationInfo quantization_info, ActivationLayerInfo activation_info) + DataType data_type, QuantizationInfo quantization_info, ActivationLayerInfo activation_info, bool mixed_layout = false) { ARM_COMPUTE_UNUSED(weights_shape); ARM_COMPUTE_UNUSED(bias_shape); + _mixed_layout = mixed_layout; _data_type = data_type; _bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type; - _quantization_info = quantization_info; + + // Note : Quantization Info parameter from setup function is only used when quant datatype and activation function is not enabled or is identity. + if(is_data_type_quantized(data_type) && (!activation_info.enabled() || activation_info.activation() == ActivationFunction::IDENTITY)) + { + // Initialises quantization info with appropriate scale and offset for given input shapes. + setup_quantization(weights_shape, output_shape,_input_q_info, _weight_q_info, data_type); + } + else + { + _input_q_info = quantization_info; + _weight_q_info = quantization_info; + _dst_q_info = quantization_info; + } + _activation_info = activation_info; _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, transpose_weights, reshape_weights); @@ -71,22 +119,37 @@ public: } protected: + void mix_layout(FunctionType &layer, TensorType &src, TensorType &dst) + { + const DataLayout data_layout = src.info()->data_layout(); + // Test Multi DataLayout graph cases, when the data layout changes after configure + src.info()->set_data_layout(data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); + dst.info()->set_data_layout(data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); + + // Compute Convolution function + layer.run(); + + // Reinstating original data layout for the test suite to properly check the values + src.info()->set_data_layout(data_layout); + dst.info()->set_data_layout(data_layout); + } + template <typename U> void fill(U &&tensor, int i) { if(_data_type == DataType::QASYMM8) { - std::uniform_int_distribution<uint8_t> distribution(0, 30); + std::uniform_int_distribution<uint32_t> distribution(_min_u8, _max_u8); library->fill(tensor, distribution, i); } else if(_data_type == DataType::QASYMM8_SIGNED) { - std::uniform_int_distribution<int8_t> distribution(-15, 15); + std::uniform_int_distribution<int32_t> distribution(_min_s8, _max_s8); library->fill(tensor, distribution, i); } else if(_data_type == DataType::S32) { - std::uniform_int_distribution<int32_t> distribution(-50, 50); + std::uniform_int_distribution<int32_t> distribution(_min_bias, _max_bias); library->fill(tensor, distribution, i); } else if(_data_type == DataType::F16) @@ -128,10 +191,10 @@ protected: } // Create tensors - TensorType src = create_tensor<TensorType>(input_shape, _data_type, 1, _quantization_info); - TensorType weights = create_tensor<TensorType>(reshaped_weights_shape, _data_type, 1, _quantization_info); - TensorType bias = create_tensor<TensorType>(bias_shape, _bias_data_type, 1, _quantization_info); - TensorType dst = create_tensor<TensorType>(output_shape, _data_type, 1, _quantization_info); + TensorType src = create_tensor<TensorType>(input_shape, _data_type, 1, _input_q_info); + TensorType weights = create_tensor<TensorType>(reshaped_weights_shape, _data_type, 1, _weight_q_info); + TensorType bias = create_tensor<TensorType>(bias_shape, _bias_data_type, 1); + TensorType dst = create_tensor<TensorType>(output_shape, _data_type, 1, _dst_q_info); // Create Fully Connected layer info FullyConnectedLayerInfo fc_info; @@ -143,10 +206,12 @@ protected: FunctionType fc; fc.configure(&src, &weights, &bias, &dst, fc_info); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + + add_padding_x({ &src, &weights, &bias, &dst }); // Allocate tensors src.allocator()->allocate(); @@ -154,14 +219,14 @@ protected: bias.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!weights.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors - fill(AccessorType(src), 0); - fill(AccessorType(bias), 2); + fill(AccessorType(src), 0 + _hash); + fill(AccessorType(bias), 2 + _hash); if(!reshape_weights || !transpose_weights) { @@ -169,7 +234,7 @@ protected: RawTensor tmp(tmp_shape, _data_type, 1); // Fill with original shape - fill(tmp, 1); + fill(tmp, 1 + _hash); // Transpose elementwise tmp = transpose(tmp); @@ -186,11 +251,18 @@ protected: } else { - fill(AccessorType(weights), 1); + fill(AccessorType(weights), 1 + _hash); } - // Compute NEFullyConnectedLayer function - fc.run(); + if(_mixed_layout) + { + mix_layout(fc, src, dst); + } + else + { + // Compute NEFullyConnectedLayer function + fc.run(); + } return dst; } @@ -198,54 +270,384 @@ protected: SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape) { // Create reference - SimpleTensor<T> src{ input_shape, _data_type, 1, _quantization_info }; - SimpleTensor<T> weights{ weights_shape, _data_type, 1, _quantization_info }; - SimpleTensor<TBias> bias{ bias_shape, _bias_data_type, 1, _quantization_info }; + SimpleTensor<T> src{ input_shape, _data_type, 1, _input_q_info }; + SimpleTensor<T> weights{ weights_shape, _data_type, 1, _weight_q_info }; + SimpleTensor<TBias> bias{ bias_shape, _bias_data_type, 1, QuantizationInfo() }; // Fill reference - fill(src, 0); - fill(weights, 1); - fill(bias, 2); + fill(src, 0 + _hash); + fill(weights, 1 + _hash); + fill(bias, 2 + _hash); - return reference::activation_layer(reference::fully_connected_layer<T>(src, weights, bias, output_shape), _activation_info, _quantization_info); + return reference::activation_layer(reference::fully_connected_layer<T>(src, weights, bias, output_shape, _dst_q_info), _activation_info, _dst_q_info); } TensorType _target{}; SimpleTensor<T> _reference{}; DataType _data_type{}; DataType _bias_data_type{}; - QuantizationInfo _quantization_info{}; + bool _mixed_layout{ false }; + QuantizationInfo _input_q_info{}; + QuantizationInfo _weight_q_info{}; + QuantizationInfo _dst_q_info{}; ActivationLayerInfo _activation_info{}; + + // Random initialization limits + // Default values are previously handcrafted limits + // that sould be used when we don't use dynamic quantization + int32_t _min_bias{-50}; + int32_t _max_bias{50}; + + int32_t _min_u8{0}; + int32_t _max_u8{30}; + int32_t _min_s8{-15}; + int32_t _max_s8{15}; + int _hash{0}; }; -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false> class FullyConnectedLayerValidationFixture : public FullyConnectedLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, bool transpose_weights, bool reshape_weights, DataType data_type, ActivationLayerInfo activation_info) { FullyConnectedLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, transpose_weights, reshape_weights, data_type, - QuantizationInfo(), activation_info); + QuantizationInfo(), activation_info, mixed_layout); } }; -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false> class FullyConnectedLayerValidationQuantizedFixture : public FullyConnectedLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, bool transpose_weights, bool reshape_weights, DataType data_type, QuantizationInfo quantization_info, ActivationLayerInfo activation_info) { FullyConnectedLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, transpose_weights, reshape_weights, data_type, - quantization_info, activation_info); + quantization_info, activation_info, mixed_layout); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class FullyConnectedWithDynamicTensorsFixture : public framework::Fixture +{ +private: + template <typename U> + void fill(U &&tensor, int i) + { + if(_data_type == DataType::F16) + { + arm_compute::utils::uniform_real_distribution_16bit<half> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + } + else if(_data_type == DataType::F32) + { + std::uniform_real_distribution<float> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + } + else if(_data_type == DataType::QASYMM8) + { + std::uniform_int_distribution<uint32_t> distribution(_min_u8, _max_u8); + library->fill(tensor, distribution, i); + } + else if(_data_type == DataType::QASYMM8_SIGNED) + { + std::uniform_int_distribution<int32_t> distribution(_min_s8, _max_s8); + library->fill(tensor, distribution, i); + } + else if(_data_type == DataType::S32) + { + std::uniform_int_distribution<int32_t> distribution(_min_bias, _max_bias); + library->fill(tensor, distribution, i); + } + else + { + library->fill_tensor_uniform(tensor, i); + } + } + + void fill_transposed_weights(TensorType &weights, TensorShape weights_shape, int seed) + { + RawTensor tmp(weights_shape, _data_type, 1); + + // Fill with original shape + fill(tmp, seed); + + // Transpose elementwise + tmp = transpose(tmp); + + AccessorType weights_accessor(weights); + + for(int i = 0; i < tmp.num_elements(); ++i) + { + Coordinates coord = index2coord(tmp.shape(), i); + std::copy_n(static_cast<const RawTensor::value_type *>(tmp(coord)), + tmp.element_size(), + static_cast<RawTensor::value_type *>(weights_accessor(coord))); + } + } + + void validate_with_tolerance(TensorType &target, SimpleTensor<float> &ref) + { + constexpr RelativeTolerance<float> rel_tolerance_f32(0.01f); + constexpr AbsoluteTolerance<float> abs_tolerance_f32(0.001f); + validate(AccessorType(target), ref, rel_tolerance_f32, 0, abs_tolerance_f32); + } + + void validate_with_tolerance(TensorType &target, SimpleTensor<half_float::half> &ref) + { + constexpr AbsoluteTolerance<float> abs_tolerance_f16(0.3f); + const RelativeTolerance<half_float::half> rel_tolerance_f16(half_float::half(0.2f)); + constexpr float tolerance_num_f16 = 0.07f; + + validate(AccessorType(target), ref, rel_tolerance_f16, tolerance_num_f16, abs_tolerance_f16); + } + + void validate_with_tolerance(TensorType &target, SimpleTensor<uint8_t> &ref) + { + constexpr AbsoluteTolerance<uint32_t> tolerance_qasymm8(1); + validate(AccessorType(target), ref, tolerance_qasymm8); + } + + void validate_with_tolerance(TensorType &target, SimpleTensor<int8_t> &ref) + { + constexpr AbsoluteTolerance<int32_t> tolerance_qasymm8_signed(1); + validate(AccessorType(target), ref, tolerance_qasymm8_signed); + } + + void setup_quantization(TensorShape weights_shape, TensorShape output_shape, QuantizationInfo &input_q_info, QuantizationInfo &weights_q_info, DataType data_type) + { + _hash = weights_shape[0] + weights_shape[1] + output_shape[0] + output_shape[1]; + + const int32_t t_max = static_cast<int32_t>(std::numeric_limits<T>::max()); + const int32_t t_min = static_cast<int32_t>(std::numeric_limits<T>::min()); + + std::mt19937 generator(library->seed() + _hash); + std::uniform_real_distribution<float> distribution_float(-5.0f, 3.0f); + std::uniform_int_distribution<int32_t> distribution_t(t_min, t_max); + + const float scale_lhs = pow(2, distribution_float(generator)); // [2^-5, 2^3] + const float scale_rhs = pow(2, distribution_float(generator)); // [2^-5, 2^3] + const int32_t offset_lhs = distribution_t(generator); + const int32_t offset_rhs = distribution_t(generator); + + input_q_info = QuantizationInfo(scale_lhs, offset_lhs); + weights_q_info = QuantizationInfo(scale_rhs, offset_rhs); + + const int k = weights_shape.x(); + QuantizationHint q_hint = suggest_mac_dst_q_info_and_bias(input_q_info, weights_q_info, k, data_type, 0.1f /* bias_fraction */, 4 /* number of standard deviations*/); + + _dst_q_info = q_hint.q_info; + _min_bias = q_hint.bias_min; + _max_bias = q_hint.bias_max; + + // Do not change here as these limits are the natural limits of the associated data types and + // are embedded in the computation of the dst quantization info. + _min_u8 = 0; + _max_u8 = 255; + _min_s8 = -128; + _max_s8 = 127; + } + +public: + using TDecay = typename std::decay<T>::type; + using TBias = typename std::conditional < (std::is_same<TDecay, uint8_t>::value || std::is_same<TDecay, int8_t>::value), int32_t, T >::type; + + void setup(TensorShape src_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape dst_shape, + DataType data_type, ActivationLayerInfo activation_info, bool constant_weights, bool constant_bias, bool weights_reshaped, bool remove_bias = false) + { + _data_type = data_type; + + const bool is_quantized = is_data_type_quantized(data_type); + const DataType bias_data_type = (is_quantized) ? DataType::S32 : data_type; + + if (is_quantized && (!activation_info.enabled() || activation_info.activation() == ActivationFunction::IDENTITY)) + { + setup_quantization(weights_shape, dst_shape, _src_q_info, _weights_q_info, data_type); + } + else + { + _src_q_info = QuantizationInfo(0.1f, 10); + _dst_q_info = QuantizationInfo(0.3f, 20); + _weights_q_info = QuantizationInfo(0.2f, 5); + } + + // Configure TensorInfo Objects + const TensorInfo src_info(src_shape, 1, data_type, _src_q_info); + const TensorInfo dst_info(dst_shape, 1, data_type, _dst_q_info); + TensorInfo bias_info(bias_shape, 1, bias_data_type); + TensorInfo wei_info(weights_shape, 1, data_type, _weights_q_info); + + if(!constant_weights && weights_reshaped) + { + const TensorShape tr_weights_shape{ weights_shape[1], weights_shape[0] }; + wei_info.set_tensor_shape(tr_weights_shape); + } + wei_info.set_are_values_constant(constant_weights); + bias_info.set_are_values_constant(constant_bias); + + // Initialise Tensors + _src.allocator()->init(src_info); + _weights.allocator()->init(wei_info); + if(!remove_bias) + _bias.allocator()->init(bias_info); + _dst.allocator()->init(dst_info); + + // Configure FC layer and mark the weights as non constant + FullyConnectedLayerInfo fc_info; + fc_info.activation_info = activation_info; + if(!constant_weights) + { + fc_info.are_weights_reshaped = weights_reshaped; + fc_info.transpose_weights = !weights_reshaped; + } + FunctionType fc; + fc.configure(&_src, &_weights, (remove_bias) ? nullptr : &_bias, &_dst, fc_info); + + // Allocate all the tensors + _src.allocator()->allocate(); + _weights.allocator()->allocate(); + if(!remove_bias) + _bias.allocator()->allocate(); + _dst.allocator()->allocate(); + + // Run multiple iterations with different inputs + constexpr int num_iterations = 5; + int randomizer_offset = 0; + + // Create reference tensors + SimpleTensor<T> src{ src_shape, data_type, 1, _src_q_info }; + SimpleTensor<T> weights{ weights_shape, data_type, 1, _weights_q_info }; + SimpleTensor<TBias> bias{ bias_shape, bias_data_type }; + + // Fill weights and/or bias if they remain constant + if(constant_weights) + { + fill(AccessorType(_weights), 1 + _hash); + fill(weights, 1 + _hash); + } + if(constant_bias && !remove_bias) + { + fill(AccessorType(_bias), 2 + _hash); + fill(bias, 2 + _hash); + } + // To remove bias, fill with 0 + if(remove_bias && is_quantized) + { + library->fill_tensor_value(bias, 0); + } + else if(remove_bias) + { + library->fill_tensor_value(bias, (float)0.0); + } + + for(int i = 0; i < num_iterations; ++i) + { + // Run target + { + fill(AccessorType(_src), randomizer_offset); + if(!constant_weights) + { + if(weights_reshaped) + { + fill_transposed_weights(_weights, weights_shape, randomizer_offset + 1 + _hash); + } + else + { + fill(AccessorType(_weights), randomizer_offset + 1 +_hash); + } + } + if(!constant_bias && !remove_bias) + { + fill(AccessorType(_bias), randomizer_offset + 2 + _hash); + } + + fc.run(); + } + + // Run reference and compare + { + // Fill reference + fill(src, randomizer_offset); + if(!constant_weights) + { + fill(weights, randomizer_offset + 1 + _hash); + } + if(!constant_bias && !remove_bias) + { + fill(bias, randomizer_offset + 2 + _hash); + } + + auto dst = reference::activation_layer(reference::fully_connected_layer<T>(src, weights, bias, dst_shape, _dst_q_info), activation_info, _dst_q_info); + + // Validate + validate_with_tolerance(_dst, dst); + } + + randomizer_offset += 100; + } + } + +private: + TensorType _src{}, _weights{}, _bias{}, _dst{}; + DataType _data_type{ DataType::UNKNOWN }; + + QuantizationInfo _src_q_info{}; + QuantizationInfo _weights_q_info{}; + QuantizationInfo _dst_q_info{}; + + // Random initialization limits + // Default values are previously handcrafted limits + // that sould be used when we don't use dynamic quantization + int32_t _min_bias{-50}; + int32_t _max_bias{50}; + + int32_t _min_u8{0}; + int32_t _max_u8{30}; + int32_t _min_s8{-15}; + int32_t _max_s8{15}; + int _hash{0}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class FullyConnectedWithDynamicWeightsFixture : public FullyConnectedWithDynamicTensorsFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape src_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape dst_shape, + DataType data_type, ActivationLayerInfo activation_info, bool weights_reshaped) + { + FullyConnectedWithDynamicTensorsFixture<TensorType, AccessorType, FunctionType, T>::setup(src_shape, weights_shape, bias_shape, + dst_shape, data_type, activation_info, false, true, weights_reshaped, false); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class FullyConnectedDynamicNoBiasFixture : public FullyConnectedWithDynamicTensorsFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape src_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape dst_shape, + DataType data_type, ActivationLayerInfo activation_info, bool weights_reshaped) + { + FullyConnectedWithDynamicTensorsFixture<TensorType, AccessorType, FunctionType, T>::setup(src_shape, weights_shape, bias_shape, + dst_shape, data_type, activation_info, false, true, weights_reshaped, true); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class FullyConnectedWithDynamicBiasFixture : public FullyConnectedWithDynamicTensorsFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape src_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape dst_shape, + DataType data_type, ActivationLayerInfo activation_info) + { + FullyConnectedWithDynamicTensorsFixture<TensorType, AccessorType, FunctionType, T>::setup(src_shape, weights_shape, bias_shape, + dst_shape, data_type, activation_info, true, false, false, false); } }; } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_FULLY_CONNECTED_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_FULLYCONNECTEDLAYERFIXTURE_H diff --git a/tests/validation/fixtures/FuseBatchNormalizationFixture.h b/tests/validation/fixtures/FuseBatchNormalizationFixture.h index 552dc7c360..a05e4169a7 100644 --- a/tests/validation/fixtures/FuseBatchNormalizationFixture.h +++ b/tests/validation/fixtures/FuseBatchNormalizationFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -47,7 +47,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, int class FuseBatchNormalizationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape_w, DataType data_type, DataLayout data_layout, bool in_place, bool with_bias, bool with_gamma, bool with_beta) { std::tie(_target_w, _target_b) = compute_target(shape_w, data_type, data_layout, in_place, with_bias, with_gamma, with_beta); @@ -96,14 +95,14 @@ protected: FunctionType fuse_batch_normalization; fuse_batch_normalization.configure(&w, &mean, &var, w_fused_to_use, b_fused_to_use, b_to_use, beta_to_use, gamma_to_use, _epsilon, fuse_bn_type); - ARM_COMPUTE_EXPECT(w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(mean.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(var.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(w_fused.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(b_fused.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(beta.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(gamma.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(b.info()->is_resizable()); + ARM_COMPUTE_ASSERT(mean.info()->is_resizable()); + ARM_COMPUTE_ASSERT(var.info()->is_resizable()); + ARM_COMPUTE_ASSERT(w_fused.info()->is_resizable()); + ARM_COMPUTE_ASSERT(b_fused.info()->is_resizable()); + ARM_COMPUTE_ASSERT(beta.info()->is_resizable()); + ARM_COMPUTE_ASSERT(gamma.info()->is_resizable()); // Allocate tensors w.allocator()->allocate(); @@ -115,14 +114,14 @@ protected: beta.allocator()->allocate(); gamma.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!b.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!mean.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!var.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!w_fused.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!b_fused.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!beta.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!gamma.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!b.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!mean.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!var.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!w_fused.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!b_fused.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!beta.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!gamma.info()->is_resizable()); // Fill tensors fill(AccessorType(w), 0U, -1.0f, 1.0f); diff --git a/tests/validation/fixtures/GEMMFixture.h b/tests/validation/fixtures/GEMMFixture.h index 500e094e18..94bedc83e1 100644 --- a/tests/validation/fixtures/GEMMFixture.h +++ b/tests/validation/fixtures/GEMMFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_GEMM_FIXTURE -#define ARM_COMPUTE_TEST_GEMM_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_GEMMFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_GEMMFIXTURE_H #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/TensorShape.h" @@ -34,6 +34,7 @@ #include "tests/framework/Fixture.h" #include "tests/validation/Helpers.h" #include "tests/validation/reference/ActivationLayer.h" +#include "tests/validation/reference/ElementwiseOperations.h" #include "tests/validation/reference/GEMM.h" #include <random> @@ -44,16 +45,15 @@ namespace test { namespace validation { -template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false> -class GEMMValidationFixture : public framework::Fixture +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool pretranspose_a = false, bool pretranspose_b = false, bool run_twice = false> +class GEMMGenericValidationFixture : public framework::Fixture { public: - template <typename...> - void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type) + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type, bool accumulate=false) { ARM_COMPUTE_UNUSED(pretranspose); - _target = compute_target(shape_a, shape_b, shape_c, output_shape, alpha, beta, data_type); - _reference = compute_reference(shape_a, shape_b, output_shape, alpha, beta, data_type); + _target = compute_target(shape_a, shape_b, shape_c, output_shape, alpha, beta, data_type, accumulate); + _reference = compute_reference(shape_a, shape_b, output_shape, alpha, beta, data_type, accumulate); } protected: @@ -80,7 +80,7 @@ protected: } TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c, const TensorShape &output_shape, float alpha, float beta, - DataType data_type) + DataType data_type, bool accumulate=false) { // Create tensors TensorType a = create_tensor<TensorType>(shape_a, data_type, 1); @@ -98,12 +98,14 @@ protected: (disable_c) ? nullptr : &c, &dst, alpha, beta, - GEMMInfo(false, false, false, (reinterpret_output_as_3d ? output_shape[2] : 0), reinterpret_input_as_3d, false, GEMMLowpOutputStageInfo(), false, (reinterpret_input_as_3d - || reinterpret_output_as_3d))); - ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(c.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + GEMMInfo(false, false, false, (reinterpret_output_as_3d ? output_shape[2] : 0), reinterpret_input_as_3d, false, GEMMLowpOutputStageInfo(), false, false, (reinterpret_input_as_3d + || reinterpret_output_as_3d), arm_compute::ActivationLayerInfo(), false /* fixed_format */, arm_compute::WeightFormat::UNSPECIFIED, false /* pretranspose_B */, accumulate)); + ARM_COMPUTE_ASSERT(a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(b.info()->is_resizable()); + ARM_COMPUTE_ASSERT(c.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + + add_padding_x({ &a, &b, &c, &dst }); // Allocate tensors a.allocator()->allocate(); @@ -111,18 +113,33 @@ protected: c.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!a.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!b.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!c.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!b.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!c.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(a), 0); fill(AccessorType(b), 1); + if (accumulate) + { + fill(AccessorType(dst), 6); + } if(!disable_c) { fill(AccessorType(c), 2); } + // Run with variable inputs. + if(run_twice) + { + gemm.run(); + fill(AccessorType(a), 3); // Fill tensors with new seed after run + fill(AccessorType(b), 4); + if(!disable_c) + { + fill(AccessorType(c), 5); + } + } // Compute GEMM function gemm.run(); @@ -131,10 +148,9 @@ protected: } SimpleTensor<T> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &output_shape, float alpha, float beta, - DataType data_type) + DataType data_type, bool accumulate=false) { TensorShape shape_a_to_use = shape_a; - if(reinterpret_input_as_3d) { // Collapse the second and third dimension if the input is 3D @@ -145,6 +161,7 @@ protected: SimpleTensor<T> a{ shape_a_to_use, data_type, 1 }; SimpleTensor<T> b{ shape_b, data_type, 1 }; SimpleTensor<T> c{ output_shape, data_type, 1 }; + SimpleTensor<T> dst{ output_shape, data_type, 1 }; // Fill reference fill(a, 0); @@ -157,27 +174,96 @@ protected: const int m = reinterpret_output_as_3d ? output_shape[1] * output_shape[2] : output_shape[1]; const int batch_size = reinterpret_output_as_3d ? output_shape[3] : output_shape[2]; - // In case of broadcast, we need simply copy the first into the following "M" ones + // In case of broadcast, we need to simply copy the first into the following "M" ones for(int i = 1; i < m * batch_size; i++) { memcpy(c.data() + i * n, c.data(), n * sizeof(T)); } } + /* Note: Assuming the usual batch matmul dimensions A = (B x M x K), B = (B x K x N), if pretranspose_A is set to true, then A is assumed to be (B x K x M), + therefore, A must be pre-transposed before passing it to the fixture. And, we transpose A again in the fixture to make it (B x M x K) + in order to be able to call reference implementation that works with (B x M x K) input. + Similarly, if pretranspose_B is set to true, then B is assumed to be (B x N x K), B must be pre-transposed before passing it to the fixture. */ + + // Define transposed shapes + TensorShape a_transposed_shape(a.shape().y(), a.shape().x()); + TensorShape b_transposed_shape(b.shape().y(), b.shape().x()); + + // Define transposed tensors + SimpleTensor<T> a_transposed{ a_transposed_shape, data_type }; + SimpleTensor<T> b_transposed{ b_transposed_shape, data_type }; + + // pretranspose a if necessary + if(pretranspose_a) + { + transpose_matrix<T>(a, a_transposed); + } + + // pretranspose b if necessary + if(pretranspose_b) + { + transpose_matrix<T>(b, b_transposed); + } + + // Run with variable inputs. + if(run_twice) + { + reference::gemm<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 0.f : beta); + fill((pretranspose_a) ? a_transposed : a, 3); + fill((pretranspose_b) ? b_transposed : b, 4); + fill(c, 5); + } + + // Do in place summation + if (accumulate) + { + fill(dst, 6); + } + // Setting beta to 0 will effectively disable C for the // computation of the reference: alpha * A * B + 0 * C - return reference::gemm<T>(a, b, c, alpha, disable_c ? 0.f : beta); + // Use transposed tensors if boolean enabled else use original tensors + if (accumulate) + { + reference::gemm_accumulate<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 0.f : beta, dst); + return dst; + } + else + { + return reference::gemm<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 0.f : beta); + } } TensorType _target{}; SimpleTensor<T> _reference{}; }; -template <typename TensorType, typename AccessorType, typename T, typename GEMMFunctionType> +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool pretranspose_a = false, bool pretranspose_b = false, bool run_twice = false> +class GEMMValidationFixture : protected GEMMGenericValidationFixture<TensorType, AccessorType, FunctionType, T, disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b, run_twice> +{ +public: + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type) + { + GEMMGenericValidationFixture<TensorType, AccessorType, FunctionType, T, disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b, run_twice>::setup(shape_a, shape_b, shape_c, output_shape, alpha, beta, pretranspose, data_type, false /*accumulate*/); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool pretranspose_a = false, bool pretranspose_b = false, bool run_twice = false> +class GEMMAccumulateValidationFixture : protected GEMMGenericValidationFixture<TensorType, AccessorType, FunctionType, T, disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b, run_twice> +{ +public: + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type) + { + bool accumulate = true; + GEMMGenericValidationFixture<TensorType, AccessorType, FunctionType, T, disable_c, reinterpret_input_as_3d, reinterpret_output_as_3d, pretranspose_a, pretranspose_b, run_twice>::setup(shape_a, shape_b, shape_c, output_shape, alpha, beta, pretranspose, data_type, accumulate); + } +}; + +template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType> class GEMMMatrixMultiplyValidationFixture : public framework::Fixture { public: - template <typename...> void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, bool broadcast_bias, bool fp16_mixed_precision, const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch) { @@ -224,12 +310,14 @@ protected: // The output tensor will be auto-initialized within the function // Create and configure function - GEMMFunctionType gemm; - gemm.configure(gpu_arch, &lhs, &rhs, &bias, &dst, alpha, beta, false, reshape_info, fp16_mixed_precision, act_info); + GEMMOperatorType gemm; + gemm.configure(gpu_arch, lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, false, reshape_info, fp16_mixed_precision, act_info); + + ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); - ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); + add_padding_x({ &lhs, &rhs, &bias, &dst }); // Allocate tensors lhs.allocator()->allocate(); @@ -237,10 +325,10 @@ protected: bias.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(lhs), 0); @@ -248,7 +336,11 @@ protected: fill(AccessorType(bias), 2); // Compute GEMM - gemm.run(); + ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, + { ACL_SRC_1, &rhs }, + { ACL_SRC_2, &bias }, + { ACL_DST, &dst } }); + gemm.run(gemm_pack); return dst; } @@ -276,7 +368,7 @@ protected: if(broadcast_bias) { - // In case of broadcast, we need simply copy the first into the following "M" ones + // In case of broadcast, we need to simply copy the first into the following "M" ones for(int i = 1; i < m * batch_size; i++) { memcpy(bias.data() + i * n, bias.data(), n * sizeof(T)); @@ -290,11 +382,10 @@ protected: SimpleTensor<T> _reference{}; }; -template <typename TensorType, typename AccessorType, typename T, typename GEMMFunctionType> +template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType> class GEMMMatrixMultiply3DValidationFixture : public framework::Fixture { public: - template <typename...> void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, bool broadcast_bias, bool fp16_mixed_precision, const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch) { @@ -340,12 +431,14 @@ protected: // The output tensor will be auto-initialized within the function // Create and configure function - GEMMFunctionType gemm; - gemm.configure(gpu_arch, &lhs, &rhs, &bias, &dst, alpha, beta, false, reshape_info, fp16_mixed_precision, act_info); + GEMMOperatorType gemm; + gemm.configure(gpu_arch, lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, false, reshape_info, fp16_mixed_precision, act_info); + + ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); - ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); + add_padding_x({ &lhs, &rhs, &bias, &dst }); // Allocate tensors lhs.allocator()->allocate(); @@ -353,10 +446,10 @@ protected: bias.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(lhs), 0); @@ -364,7 +457,11 @@ protected: fill(AccessorType(bias), 2); // Compute GEMM - gemm.run(); + ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, + { ACL_SRC_1, &rhs }, + { ACL_SRC_2, &bias }, + { ACL_DST, &dst } }); + gemm.run(gemm_pack); return dst; } @@ -392,7 +489,7 @@ protected: fill(rhs, 1); fill(bias, 2); - // In case of broadcast, we need simply copy the first into the following "M" ones + // In case of broadcast, we need to simply copy the first into the following "M" ones for(int i = 1; i < m * batch_size; i++) { memcpy(bias.data() + i * n, bias.data(), n * sizeof(T)); @@ -405,11 +502,10 @@ protected: SimpleTensor<T> _reference{}; }; -template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSFunctionType, typename ReshapeRHSFunctionType, typename GEMMFunctionType> +template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMOperatorType> class GEMMMatrixMultiplyInterleavedTransposedValidationFixture : public framework::Fixture { public: - template <typename...> void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, unsigned int v0, unsigned int h0, bool broadcast_bias, bool fp16_mixed_precision, const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch) { @@ -472,16 +568,22 @@ protected: // The output tensor will be auto-initialized within the function // Create and configure function - ReshapeLHSFunctionType reshape_lhs; - ReshapeRHSFunctionType reshape_rhs; - GEMMFunctionType gemm; - reshape_lhs.configure(&lhs, &lhs_reshaped, lhs_info); - reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info); - gemm.configure(gpu_arch, &lhs_reshaped, &rhs_reshaped, &bias, &dst, alpha, beta, true, reshape_info, fp16_mixed_precision, act_info); - - ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); + ReshapeLHSOperatorType reshape_lhs; + ReshapeRHSOperatorType reshape_rhs; + GEMMOperatorType gemm; + reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info); + reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info); + gemm.configure(gpu_arch, lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, true, reshape_info, fp16_mixed_precision, act_info); + + ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); + + // We do not pad when using image as it needs to comply to strict pitch alignment restrictions + if(!rhs_info.export_to_cl_image) + { + add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst }); + } // Allocate tensors lhs.allocator()->allocate(); @@ -491,12 +593,12 @@ protected: bias.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!lhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(lhs), 0); @@ -504,9 +606,15 @@ protected: fill(AccessorType(bias), 2); // Compute GEMM - reshape_lhs.run(); - reshape_rhs.run(); - gemm.run(); + ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } }; + reshape_lhs.run(reshape_lhs_pack); + ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } }; + reshape_rhs.run(reshape_rhs_pack); + ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped }, + { ACL_SRC_1, &rhs_reshaped }, + { ACL_SRC_2, &bias }, + { ACL_DST, &dst } }); + gemm.run(gemm_pack); return dst; } @@ -534,7 +642,7 @@ protected: if(broadcast_bias) { - // In case of broadcast, we need simply copy the first into the following "M" ones + // In case of broadcast, we need to simply copy the first into the following "M" ones for(int i = 1; i < m * batch_size; i++) { memcpy(bias.data() + i * n, bias.data(), n * sizeof(T)); @@ -548,11 +656,10 @@ protected: SimpleTensor<T> _reference{}; }; -template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSFunctionType, typename ReshapeRHSFunctionType, typename GEMMFunctionType> +template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMOperatorType> class GEMMMatrixMultiplyInterleavedTransposed3DValidationFixture : public framework::Fixture { public: - template <typename...> void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, unsigned int v0, unsigned int h0, bool broadcast_bias, bool fp16_mixed_precision, const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch) { @@ -614,16 +721,22 @@ protected: // The output tensor will be auto-initialized within the function // Create and configure function - ReshapeLHSFunctionType reshape_lhs; - ReshapeRHSFunctionType reshape_rhs; - GEMMFunctionType gemm; - reshape_lhs.configure(&lhs, &lhs_reshaped, lhs_info); - reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info); - gemm.configure(gpu_arch, &lhs_reshaped, &rhs_reshaped, &bias, &dst, alpha, beta, true, reshape_info, fp16_mixed_precision, act_info); - - ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); + ReshapeLHSOperatorType reshape_lhs; + ReshapeRHSOperatorType reshape_rhs; + GEMMOperatorType gemm; + reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info); + reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info); + gemm.configure(gpu_arch, lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, true, reshape_info, fp16_mixed_precision, act_info); + + ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); + + // We do not pad when using image as it needs to comply to strict pitch alignment restrictions + if(!rhs_info.export_to_cl_image) + { + add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst }); + } // Allocate tensors lhs.allocator()->allocate(); @@ -633,12 +746,12 @@ protected: bias.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!lhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(lhs), 0); @@ -646,9 +759,15 @@ protected: fill(AccessorType(bias), 2); // Compute GEMM - reshape_lhs.run(); - reshape_rhs.run(); - gemm.run(); + ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } }; + reshape_lhs.run(reshape_lhs_pack); + ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } }; + reshape_rhs.run(reshape_rhs_pack); + ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped }, + { ACL_SRC_1, &rhs_reshaped }, + { ACL_SRC_2, &bias }, + { ACL_DST, &dst } }); + gemm.run(gemm_pack); return dst; } @@ -676,7 +795,7 @@ protected: fill(rhs, 1); fill(bias, 2); - // In case of broadcast, we need simply copy the first into the following "M" ones + // In case of broadcast, we need to simply copy the first into the following "M" ones for(int i = 1; i < m * batch_size; i++) { memcpy(bias.data() + i * n, bias.data(), n * sizeof(T)); @@ -689,11 +808,10 @@ protected: SimpleTensor<T> _reference{}; }; -template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSFunctionType, typename ReshapeRHSFunctionType, typename GEMMFunctionType, bool fp_mixed_precision = false> +template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMOperatorType, bool fp_mixed_precision = false> class GEMMMatrixMultiplyReshapedValidationFixture : public framework::Fixture { public: - template <typename...> void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, bool interleave_lhs, bool interleave_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, bool lhs_transpose, const ActivationLayerInfo &act_info) { @@ -768,9 +886,9 @@ protected: // The output tensor will be auto-initialized within the function // Create and configure function - ReshapeLHSFunctionType reshape_lhs; - ReshapeRHSFunctionType reshape_rhs; - GEMMFunctionType gemm; + ReshapeLHSOperatorType reshape_lhs; + ReshapeRHSOperatorType reshape_rhs; + GEMMOperatorType gemm; validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info)); validate_result = validate_result || !rhs_info.export_to_cl_image; @@ -779,13 +897,19 @@ protected: return nullptr; } - reshape_lhs.configure(&lhs, &lhs_reshaped, lhs_info); - reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info); - gemm.configure(&lhs_reshaped, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info); + reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info); + reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info); + gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info); - ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); + + // We do not pad when using image as it needs to comply to strict pitch alignment restrictions + if(!rhs_info.export_to_cl_image) + { + add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst }); + } // Allocate tensors lhs.allocator()->allocate(); @@ -795,12 +919,12 @@ protected: bias.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!lhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(lhs), 0); @@ -808,9 +932,15 @@ protected: fill(AccessorType(bias), 2); // Compute GEMM - reshape_lhs.run(); - reshape_rhs.run(); - gemm.run(); + ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } }; + reshape_lhs.run(reshape_lhs_pack); + ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } }; + reshape_rhs.run(reshape_rhs_pack); + ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped }, + { ACL_SRC_1, &rhs_reshaped }, + { ACL_SRC_2, &bias }, + { ACL_DST, &dst } }); + gemm.run(gemm_pack); return dst; } @@ -838,7 +968,7 @@ protected: if(broadcast_bias) { - // In case of broadcast, we need simply copy the first into the following "M" ones + // In case of broadcast, we need to simply copy the first into the following "M" ones for(int i = 1; i < m * batch_size; i++) { memcpy(bias.data() + i * n, bias.data(), n * sizeof(T)); @@ -860,11 +990,10 @@ protected: SimpleTensor<T> _reference{}; }; -template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSFunctionType, typename ReshapeRHSFunctionType, typename GEMMFunctionType, bool fp_mixed_precision = false> +template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMOperatorType, bool fp_mixed_precision = false> class GEMMMatrixMultiplyReshaped3DValidationFixture : public framework::Fixture { public: - template <typename...> void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, bool interleave_lhs, bool interleave_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool lhs_transpose, const ActivationLayerInfo &act_info) { @@ -936,9 +1065,9 @@ protected: // The output tensor will be auto-initialized within the function // Create and configure function - ReshapeLHSFunctionType reshape_lhs; - ReshapeRHSFunctionType reshape_rhs; - GEMMFunctionType gemm; + ReshapeLHSOperatorType reshape_lhs; + ReshapeRHSOperatorType reshape_rhs; + GEMMOperatorType gemm; validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info)); validate_result = validate_result || !rhs_info.export_to_cl_image; @@ -947,13 +1076,19 @@ protected: return nullptr; } - reshape_lhs.configure(&lhs, &lhs_reshaped, lhs_info); - reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info); - gemm.configure(&lhs_reshaped, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info); + reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info); + reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info); + gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info); - ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); + + // We do not pad when using image as it needs to comply to strict pitch alignment restrictions + if(!rhs_info.export_to_cl_image) + { + add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst }); + } // Allocate tensors lhs.allocator()->allocate(); @@ -963,12 +1098,12 @@ protected: bias.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!lhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(lhs), 0); @@ -976,9 +1111,15 @@ protected: fill(AccessorType(bias), 2); // Compute GEMM - reshape_lhs.run(); - reshape_rhs.run(); - gemm.run(); + ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } }; + reshape_lhs.run(reshape_lhs_pack); + ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } }; + reshape_rhs.run(reshape_rhs_pack); + ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped }, + { ACL_SRC_1, &rhs_reshaped }, + { ACL_SRC_2, &bias }, + { ACL_DST, &dst } }); + gemm.run(gemm_pack); return dst; } @@ -1006,7 +1147,7 @@ protected: fill(rhs, 1); fill(bias, 2); - // In case of broadcast, we need simply copy the first into the following "M" ones + // In case of broadcast, we need to simply copy the first into the following "M" ones for(int i = 1; i < m * batch_size; i++) { memcpy(bias.data() + i * n, bias.data(), n * sizeof(T)); @@ -1027,11 +1168,10 @@ protected: SimpleTensor<T> _reference{}; }; -template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSFunctionType, typename GEMMFunctionType> +template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSOperatorType, typename GEMMOperatorType> class GEMMMatrixMultiplyReshapedOnlyRHSValidationFixture : public framework::Fixture { public: - template <typename...> void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0, bool interleave_rhs, bool transpose_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info) { @@ -1101,8 +1241,8 @@ protected: // The output tensor will be auto-initialized within the function // Create and configure function - ReshapeRHSFunctionType reshape_rhs; - GEMMFunctionType gemm; + ReshapeRHSOperatorType reshape_rhs; + GEMMOperatorType gemm; validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info)); validate_result = validate_result || !rhs_info.export_to_cl_image; @@ -1111,12 +1251,18 @@ protected: return nullptr; } - reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info); - gemm.configure(&lhs, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info); + reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info); + gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info); - ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); + + // We do not pad when using image as it needs to comply to strict pitch alignment restrictions + if(!rhs_info.export_to_cl_image) + { + add_padding_x({ &lhs, &rhs, &rhs_reshaped, &bias, &dst }); + } // Allocate tensors lhs.allocator()->allocate(); @@ -1125,11 +1271,11 @@ protected: bias.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(lhs), 0); @@ -1137,8 +1283,13 @@ protected: fill(AccessorType(bias), 2); // Compute GEMM - reshape_rhs.run(); - gemm.run(); + ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } }; + reshape_rhs.run(reshape_rhs_pack); + ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, + { ACL_SRC_1, &rhs_reshaped }, + { ACL_SRC_2, &bias }, + { ACL_DST, &dst } }); + gemm.run(gemm_pack); return dst; } @@ -1166,7 +1317,7 @@ protected: if(broadcast_bias) { - // In case of broadcast, we need simply copy the first into the following "M" ones + // In case of broadcast, we need to simply copy the first into the following "M" ones for(int i = 1; i < m * batch_size; i++) { memcpy(bias.data() + i * n, bias.data(), n * sizeof(T)); @@ -1181,11 +1332,10 @@ protected: SimpleTensor<T> _reference{}; }; -template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSFunctionType, typename GEMMFunctionType> +template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSOperatorType, typename GEMMOperatorType> class GEMMMatrixMultiplyReshapedOnlyRHS3DValidationFixture : public framework::Fixture { public: - template <typename...> void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0, bool interleave_rhs, bool transpose_rhs, bool export_to_cl_image, bool has_pad_y, DataType data_type, float alpha, float beta, const ActivationLayerInfo &act_info) { @@ -1253,8 +1403,8 @@ protected: // The output tensor will be auto-initialized within the function // Create and configure function - ReshapeRHSFunctionType reshape_rhs; - GEMMFunctionType gemm; + ReshapeRHSOperatorType reshape_rhs; + GEMMOperatorType gemm; validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info)); validate_result = validate_result || !rhs_info.export_to_cl_image; @@ -1263,8 +1413,8 @@ protected: return nullptr; } - reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info); - gemm.configure(&lhs, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info); + reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info); + gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info); if(has_pad_y) { @@ -1273,9 +1423,15 @@ protected: dst.info()->extend_padding(PaddingSize(2, 0, 1, 0)); } - ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); + + // We do not pad when using image as it needs to comply to strict pitch alignment restrictions + if(!rhs_info.export_to_cl_image) + { + add_padding_x({ &lhs, &rhs, &rhs_reshaped, &bias, &dst }); + } // Allocate tensors lhs.allocator()->allocate(); @@ -1284,11 +1440,11 @@ protected: bias.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(lhs), 0); @@ -1296,8 +1452,13 @@ protected: fill(AccessorType(bias), 2); // Compute GEMM - reshape_rhs.run(); - gemm.run(); + ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } }; + reshape_rhs.run(reshape_rhs_pack); + ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, + { ACL_SRC_1, &rhs_reshaped }, + { ACL_SRC_2, &bias }, + { ACL_DST, &dst } }); + gemm.run(gemm_pack); return dst; } @@ -1325,7 +1486,7 @@ protected: fill(rhs, 1); fill(bias, 2); - // In case of broadcast, we need simply copy the first into the following "M" ones + // In case of broadcast, we need to simply copy the first into the following "M" ones for(int i = 1; i < m * batch_size; i++) { memcpy(bias.data() + i * n, bias.data(), n * sizeof(T)); @@ -1339,11 +1500,10 @@ protected: SimpleTensor<T> _reference{}; }; -template <typename TensorType, typename AccessorType, typename T, typename GEMMFunctionType> +template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType> class GEMMMatrixMultiplyNativeValidationFixture : public framework::Fixture { public: - template <typename...> void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info) { @@ -1403,12 +1563,14 @@ protected: kernel_info.activation_info = act_info; // Create and configure function - GEMMFunctionType gemm; - gemm.configure(&lhs, &rhs, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info); + GEMMOperatorType gemm; + gemm.configure(lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info); + + ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); - ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); + add_padding_x({ &lhs, &rhs, &bias, &dst }); // Allocate tensors lhs.allocator()->allocate(); @@ -1416,10 +1578,10 @@ protected: bias.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(lhs), 0); @@ -1427,7 +1589,11 @@ protected: fill(AccessorType(bias), 2); // Compute GEMM - gemm.run(); + ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, + { ACL_SRC_1, &rhs }, + { ACL_SRC_2, &bias }, + { ACL_DST, &dst } }); + gemm.run(gemm_pack); return dst; } @@ -1455,7 +1621,7 @@ protected: if(broadcast_bias) { - // In case of broadcast, we need simply copy the first into the following "M" ones + // In case of broadcast, we need to simply copy the first into the following "M" ones for(int i = 1; i < m * batch_size; i++) { memcpy(bias.data() + i * n, bias.data(), n * sizeof(T)); @@ -1469,11 +1635,10 @@ protected: SimpleTensor<T> _reference{}; }; -template <typename TensorType, typename AccessorType, typename T, typename GEMMFunctionType> +template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType> class GEMMMatrixMultiplyNative3DValidationFixture : public framework::Fixture { public: - template <typename...> void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, DataType data_type, float alpha, float beta, const ActivationLayerInfo &act_info) { @@ -1532,12 +1697,14 @@ protected: // The output tensor will be auto-initialized within the function // Create and configure function - GEMMFunctionType gemm; - gemm.configure(&lhs, &rhs, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info); + GEMMOperatorType gemm; + gemm.configure(lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info); - ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); + + add_padding_x({ &lhs, &rhs, &bias, &dst }); // Allocate tensors lhs.allocator()->allocate(); @@ -1545,10 +1712,10 @@ protected: bias.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(lhs), 0); @@ -1556,7 +1723,11 @@ protected: fill(AccessorType(bias), 2); // Compute GEMM - gemm.run(); + ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, + { ACL_SRC_1, &rhs }, + { ACL_SRC_2, &bias }, + { ACL_DST, &dst } }); + gemm.run(gemm_pack); return dst; } @@ -1584,7 +1755,7 @@ protected: fill(rhs, 1); fill(bias, 2); - // In case of broadcast, we need simply copy the first into the following "M" ones + // In case of broadcast, we need to simply copy the first into the following "M" ones for(int i = 1; i < m * batch_size; i++) { memcpy(bias.data() + i * n, bias.data(), n * sizeof(T)); @@ -1597,7 +1768,170 @@ protected: SimpleTensor<T> _reference{}; }; +template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSOperatorType, typename GEMMOperatorType> +class GEMMMatrixMultiplyReshapedOnlyRhsMMULValidationFixture : public framework::Fixture +{ +public: + void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, bool export_to_cl_image, DataType data_type, float alpha, + float beta, bool broadcast_bias, + const ActivationLayerInfo &act_info) + { + GEMMLHSMatrixInfo lhs_info; + lhs_info.m0 = m0; + lhs_info.k0 = k0; + + GEMMRHSMatrixInfo rhs_info; + rhs_info.n0 = n0; + rhs_info.k0 = k0; + rhs_info.interleave = true; + rhs_info.transpose = false; + rhs_info.h0 = 4; + rhs_info.export_to_cl_image = export_to_cl_image; + + // Set the tensor shapes for LHS and RHS matrices + const TensorShape lhs_shape(k, m, batch_size); + const TensorShape rhs_shape(n, k, batch_size); + const TensorShape bias_shape(n, + broadcast_bias ? 1 : m, + broadcast_bias ? 1 : batch_size); + + _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info); + _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info); + } + +protected: + template <typename U> + void fill(U &&tensor, int i) + { + static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported."); + using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type; + + DistributionType distribution{ T(-1.0f), T(1.0f) }; + library->fill(tensor, distribution, i); + + // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0) + DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) }; + library->fill_borders_with_garbage(tensor, distribution_inf, i); + } + + TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, + DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info) + { + // Create tensors + TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1); + TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1); + TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1); + TensorType rhs_reshaped; + TensorType dst; + + const unsigned int M = lhs_shape[1]; + const unsigned int N = rhs_shape[0]; + const unsigned int K = lhs_shape[0]; + GEMMKernelInfo kernel_info; + kernel_info.m = M; + kernel_info.n = N; + kernel_info.k = K; + kernel_info.depth_output_gemm3d = 0; + kernel_info.reinterpret_input_as_3d = false; + kernel_info.broadcast_bias = broadcast_bias; + kernel_info.activation_info = act_info; + + // Create and configure function + ReshapeRHSOperatorType reshape_rhs; + GEMMOperatorType gemm; + + validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info)); + if(!validate_result) + { + return nullptr; + } + + reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info); + + validate_result = bool(gemm.validate(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info)); + if(!validate_result) + { + return nullptr; + } + + gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info); + + ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); + + // Allocate tensors + lhs.allocator()->allocate(); + rhs.allocator()->allocate(); + rhs_reshaped.allocator()->allocate(); + bias.allocator()->allocate(); + dst.allocator()->allocate(); + + ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + + // Fill tensors + fill(AccessorType(lhs), 0); + fill(AccessorType(rhs), 1); + fill(AccessorType(bias), 2); + + // Compute GEMM + ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } }; + reshape_rhs.run(reshape_rhs_pack); + ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, + { ACL_SRC_1, &rhs_reshaped }, + { ACL_SRC_2, &bias }, + { ACL_DST, &dst } }); + gemm.run(gemm_pack); + + return dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias, + const ActivationLayerInfo &act_info) + { + if(!validate_result) + return SimpleTensor<T>(); + + TensorShape dst_shape = lhs_shape; + dst_shape[0] = rhs_shape[0]; + dst_shape[1] = lhs_shape[1]; + + // Create reference + SimpleTensor<T> lhs{ lhs_shape, data_type, 1 }; + SimpleTensor<T> rhs{ rhs_shape, data_type, 1 }; + SimpleTensor<T> bias{ dst_shape, data_type, 1 }; + + const int n = rhs_shape[0]; + const int m = lhs_shape[1]; + const int batch_size = lhs_shape[2]; + + // Fill reference + fill(lhs, 0); + fill(rhs, 1); + fill(bias, 2); + + if(broadcast_bias) + { + // In case of broadcast, we need to simply copy the first into the following "M" ones + for(int i = 1; i < m * batch_size; i++) + { + memcpy(bias.data() + i * n, bias.data(), n * sizeof(T)); + } + } + + return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info); + } + + bool validate_result = true; + TensorType _target{}; + SimpleTensor<T> _reference{}; +}; + } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_GEMM_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_GEMMFIXTURE_H diff --git a/tests/validation/fixtures/GEMMInterleave4x4Fixture.h b/tests/validation/fixtures/GEMMInterleave4x4Fixture.h index 1ce0eafead..59fc460869 100644 --- a/tests/validation/fixtures/GEMMInterleave4x4Fixture.h +++ b/tests/validation/fixtures/GEMMInterleave4x4Fixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -46,7 +46,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class GEMMInterleave4x4ValidationFixture : public framework::Fixture { public: - template <typename...> void setup(size_t x, size_t y, DataType data_type) { _data_type = data_type; @@ -88,24 +87,25 @@ protected: // Create and configure function FunctionType f; - f.configure(&a, &b); + f.configure(a.info(), b.info()); - ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(b.info()->is_resizable()); // Allocate tensors a.allocator()->allocate(); b.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!a.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!b.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!b.info()->is_resizable()); // Fill tensors fill(AccessorType(a), 0); fill(AccessorType(b), 0); - // Compute GEMM function - f.run(); + // Compute GEMM interleave kernel + ITensorPack tensors{ { ACL_SRC, &a }, { ACL_DST, &b } }; + f.run(tensors); return b; } diff --git a/tests/validation/fixtures/GEMMLowpAssemblyFixture.h b/tests/validation/fixtures/GEMMLowpAssemblyFixture.h deleted file mode 100644 index e9ec1bc365..0000000000 --- a/tests/validation/fixtures/GEMMLowpAssemblyFixture.h +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_GEMMLOWP_ASSEMBLY_FIXTURE -#define ARM_COMPUTE_TEST_GEMMLOWP_ASSEMBLY_FIXTURE - -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/Types.h" -#include "tests/AssetsLibrary.h" -#include "tests/Globals.h" -#include "tests/IAccessor.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Fixture.h" -#include "tests/validation/Helpers.h" -#include "tests/validation/reference/GEMMLowp.h" - -#include <random> - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -template <typename TensorType, typename AccessorType, typename FunctionType, typename T2> -class GEMMLowpAssemblyFixture : public framework::Fixture -{ -public: - template <typename...> - void setup(size_t m, size_t n, size_t k) - { - const TensorShape shape_a(k, m); - const TensorShape shape_b(n, k); - const TensorShape shape_c(n, m); - _target = compute_target(shape_a, shape_b, shape_c); - _reference = compute_reference(shape_a, shape_b, shape_c); - } - -protected: - template <typename U> - void fill(U &&tensor, int i, int lo, int hi) - { - std::uniform_int_distribution<> distribution(lo, hi); - library->fill(tensor, distribution, i); - } - - TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c) - { - DataType dt_in = std::is_same<T2, int8_t>::value ? DataType::S8 : DataType::U8; - - // Create tensors - TensorType a = create_tensor<TensorType>(shape_a, dt_in, 1); - TensorType b = create_tensor<TensorType>(shape_b, dt_in, 1); - TensorType c = create_tensor<TensorType>(shape_c, DataType::S32, 1); - - // Create and configure function - FunctionType gemmlowp; - gemmlowp.configure(&a, &b, nullptr, &c); - - ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(c.info()->is_resizable(), framework::LogLevel::ERRORS); - - // Allocate tensors - a.allocator()->allocate(); - b.allocator()->allocate(); - c.allocator()->allocate(); - - ARM_COMPUTE_EXPECT(!a.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!b.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!c.info()->is_resizable(), framework::LogLevel::ERRORS); - - // Fill tensors - if(dt_in == DataType::S8) - { - fill(AccessorType(a), 0, -128, 127); - fill(AccessorType(b), 1, -128, 127); - } - else - { - fill(AccessorType(a), 0, 0, 255); - fill(AccessorType(b), 1, 0, 255); - } - fill(AccessorType(c), 2, 0, 0); - - // Compute GEMM function - gemmlowp.run(); - return c; - } - - SimpleTensor<int32_t> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c) - { - DataType dt = std::is_same<T2, int8_t>::value ? DataType::S8 : DataType::U8; - - // Create reference - SimpleTensor<T2> a{ shape_a, dt, 1 }; - SimpleTensor<T2> b{ shape_b, dt, 1 }; - - // Fill reference - if(dt == DataType::S8) - { - fill(a, 0, -128, 127); - fill(b, 1, -128, 127); - } - else - { - fill(a, 0, 0, 255); - fill(b, 1, 0, 255); - } - - return reference::gemmlowp<int32_t, T2>(a, b, shape_c); - } - - TensorType _target{}; - SimpleTensor<int32_t> _reference{}; -}; - -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_GEMMLOWP_FIXTURE */ diff --git a/tests/validation/fixtures/GEMMLowpFixture.h b/tests/validation/fixtures/GEMMLowpFixture.h index 95f49601a5..aa4eedb75d 100644 --- a/tests/validation/fixtures/GEMMLowpFixture.h +++ b/tests/validation/fixtures/GEMMLowpFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,22 +21,20 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_GEMMLOWP_FIXTURE -#define ARM_COMPUTE_TEST_GEMMLOWP_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_GEMMLOWPFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_GEMMLOWPFIXTURE_H -#include "arm_compute/core/KernelDescriptors.h" -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/Types.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" -#include "tests/AssetsLibrary.h" -#include "tests/Globals.h" -#include "tests/IAccessor.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Fixture.h" +#include "src/core/utils/quantization/AsymmHelpers.h" #include "tests/validation/Helpers.h" +#include "tests/framework/Fixture.h" +#include "tests/validation/Validation.h" #include "tests/validation/reference/GEMMLowp.h" +#include "tests/validation/reference/ArithmeticOperations.h" +#include "tests/validation/reference/DequantizationLayer.h" -#include <random> +#include <cstdint> +#include <vector> namespace arm_compute { @@ -49,119 +47,145 @@ namespace template <typename U> void fill(U &&tensor, int i) { - switch(tensor.data_type()) - { - case DataType::QSYMM8_PER_CHANNEL: - { - int min_bound = 128; - int max_bound = -127; - for(size_t j = 0; j < tensor.quantization_info().scale().size(); j++) - { - std::pair<int, int> bounds = get_symm_quantized_per_channel_bounds(tensor.quantization_info(), -1.0f, 1.0f, i); - if(bounds.first < min_bound) - { - min_bound = bounds.first; - } - if(bounds.second > max_bound) - { - max_bound = bounds.second; - } - } - std::uniform_int_distribution<int8_t> distribution(min_bound, max_bound); - library->fill(tensor, distribution, i); - break; - } - case DataType::QASYMM8: - { - std::uniform_int_distribution<uint8_t> distribution(1, 254); - library->fill(tensor, distribution, i); - break; - } - case DataType::F16: - { - arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f }; - library->fill(tensor, distribution, i); - break; - } - case DataType::F32: - { - std::uniform_real_distribution<float> distribution(-1.0f, 1.0f); - library->fill(tensor, distribution, i); - break; - } - default: - library->fill_tensor_uniform(tensor, i); - } + library->fill_tensor_uniform(tensor, i); } -template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d, bool reinterpret_output_as_3d, typename OutputType, bool is_fused = false> -TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset, - GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo(), DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8, - QuantizationInfo b_qinfo = QuantizationInfo()) +template <typename U> +void fill_quantized(U &&tensor, int i) { - // Create tensors - DataType data_type_output = output_stage.type == GEMMLowpOutputStageType::NONE ? DataType::S32 : data_type_a; - - TensorType a = create_tensor<TensorType>(shape_a, data_type_a, 1); - TensorType b = create_tensor<TensorType>(shape_b, data_type_b, 1); // gemm output before output stage mismatch if i pass data_layout_output here. to be investigated - TensorType output = create_tensor<TensorType>(shape_output, data_type_output, 1); - - a.info()->set_quantization_info(QuantizationInfo(1.0f / 255, a_offset)); + ARM_COMPUTE_ASSERT(is_data_type_quantized(tensor.data_type())); + library->fill_tensor_uniform(tensor, i); +} - if(data_type_b == DataType::QSYMM8_PER_CHANNEL) +template <typename U> +void fill(U &&tensor, int i, int32_t min, int32_t max) +{ + if (tensor.data_type() == DataType::S32) { + std::uniform_int_distribution<int32_t> distribution(min, max); + library->fill(tensor, distribution, i); + } + else if(tensor.data_type() == DataType::F32) { - b.info()->set_quantization_info(b_qinfo); + std::uniform_real_distribution<float> distribution((float)min, (float)max); + library->fill(tensor, distribution, i); } else { - b.info()->set_quantization_info(QuantizationInfo(1.0f / 255, b_offset)); + ARM_COMPUTE_ERROR("NOT SUPPORTED!"); + } +} + +/** Information about how to fill tensors */ +struct TensorFillInfo +{ + // Bias fill range. Default values are arbitrary + int32_t min_bias {-20000}; + int32_t max_bias {20000}; + + // Output fill range. Default values are arbitrary + int32_t min_output {-20000}; + int32_t max_output {20000}; + + // Optional extra hash to randomize tensor filling + int32_t hash {0}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d, bool reinterpret_output_as_3d, typename OutputType, bool is_fused = false, bool run_twice = false> +TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, + const QuantizationInfo& output_qinfo, DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8, + GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo(), bool reshape_b_only_on_first_run = false, const TensorFillInfo& finfo = TensorFillInfo(), + bool accumulate = false, bool dynamic_qinfo = false, DataType data_type_output = DataType::UNKNOWN) +{ + ARM_COMPUTE_ASSERT(is_data_type_quantized_asymmetric(data_type_a)); + ARM_COMPUTE_ASSERT(data_type_a == data_type_b); + // If unknown, set to sensible defaults + if (data_type_output == DataType::UNKNOWN) { + data_type_output = output_stage.type == GEMMLowpOutputStageType::NONE ? DataType::S32 : data_type_a; } + + // Create tensors + TensorType a = create_tensor<TensorType>(shape_a, data_type_a, 1, dynamic_qinfo ? QuantizationInfo(1.0,0,true) : a_qinfo); + TensorType b = create_tensor<TensorType>(shape_b, data_type_b, 1, dynamic_qinfo ? QuantizationInfo(1.0,0,true) : b_qinfo); // gemm output before output stage mismatch if i pass data_layout_output here. to be investigated + TensorType output = create_tensor<TensorType>(shape_output, data_type_output, 1, output_qinfo /* output_qinfo will be ignored when output stage type is None */); + TensorType bias; if(is_fused) { TensorShape bias_shape(shape_b[0]); - bias = create_tensor<TensorType>(bias_shape, DataType::S32, 1); + bias = create_tensor<TensorType>(bias_shape,data_type_output == DataType::F32 ? DataType::F32 : DataType::S32, 1); } // Create and configure function // The GEMMinfo includes the values of the depth in case of reinterpreted 3d input/output FunctionType gemmlowp; - // TODO (COMPMID-1672) - Extending the test to validate add bias in offset contribution - gemmlowp.configure(&a, &b, is_fused ? &bias : nullptr, &output, GEMMInfo(false, false, false, (reinterpret_output_as_3d ? shape_output[2] : 0), reinterpret_input_as_3d, false, output_stage)); + gemmlowp.configure(&a, &b, is_fused ? &bias : nullptr, &output, GEMMInfo(false, false, reshape_b_only_on_first_run, (reinterpret_output_as_3d ? shape_output[2] : 0), reinterpret_input_as_3d, false, + output_stage, false /*fp_mixed_precision*/, false /*fast_math*/, false /*broadcast_bias*/, + arm_compute::ActivationLayerInfo(), false /* fixed_format */, arm_compute::WeightFormat::UNSPECIFIED, + false /* pretranspose_B */, accumulate)); + + // If the QuantizationInfo is dynamic, it needs to be settable after configure (note that we also force it to be dynamic) + if (dynamic_qinfo) + { + a.info()->set_quantization_info(QuantizationInfo(a_qinfo.scale(), a_qinfo.offset(), true)); + b.info()->set_quantization_info(QuantizationInfo(b_qinfo.scale(), b_qinfo.offset(), true)); + } + + ARM_COMPUTE_ASSERT(a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(b.info()->is_resizable()); + ARM_COMPUTE_ASSERT(output.info()->is_resizable()); - ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(output.info()->is_resizable(), framework::LogLevel::ERRORS); + add_padding_x({ &a, &b, &output }); // Allocate tensors a.allocator()->allocate(); b.allocator()->allocate(); output.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!a.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!b.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!output.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!b.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!output.info()->is_resizable()); // Fill tensors - fill(AccessorType(a), 0); - fill(AccessorType(b), 1); + fill_quantized(AccessorType(a), 0 + finfo.hash); + fill_quantized(AccessorType(b), 1 + finfo.hash); + + if (accumulate) + { + ARM_COMPUTE_ASSERT(accumulate != run_twice); + fill(AccessorType(output), 6 + finfo.hash, finfo.min_output, finfo.max_output); + } if(is_fused) { - ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); bias.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS); - fill(AccessorType(bias), 2); + ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); + fill(AccessorType(bias), 2 + finfo.hash, finfo.min_bias, finfo.max_bias); } + + // Run with variable inputs. + if(run_twice) + { + gemmlowp.run(); + fill_quantized(AccessorType(a), 3 + finfo.hash); // Fill tensors with new seed after run + fill_quantized(AccessorType(b), 4 + finfo.hash); + if(is_fused) + { + fill(AccessorType(bias), 5 + finfo.hash, finfo.min_bias, finfo.max_bias); + } + } + // Compute GEMM function gemmlowp.run(); return output; } -template <bool reinterpret_input_as_3d, typename TI = uint8_t, typename TW = uint8_t> -SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset, - DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8, QuantizationInfo b_qinfo = QuantizationInfo()) +template <bool reinterpret_input_as_3d, typename TI = uint8_t, typename TW = uint8_t, bool pretranspose_A = false, bool pretranspose_B = false, bool run_twice = false> +SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, + DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8, const TensorFillInfo& finfo = TensorFillInfo()) { + ARM_COMPUTE_ASSERT(is_data_type_quantized_asymmetric(data_type_a)); + ARM_COMPUTE_ASSERT(data_type_a == data_type_b); TensorShape shape_a_to_use = shape_a; if(reinterpret_input_as_3d) { @@ -170,101 +194,269 @@ SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, con } // Create reference - SimpleTensor<TI> a{ shape_a_to_use, data_type_a, 1 }; - SimpleTensor<TW> b{ shape_b, data_type_b, 1, data_type_b == DataType::QSYMM8_PER_CHANNEL ? b_qinfo : QuantizationInfo(1.0f / 255, b_offset) }; + SimpleTensor<TI> a{ shape_a_to_use, data_type_a, 1, a_qinfo }; + SimpleTensor<TW> b{ shape_b, data_type_b, 1, b_qinfo }; + + TensorShape shape_a_to_use_transposed{ shape_a_to_use }; + TensorShape shape_b_transposed{ shape_b }; + + shape_a_to_use_transposed.set(0, shape_a_to_use[1]); + shape_a_to_use_transposed.set(1, shape_a_to_use[0]); + shape_b_transposed.set(0, shape_b[1]); + shape_b_transposed.set(1, shape_b[0]); + + SimpleTensor<TI> a_transposed{ shape_a_to_use_transposed, data_type_a, 1, a_qinfo }; + SimpleTensor<TW> b_transposed{ shape_b_transposed, data_type_b, 1, b_qinfo }; // Fill reference - fill(a, 0); - fill(b, 1); - return reference::gemmlowp_matrix_multiply_core<int32_t, TI, TW>(a, b, shape_output, a_offset, b_offset); -} + fill_quantized(a, 0 + finfo.hash); + fill_quantized(b, 1 + finfo.hash); + + // Transpose reference if required + /* Note: Assuming the usual batch matmul dimensions A = (B x M x K), B = (B x K x N), if pretranspose_A is set to true, then A is assumed to be (B x K x M), + therefore, A must be pre-transposed before passing it to the fixture. And, we transpose A again in the fixture to make it (B x M x K) + in order to be able to call reference implementation that works with (B x M x K) input. + Similarly, if pretranspose_B is set to true, then B is assumed to be (B x N x K), B must be pre-transposed before passing it to the fixture. */ + if(pretranspose_A) + { + transpose_matrix<TI>(a, a_transposed); + } + + if(pretranspose_B) + { + transpose_matrix<TW>(b, b_transposed); + } + + // Run with variable inputs. + const int32_t a_offset = a_qinfo.uniform().offset; + const int32_t b_offset = b_qinfo.uniform().offset; + + if(run_twice) + { + reference::gemmlowp_matrix_multiply_core<int32_t, TI, TW>((pretranspose_A ? a_transposed : a), (pretranspose_B ? b_transposed : b), shape_output, a_offset, b_offset); + fill_quantized((pretranspose_A) ? a_transposed : a, 3 + finfo.hash); + fill_quantized((pretranspose_B) ? b_transposed : b, 4 + finfo.hash); + } + + return reference::gemmlowp_matrix_multiply_core<int32_t, TI, TW>((pretranspose_A ? a_transposed : a), (pretranspose_B ? b_transposed : b), shape_output, a_offset, b_offset); } +} // namespace -template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false> -class GEMMLowpMatrixMultiplyCoreValidationFixture : public framework::Fixture +template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool run_twice = false> +class GEMMLowpGenericMatrixMultiplyCoreValidationFixture : public framework::Fixture { public: - template <typename...> - void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset) + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, bool accumulate=false, bool dynamic_qinfo = false) { - _target = compute_target(shape_a, shape_b, shape_output, a_offset, b_offset); - _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, b_offset); + const auto a_qinfo = QuantizationInfo(1.0f / 255, a_offset); + const auto b_qinfo = QuantizationInfo(1.0f / 255, b_offset); + TensorFillInfo finfo; + _target = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate, dynamic_qinfo); + _reference = compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate); } protected: - TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset) + TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, const bool accumulate, const bool dynamic_qinfo) { - return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t>(shape_a, shape_b, shape_output, a_offset, b_offset); + const auto output_qinfo = QuantizationInfo(); // No output stage + return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, DataType::QASYMM8, DataType::QASYMM8, GEMMLowpOutputStageInfo(), false, finfo, accumulate, dynamic_qinfo); } - SimpleTensor<int32_t> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset) + SimpleTensor<int32_t> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, bool accumulate) { - return compute_gemmlowp_reference<reinterpret_input_as_3d>(shape_a, shape_b, shape_output, a_offset, b_offset); + SimpleTensor<int32_t> ref_output = compute_gemmlowp_reference<reinterpret_input_as_3d, uint8_t, uint8_t, false, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, + DataType::QASYMM8, DataType::QASYMM8, finfo); + + if (accumulate) + { + SimpleTensor<int32_t> output{ shape_output, DataType::S32, 1 }; + fill(output, 6 + finfo.hash, finfo.min_output, finfo.max_output); + reference::arithmetic_operation<int32_t>(reference::ArithmeticOperation::ADD, output, ref_output, output, ConvertPolicy::SATURATE); + return output; + } + + return ref_output; } TensorType _target{}; SimpleTensor<int32_t> _reference{}; }; -template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, typename TI = uint8_t, typename TW = uint8_t> -class GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture : public framework::Fixture +template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool run_twice = false> +class GEMMLowpMatrixMultiplyCoreValidationFixture : protected GEMMLowpGenericMatrixMultiplyCoreValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, run_twice> { public: - template <typename...> - void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage, DataType data_type_b) + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset) { - ARM_COMPUTE_EXPECT(output_stage.type != GEMMLowpOutputStageType::NONE, framework::LogLevel::ERRORS); - DataType data_type_a = data_type_b == DataType::QASYMM8_SIGNED ? DataType::QASYMM8_SIGNED : DataType::QASYMM8; + GEMMLowpGenericMatrixMultiplyCoreValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, run_twice>::setup(shape_a, shape_b, shape_output, a_offset, b_offset, false /* accumulate */); + } +}; - if(data_type_b == DataType::QSYMM8_PER_CHANNEL) - { - output_stage.is_quantized_per_channel = true; - const size_t num_channels = shape_b[0]; - std::vector<float> scales(num_channels); - std::uniform_real_distribution<float> distribution(0.f, 1.f); - library->fill(scales, distribution, 0); - output_stage.gemmlowp_multipliers.resize(num_channels); - output_stage.gemmlowp_shifts.resize(num_channels); - for(size_t i = 0; i < num_channels; ++i) - { - quantization::calculate_quantized_multiplier(scales[i], &output_stage.gemmlowp_multipliers[i], &output_stage.gemmlowp_shifts[i]); - } +template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool run_twice = false> +class GEMMLowpMatrixMultiplyAccumulateValidationFixture : protected GEMMLowpGenericMatrixMultiplyCoreValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, run_twice> +{ +public: + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset) + { + GEMMLowpGenericMatrixMultiplyCoreValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, run_twice>::setup(shape_a, shape_b, shape_output, a_offset, b_offset, true /* accumulate */); + } +}; - _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, 0, output_stage, data_type_a, data_type_b, QuantizationInfo(scales)); - _target = compute_target(shape_a, shape_b, shape_output, a_offset, 0, output_stage, data_type_a, data_type_b, QuantizationInfo(scales)); - } - else - { - _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, b_offset, output_stage, data_type_a, data_type_b, QuantizationInfo()); - _target = compute_target(shape_a, shape_b, shape_output, a_offset, b_offset, output_stage, data_type_a, data_type_b, QuantizationInfo()); - } +template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool run_twice = false> +class GEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture : protected GEMMLowpGenericMatrixMultiplyCoreValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, run_twice> +{ +public: + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset) + { + GEMMLowpGenericMatrixMultiplyCoreValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, run_twice>::setup(shape_a, shape_b, shape_output, a_offset, b_offset, false /* accumulate */, true /* dynamic_qinfo */); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, typename TI = uint8_t, typename TW = uint8_t, bool run_twice = false> +class GEMMLowpGenericMatrixMultiplyCoreFusedOffsetOutputValidationFixture : public framework::Fixture +{ +public: + /** Dynamically initialize the quantization info with saturation awareness + */ + template <typename T> + static void setup_quantization(DataType data_type, const TensorShape& shape_a, const TensorShape& shape_b, QuantizationInfo& a_qinfo, QuantizationInfo& b_qinfo, QuantizationInfo& output_qinfo, TensorFillInfo& finfo) + { + // This hash is used by random generators. There may be hash collisions but + // this is intentional as it's a very easy way to make the the current + // random generation process almost different for many test configurations, + // which were using the same set of values before. + finfo.hash = shape_a[0] + shape_a[1] + shape_b[0] + shape_b[1]; + + const int32_t t_max = static_cast<int32_t>(std::numeric_limits<T>::max()); + const int32_t t_min = static_cast<int32_t>(std::numeric_limits<T>::min()); + + std::mt19937 generator(library->seed() + finfo.hash); + std::uniform_real_distribution<float> distribution_float(-5.0f, 3.0f); + std::uniform_int_distribution<int32_t> distribution_t(t_min, t_max); + + const float scale_lhs = pow(2, distribution_float(generator)); // [2^-5, 2^3] + const float scale_rhs = pow(2, distribution_float(generator)); // [2^-5, 2^3] + + const int32_t offset_lhs = distribution_t(generator); + const int32_t offset_rhs = distribution_t(generator); + + a_qinfo = QuantizationInfo(scale_lhs, offset_lhs); + b_qinfo = QuantizationInfo(scale_rhs, offset_rhs); + + // reinterpret_input_as_3d or reinterpret_output_as_3d can be ignored, as the underlying gemm / matmul computation + // is equivalent to a standard 2D one with m-n-k dimensions + const int m = shape_a.y(); + const int n = shape_b.x(); + const int k = shape_a.x(); + + const float bias_fraction = 0.5f; // We enabled is_fused in compute_gemmlowp_target below, thus bias is included + + QuantizationHint q_hint = suggest_matmul_dst_q_info_and_bias(a_qinfo, b_qinfo, m, n, k, data_type, bias_fraction); + output_qinfo = q_hint.q_info; + finfo.min_bias = q_hint.bias_min; + finfo.max_bias = q_hint.bias_max; + + // Both target and reference implementations use negated offsets, i.e. + // float_val = (int_val + offset) * scale + // instead of + // float_val = (int_val - offset) * scale + // as usual. Therefore, after calculating the output quantization above, we + // negate the offsets of inputs' offsets. + a_qinfo = QuantizationInfo(scale_lhs, -offset_lhs); + b_qinfo = QuantizationInfo(scale_rhs, -offset_rhs); + } + + /** Initialize output stage info from quantization info */ + static Status init_gemmlowp_output_stage_info( + DataType data_type, + const QuantizationInfo& a_qinfo, + const QuantizationInfo& b_qinfo, + const QuantizationInfo& output_qinfo, + GEMMLowpOutputStageType type, + GEMMLowpOutputStageInfo &gemmlowp_output_stage_info) + { + ARM_COMPUTE_RETURN_ERROR_ON(!is_data_type_quantized_asymmetric(data_type)); + + const UniformQuantizationInfo aq_unif = a_qinfo.uniform(); + const UniformQuantizationInfo bq_unif = b_qinfo.uniform(); + const UniformQuantizationInfo oq_unif = output_qinfo.uniform(); + + float multiplier = (aq_unif.scale * bq_unif.scale) / oq_unif.scale; + int32_t int_multiplier; + int32_t shift; + + ARM_COMPUTE_RETURN_ON_ERROR( + quantization::calculate_quantized_multiplier(multiplier, &int_multiplier, &shift)); + + int32_t type_min = 0; + int32_t type_max = 0; + std::tie(type_min, type_max) = quantization::get_quantized_asymmetric_output_min_max(output_qinfo, ActivationLayerInfo(), data_type); + + gemmlowp_output_stage_info.gemmlowp_real_multiplier = multiplier; + gemmlowp_output_stage_info.gemmlowp_multiplier = int_multiplier; + gemmlowp_output_stage_info.gemmlowp_multipliers = { int_multiplier }; + gemmlowp_output_stage_info.gemmlowp_shift = shift; + gemmlowp_output_stage_info.gemmlowp_shifts = { shift }; + gemmlowp_output_stage_info.gemmlowp_offset = oq_unif.offset; + gemmlowp_output_stage_info.type = type; + gemmlowp_output_stage_info.gemmlowp_min_bound = type_min; + gemmlowp_output_stage_info.gemmlowp_max_bound = type_max; + + return Status{}; + } + + /** Currently this fixture only tests the following data type configurations: + * + * 1. a and b are of the same data type + * 2. The data type is quantized asymmetric + * + */ + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, GEMMLowpOutputStageType output_stage_type, DataType data_type, + bool reshape_b_only_on_first_run) + { + ARM_COMPUTE_ASSERT(output_stage_type != GEMMLowpOutputStageType::NONE); + ARM_COMPUTE_ASSERT(is_data_type_quantized_asymmetric(data_type)); + + // Randomized dynamic quantization: randomize quantization info in a way that ensures no result saturation + // most of the time + QuantizationInfo a_qinfo; + QuantizationInfo b_qinfo; + QuantizationInfo output_qinfo; + TensorFillInfo finfo; + setup_quantization<TI>(data_type, shape_a, shape_b, a_qinfo, b_qinfo, output_qinfo, finfo); + + GEMMLowpOutputStageInfo output_stage; + init_gemmlowp_output_stage_info(data_type, a_qinfo, b_qinfo, output_qinfo, output_stage_type, output_stage); + + _reference = compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type, data_type, output_stage, finfo); + _target = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, data_type, data_type, output_stage, reshape_b_only_on_first_run, finfo); } protected: - TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage, - DataType data_type_a, DataType data_type_b, QuantizationInfo b_qinfo) + TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const QuantizationInfo& output_qinfo, + DataType data_type_a, DataType data_type_b, const GEMMLowpOutputStageInfo& output_stage, bool reshape_b_only_on_first_run = false, const TensorFillInfo& finfo = TensorFillInfo()) { - return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, qasymm8_t, true>(shape_a, shape_b, shape_output, a_offset, b_offset, - output_stage, data_type_a, data_type_b, b_qinfo); + return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, qasymm8_t, true, run_twice>(shape_a, shape_b, shape_output, a_qinfo, + b_qinfo, output_qinfo, data_type_a, data_type_b, output_stage, reshape_b_only_on_first_run, finfo); } - SimpleTensor<TI> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset, - GEMMLowpOutputStageInfo output_stage, DataType data_type_a, DataType data_type_b, QuantizationInfo b_qinfo) + SimpleTensor<TI> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, + DataType data_type_a, DataType data_type_b, const GEMMLowpOutputStageInfo& output_stage, const TensorFillInfo& finfo = TensorFillInfo()) { - SimpleTensor<int32_t> output = compute_gemmlowp_reference<reinterpret_input_as_3d, TI, TW>(shape_a, shape_b, shape_output, a_offset, b_offset, data_type_a, data_type_b, b_qinfo); + SimpleTensor<int32_t> output = compute_gemmlowp_reference<reinterpret_input_as_3d, TI, TW, false, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type_a, data_type_b, finfo); TensorShape bias_shape(shape_b[0]); SimpleTensor<int32_t> bias{ bias_shape, DataType::S32, 1 }; - fill(bias, 2); + (run_twice) ? fill(bias, 5 + finfo.hash, finfo.min_bias, finfo.max_bias) : fill(bias, 2 + finfo.hash, finfo.min_bias, finfo.max_bias); // Fill bias with same seed as last run of gemmlowp_target switch(output_stage.type) { case GEMMLowpOutputStageType::QUANTIZE_DOWN: - return reference::gemmlowp_quantize_down_scale<int32_t, TW>(output, bias, + return reference::gemmlowp_quantize_down_scale<int32_t, TI>(output, bias, output_stage.gemmlowp_offset, output_stage.gemmlowp_multipliers, output_stage.gemmlowp_shifts, output_stage.gemmlowp_min_bound, output_stage.gemmlowp_max_bound); break; case GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT: - return reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, TW>(output, bias, + return reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, TI>(output, bias, output_stage.gemmlowp_multipliers, output_stage.gemmlowp_shifts, output_stage.gemmlowp_offset, output_stage.gemmlowp_min_bound, output_stage.gemmlowp_max_bound); break; default: @@ -276,11 +468,78 @@ protected: SimpleTensor<TI> _reference{}; }; +template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool run_twice = false> +class GEMMLowpDequantizedMatrixMultiplyValidationFixture : public framework::Fixture +{ +public: + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, bool accumulate) + { + const bool dynamic_qinfo = false; + const auto a_qinfo = QuantizationInfo(1.0f / 255, a_offset); + const auto b_qinfo = QuantizationInfo(5.0f / 255, b_offset); + TensorFillInfo finfo; + _target = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate, dynamic_qinfo); + _reference = compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate, dynamic_qinfo); + } + +protected: + TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, const bool accumulate, const bool dynamic_qinfo) + { + const auto output_qinfo = QuantizationInfo(); + return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, GEMMLowpOutputStageInfo(), false, finfo, accumulate, dynamic_qinfo, DataType::F32); + } + + SimpleTensor<float> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, bool accumulate, const bool dynamic_qinfo) + { + QuantizationInfo s32_ref_output_quant_info = QuantizationInfo(a_qinfo.uniform().scale * b_qinfo.uniform().scale, 0, dynamic_qinfo); + + SimpleTensor<int32_t> s32_ref_output = compute_gemmlowp_reference<reinterpret_input_as_3d, int8_t, int8_t, false, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, + DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, finfo); + s32_ref_output.quantization_info(s32_ref_output_quant_info); + + SimpleTensor<float> f32_ref_output(s32_ref_output.shape(), DataType::F32); + f32_ref_output = reference::dequantization_layer<float, int32_t>(s32_ref_output); + + if (accumulate) + { + SimpleTensor<float> output{ shape_output, DataType::F32, 1 }; + fill(output, 6 + finfo.hash, finfo.min_output, finfo.max_output); + reference::arithmetic_operation<float>(reference::ArithmeticOperation::ADD, output, f32_ref_output, output, ConvertPolicy::SATURATE); + return output; + } + + return f32_ref_output; + } + + TensorType _target{}; + SimpleTensor<float> _reference{}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, typename TI = uint8_t, typename TW = uint8_t, bool run_twice = false> +class GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture : public GEMMLowpGenericMatrixMultiplyCoreFusedOffsetOutputValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, TI, TW, run_twice> +{ +public: + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, GEMMLowpOutputStageType output_stage_type, DataType data_type, bool reshape_b_only_on_first_run) + { + GEMMLowpGenericMatrixMultiplyCoreFusedOffsetOutputValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, TI, TW, run_twice>::setup(shape_a, shape_b, + shape_output, output_stage_type, data_type, reshape_b_only_on_first_run); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, typename TI = uint8_t, typename TW = uint8_t, bool run_twice = false> +class GEMMLowpBatchedMatrixMultiplyCoreFusedOffsetOutputFixture : public GEMMLowpGenericMatrixMultiplyCoreFusedOffsetOutputValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, TI, TW, run_twice> +{ +public: + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, GEMMLowpOutputStageType output_stage_type, DataType data_type, bool reshape_b_only_on_first_run) + { + GEMMLowpGenericMatrixMultiplyCoreFusedOffsetOutputValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, TI, TW, run_twice>::setup(shape_a, shape_b, shape_output, output_stage_type, data_type, reshape_b_only_on_first_run); + } +}; + template <typename TensorType, typename AccessorType, typename FunctionType> class GEMMLowpQuantizeDownInt32ToUint8ScaleValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max, bool add_bias) { _target = compute_target(shape, result_offset, result_mult_int, result_shift, min, max, add_bias); @@ -316,27 +575,27 @@ protected: output_stage_info.output_data_type = DataType::QASYMM8; output_stage.configure(&a, add_bias ? &b : nullptr, &c, output_stage_info); - ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(c.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(c.info()->is_resizable()); // Allocate tensors a.allocator()->allocate(); c.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!a.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!c.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!c.info()->is_resizable()); // Fill tensor fill(AccessorType(a), 0); if(add_bias) { - ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(b.info()->is_resizable()); // Allocate bias tensor b.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!b.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!b.info()->is_resizable()); // Fill tensor fill(AccessorType(b), 1); @@ -382,7 +641,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType> class GEMMLowpQuantizeDownInt32ToInt8ScaleValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max, bool add_bias) { _target = compute_target(shape, result_offset, result_mult_int, result_shift, min, max, add_bias); @@ -418,27 +676,27 @@ protected: output_stage_info.output_data_type = DataType::QASYMM8_SIGNED; output_stage.configure(&a, add_bias ? &b : nullptr, &c, output_stage_info); - ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(c.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(c.info()->is_resizable()); // Allocate tensors a.allocator()->allocate(); c.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!a.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!c.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!c.info()->is_resizable()); // Fill tensor fill(AccessorType(a), 0); if(add_bias) { - ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(b.info()->is_resizable()); // Allocate bias tensor b.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!b.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!b.info()->is_resizable()); // Fill tensor fill(AccessorType(b), 1); @@ -484,7 +742,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType> class GEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, int32_t result_fixedpoint_multiplier, int32_t result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max, bool add_bias) { _target = compute_target(shape, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max, add_bias); @@ -512,27 +769,27 @@ protected: FunctionType output_stage; output_stage.configure(&a, add_bias ? &b : nullptr, &c, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max); - ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(c.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(c.info()->is_resizable()); // Allocate tensors a.allocator()->allocate(); c.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!a.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!c.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!c.info()->is_resizable()); // Fill tensor fill(AccessorType(a), 0); if(add_bias) { - ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(b.info()->is_resizable()); // Allocate bias tensor b.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!b.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!b.info()->is_resizable()); // Fill tensor fill(AccessorType(b), 1); @@ -579,7 +836,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType> class GEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, int32_t result_fixedpoint_multiplier, int32_t result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max, bool add_bias) { _target = compute_target(shape, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max, add_bias); @@ -607,27 +863,27 @@ protected: FunctionType output_stage; output_stage.configure(&a, add_bias ? &b : nullptr, &c, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max); - ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(c.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(c.info()->is_resizable()); // Allocate tensors a.allocator()->allocate(); c.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!a.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!c.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!c.info()->is_resizable()); // Fill tensor fill(AccessorType(a), 0); if(add_bias) { - ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(b.info()->is_resizable()); // Allocate bias tensor b.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!b.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!b.info()->is_resizable()); // Fill tensor fill(AccessorType(b), 1); @@ -674,7 +930,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class GEMMLowpQuantizeDownInt32ScaleByFloatValidationFixture : public framework::Fixture { public: - template <typename...> void setup(DataType data_type, TensorShape shape, float result_real_multiplier, int32_t result_offset, int32_t min, int32_t max, bool add_bias) { _target = compute_target(data_type, shape, result_real_multiplier, result_offset, min, max, add_bias); @@ -712,27 +967,27 @@ protected: FunctionType output_stage; output_stage.configure(&a, add_bias ? &b : nullptr, &c, info); - ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(c.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(c.info()->is_resizable()); // Allocate tensors a.allocator()->allocate(); c.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!a.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!c.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!c.info()->is_resizable()); // Fill tensor fill(AccessorType(a), 0); if(add_bias) { - ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(b.info()->is_resizable()); // Allocate bias tensor b.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!b.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!b.info()->is_resizable()); // Fill tensor fill(AccessorType(b), 1); @@ -777,7 +1032,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType> class GEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, int32_t result_fixedpoint_multiplier, int32_t result_shift, int32_t min, int32_t max, bool add_bias) { _target = compute_target(shape, result_fixedpoint_multiplier, result_shift, min, max, add_bias); @@ -805,27 +1059,27 @@ protected: FunctionType output_stage; output_stage.configure(&a, add_bias ? &b : nullptr, &c, result_fixedpoint_multiplier, result_shift, min, max); - ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(c.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(c.info()->is_resizable()); // Allocate tensors a.allocator()->allocate(); c.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!a.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!c.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!c.info()->is_resizable()); // Fill tensor fill(AccessorType(a), 0); if(add_bias) { - ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(b.info()->is_resizable()); // Allocate bias tensor b.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!b.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!b.info()->is_resizable()); // Fill tensor fill(AccessorType(b), 1); @@ -868,11 +1122,10 @@ protected: SimpleTensor<int16_t> _reference{}; }; -template <typename TensorType, typename AccessorType, typename ReshapeLHSFunctionType, typename ReshapeRHSFunctionType, typename GEMMFunctionType> +template <typename TensorType, typename AccessorType, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMFunctionType> class GEMMLowpMatrixMultiplyReshapedValidationFixture : public framework::Fixture { public: - template <typename...> void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, bool interleave_lhs, bool interleave_rhs, DataType data_type) { @@ -938,15 +1191,17 @@ protected: // The output tensor will be auto-initialized within the function // Create and configure function - ReshapeLHSFunctionType reshape_lhs; - ReshapeRHSFunctionType reshape_rhs; + ReshapeLHSOperatorType reshape_lhs; + ReshapeRHSOperatorType reshape_rhs; GEMMFunctionType gemm; - reshape_lhs.configure(&lhs, &lhs_reshaped, lhs_info); - reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info); - gemm.configure(&lhs_reshaped, &rhs_reshaped, &dst, lhs_info, rhs_info, GEMMReshapeInfo(M, N, K)); + reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info); + reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info); + gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), dst.info(), lhs_info, rhs_info, GEMMReshapeInfo(M, N, K)); + + ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); - ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS); + add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &dst }); // Allocate tensors lhs.allocator()->allocate(); @@ -955,20 +1210,23 @@ protected: rhs_reshaped.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!lhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(lhs), 0); fill(AccessorType(rhs), 1); // Compute GEMM - reshape_lhs.run(); - reshape_rhs.run(); - gemm.run(); + ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } }; + reshape_lhs.run(reshape_lhs_pack); + ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } }; + reshape_rhs.run(reshape_rhs_pack); + ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped }, { ACL_SRC_1, &rhs_reshaped }, { ACL_DST, &dst } }); + gemm.run(gemm_pack); return dst; } @@ -1014,11 +1272,10 @@ protected: SimpleTensor<int32_t> _reference{}; }; -template <typename TensorType, typename AccessorType, typename ReshapeLHSFunctionType, typename ReshapeRHSFunctionType, typename GEMMFunctionType> +template <typename TensorType, typename AccessorType, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMFunctionType> class GEMMLowpMatrixMultiplyReshaped3DValidationFixture : public framework::Fixture { public: - template <typename...> void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, bool interleave_lhs, bool interleave_rhs, DataType data_type) { @@ -1088,15 +1345,17 @@ protected: // The output tensor will be auto-initialized within the function // Create and configure function - ReshapeLHSFunctionType reshape_lhs; - ReshapeRHSFunctionType reshape_rhs; + ReshapeLHSOperatorType reshape_lhs; + ReshapeRHSOperatorType reshape_rhs; GEMMFunctionType gemm; - reshape_lhs.configure(&lhs, &lhs_reshaped, lhs_info); - reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info); - gemm.configure(&lhs_reshaped, &rhs_reshaped, &dst, lhs_info, rhs_info, GEMMReshapeInfo(M, N, K, 1, 1, m_h)); + reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info); + reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info); + gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), dst.info(), lhs_info, rhs_info, GEMMReshapeInfo(M, N, K, 1, 1, m_h)); + + ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); - ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS); + add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &dst }); // Allocate tensors lhs.allocator()->allocate(); @@ -1105,20 +1364,23 @@ protected: rhs_reshaped.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!lhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(lhs), 0); fill(AccessorType(rhs), 1); // Compute GEMM - reshape_lhs.run(); - reshape_rhs.run(); - gemm.run(); + ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } }; + reshape_lhs.run(reshape_lhs_pack); + ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } }; + reshape_rhs.run(reshape_rhs_pack); + ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped }, { ACL_SRC_1, &rhs_reshaped }, { ACL_DST, &dst } }); + gemm.run(gemm_pack); return dst; } @@ -1166,11 +1428,10 @@ protected: SimpleTensor<int32_t> _reference{}; }; -template <typename TensorType, typename AccessorType, typename ReshapeRHSFunctionType, typename GEMMFunctionType> +template <typename TensorType, typename AccessorType, typename ReshapeRHSOperatorType, typename GEMMFunctionType> class GEMMLowpMatrixMultiplyReshapedOnlyRHSValidationFixture : public framework::Fixture { public: - template <typename...> void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0, bool interleave_rhs, bool transpose_rhs, DataType data_type) { @@ -1239,13 +1500,15 @@ protected: // The output tensor will be auto-initialized within the function // Create and configure function - ReshapeRHSFunctionType reshape_rhs; + ReshapeRHSOperatorType reshape_rhs; GEMMFunctionType gemm; - reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info); - gemm.configure(&lhs, &rhs_reshaped, &dst, gemm_info); + reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info); + gemm.configure(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info); - ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); + + add_padding_x({ &lhs, &rhs, &rhs_reshaped, &dst }); // Allocate tensors lhs.allocator()->allocate(); @@ -1253,18 +1516,20 @@ protected: rhs_reshaped.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(lhs), 0); fill(AccessorType(rhs), 1); // Compute GEMM - reshape_rhs.run(); - gemm.run(); + ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } }; + reshape_rhs.run(reshape_rhs_pack); + ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs_reshaped }, { ACL_DST, &dst } }); + gemm.run(gemm_pack); return dst; } @@ -1305,11 +1570,372 @@ protected: SimpleTensor<int32_t> _reference{}; }; -template <typename TensorType, typename AccessorType, typename ReshapeRHSFunctionType, typename GEMMFunctionType> +template <typename T, typename TensorType, typename AccessorType, typename ReshapeRHSOperatorType, typename GEMMFunctionType, typename ReduceOperation, typename CastOperation> +class GEMMLowpMatrixMultiplyReshapedOnlyRHSMMULOutputStageValidationFixture : public framework::Fixture +{ +public: + void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, + unsigned int k0, unsigned int h0, bool interleave_rhs, bool transpose_rhs, bool broadcast_bias, DataType data_type) + { + GEMMLowpOutputStageInfo output_stage; + output_stage.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT; + output_stage.output_data_type = data_type; + output_stage.gemmlowp_multipliers = std::vector<int32_t> { 1 }; + output_stage.gemmlowp_shifts = std::vector<int32_t> { 1 }; + output_stage.gemmlowp_multipliers[0] = 1; + output_stage.gemmlowp_shifts[0] = 1; + output_stage.gemmlowp_offset = 0; + constexpr float scale = 0.001f; + quantization::calculate_quantized_multiplier(scale, &output_stage.gemmlowp_multipliers[0], &output_stage.gemmlowp_shifts[0]); + output_stage.gemmlowp_min_bound = -100; + output_stage.gemmlowp_max_bound = 100; + + GEMMLHSMatrixInfo lhs_info; + lhs_info.m0 = m0; + lhs_info.k0 = k0; + + GEMMRHSMatrixInfo rhs_info; + rhs_info.n0 = n0; + rhs_info.k0 = k0; + rhs_info.h0 = h0; + rhs_info.interleave = interleave_rhs; + rhs_info.transpose = transpose_rhs; + + int a_offset = 1; + int b_offset = 1; + + // Set the tensor shapes for LHS and RHS matrices + const TensorShape lhs_shape(k, m, batch_size); + const TensorShape rhs_shape(n, k, batch_size); + const TensorShape bias_shape(n, + broadcast_bias ? 1 : m, + broadcast_bias ? 1 : batch_size); + + _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, output_stage, a_offset, b_offset); + if(gemm_validated == true) + { + _reference = compute_reference(lhs_shape, rhs_shape, bias_shape, data_type, output_stage, a_offset, b_offset); + } + } + +protected: + template <typename U> + void fill(U &&tensor, int i) + { + switch(tensor.data_type()) + { + case DataType::QASYMM8: + { + // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path + std::uniform_int_distribution<> distribution(1, 254); + library->fill(tensor, distribution, i); + } + break; + case DataType::QASYMM8_SIGNED: + { + std::uniform_int_distribution<> distribution(-127, 126); + library->fill(tensor, distribution, i); + } + break; + case DataType::S32: + { + std::uniform_int_distribution<> distribution(-10000, 10000); + library->fill(tensor, distribution, i); + } + break; + default: + ARM_COMPUTE_ERROR("Unsupported data type"); + } + } + + TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, + const GEMMRHSMatrixInfo &rhs_info, DataType data_type, GEMMLowpOutputStageInfo output_stage, const int a_offset, const int b_offset) + { + // Create tensors + TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1, QuantizationInfo(1.0f / 255, a_offset)); + TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1, QuantizationInfo(1.0f / 255, b_offset)); + TensorType bias = create_tensor<TensorType>(bias_shape, DataType::S32, 1); + TensorType dst; + TensorType rhs_reshaped; + + const unsigned int M = lhs_shape[1]; + const unsigned int N = rhs_shape[0]; + const unsigned int K = lhs_shape[0]; + + // Tensors for precomputing sum of lhs rows / rhs columns + TensorType vec_sum_rows = create_tensor<TensorType>(TensorShape(M, 1, lhs_shape[2]), DataType::S32, 1); + TensorType vec_sum_cols = create_tensor<TensorType>(TensorShape(N, 1, rhs_shape[2]), DataType::S32, 1); + + GEMMKernelInfo gemm_info; + gemm_info.m = M; + gemm_info.n = N; + gemm_info.k = K; + gemm_info.lhs_info = lhs_info; + gemm_info.rhs_info = rhs_info; + gemm_info.output_stage = output_stage; + gemm_info.a_offset = a_offset; + gemm_info.b_offset = b_offset; + // The output tensor will be auto-initialized within the function + + // Create and configure function + ReshapeRHSOperatorType reshape_rhs; + GEMMFunctionType gemm; + reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info); + + // If GEMM is not validated, do not try to run. The validation will check + // if the technology supports this extension. If not, the test will be skipped. + // If it supports, the test will fail anyway because target and reference + // will not match. + gemm_validated = bool(gemm.validate(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info, vec_sum_cols.info(), vec_sum_rows.info(), bias.info())); + if(gemm_validated == true) + { + gemm.configure(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info, vec_sum_cols.info(), vec_sum_rows.info(), bias.info()); + + ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); + + // Allocate tensors + lhs.allocator()->allocate(); + rhs.allocator()->allocate(); + rhs_reshaped.allocator()->allocate(); + bias.allocator()->allocate(); + vec_sum_cols.allocator()->allocate(); + vec_sum_rows.allocator()->allocate(); + dst.allocator()->allocate(); + + ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!vec_sum_cols.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!vec_sum_rows.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + + // Fill tensors + fill(AccessorType(lhs), 0); + fill(AccessorType(rhs), 1); + fill(AccessorType(bias), 2); + + TensorType lhs_32 = create_tensor<TensorType>(lhs_shape, DataType::S32, 1); + TensorType rhs_32 = create_tensor<TensorType>(rhs_shape, DataType::S32, 1); + CastOperation cast_lhs; + CastOperation cast_rhs; + cast_lhs.configure(&lhs, &lhs_32, ConvertPolicy::SATURATE); + cast_rhs.configure(&rhs, &rhs_32, ConvertPolicy::SATURATE); + lhs_32.allocator()->allocate(); + rhs_32.allocator()->allocate(); + cast_lhs.run(); + cast_rhs.run(); + + ReduceOperation lhs_sum_rows; + ReduceOperation rhs_sum_cols; + + lhs_sum_rows.configure(&lhs_32, &vec_sum_rows, 0, ReductionOperation::SUM, false); + rhs_sum_cols.configure(&rhs_32, &vec_sum_cols, 1, ReductionOperation::SUM); + + lhs_sum_rows.run(); + rhs_sum_cols.run(); + + // Compute GEMM + ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } }; + reshape_rhs.run(reshape_rhs_pack); + ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs_reshaped }, { ACL_SRC_2, &bias }, { ACL_DST, &dst }, { ACL_VEC_COL_SUM, &vec_sum_cols }, { ACL_VEC_ROW_SUM, &vec_sum_rows } }); + gemm.run(gemm_pack); + } + + return dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, DataType data_type, GEMMLowpOutputStageInfo output_stage, + const int a_offset, const int b_offset) + { + TensorShape dst_shape = lhs_shape; + dst_shape[0] = rhs_shape[0]; + dst_shape[1] = lhs_shape[1]; + + // Create reference + SimpleTensor<T> lhs{ lhs_shape, data_type, 1, QuantizationInfo(1.0f / 255, a_offset) }; + SimpleTensor<T> rhs{ rhs_shape, data_type, 1, QuantizationInfo(1.0f / 255, b_offset) }; + SimpleTensor<int32_t> bias{ bias_shape, DataType::S32, 1 }; + SimpleTensor<int32_t> dst{ dst_shape, DataType::S32, 1 }; + SimpleTensor<T> dst_final{ dst_shape, data_type, 1 }; + + // Fill reference + fill(lhs, 0); + fill(rhs, 1); + fill(bias, 2); + + dst = reference::gemmlowp_matrix_multiply_core<int32_t, T>(lhs, rhs, dst_shape, a_offset, b_offset); + dst_final = reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, T>(dst, bias, + output_stage.gemmlowp_multipliers, output_stage.gemmlowp_shifts, output_stage.gemmlowp_offset, output_stage.gemmlowp_min_bound, output_stage.gemmlowp_max_bound); + return dst_final; + } + + bool gemm_validated = true; + TensorType _target{}; + SimpleTensor<T> _reference{}; +}; + +template <typename TensorType, typename AccessorType, typename ReshapeRHSOperatorType, typename GEMMFunctionType> +class GEMMLowpMatrixMultiplyReshapedOnlyRHSMMULValidationFixture : public framework::Fixture +{ +public: + void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, + unsigned int k0, unsigned int h0, bool interleave_rhs, bool transpose_rhs, DataType data_type) + { + GEMMLHSMatrixInfo lhs_info; + lhs_info.m0 = m0; + lhs_info.k0 = k0; + + GEMMRHSMatrixInfo rhs_info; + rhs_info.n0 = n0; + rhs_info.k0 = k0; + rhs_info.h0 = h0; + rhs_info.interleave = interleave_rhs; + rhs_info.transpose = transpose_rhs; + + // Set the tensor shapes for LHS and RHS matrices + const TensorShape lhs_shape(k, m, batch_size); + const TensorShape rhs_shape(n, k, batch_size); + + _target = compute_target(lhs_shape, rhs_shape, lhs_info, rhs_info, data_type); + if(gemm_validated == true) + { + _reference = compute_reference(lhs_shape, rhs_shape, data_type); + } + } + +protected: + template <typename U> + void fill(U &&tensor, int i) + { + switch(tensor.data_type()) + { + case DataType::QASYMM8: + { + // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path + std::uniform_int_distribution<> distribution(1, 254); + library->fill(tensor, distribution, i); + } + break; + case DataType::QASYMM8_SIGNED: + { + std::uniform_int_distribution<> distribution(-127, 126); + library->fill(tensor, distribution, i); + } + break; + default: + ARM_COMPUTE_ERROR("Unsupported data type"); + } + } + + TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const GEMMLHSMatrixInfo &lhs_info, + const GEMMRHSMatrixInfo &rhs_info, DataType data_type) + { + // Create tensors + TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1); + TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1); + TensorType rhs_reshaped; + TensorType dst; + + const unsigned int M = lhs_shape[1]; + const unsigned int N = rhs_shape[0]; + const unsigned int K = lhs_shape[0]; + + GEMMKernelInfo gemm_info; + gemm_info.m = M; + gemm_info.n = N; + gemm_info.k = K; + gemm_info.lhs_info = lhs_info; + gemm_info.rhs_info = rhs_info; + // The output tensor will be auto-initialized within the function + + // Create and configure function + ReshapeRHSOperatorType reshape_rhs; + GEMMFunctionType gemm; + reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info); + + // If GEMM is not validated, do not try to run. The validation will check + // if the technology supports this extension. If not, the test will be skipped. + // If it supports, the test will fail anyway because target and reference + // will not match. + gemm_validated = bool(gemm.validate(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info, nullptr, nullptr, nullptr)); + if(gemm_validated == true) + { + gemm.configure(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info, nullptr, nullptr, nullptr); + + ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); + + // Allocate tensors + lhs.allocator()->allocate(); + rhs.allocator()->allocate(); + rhs_reshaped.allocator()->allocate(); + dst.allocator()->allocate(); + + ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + + // Fill tensors + fill(AccessorType(lhs), 0); + fill(AccessorType(rhs), 1); + + // Compute GEMM + ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } }; + reshape_rhs.run(reshape_rhs_pack); + ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs_reshaped }, { ACL_DST, &dst } }); + gemm.run(gemm_pack); + } + + return dst; + } + + SimpleTensor<int32_t> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type) + { + TensorShape dst_shape = lhs_shape; + dst_shape[0] = rhs_shape[0]; + dst_shape[1] = lhs_shape[1]; + + if(data_type == DataType::QASYMM8) + { + // Create reference + SimpleTensor<uint8_t> lhs{ lhs_shape, data_type, 1 }; + SimpleTensor<uint8_t> rhs{ rhs_shape, data_type, 1 }; + SimpleTensor<int32_t> dst{ dst_shape, DataType::S32, 1 }; + + // Fill reference + fill(lhs, 0); + fill(rhs, 1); + + return reference::gemmlowp_matrix_multiply_core<int32_t, uint8_t>(lhs, rhs, dst_shape, 0, 0); + } + else + { + // Create reference + SimpleTensor<int8_t> lhs{ lhs_shape, data_type, 1 }; + SimpleTensor<int8_t> rhs{ rhs_shape, data_type, 1 }; + SimpleTensor<int32_t> dst{ dst_shape, DataType::S32, 1 }; + + // Fill reference + fill(lhs, 0); + fill(rhs, 1); + + return reference::gemmlowp_matrix_multiply_core<int32_t, int8_t>(lhs, rhs, dst_shape, 0, 0); + } + } + + bool gemm_validated = true; + TensorType _target{}; + SimpleTensor<int32_t> _reference{}; +}; + +template <typename TensorType, typename AccessorType, typename ReshapeRHSOperatorType, typename GEMMFunctionType> class GEMMLowpMatrixMultiplyReshapedOnlyRHS3DValidationFixture : public framework::Fixture { public: - template <typename...> void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0, bool interleave_rhs, bool transpose_rhs, DataType data_type) { @@ -1382,13 +2008,15 @@ protected: // The output tensor will be auto-initialized within the function // Create and configure function - ReshapeRHSFunctionType reshape_rhs; + ReshapeRHSOperatorType reshape_rhs; GEMMFunctionType gemm; - reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info); - gemm.configure(&lhs, &rhs_reshaped, &dst, gemm_info); + reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info); + gemm.configure(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info); + + ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); - ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS); + add_padding_x({ &lhs, &rhs, &rhs_reshaped, &dst }); // Allocate tensors lhs.allocator()->allocate(); @@ -1396,18 +2024,20 @@ protected: rhs_reshaped.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(lhs), 0); fill(AccessorType(rhs), 1); // Compute GEMM - reshape_rhs.run(); - gemm.run(); + ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } }; + reshape_rhs.run(reshape_rhs_pack); + ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs_reshaped }, { ACL_DST, &dst } }); + gemm.run(gemm_pack); return dst; } @@ -1454,7 +2084,6 @@ template <typename TensorType, typename AccessorType, typename GEMMFunctionType> class GEMMLowpMatrixMultiplyNativeValidationFixture : public framework::Fixture { public: - template <typename...> void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0) { GEMMLHSMatrixInfo lhs_info; @@ -1497,26 +2126,29 @@ protected: // Create and configure function GEMMFunctionType gemm; - gemm.configure(&lhs, &rhs, &dst, lhs_info, rhs_info, GEMMReshapeInfo(M, N, K)); + gemm.configure(lhs.info(), rhs.info(), dst.info(), lhs_info, rhs_info, GEMMReshapeInfo(M, N, K)); - ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); + + add_padding_x({ &lhs, &rhs, &dst }); // Allocate tensors lhs.allocator()->allocate(); rhs.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(lhs), 0); fill(AccessorType(rhs), 1); // Compute GEMM - gemm.run(); + ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs }, { ACL_DST, &dst } }); + gemm.run(gemm_pack); return dst; } @@ -1546,7 +2178,6 @@ template <typename TensorType, typename AccessorType, typename GEMMFunctionType> class GEMMLowpMatrixMultiplyNative3DValidationFixture : public framework::Fixture { public: - template <typename...> void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0) { GEMMLHSMatrixInfo lhs_info; @@ -1592,26 +2223,29 @@ protected: // Create and configure function GEMMFunctionType gemm; - gemm.configure(&lhs, &rhs, &dst, lhs_info, rhs_info, GEMMReshapeInfo(M, N, K, 1, 1, m_h)); + gemm.configure(lhs.info(), rhs.info(), dst.info(), lhs_info, rhs_info, GEMMReshapeInfo(M, N, K, 1, 1, m_h)); + + ARM_COMPUTE_ASSERT(lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(rhs.info()->is_resizable()); - ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS); + add_padding_x({ &lhs, &rhs, &dst }); // Allocate tensors lhs.allocator()->allocate(); rhs.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(lhs), 0); fill(AccessorType(rhs), 1); // Compute GEMM - gemm.run(); + ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs }, { ACL_DST, &dst } }); + gemm.run(gemm_pack); return dst; } @@ -1641,4 +2275,4 @@ protected: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_GEMMLOWP_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_GEMMLOWPFIXTURE_H diff --git a/tests/validation/fixtures/GEMMReshapeLHSMatrixFixture.h b/tests/validation/fixtures/GEMMReshapeLHSMatrixFixture.h index d0855093a7..d88029f93e 100644 --- a/tests/validation/fixtures/GEMMReshapeLHSMatrixFixture.h +++ b/tests/validation/fixtures/GEMMReshapeLHSMatrixFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -46,11 +46,10 @@ namespace validation { using namespace arm_compute::misc::shape_calculator; -template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool reinterpret_input_as_3d = false> +template <typename TensorType, typename AccessorType, typename OperatorType, typename T, bool reinterpret_input_as_3d = false> class GEMMReshapeLHSMatrixValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape_in, unsigned int batch_size, DataType data_type, unsigned int m0, unsigned int k0, unsigned int v0, bool interleave, bool transpose) { GEMMLHSMatrixInfo lhs_info; @@ -86,23 +85,26 @@ protected: // The output tensor will be auto-initialized within the function // Create and configure function - FunctionType gemm_lhs_reshape; - gemm_lhs_reshape.configure(&src, &dst, lhs_info, reinterpret_input_as_3d); + OperatorType gemm_lhs_reshape; + gemm_lhs_reshape.configure(src.info(), dst.info(), lhs_info, reinterpret_input_as_3d); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + + add_padding_x({ &src, &dst }); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src)); // Compute GEMM LHS matrix reshape function - gemm_lhs_reshape.run(); + ITensorPack tensors = { { ACL_SRC, &src }, { ACL_DST, &dst } }; + gemm_lhs_reshape.run(tensors); return dst; } diff --git a/tests/validation/fixtures/GEMMReshapeRHSMatrixFixture.h b/tests/validation/fixtures/GEMMReshapeRHSMatrixFixture.h index 99bfa3bced..0929faf04a 100644 --- a/tests/validation/fixtures/GEMMReshapeRHSMatrixFixture.h +++ b/tests/validation/fixtures/GEMMReshapeRHSMatrixFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -46,11 +46,10 @@ namespace validation { using namespace arm_compute::misc::shape_calculator; -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +template <typename TensorType, typename AccessorType, typename OperatorType, typename T> class GEMMReshapeRHSMatrixValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape_in, unsigned int batch_size, DataType data_type, unsigned int n0, unsigned int k0, unsigned int h0, bool interleave, bool transpose) { GEMMRHSMatrixInfo rhs_info; @@ -85,23 +84,26 @@ protected: // The output tensor will be auto-initialized within the function // Create and configure function - FunctionType gemm_rhs_reshape; - gemm_rhs_reshape.configure(&src, &dst, rhs_info); + OperatorType gemm_rhs_reshape; + gemm_rhs_reshape.configure(src.info(), dst.info(), rhs_info); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + + add_padding_x({ &src, &dst }); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src)); // Compute GEMM RHS matrix reshape function - gemm_rhs_reshape.run(); + ITensorPack tensors = { { ACL_SRC, &src }, { ACL_DST, &dst } }; + gemm_rhs_reshape.run(tensors); return dst; } diff --git a/tests/validation/fixtures/GEMMTranspose1xWFixture.h b/tests/validation/fixtures/GEMMTranspose1xWFixture.h index 2d2e70697a..3765515b57 100644 --- a/tests/validation/fixtures/GEMMTranspose1xWFixture.h +++ b/tests/validation/fixtures/GEMMTranspose1xWFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -46,7 +46,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class GEMMTranspose1xWValidationFixture : public framework::Fixture { public: - template <typename...> void setup(size_t x, size_t y, DataType data_type) { _data_type = data_type; @@ -89,24 +88,25 @@ protected: // Create and configure function FunctionType f; - f.configure(&a, &b); + f.configure(a.info(), b.info()); - ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(b.info()->is_resizable()); // Allocate tensors a.allocator()->allocate(); b.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!a.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!b.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!b.info()->is_resizable()); // Fill tensors fill(AccessorType(a), 0); fill(AccessorType(b), 1); // Compute GEMM function - f.run(); + ITensorPack tensors{ { ACL_SRC, &a }, { ACL_DST, &b } }; + f.run(tensors); return b; } diff --git a/tests/validation/fixtures/GatherFixture.h b/tests/validation/fixtures/GatherFixture.h index 0a9f8c1d15..857b0387b7 100644 --- a/tests/validation/fixtures/GatherFixture.h +++ b/tests/validation/fixtures/GatherFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -47,7 +47,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class GatherFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape input_shape, TensorShape indices_shape, int axis, DataType data_type) { _target = compute_target(input_shape, data_type, axis, indices_shape); @@ -67,9 +66,11 @@ protected: std::mt19937 gen(library->seed()); uint32_t *indices_ptr = static_cast<uint32_t *>(indices.data()); - std::uniform_int_distribution<uint32_t> dist_index(0, input_shape[actual_axis] - 1); - //Let's consider 1D indices - for(unsigned int ind = 0; ind < indices_shape[0]; ind++) + // 10% of the time the index is out-of-range. + uint32_t max_index = input_shape[actual_axis] + input_shape[actual_axis] / 9 + 1; + std::uniform_int_distribution<uint32_t> dist_index(0, max_index - 1); + + for(unsigned int ind = 0; ind < indices_shape.total_size(); ind++) { indices_ptr[ind] = dist_index(gen); } @@ -91,18 +92,18 @@ protected: FunctionType gather; gather.configure(&src, &indices_tensor, &dst, axis); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(indices_tensor.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(indices_tensor.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); indices_tensor.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!indices_tensor.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!indices_tensor.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src)); diff --git a/tests/validation/fixtures/Im2ColFixture.h b/tests/validation/fixtures/Im2ColFixture.h index b6cf18bd4c..5c7978f4ab 100644 --- a/tests/validation/fixtures/Im2ColFixture.h +++ b/tests/validation/fixtures/Im2ColFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -45,10 +45,9 @@ namespace validation using namespace arm_compute::misc::shape_calculator; template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool batch_size_on_z> -class Im2ColValidationFixture : public framework::Fixture +class Im2ColOpValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape input_shape, DataType data_type, const Size2D &kernel_dims, const PadStrideInfo &conv_info, const QuantizationInfo &quant_info, const DataLayout &data_layout, unsigned int num_groups) { @@ -88,23 +87,28 @@ protected: // Create and configure function FunctionType im2col_func; - im2col_func.configure(&src, &dst, _kernel_dims, _conv_info, _has_bias, Size2D(1U, 1U), _num_groups); + im2col_func.configure(src.info(), dst.info(), _kernel_dims, _conv_info, _has_bias, Size2D(1U, 1U), _num_groups); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src)); + arm_compute::ITensorPack pack = + { + { arm_compute::TensorType::ACL_SRC, &src }, + { arm_compute::TensorType::ACL_DST, &dst } + }; // Compute function - im2col_func.run(); + im2col_func.run(pack); return dst; } diff --git a/tests/validation/fixtures/IndirectConv2dAddressPrecalculationFixture.h b/tests/validation/fixtures/IndirectConv2dAddressPrecalculationFixture.h new file mode 100644 index 0000000000..7374093f51 --- /dev/null +++ b/tests/validation/fixtures/IndirectConv2dAddressPrecalculationFixture.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2022-2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_TEST_INDIRECT_CONV2D_ADDRESS_PRECALCULATION_FIXTURE +#define ARM_COMPUTE_TEST_INDIRECT_CONV2D_ADDRESS_PRECALCULATION_FIXTURE + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "tests/Globals.h" +#include "tests/framework/Fixture.h" +#include "tests/validation/Helpers.h" +#include "tests/validation/reference/IndirectConv2dAddressPrecalculation.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +using namespace arm_compute::misc::shape_calculator; + +template <typename TensorType, typename AccessorType, typename OperatorType> +class IndirectConv2dAddressPrecalculationValidationFixture : public framework::Fixture +{ +public: + void setup(unsigned int src_w, + unsigned int src_h, + unsigned int src_b, + unsigned int wei_w, + unsigned int wei_h, + unsigned int pad, + unsigned int stride, + unsigned int m0) + { + DirectConvComputeKernelInfo desc; + desc.m0 = m0; + desc.n0 = 1; // Not used by the kernel + desc.k0 = 1; // Not used by the kernel + desc.export_weights_to_cl_image = false; // Not used by the kernel + + const PadStrideInfo conv_info(stride, stride, pad, pad); + + const TensorShape shape_conv_src(23, // The input channels are not used by the kernel + src_w, + src_h, + src_b); + + const TensorShape shape_conv_wei(23, // The input channels are not used by the kernel + wei_w, + wei_h, + 23 // The output channels are not used by the kernel + ); + + // The result of the kernel does not change with the datatype. Hence, we can fix it to Fp16 for validation purposes + const DataType data_type = DataType::F16; + + _target = compute_target(shape_conv_src, shape_conv_wei, data_type, conv_info, desc); + _reference = compute_reference(shape_conv_src, shape_conv_wei, data_type, conv_info, desc); + } + +protected: + TensorType compute_target(TensorShape shape_conv_src, TensorShape shape_conv_wei, DataType data_type, const PadStrideInfo &conv_info, const DirectConvComputeKernelInfo &desc) + { + TensorInfo src_conv_info(shape_conv_src, 1, data_type, DataLayout::NHWC); + TensorInfo wei_conv_info(shape_conv_wei, 1, data_type, DataLayout::NHWC); + TensorType dst; + + // The output tensor will be auto-initialized within the function + + // Create and configure function + OperatorType func; + func.configure(&src_conv_info, &wei_conv_info, dst.info(), conv_info, desc); + + add_padding_x({ &dst }); + + // Allocate tensors + dst.allocator()->allocate(); + + // Compute GEMM LHS matrix reshape function + ITensorPack tensors = { { ACL_DST, &dst } }; + func.run(tensors); + + return dst; + } + + SimpleTensor<int32_t> compute_reference(TensorShape shape_conv_src, TensorShape shape_conv_wei, DataType data_type, const PadStrideInfo &conv_info, const DirectConvComputeKernelInfo &desc) + { + ARM_COMPUTE_UNUSED(data_type); + TensorShape shape_out = compute_indirect_buffer_shape(shape_conv_src, DataLayout::NHWC, shape_conv_wei, conv_info, desc); + TensorShape output_conv_shape = compute_deep_convolution_shape(shape_conv_src, DataLayout::NHWC, shape_conv_wei, conv_info); + + return reference::indirect_conv2d_addr_precalculation(shape_conv_src, shape_conv_wei, output_conv_shape, shape_out, conv_info); + } + + TensorType _target{}; + SimpleTensor<int32_t> _reference{}; +}; +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_INDIRECT_CONV2D_ADDRESS_PRECALCULATION_FIXTURE */
\ No newline at end of file diff --git a/tests/validation/fixtures/InstanceNormalizationLayerFixture.h b/tests/validation/fixtures/InstanceNormalizationLayerFixture.h index 611d9aae5b..c26dd99f02 100644 --- a/tests/validation/fixtures/InstanceNormalizationLayerFixture.h +++ b/tests/validation/fixtures/InstanceNormalizationLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,7 +44,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class InstanceNormalizationLayerValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, DataType data_type, DataLayout data_layout, bool in_place) { _target = compute_target(shape, data_type, data_layout, in_place); @@ -86,10 +85,10 @@ protected: FunctionType instance_norm_func; instance_norm_func.configure(&src, in_place ? nullptr : &dst, gamma, beta, epsilon); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); if(!in_place) { - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); } // Allocate tensors @@ -99,10 +98,10 @@ protected: dst.allocator()->allocate(); } - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); if(!in_place) { - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); } // Fill tensors diff --git a/tests/validation/fixtures/L2NormalizeLayerFixture.h b/tests/validation/fixtures/L2NormalizeLayerFixture.h index 349c0904eb..b8f4b1eaf3 100644 --- a/tests/validation/fixtures/L2NormalizeLayerFixture.h +++ b/tests/validation/fixtures/L2NormalizeLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -48,7 +48,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class L2NormalizeLayerValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, DataType data_type, DataLayout data_layout, int axis, float epsilon) { _target = compute_target(shape, data_type, data_layout, axis, epsilon); @@ -81,15 +80,15 @@ protected: FunctionType l2_norm_func; l2_norm_func.configure(&src, &dst, axis, epsilon); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src)); diff --git a/tests/validation/fixtures/LSTMLayerFixture.h b/tests/validation/fixtures/LSTMLayerFixture.h index 366d050039..a32e9adfe5 100644 --- a/tests/validation/fixtures/LSTMLayerFixture.h +++ b/tests/validation/fixtures/LSTMLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -46,7 +46,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class LSTMLayerValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape input_shape, TensorShape input_weights_shape, TensorShape recurrent_weights_shape, TensorShape cell_bias_shape, TensorShape output_cell_shape, TensorShape output_shape, TensorShape scratch_shape, ActivationLayerInfo info, float cell_threshold, float projection_threshold, DataType data_type, bool projection_opt, bool peephole_opt, bool use_layer_norm) @@ -167,22 +166,22 @@ protected: &scratch, &output_state_out, &cell_state_out, &output, lstm_params, info, cell_threshold, projection_threshold); - ARM_COMPUTE_EXPECT(input.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(input_to_forget_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(input_to_cell_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(input_to_output_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(recurrent_to_forget_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(recurrent_to_cell_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(recurrent_to_output_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(forget_gate_bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(cell_bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(output_gate_bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(output_state_in.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(cell_state_in.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(scratch.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(output_state_out.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(cell_state_out.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(output.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(input.info()->is_resizable()); + ARM_COMPUTE_ASSERT(input_to_forget_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(input_to_cell_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(input_to_output_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(recurrent_to_forget_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(recurrent_to_cell_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(recurrent_to_output_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(forget_gate_bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(cell_bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(output_gate_bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(output_state_in.info()->is_resizable()); + ARM_COMPUTE_ASSERT(cell_state_in.info()->is_resizable()); + ARM_COMPUTE_ASSERT(scratch.info()->is_resizable()); + ARM_COMPUTE_ASSERT(output_state_out.info()->is_resizable()); + ARM_COMPUTE_ASSERT(cell_state_out.info()->is_resizable()); + ARM_COMPUTE_ASSERT(output.info()->is_resizable()); // Allocate tensors input.allocator()->allocate(); @@ -202,22 +201,22 @@ protected: cell_state_out.allocator()->allocate(); output.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!input.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!input_to_forget_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!input_to_cell_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!input_to_output_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!recurrent_to_forget_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!recurrent_to_cell_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!recurrent_to_output_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!forget_gate_bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!cell_bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!output_gate_bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!output_state_in.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!cell_state_in.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!scratch.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!output_state_out.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!cell_state_out.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!output.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!input.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!input_to_forget_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!input_to_cell_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!input_to_output_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!recurrent_to_forget_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!recurrent_to_cell_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!recurrent_to_output_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!forget_gate_bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!cell_bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!output_gate_bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!output_state_in.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!cell_state_in.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!scratch.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!output_state_out.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!cell_state_out.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!output.info()->is_resizable()); // Fill tensors fill(AccessorType(input), 0); @@ -236,18 +235,18 @@ protected: if(!cifg_opt) { - ARM_COMPUTE_EXPECT(input_to_input_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(recurrent_to_input_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(cell_to_input_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(input_gate_bias.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(input_to_input_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(recurrent_to_input_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(cell_to_input_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(input_gate_bias.info()->is_resizable()); input_to_input_w.allocator()->allocate(); recurrent_to_input_w.allocator()->allocate(); cell_to_input_w.allocator()->allocate(); input_gate_bias.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!input_to_input_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!recurrent_to_input_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!cell_to_input_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!input_gate_bias.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!input_to_input_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!recurrent_to_input_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!cell_to_input_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!input_gate_bias.info()->is_resizable()); fill(AccessorType(input_to_input_w), 13); fill(AccessorType(recurrent_to_input_w), 14); if(peephole_opt) @@ -260,26 +259,26 @@ protected: if(peephole_opt) { - ARM_COMPUTE_EXPECT(cell_to_forget_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(cell_to_output_w.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(cell_to_forget_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(cell_to_output_w.info()->is_resizable()); cell_to_forget_w.allocator()->allocate(); cell_to_output_w.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!cell_to_forget_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!cell_to_output_w.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!cell_to_forget_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!cell_to_output_w.info()->is_resizable()); fill(AccessorType(cell_to_forget_w), 18); fill(AccessorType(cell_to_output_w), 19); } if(projection_opt) { - ARM_COMPUTE_EXPECT(projection_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(projection_bias.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(projection_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(projection_bias.info()->is_resizable()); projection_w.allocator()->allocate(); projection_bias.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!projection_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!projection_bias.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!projection_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!projection_bias.info()->is_resizable()); fill(AccessorType(projection_w), 20); fill(AccessorType(projection_bias), 21); @@ -289,25 +288,25 @@ protected: { if(!cifg_opt) { - ARM_COMPUTE_EXPECT(input_layer_norm_w.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(input_layer_norm_w.info()->is_resizable()); input_layer_norm_w.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!input_layer_norm_w.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!input_layer_norm_w.info()->is_resizable()); fill(AccessorType(input_layer_norm_w), 22); } - ARM_COMPUTE_EXPECT(forget_layer_norm_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(cell_layer_norm_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(output_layer_norm_w.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(forget_layer_norm_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(cell_layer_norm_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(output_layer_norm_w.info()->is_resizable()); forget_layer_norm_w.allocator()->allocate(); cell_layer_norm_w.allocator()->allocate(); output_layer_norm_w.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!forget_layer_norm_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!cell_layer_norm_w.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!output_layer_norm_w.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!forget_layer_norm_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!cell_layer_norm_w.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!output_layer_norm_w.info()->is_resizable()); fill(AccessorType(forget_layer_norm_w), 23); fill(AccessorType(cell_layer_norm_w), 24); @@ -458,7 +457,6 @@ protected: } input_gate = reference::activation_layer(input_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)); } - // Compute cell_state SimpleTensor<T> fully_connected_cell_state = reference::fully_connected_layer(input, input_to_cell_w, cell_bias, output_cell_shape); transposed_weights = reference::transpose(recurrent_to_cell_w); @@ -474,12 +472,13 @@ protected: fill(cell_bias, 8); cell_state_out = reference::arithmetic_operation(reference::ArithmeticOperation::ADD, cell_state_out, cell_bias, data_type, ConvertPolicy::SATURATE); } - cell_state_out = reference::activation_layer(cell_state_out, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)); + cell_state_out = reference::activation_layer(cell_state_out, info); cell_state_out = reference::pixel_wise_multiplication<T, T, T>(cell_state_out, input_gate, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN, data_type); cell_state_out = reference::arithmetic_operation(reference::ArithmeticOperation::ADD, cell_state_out, pixelwise_mul, data_type, ConvertPolicy::SATURATE); + if(cell_threshold != 0.f) { - cell_state_out = reference::activation_layer(cell_state_out, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -cell_threshold, cell_threshold)); + cell_state_out = reference::activation_layer(cell_state_out, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, cell_threshold, -cell_threshold)); } // Compute output @@ -515,7 +514,6 @@ protected: output_state_out = reference::activation_layer(fully_connected_projection, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -projection_threshold, projection_threshold)); } } - std::vector<SimpleTensor<T>> scratch_inputs; if(!cifg_opt) { diff --git a/tests/validation/fixtures/LogicalFixture.h b/tests/validation/fixtures/LogicalFixture.h index 9f64d89d10..60dc963ba7 100644 --- a/tests/validation/fixtures/LogicalFixture.h +++ b/tests/validation/fixtures/LogicalFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Arm Limited. + * Copyright (c) 2020-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -60,9 +60,9 @@ protected: { for(auto t : tensors) { - ARM_COMPUTE_EXPECT(t->info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(t->info()->is_resizable()); t->allocator()->allocate(); - ARM_COMPUTE_EXPECT(!t->info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!t->info()->is_resizable()); } } @@ -79,7 +79,6 @@ class LogicalBinaryOperationValidationFixture : public LogicalOperationValidatio using Parent = LogicalOperationValidationFixtureBase<TensorType, AccessorType, FunctionType, T>; public: - template <typename...> void setup(TensorShape shape0, TensorShape shape1) { Parent::_target = compute_target(shape0, shape1); @@ -135,7 +134,6 @@ class LogicalNotValidationFixture : public LogicalOperationValidationFixtureBase using Parent = LogicalOperationValidationFixtureBase<TensorType, AccessorType, FunctionType, T>; public: - template <typename...> void setup(TensorShape shape, DataType data_type) { Parent::_target = compute_target(shape, data_type); diff --git a/tests/validation/fixtures/MatMulFixture.h b/tests/validation/fixtures/MatMulFixture.h new file mode 100644 index 0000000000..ffd12e56d0 --- /dev/null +++ b/tests/validation/fixtures/MatMulFixture.h @@ -0,0 +1,612 @@ +/* + * Copyright (c) 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_MATMULFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_MATMULFIXTURE_H + +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" + +#include "src/core/utils/quantization/AsymmHelpers.h" +#include "tests/framework/Asserts.h" // Required for ARM_COMPUTE_ASSERT +#include "tests/framework/Fixture.h" +#include "tests/validation/reference/ActivationLayer.h" +#include "tests/validation/reference/GEMM.h" +#include "tests/validation/reference/GEMMLowp.h" +#include "tests/validation/reference/Permute.h" +#include "tests/validation/reference/ReshapeLayer.h" +#include "tests/validation/Validation.h" + +#include <limits> +#include <random> +#include <type_traits> + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename Settings, typename T> +class MatMulGenericValidationFixture : public framework::Fixture +{ +public: + void setup(TensorShape shape_a, + TensorShape shape_b, + TensorShape output_shape, + bool transpose_a, + bool transpose_b, + DataType data_type, + ActivationLayerInfo act_info, + int num_extra_runs, + Settings settings, + QuantizationInfo a_qinfo = QuantizationInfo(), + QuantizationInfo b_qinfo = QuantizationInfo(), + QuantizationInfo o_qinfo = QuantizationInfo()) + { + // For brevity, the input shapes are assumed to be not-transposed for both a and b matrices. + if (transpose_a) + { + permute(shape_a, PermutationVector(1U, 0U)); + } + if (transpose_b) + { + permute(shape_b, PermutationVector(1U, 0U)); + } + + _target = compute_target(shape_a, shape_b, output_shape, transpose_a, transpose_b, data_type, act_info, + num_extra_runs, settings, a_qinfo, b_qinfo, o_qinfo); + _reference = compute_reference(shape_a, shape_b, output_shape, transpose_a, transpose_b, data_type, act_info, + a_qinfo, b_qinfo, o_qinfo); + } + +protected: + template <typename U> + void fill(U &&tensor, int i, float lo = -1.f, float hi = 1.f) + { + switch (tensor.data_type()) + { + case DataType::BFLOAT16: + { + arm_compute::utils::uniform_real_distribution_16bit<bfloat16> distribution{float(lo), float(hi)}; + library->fill(tensor, distribution, i); + break; + } + case DataType::F16: + { + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{float(lo), float(hi)}; + library->fill(tensor, distribution, i); + break; + } + case DataType::F32: + { + std::uniform_real_distribution<float> distribution(lo, hi); + library->fill(tensor, distribution, i); + break; + } + case DataType::QASYMM8: + case DataType::QASYMM8_SIGNED: + { + library->fill_tensor_uniform(tensor, i); + break; + } + default: + { + ARM_COMPUTE_ERROR("Unsupported data type."); + } + } + } + + virtual TensorType compute_target(const TensorShape &shape_a, + const TensorShape &shape_b, + const TensorShape &output_shape, + bool transpose_a, + bool transpose_b, + DataType data_type, + ActivationLayerInfo act_info, + int num_extra_runs, + const Settings &settings, + QuantizationInfo a_qinfo, + QuantizationInfo b_qinfo, + QuantizationInfo o_qinfo) + { + // 1. Create Classes and configure function + // ---------------------------------------------------- + // Create tensors + // Configure relevant classes and matmul function + TensorType a = create_tensor<TensorType>(shape_a, data_type, 1, a_qinfo); + TensorType b = create_tensor<TensorType>(shape_b, data_type, 1, b_qinfo); + TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1, o_qinfo); + + FunctionType matmul; + + // Configure MatMulInfo class + MatMulInfo mm_info; + mm_info.adj_lhs(transpose_a).adj_rhs(transpose_b); + + // Ensure values are dynamic + a.info()->set_are_values_constant(false); + b.info()->set_are_values_constant(false); + + // Configure operator + matmul.configure(&a, &b, &dst, mm_info, settings, act_info); + + // Assertions + ARM_COMPUTE_ASSERT(a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(b.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + + // Allocate tensors + a.allocator()->allocate(); + b.allocator()->allocate(); + dst.allocator()->allocate(); + + ARM_COMPUTE_ASSERT(!a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!b.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + + // For multiple runs. + for (int i = 0; i < num_extra_runs; i++) + { + // Stress dynamic tensors by running multiple times. + // -------------------------------------------------------- + // Fill tensors with new seed + // Run function + const int seed_offset = num_extra_runs * 100; + fill(AccessorType(a), seed_offset); + fill(AccessorType(b), seed_offset + 1); + + matmul.run(); + } + + // 2. Final Run for reference comparison + // -------------------------------------------------------- + // Re-fill tensors same seed as reference run + // Compute MatMul operation + fill(AccessorType(a), 2); + fill(AccessorType(b), 3); + + matmul.run(); + + return dst; + } + + template <typename TT> + typename std::enable_if < !std::is_integral<TT>::value, SimpleTensor<TT >>::type + compute_reference_gemm(const SimpleTensor<TT> &a, + const SimpleTensor<TT> &b, + const SimpleTensor<TT> &c, + float alpha, + float beta, + const QuantizationInfo &o_qinfo) + { + ARM_COMPUTE_UNUSED(o_qinfo); + + return reference::gemm(a, b, c, alpha, beta); + } + + template <typename TT> + typename std::enable_if<std::is_integral<TT>::value, SimpleTensor<TT>>::type + compute_reference_gemm(const SimpleTensor<TT> &a, + const SimpleTensor<TT> &b, + const SimpleTensor<TT> &c, + float alpha, + float beta, + const QuantizationInfo &o_qinfo) + { + ARM_COMPUTE_UNUSED(alpha, beta); + + const auto aq = a.quantization_info().uniform(); + const auto bq = b.quantization_info().uniform(); + const auto oq = o_qinfo.uniform(); + + const auto multiplier = aq.scale * bq.scale / oq.scale; + + int32_t output_multiplier = 0; + int32_t output_shift = 0; + quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift); + std::vector<int32_t> output_multipliers{output_multiplier}; + std::vector<int32_t> output_shifts{output_shift}; + + //The lhs and rhs offsets are negated here to keep the reference aligned with the function implementation where the lhs and rhs offsets are also negated. + const auto tmp = reference::gemmlowp_matrix_multiply_core<int32_t>(a, b, c.shape(), -aq.offset, -bq.offset); + + auto output = reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, TT>( + tmp, output_multipliers, output_shifts, oq.offset, std::numeric_limits<int32_t>::lowest(), + std::numeric_limits<int32_t>::max()); + output.quantization_info(o_qinfo); + + return output; + } + + SimpleTensor<T> compute_reference(const TensorShape &a_shape, + const TensorShape &b_shape, + const TensorShape &output_shape, + bool transpose_a, + bool transpose_b, + DataType data_type, + ActivationLayerInfo act_info, + QuantizationInfo a_qinfo, + QuantizationInfo b_qinfo, + QuantizationInfo o_qinfo) + { + // We collapse dimensions > 2 onto dimension 2, i.e. 4D+ tensors will look like 3D + // This is necessary unless we choose to extend gemm reference for 4D+ tensors + TensorShape output_shape_collapsed = output_shape.collapsed_from(Window::DimZ); + TensorShape a_shape_collapsed = a_shape.collapsed_from(Window::DimZ); + TensorShape b_shape_collapsed = b_shape.collapsed_from(Window::DimZ); + + // Create reference + SimpleTensor<T> a{a_shape_collapsed, data_type, 1, a_qinfo}; + SimpleTensor<T> b{b_shape_collapsed, data_type, 1, b_qinfo}; + SimpleTensor<T> c{output_shape_collapsed, data_type, 1}; + + // Fill reference + fill(a, 2); + fill(b, 3); + + /* Note: Assuming the usual batch matmul dimensions A = (B x M x K), B = (B x K x N), if transpose_a is set to true, then A is assumed to be (B x K x M), + therefore, A must be pre-transposed before passing it to the fixture. And, we transpose A again in the fixture to make it (B x M x K) + in order to be able to call reference implementation that works with (B x M x K) input. + Similarly, if transpose_b is set to true, then B is assumed to be (B x N x K), B must be pre-transposed before passing it to the fixture. */ + + // Define transposed shapes + TensorShape a_transposed_shape(a.shape()); + a_transposed_shape.set(0, a.shape().y()); + a_transposed_shape.set(1, a.shape().x()); + + TensorShape b_transposed_shape(b.shape()); + b_transposed_shape.set(0, b.shape().y()); + b_transposed_shape.set(1, b.shape().x()); + + // Define transposed tensors + SimpleTensor<T> a_transposed{a_transposed_shape, data_type}; + SimpleTensor<T> b_transposed{b_transposed_shape, data_type}; + + // pretranspose a if necessary + if (transpose_a) + { + a_transposed = reference::permute<T>(a, PermutationVector(1U, 0U)); + } + // pretranspose b if necessary + if (transpose_b) + { + b_transposed = reference::permute<T>(b, PermutationVector(1U, 0U)); + } + + // Setting beta to 0 will effectively disable C for the + // computation of the reference: alpha * A * B + 0 * C + // Use transposed tensors if boolean enabled else use original tensors + auto result = compute_reference_gemm<T>((transpose_a) ? a_transposed : a, (transpose_b) ? b_transposed : b, c, + 1.0f, 0.f, o_qinfo); + + result = reference::activation_layer<T>(result, act_info, o_qinfo); + + // We reshape the gemm output back if the tensor is high dimensional + if (output_shape_collapsed != output_shape) + { + result = reference::reshape_layer(result, output_shape); + } + + return result; + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; +}; + +/// TODO: (ONCPUML-1451) The current state of this fixture is interim and a longer-term testing method will be implemented later. +/// @note: Currently we support only a 2x2 test due to the lack of reorder ref. implementation. +template <typename TensorType, typename AccessorType, typename FunctionType, typename Settings, typename T> +class MatMulFixedFormatFixture + : public MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T> +{ +public: + TensorType compute_target(const TensorShape &shape_a, + const TensorShape &shape_b, + const TensorShape &output_shape, + bool transpose_a, + bool transpose_b, + DataType data_type, + ActivationLayerInfo act_info, + int num_extra_runs, + const Settings &settings, + QuantizationInfo a_qinfo, + QuantizationInfo b_qinfo, + QuantizationInfo o_qinfo) override + { + // 1. Create Classes and configure function + // ---------------------------------------------------- + // Create tensors + // Configure relevant classes and matmul function + TensorType a = create_tensor<TensorType>(shape_a, data_type, 1, a_qinfo); + TensorType b = create_tensor<TensorType>(shape_b, data_type, 1, b_qinfo); + TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1, o_qinfo); + + const auto weight_tensor_info = TensorInfo(*b.info()); + const TensorInfo new_tensor_info = prepare_weights(weight_tensor_info); + TensorType weights_transformed = create_tensor<TensorType>(new_tensor_info); + + // Configure MatMulInfo class + MatMulInfo mm_info; + mm_info.adj_lhs(transpose_a).adj_rhs(transpose_b); + + // Ensure values are dynamic + a.info()->set_are_values_constant(false); + b.info()->set_are_values_constant(false); + weights_transformed.info()->set_are_values_constant(false); + + FunctionType matmul; + + // Configure operator + matmul.configure(&a, &weights_transformed, &dst, mm_info, settings, act_info); + + // Assertions + ARM_COMPUTE_ASSERT(a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(b.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + ARM_COMPUTE_ASSERT(weights_transformed.info()->is_resizable()); + + // Allocate tensors + a.allocator()->allocate(); + b.allocator()->allocate(); + dst.allocator()->allocate(); + weights_transformed.allocator()->allocate(); + + ARM_COMPUTE_ASSERT(!a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!b.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!weights_transformed.info()->is_resizable()); + + // For multiple runs. + for (int i = 0; i < num_extra_runs; i++) + { + // Stress dynamic tensors by running multiple times. + // -------------------------------------------------------- + // Fill tensors with new seed + // Run function + const int seed_offset = num_extra_runs * 100; + this->fill(AccessorType(a), seed_offset); + this->fill(AccessorType(b), seed_offset + 1); + + matmul.run(); + } + + // 2. Final Run for reference comparison + // -------------------------------------------------------- + // Re-fill tensors same seed as reference run + // Compute MatMul operation + this->fill(AccessorType(a), 2); + this->fill(AccessorType(b), 3); + + rearrange_data(AccessorType(b), AccessorType(weights_transformed)); + + matmul.run(); + + return dst; + } + + void setup(TensorShape shape_a, + TensorShape shape_b, + TensorShape output_shape, + bool transpose_a, + bool transpose_b, + DataType data_type, + ActivationLayerInfo act_info, + int num_extra_runs, + Settings settings, + QuantizationInfo a_qinfo, + QuantizationInfo b_qinfo, + QuantizationInfo o_qinfo) + { + if (CPUInfo::get().has_bf16()) + { + MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T>::setup( + shape_a, shape_b, output_shape, transpose_a, transpose_b, data_type, act_info, num_extra_runs, settings, + a_qinfo, b_qinfo, o_qinfo); + } + } + +private: + TensorInfo prepare_weights(const TensorInfo tensor_info) + { + const DataLayout data_layout = tensor_info.data_layout(); + ARM_COMPUTE_EXPECT(data_layout == DataLayout::NCHW, framework::LogLevel::ERRORS); + const DataType data_type = tensor_info.data_type(); + const TensorShape tensor_shape = tensor_info.tensor_shape(); + const int H = tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT)]; + const int W = tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH)]; + ARM_COMPUTE_EXPECT(H <= 2 && W <= 2, framework::LogLevel::ERRORS); + + arm_compute::Strides strides_in_bytes = tensor_info.strides_in_bytes(); + strides_in_bytes.set(1, 32); + strides_in_bytes.set(2, 32); + + const size_t offset_first_element_in_bytes = tensor_info.offset_first_element_in_bytes(); + const size_t total_size_in_bytes = 32; + + const TensorShape TS(H, W); + + TensorInfo new_tensor_info = tensor_info; + new_tensor_info.init(TS, tensor_info.num_channels(), data_type, strides_in_bytes, offset_first_element_in_bytes, + total_size_in_bytes); + + return new_tensor_info; + } + + void rearrange_data(const AccessorType src, AccessorType dst) + { + const TensorShape src_tensor_shape = src.shape(); + const DataLayout data_layout = src.data_layout(); + ARM_COMPUTE_EXPECT(data_layout == DataLayout::NCHW, framework::LogLevel::ERRORS); + const unsigned int O = + src_tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES)]; // N=O + const unsigned int H = + src_tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT)]; + const unsigned int W = + src_tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH)]; + const unsigned int I = + src_tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL)]; // C=I + ARM_COMPUTE_EXPECT(H <= 2 && W <= 2, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(I == 1 && O == 1, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(src.num_elements() <= dst.num_elements(), framework::LogLevel::ERRORS); + + const T *src_ptr = reinterpret_cast<const T *>(src.data()); + T *dst_ptr = reinterpret_cast<T *>(dst.data()); + + // rearrange indexes for 2x2 input and weight + int dst_idx[] = {0, 4, 1, 5}; + for (int i = 0; i < 4; i++) + { + dst_ptr[dst_idx[i]] = src_ptr[i]; + } + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename Settings, typename T> +class MatMulValidationFixture + : public MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T> +{ +public: + void setup(TensorShape shape_a, + TensorShape shape_b, + TensorShape output_shape, + bool transpose_a, + bool transpose_b, + DataType data_type) + { + MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T>::setup( + shape_a, shape_b, output_shape, transpose_a, transpose_b, data_type, ActivationLayerInfo(), 0, Settings()); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename Settings, typename T> +class MatMulValidationWithDynamicTensorsFixture + : public MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T> +{ +public: + void setup(TensorShape shape_a, + TensorShape shape_b, + TensorShape output_shape, + bool transpose_a, + bool transpose_b, + DataType data_type, + ActivationLayerInfo act_info, + int num_extra_runs) + { + MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T>::setup( + shape_a, shape_b, output_shape, transpose_a, transpose_b, data_type, act_info, num_extra_runs, Settings()); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename Settings, typename T> +class QuantizedMatMulValidationFixture + : public MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T> +{ +public: + void setup(TensorShape shape_a, + TensorShape shape_b, + TensorShape output_shape, + bool transpose_a, + bool transpose_b, + DataType data_type, + ActivationLayerInfo act_info, + int num_extra_runs, + QuantizationInfo a_qinfo, + QuantizationInfo b_qinfo, + QuantizationInfo o_qinfo) + { + MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T>::setup( + shape_a, shape_b, output_shape, transpose_a, transpose_b, data_type, act_info, num_extra_runs, Settings(), + a_qinfo, b_qinfo, o_qinfo); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename Settings, typename T> +class MatMulValidationWithActivationFixture + : public MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T> +{ +public: + void setup(TensorShape shape_a, + TensorShape shape_b, + TensorShape output_shape, + bool transpose_a, + bool transpose_b, + DataType data_type, + ActivationLayerInfo act_info) + { + MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T>::setup( + shape_a, shape_b, output_shape, transpose_a, transpose_b, data_type, act_info, 0, Settings()); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename Settings, typename T> +class MatMulValidationWithActivationAlphaBetaFixture + : public MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T> +{ +public: + void setup(TensorShape shape_a, + TensorShape shape_b, + TensorShape output_shape, + bool transpose_a, + bool transpose_b, + DataType data_type, + ActivationLayerInfo::ActivationFunction function, + float alpha_beta) + { + ActivationLayerInfo act_info(function, alpha_beta, alpha_beta); + MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T>::setup( + shape_a, shape_b, output_shape, transpose_a, transpose_b, data_type, act_info, 0, Settings()); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename Settings, typename T> +class QuantizedMatMulValidationWithActivationFixture + : public MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T> +{ +public: + void setup(TensorShape shape_a, + TensorShape shape_b, + TensorShape output_shape, + bool transpose_a, + bool transpose_b, + DataType data_type, + ActivationLayerInfo::ActivationFunction function, + float alpha_beta, + int num_extra_runs, + QuantizationInfo a_qinfo, + QuantizationInfo b_qinfo, + QuantizationInfo o_qinfo) + { + ActivationLayerInfo act_info(function, alpha_beta, alpha_beta); + MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T>::setup( + shape_a, shape_b, output_shape, transpose_a, transpose_b, data_type, act_info, num_extra_runs, Settings(), + a_qinfo, b_qinfo, o_qinfo); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_MATMULFIXTURE_H diff --git a/tests/validation/fixtures/MatMulKernelFixture.h b/tests/validation/fixtures/MatMulKernelFixture.h new file mode 100644 index 0000000000..26072dff65 --- /dev/null +++ b/tests/validation/fixtures/MatMulKernelFixture.h @@ -0,0 +1,390 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_MATMULKERNELFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_MATMULKERNELFIXTURE_H + +#include "arm_compute/core/KernelDescriptors.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/CL/Helper.h" +#include "tests/framework/Asserts.h" // Required for ARM_COMPUTE_ASSERT +#include "tests/framework/Fixture.h" +#include "tests/validation/Helpers.h" +#include "tests/validation/Validation.h" +#include "tests/validation/reference/GEMM.h" +#include "tests/validation/reference/GEMMLowp.h" +#include "tests/validation/reference/Permute.h" +#include "tests/validation/reference/ReshapeLayer.h" +#include <cmath> +#include <random> + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +using namespace arm_compute::opencl::kernels; + +template <typename T, typename KernelType, bool use_mmul = false> +class MatMulKernelGenericValidationFixture : public framework::Fixture +{ +public: + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape output_shape, bool pretranspose_a, bool pretranspose_b, int M0, int N0, int K0, bool export_rhs_to_cl_image, DataType data_type, + bool enable_bias) + { + // This hash is used by random generators. There may be hash collisions but + // this is intentional as it's a very easy way to make the the current + // random generation process almost different for many test configurations, + // which were using the same set of values before. + _hash = M0 + N0 + K0 + shape_a[0] + shape_a[1] + shape_b[0] + shape_b[1] + enable_bias + export_rhs_to_cl_image; + + // Flag to create a bias + _enable_bias = enable_bias; + + // For brevity, the input shapes are assumed to be not-transposed for both Lhs and Rhs matrices. + QuantizationInfo lhs_q_info; + QuantizationInfo rhs_q_info; + QuantizationInfo dst_q_info; + + if(is_data_type_quantized(data_type)) + { + const int32_t t_max = static_cast<int32_t>(std::numeric_limits<T>::max()); + const int32_t t_min = static_cast<int32_t>(std::numeric_limits<T>::min()); + + std::mt19937 generator(library->seed() + _hash); + std::uniform_real_distribution<float> distribution_float(-5.0f, 3.0f); + std::uniform_int_distribution<int32_t> distribution_t(t_min, t_max); + + const float scale_lhs = pow(2, distribution_float(generator)); // [2^-5, 2^3] + const float scale_rhs = pow(2, distribution_float(generator)); // [2^-5, 2^3] + + const int32_t offset_lhs = distribution_t(generator); + const int32_t offset_rhs = distribution_t(generator); + + lhs_q_info = QuantizationInfo(scale_lhs, offset_lhs); + rhs_q_info = QuantizationInfo(scale_rhs, offset_rhs); + + const int m = shape_a.y(); + const int n = shape_b.x(); + const int k = shape_a.x(); + + const float bias_fraction = enable_bias ? 0.5f : 0.f; + + QuantizationHint q_hint = suggest_matmul_dst_q_info_and_bias(lhs_q_info, rhs_q_info, m, n, k, data_type, bias_fraction); + dst_q_info = q_hint.q_info; + _min_bias = q_hint.bias_min; + _max_bias = q_hint.bias_max; + } + + if(pretranspose_a) + { + permute(shape_a, PermutationVector(1U, 0U)); + } + + if(pretranspose_b) + { + permute(shape_b, PermutationVector(1U, 0U)); + } + + // Skip configurations unsupported by the device. + _device_supports_export_to_cl_image = image2d_from_buffer_supported(CLKernelLibrary::get().get_device()); + if(!_device_supports_export_to_cl_image && export_rhs_to_cl_image) + { + ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + return; // Note: Also need to skip the validate in corresponding FIXTURE_DATA_TEST_CASEs. + } + + _device_supports_mmul = arm_matrix_multiply_supported(CLKernelLibrary::get().get_device()); + if(!_device_supports_mmul && use_mmul) + { + ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + return; // Note: Also need to skip the validate in corresponding FIXTURE_DATA_TEST_CASEs. + } + + _target = compute_target(shape_a, shape_b, output_shape, pretranspose_a, pretranspose_b, M0, N0, K0, export_rhs_to_cl_image, data_type, lhs_q_info, rhs_q_info, dst_q_info); + _reference = compute_reference(shape_a, shape_b, output_shape, pretranspose_a, pretranspose_b, data_type, lhs_q_info, rhs_q_info, dst_q_info); + } + +protected: + template <typename U> + void fill(U &&tensor, int i, float lo = -1.f, float hi = 1.f) + { + switch(tensor.data_type()) + { + case DataType::F16: + { + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ float(lo), float(hi) }; + library->fill(tensor, distribution, i); + break; + } + case DataType::F32: + { + std::uniform_real_distribution<float> distribution(lo, hi); + library->fill(tensor, distribution, i); + break; + } + default: + library->fill_tensor_uniform(tensor, i); + } + } + + template <typename U> + void fill_bias_s32(U &&tensor, int i, int32_t min, int32_t max) + { + std::uniform_int_distribution<int32_t> distribution(min, max); + library->fill(tensor, distribution, i); + } + + template <typename U, typename D> + void fill_constant(U &&tensor, D value) + { + library->fill_tensor_value(tensor, value); + } + + CLTensor compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &output_shape, bool pretranspose_a, bool pretranspose_b, const int M0, const int N0, const int K0, + bool export_rhs_to_cl_image, DataType data_type, const QuantizationInfo &lhs_q_info, const QuantizationInfo &rhs_q_info, const QuantizationInfo &dst_q_info) + { + CLSynthetizeOperator<KernelType> matMul{}; + MatMulKernelInfo matmul_info; + matmul_info.adj_lhs = pretranspose_a; + matmul_info.adj_rhs = pretranspose_b; + matmul_info.m0 = M0; + matmul_info.n0 = N0; + matmul_info.k0 = K0; + matmul_info.export_rhs_to_cl_image = export_rhs_to_cl_image; + + bool is_quantized = is_data_type_quantized(data_type); + + // Create tensors + CLTensor a = create_tensor<CLTensor>(shape_a, data_type, 1, lhs_q_info); + CLTensor b = create_tensor<CLTensor>(shape_b, data_type, 1, rhs_q_info); + CLTensor bias = create_tensor<CLTensor>(output_shape[0], (is_quantized) ? DataType::S32 : data_type, 1, dst_q_info); + CLTensor dst = create_tensor<CLTensor>(output_shape, data_type, 1, dst_q_info); + + matMul.configure(a.info(), b.info(), (_enable_bias) ? bias.info() : nullptr, dst.info(), matmul_info); + ARM_COMPUTE_ASSERT(a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(b.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + + // Allocate tensors + a.allocator()->allocate(); + b.allocator()->allocate(); + dst.allocator()->allocate(); + + ARM_COMPUTE_ASSERT(!a.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!b.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + + // Fill tensors + fill(CLAccessor(a), _hash + 1); + fill(CLAccessor(b), _hash + 2); + + // Compute matMul kernel + ITensorPack tensors_pack({ { ACL_SRC_0, &a }, + { ACL_SRC_1, &b }, + { ACL_DST, &dst } + }); + + if(_enable_bias) + { + // Allocate, fill and add bias to TensorPack obj + bias.allocator()->allocate(); + if(is_quantized) + { + fill_bias_s32(CLAccessor(bias), _hash + 3, _min_bias, _max_bias); + } + else + { + fill(CLAccessor(bias), _hash + 3); + } + tensors_pack.add_tensor(ACL_SRC_2, &bias); + } + + matMul.run(tensors_pack); + + return dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &output_shape, bool pretranspose_a, bool pretranspose_b, DataType data_type, + const QuantizationInfo &lhs_q_info, const QuantizationInfo &rhs_q_info, const QuantizationInfo &dst_q_info) + { + // We collapse dimensions > 3 onto dimension 3, i.e. 5D+ tensors will look like 4D + // This is necessary unless we choose to extend gemm reference for 5D+ tensors + TensorShape output_shape_collapsed = output_shape.collapsed_from(Window::DimZ); + TensorShape shape_a_collapsed = shape_a.collapsed_from(Window::DimZ); + TensorShape shape_b_collapsed = shape_b.collapsed_from(Window::DimZ); + + // Create reference + SimpleTensor<T> a{ shape_a_collapsed, data_type, 1, lhs_q_info }; + SimpleTensor<T> b{ shape_b_collapsed, data_type, 1, rhs_q_info }; + SimpleTensor<T> c{ output_shape_collapsed, data_type, 1, dst_q_info }; + + // Fill reference + fill(a, _hash + 1); + fill(b, _hash + 2); + + /* Note: Assuming the usual batch matmul dimensions A = (B x M x K), B = (B x K x N), if pretranspose_A is set to true, then A is assumed to be (B x K x M), + therefore, A must be pre-transposed before passing it to the fixture. And, we transpose A again in the fixture to make it (B x M x K) + in order to be able to call reference implementation that works with (B x M x K) input. + Similarly, if pretranspose_B is set to true, then B is assumed to be (B x N x K), B must be pre-transposed before passing it to the fixture. */ + + // Define transposed shapes + TensorShape a_transposed_shape(a.shape()); + a_transposed_shape.set(0, a.shape().y()); + a_transposed_shape.set(1, a.shape().x()); + + TensorShape b_transposed_shape(b.shape()); + b_transposed_shape.set(0, b.shape().y()); + b_transposed_shape.set(1, b.shape().x()); + + // Define transposed tensors + SimpleTensor<T> a_transposed{ a_transposed_shape, data_type }; + SimpleTensor<T> b_transposed{ b_transposed_shape, data_type }; + + // pretranspose a if necessary + if(pretranspose_a) + { + a_transposed = reference::permute<T>(a, PermutationVector(1U, 0U)); + } + + // pretranspose b if necessary + if(pretranspose_b) + { + b_transposed = reference::permute<T>(b, PermutationVector(1U, 0U)); + } + + // Use transposed tensors if boolean enabled else use original tensors + SimpleTensor<T> result = gemm_reference<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c); + + // We reshape the gemm output back if the tensor is high dimensional + if(output_shape_collapsed != output_shape) + { + result = reference::reshape_layer(result, output_shape); + } + + return result; + } + + template <typename U = T> + typename std::enable_if < std::is_same<U, float>::value || std::is_same<U, half>::value, SimpleTensor<U >>::type gemm_reference(SimpleTensor<U> &a, SimpleTensor<U> &b, SimpleTensor<U> &c) + { + // Fill bias, then copy first dimension into subsequent dimensions to mimic broadcast + // of bias tensor from shape [dst.dimension(0)] to [dst.tensor_shape()] in target kernel + if(_enable_bias) + { + fill(c, _hash + 3); + const int n = c.shape().x(); + const int other_dims = c.shape().collapsed_from(1)[1]; + for(int i = 1; i < other_dims; ++i) // For all data, copy first n elements into remaining batches + { + memcpy(c.data() + i * n, c.data(), n * sizeof(T)); + } + } + // Setting beta to 0 will effectively disable C for the + // computation of the reference: alpha * A * B + 0 * C + return reference::gemm<U>(a, b, c, 1.0f, (_enable_bias) ? 1.0f : 0.f); + } + + template <typename U = T> + typename std::enable_if < std::is_same<U, int8_t>::value || std::is_same<U, uint8_t>::value, SimpleTensor<U >>::type gemm_reference(SimpleTensor<U> &a, SimpleTensor<U> &b, SimpleTensor<U> &c) + { + const UniformQuantizationInfo aq = a.quantization_info().uniform(); + const UniformQuantizationInfo bq = b.quantization_info().uniform(); + const UniformQuantizationInfo cq = c.quantization_info().uniform(); + + const SimpleTensor<int32_t> result = reference::gemmlowp_matrix_multiply_core<int32_t, U, U>(a, b, c.shape(), -aq.offset, -bq.offset); + + std::vector<int32_t> gemmlowp_multipliers{ 1 }; + std::vector<int32_t> gemmlowp_shifts{ 1 }; + const int gemmlowp_offset = cq.offset; + const float scale = aq.scale * bq.scale / cq.scale; + + quantization::calculate_quantized_multiplier(scale, &gemmlowp_multipliers[0], &gemmlowp_shifts[0]); + constexpr int32_t gemmlowp_min_bound = std::numeric_limits<int32_t>::min(); + constexpr int32_t gemmlowp_max_bound = std::numeric_limits<int32_t>::max(); + + SimpleTensor<int> bias{ c.shape(), DataType::S32 }; + if(_enable_bias) + { + // Identical to float implementation, fill and copy values of bias first dimension + fill_bias_s32(bias, _hash + 3, _min_bias, _max_bias); + const int n = bias.shape().x(); + const int other_dims = bias.shape().collapsed_from(1)[1]; + const unsigned int dt_size = sizeof(int32_t); + for(int i = 1; i < other_dims; ++i) + { + memcpy(bias.data() + i * n, bias.data(), n * dt_size); + } + } + else + { + fill_constant(bias, static_cast<int32_t>(0)); // effectively disable bias + } + + const SimpleTensor<U> final_result = reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, U>(result, bias, + gemmlowp_multipliers, gemmlowp_shifts, gemmlowp_offset, gemmlowp_min_bound, gemmlowp_max_bound); + + return final_result; + } + + CLTensor _target{}; + SimpleTensor<T> _reference{}; + bool _enable_bias{ false }; + bool _device_supports_export_to_cl_image{ true }; + bool _device_supports_mmul{ true }; + int32_t _min_bias{ 0 }; + int32_t _max_bias{ 0 }; + int32_t _hash{ 0 }; +}; + +template <typename T, typename KernelType, bool use_mmul = false> +class MatMulKernelValidationFixture : public MatMulKernelGenericValidationFixture<T, KernelType, use_mmul> +{ +public: + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape output_shape, bool pretranspose_a, bool pretranspose_b, int M0, int N0, int K0, bool export_rhs_to_cl_image, DataType data_type) + { + MatMulKernelGenericValidationFixture<T, KernelType, use_mmul>::setup(shape_a, shape_b, output_shape, pretranspose_a, pretranspose_b, M0, N0, K0, export_rhs_to_cl_image, data_type, + false /* enable bias */); + } +}; + +template <typename T, typename KernelType, bool use_mmul = false> +class MatMulKernelWithBiasValidation : public MatMulKernelGenericValidationFixture<T, KernelType, use_mmul> +{ +public: + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape output_shape, bool pretranspose_a, bool pretranspose_b, int M0, int N0, int K0, bool export_rhs_to_cl_image, DataType data_type) + { + MatMulKernelGenericValidationFixture<T, KernelType, use_mmul>::setup(shape_a, shape_b, output_shape, pretranspose_a, pretranspose_b, M0, N0, K0, export_rhs_to_cl_image, data_type, + true /* enable bias */); + } +}; +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_MATMULKERNELFIXTURE_H diff --git a/tests/validation/fixtures/MaxUnpoolingLayerFixture.h b/tests/validation/fixtures/MaxUnpoolingLayerFixture.h index 7c118da319..808e3ffabd 100644 --- a/tests/validation/fixtures/MaxUnpoolingLayerFixture.h +++ b/tests/validation/fixtures/MaxUnpoolingLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021 Arm Limited. + * Copyright (c) 2020-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -46,7 +46,6 @@ template <typename TensorType, typename AccessorType, typename PoolingFunctionTy class MaxUnpoolingLayerValidationGenericFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, PoolingLayerInfo pool_info, DataType data_type, DataLayout data_layout) { std::mt19937 gen(library->seed()); @@ -106,9 +105,9 @@ protected: MaxUnpoolingFunctionType unpool_layer; unpool_layer.configure(&dst, &indices, &unpooled, pool_info); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(indices.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + ARM_COMPUTE_ASSERT(indices.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); @@ -116,10 +115,10 @@ protected: indices.allocator()->allocate(); unpooled.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!indices.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!unpooled.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!indices.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!unpooled.info()->is_resizable()); // Fill tensors fill(AccessorType(src)); @@ -150,7 +149,6 @@ template <typename TensorType, typename AccessorType, typename F1, typename F2, class MaxUnpoolingLayerValidationFixture : public MaxUnpoolingLayerValidationGenericFixture<TensorType, AccessorType, F1, F2, T> { public: - template <typename...> void setup(TensorShape shape, PoolingType pool_type, Size2D pool_size, PadStrideInfo pad_stride_info, DataType data_type, DataLayout data_layout) { MaxUnpoolingLayerValidationGenericFixture<TensorType, AccessorType, F1, F2, T>::setup(shape, PoolingLayerInfo(pool_type, pool_size, data_layout, pad_stride_info, true), diff --git a/tests/validation/fixtures/MeanStdDevNormalizationLayerFixture.h b/tests/validation/fixtures/MeanStdDevNormalizationLayerFixture.h index 5d11d1f8e2..bf5d20790c 100644 --- a/tests/validation/fixtures/MeanStdDevNormalizationLayerFixture.h +++ b/tests/validation/fixtures/MeanStdDevNormalizationLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,30 +44,35 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class MeanStdDevNormalizationLayerValidationFixture : public framework::Fixture { public: - template <typename...> - void setup(TensorShape shape, DataType dt, bool in_place, float epsilon = 1e-8f) + void setup(TensorShape shape, DataType dt, bool in_place, float epsilon = 1e-8) { - _data_type = dt; - _target = compute_target(shape, dt, in_place, epsilon); - _reference = compute_reference(shape, dt, epsilon); + QuantizationInfo qi = QuantizationInfo(0.5f, 10); + _data_type = dt; + _target = compute_target(shape, dt, in_place, epsilon, qi); + _reference = compute_reference(shape, dt, epsilon, qi); } protected: template <typename U> - void fill(U &&src_tensor) + void fill(U &&tensor) { - static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported."); - using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type; - - DistributionType distribution{ T(-1.0f), T(1.0f) }; - library->fill(src_tensor, distribution, 0); + if(is_data_type_float(_data_type)) + { + std::uniform_real_distribution<> distribution{ -1.0f, 1.0f }; + library->fill(tensor, distribution, 0); + } + else + { + std::uniform_int_distribution<> distribution{ 0, 255 }; + library->fill(tensor, distribution, 0); + } } - TensorType compute_target(TensorShape shape, DataType dt, bool in_place, float epsilon) + TensorType compute_target(TensorShape shape, DataType dt, bool in_place, float epsilon, QuantizationInfo qi) { // Create tensors - TensorType src = create_tensor<TensorType>(shape, dt, 1); - TensorType dst; + TensorType src = create_tensor<TensorType>(shape, dt, 1, qi); + TensorType dst = create_tensor<TensorType>(shape, dt, 1, qi); TensorType *dst_ptr = in_place ? &src : &dst; @@ -75,17 +80,17 @@ protected: FunctionType norm; norm.configure(&src, dst_ptr, epsilon); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); if(!in_place) { dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); } // Fill tensors @@ -104,10 +109,10 @@ protected: } } - SimpleTensor<T> compute_reference(const TensorShape &shape, DataType dt, float epsilon) + SimpleTensor<T> compute_reference(const TensorShape &shape, DataType dt, float epsilon, QuantizationInfo qi) { // Create reference - SimpleTensor<T> ref_src{ shape, dt, 1 }; + SimpleTensor<T> ref_src{ shape, dt, 1, qi }; // Fill reference fill(ref_src); @@ -119,6 +124,7 @@ protected: SimpleTensor<T> _reference{}; DataType _data_type{}; }; + } // namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/fixtures/NonMaxSuppressionFixture.h b/tests/validation/fixtures/NonMaxSuppressionFixture.h index 6d5fc437ce..043b4731aa 100644 --- a/tests/validation/fixtures/NonMaxSuppressionFixture.h +++ b/tests/validation/fixtures/NonMaxSuppressionFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -46,7 +46,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType> class NMSValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape input_shape, unsigned int max_output_size, float score_threshold, float nms_threshold) { ARM_COMPUTE_ERROR_ON(max_output_size == 0); @@ -77,18 +76,18 @@ protected: FunctionType nms_func; nms_func.configure(&bboxes, &scores, &indices, max_output_size, score_threshold, nms_threshold); - ARM_COMPUTE_EXPECT(bboxes.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(indices.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(scores.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(bboxes.info()->is_resizable()); + ARM_COMPUTE_ASSERT(indices.info()->is_resizable()); + ARM_COMPUTE_ASSERT(scores.info()->is_resizable()); // Allocate tensors bboxes.allocator()->allocate(); indices.allocator()->allocate(); scores.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!bboxes.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!indices.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!scores.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!bboxes.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!indices.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!scores.info()->is_resizable()); // Fill tensors fill(AccessorType(bboxes), 0, 0.f, 1.f); diff --git a/tests/validation/fixtures/NormalizationLayerFixture.h b/tests/validation/fixtures/NormalizationLayerFixture.h index 54570de64f..ddaa3533f5 100644 --- a/tests/validation/fixtures/NormalizationLayerFixture.h +++ b/tests/validation/fixtures/NormalizationLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -46,7 +46,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class NormalizationValidationGenericFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, NormType norm_type, int norm_size, float beta, bool is_scaled, DataType data_type, DataLayout data_layout) { NormalizationLayerInfo info(norm_type, norm_size, 5, beta, 1.f, is_scaled); @@ -81,15 +80,15 @@ protected: FunctionType norm_layer; norm_layer.configure(&src, &dst, info); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src)); @@ -119,7 +118,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class NormalizationValidationFixture : public NormalizationValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(TensorShape shape, NormType norm_type, int norm_size, float beta, bool is_scaled, DataType data_type, DataLayout data_layout) { NormalizationValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, norm_type, norm_size, beta, is_scaled, data_type, data_layout); diff --git a/tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h b/tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h index 3249ccc4bb..5f2c865950 100644 --- a/tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h +++ b/tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,7 +44,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class NormalizePlanarYUVLayerValidationGenericFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape0, TensorShape shape1, DataType dt, DataLayout data_layout, QuantizationInfo quantization_info) { _data_type = dt; @@ -97,10 +96,10 @@ protected: FunctionType norm; norm.configure(&src, &dst, &mean, &std); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(mean.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(std.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + ARM_COMPUTE_ASSERT(mean.info()->is_resizable()); + ARM_COMPUTE_ASSERT(std.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); @@ -108,10 +107,10 @@ protected: mean.allocator()->allocate(); std.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!mean.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!std.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!mean.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!std.info()->is_resizable()); // Fill tensors fill(AccessorType(src), AccessorType(mean), AccessorType(std)); @@ -144,7 +143,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class NormalizePlanarYUVLayerValidationFixture : public NormalizePlanarYUVLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(TensorShape shape0, TensorShape shape1, DataType dt, DataLayout data_layout) { NormalizePlanarYUVLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape0, shape1, dt, data_layout, QuantizationInfo()); @@ -155,7 +153,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class NormalizePlanarYUVLayerValidationQuantizedFixture : public NormalizePlanarYUVLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(TensorShape shape0, TensorShape shape1, DataType dt, DataLayout data_layout, QuantizationInfo quantization_info) { NormalizePlanarYUVLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape0, shape1, dt, data_layout, quantization_info); diff --git a/tests/validation/fixtures/PadLayerFixture.h b/tests/validation/fixtures/PadLayerFixture.h index 2279c8b2b3..93b43616ff 100644 --- a/tests/validation/fixtures/PadLayerFixture.h +++ b/tests/validation/fixtures/PadLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,7 +44,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class PaddingFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, DataType data_type, const PaddingList &padding, const PaddingMode mode) { PaddingList clamped_padding = padding; @@ -94,15 +93,15 @@ protected: FunctionType padding; padding.configure(&src, &dst, paddings, const_value, mode); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src), 0); diff --git a/tests/validation/fixtures/PermuteFixture.h b/tests/validation/fixtures/PermuteFixture.h index 9bbc0cba5e..b1b3845a8d 100644 --- a/tests/validation/fixtures/PermuteFixture.h +++ b/tests/validation/fixtures/PermuteFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -45,7 +45,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class PermuteValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape input_shape, PermutationVector perm, DataType data_type) { _target = compute_target(input_shape, data_type, perm); @@ -73,15 +72,15 @@ protected: FunctionType perm_func; perm_func.configure(&src, &dst, perm); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src)); diff --git a/tests/validation/fixtures/PixelWiseMultiplicationFixture.h b/tests/validation/fixtures/PixelWiseMultiplicationFixture.h index 4eb83859ac..4345d8a13f 100644 --- a/tests/validation/fixtures/PixelWiseMultiplicationFixture.h +++ b/tests/validation/fixtures/PixelWiseMultiplicationFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,7 +44,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class PixelWiseMultiplicationGenericValidationFixture : public framework::Fixture { public: - template <typename...> void setup(const TensorShape &shape0, const TensorShape &shape1, DataType dt_in1, @@ -76,24 +75,45 @@ protected: QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, ActivationLayerInfo act_info) { // Create tensors - TensorType src1 = create_tensor<TensorType>(shape0, dt_in1, 1, qinfo0); - TensorType src2 = create_tensor<TensorType>(shape1, dt_in2, 1, qinfo1); - TensorType dst = create_tensor<TensorType>(TensorShape::broadcast_shape(shape0, shape1), dt_out, 1, qinfo_out); + const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1); + TensorType src1 = create_tensor<TensorType>(shape0, dt_in1, 1, qinfo0); + TensorType src2 = create_tensor<TensorType>(shape1, dt_in2, 1, qinfo1); + TensorType dst = create_tensor<TensorType>(out_shape, dt_out, 1, qinfo_out); + + // Check whether do in-place computation and whether inputs are broadcast compatible + TensorType *actual_dst = &dst; + if(_is_inplace) + { + bool src1_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) && (qinfo0 == qinfo_out) && (dt_in1 == dt_out); + bool src2_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) && (qinfo1 == qinfo_out) && (dt_in2 == dt_out); + bool do_in_place = out_shape.total_size() != 0 && (src1_is_inplace || src2_is_inplace); + ARM_COMPUTE_ASSERT(do_in_place); + + if(src1_is_inplace) + { + actual_dst = &src1; + } + else + { + actual_dst = &src2; + } + } auto allocate_tensor = [](TensorType & t) { - ARM_COMPUTE_EXPECT(t.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(t.info()->is_resizable()); t.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!t.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!t.info()->is_resizable()); }; // Create and configure function FunctionType multiply; - multiply.configure(&src1, &src2, (_is_inplace ? &src1 : &dst), scale, convert_policy, rounding_policy, act_info); + multiply.configure(&src1, &src2, actual_dst, scale, convert_policy, rounding_policy, act_info); allocate_tensor(src1); allocate_tensor(src2); + // If don't do in-place computation, still need to allocate original dst if(!_is_inplace) { allocate_tensor(dst); @@ -106,12 +126,7 @@ protected: // Compute function multiply.run(); - if(_is_inplace) - { - return src1; - } - - return dst; + return std::move(*actual_dst); } SimpleTensor<T3> compute_reference(const TensorShape &shape0, const TensorShape &shape1, DataType dt_in1, DataType dt_in2, DataType dt_out, @@ -122,16 +137,12 @@ protected: SimpleTensor<T1> src1{ shape0, dt_in1, 1, qinfo0 }; SimpleTensor<T2> src2{ shape1, dt_in2, 1, qinfo1 }; - // current in-place implementation only supports same metadata of input and output tensors. - // By ignoring output quantization information here, we can make test cases implementation much simpler. - QuantizationInfo output_qinfo = _is_inplace ? qinfo0 : qinfo_out; - // Fill reference fill(src1, 0); fill(src2, 1); - auto result = reference::pixel_wise_multiplication<T1, T2, T3>(src1, src2, scale, convert_policy, rounding_policy, dt_out, output_qinfo); - return act_info.enabled() ? reference::activation_layer(result, act_info, output_qinfo) : result; + auto result = reference::pixel_wise_multiplication<T1, T2, T3>(src1, src2, scale, convert_policy, rounding_policy, dt_out, qinfo_out); + return act_info.enabled() ? reference::activation_layer(result, act_info, qinfo_out) : result; } TensorType _target{}; @@ -143,7 +154,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class PixelWiseMultiplicationValidationFixture : public PixelWiseMultiplicationGenericValidationFixture<TensorType, AccessorType, FunctionType, T1, T2, T3> { public: - template <typename...> void setup(const TensorShape &shape, DataType dt_in1, DataType dt_in2, DataType dt_out, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy, bool is_inplace) { PixelWiseMultiplicationGenericValidationFixture<TensorType, AccessorType, FunctionType, T1, T2, T3>::setup(shape, shape, dt_in1, dt_in2, dt_out, scale, convert_policy, rounding_policy, @@ -155,7 +165,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class PixelWiseMultiplicationBroadcastValidationFixture : public PixelWiseMultiplicationGenericValidationFixture<TensorType, AccessorType, FunctionType, T1, T2, T3> { public: - template <typename...> void setup(const TensorShape &shape0, const TensorShape &shape1, DataType dt_in1, DataType dt_in2, DataType dt_out, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy, bool is_inplace) { @@ -168,7 +177,17 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class PixelWiseMultiplicationValidationFloatFixture : public PixelWiseMultiplicationGenericValidationFixture<TensorType, AccessorType, FunctionType, T1, T2> { public: - template <typename...> + void setup(const TensorShape &shape, DataType dt_in1, DataType dt_in2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy, ActivationLayerInfo act_info, bool is_inplace) + { + PixelWiseMultiplicationGenericValidationFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, shape, dt_in1, dt_in2, dt_in2, scale, convert_policy, rounding_policy, + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2> +class PixelWiseMultiplicationValidationIntegerFixture : public PixelWiseMultiplicationGenericValidationFixture<TensorType, AccessorType, FunctionType, T1, T2> +{ +public: void setup(const TensorShape &shape, DataType dt_in1, DataType dt_in2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy, ActivationLayerInfo act_info, bool is_inplace) { PixelWiseMultiplicationGenericValidationFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, shape, dt_in1, dt_in2, dt_in2, scale, convert_policy, rounding_policy, @@ -180,7 +199,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class PixelWiseMultiplicationBroadcastValidationFloatFixture : public PixelWiseMultiplicationGenericValidationFixture<TensorType, AccessorType, FunctionType, T1, T2> { public: - template <typename...> void setup(const TensorShape &shape0, const TensorShape &shape1, DataType dt_in1, DataType dt_in2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy, ActivationLayerInfo act_info, bool is_inplace) { @@ -193,7 +211,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class PixelWiseMultiplicationValidationQuantizedFixture : public PixelWiseMultiplicationGenericValidationFixture<TensorType, AccessorType, FunctionType, T1, T2, T3> { public: - template <typename...> void setup(const TensorShape &shape, DataType dt_in1, DataType dt_in2, DataType dt_out, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy, QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace) { @@ -206,7 +223,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class PixelWiseMultiplicationBroadcastValidationQuantizedFixture : public PixelWiseMultiplicationGenericValidationFixture<TensorType, AccessorType, FunctionType, T1, T2, T3> { public: - template <typename...> void setup(const TensorShape &shape0, const TensorShape &shape1, DataType dt_in1, DataType dt_in2, DataType dt_out, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy, QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace) { diff --git a/tests/validation/fixtures/Pooling3dLayerFixture.h b/tests/validation/fixtures/Pooling3dLayerFixture.h new file mode 100644 index 0000000000..1bdf615fb1 --- /dev/null +++ b/tests/validation/fixtures/Pooling3dLayerFixture.h @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2022-2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_TEST_POOLING_3D_LAYER_FIXTURE +#define ARM_COMPUTE_TEST_POOLING_3D_LAYER_FIXTURE + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/runtime/Tensor.h" +#include "tests/AssetsLibrary.h" +#include "tests/Globals.h" +#include "tests/IAccessor.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/validation/reference/Pooling3dLayer.h" +#include <random> +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class Pooling3dLayerValidationGenericFixture : public framework::Fixture +{ +public: + void setup(TensorShape shape, Pooling3dLayerInfo pool_info, DataType data_type, QuantizationInfo input_qinfo = QuantizationInfo(), QuantizationInfo output_qinfo = QuantizationInfo()) + { + _target = compute_target(shape, pool_info, data_type, input_qinfo, output_qinfo); + _reference = compute_reference(shape, pool_info, data_type, input_qinfo, output_qinfo); + } + +protected: + template <typename U> + void fill(U &&tensor) + { + if(tensor.data_type() == DataType::F32) + { + std::uniform_real_distribution<float> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, 0); + } + else if(tensor.data_type() == DataType::F16) + { + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f }; + library->fill(tensor, distribution, 0); + } + else // data type is quantized_asymmetric + { + library->fill_tensor_uniform(tensor, 0); + } + } + + TensorType compute_target(TensorShape shape, Pooling3dLayerInfo info, + DataType data_type, QuantizationInfo input_qinfo, QuantizationInfo output_qinfo) + { + // Create tensors + TensorType src = create_tensor<TensorType>(shape, data_type, 1, input_qinfo, DataLayout::NDHWC); + const TensorShape dst_shape = misc::shape_calculator::compute_pool3d_shape((src.info()->tensor_shape()), info); + TensorType dst = create_tensor<TensorType>(dst_shape, data_type, 1, output_qinfo, DataLayout::NDHWC); + + // Create and configure function + FunctionType pool_layer; + pool_layer.validate(src.info(), dst.info(), info); + pool_layer.configure(&src, &dst, info); + + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + + // Fill tensors + fill(AccessorType(src)); + + // Compute function + pool_layer.run(); + return dst; + } + + SimpleTensor<T> compute_reference(TensorShape shape, Pooling3dLayerInfo info, DataType data_type, QuantizationInfo input_qinfo, QuantizationInfo output_qinfo) + { + // Create reference + SimpleTensor<T> src(shape, data_type, 1, input_qinfo, DataLayout::NDHWC); + // Fill reference + fill(src); + return reference::pooling_3d_layer<T>(src, info, output_qinfo); + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class Pooling3dLayerValidationFixture : public Pooling3dLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape shape, PoolingType pool_type, Size3D pool_size, Size3D stride, Padding3D padding, bool exclude_padding, DataType data_type) + { + Pooling3dLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, Pooling3dLayerInfo(pool_type, pool_size, stride, padding, exclude_padding), + data_type); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class Pooling3dLayerValidationQuantizedFixture : public Pooling3dLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape shape, PoolingType pool_type, Size3D pool_size, Size3D stride, Padding3D padding, bool exclude_padding, DataType data_type, + QuantizationInfo input_qinfo = QuantizationInfo(), QuantizationInfo output_qinfo = QuantizationInfo()) + { + Pooling3dLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, Pooling3dLayerInfo(pool_type, pool_size, stride, padding, exclude_padding), + data_type, input_qinfo, output_qinfo); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class Pooling3dLayerGlobalValidationFixture : public Pooling3dLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape shape, PoolingType pool_type, DataType data_type) + { + Pooling3dLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, Pooling3dLayerInfo(pool_type), data_type); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class SpecialPooling3dLayerValidationFixture : public Pooling3dLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape src_shape, Pooling3dLayerInfo pool_info, DataType data_type) + { + Pooling3dLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(src_shape, pool_info, data_type); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_POOLING_3D_LAYER_FIXTURE */ diff --git a/tests/validation/fixtures/PoolingLayerFixture.h b/tests/validation/fixtures/PoolingLayerFixture.h index af078d4ce3..59c920868b 100644 --- a/tests/validation/fixtures/PoolingLayerFixture.h +++ b/tests/validation/fixtures/PoolingLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -45,16 +45,31 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class PoolingLayerValidationGenericFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, PoolingLayerInfo pool_info, DataType data_type, DataLayout data_layout, bool indices = false, - QuantizationInfo input_qinfo = QuantizationInfo(), QuantizationInfo output_qinfo = QuantizationInfo()) + QuantizationInfo input_qinfo = QuantizationInfo(), QuantizationInfo output_qinfo = QuantizationInfo(), bool mixed_layout = false) { - _pool_info = pool_info; - _target = compute_target(shape, pool_info, data_type, data_layout, input_qinfo, output_qinfo, indices); - _reference = compute_reference(shape, pool_info, data_type, data_layout, input_qinfo, output_qinfo, indices); + _mixed_layout = mixed_layout; + _pool_info = pool_info; + _target = compute_target(shape, pool_info, data_type, data_layout, input_qinfo, output_qinfo, indices); + _reference = compute_reference(shape, pool_info, data_type, data_layout, input_qinfo, output_qinfo, indices); } protected: + void mix_layout(FunctionType &layer, TensorType &src, TensorType &dst) + { + const DataLayout data_layout = src.info()->data_layout(); + // Test Multi DataLayout graph cases, when the data layout changes after configure + src.info()->set_data_layout(data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); + dst.info()->set_data_layout(data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); + + // Compute Convolution function + layer.run(); + + // Reinstating original data layout for the test suite to properly check the values + src.info()->set_data_layout(data_layout); + dst.info()->set_data_layout(data_layout); + } + template <typename U> void fill(U &&tensor) { @@ -94,25 +109,33 @@ protected: FunctionType pool_layer; pool_layer.configure(&src, &dst, info, (indices) ? &_target_indices : nullptr); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(_target_indices.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + ARM_COMPUTE_ASSERT(_target_indices.info()->is_resizable()); + + add_padding_x({ &src, &dst, &_target_indices }, data_layout); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); _target_indices.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!_target_indices.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!_target_indices.info()->is_resizable()); // Fill tensors fill(AccessorType(src)); - // Compute function - pool_layer.run(); - + if(_mixed_layout) + { + mix_layout(pool_layer, src, dst); + } + else + { + // Compute function + pool_layer.run(); + } return dst; } @@ -129,6 +152,7 @@ protected: TensorType _target{}; SimpleTensor<T> _reference{}; PoolingLayerInfo _pool_info{}; + bool _mixed_layout{ false }; TensorType _target_indices{}; SimpleTensor<uint32_t> _ref_indices{}; }; @@ -136,23 +160,22 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class PoolingLayerIndicesValidationFixture : public PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> - void setup(TensorShape shape, PoolingType pool_type, Size2D pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type, DataLayout data_layout) + void setup(TensorShape shape, PoolingType pool_type, Size2D pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type, DataLayout data_layout, bool use_kernel_indices) { - PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, PoolingLayerInfo(pool_type, pool_size, data_layout, pad_stride_info, exclude_padding), + PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, PoolingLayerInfo(pool_type, pool_size, data_layout, pad_stride_info, exclude_padding, false, + true, use_kernel_indices), data_type, data_layout, true); } }; -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false> class PoolingLayerValidationFixture : public PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(TensorShape shape, PoolingType pool_type, Size2D pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type, DataLayout data_layout) { PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, PoolingLayerInfo(pool_type, pool_size, data_layout, pad_stride_info, exclude_padding), - data_type, data_layout); + data_type, data_layout, false, mixed_layout); } }; @@ -160,7 +183,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class PoolingLayerValidationMixedPrecisionFixture : public PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(TensorShape shape, PoolingType pool_type, Size2D pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type, DataLayout data_layout, bool fp_mixed_precision = false) { PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, PoolingLayerInfo(pool_type, pool_size, data_layout, pad_stride_info, exclude_padding, fp_mixed_precision), @@ -168,16 +190,15 @@ public: } }; -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false> class PoolingLayerValidationQuantizedFixture : public PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(TensorShape shape, PoolingType pool_type, Size2D pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type, DataLayout data_layout = DataLayout::NCHW, QuantizationInfo input_qinfo = QuantizationInfo(), QuantizationInfo output_qinfo = QuantizationInfo()) { PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, PoolingLayerInfo(pool_type, pool_size, data_layout, pad_stride_info, exclude_padding), - data_type, data_layout, false, input_qinfo, output_qinfo); + data_type, data_layout, false, input_qinfo, output_qinfo, mixed_layout); } }; @@ -185,10 +206,9 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class SpecialPoolingLayerValidationFixture : public PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(TensorShape src_shape, PoolingLayerInfo pool_info, DataType data_type) { - PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(src_shape, pool_info, data_type, DataLayout::NCHW); + PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(src_shape, pool_info, data_type, pool_info.data_layout); } }; @@ -196,7 +216,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class GlobalPoolingLayerValidationFixture : public PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(TensorShape shape, PoolingType pool_type, DataType data_type, DataLayout data_layout = DataLayout::NCHW) { PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, PoolingLayerInfo(pool_type, data_layout), data_type, data_layout); diff --git a/tests/validation/fixtures/PriorBoxLayerFixture.h b/tests/validation/fixtures/PriorBoxLayerFixture.h index ef18c0d787..0a76cfd155 100644 --- a/tests/validation/fixtures/PriorBoxLayerFixture.h +++ b/tests/validation/fixtures/PriorBoxLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -45,7 +45,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class PriorBoxLayerValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape input_shape, PriorBoxLayerInfo info, DataType data_type, DataLayout data_layout) { TensorInfo input_info(input_shape, 1, data_type); @@ -73,18 +72,18 @@ protected: FunctionType prior_box; prior_box.configure(&src1, &src2, &dst, info); - ARM_COMPUTE_EXPECT(src1.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(src2.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src1.info()->is_resizable()); + ARM_COMPUTE_ASSERT(src2.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src1.allocator()->allocate(); src2.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src1.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!src2.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src1.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!src2.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Compute function prior_box.run(); diff --git a/tests/validation/fixtures/QLSTMLayerNormalizationFixture.h b/tests/validation/fixtures/QLSTMLayerNormalizationFixture.h index 0cf2ef04f7..e864b4affe 100644 --- a/tests/validation/fixtures/QLSTMLayerNormalizationFixture.h +++ b/tests/validation/fixtures/QLSTMLayerNormalizationFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Arm Limited. + * Copyright (c) 2020-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,7 +44,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class QLSTMLayerNormalizationValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape input_shape, TensorShape weight_shape, TensorShape bias_shape, DataType data_type, QuantizationInfo weight_qinfo) { ARM_COMPUTE_ERROR_ON(data_type != DataType::QSYMM16); @@ -91,9 +90,9 @@ protected: { for(auto t : tensors) { - ARM_COMPUTE_EXPECT(t->info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(t->info()->is_resizable()); t->allocator()->allocate(); - ARM_COMPUTE_EXPECT(!t->info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!t->info()->is_resizable()); } } diff --git a/tests/validation/fixtures/QuantizationLayerFixture.h b/tests/validation/fixtures/QuantizationLayerFixture.h index 4f46f99ff5..1b21967bda 100644 --- a/tests/validation/fixtures/QuantizationLayerFixture.h +++ b/tests/validation/fixtures/QuantizationLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -46,7 +46,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class QuantizationValidationGenericFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, DataType data_type_in, DataType data_type_out, QuantizationInfo qinfo, QuantizationInfo qinfo_in) { _target = compute_target(shape, data_type_in, data_type_out, qinfo, qinfo_in); @@ -70,15 +69,15 @@ protected: FunctionType quantization_layer; quantization_layer.configure(&src, &dst); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src)); @@ -108,7 +107,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class QuantizationValidationFixture : public QuantizationValidationGenericFixture<TensorType, AccessorType, FunctionType, Tin, Tout> { public: - template <typename...> void setup(TensorShape shape, DataType data_type_in, DataType data_type_out, QuantizationInfo qinfo) { QuantizationValidationGenericFixture<TensorType, AccessorType, FunctionType, Tin, Tout>::setup(shape, data_type_in, data_type_out, qinfo, QuantizationInfo()); diff --git a/tests/validation/fixtures/RNNLayerFixture.h b/tests/validation/fixtures/RNNLayerFixture.h index 394d91cd6f..e9a05e7838 100644 --- a/tests/validation/fixtures/RNNLayerFixture.h +++ b/tests/validation/fixtures/RNNLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -42,7 +42,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class RNNLayerValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape recurrent_weights_shape, TensorShape bias_shape, TensorShape output_shape, ActivationLayerInfo info, DataType data_type) { @@ -76,12 +75,12 @@ protected: FunctionType rnn; rnn.configure(&input, &weights, &recurrent_weights, &bias, &hidden_state, &output, info); - ARM_COMPUTE_EXPECT(input.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(recurrent_weights.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(hidden_state.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(output.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(input.info()->is_resizable()); + ARM_COMPUTE_ASSERT(weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(recurrent_weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(hidden_state.info()->is_resizable()); + ARM_COMPUTE_ASSERT(output.info()->is_resizable()); // Allocate tensors input.allocator()->allocate(); @@ -91,12 +90,12 @@ protected: hidden_state.allocator()->allocate(); output.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!input.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!weights.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!recurrent_weights.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!hidden_state.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!output.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!input.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!recurrent_weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!hidden_state.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!output.info()->is_resizable()); // Fill tensors fill(AccessorType(input), 0); diff --git a/tests/validation/fixtures/ROIAlignLayerFixture.h b/tests/validation/fixtures/ROIAlignLayerFixture.h index c631c24cff..ad76dcbbd9 100644 --- a/tests/validation/fixtures/ROIAlignLayerFixture.h +++ b/tests/validation/fixtures/ROIAlignLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -45,7 +45,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ROIAlignLayerGenericFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape input_shape, const ROIPoolingLayerInfo pool_info, TensorShape rois_shape, DataType data_type, DataLayout data_layout, QuantizationInfo qinfo, QuantizationInfo output_qinfo) { _rois_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::QASYMM16 : data_type; @@ -138,18 +137,18 @@ protected: FunctionType roi_align_layer; roi_align_layer.configure(&src, &rois_tensor, &dst, pool_info); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(rois_tensor.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(rois_tensor.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); rois_tensor.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!rois_tensor.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rois_tensor.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src)); @@ -189,7 +188,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ROIAlignLayerFixture : public ROIAlignLayerGenericFixture<TensorType, AccessorType, FunctionType, T, TRois> { public: - template <typename...> void setup(TensorShape input_shape, const ROIPoolingLayerInfo pool_info, TensorShape rois_shape, DataType data_type, DataLayout data_layout) { ROIAlignLayerGenericFixture<TensorType, AccessorType, FunctionType, T, TRois>::setup(input_shape, pool_info, rois_shape, data_type, data_layout, @@ -201,7 +199,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ROIAlignLayerQuantizedFixture : public ROIAlignLayerGenericFixture<TensorType, AccessorType, FunctionType, T, TRois> { public: - template <typename...> void setup(TensorShape input_shape, const ROIPoolingLayerInfo pool_info, TensorShape rois_shape, DataType data_type, DataLayout data_layout, QuantizationInfo qinfo, QuantizationInfo output_qinfo) { diff --git a/tests/validation/fixtures/ROIPoolingLayerFixture.h b/tests/validation/fixtures/ROIPoolingLayerFixture.h new file mode 100644 index 0000000000..4b46a6176d --- /dev/null +++ b/tests/validation/fixtures/ROIPoolingLayerFixture.h @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2021, 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_TEST_ROIPOOLINGLAYER_FIXTURE +#define ARM_COMPUTE_TEST_ROIPOOLINGLAYER_FIXTURE + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "tests/AssetsLibrary.h" +#include "tests/Globals.h" +#include "tests/IAccessor.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/validation/Helpers.h" +#include "tests/validation/reference/ROIPoolingLayer.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class ROIPoolingLayerGenericFixture : public framework::Fixture +{ +public: + void setup(TensorShape input_shape, const ROIPoolingLayerInfo pool_info, TensorShape rois_shape, DataType data_type, DataLayout data_layout, QuantizationInfo qinfo, QuantizationInfo output_qinfo) + { + _target = compute_target(input_shape, data_type, data_layout, pool_info, rois_shape, qinfo, output_qinfo); + _reference = compute_reference(input_shape, data_type, pool_info, rois_shape, qinfo, output_qinfo); + } + +protected: + template <typename U> + void fill(U &&tensor) + { + library->fill_tensor_uniform(tensor, 0); + } + + template <typename U> + void generate_rois(U &&rois, const TensorShape &shape, const ROIPoolingLayerInfo &pool_info, TensorShape rois_shape, DataLayout data_layout = DataLayout::NCHW) + { + const size_t values_per_roi = rois_shape.x(); + const size_t num_rois = rois_shape.y(); + + std::mt19937 gen(library->seed()); + uint16_t *rois_ptr = static_cast<uint16_t *>(rois.data()); + + const float pool_width = pool_info.pooled_width(); + const float pool_height = pool_info.pooled_height(); + const float roi_scale = pool_info.spatial_scale(); + + // Calculate distribution bounds + const auto scaled_width = static_cast<float>((shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH)] / roi_scale) / pool_width); + const auto scaled_height = static_cast<float>((shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT)] / roi_scale) / pool_height); + const auto min_width = static_cast<float>(pool_width / roi_scale); + const auto min_height = static_cast<float>(pool_height / roi_scale); + + // Create distributions + std::uniform_int_distribution<int> dist_batch(0, shape[3] - 1); + std::uniform_int_distribution<> dist_x1(0, scaled_width); + std::uniform_int_distribution<> dist_y1(0, scaled_height); + std::uniform_int_distribution<> dist_w(min_width, std::max(float(min_width), (pool_width - 2) * scaled_width)); + std::uniform_int_distribution<> dist_h(min_height, std::max(float(min_height), (pool_height - 2) * scaled_height)); + + for(unsigned int pw = 0; pw < num_rois; ++pw) + { + const auto batch_idx = dist_batch(gen); + const auto x1 = dist_x1(gen); + const auto y1 = dist_y1(gen); + const auto x2 = x1 + dist_w(gen); + const auto y2 = y1 + dist_h(gen); + + rois_ptr[values_per_roi * pw] = batch_idx; + rois_ptr[values_per_roi * pw + 1] = static_cast<uint16_t>(x1); + rois_ptr[values_per_roi * pw + 2] = static_cast<uint16_t>(y1); + rois_ptr[values_per_roi * pw + 3] = static_cast<uint16_t>(x2); + rois_ptr[values_per_roi * pw + 4] = static_cast<uint16_t>(y2); + } + } + + TensorType compute_target(TensorShape input_shape, + DataType data_type, + DataLayout data_layout, + const ROIPoolingLayerInfo &pool_info, + const TensorShape rois_shape, + const QuantizationInfo &qinfo, + const QuantizationInfo &output_qinfo) + { + const QuantizationInfo rois_qinfo = is_data_type_quantized(data_type) ? QuantizationInfo(0.125f, 0) : QuantizationInfo(); + + // Create tensors + TensorType src = create_tensor<TensorType>(input_shape, data_type, 1, qinfo, data_layout); + TensorType rois_tensor = create_tensor<TensorType>(rois_shape, _rois_data_type, 1, rois_qinfo); + + // Initialise shape and declare output tensor dst + const TensorShape dst_shape; + TensorType dst = create_tensor<TensorType>(dst_shape, data_type, 1, output_qinfo, data_layout); + + // Create and configure function + FunctionType roi_pool_layer; + roi_pool_layer.configure(&src, &rois_tensor, &dst, pool_info); + + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(rois_tensor.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + + // Allocate tensors + src.allocator()->allocate(); + rois_tensor.allocator()->allocate(); + dst.allocator()->allocate(); + + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!rois_tensor.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + + // Fill tensors + fill(AccessorType(src)); + generate_rois(AccessorType(rois_tensor), input_shape, pool_info, rois_shape, data_layout); + + // Compute function + roi_pool_layer.run(); + + return dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &input_shape, + DataType data_type, + const ROIPoolingLayerInfo &pool_info, + const TensorShape rois_shape, + const QuantizationInfo &qinfo, + const QuantizationInfo &output_qinfo) + { + // Create reference tensor + SimpleTensor<T> src{ input_shape, data_type, 1, qinfo }; + const QuantizationInfo rois_qinfo = is_data_type_quantized(data_type) ? QuantizationInfo(0.125f, 0) : QuantizationInfo(); + SimpleTensor<uint16_t> rois_tensor{ rois_shape, _rois_data_type, 1, rois_qinfo }; + + // Fill reference tensor + fill(src); + generate_rois(rois_tensor, input_shape, pool_info, rois_shape); + + return reference::roi_pool_layer(src, rois_tensor, pool_info, output_qinfo); + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; + const DataType _rois_data_type{ DataType::U16 }; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class ROIPoolingLayerQuantizedFixture : public ROIPoolingLayerGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape input_shape, const ROIPoolingLayerInfo pool_info, TensorShape rois_shape, DataType data_type, + DataLayout data_layout, QuantizationInfo qinfo, QuantizationInfo output_qinfo) + { + ROIPoolingLayerGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, pool_info, rois_shape, + data_type, data_layout, qinfo, output_qinfo); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class ROIPoolingLayerFixture : public ROIPoolingLayerGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape input_shape, const ROIPoolingLayerInfo pool_info, TensorShape rois_shape, DataType data_type, DataLayout data_layout) + { + ROIPoolingLayerGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, pool_info, rois_shape, data_type, data_layout, + QuantizationInfo(), QuantizationInfo()); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute + +#endif /* ARM_COMPUTE_TEST_ROIPOOLINGLAYER_FIXTURE */
\ No newline at end of file diff --git a/tests/validation/fixtures/RangeFixture.h b/tests/validation/fixtures/RangeFixture.h index 0713db9237..166613a318 100644 --- a/tests/validation/fixtures/RangeFixture.h +++ b/tests/validation/fixtures/RangeFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -55,7 +55,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class RangeFixture : public framework::Fixture { public: - template <typename...> void setup(const DataType data_type0, float start, float step, const QuantizationInfo qinfo0 = QuantizationInfo()) { _target = compute_target(data_type0, qinfo0, start, step); @@ -113,11 +112,11 @@ protected: FunctionType range_func; range_func.configure(&dst, start, end, step); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Compute function range_func.run(); diff --git a/tests/validation/fixtures/ReduceMeanFixture.h b/tests/validation/fixtures/ReduceMeanFixture.h index 36bf14b27e..e61941435c 100644 --- a/tests/validation/fixtures/ReduceMeanFixture.h +++ b/tests/validation/fixtures/ReduceMeanFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -47,7 +47,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ReduceMeanValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, DataType data_type, Coordinates axis, bool keep_dims, QuantizationInfo quantization_info_input, QuantizationInfo quantization_info_output) { _target = compute_target(shape, data_type, axis, keep_dims, quantization_info_input, quantization_info_output); @@ -92,15 +91,15 @@ protected: FunctionType reduction_mean; reduction_mean.configure(&src, axis, keep_dims, &dst); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src)); @@ -124,7 +123,13 @@ protected: { TensorShape output_shape = i == 0 ? src_shape : out.shape(); output_shape.set(axis[i], 1); - out = reference::reduction_operation<T, T>(i == 0 ? src : out, output_shape, axis[i], ReductionOperation::MEAN_SUM, quantization_info_output); + bool is_opencl = false; + +#ifdef ARM_COMPUTE_OPENCL_ENABLED + is_opencl = std::is_same<CLTensor, TensorType>::value; // Round down to zero on opencl to match kernel +#endif /* ARM_COMPUTE_OPENCL_ENABLED */ + out = reference::reduction_operation<T, T>(i == 0 ? src : out, output_shape, axis[i], ReductionOperation::MEAN_SUM, data_type, quantization_info_output, + is_opencl ? RoundingPolicy::TO_ZERO : RoundingPolicy::TO_NEAREST_UP); } if(!keep_dims) @@ -133,7 +138,7 @@ protected: std::sort(axis.begin(), axis.begin() + axis.num_dimensions()); for(unsigned int i = 0; i < axis.num_dimensions(); ++i) { - output_shape.remove_dimension(axis[i] - i); + output_shape.remove_dimension(axis[i] - i, false); } out = reference::reshape_layer(out, output_shape); @@ -149,7 +154,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ReduceMeanQuantizedFixture : public ReduceMeanValidationFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(TensorShape shape, DataType data_type, Coordinates axis, bool keep_dims, QuantizationInfo quantization_info_input, QuantizationInfo quantization_info_output) { ReduceMeanValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, axis, keep_dims, quantization_info_input, quantization_info_output); @@ -160,7 +164,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ReduceMeanFixture : public ReduceMeanValidationFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(TensorShape shape, DataType data_type, Coordinates axis, bool keep_dims) { ReduceMeanValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, axis, keep_dims, QuantizationInfo(), QuantizationInfo()); diff --git a/tests/validation/fixtures/ReductionOperationFixture.h b/tests/validation/fixtures/ReductionOperationFixture.h index f3d653e6dc..b44f299486 100644 --- a/tests/validation/fixtures/ReductionOperationFixture.h +++ b/tests/validation/fixtures/ReductionOperationFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -45,7 +45,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ReductionOperationValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, DataType data_type, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info, bool keep_dims = false) { const bool is_arg_min_max = (op == ReductionOperation::ARG_IDX_MAX) || (op == ReductionOperation::ARG_IDX_MIN); @@ -76,14 +75,14 @@ protected: if(tensor.data_type() == DataType::QASYMM8) { std::pair<int, int> bounds = get_quantized_bounds(tensor.quantization_info(), -1.0f, 1.0f); - std::uniform_int_distribution<uint8_t> distribution(bounds.first, bounds.second); + std::uniform_int_distribution<uint32_t> distribution(bounds.first, bounds.second); library->fill(tensor, distribution, 0); } else if(tensor.data_type() == DataType::QASYMM8_SIGNED) { std::pair<int, int> bounds = get_quantized_qasymm8_signed_bounds(tensor.quantization_info(), -1.0f, 1.0f); - std::uniform_int_distribution<int8_t> distribution(bounds.first, bounds.second); + std::uniform_int_distribution<int32_t> distribution(bounds.first, bounds.second); library->fill(tensor, distribution, 0); } @@ -108,15 +107,15 @@ protected: FunctionType reduction_func; reduction_func.configure(&src, &dst, axis, op, _keep_dims); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src)); @@ -135,7 +134,7 @@ protected: // Fill reference fill(src); - return reference::reduction_operation<T, T>(src, dst_shape, axis, op, quantization_info); + return reference::reduction_operation<T, T>(src, dst_shape, axis, op, data_type, quantization_info); } TensorType _target{}; @@ -149,7 +148,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ReductionOperationQuantizedFixture : public ReductionOperationValidationFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(TensorShape shape, DataType data_type, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info = QuantizationInfo(), bool keep_dims = false) { ReductionOperationValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, axis, op, quantization_info, keep_dims); @@ -160,7 +158,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ReductionOperationFixture : public ReductionOperationValidationFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(TensorShape shape, DataType data_type, unsigned int axis, ReductionOperation op, bool keep_dims = false) { ReductionOperationValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, axis, op, QuantizationInfo(), keep_dims); diff --git a/tests/validation/fixtures/RemapFixture.h b/tests/validation/fixtures/RemapFixture.h deleted file mode 100644 index e851cdb4df..0000000000 --- a/tests/validation/fixtures/RemapFixture.h +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (c) 2017-2018 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_REMAP_FIXTURE -#define ARM_COMPUTE_TEST_REMAP_FIXTURE - -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/Types.h" -#include "tests/AssetsLibrary.h" -#include "tests/Globals.h" -#include "tests/IAccessor.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Fixture.h" -#include "tests/validation/reference/Remap.h" - -#include <random> - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class RemapValidationFixture : public framework::Fixture -{ -public: - template <typename...> - void setup(TensorShape shape, InterpolationPolicy policy, DataType data_type, BorderMode border_mode) - { - std::mt19937 gen(library->seed()); - std::uniform_int_distribution<uint8_t> distribution(0, 255); - const T constant_border_value = static_cast<T>(distribution(gen)); - - _target = compute_target(shape, policy, data_type, border_mode, constant_border_value); - _reference = compute_reference(shape, policy, data_type, border_mode, constant_border_value); - } - -protected: - template <typename U> - void fill(U &&tensor, int i, float min, float max) - { - std::uniform_int_distribution<> distribution((int)min, (int)max); - library->fill(tensor, distribution, i); - } - - TensorType compute_target(const TensorShape &shape, InterpolationPolicy policy, DataType data_type, BorderMode border_mode, T constant_border_value) - { - // Create tensors - TensorType src = create_tensor<TensorType>(shape, data_type); - TensorType map_x = create_tensor<TensorType>(shape, DataType::F32); - TensorType map_y = create_tensor<TensorType>(shape, DataType::F32); - TensorType dst = create_tensor<TensorType>(shape, data_type); - - // Create and configure function - FunctionType remap; - remap.configure(&src, &map_x, &map_y, &dst, policy, border_mode, constant_border_value); - - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(map_x.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(map_y.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); - - // Allocate tensors - src.allocator()->allocate(); - map_x.allocator()->allocate(); - map_y.allocator()->allocate(); - dst.allocator()->allocate(); - - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!map_x.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!map_y.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); - - // Fill tensors - fill(AccessorType(src), 0, 0, 255); - fill(AccessorType(map_x), 1, -5, shape.x() + 5); - fill(AccessorType(map_y), 2, -5, shape.y() + 5); - - // Compute function - remap.run(); - - return dst; - } - - SimpleTensor<T> compute_reference(const TensorShape &shape, InterpolationPolicy policy, DataType data_type, BorderMode border_mode, T constant_border_value) - { - ARM_COMPUTE_ERROR_ON(data_type != DataType::U8); - - // Create reference - SimpleTensor<T> src{ shape, data_type }; - SimpleTensor<float> map_x{ shape, DataType::F32 }; - SimpleTensor<float> map_y{ shape, DataType::F32 }; - - // Create the valid mask Tensor - _valid_mask = SimpleTensor<T> { shape, data_type }; - - // Fill reference - fill(src, 0, 0, 255); - fill(map_x, 1, -5, shape.x() + 5); - fill(map_y, 2, -5, shape.y() + 5); - - // Compute reference - return reference::remap<T>(src, map_x, map_y, _valid_mask, policy, border_mode, constant_border_value); - } - - TensorType _target{}; - SimpleTensor<T> _reference{}; - SimpleTensor<T> _valid_mask{}; -}; -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_REMAP_FIXTURE */ diff --git a/tests/validation/fixtures/ReorderFixture.h b/tests/validation/fixtures/ReorderFixture.h new file mode 100644 index 0000000000..8e28484c48 --- /dev/null +++ b/tests/validation/fixtures/ReorderFixture.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_REORDERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_REORDERFIXTURE_H + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "tests/AssetsLibrary.h" +#include "tests/Globals.h" +#include "tests/IAccessor.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/validation/reference/Reorder.h" +#include "src/core/NEON/kernels/arm_gemm/utils.hpp" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +/** [ReorderLayer fixture] **/ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class ReorderValidationFixture : public framework::Fixture +{ +public: + void check_hardware_supports(WeightFormat output_wf){ + if(!Scheduler::get().cpu_info().has_sve() && output_wf!=WeightFormat::OHWIo4){ + _hardware_supports = false; + } + if (Scheduler::get().cpu_info().has_sve() && arm_gemm::utils::get_vector_length<float>() != 8 && output_wf==WeightFormat::OHWIo8) + { + _hardware_supports = false; + } + } + + void setup(TensorShape input_shape, TensorShape output_shape, WeightFormat input_wf, WeightFormat output_wf, DataType data_type) + { + check_hardware_supports(output_wf); + if (_hardware_supports){ + _target = compute_target(input_shape, output_shape, input_wf, output_wf, data_type); + _reference = compute_reference(input_shape, output_shape, output_wf, data_type); + } + } + + protected: + template <typename U> + void fill(U &&tensor) + { + library->fill_tensor_uniform(tensor, 0); + } + + TensorType compute_target(const TensorShape &input_shape, const TensorShape &output_shape, WeightFormat input_wf, WeightFormat output_wf, DataType data_type) + { + // Create tensors + TensorType src = create_tensor<TensorType>(input_shape, data_type); + TensorType dst = create_tensor<TensorType>(output_shape, data_type); + + // Create and configure function + FunctionType reorder; + + reorder.configure(&src, &dst, input_wf, output_wf); + + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + + // Fill tensors + fill(AccessorType(src)); + + // Compute function + reorder.run(); + + return dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &output_shape, WeightFormat output_wf, DataType data_type) + { + // Create reference + SimpleTensor<T> src{ input_shape, data_type }; + + // Fill reference + fill(src); + + return reference::reorder_layer<T>(src, output_shape, output_wf); + } + + bool _hardware_supports = true; + TensorType _target{}; + SimpleTensor<T> _reference{}; +}; +/** [ReorderLayer fixture] **/ +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_REORDERFIXTURE_H diff --git a/tests/validation/fixtures/ReorgLayerFixture.h b/tests/validation/fixtures/ReorgLayerFixture.h index 630802214a..f87017190e 100644 --- a/tests/validation/fixtures/ReorgLayerFixture.h +++ b/tests/validation/fixtures/ReorgLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -43,7 +43,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ReorgLayerValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape input_shape, int32_t stride, DataType data_type, DataLayout data_layout) { _target = compute_target(input_shape, stride, data_type, data_layout); @@ -74,15 +73,15 @@ protected: reorg.configure(&src, &dst, stride); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src), 0); diff --git a/tests/validation/fixtures/ReshapeLayerFixture.h b/tests/validation/fixtures/ReshapeLayerFixture.h index a89a94741f..5be431f8cf 100644 --- a/tests/validation/fixtures/ReshapeLayerFixture.h +++ b/tests/validation/fixtures/ReshapeLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_RESHAPE_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_RESHAPE_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_RESHAPELAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_RESHAPELAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -31,6 +31,7 @@ #include "tests/IAccessor.h" #include "tests/framework/Asserts.h" #include "tests/framework/Fixture.h" +#include "tests/validation/Helpers.h" #include "tests/validation/reference/ReshapeLayer.h" namespace arm_compute @@ -41,13 +42,12 @@ namespace validation { /** [ReshapeLayer fixture] **/ template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class ReshapeLayerValidationFixture : public framework::Fixture +class ReshapeLayerGenericValidationFixture : public framework::Fixture { public: - template <typename...> - void setup(TensorShape input_shape, TensorShape output_shape, DataType data_type) + void setup(TensorShape input_shape, TensorShape output_shape, DataType data_type, bool add_x_padding = false) { - _target = compute_target(input_shape, output_shape, data_type); + _target = compute_target(input_shape, output_shape, data_type, add_x_padding); _reference = compute_reference(input_shape, output_shape, data_type); } @@ -58,10 +58,10 @@ protected: library->fill_tensor_uniform(tensor, i); } - TensorType compute_target(const TensorShape &input_shape, const TensorShape &output_shape, DataType data_type) + TensorType compute_target(const TensorShape &input_shape, const TensorShape &output_shape, DataType data_type, bool add_x_padding = false) { // Check if indeed the input shape can be reshape to the output one - ARM_COMPUTE_EXPECT(input_shape.total_size() == output_shape.total_size(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(input_shape.total_size() == output_shape.total_size()); // Create tensors TensorType src = create_tensor<TensorType>(input_shape, data_type); @@ -72,15 +72,21 @@ protected: reshape.configure(&src, &dst); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + + if(add_x_padding) + { + // Add random padding in x dimension + add_padding_x({ &src, &dst }); + } // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src), 0); @@ -105,8 +111,27 @@ protected: TensorType _target{}; SimpleTensor<T> _reference{}; }; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class ReshapeLayerValidationFixture : public ReshapeLayerGenericValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape input_shape, TensorShape output_shape, DataType data_type) + { + ReshapeLayerGenericValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, output_shape, data_type); + } +}; +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class ReshapeLayerPaddedValidationFixture : public ReshapeLayerGenericValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape input_shape, TensorShape output_shape, DataType data_type) + { + ReshapeLayerGenericValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, output_shape, data_type, true /* add_x_padding */); + } +}; /** [ReshapeLayer fixture] **/ } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_RESHAPE_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_RESHAPELAYERFIXTURE_H diff --git a/tests/validation/fixtures/ReverseFixture.h b/tests/validation/fixtures/ReverseFixture.h index 4982cae578..856bff7b12 100644 --- a/tests/validation/fixtures/ReverseFixture.h +++ b/tests/validation/fixtures/ReverseFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_REVERSE_FIXTURE -#define ARM_COMPUTE_TEST_REVERSE_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_REVERSEFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_REVERSEFIXTURE_H #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorShape.h" @@ -45,11 +45,11 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ReverseValidationFixture : public framework::Fixture { public: - template <typename...> - void setup(TensorShape shape, TensorShape axis_shape, DataType data_type) + void setup(TensorShape shape, TensorShape axis_shape, DataType data_type, bool use_negative_axis = false, bool use_inverted_axis = false) { - _target = compute_target(shape, axis_shape, data_type); - _reference = compute_reference(shape, axis_shape, data_type); + _num_dims = shape.num_dimensions(); + _target = compute_target(shape, axis_shape, data_type, use_negative_axis, use_inverted_axis); + _reference = compute_reference(shape, axis_shape, data_type, use_negative_axis, use_inverted_axis); } protected: @@ -58,16 +58,25 @@ protected: { library->fill_tensor_uniform(tensor, 0); } - std::vector<int> generate_random_axis() + std::vector<int32_t> generate_random_axis(bool use_negative = false) { - std::vector<int> axis_v = { 0, 1, 2, 3 }; - std::mt19937 g(0); + std::vector<int32_t> axis_v; + if(use_negative) + { + axis_v = { -1, -2, -3, -4 }; + } + else + { + axis_v = { 0, 1, 2, 3 }; + } + axis_v = std::vector<int32_t>(axis_v.begin(), axis_v.begin() + _num_dims); + std::mt19937 g(library->seed()); std::shuffle(axis_v.begin(), axis_v.end(), g); return axis_v; } - TensorType compute_target(const TensorShape &shape, const TensorShape &axis_shape, DataType data_type) + TensorType compute_target(const TensorShape &shape, const TensorShape &axis_shape, DataType data_type, bool use_negative_axis, bool use_inverted_axis = false) { // Create tensors TensorType src = create_tensor<TensorType>(shape, data_type, 1); @@ -76,27 +85,27 @@ protected: // Create and configure function FunctionType reverse_func; - reverse_func.configure(&src, &dst, &axis); + reverse_func.configure(&src, &dst, &axis, use_inverted_axis); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(axis.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(axis.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); axis.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!axis.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!axis.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src)); { auto axis_data = AccessorType(axis); - auto axis_v = generate_random_axis(); - std::copy(axis_v.begin(), axis_v.begin() + axis_shape.x(), static_cast<int32_t *>(axis_data.data())); + auto axis_v = generate_random_axis(use_negative_axis); + std::copy(axis_v.begin(), axis_v.begin() + axis_shape.total_size(), static_cast<int32_t *>(axis_data.data())); } // Compute function @@ -105,24 +114,25 @@ protected: return dst; } - SimpleTensor<T> compute_reference(const TensorShape &shape, const TensorShape &axis_shape, DataType data_type) + SimpleTensor<T> compute_reference(const TensorShape &shape, const TensorShape &axis_shape, DataType data_type, bool use_negative_axis, bool use_inverted_axis = false) { // Create reference - SimpleTensor<T> src{ shape, data_type }; - SimpleTensor<uint32_t> axis{ axis_shape, DataType::U32 }; + SimpleTensor<T> src{ shape, data_type }; + SimpleTensor<int32_t> axis{ axis_shape, DataType::S32 }; // Fill reference fill(src); - auto axis_v = generate_random_axis(); - std::copy(axis_v.begin(), axis_v.begin() + axis_shape.x(), axis.data()); + auto axis_v = generate_random_axis(use_negative_axis); + std::copy(axis_v.begin(), axis_v.begin() + axis_shape.total_size(), axis.data()); - return reference::reverse<T>(src, axis); + return reference::reverse<T>(src, axis, use_inverted_axis); } TensorType _target{}; SimpleTensor<T> _reference{}; + unsigned int _num_dims{}; }; } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_REVERSE_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_REVERSEFIXTURE_H diff --git a/tests/validation/fixtures/ScaleFixture.h b/tests/validation/fixtures/ScaleFixture.h index dd521470e6..86d89d71f7 100644 --- a/tests/validation/fixtures/ScaleFixture.h +++ b/tests/validation/fixtures/ScaleFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,15 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_SCALE_FIXTURE -#define ARM_COMPUTE_TEST_SCALE_FIXTURE - -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/Types.h" -#include "tests/AssetsLibrary.h" -#include "tests/Globals.h" -#include "tests/IAccessor.h" -#include "tests/framework/Asserts.h" +#ifndef ACL_TESTS_VALIDATION_FIXTURES_SCALEFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_SCALEFIXTURE_H + +#include "tests/framework/Asserts.h" // Required for ARM_COMPUTE_ASSERT #include "tests/framework/Fixture.h" #include "tests/validation/reference/Permute.h" #include "tests/validation/reference/Scale.h" @@ -44,22 +39,23 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ScaleValidationGenericFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, DataType data_type, QuantizationInfo quantization_info, DataLayout data_layout, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy, - bool align_corners) + bool align_corners, bool mixed_layout, QuantizationInfo output_quantization_info) { - _shape = shape; - _policy = policy; - _border_mode = border_mode; - _sampling_policy = sampling_policy; - _data_type = data_type; - _quantization_info = quantization_info; - _align_corners = align_corners; + _shape = shape; + _policy = policy; + _border_mode = border_mode; + _sampling_policy = sampling_policy; + _data_type = data_type; + _input_quantization_info = quantization_info; + _output_quantization_info = output_quantization_info; + _align_corners = align_corners; + _mixed_layout = mixed_layout; generate_scale(shape); - std::mt19937 generator(library->seed()); - std::uniform_int_distribution<uint8_t> distribution_u8(0, 255); + std::mt19937 generator(library->seed()); + std::uniform_int_distribution<uint32_t> distribution_u8(0, 255); _constant_border_value = static_cast<T>(distribution_u8(generator)); _target = compute_target(shape, data_layout); @@ -67,6 +63,21 @@ public: } protected: + void mix_layout(FunctionType &layer, TensorType &src, TensorType &dst) + { + const DataLayout data_layout = src.info()->data_layout(); + // Test Multi DataLayout graph cases, when the data layout changes after configure + src.info()->set_data_layout(data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); + dst.info()->set_data_layout(data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); + + // Compute Convolution function + layer.run(); + + // Reinstating original data layout for the test suite to properly check the values + src.info()->set_data_layout(data_layout); + dst.info()->set_data_layout(data_layout); + } + void generate_scale(const TensorShape &shape) { static constexpr float _min_scale{ 0.25f }; @@ -128,7 +139,7 @@ protected: } // Create tensors - TensorType src = create_tensor<TensorType>(shape, _data_type, 1, _quantization_info, data_layout); + TensorType src = create_tensor<TensorType>(shape, _data_type, 1, _input_quantization_info, data_layout); const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); @@ -136,40 +147,48 @@ protected: TensorShape shape_scaled(shape); shape_scaled.set(idx_width, shape[idx_width] * _scale_x, /* apply_dim_correction = */ false); shape_scaled.set(idx_height, shape[idx_height] * _scale_y, /* apply_dim_correction = */ false); - TensorType dst = create_tensor<TensorType>(shape_scaled, _data_type, 1, _quantization_info, data_layout); + TensorType dst = create_tensor<TensorType>(shape_scaled, _data_type, 1, _output_quantization_info, data_layout); // Create and configure function FunctionType scale; scale.configure(&src, &dst, ScaleKernelInfo{ _policy, _border_mode, _constant_border_value, _sampling_policy, /* use_padding */ false, _align_corners }); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + + add_padding_x({ &src, &dst }, data_layout); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src)); - // Compute function - scale.run(); - + if(_mixed_layout) + { + mix_layout(scale, src, dst); + } + else + { + // Compute function + scale.run(); + } return dst; } SimpleTensor<T> compute_reference(const TensorShape &shape) { // Create reference - SimpleTensor<T> src{ shape, _data_type, 1, _quantization_info }; + SimpleTensor<T> src{ shape, _data_type, 1, _input_quantization_info }; // Fill reference fill(src); - return reference::scale<T>(src, _scale_x, _scale_y, _policy, _border_mode, _constant_border_value, _sampling_policy, /* ceil_policy_scale */ false, _align_corners); + return reference::scale<T>(src, _scale_x, _scale_y, _policy, _border_mode, _constant_border_value, _sampling_policy, /* ceil_policy_scale */ false, _align_corners, _output_quantization_info); } TensorType _target{}; @@ -180,17 +199,18 @@ protected: T _constant_border_value{}; SamplingPolicy _sampling_policy{}; DataType _data_type{}; - QuantizationInfo _quantization_info{}; + QuantizationInfo _input_quantization_info{}; + QuantizationInfo _output_quantization_info{}; bool _align_corners{ false }; + bool _mixed_layout{ false }; float _scale_x{ 1.f }; float _scale_y{ 1.f }; }; -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false> class ScaleValidationQuantizedFixture : public ScaleValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(TensorShape shape, DataType data_type, QuantizationInfo quantization_info, DataLayout data_layout, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy, bool align_corners) { @@ -201,14 +221,35 @@ public: policy, border_mode, sampling_policy, - align_corners); + align_corners, + mixed_layout, + quantization_info); } }; -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false> +class ScaleValidationDifferentOutputQuantizedFixture : public ScaleValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape shape, DataType data_type, QuantizationInfo input_quantization_info, QuantizationInfo output_quantization_info, DataLayout data_layout, InterpolationPolicy policy, + BorderMode border_mode, SamplingPolicy sampling_policy, + bool align_corners) + { + ScaleValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, + data_type, + input_quantization_info, + data_layout, + policy, + border_mode, + sampling_policy, + align_corners, + mixed_layout, + output_quantization_info); + } +}; +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false> class ScaleValidationFixture : public ScaleValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(TensorShape shape, DataType data_type, DataLayout data_layout, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy, bool align_corners) { ScaleValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, @@ -218,10 +259,12 @@ public: policy, border_mode, sampling_policy, - align_corners); + align_corners, + mixed_layout, + QuantizationInfo()); } }; } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_SCALE_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_SCALEFIXTURE_H diff --git a/tests/validation/fixtures/ScatterLayerFixture.h b/tests/validation/fixtures/ScatterLayerFixture.h new file mode 100644 index 0000000000..af161ef98b --- /dev/null +++ b/tests/validation/fixtures/ScatterLayerFixture.h @@ -0,0 +1,254 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_SCATTERLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_SCATTERLAYERFIXTURE_H + +#include "arm_compute/core/Utils.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "tests/Globals.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/validation/Helpers.h" +#include "tests/validation/Validation.h" +#include "tests/validation/reference/ScatterLayer.h" +#include "tests/SimpleTensor.h" + +#include <random> +#include <cstdint> + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class ScatterGenericValidationFixture : public framework::Fixture +{ +public: + void setup(TensorShape src_shape, TensorShape updates_shape, TensorShape indices_shape, + TensorShape out_shape, DataType data_type, ScatterInfo scatter_info, bool inplace, bool padding, + QuantizationInfo src_qinfo = QuantizationInfo(), QuantizationInfo o_qinfo = QuantizationInfo()) + { + // this is for improving randomness across tests + _hash = src_shape[0] + src_shape[1] + src_shape[2] + src_shape[3] + src_shape[4] + src_shape[5] + + updates_shape[0] + updates_shape[1] + updates_shape[2] + updates_shape[3] + + updates_shape[4] + updates_shape[5] + + indices_shape[0] + indices_shape[1] + indices_shape[2] + indices_shape[3]; + + _target = compute_target(src_shape, updates_shape, indices_shape, out_shape, data_type, scatter_info, inplace, padding, src_qinfo, o_qinfo); + _reference = compute_reference(src_shape, updates_shape, indices_shape, out_shape, data_type,scatter_info, src_qinfo , o_qinfo); + } + +protected: + template <typename U> + void fill(U &&tensor, int i) + { + switch(tensor.data_type()) + { + case DataType::F32: + case DataType::F16: + { + std::uniform_real_distribution<float> distribution(-10.f, 10.f); + library->fill(tensor, distribution, i); + break; + } + case DataType::S32: + case DataType::S16: + case DataType::S8: + { + std::uniform_int_distribution<int32_t> distribution(-100, 100); + library->fill(tensor, distribution, i); + break; + } + case DataType::U32: + case DataType::U16: + case DataType::U8: + { + std::uniform_int_distribution<uint32_t> distribution(0, 200); + library->fill(tensor, distribution, i); + break; + } + default: + { + ARM_COMPUTE_ERROR("Unsupported data type."); + } + } + } + + // This is used to fill indices tensor with S32 datatype. + // Used to prevent ONLY having values that are out of bounds. + template <typename U> + void fill_indices(U &&tensor, int i, const TensorShape &shape) + { + // Calculate max indices the shape should contain. Add an arbitrary value to allow testing for some out of bounds values (In this case min dimension) + const int32_t max = std::min({shape[0] , shape[1], shape[2]}) + 1; + library->fill_tensor_uniform(tensor, i, static_cast<int32_t>(0), static_cast<int32_t>(max)); + } + + TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c, + const TensorShape &out_shape, DataType data_type, const ScatterInfo info, bool inplace, bool padding, + QuantizationInfo a_qinfo, QuantizationInfo o_qinfo) + { + // 1. Create relevant tensors using ScatterInfo data structure. + // ---------------------------------------------------- + // In order - src, updates, indices, output. + TensorType src = create_tensor<TensorType>(shape_a, data_type, 1, a_qinfo); + TensorType updates = create_tensor<TensorType>(shape_b, data_type, 1, a_qinfo); + TensorType indices = create_tensor<TensorType>(shape_c, DataType::S32, 1, QuantizationInfo()); + TensorType dst = create_tensor<TensorType>(out_shape, data_type, 1, o_qinfo); + + FunctionType scatter; + + // Configure operator + // When scatter_info.zero_initialization is true, pass nullptr for src + // because dst does not need to be initialized with src values. + if(info.zero_initialization) + { + scatter.configure(nullptr, &updates, &indices, &dst, info); + } + else + { + if(inplace) + { + scatter.configure(&src, &updates, &indices, &src, info); + } + else + { + scatter.configure(&src, &updates, &indices, &dst, info); + } + } + + // Assertions + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(updates.info()->is_resizable()); + ARM_COMPUTE_ASSERT(indices.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + + if(padding) + { + add_padding_x({ &src, &updates, &indices}); + + if(!inplace) + { + add_padding_x({ &dst }); + } + } + + // Allocate tensors + src.allocator()->allocate(); + updates.allocator()->allocate(); + indices.allocator()->allocate(); + + if(!inplace) + { + dst.allocator()->allocate(); + } + + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!updates.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!indices.info()->is_resizable()); + + if(!inplace) + { + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + } + + // Fill update (a) and indices (b) tensors. + fill(AccessorType(src), 0 + _hash); + fill(AccessorType(updates), 1+ _hash); + fill_indices(AccessorType(indices), 2 + _hash, out_shape); + + scatter.run(); + + if(inplace) + { + return src; + } + else + { + return dst; + } + } + + SimpleTensor<T> compute_reference(const TensorShape &a_shape, const TensorShape &b_shape, const TensorShape &c_shape, + const TensorShape &out_shape, DataType data_type, ScatterInfo info, QuantizationInfo a_qinfo, QuantizationInfo o_qinfo) + { + // Output Quantization not currently in use - fixture should be extended to support this. + ARM_COMPUTE_UNUSED(o_qinfo); + TensorShape src_shape = a_shape; + TensorShape updates_shape = b_shape; + TensorShape indices_shape = c_shape; + const int num_ind_dims = c_shape.num_dimensions(); + + // 1. Collapse batch index into a single dim if necessary for update tensor and indices tensor. + if(num_ind_dims >= 3) + { + indices_shape = indices_shape.collapsed_from(1); + updates_shape = updates_shape.collapsed_from(updates_shape.num_dimensions() - (num_ind_dims -1)); // Collapses batch dims + } + + // 2. Collapse data dims into a single dim. + // Collapse all src dims into 2 dims. First one holding data, the other being the index we iterate over. + src_shape.collapse(updates_shape.num_dimensions() - 1); // Collapse all data dims into single dim. + src_shape = src_shape.collapsed_from(1); // Collapse all index dims into a single dim + updates_shape.collapse(updates_shape.num_dimensions() - 1); // Collapse data dims (all except last dim which is batch dim) + + // Create reference tensors + SimpleTensor<T> src{ src_shape, data_type, 1, a_qinfo }; + SimpleTensor<T> updates{updates_shape, data_type, 1, QuantizationInfo() }; + SimpleTensor<int32_t> indices{ indices_shape, DataType::S32, 1, QuantizationInfo() }; + + // Fill reference + fill(src, 0 + _hash); + fill(updates, 1 + _hash); + fill_indices(indices, 2 + _hash, out_shape); + + // Calculate individual reference using collapsed shapes + return reference::scatter_layer<T>(src, updates, indices, out_shape, info); + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; + int32_t _hash{}; +}; + +// This fixture will use the same shape for updates as indices. +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class ScatterValidationFixture : public ScatterGenericValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape src_shape, TensorShape update_shape, TensorShape indices_shape, + TensorShape out_shape, DataType data_type, ScatterFunction func, bool zero_init, bool inplace, bool padding) + { + ScatterGenericValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(src_shape, update_shape, + indices_shape, out_shape, data_type, ScatterInfo(func, zero_init), inplace, padding, + QuantizationInfo(), QuantizationInfo()); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_SCATTERLAYERFIXTURE_H diff --git a/tests/validation/fixtures/ScharrFixture.h b/tests/validation/fixtures/ScharrFixture.h index 204ffc6767..b54a9d29e6 100644 --- a/tests/validation/fixtures/ScharrFixture.h +++ b/tests/validation/fixtures/ScharrFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -66,7 +66,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class ScharrValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, BorderMode border_mode, Format format, GradientDimension gradient_dimension) { // Generate a random constant value @@ -120,18 +119,18 @@ protected: ARM_COMPUTE_ERROR("Gradient dimension not supported"); } - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst_x.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst_y.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst_x.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst_y.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst_x.allocator()->allocate(); dst_y.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst_x.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst_y.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst_x.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst_y.info()->is_resizable()); // Fill tensors fill(AccessorType(src)); diff --git a/tests/validation/fixtures/SelectFixture.h b/tests/validation/fixtures/SelectFixture.h index 96a7c8666a..8cb6f062f9 100644 --- a/tests/validation/fixtures/SelectFixture.h +++ b/tests/validation/fixtures/SelectFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -63,7 +63,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class SelectValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, bool has_same_same_rank, DataType data_type) { TensorShape condition_shape = detail::select_condition_shape(shape, has_same_same_rank); @@ -97,10 +96,10 @@ protected: FunctionType select; select.configure(&c_t, &x_t, &y_t, &dst_t); - ARM_COMPUTE_EXPECT(c_t.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(x_t.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(y_t.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst_t.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(c_t.info()->is_resizable()); + ARM_COMPUTE_ASSERT(x_t.info()->is_resizable()); + ARM_COMPUTE_ASSERT(y_t.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst_t.info()->is_resizable()); // Allocate tensors c_t.allocator()->allocate(); @@ -108,10 +107,10 @@ protected: y_t.allocator()->allocate(); dst_t.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!c_t.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!x_t.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!y_t.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst_t.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!c_t.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!x_t.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!y_t.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst_t.info()->is_resizable()); // Fill tensors fill_bool(AccessorType(c_t), 0); diff --git a/tests/validation/fixtures/SliceOperationsFixtures.h b/tests/validation/fixtures/SliceOperationsFixtures.h index c1e046e427..b1f91ea2e0 100644 --- a/tests/validation/fixtures/SliceOperationsFixtures.h +++ b/tests/validation/fixtures/SliceOperationsFixtures.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,7 +30,6 @@ #include "tests/AssetsLibrary.h" #include "tests/Globals.h" #include "tests/IAccessor.h" -#include "tests/RawLutAccessor.h" #include "tests/framework/Asserts.h" #include "tests/framework/Fixture.h" #include "tests/validation/Helpers.h" @@ -46,7 +45,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class SliceFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, Coordinates starts, Coordinates ends, DataType data_type) { _target = compute_target(shape, starts, ends, data_type); @@ -70,15 +68,15 @@ protected: FunctionType slice; slice.configure(&src, &dst, starts, ends); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src), 0); @@ -109,7 +107,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class StridedSliceFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, Coordinates starts, Coordinates ends, BiStrides strides, int32_t begin_mask, int32_t end_mask, int32_t shrink_mask, @@ -139,15 +136,15 @@ protected: FunctionType strided_slice; strided_slice.configure(&src, &dst, starts, ends, strides, begin_mask, end_mask, shrink_mask); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src), 0); diff --git a/tests/validation/fixtures/SoftmaxLayerFixture.h b/tests/validation/fixtures/SoftmaxLayerFixture.h index c39ab740cd..f4bf8df9c0 100644 --- a/tests/validation/fixtures/SoftmaxLayerFixture.h +++ b/tests/validation/fixtures/SoftmaxLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -46,7 +46,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class SoftmaxValidationGenericFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, DataType data_type, QuantizationInfo quantization_info, float beta, size_t axis) { _quantization_info = quantization_info; @@ -91,15 +90,15 @@ protected: FunctionType smx_layer; smx_layer.configure(&src, &dst, beta, axis); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src)); @@ -131,7 +130,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class SoftmaxValidationFixture : public SoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T, IS_LOG> { public: - template <typename...> void setup(TensorShape shape, DataType data_type, float beta, size_t axis) { SoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T, IS_LOG>::setup(shape, @@ -146,7 +144,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class SoftmaxValidationQuantizedFixture : public SoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T, IS_LOG> { public: - template <typename...> void setup(TensorShape shape, DataType data_type, QuantizationInfo quantization_info, float beta, size_t axis) { SoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T, IS_LOG>::setup(shape, diff --git a/tests/validation/fixtures/SpaceToBatchFixture.h b/tests/validation/fixtures/SpaceToBatchFixture.h index c4076e6ed6..964e511301 100644 --- a/tests/validation/fixtures/SpaceToBatchFixture.h +++ b/tests/validation/fixtures/SpaceToBatchFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -39,7 +39,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class SpaceToBatchLayerValidationGenericFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape input_shape, TensorShape block_shape_shape, TensorShape paddings_shape, TensorShape output_shape, DataType data_type, DataLayout data_layout, QuantizationInfo quantization_info) { @@ -79,10 +78,10 @@ protected: FunctionType space_to_batch; space_to_batch.configure(&input, &block_shape, &paddings, &output); - ARM_COMPUTE_EXPECT(input.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(block_shape.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(paddings.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(output.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(input.info()->is_resizable()); + ARM_COMPUTE_ASSERT(block_shape.info()->is_resizable()); + ARM_COMPUTE_ASSERT(paddings.info()->is_resizable()); + ARM_COMPUTE_ASSERT(output.info()->is_resizable()); // Allocate tensors input.allocator()->allocate(); @@ -90,10 +89,10 @@ protected: paddings.allocator()->allocate(); output.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!input.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!block_shape.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!paddings.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!output.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!input.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!block_shape.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!paddings.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!output.info()->is_resizable()); // Fill tensors fill(AccessorType(input), 0); @@ -140,7 +139,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class SpaceToBatchLayerValidationFixture : public SpaceToBatchLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(TensorShape input_shape, TensorShape block_shape_shape, TensorShape paddings_shape, TensorShape output_shape, DataType data_type, DataLayout data_layout) { @@ -152,7 +150,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class SpaceToBatchLayerValidationQuantizedFixture : public SpaceToBatchLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - template <typename...> void setup(TensorShape input_shape, TensorShape block_shape_shape, TensorShape paddings_shape, TensorShape output_shape, DataType data_type, DataLayout data_layout, QuantizationInfo quantization_info) { diff --git a/tests/validation/fixtures/SpaceToDepthFixture.h b/tests/validation/fixtures/SpaceToDepthFixture.h index 45ea34b0f5..2d2e9fad7d 100644 --- a/tests/validation/fixtures/SpaceToDepthFixture.h +++ b/tests/validation/fixtures/SpaceToDepthFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_TEST_SPACE_TO_DEPTH_LAYER_FIXTURE #define ARM_COMPUTE_TEST_SPACE_TO_DEPTH_LAYER_FIXTURE +#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "tests/Globals.h" #include "tests/framework/Asserts.h" #include "tests/framework/Fixture.h" @@ -39,7 +40,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class SpaceToDepthLayerValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape input_shape, TensorShape output_shape, const int block_shape, DataType data_type, DataLayout data_layout) { _target = compute_target(input_shape, output_shape, block_shape, data_type, data_layout); @@ -69,19 +69,25 @@ protected: TensorType input = create_tensor<TensorType>(input_shape, data_type, 1, QuantizationInfo(), data_layout); TensorType output = create_tensor<TensorType>(output_shape, data_type, 1, QuantizationInfo(), data_layout); + auto calc_out_shape = misc::shape_calculator::compute_space_to_depth_shape(input.info(), block_shape); + ARM_COMPUTE_ASSERT(output_shape[0] == calc_out_shape[0]); + ARM_COMPUTE_ASSERT(output_shape[1] == calc_out_shape[1]); + ARM_COMPUTE_ASSERT(output_shape[2] == calc_out_shape[2]); + ARM_COMPUTE_ASSERT(output_shape[3] == calc_out_shape[3]); + // Create and configure function FunctionType space_to_depth; space_to_depth.configure(&input, &output, block_shape); - ARM_COMPUTE_EXPECT(input.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(output.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(input.info()->is_resizable()); + ARM_COMPUTE_ASSERT(output.info()->is_resizable()); // Allocate tensors input.allocator()->allocate(); output.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!input.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!output.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!input.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!output.info()->is_resizable()); // Fill tensors fill(AccessorType(input), 0); diff --git a/tests/validation/fixtures/SplitFixture.h b/tests/validation/fixtures/SplitFixture.h index 03ff41e993..203925329c 100644 --- a/tests/validation/fixtures/SplitFixture.h +++ b/tests/validation/fixtures/SplitFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,7 +30,6 @@ #include "tests/AssetsLibrary.h" #include "tests/Globals.h" #include "tests/IAccessor.h" -#include "tests/RawLutAccessor.h" #include "tests/framework/Asserts.h" #include "tests/framework/Fixture.h" #include "tests/validation/Helpers.h" @@ -48,7 +47,6 @@ template <typename TensorType, typename ITensorType, typename AccessorType, type class SplitFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, unsigned int axis, unsigned int splits, DataType data_type) { _target = compute_target(shape, axis, splits, data_type); @@ -77,7 +75,7 @@ protected: FunctionType split; split.configure(&src, dsts_ptr, axis); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); ARM_COMPUTE_EXPECT(std::all_of(dsts.cbegin(), dsts.cend(), [](const TensorType & t) { return t.info()->is_resizable(); @@ -91,7 +89,7 @@ protected: dsts[i].allocator()->allocate(); } - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); ARM_COMPUTE_EXPECT(std::all_of(dsts.cbegin(), dsts.cend(), [](const TensorType & t) { return !t.info()->is_resizable(); @@ -150,7 +148,6 @@ template <typename TensorType, typename ITensorType, typename AccessorType, type class SplitShapesFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, unsigned int axis, std::vector<TensorShape> split_shapes, DataType data_type) { _target = compute_target(shape, axis, split_shapes, data_type); @@ -186,7 +183,7 @@ protected: FunctionType split; split.configure(&src, dsts_ptr, axis); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); ARM_COMPUTE_EXPECT(std::all_of(dsts.cbegin(), dsts.cend(), [](const TensorType & t) { return t.info()->is_resizable(); @@ -200,7 +197,7 @@ protected: dsts[i].allocator()->allocate(); } - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); ARM_COMPUTE_EXPECT(std::all_of(dsts.cbegin(), dsts.cend(), [](const TensorType & t) { return !t.info()->is_resizable(); diff --git a/tests/validation/fixtures/StackLayerFixture.h b/tests/validation/fixtures/StackLayerFixture.h index 7bf63a3ebc..7dd8fe47dc 100644 --- a/tests/validation/fixtures/StackLayerFixture.h +++ b/tests/validation/fixtures/StackLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_STACK_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_STACK_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_STACKLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_STACKLAYERFIXTURE_H #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorShape.h" @@ -52,10 +52,9 @@ template <typename TensorType, typename AbstractTensorType, typename AccessorTyp class StackLayerValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape_src, int axis, DataType data_type, int num_tensors) { - _target = compute_target(shape_src, axis, data_type, num_tensors); + _target = compute_target(shape_src, axis, data_type, num_tensors, false /* add_x_padding */); _reference = compute_reference(shape_src, axis, data_type, num_tensors); } @@ -66,7 +65,7 @@ protected: library->fill_tensor_uniform(tensor, i); } - TensorType compute_target(TensorShape shape_src, int axis, DataType data_type, int num_tensors) + TensorType compute_target(TensorShape shape_src, int axis, DataType data_type, int num_tensors, bool add_x_padding) { std::vector<TensorType> tensors(num_tensors); std::vector<AbstractTensorType *> src(num_tensors); @@ -76,7 +75,7 @@ protected: { tensors[i] = create_tensor<TensorType>(shape_src, data_type); src[i] = &(tensors[i]); - ARM_COMPUTE_EXPECT(tensors[i].info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(tensors[i].info()->is_resizable()); } // Create tensors @@ -91,18 +90,28 @@ protected: // Allocate and fill the input tensors for(int i = 0; i < num_tensors; ++i) { - ARM_COMPUTE_EXPECT(tensors[i].info()->is_resizable(), framework::LogLevel::ERRORS); + if(add_x_padding) + { + add_padding_x({&tensors[i]}, DataLayout::NHWC); + } + + ARM_COMPUTE_ASSERT(tensors[i].info()->is_resizable()); tensors[i].allocator()->allocate(); - ARM_COMPUTE_EXPECT(!tensors[i].info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!tensors[i].info()->is_resizable()); // Fill input tensor fill(AccessorType(tensors[i]), i); } + if(add_x_padding) + { + add_padding_x({&dst}, DataLayout::NHWC); + } + // Allocate output tensor dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Compute stack function stack.run(); @@ -132,7 +141,21 @@ protected: TensorType _target{}; SimpleTensor<T> _reference{}; }; + +template <typename TensorType, typename AbstractTensorType, typename AccessorType, typename FunctionType, typename T> +class StackLayerWithPaddingValidationFixture : + public StackLayerValidationFixture<TensorType, AbstractTensorType, AccessorType, FunctionType, T> +{ +public: + using Parent = StackLayerValidationFixture<TensorType, AbstractTensorType, AccessorType, FunctionType, T>; + + void setup(TensorShape shape_src, int axis, DataType data_type, int num_tensors) + { + Parent::_target = Parent::compute_target(shape_src, axis, data_type, num_tensors, true /* add_x_padding */); + Parent::_reference = Parent::compute_reference(shape_src, axis, data_type, num_tensors); + } +}; } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_STACK_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_STACKLAYERFIXTURE_H diff --git a/tests/validation/fixtures/TileFixture.h b/tests/validation/fixtures/TileFixture.h index 0dfcc330f3..979eee5ab1 100644 --- a/tests/validation/fixtures/TileFixture.h +++ b/tests/validation/fixtures/TileFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,7 +44,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class TileValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, DataType data_type, const Multiples &multiples) { _target = compute_target(shape, data_type, multiples); @@ -68,15 +67,15 @@ protected: FunctionType tile_func; tile_func.configure(&src, &dst, multiples); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src)); diff --git a/tests/validation/fixtures/TransposeFixture.h b/tests/validation/fixtures/TransposeFixture.h index 757e6c3c07..212c76cc9a 100644 --- a/tests/validation/fixtures/TransposeFixture.h +++ b/tests/validation/fixtures/TransposeFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_TRANSPOSE_FIXTURE -#define ARM_COMPUTE_TEST_TRANSPOSE_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_TRANSPOSEFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_TRANSPOSEFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -32,7 +32,7 @@ #include "tests/IAccessor.h" #include "tests/framework/Asserts.h" #include "tests/framework/Fixture.h" -#include "tests/validation/reference/Transpose.h" +#include "tests/validation/reference/Permute.h" namespace arm_compute { @@ -44,7 +44,6 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class TransposeValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape shape, DataType data_type) { _target = compute_target(shape, data_type); @@ -71,15 +70,15 @@ protected: FunctionType trans_func; trans_func.configure(&src, &dst); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src)); @@ -98,7 +97,7 @@ protected: // Fill reference fill(src); - return reference::transpose<T>(src); + return reference::permute<T>(src, PermutationVector(1U, 0U)); } TensorType _target{}; @@ -107,4 +106,4 @@ protected: } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_TRANSPOSE_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_TRANSPOSEFIXTURE_H diff --git a/tests/validation/fixtures/UNIT/ContextFixture.h b/tests/validation/fixtures/UNIT/ContextFixture.h new file mode 100644 index 0000000000..77cbc12320 --- /dev/null +++ b/tests/validation/fixtures/UNIT/ContextFixture.h @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_TEST_UNIT_CONTEXT_FIXTURE +#define ARM_COMPUTE_TEST_UNIT_CONTEXT_FIXTURE + +#include "arm_compute/Acl.hpp" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/validation/Validation.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +/** Test-case for AclDestroyContext + * + * Validate that AclDestroyContext behaves as expected when invalid inputs as context are given + * + * Test Steps: + * - Call AclDestroyContext with null context + * - Confirm that AclInvalidArgument is reported + * - Call AclDestroyContext on empty array + * - Confirm that AclInvalidArgument is reported + * - Call AclDestroyContext on an ACL object other than AclContext + * - Confirm that AclInvalidArgument is reported + * - Confirm that context is still nullptr + */ +template <AclTarget Target> +class DestroyInvalidContextFixture : public framework::Fixture +{ +public: + void setup() + { + AclContext ctx = nullptr; + std::array<char, 256> empty_array{}; + AclContext valid_ctx = nullptr; + ARM_COMPUTE_ASSERT(AclCreateContext(&valid_ctx, Target, nullptr) == AclStatus::AclSuccess); + ARM_COMPUTE_ASSERT(AclDestroyContext(ctx) == AclStatus::AclInvalidArgument); + ARM_COMPUTE_ASSERT(AclDestroyContext(reinterpret_cast<AclContext>(empty_array.data())) == AclStatus::AclInvalidArgument); + ARM_COMPUTE_ASSERT(ctx == nullptr); + ARM_COMPUTE_ASSERT(AclDestroyContext(valid_ctx) == AclStatus::AclSuccess); + }; +}; + +/** Test-case for AclCreateContext and AclDestroyContext + * + * Validate that AclCreateContext can create and destroy a context through the C API + * + * Test Steps: + * - Call AclCreateContext with valid target + * - Confirm that context is not nullptr and error code is AclSuccess + * - Destroy context + * - Confirm that AclSuccess is reported + */ +template <AclTarget Target> +class SimpleContextCApiFixture : public framework::Fixture +{ +public: + void setup() + { + AclContext ctx = nullptr; + ARM_COMPUTE_ASSERT(AclCreateContext(&ctx, Target, nullptr) == AclStatus::AclSuccess); + ARM_COMPUTE_ASSERT(ctx != nullptr); + ARM_COMPUTE_ASSERT(AclDestroyContext(ctx) == AclStatus::AclSuccess); + }; +}; + +/** Test-case for Context from the C++ interface + * + * Test Steps: + * - Create a Context obejct + * - Confirm that StatusCode::Success is reported + * - Confirm that equality operator works + * - Confirm that inequality operator works + */ +template <acl::Target Target> +class SimpleContextCppApiFixture : public framework::Fixture +{ +public: + void setup() + { + acl::StatusCode status = acl::StatusCode::Success; + acl::Context ctx(Target, &status); + ARM_COMPUTE_ASSERT(status == acl::StatusCode::Success); + + auto ctx_eq = ctx; + ARM_COMPUTE_ASSERT(ctx_eq == ctx); + + acl::Context ctx_ienq(Target, &status); + ARM_COMPUTE_ASSERT(status == acl::StatusCode::Success); + ARM_COMPUTE_ASSERT(ctx_ienq != ctx); + }; +}; + +/** Test-case for multiple contexes + * + * Validate that AclCreateContext can create/destroy multiple contexts with different options + * + * Test Steps: + * - Call AclCreateContext with different targets + * - Confirm that AclSuccess is reported + * - Destroy all contexts + * - Confirm that AclSuccess is reported + */ +template <AclTarget Target> +class MultipleContextsFixture : public framework::Fixture +{ +public: + void setup() + { + const unsigned int num_tests = 5; + std::array<AclContext, num_tests> ctxs{}; + for(unsigned int i = 0; i < num_tests; ++i) + { + ARM_COMPUTE_ASSERT(AclCreateContext(&ctxs[i], Target, nullptr) == AclStatus::AclSuccess); + ARM_COMPUTE_ASSERT(ctxs[i] != nullptr); + ARM_COMPUTE_ASSERT(AclDestroyContext(ctxs[i]) == AclStatus::AclSuccess); + } + }; +}; +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_UNIT_CONTEXT_FIXTURE */ diff --git a/tests/validation/fixtures/UNIT/DynamicTensorFixture.h b/tests/validation/fixtures/UNIT/DynamicTensorFixture.h index c3aa63b31b..3e96dcbf2d 100644 --- a/tests/validation/fixtures/UNIT/DynamicTensorFixture.h +++ b/tests/validation/fixtures/UNIT/DynamicTensorFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -74,14 +74,14 @@ public: void validate(bool validate_finalized) const { - ARM_COMPUTE_EXPECT(mm->pool_manager() != nullptr, framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(mm->lifetime_manager() != nullptr, framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(mm->pool_manager() != nullptr); + ARM_COMPUTE_ASSERT(mm->lifetime_manager() != nullptr); if(validate_finalized) { - ARM_COMPUTE_EXPECT(mm->lifetime_manager()->are_all_finalized(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(mm->lifetime_manager()->are_all_finalized()); } - ARM_COMPUTE_EXPECT(mm->pool_manager()->num_pools() == num_pools, framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(mm->pool_manager()->num_pools() == num_pools); } AllocatorType allocator; @@ -127,7 +127,6 @@ class DynamicTensorType3SingleFunction : public framework::Fixture using T = float; public: - template <typename...> void setup(TensorShape input_level0, TensorShape input_level1) { input_l0 = input_level0; @@ -159,15 +158,15 @@ protected: SimpleFunctionWrapperType layer(serv_internal.mm); layer.configure(&src, &dst); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Populate and validate memory manager serv_cross.populate(num_pools); @@ -251,7 +250,6 @@ class DynamicTensorType3ComplexFunction : public framework::Fixture using T = float; public: - template <typename...> void setup(std::vector<TensorShape> input_shapes, TensorShape weights_shape, TensorShape bias_shape, std::vector<TensorShape> output_shapes, PadStrideInfo info) { num_iterations = input_shapes.size(); @@ -313,8 +311,8 @@ protected: // Create and configure function _f_target->configure(&src, &_weights_target, &_bias_target, &dst, info, weights_info); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); @@ -322,8 +320,8 @@ protected: _weights_target.allocator()->allocate(); _bias_target.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src), 0); @@ -390,7 +388,6 @@ class DynamicTensorType2PipelineFunction : public framework::Fixture using T = float; public: - template <typename...> void setup(std::vector<TensorShape> input_shapes) { _data_type = DataType::F32; diff --git a/tests/validation/fixtures/UNIT/MemoryManagerFixture.h b/tests/validation/fixtures/UNIT/MemoryManagerFixture.h index 14f22a8d21..3bc4844bd5 100644 --- a/tests/validation/fixtures/UNIT/MemoryManagerFixture.h +++ b/tests/validation/fixtures/UNIT/MemoryManagerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -100,8 +100,8 @@ protected: // Finalize memory manager mm->populate(_allocator, 1 /* num_pools */); - ARM_COMPUTE_EXPECT(mm->lifetime_manager()->are_all_finalized(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(mm->pool_manager()->num_pools() == 1, framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(mm->lifetime_manager()->are_all_finalized()); + ARM_COMPUTE_ASSERT(mm->pool_manager()->num_pools() == 1); // Fill tensors fill(AccessorType(src), 0); @@ -206,8 +206,8 @@ protected: // Finalize memory manager mm->populate(_allocator, 1 /* num_pools */); - ARM_COMPUTE_EXPECT(mm->lifetime_manager()->are_all_finalized(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(mm->pool_manager()->num_pools() == 1, framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(mm->lifetime_manager()->are_all_finalized()); + ARM_COMPUTE_ASSERT(mm->pool_manager()->num_pools() == 1); // Fill tensors (1st iteration) fill(AccessorType(src), 0); @@ -340,8 +340,8 @@ protected: // Finalize memory manager mm->populate(_allocator, 1 /* num_pools */); - ARM_COMPUTE_EXPECT(mm->lifetime_manager()->are_all_finalized(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(mm->pool_manager()->num_pools() == 1, framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(mm->lifetime_manager()->are_all_finalized()); + ARM_COMPUTE_ASSERT(mm->pool_manager()->num_pools() == 1); // Fill tensors (1st iteration) fill(AccessorType(src), 0); diff --git a/tests/validation/fixtures/UNIT/QueueFixture.h b/tests/validation/fixtures/UNIT/QueueFixture.h new file mode 100644 index 0000000000..bc93f5f120 --- /dev/null +++ b/tests/validation/fixtures/UNIT/QueueFixture.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_TEST_UNIT_QUEUE_FIXTURE +#define ARM_COMPUTE_TEST_UNIT_QUEUE_FIXTURE + +#include "arm_compute/Acl.hpp" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/validation/Validation.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +/** Test case for AclCreateQueue + * + * Validate that AclCreateQueue behaves as expected with invalid context + * + * Test Steps: + * - Call AclCreateQueue with an invalid context + * - Confirm that AclInvalidArgument is reported + * - Confirm that the queue is still nullptr + */ +class CreateQueueWithInvalidContextFixture : public framework::Fixture +{ +public: + void setup() + { + AclQueue queue = nullptr; + ARM_COMPUTE_ASSERT(AclCreateQueue(&queue, nullptr, nullptr) == AclStatus::AclInvalidArgument); + ARM_COMPUTE_ASSERT(queue == nullptr); + }; +}; + +/** Test-case for AclCreateQueue + * + * Validate that AclCreateQueue behaves as expected with invalid options + * + * Test Steps: + * - Call AclCreateQueue with valid context but invalid options + * - Confirm that AclInvalidArgument is reported + * - Confirm that queue is still nullptr + */ +template <acl::Target Target> +class CreateQueuerWithInvalidOptionsFixture : public framework::Fixture +{ +public: + void setup() + { + acl::Context ctx(Target); + + // Check invalid tuning mode + AclQueueOptions invalid_queue_opts; + invalid_queue_opts.mode = static_cast<AclTuningMode>(-1); + + AclQueue queue = nullptr; + ARM_COMPUTE_ASSERT(AclCreateQueue(&queue, ctx.get(), &invalid_queue_opts) == AclStatus::AclInvalidArgument); + ARM_COMPUTE_ASSERT(queue == nullptr); + }; +}; + +/** Test case for AclDestroyQueue +* +* Validate that AclDestroyQueue behaves as expected when an invalid queue is given +* +* Test Steps: +* - Call AclDestroyQueue with null queue +* - Confirm that AclInvalidArgument is reported +* - Call AclDestroyQueue on empty array +* - Confirm that AclInvalidArgument is reported +* - Call AclDestroyQueue on an ACL object other than AclQueue +* - Confirm that AclInvalidArgument is reported +* - Confirm that queue is still nullptr +*/ +template <acl::Target Target> +class DestroyInvalidQueueFixture : public framework::Fixture +{ +public: + void setup() + { + acl::Context ctx(Target); + + std::array<char, 256> empty_array{}; + AclQueue queue = nullptr; + + ARM_COMPUTE_ASSERT(AclDestroyQueue(queue) == AclStatus::AclInvalidArgument); + ARM_COMPUTE_ASSERT(AclDestroyQueue(reinterpret_cast<AclQueue>(ctx.get())) == AclStatus::AclInvalidArgument); + ARM_COMPUTE_ASSERT(AclDestroyQueue(reinterpret_cast<AclQueue>(empty_array.data())) == AclStatus::AclInvalidArgument); + ARM_COMPUTE_ASSERT(queue == nullptr); + }; +}; + +/** Test case for AclCreateQueue + * + * Validate that a queue can be created successfully + * + * Test Steps: + * - Create a valid context + * - Create a valid queue + * - Confirm that AclSuccess is returned + */ +template <acl::Target Target> +class SimpleQueueFixture : public framework::Fixture +{ +public: + void setup() + { + acl::StatusCode err = acl::StatusCode::Success; + + acl::Context ctx(Target, &err); + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); + + acl::Queue queue(ctx, &err); + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); + }; +}; +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_UNIT_QUEUE_FIXTURE */ diff --git a/tests/validation/fixtures/UNIT/TensorFixture.h b/tests/validation/fixtures/UNIT/TensorFixture.h new file mode 100644 index 0000000000..bfe115b3ed --- /dev/null +++ b/tests/validation/fixtures/UNIT/TensorFixture.h @@ -0,0 +1,424 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_TEST_UNIT_TENSOR_FIXTURE +#define ARM_COMPUTE_TEST_UNIT_TENSOR_FIXTURE + +#include "arm_compute/Acl.hpp" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/validation/Validation.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +/** Test case for AclCreateTensor + * + * Validate that AclCreateTensor behaves as expected with invalid context + * + * Test Steps: + * - Call AclCreateTensor with an invalid context + * - Confirm that AclInvalidArgument is reported + * - Confirm that the tensor is still nullptr + */ +class CreateTensorWithInvalidContextFixture : public framework::Fixture +{ +public: + void setup() + { + AclTensor tensor = nullptr; + ARM_COMPUTE_ASSERT(AclCreateTensor(&tensor, nullptr, nullptr, false) == AclStatus::AclInvalidArgument); + ARM_COMPUTE_ASSERT(tensor == nullptr); + }; +}; + +/** Test-case for AclCreateTensor + * + * Validate that AclCreateTensor behaves as expected on invalid descriptor + * + * Test Steps: + * - Call AclCreateTensor with valid context but invalid descriptor + * - Confirm that AclInvalidArgument is reported + * - Confirm that tensor is still nullptr + */ +template <acl::Target Target> +class CreateTensorWithInvalidDescriptorFixture : public framework::Fixture +{ +public: + void setup() + { + acl::Context ctx(Target); + AclTensor tensor = nullptr; + ARM_COMPUTE_ASSERT(AclCreateTensor(&tensor, ctx.get(), nullptr, false) == AclStatus::AclInvalidArgument); + ARM_COMPUTE_ASSERT(tensor == nullptr); + + // Check invalid data type + AclTensorDescriptor invalid_desc; + invalid_desc.ndims = 4; + invalid_desc.data_type = static_cast<AclDataType>(-1); + ARM_COMPUTE_ASSERT(AclCreateTensor(&tensor, ctx.get(), &invalid_desc, false) == AclStatus::AclInvalidArgument); + ARM_COMPUTE_ASSERT(tensor == nullptr); + + // Check invalid number of dimensions + invalid_desc.data_type = AclDataType::AclFloat32; + invalid_desc.ndims = 15; + ARM_COMPUTE_ASSERT(AclCreateTensor(&tensor, ctx.get(), &invalid_desc, false) == AclStatus::AclInvalidArgument); + ARM_COMPUTE_ASSERT(tensor == nullptr); + }; +}; + +/** Test case for AclDestroyTensor +* +* Validate that AclDestroyTensor behaves as expected when an invalid tensor is given +* +* Test Steps: +* - Call AclDestroyTensor with null tensor +* - Confirm that AclInvalidArgument is reported +* - Call AclDestroyTensor on empty array +* - Confirm that AclInvalidArgument is reported +* - Call AclDestroyTensor on an ACL object other than AclTensor +* - Confirm that AclInvalidArgument is reported +* - Confirm that tensor is still nullptr +*/ +template <acl::Target Target> +class DestroyInvalidTensorFixture : public framework::Fixture +{ +public: + void setup() + { + acl::Context ctx(Target); + + std::array<char, 256> empty_array{}; + AclTensor tensor = nullptr; + + ARM_COMPUTE_ASSERT(AclDestroyTensor(tensor) == AclStatus::AclInvalidArgument); + ARM_COMPUTE_ASSERT(AclDestroyTensor(reinterpret_cast<AclTensor>(ctx.get())) == AclStatus::AclInvalidArgument); + ARM_COMPUTE_ASSERT(AclDestroyTensor(reinterpret_cast<AclTensor>(empty_array.data())) == AclStatus::AclInvalidArgument); + ARM_COMPUTE_ASSERT(tensor == nullptr); + }; +}; + +/** Test case for AclCreateTensor + * + * Validate that a tensor can be created successfully + * + * Test Steps: + * - Create a valid context + * - Create a valid tensor + * - Confirm that AclSuccess is returned + */ +template <acl::Target Target> +class SimpleTensorFixture : public framework::Fixture +{ +public: + void setup() + { + acl::StatusCode err = acl::StatusCode::Success; + acl::Context ctx(Target, &err); + + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); + acl::Tensor tensor(ctx, acl::TensorDescriptor({ 2, 3 }, acl::DataType::Float32), &err); + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); + }; +}; + +/** Test case for AclTensor + * + * Validate that multiple tensors can be created successfully + * Stress the possibility of memory leaks + * + * Test Steps: + * - Create a valid context + * - Create a lot of tensors + * - Confirm that AclSuccess is returned + */ +template <acl::Target Target> +class TensorStressFixture : public framework::Fixture +{ +public: + void setup() + { + acl::StatusCode err = acl::StatusCode::Success; + + acl::Context ctx(Target, &err); + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); + + const unsigned int num_tensors = 1024; + for(unsigned int i = 0; i < num_tensors; ++i) + { + acl::Tensor tensor(ctx, acl::TensorDescriptor({ 1024, 1024 }, acl::DataType::Float32), &err); + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); + } + }; +}; + +/** Test case for AclMapTensor + * + * Validate that map on an invalid object fails + * + * Test Steps: + * - Create a valid context + * - Pass and invalid object for mapping + * - Confirm that AclInvalidArgument is returned + */ +template <acl::Target Target> +class MapInvalidTensorFixture : public framework::Fixture +{ +public: + void setup() + { + acl::StatusCode err = acl::StatusCode::Success; + + acl::Context ctx(Target, &err); + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); + + void *handle = nullptr; + ARM_COMPUTE_ASSERT(AclMapTensor(reinterpret_cast<AclTensor>(ctx.get()), &handle) == AclStatus::AclInvalidArgument); + }; +}; + +/** Test case for AclMapTensor + * + * Validate that map of an unallocated pointer is nullptr + * + * Test Steps: + * - Create a valid context + * - Create a valid tensor without allocating + * - Map tensor + * - Check that mapping is nullptr + */ +template <acl::Target Target> +class MapNotAllocatedTensorFixture : public framework::Fixture +{ +public: + void setup() + { + acl::StatusCode err = acl::StatusCode::Success; + + acl::Context ctx(Target, &err); + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); + + acl::Tensor tensor(ctx, acl::TensorDescriptor({ 8, 8 }, acl::DataType::Float32), false /* allocate */, &err); + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); + ARM_COMPUTE_ASSERT(tensor.map() == nullptr); + }; +}; + +/** Test case for AclMapTensor + * + * Validate that map of a valid tensor return a non-nullptr value + * + * Test Steps: + * - Create a valid context + * - Create a valid tensor while allocating + * - Map tensor + * - Check that mapping is not nullptr + */ +template <acl::Target Target> +class MapAllocatedTensorFixture : public framework::Fixture +{ +public: + void setup() + { + acl::StatusCode err = acl::StatusCode::Success; + + acl::Context ctx(Target, &err); + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); + + acl::Tensor tensor(ctx, acl::TensorDescriptor({ 8, 8 }, acl::DataType::Float32), &err); + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); + + void *handle = tensor.map(); + ARM_COMPUTE_ASSERT(handle != nullptr); + ARM_COMPUTE_ASSERT(tensor.unmap(handle) == acl::StatusCode::Success); + }; +}; + +/** Test case for AclTensorImport + * + * Validate that an externally memory can be successfully imported + * + * Test Steps: + * - Create a valid context + * - Create a valid tensor without allocating + * - Allocate external memory + * - Import memory to the tensor + * - Check that imported pointer matches + */ +template <acl::Target Target> +class ImportMemoryFixture : public framework::Fixture +{ +public: + void setup() + { + acl::StatusCode err = acl::StatusCode::Success; + + acl::Context ctx(Target, &err); + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); + + const int32_t size = 8; + acl::Tensor tensor(ctx, acl::TensorDescriptor({ size }, acl::DataType::Float32), false /* allocate */, &err); + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); + + std::vector<float> data(size); + err = tensor.import(data.data(), acl::ImportType::Host); + + void *handle = tensor.map(); + ARM_COMPUTE_ASSERT(handle == data.data()); + ARM_COMPUTE_ASSERT(tensor.unmap(handle) == acl::StatusCode::Success); + } +}; +/** Test case for get_size() interface of Tensor + * + * + * Test Steps: + * - Create a valid context + * - Create a valid tensor + * - Compare the size value returned with the expected value + */ +template <acl::Target Target> +class TensorSizeFixture : public framework::Fixture +{ +public: + void setup() + { + acl::StatusCode err = acl::StatusCode::Success; + acl::Context ctx(Target, &err); + + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); + acl::Tensor tensor(ctx, acl::TensorDescriptor({ 2, 3 }, acl::DataType::Float32), &err); + + // size should be 6 elements (2x3) times 4 bytes (float32) = 24 bytes + constexpr size_t expected_size = 24; + ARM_COMPUTE_ASSERT(tensor.get_size() == expected_size); + }; +}; +/** Test case for get_size() dealing with invalid arguments + * + * Test Steps: + * - Test nullptr tensor can return a correct error + * - Create a valid tensor + * - Test C interface with null size argument can return a correct error + */ +template <acl::Target Target> +class InvalidTensorSizeFixture : public framework::Fixture +{ +public: + void setup() + { + // Null tensor + AclTensor null_tensor = nullptr; + uint64_t size{ 0 }; + ARM_COMPUTE_ASSERT(AclGetTensorSize(null_tensor, &size) == AclStatus::AclInvalidArgument); + + // Create valid tensor + acl::StatusCode err = acl::StatusCode::Success; + acl::Context ctx(Target, &err); + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); + acl::Tensor tensor(ctx, acl::TensorDescriptor({ 2, 3 }, acl::DataType::Float32), &err); + + // Null size argument + ARM_COMPUTE_ASSERT(AclGetTensorSize(tensor.get(), nullptr) == AclStatus::AclInvalidArgument); + }; +}; + +template <acl::Target Target> +class DescriptorConversionFixture : public framework::Fixture +{ + bool compare_descriptor(const AclTensorDescriptor &desc_a, const AclTensorDescriptor &desc_b) + { + auto are_descriptors_same = true; + + are_descriptors_same &= desc_a.ndims == desc_b.ndims; + are_descriptors_same &= desc_a.data_type == desc_b.data_type; + are_descriptors_same &= desc_a.shape != nullptr && desc_b.shape != nullptr; + + for(int32_t d = 0; d < desc_a.ndims; ++d) + { + are_descriptors_same &= desc_a.shape[d] == desc_b.shape[d]; + } + + // other attributes should be added here + + return are_descriptors_same; + } + +public: + void setup() + { + auto err{ acl::StatusCode::Success }; + auto ctx{ acl::Context(Target, &err) }; + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); + + auto desc{ acl::TensorDescriptor({ 2, 3 }, acl::DataType::Float32) }; + acl::Tensor tensor(ctx, desc, &err); + + auto desc_from_tensor = tensor.get_descriptor(); + + ARM_COMPUTE_ASSERT(compare_descriptor(*desc.get(), *desc_from_tensor.get())); + ARM_COMPUTE_ASSERT(desc == desc_from_tensor); + + // Test c interface with "prepopulated" descriptor + // Note: When c interface used, there are possibility of memory leak + // if members are not correctly deleted (e.g., shape). + // Since that is considered user's responsibility, we don't test here. + AclTensorDescriptor prepopulated_descriptor + { + 3, nullptr, AclDataType::AclBFloat16, nullptr, 0 + }; + + ARM_COMPUTE_ASSERT(AclGetTensorDescriptor(tensor.get(), &prepopulated_descriptor) == AclStatus::AclSuccess); + ARM_COMPUTE_ASSERT(compare_descriptor(*desc.get(), prepopulated_descriptor)); + ARM_COMPUTE_ASSERT(desc == acl::TensorDescriptor(prepopulated_descriptor)); + }; +}; + +template <acl::Target Target> +class InvalidDescriptorConversionFixture : public framework::Fixture +{ +public: + void setup() + { + // Null tensor + AclTensor null_tensor = nullptr; + AclTensorDescriptor desc{}; + ARM_COMPUTE_ASSERT(AclGetTensorDescriptor(null_tensor, &desc) == AclStatus::AclInvalidArgument); + + // Create valid tensor + acl::StatusCode err = acl::StatusCode::Success; + acl::Context ctx(Target, &err); + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); + acl::Tensor tensor(ctx, acl::TensorDescriptor({ 2, 3 }, acl::DataType::Float32), &err); + + // Null size argument + ARM_COMPUTE_ASSERT(AclGetTensorDescriptor(tensor.get(), nullptr) == AclStatus::AclInvalidArgument); + }; +}; +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_UNIT_TENSOR_FIXTURE */ diff --git a/tests/validation/fixtures/UNIT/TensorPackFixture.h b/tests/validation/fixtures/UNIT/TensorPackFixture.h new file mode 100644 index 0000000000..bc14631936 --- /dev/null +++ b/tests/validation/fixtures/UNIT/TensorPackFixture.h @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_TEST_UNIT_TENSORPACK_FIXTURE +#define ARM_COMPUTE_TEST_UNIT_TENSORPACK_FIXTURE + +#include "arm_compute/Acl.hpp" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/validation/Validation.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +/** Test case for AclCreateTensorPack + * + * Validate that AclCreateTensorPack behaves as expected with invalid context + * + * Test Steps: + * - Call AclCreateTensorPack with an invalid context + * - Confirm that AclInvalidArgument is reported + * - Confirm that the tensor pack is still nullptr + */ +class CreateTensorPackWithInvalidContextFixture : public framework::Fixture +{ +public: + void setup() + { + AclTensorPack pack = nullptr; + ARM_COMPUTE_ASSERT(AclCreateTensorPack(&pack, nullptr) == AclStatus::AclInvalidArgument); + ARM_COMPUTE_ASSERT(pack == nullptr); + }; +}; + +/** Test case for AclDestroyTensorPack + * + * Validate that AclDestroyTensorPack behaves as expected when an invalid tensor pack is given + * + * Test Steps: + * - Call AclDestroyTensorPack with null tensor pack + * - Confirm that AclInvalidArgument is reported + * - Call AclDestroyTensorPack on empty array + * - Confirm that AclInvalidArgument is reported + * - Call AclDestroyTensorPack on an ACL object other than AclTensorPack + * - Confirm that AclInvalidArgument is reported + * - Confirm that tensor pack is still nullptr + */ +template <acl::Target Target> +class DestroyInvalidTensorPackFixture : public framework::Fixture +{ +public: + void setup() + { + acl::Context ctx(Target); + + std::array<char, 256> empty_array{}; + AclTensorPack pack = nullptr; + + ARM_COMPUTE_ASSERT(AclDestroyTensorPack(pack) == AclStatus::AclInvalidArgument); + ARM_COMPUTE_ASSERT(AclDestroyTensorPack(reinterpret_cast<AclTensorPack>(ctx.get())) == AclStatus::AclInvalidArgument); + ARM_COMPUTE_ASSERT(AclDestroyTensorPack(reinterpret_cast<AclTensorPack>(empty_array.data())) == AclStatus::AclInvalidArgument); + ARM_COMPUTE_ASSERT(pack == nullptr); + }; +}; + +/** Test case for AclPackTensor + * + * Validate that AclPackTensor behaves as expected when an invalid is being passed for packing + * + * Test Steps: + * - Create a valid TensorPack + * - Try to pack an empty object + * - Confirm that AclInvalidArgument is reported + * - Try to pack another API object other than tensor + * - Confirm that AclInvalidArgument is reported + */ +template <acl::Target Target> +class AddInvalidObjectToTensorPackFixture : public framework::Fixture +{ +public: + void setup() + { + auto err = acl::StatusCode::Success; + + acl::Context ctx(Target, &err); + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); + + acl::TensorPack pack(ctx, &err); + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); + + auto status = AclPackTensor(pack.get(), + reinterpret_cast<AclTensor>(ctx.get()), + AclTensorSlot::AclSrc); + ARM_COMPUTE_ASSERT(status == AclInvalidArgument); + + status = AclPackTensor(pack.get(), nullptr, AclTensorSlot::AclSrc); + ARM_COMPUTE_ASSERT(status == AclInvalidArgument); + }; +}; + +/** Test case for AclPackTensor + * + * Validate that a tensor can be added successfully to the TensorPack + * + * Test Steps: + * - Create a valid tensor pack + * - Create a valid tensor + * - Add tensor to the tensor pack + * - Confirm that AclSuccess is returned + */ +template <acl::Target Target> +class SimpleTensorPackFixture : public framework::Fixture +{ +public: + void setup() + { + acl::Context ctx(Target); + acl::TensorPack pack(ctx); + acl::Tensor t(ctx, acl::TensorDescriptor({ 3, 3, 5, 7 }, acl::DataType::Float32)); + + ARM_COMPUTE_ASSERT(pack.add(t, AclTensorSlot::AclSrc) == acl::StatusCode::Success); + }; +}; + +/** Test case for AclPackTensor + * + * Validate that multiple tensor can be added successfully to the TensorPack + * + * Test Steps: + * - Create a valid tensor pack + * - Create a list of valid tensors + * - Add tensors to the tensor pack + * - Confirm that AclSuccess is returned + */ +template <acl::Target Target> +class MultipleTensorsInPackFixture : public framework::Fixture +{ +public: + void setup() + { + acl::Context ctx(Target); + acl::TensorPack pack(ctx); + + const acl::TensorDescriptor desc(acl::TensorDescriptor({ 3, 3, 5, 7 }, acl::DataType::Float32)); + const size_t num_tensors = 256; + + std::vector<acl::Tensor> tensors; + for(unsigned int i = 0; i < num_tensors; ++i) + { + auto err = acl::StatusCode::Success; + tensors.emplace_back(acl::Tensor(ctx, desc, &err)); + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); + ARM_COMPUTE_ASSERT(pack.add(tensors.back(), static_cast<int32_t>(AclTensorSlot::AclSrcVec) + i) == acl::StatusCode::Success); + } + }; +}; +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_UNIT_TENSORPACK_FIXTURE */ diff --git a/tests/validation/fixtures/UNIT/WeightsRetentionFixture.h b/tests/validation/fixtures/UNIT/WeightsRetentionFixture.h index af9f776ebc..f5e6071340 100644 --- a/tests/validation/fixtures/UNIT/WeightsRetentionFixture.h +++ b/tests/validation/fixtures/UNIT/WeightsRetentionFixture.h @@ -74,10 +74,10 @@ protected: TensorType compute_target() { // Create tensors - TensorType w1 = create_tensor<TensorType>(TensorShape(180000U, 150U), DataType::F32, 1); - TensorType b1 = create_tensor<TensorType>(TensorShape(150U), DataType::F32, 1); - TensorType src = create_tensor<TensorType>(TensorShape(1U, 150U, 1200U, _max_batches), DataType::F32, 1); - TensorType dst = create_tensor<TensorType>(TensorShape(150U, _max_batches), DataType::F32, 1); + TensorType w1 = create_tensor<TensorType>(TensorShape(6000U, 15U), DataType::F32, 1); + TensorType b1 = create_tensor<TensorType>(TensorShape(15U), DataType::F32, 1); + TensorType src = create_tensor<TensorType>(TensorShape(1U, 15U, 400U, _max_batches), DataType::F32, 1); + TensorType dst = create_tensor<TensorType>(TensorShape(15U, _max_batches), DataType::F32, 1); // Create and configure function FullyConnectedFunction fc_layer_1; @@ -105,9 +105,9 @@ protected: int diff = _max_batches - _cur_batches; auto new_src_padding = PaddingSize(src_padding.top, src_padding.right, src_padding.bottom + diff, src_padding.left); auto new_dst_padding = PaddingSize(dst_padding.top, dst_padding.right, dst_padding.bottom + diff, dst_padding.left); - src.allocator()->info().set_tensor_shape(TensorShape(1U, 150U, 1200U, _cur_batches)).set_is_resizable(true).extend_padding(new_src_padding); + src.allocator()->info().set_tensor_shape(TensorShape(1U, 15U, 400U, _cur_batches)).set_is_resizable(true).extend_padding(new_src_padding); src.allocator()->info().set_is_resizable(false); - dst.allocator()->info().set_tensor_shape(TensorShape(150U, _cur_batches)).set_is_resizable(true).extend_padding(new_dst_padding); + dst.allocator()->info().set_tensor_shape(TensorShape(15U, _cur_batches)).set_is_resizable(true).extend_padding(new_dst_padding); dst.allocator()->info().set_is_resizable(false); // Configure FC info @@ -129,16 +129,16 @@ protected: SimpleTensor<T> compute_reference() { // Create reference - SimpleTensor<T> w1{ TensorShape(180000U, 150U), DataType::F32 }; - SimpleTensor<T> b1{ TensorShape(150U), DataType::F32 }; - SimpleTensor<T> src{ TensorShape(1U, 150U, 1200U, _cur_batches), DataType::F32 }; + SimpleTensor<T> w1{ TensorShape(6000U, 15U), DataType::F32 }; + SimpleTensor<T> b1{ TensorShape(15U), DataType::F32 }; + SimpleTensor<T> src{ TensorShape(1U, 15U, 400U, _cur_batches), DataType::F32 }; // Fill reference fill(src, 5); fill(w1, 1); fill(b1, 2); - return reference::fully_connected_layer(src, w1, b1, TensorShape(150U, _cur_batches)); + return reference::fully_connected_layer(src, w1, b1, TensorShape(15U, _cur_batches)); } protected: diff --git a/tests/validation/fixtures/UnstackFixture.h b/tests/validation/fixtures/UnstackFixture.h index 53c79e180b..30b7dd5539 100644 --- a/tests/validation/fixtures/UnstackFixture.h +++ b/tests/validation/fixtures/UnstackFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -47,7 +47,6 @@ template <typename TensorType, typename ITensorType, typename AccessorType, type class UnstackValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape input_shape, int axis, int num, DataType data_type) { _target = compute_target(input_shape, axis, num, data_type); @@ -80,10 +79,10 @@ protected: for(auto &out : output_slices) { out.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!out.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!out.info()->is_resizable()); } input_tensor.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!input_tensor.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!input_tensor.info()->is_resizable()); fill(AccessorType(input_tensor), 0); // Compute function unstack.run(); diff --git a/tests/validation/fixtures/WeightsReshapeFixture.h b/tests/validation/fixtures/WeightsReshapeFixture.h index 5c17b538d5..68bd8b689d 100644 --- a/tests/validation/fixtures/WeightsReshapeFixture.h +++ b/tests/validation/fixtures/WeightsReshapeFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -45,10 +45,9 @@ namespace validation using namespace arm_compute::misc::shape_calculator; template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class WeightsReshapeValidationFixture : public framework::Fixture +class WeightsReshapeOpValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape input_shape, DataType data_type, bool has_bias, unsigned int num_groups) { const TensorShape output_shape = compute_weights_reshaped_shape(TensorInfo(input_shape, 1, data_type), has_bias, num_groups); @@ -73,34 +72,44 @@ protected: // Create and configure function FunctionType weights_reshape_func; - weights_reshape_func.configure(&src, (has_bias ? &bias : nullptr), &dst, num_groups); + weights_reshape_func.configure(src.info(), (has_bias ? bias.info() : nullptr), dst.info(), num_groups); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src), 0); if(has_bias) { - ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); bias.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); fill(AccessorType(bias), 1); } + arm_compute::ITensorPack pack = + { + { arm_compute::TensorType::ACL_SRC, &src }, + { arm_compute::TensorType::ACL_DST, &dst } + }; + + if(has_bias) + { + pack.add_const_tensor(arm_compute::TensorType::ACL_BIAS, &bias); + } // Compute function - weights_reshape_func.run(); + weights_reshape_func.run(pack); return dst; } diff --git a/tests/validation/fixtures/WinogradConvolutionLayerFixture.h b/tests/validation/fixtures/WinogradConvolutionLayerFixture.h index 03ec920c4e..20b678b36c 100644 --- a/tests/validation/fixtures/WinogradConvolutionLayerFixture.h +++ b/tests/validation/fixtures/WinogradConvolutionLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_WINOGRAD_LAYER_FIXTURE -#define ARM_COMPUTE_TEST_WINOGRAD_LAYER_FIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_WINOGRADCONVOLUTIONLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_WINOGRADCONVOLUTIONLAYERFIXTURE_H #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" @@ -51,130 +51,36 @@ namespace validation { using namespace arm_compute::misc::shape_calculator; -template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool use_bias = true> -class WinogradConvolutionLayerValidationFixture : public framework::Fixture +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename T1 = T, bool use_bias = true, bool mixed_layout = false> +class WinogradConvolutionLayerFastMathValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, - DataType data_type, ActivationLayerInfo act_info) + DataType data_type, ActivationLayerInfo act_info, const DataLayout &data_layout) + { ARM_COMPUTE_UNUSED(dilation); - - _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type, act_info); - _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, act_info); + _mixed_layout = mixed_layout; + _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type, act_info, data_layout); + _reference = compute_reference(input_shape, weights_shape, bias_shape, info, data_type, act_info); } protected: - template <typename U> - void fill(U &&tensor, int i, float min, float max) - { - switch(tensor.data_type()) - { - case DataType::F16: - { - arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ float(min), float(max) }; - library->fill(tensor, distribution, i); - break; - } - case DataType::F32: - { - std::uniform_real_distribution<float> distribution(min, max); - library->fill(tensor, distribution, i); - break; - } - default: - { - ARM_COMPUTE_ERROR("Not supported"); - } - } - } - - TensorType compute_target(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, const PadStrideInfo &info, - DataType data_type, ActivationLayerInfo act_info) - { - // Create tensors - TensorType src = create_tensor<TensorType>(input_shape, data_type, 1); - TensorType weights = create_tensor<TensorType>(weights_shape, data_type, 1); - TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1); - TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1); - - // Create and configure function - FunctionType conv; - ARM_COMPUTE_EXPECT(static_cast<bool>(conv.validate(src.info(), weights.info(), (use_bias) ? bias.info() : nullptr, dst.info(), info, act_info)), framework::LogLevel::ERRORS); - conv.configure(&src, &weights, (use_bias) ? &bias : nullptr, &dst, info, act_info); - - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); - - // Allocate tensors - src.allocator()->allocate(); - weights.allocator()->allocate(); - dst.allocator()->allocate(); - bias.allocator()->allocate(); - - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!weights.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); - - // Fill tensors - fill(AccessorType(src), 0, -1.f, 1.f); - fill(AccessorType(weights), 1, -1.f, 1.f); - fill(AccessorType(bias), 2, -1.f, 1.f); - - // Compute Winograd Convolution function - conv.run(); - - return dst; - } - - SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info, - DataType data_type, ActivationLayerInfo act_info) + void mix_layout(FunctionType &layer, TensorType &src, TensorType &dst) { - // Create reference - SimpleTensor<T> src{ input_shape, data_type, 1 }; - SimpleTensor<T> weights{ weights_shape, data_type, 1 }; - SimpleTensor<T> bias{ bias_shape, data_type, 1 }; + const DataLayout data_layout = src.info()->data_layout(); + // Test Multi DataLayout graph cases, when the data layout changes after configure + src.info()->set_data_layout(data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); + dst.info()->set_data_layout(data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); - // Fill reference - fill(src, 0, -1.f, 1.f); - fill(weights, 1, -1.f, 1.f); - if(use_bias) - { - fill(bias, 2, -1.f, 1.f); - } - else - { - fill(bias, 2, 0.f, 0.f); - } - - SimpleTensor<T> conv_out = reference::convolution_layer<T>(src, weights, bias, output_shape, info); + // Compute Convolution function + layer.run(); - return (act_info.enabled()) ? reference::activation_layer<T>(conv_out, act_info) : conv_out; + // Reinstating original data layout for the test suite to properly check the values + src.info()->set_data_layout(data_layout); + dst.info()->set_data_layout(data_layout); } - TensorType _target{}; - SimpleTensor<T> _reference{}; -}; - -template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename T1 = T, bool use_bias = true> -class WinogradConvolutionLayerFastMathValidationFixture : public framework::Fixture -{ -public: - template <typename...> - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, - DataType data_type, ActivationLayerInfo act_info, const DataLayout &data_layout) - - { - ARM_COMPUTE_UNUSED(dilation); - _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type, act_info, data_layout); - _reference = compute_reference(input_shape, weights_shape, bias_shape, info, data_type, act_info); - } - -protected: template <typename U> void fill(U &&tensor, int i, float min, float max) { @@ -221,10 +127,12 @@ protected: framework::LogLevel::ERRORS); conv.configure(&src, &weights, (use_bias) ? &bias : nullptr, &dst, info, act_info, true /* Enable fast math */); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + + add_padding_x({ &src, &weights, &bias, &dst }, data_layout); // Allocate tensors src.allocator()->allocate(); @@ -232,19 +140,25 @@ protected: dst.allocator()->allocate(); bias.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!weights.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!weights.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src), 0, -0.5f, 0.5f); fill(AccessorType(weights), 1, -0.5f, 0.5f); fill(AccessorType(bias), 2, -0.5f, 0.5f); - // Compute Winograd Convolution function - conv.run(); - + if(_mixed_layout) + { + mix_layout(conv, src, dst); + } + else + { + // Compute function + conv.run(); + } return dst; } @@ -315,28 +229,45 @@ protected: SimpleTensor<T1> filter_transform_out = reference::winograd_filter_transform<T1>(weights_t1, filter_transform_shape, winograd_info); SimpleTensor<T1> batched_gemm = reference::gemm<T1>(input_transform_out, filter_transform_out, dummy_c, 1.0f, 0.0f); SimpleTensor<T1> conv_out = reference::winograd_output_transform<T1>(batched_gemm, bias_t1, output_transform_shape, winograd_info); - SimpleTensor<T> conv_out_t(std::move(copy_tensor<T, T1>(conv_out))); + SimpleTensor<T> conv_out_t(copy_tensor<T, T1>(conv_out)); return (act_info.enabled()) ? reference::activation_layer<T>(conv_out_t, act_info) : conv_out_t; } TensorType _target{}; SimpleTensor<T> _reference{}; + bool _mixed_layout{ false }; }; -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false> class WinogradInputTransformValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape input_shape, WinogradInfo winograd_info, DataLayout data_layout, DataType data_type) { TensorShape output_shape = compute_winograd_input_transform_shape(TensorInfo(input_shape, 1, data_type), winograd_info); - - _target = compute_target(input_shape, output_shape, winograd_info, data_layout, data_type); - _reference = compute_reference(input_shape, output_shape, winograd_info, data_type); + _mixed_layout = mixed_layout; + _target = compute_target(input_shape, output_shape, winograd_info, data_layout, data_type); + _reference = compute_reference(input_shape, output_shape, winograd_info, data_type); } protected: + void mix_layout(FunctionType &layer, TensorType &src, TensorType &dst) + { + const DataLayout data_layout_src = src.info()->data_layout(); + const DataLayout data_layout_dst = dst.info()->data_layout(); + + // Test Multi DataLayout graph cases, when the data layout changes after configure + src.info()->set_data_layout(data_layout_src == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); + dst.info()->set_data_layout(data_layout_dst == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); + + // Compute Convolution function + layer.run(); + + // Reinstating original data layout for the test suite to properly check the values + src.info()->set_data_layout(data_layout_src); + dst.info()->set_data_layout(data_layout_dst); + } + template <typename U> void fill(U &&tensor, int i, float min, float max) { @@ -375,22 +306,30 @@ protected: FunctionType transf; transf.configure(&src, &dst, winograd_info); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + + add_padding_x({ &src, &dst }, data_layout); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src), 0, -1.f, 1.f); - // Compute Winograd input transform function - transf.run(); - + if(_mixed_layout) + { + mix_layout(transf, src, dst); + } + else + { + // Compute Winograd input transform function + transf.run(); + } return dst; } @@ -405,25 +344,43 @@ protected: return reference::winograd_input_transform<T>(src, output_shape, winograd_info); } + bool _mixed_layout{ false }; TensorType _target{}; SimpleTensor<T> _reference{}; }; -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false> class WinogradFilterTransformValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape input_shape, Size2D output_tile, DataLayout data_layout, DataType data_type) { WinogradInfo winograd_info(output_tile, Size2D(input_shape[0], input_shape[1]), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, DataLayout::NCHW /* Not needed */); TensorShape output_shape = compute_winograd_filter_transform_shape(TensorInfo(input_shape, 1, data_type), winograd_info); - _target = compute_target(input_shape, output_shape, winograd_info, data_layout, data_type); - _reference = compute_reference(input_shape, output_shape, winograd_info, data_type); + _mixed_layout = mixed_layout; + _target = compute_target(input_shape, output_shape, winograd_info, data_layout, data_type); + _reference = compute_reference(input_shape, output_shape, winograd_info, data_type); } protected: + void mix_layout(FunctionType &layer, TensorType &src, TensorType &dst) + { + const DataLayout data_layout_src = src.info()->data_layout(); + const DataLayout data_layout_dst = dst.info()->data_layout(); + + // Test Multi DataLayout graph cases, when the data layout changes after configure + src.info()->set_data_layout(data_layout_src == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); + dst.info()->set_data_layout(data_layout_dst == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); + + // Compute Convolution function + layer.run(); + + // Reinstating original data layout for the test suite to properly check the values + src.info()->set_data_layout(data_layout_src); + dst.info()->set_data_layout(data_layout_dst); + } + template <typename U> void fill(U &&tensor, int i, float min, float max) { @@ -463,21 +420,30 @@ protected: FunctionType filter_transform; filter_transform.configure(&src, &dst, winograd_info); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + + add_padding_x({ &src, &dst }, data_layout); // Allocate tensors src.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src), 0, -1.f, 1.f); - filter_transform.run(); - + if(_mixed_layout) + { + mix_layout(filter_transform, src, dst); + } + else + { + // Compute Winograd filter transform function + filter_transform.run(); + } return dst; } @@ -492,15 +458,15 @@ protected: return reference::winograd_filter_transform<T>(src, output_shape, winograd_info); } + bool _mixed_layout{ false }; TensorType _target{}; SimpleTensor<T> _reference{}; }; -template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false> class WinogradOutputTransformValidationFixture : public framework::Fixture { public: - template <typename...> void setup(TensorShape input_shape, WinogradInfo winograd_info, DataType data_type, ActivationLayerInfo act_info = ActivationLayerInfo()) { _target = compute_target(input_shape, winograd_info, data_type, act_info); @@ -508,6 +474,23 @@ public: } protected: + void mix_layout(FunctionType &layer, TensorType &src, TensorType &dst) + { + const DataLayout data_layout_src = src.info()->data_layout(); + const DataLayout data_layout_dst = dst.info()->data_layout(); + + // Test Multi DataLayout graph cases, when the data layout changes after configure + src.info()->set_data_layout(data_layout_src == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); + dst.info()->set_data_layout(data_layout_dst == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); + + // Compute Convolution function + layer.run(); + + // Reinstating original data layout for the test suite to properly check the values + src.info()->set_data_layout(data_layout_src); + dst.info()->set_data_layout(data_layout_dst); + } + template <typename U> void fill(U &&tensor, int i, float min, float max) { @@ -545,25 +528,34 @@ protected: FunctionType output_transform; output_transform.configure(&src, &bias, &dst, winograd_info, act_info); - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + + add_padding_x({ &src, &bias, &dst }, winograd_info.output_data_layout); // Allocate tensors src.allocator()->allocate(); bias.allocator()->allocate(); dst.allocator()->allocate(); - ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_ASSERT(!src.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(src), 0, -1.f, 1.f); fill(AccessorType(bias), 1, -1.f, 1.f); - output_transform.run(); - + if(_mixed_layout) + { + mix_layout(output_transform, src, dst); + } + else + { + // Compute Winograd output transform function + output_transform.run(); + } return dst; } @@ -585,10 +577,11 @@ protected: return (act_info.enabled()) ? reference::activation_layer<T>(winograd_output, act_info) : winograd_output; } + bool _mixed_layout{ false }; TensorType _target{}; SimpleTensor<T> _reference{}; }; } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_WINOGRAD_LAYER_FIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_WINOGRADCONVOLUTIONLAYERFIXTURE_H diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h new file mode 100644 index 0000000000..ca4de11a15 --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2022-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE_H + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/validation/reference/DepthwiseConvolutionLayer.h" +#include "tests/validation/Validation.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuDepthwiseConv2dValidationGenericFixture : public framework::Fixture +{ +public: + using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value || + std::is_same<typename std::decay<T>::type, int8_t>::value, + int32_t, + T>::type; // If T: uint8_t or int8_t then TBias: int32_t, otherwise TBias: T + + void setup(TensorShape input_shape, + Size2D kernel_size, + const PadStrideInfo &pad_stride, + const Size2D &dilation, + const unsigned int depth_multiplier, + const DataType data_type, + const DataLayout data_layout) + { + ARM_COMPUTE_ERROR_ON(data_layout != + DataLayout::NHWC); // Dynamic fusion depthwise conv2d only supports NHWC layout + + DepthwiseConv2dAttributes dwc_conv2d_attr; + const Padding2D padding_2d(pad_stride.pad_left(), pad_stride.pad_right(), pad_stride.pad_top(), + pad_stride.pad_bottom()); + dwc_conv2d_attr.pad(padding_2d) + .stride(Size2D(pad_stride.stride().first, pad_stride.stride().second)) + .dilation(dilation) + .depth_multiplier(depth_multiplier) + .dimension_rounding_type(pad_stride.round()); + + // Calculate Output and Weight Shapes + TensorShape weights_shape = TensorShape(kernel_size.width, kernel_size.height); + + const TensorInfo in_info(input_shape, 1, data_type); + const TensorInfo we_info(weights_shape, 1, data_type); + + const ConvolutionInfo info{pad_stride, depth_multiplier, ActivationLayerInfo(), dilation}; + const TensorShape output_shape = + misc::shape_calculator::compute_depthwise_convolution_shape(in_info, we_info, info); + + weights_shape.set(2, output_shape.z()); + const TensorShape bias_shape = TensorShape(weights_shape[2]); + + _data_type = data_type; + _data_layout = data_layout; + _target = compute_target(input_shape, weights_shape, bias_shape, dwc_conv2d_attr); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, dwc_conv2d_attr); + } + +protected: + template <typename U> + void fill(U &&tensor, int i) + { + switch (tensor.data_type()) + { + case DataType::F16: + { + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-1.0f, 1.0f}; + library->fill(tensor, distribution, i); + break; + } + case DataType::F32: + { + std::uniform_real_distribution<float> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + break; + } + default: + library->fill_tensor_uniform(tensor, i); + } + } + + // Given input is in nchw format + TensorType compute_target(TensorShape input_shape, + TensorShape weights_shape, + const TensorShape &bias_shape, + const DepthwiseConv2dAttributes dwc_conv2d_attr) + { + ARM_COMPUTE_ERROR_ON(_data_layout != DataLayout::NHWC); + + // Our test shapes are assumed in NCHW data layout, thus the permutation + permute(input_shape, PermutationVector(2U, 0U, 1U)); + permute(weights_shape, PermutationVector(2U, 0U, 1U)); + + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Create sketch tensors + ITensorInfo *input_info = context.create_tensor_info(TensorInfo(input_shape, 1, _data_type, _data_layout)); + ITensorInfo *weight_info = context.create_tensor_info(TensorInfo(weights_shape, 1, _data_type, _data_layout)); + ITensorInfo *bias_info = context.create_tensor_info(TensorInfo(bias_shape, 1, _data_type, _data_layout)); + ITensorInfo *dst_info = context.create_tensor_info(); + + ITensorInfo *ans_info = FunctionType::create_op(sketch, input_info, weight_info, bias_info, dwc_conv2d_attr); + GpuOutput::create_op(sketch, ans_info, dst_info); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + // (Important) Allocate auxiliary tensor memory if there are any + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + + // Construct user tensors + TensorType t_input{}; + TensorType t_weight{}; + TensorType t_bias{}; + TensorType t_dst{}; + + // Initialize user tensors + t_input.allocator()->init(*input_info); + t_weight.allocator()->init(*weight_info); + t_bias.allocator()->init(*bias_info); + t_dst.allocator()->init(*dst_info); + + // Allocate and fill user tensors + t_input.allocator()->allocate(); + t_weight.allocator()->allocate(); + t_bias.allocator()->allocate(); + t_dst.allocator()->allocate(); + + fill(AccessorType(t_input), 0); + fill(AccessorType(t_weight), 1); + fill(AccessorType(t_bias), 2); + + // Run runtime + runtime.run({&t_input, &t_weight, &t_bias, &t_dst}); + return t_dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &input_shape, + const TensorShape &weights_shape, + const TensorShape &bias_shape, + const TensorShape &output_shape, + DepthwiseConv2dAttributes dwc_conv2d_attr) + { + // Create reference + SimpleTensor<T> src{input_shape, _data_type, 1}; + SimpleTensor<T> weight{weights_shape, _data_type, 1}; + SimpleTensor<TBias> bias{bias_shape, _data_type, 1}; + + fill(src, 0); + fill(weight, 1); + fill(bias, 2); + + auto src_nchw = src; + auto weights_nchw = weight; + auto bias_nchw = bias; + auto output_shape_nchw = output_shape; + + PadStrideInfo legacy_pad_stride(dwc_conv2d_attr.stride().x(), dwc_conv2d_attr.stride().y(), + dwc_conv2d_attr.pad().left, dwc_conv2d_attr.pad().right, + dwc_conv2d_attr.pad().top, dwc_conv2d_attr.pad().bottom, + DimensionRoundingType{}); + auto dst_nchw = + reference::depthwise_convolution(src_nchw, weights_nchw, bias_nchw, output_shape_nchw, legacy_pad_stride, + dwc_conv2d_attr.depth_multiplier(), dwc_conv2d_attr.dilation()); + return dst_nchw; + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; + DataType _data_type{}; + DataLayout _data_layout{}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuDepthwiseConv2dValidationFixture + : public DynamicFusionGpuDepthwiseConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape input_shape, + Size2D kernel_size, + const PadStrideInfo &info, + const Size2D &dilation, + const unsigned int depth_multiplier, + DataType data_type, + DataLayout data_layout) + { + DynamicFusionGpuDepthwiseConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + input_shape, kernel_size, info, dilation, depth_multiplier, data_type, data_layout); + } +}; +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE_H diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h new file mode 100644 index 0000000000..1f4e223b93 --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h @@ -0,0 +1,411 @@ +/* + * Copyright (c) 2022-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DIRECTCONV2DFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DIRECTCONV2DFIXTURE_H + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/validation/reference/ConvolutionLayer.h" +#include "tests/validation/reference/Permute.h" +#include "tests/validation/Validation.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +template <typename U> +void fill(U &&tensor, int i) +{ + switch (tensor.data_type()) + { + case DataType::F16: + { + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-1.0f, 1.0f}; + library->fill(tensor, distribution, i); + break; + } + case DataType::F32: + { + std::uniform_real_distribution<float> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + break; + } + default: + library->fill_tensor_uniform(tensor, i); + } +} + +} // namespace + +/** General Conv2d fixture + * Adapted from tests/validation/fixtures/ConvolutionLayerFixture.h + * TODO: Parameterize to be fully backend agnostic: COMPMID-5760; remove Gpu from name + */ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuConv2dValidationGenericFixture : public framework::Fixture +{ +public: + using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value || + std::is_same<typename std::decay<T>::type, int8_t>::value, + int32_t, + T>::type; // If T: uint8_t or int8_t then TBias: int32_t, otherwise TBias: T + + void setup(TensorShape input_shape, + TensorShape weights_shape, + TensorShape bias_shape, + TensorShape output_shape, + const PadStrideInfo &info, + const Size2D &dilation, + DataType data_type, + DataLayout data_layout, + QuantizationInfo quantization_info, + QuantizationInfo weight_quantization_info) + { + ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NHWC); // Dynamic fusion conv2d only supports NHWC layout + const Conv2dAttributes conv2d_attr = convert_pad_stride_info_to_conv_attr(info, dilation); + _data_type = data_type; + _data_layout = data_layout; + _is_quantized = is_data_type_quantized_asymmetric(data_type); + _quantization_info = quantization_info; + _weight_quantization_info = weight_quantization_info; + _bias_data_type = _is_quantized ? DataType::S32 : data_type; + _target = compute_target(input_shape, weights_shape, bias_shape, conv2d_attr); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, conv2d_attr); + } + +protected: + // Given input is in nchw format + TensorType compute_target(TensorShape input_shape, + TensorShape weights_shape, + const TensorShape &bias_shape, + Conv2dAttributes conv2d_attr) + { + ARM_COMPUTE_ERROR_ON(_data_layout != DataLayout::NHWC); + permute(input_shape, PermutationVector(2U, 0U, 1U)); + permute(weights_shape, PermutationVector(2U, 0U, 1U)); + CLScheduler::get().default_reinit(); + + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Create sketch tensors + ITensorInfo *input_info = context.create_tensor_info(TensorInfo(input_shape, 1, _data_type, _data_layout)); + ITensorInfo *weight_info = context.create_tensor_info(TensorInfo(weights_shape, 1, _data_type, _data_layout)); + ITensorInfo *bias_info = context.create_tensor_info(TensorInfo(bias_shape, 1, _data_type, _data_layout)); + ITensorInfo *dst_info = context.create_tensor_info(); + + ITensorInfo *ans_info = FunctionType::create_op(sketch, input_info, weight_info, bias_info, conv2d_attr); + GpuOutput::create_op(sketch, ans_info, dst_info); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + // (Important) Allocate auxiliary tensor memory if there are any + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + // Construct user tensors + TensorType t_input{}; + TensorType t_weight{}; + TensorType t_bias{}; + TensorType t_dst{}; + + // Initialize user tensors + t_input.allocator()->init(*input_info); + t_weight.allocator()->init(*weight_info); + t_bias.allocator()->init(*bias_info); + t_dst.allocator()->init(*dst_info); + + // Allocate and fill user tensors + t_input.allocator()->allocate(); + t_weight.allocator()->allocate(); + t_bias.allocator()->allocate(); + t_dst.allocator()->allocate(); + + fill(AccessorType(t_input), 0); + fill(AccessorType(t_weight), 1); + fill(AccessorType(t_bias), 2); + + // Run runtime + runtime.run({&t_input, &t_weight, &t_bias, &t_dst}); + return t_dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &input_shape, + const TensorShape &weights_shape, + const TensorShape &bias_shape, + const TensorShape &output_shape, + Conv2dAttributes conv2d_attr) + { + // Create reference + SimpleTensor<T> src{input_shape, _data_type, 1, _quantization_info}; + SimpleTensor<T> weight{weights_shape, _data_type, 1, _weight_quantization_info}; + SimpleTensor<TBias> bias{bias_shape, _data_type, 1, _quantization_info}; + + fill(src, 0); + fill(weight, 1); + fill(bias, 2); + + auto src_nchw = src; + auto weights_nchw = weight; + auto bias_nchw = bias; + auto output_shape_nchw = output_shape; + + PadStrideInfo legacy_pad_stride(conv2d_attr.stride().x(), conv2d_attr.stride().y(), conv2d_attr.pad().left, + conv2d_attr.pad().right, conv2d_attr.pad().top, conv2d_attr.pad().bottom, + DimensionRoundingType{}); + auto dst_nchw = reference::convolution_layer(src_nchw, weights_nchw, bias_nchw, output_shape_nchw, + legacy_pad_stride, conv2d_attr.dilation()); + return dst_nchw; + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; + DataType _data_type{}; + DataType _bias_data_type{}; + DataLayout _data_layout{}; + QuantizationInfo _quantization_info{}; + QuantizationInfo _weight_quantization_info{}; + bool _is_quantized = false; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuConv2dValidationFixture + : public DynamicFusionGpuConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape input_shape, + TensorShape weights_shape, + TensorShape output_shape, + TensorShape bias_shape, + const PadStrideInfo &info, + const Size2D &dialation, + DataType data_type, + DataLayout data_layout, + QuantizationInfo quantization_info) + { + DynamicFusionGpuConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + input_shape, weights_shape, output_shape, bias_shape, info, dialation, data_type, data_layout, + quantization_info, quantization_info); + } +}; + +/** Specific Conv2d method: Direct Conv2d fixture + * Adapted from tests/validation/fixtures/DirectConvolutionLayerFixture.h + * TODO: Parameterize to be fully backend agnostic: COMPMID-5760 + */ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionDirectConv2dValidationGenericFixture : public framework::Fixture +{ +public: + using TBias = + typename std::conditional<std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int32_t, T>::type; + + void setup(TensorShape input_shape, + int stride_x, + int stride_y, + int pad_x, + int pad_y, + unsigned int kernel_size, + unsigned int num_kernels, + DataType data_type, + QuantizationInfo quantization_info, + DataLayout data_layout) + { + ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NHWC); // Dynamic fusion conv2d only supports NHWC layout + + TensorShape weights_shape(kernel_size, kernel_size, input_shape.z(), num_kernels); + const TensorShape bias_shape(num_kernels); + const PadStrideInfo info(stride_x, stride_y, pad_x, pad_y, DimensionRoundingType::FLOOR); + const DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type; + + const Conv2dAttributes conv2d_attr = convert_pad_stride_info_to_conv_attr(info, {1U, 1U} /* dilation */); + + TensorInfo input_info = TensorInfo(input_shape, 1, data_type); + TensorInfo weights_info = TensorInfo(weights_shape, 1, data_type); + + const TensorShape output_shape = + misc::shape_calculator::compute_deep_convolution_shape(input_info, weights_info, info); + + _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, conv2d_attr, data_type, + bias_data_type, quantization_info, data_layout); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, + bias_data_type, quantization_info); + } + +protected: + TensorType compute_target(TensorShape input_shape, + TensorShape weights_shape, + const TensorShape &bias_shape, + TensorShape output_shape, + const Conv2dAttributes &conv2d_attr, + DataType data_type, + DataType bias_data_type, + QuantizationInfo quantization_info, + const DataLayout &data_layout) + { + ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NHWC); + ARM_COMPUTE_UNUSED(quantization_info); + // Dataset shapes are in NCHW layout + permute(input_shape, PermutationVector(2U, 0U, 1U)); + permute(weights_shape, PermutationVector(2U, 0U, 1U)); + permute(output_shape, PermutationVector(2U, 0U, 1U)); + + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Create sketch tensors + auto input_info = context.create_tensor_info(TensorInfo(input_shape, 1, data_type, data_layout)); + auto weight_info = context.create_tensor_info(TensorInfo(weights_shape, 1, data_type, data_layout)); + auto bias_info = context.create_tensor_info(TensorInfo(bias_shape, 1, bias_data_type, data_layout)); + auto dst_info = context.create_tensor_info(); + + ITensorInfo *ans_info = FunctionType::create_op(sketch, input_info, weight_info, bias_info, conv2d_attr); + GpuOutput::create_op(sketch, ans_info, dst_info); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + // Construct user tensors + TensorType t_input{}; + TensorType t_weight{}; + TensorType t_bias{}; + TensorType t_dst{}; + + // Initialize user tensors + t_input.allocator()->init(*input_info); + t_weight.allocator()->init(*weight_info); + t_bias.allocator()->init(*bias_info); + t_dst.allocator()->init(*dst_info); + + ARM_COMPUTE_ASSERT(t_input.info()->is_resizable()); + ARM_COMPUTE_ASSERT(t_weight.info()->is_resizable()); + ARM_COMPUTE_ASSERT(t_bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(t_dst.info()->is_resizable()); + + // Allocate and fill user tensors + t_input.allocator()->allocate(); + t_weight.allocator()->allocate(); + t_bias.allocator()->allocate(); + t_dst.allocator()->allocate(); + + ARM_COMPUTE_ASSERT(!t_input.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!t_weight.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!t_bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!t_dst.info()->is_resizable()); + + fill(AccessorType(t_input), 0); + fill(AccessorType(t_weight), 1); + fill(AccessorType(t_bias), 2); + + // Run runtime + runtime.run({&t_input, &t_weight, &t_bias, &t_dst}); + return t_dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &input_shape, + const TensorShape &weights_shape, + const TensorShape &bias_shape, + const TensorShape &output_shape, + const PadStrideInfo &info, + DataType data_type, + DataType bias_data_type, + QuantizationInfo quantization_info) + { + // Create reference + SimpleTensor<T> src{input_shape, data_type, 1, quantization_info}; + SimpleTensor<T> weights{weights_shape, data_type, 1, quantization_info}; + SimpleTensor<TBias> bias{bias_shape, bias_data_type, 1, quantization_info}; + + // Fill reference + fill(src, 0); + fill(weights, 1); + fill(bias, 2); + + SimpleTensor<T> dst = reference::convolution_layer<T>(src, weights, bias, output_shape, info); + return dst; + } + TensorType _target{}; + SimpleTensor<T> _reference{}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionDirectConv2dValidationFixture + : public DynamicFusionDirectConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape input_shape, + int stride_x, + int stride_y, + int pad_x, + int pad_y, + unsigned int kernel_size, + unsigned int num_kernels, + DataType data_type, + DataLayout data_layout) + { + DynamicFusionDirectConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + input_shape, stride_x, stride_y, pad_x, pad_y, kernel_size, num_kernels, data_type, QuantizationInfo(), + data_layout); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DIRECTCONV2DFIXTURE_H diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h new file mode 100644 index 0000000000..69bd0efbdc --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h @@ -0,0 +1,273 @@ +/* + * Copyright (c) 2022-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE_H + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" + +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/validation/reference/ElementwiseOperations.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuElementwiseBinaryValidationGenericFixture : public framework::Fixture +{ +public: + void setup(ArithmeticOperation ref_op, + const TensorShape &shape0, + const TensorShape &shape1, + const TensorShape &shape2, + DataType data_type, + bool is_inplace, + bool fuse_two_ops = false) + { + _ref_op = ref_op; + _is_inplace = is_inplace; + _data_type = data_type; + _fuse = fuse_two_ops; + ARM_COMPUTE_ERROR_ON_MSG(_fuse && shape2.total_size() == 0, "No shape2 provided for fusion of two ops."); + ARM_COMPUTE_ERROR_ON_MSG(_fuse && _is_inplace, "In place for fusing case not supported yet."); + _target = compute_target(shape0, shape1, shape2); + _reference = compute_reference(shape0, shape1, shape2); + } + +protected: + template <typename U> + void fill(U &&tensor, int i) + { + if (is_data_type_float(tensor.data_type())) + { + switch (_ref_op) + { + case ArithmeticOperation::DIV: + library->fill_tensor_uniform_ranged(tensor, i, {std::pair<float, float>(-0.001f, 0.001f)}); + break; + case ArithmeticOperation::POWER: + library->fill_tensor_uniform(tensor, i, 0.0f, 5.0f); + break; + default: + library->fill_tensor_uniform(tensor, i); + } + } + else if (tensor.data_type() == DataType::S32) + { + switch (_ref_op) + { + case ArithmeticOperation::DIV: + library->fill_tensor_uniform_ranged(tensor, i, {std::pair<int32_t, int32_t>(-1U, 1U)}); + break; + default: + library->fill_tensor_uniform(tensor, i); + } + } + else + { + library->fill_tensor_uniform(tensor, i); + } + } + + TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2) + { + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Fuse first element wise binary Op + ITensorInfo *lhs_info = context.create_tensor_info(TensorInfo(shape0, 1, _data_type)); + ITensorInfo *rhs_info = context.create_tensor_info(TensorInfo(shape1, 1, _data_type)); + ITensorInfo *dst_info = context.create_tensor_info(); + + ITensorInfo *rhs_info_fuse = nullptr; + + ITensorInfo *ans_info = FunctionType::create_op(sketch, lhs_info, rhs_info); + + if (_fuse) + { + rhs_info_fuse = context.create_tensor_info(TensorInfo(shape2, 1, _data_type)); + ITensorInfo *ans2_info = FunctionType::create_op(sketch, ans_info, rhs_info_fuse); + GpuOutput::create_op(sketch, ans2_info, dst_info); + } + else + { + GpuOutput::create_op(sketch, ans_info, dst_info); + } + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + // (Important) Allocate auxiliary tensor memory if there are any + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + + // Construct user tensors + TensorType t_lhs{}; + TensorType t_rhs{}; + TensorType t_rhs_fuse{}; + TensorType t_dst{}; + + // Initialize user tensors + t_lhs.allocator()->init(*lhs_info); + t_rhs.allocator()->init(*rhs_info); + t_dst.allocator()->init(*dst_info); + if (_fuse) + { + t_rhs_fuse.allocator()->init(*rhs_info_fuse); + } + + // Allocate and fill user tensors + // Instead of using ACL allocator, the user can choose to import memory into the tensors + t_lhs.allocator()->allocate(); + t_rhs.allocator()->allocate(); + t_dst.allocator()->allocate(); + if (_fuse) + { + t_rhs_fuse.allocator()->allocate(); + } + + fill(AccessorType(t_lhs), 0); + fill(AccessorType(t_rhs), 1); + if (_fuse) + { + fill(AccessorType(t_rhs_fuse), 2); + } + + // Run runtime + if (_fuse) + { + runtime.run({&t_lhs, &t_rhs, &t_rhs_fuse, &t_dst}); + } + else + { + runtime.run({&t_lhs, &t_rhs, &t_dst}); + } + + return t_dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2) + { + const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1); + const TensorShape out_shape_fuse = TensorShape::broadcast_shape(out_shape, shape1); + + // Create reference + SimpleTensor<T> ref_lhs{shape0, _data_type, 1, QuantizationInfo()}; + SimpleTensor<T> ref_rhs{shape1, _data_type, 1, QuantizationInfo()}; + SimpleTensor<T> ref_rhs_fuse{shape2, _data_type, 1, QuantizationInfo()}; + SimpleTensor<T> ref_dst{out_shape, _data_type, 1, QuantizationInfo()}; + SimpleTensor<T> ref_dst_fuse{out_shape_fuse, _data_type, 1, QuantizationInfo()}; + + // Fill reference + fill(ref_lhs, 0); + fill(ref_rhs, 1); + + reference::arithmetic_operation<T>(_ref_op, ref_lhs, ref_rhs, ref_dst, ConvertPolicy::WRAP); + if (_fuse) + { + fill(ref_rhs_fuse, 2); + reference::arithmetic_operation<T>(_ref_op, ref_dst, ref_rhs_fuse, ref_dst_fuse, ConvertPolicy::WRAP); + } + SimpleTensor<T> *ret = _fuse ? &ref_dst_fuse : &ref_dst; + return *ret; + } + + ArithmeticOperation _ref_op{ArithmeticOperation::ADD}; + TensorType _target{}; + SimpleTensor<T> _reference{}; + DataType _data_type{}; + DataLayout _data_layout{}; + bool _is_inplace{false}; + bool _fuse{false}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuElementwiseBinaryOneOpValidationFixture + : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(ArithmeticOperation ref_op, const TensorShape &shape0, DataType data_type, bool is_inplace) + { + DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + ref_op, shape0, shape0, TensorShape(), data_type, is_inplace); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuElementwiseBinaryBroadcastOneOpValidationFixture + : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(ArithmeticOperation ref_op, + const TensorShape &shape0, + const TensorShape &shape1, + DataType data_type, + bool is_inplace) + { + DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + ref_op, shape0, shape1, TensorShape(), data_type, is_inplace); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuElementwiseBinaryTwoOpsValidationFixture + : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(ArithmeticOperation ref_op, + const TensorShape &shape0, + const TensorShape &shape1, + const TensorShape &shape2, + DataType data_type, + bool is_inplace, + bool fuse_two_ops) + { + DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + ref_op, shape0, shape1, shape2, data_type, is_inplace, fuse_two_ops); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE_H diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h new file mode 100644 index 0000000000..4c1cc94d3d --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h @@ -0,0 +1,297 @@ +/* + * Copyright (c) 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_MATMULKERNELFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_MATMULKERNELFIXTURE_H + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/MatMulAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMatMul.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/validation/Helpers.h" +#include "tests/validation/reference/GEMM.h" +#include "tests/validation/reference/Permute.h" +#include "tests/validation/reference/ReshapeLayer.h" +#include "tests/validation/Validation.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +template <typename U> +void fill(U &&tensor, int i) +{ + switch (tensor.data_type()) + { + case DataType::F16: + { + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-1.0f, 1.0f}; + library->fill(tensor, distribution, i); + break; + } + case DataType::F32: + { + std::uniform_real_distribution<float> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + break; + } + default: + library->fill_tensor_uniform(tensor, i); + } +} + +} // namespace +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuMatMulValidationGenericFixture : public framework::Fixture +{ +public: + void setup(TensorShape lhs_shape, + TensorShape rhs_shape, + TensorShape output_shape, + bool transpose_a, + bool transpose_b, + int M0, + int N0, + int K0, + bool export_rhs_to_cl_image, + DataType data_type) + { + //For brevity, the input shapes are assumed to be not-transposed for both a and b matrices. + if (transpose_a) + { + permute(lhs_shape, PermutationVector(1U, 0U)); + } + if (transpose_b) + { + permute(rhs_shape, PermutationVector(1U, 0U)); + } + + // Skip configurations unsupported by the device. + _device_supports_export_to_cl_image = image2d_from_buffer_supported(CLKernelLibrary::get().get_device()); + if (!_device_supports_export_to_cl_image && export_rhs_to_cl_image) + { + ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + return; // Note: Also need to skip the validate in corresponding FIXTURE_DATA_TEST_CASEs. + } + + _target = compute_target(lhs_shape, rhs_shape, transpose_a, transpose_b, M0, N0, K0, export_rhs_to_cl_image, + data_type); + _reference = compute_reference(lhs_shape, rhs_shape, output_shape, transpose_a, transpose_b, data_type); + } + +protected: + TensorType compute_target(TensorShape &shape_a, + TensorShape &shape_b, + bool transpose_a, + bool transpose_b, + int M0, + int N0, + int K0, + bool export_rhs_to_cl_image, + DataType data_type) + { + ARM_COMPUTE_UNUSED(export_rhs_to_cl_image); + CLScheduler::get().default_reinit(); + + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Create sketch tensors + ITensorInfo *lhs_info = context.create_tensor_info(TensorInfo(shape_a, 1, data_type)); + ITensorInfo *rhs_info = context.create_tensor_info(TensorInfo(shape_b, 1, data_type)); + ITensorInfo *dst_info = context.create_tensor_info(); + + MatMulAttributes matmul_attr{}; + matmul_attr.adj_lhs(transpose_a); + matmul_attr.adj_rhs(transpose_b); + + GpuMatMulSettings matmul_settings{}; + matmul_settings.m0(M0); + matmul_settings.n0(N0); + matmul_settings.k0(K0); + + ITensorInfo *ans_info = FunctionType::create_op(sketch, lhs_info, rhs_info, matmul_attr, matmul_settings); + GpuOutput::create_op(sketch, ans_info, dst_info); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + + // Construct user tensors + TensorType t_lhs{}; + TensorType t_rhs{}; + TensorType t_dst{}; + + // Initialize user tensors + t_lhs.allocator()->init(*lhs_info); + t_rhs.allocator()->init(*rhs_info); + t_dst.allocator()->init(*dst_info); + + ARM_COMPUTE_ASSERT(t_lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(t_rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(t_dst.info()->is_resizable()); + + // Allocate and fill user tensors + t_lhs.allocator()->allocate(); + t_rhs.allocator()->allocate(); + t_dst.allocator()->allocate(); + + ARM_COMPUTE_ASSERT(!t_lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!t_rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!t_dst.info()->is_resizable()); + + fill(AccessorType(t_lhs), 0); + fill(AccessorType(t_rhs), 1); + + // Run runtime + runtime.run({&t_lhs, &t_rhs, &t_dst}); + + return t_dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &shape_a, + const TensorShape &shape_b, + const TensorShape &output_shape, + bool pretranspose_a, + bool pretranspose_b, + DataType data_type) + { + // We collapse dimensions > 3 onto dimension 3, i.e. 5D+ tensors will look like 3D + // This is necessary unless we choose to extend gemm reference for 5D+ tensors + TensorShape output_shape_collapsed = output_shape.collapsed_from(Window::DimZ); + TensorShape shape_a_collapsed = shape_a.collapsed_from(Window::DimZ); + TensorShape shape_b_collapsed = shape_b.collapsed_from(Window::DimZ); + + // Create reference + SimpleTensor<T> a{shape_a_collapsed, data_type, 1}; + SimpleTensor<T> b{shape_b_collapsed, data_type, 1}; + SimpleTensor<T> c{output_shape_collapsed, data_type, 1}; + + // Fill reference + fill(a, 0); + fill(b, 1); + + /* Note: Assuming the usual batch matmul dimensions A = (B x M x K), B = (B x K x N), if pretranspose_A is set to true, then A is assumed to be (B x K x M), + therefore, A must be pre-transposed before passing it to the fixture. And, we transpose A again in the fixture to make it (B x M x K) + in order to be able to call reference implementation that works with (B x M x K) input. + Similarly, if pretranspose_B is set to true, then B is assumed to be (B x N x K), B must be pre-transposed before passing it to the fixture. */ + + // Define transposed shapes + TensorShape a_transposed_shape(a.shape()); + a_transposed_shape.set(0, a.shape().y()); + a_transposed_shape.set(1, a.shape().x()); + + TensorShape b_transposed_shape(b.shape()); + b_transposed_shape.set(0, b.shape().y()); + b_transposed_shape.set(1, b.shape().x()); + + // Define transposed tensors + SimpleTensor<T> a_transposed{a_transposed_shape, data_type}; + SimpleTensor<T> b_transposed{b_transposed_shape, data_type}; + + //pretranspose a if necessary + if (pretranspose_a) + { + a_transposed = reference::permute<T>(a, PermutationVector(1U, 0U)); + } + + // pretranspose b if necessary + if (pretranspose_b) + { + b_transposed = reference::permute<T>(b, PermutationVector(1U, 0U)); + } + + // Use transposed tensors if boolean enabled else use original tensors + SimpleTensor<T> result = + reference::gemm<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, 1.0f, 0.f); + + // We reshape the gemm output back if the tensor is high dimensional + if (output_shape_collapsed != output_shape) + { + // std::cout << "called reshape: \n"; + result = reference::reshape_layer(result, output_shape); + } + + return result; + } + + CLTensor _target{}; + SimpleTensor<T> _reference{}; + bool _device_supports_export_to_cl_image{false}; + bool _device_supports_mmul{false}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuMatMulValidationFixture + : public DynamicFusionGpuMatMulValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape lhs_shape, + TensorShape rhs_shape, + TensorShape output_shape, + bool transpose_a, + bool transpose_b, + int M0, + int N0, + int K0, + bool export_rhs_to_cl_image, + DataType data_type) + { + ARM_COMPUTE_UNUSED(export_rhs_to_cl_image); + DynamicFusionGpuMatMulValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + lhs_shape, rhs_shape, output_shape, transpose_a, transpose_b, M0, N0, K0, + false /* export_rhs_to_cl_image bias */, data_type); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_MATMULKERNELFIXTURE_H diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h new file mode 100644 index 0000000000..b0c7143d91 --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_POOL2DFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_POOL2DFIXTURE_H + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h" + +#include "src/dynamic_fusion/utils/Utils.h" +#include "tests/CL/CLAccessor.h" +#include "tests/framework/Fixture.h" +#include "tests/validation/reference/PoolingLayer.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuPool2dValidationGenericFixture : public framework::Fixture +{ +public: + void setup(TensorShape input_shape, const Pool2dAttributes &pool_attr, DataType data_type) + { + _target = compute_target(input_shape, pool_attr, data_type); + _reference = compute_reference( + input_shape, convert_pool_attr_to_pool_info(pool_attr, true /* mixed_precision */), data_type); + } + +protected: + template <typename U> + void fill(U &&tensor, int i) + { + switch (tensor.data_type()) + { + case DataType::F16: + { + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-1.0f, 1.0f}; + library->fill(tensor, distribution, i); + break; + } + case DataType::F32: + { + std::uniform_real_distribution<float> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + break; + } + default: + library->fill_tensor_uniform(tensor, i); + } + } + + // Given input is in nchw format + TensorType compute_target(TensorShape input_shape, const Pool2dAttributes &pool_attr, const DataType data_type) + { + CLScheduler::get().default_reinit(); + + // Change shape due to NHWC data layout, test shapes are NCHW + permute(input_shape, PermutationVector(2U, 0U, 1U)); + + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Create sketch tensors + auto input_info = context.create_tensor_info(TensorInfo(input_shape, 1, data_type, DataLayout::NHWC)); + auto dst_info = context.create_tensor_info(); + + // Create Pool2dSettings + GpuPool2dSettings pool_settings = GpuPool2dSettings(); + + ITensorInfo *ans_info = FunctionType::create_op(sketch, input_info, pool_attr, pool_settings); + GpuOutput::create_op(sketch, ans_info, dst_info); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + // (Important) Allocate auxiliary tensor memory if there are any + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + // Construct user tensors + TensorType t_input{}; + TensorType t_dst{}; + + // Initialize user tensors + t_input.allocator()->init(*input_info); + t_dst.allocator()->init(*dst_info); + + // Allocate and fill user tensors + t_input.allocator()->allocate(); + t_dst.allocator()->allocate(); + + fill(AccessorType(t_input), 0); + + // Run runtime + runtime.run({&t_input, &t_dst}); + return t_dst; + } + + SimpleTensor<T> compute_reference(TensorShape shape, PoolingLayerInfo pool_info, DataType data_type) + { + // Create reference + SimpleTensor<T> src(shape, data_type, 1, QuantizationInfo()); + // Fill reference + fill(src, 0); + return reference::pooling_layer<T>(src, pool_info, QuantizationInfo(), nullptr, DataLayout::NCHW); + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuPool2dValidationFixture + : public DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape input_shape, + PoolingType pool_type, + Size2D pool_size, + Padding2D pad, + Size2D stride, + bool exclude_padding, + DataType data_type) + { + DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + input_shape, + Pool2dAttributes().pool_type(pool_type).pool_size(pool_size).pad(pad).stride(stride).exclude_padding( + exclude_padding), + data_type); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuPool2dSpecialValidationFixture + : public DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape input_shape, Pool2dAttributes pool_attr, DataType data_type) + { + DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + input_shape, pool_attr, data_type); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute + +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_POOL2DFIXTURE_H diff --git a/tests/validation/fixtures/dynamic_fusion/operators/ActivationFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/ActivationFixture.h new file mode 100644 index 0000000000..c9ffbccbc7 --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/operators/ActivationFixture.h @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_ACTIVATIONFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_ACTIVATIONFIXTURE_H + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" + +#include "tests/framework/Fixture.h" +#include "tests/validation/reference/ActivationLayer.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename... TArgs> +class DynamicFusionActivationValidationFixture : public framework::Fixture +{ +public: + void setup(TensorShape shape, bool fuse, DataType data_type, ActivationLayerInfo act_info, TArgs... args) + { + _fuse = fuse; + _data_type = data_type; + _function = act_info.activation(); + _target = compute_target(shape, args...); + _reference = compute_reference(shape, act_info); + } + +protected: + std::vector<T> get_boundary_values(T min, T max) + { + // This function will return a vector filled with the following values that can + // represent two partitions derived from equivalent partitioning. + // * Lower partition: min, min + delta, lower quarter (nominal), center - delta + // * Upper partition: center, center + delta, upper quarter (nominal), max - delta, max + const auto delta = is_data_type_float(_data_type) ? T(0.1f) : T(1); + const auto center_value = (min + max) / 2; + const auto lower_quarter = (min + center_value) / 2; + const auto upper_quarter = (center_value + max) / 2; + + std::vector<T> boundary_values{}; + + // To ensure all the inserted values are within the given range after subtracing/adding delta + auto insert_values = [&boundary_values, &min, &max](const std::initializer_list<T> &new_values) + { + for (auto &v : new_values) + { + if (v >= min && v <= max) + { + boundary_values.emplace_back(v); + } + } + }; + + insert_values({min, static_cast<T>(min + delta), static_cast<T>(lower_quarter), + static_cast<T>(center_value - delta)}); // lower partition + insert_values({static_cast<T>(center_value), static_cast<T>(center_value + delta), + static_cast<T>(upper_quarter), static_cast<T>(max - delta), max}); // upper partition + + return boundary_values; + } + + template <typename U> + void fill(U &&tensor) + { + float min_bound = 0; + float max_bound = 0; + std::tie(min_bound, max_bound) = get_activation_layer_test_bounds<T>(_function, _data_type); + library->fill_static_values(tensor, get_boundary_values(static_cast<T>(min_bound), static_cast<T>(max_bound))); + } + + TensorType compute_target(const TensorShape &shape, TArgs... args) + { + // Create a new workload sketch + CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + GpuWorkloadContext context{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Create sketch tensors + ITensorInfo *src_info = context.create_tensor_info(TensorInfo(shape, 1, _data_type)); + ITensorInfo *dst_info = context.create_tensor_info(TensorInfo(shape, 1, _data_type)); + + ITensorInfo *ans_0_info = FunctionType::create_op(sketch, src_info, args...); + if (_fuse) + { + ITensorInfo *ans_1_info = FunctionType::create_op(sketch, ans_0_info, args...); + GpuOutput::create_op(sketch, ans_1_info, dst_info); + } + else + { + GpuOutput::create_op(sketch, ans_0_info, dst_info); + } + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + // Construct user tensors + TensorType t_src{}; + TensorType t_dst{}; + + // Initialize user tensors + t_src.allocator()->init(*src_info); + t_dst.allocator()->init(*dst_info); + + // Allocate and fill user tensors + t_src.allocator()->allocate(); + t_dst.allocator()->allocate(); + + fill(AccessorType(t_src)); + + // Run runtime + runtime.run({&t_src, &t_dst}); + + return t_dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &shape, ActivationLayerInfo act_info) + { + // Create reference + SimpleTensor<T> src{shape, _data_type, 1}; + + // Fill reference + fill(src); + + auto tmp = reference::activation_layer<T>(src, act_info); + + if (_fuse) + { + auto dst = reference::activation_layer<T>(tmp, act_info); + return dst; + } + else + { + return tmp; + } + } + +protected: + ActivationLayerInfo::ActivationFunction _function{}; + bool _fuse{false}; + DataType _data_type{}; + TensorType _target{}; + SimpleTensor<T> _reference{}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionSigmoidValidationFixture + : public DynamicFusionActivationValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape shape, bool fuse, DataType data_type) + { + ActivationLayerInfo act_info{ActivationLayerInfo::ActivationFunction::LOGISTIC}; + DynamicFusionActivationValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, fuse, + data_type, act_info); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionTanhValidationFixture + : public DynamicFusionActivationValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape shape, bool fuse, DataType data_type) + { + ActivationLayerInfo act_info{ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f}; + DynamicFusionActivationValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, fuse, + data_type, act_info); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute + +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_ACTIVATIONFIXTURE_H diff --git a/tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h new file mode 100644 index 0000000000..08fffb305b --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2022-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_CASTFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_CASTFIXTURE_H + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/CastAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" + +#include "tests/framework/Fixture.h" +#include "tests/validation/reference/DepthConvertLayer.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2> +class DynamicFusionCastValidationFixture : public framework::Fixture +{ +public: + void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy) + { + _target = compute_target(shape, dt_in, dt_out, policy); + _reference = compute_reference(shape, dt_in, dt_out, policy); + } + +protected: + template <typename U> + void fill(U &&tensor, int i, DataType dt_in, DataType dt_out) + { + // Restricting range to avoid inf values + if (dt_out == DataType::F16) + { + constexpr int signed_min = -32000; + constexpr int signed_max = 32000; + constexpr int unsigned_min = 0; + constexpr int unsigned_max = 65000; + + switch (dt_in) + { + case DataType::U8: + case DataType::QASYMM8: + case DataType::QASYMM8_SIGNED: + case DataType::S8: + case DataType::F32: + { + library->fill_tensor_uniform(tensor, i); + break; + } + case DataType::U16: + { + library->fill_tensor_uniform(tensor, i, static_cast<uint16_t>(unsigned_min), + static_cast<uint16_t>(unsigned_max)); + break; + } + case DataType::S16: + { + library->fill_tensor_uniform(tensor, i, static_cast<int16_t>(signed_min), + static_cast<int16_t>(signed_max)); + break; + } + case DataType::U32: + { + library->fill_tensor_uniform(tensor, i, static_cast<uint32_t>(unsigned_min), + static_cast<uint32_t>(unsigned_max)); + break; + } + case DataType::S32: + { + library->fill_tensor_uniform(tensor, i, static_cast<int32_t>(signed_min), + static_cast<int32_t>(signed_max)); + break; + } + default: + ARM_COMPUTE_ERROR("NOT SUPPORTED!"); + } + } + else + { + library->fill_tensor_uniform(tensor, i); + } + } + + // Given input is in nchw format + TensorType + compute_target(const TensorShape &shape, const DataType dt_in, const DataType dt_out, const ConvertPolicy policy) + { + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Create sketch tensors + // Here, we use DataLayout::NCHW just for the test. However, the optimal data layout to + // be used with dynamic fusion is NHWC + ITensorInfo *src_info = + context.create_tensor_info(TensorInfo(shape, 1, dt_in, DataLayout::NCHW)); // layout is not important + ITensorInfo *dst_info = context.create_tensor_info(); + + CastAttributes attributes; + attributes.convert_policy(policy).data_type(dt_out); + + ITensorInfo *ans_info = FunctionType::create_op(sketch, src_info, attributes); + GpuOutput::create_op(sketch, ans_info, dst_info); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + // (Important) Allocate auxiliary tensor memory if there are any + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + + // Construct user tensors + TensorType t_src{}; + TensorType t_dst{}; + + // Initialize user tensors + t_src.allocator()->init(*src_info); + t_dst.allocator()->init(*dst_info); + + // Allocate and fill user tensors + t_src.allocator()->allocate(); + t_dst.allocator()->allocate(); + + fill(AccessorType(t_src), 0, dt_in, dt_out); + + // Run runtime + runtime.run({&t_src, &t_dst}); + return t_dst; + } + + SimpleTensor<T2> + compute_reference(const TensorShape &shape, const DataType dt_in, const DataType dt_out, const ConvertPolicy policy) + { + // Create reference + SimpleTensor<T1> src{shape, dt_in, 1}; + + // Fill reference + fill(src, 0, dt_in, dt_out); + + return reference::depth_convert<T1, T2>(src, dt_out, policy, 0); + } + + TensorType _target{}; + SimpleTensor<T2> _reference{}; +}; +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_CASTFIXTURE_H diff --git a/tests/validation/fixtures/dynamic_fusion/operators/ClampFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/ClampFixture.h new file mode 100644 index 0000000000..e8f6f83e42 --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/operators/ClampFixture.h @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2022-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_CLAMPFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_CLAMPFIXTURE_H + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" + +#include "tests/framework/Fixture.h" +#include "tests/validation/reference/ActivationLayer.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionClampValidationFixture : public framework::Fixture +{ +public: + void setup(TensorShape shape, ClampAttributes attributes, bool fuse, DataType data_type) + { + // CLAMP is implemented as LU_BOUNDED_RELU with the alpha and beta variables swapped. + ActivationLayerInfo act_info{ ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val() }; + + _fuse = fuse; + _attributes = attributes; + _data_type = data_type; + _target = compute_target(shape, attributes); + _reference = compute_reference(shape, act_info); + } + +protected: + std::vector<T> get_boundary_values(T min, T max) + { + // This function will return a vector filled with the following values that can + // represent two partitions derived from equivalent partitioning. + // * Lower partition: min, min + delta, lower quarter (nominal), center - delta + // * Upper partition: center, center + delta, upper quarter (nominal), max - delta, max + const auto delta = is_data_type_float(_data_type) ? T(0.1f) : T(1); + const auto center_value = (min + max) / 2; + const auto lower_quarter = (min + center_value) / 2; + const auto upper_quarter = (center_value + max) / 2; + + std::vector<T> boundary_values{}; + + // To ensure all the inserted values are within the given range after subtracing/adding delta + auto insert_values = [&boundary_values, &min, &max](const std::initializer_list<T> &new_values) + { + for(auto &v : new_values) + { + if(v >= min && v <= max) + { + boundary_values.emplace_back(v); + } + } + }; + + insert_values({ min, static_cast<T>(min + delta), static_cast<T>(lower_quarter), static_cast<T>(center_value - delta) }); // lower partition + insert_values({ static_cast<T>(center_value), static_cast<T>(center_value + delta), static_cast<T>(upper_quarter), static_cast<T>(max - delta), max }); // upper partition + + return boundary_values; + } + + template <typename U> + void fill(U &&tensor) + { + float min_bound = 0; + float max_bound = 0; + std::tie(min_bound, max_bound) = get_activation_layer_test_bounds<T>(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, _data_type); + library->fill_static_values(tensor, get_boundary_values(static_cast<T>(min_bound), static_cast<T>(max_bound))); + } + + TensorType compute_target(const TensorShape &shape, ClampAttributes attributes) + { + // Create a new workload sketch + CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + GpuWorkloadContext context{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &context }; + + // Create sketch tensors + ITensorInfo* src_info = context.create_tensor_info(TensorInfo(shape, 1, _data_type)); + ITensorInfo* dst_info = context.create_tensor_info(TensorInfo(shape, 1, _data_type)); + + ITensorInfo *ans_0_info = FunctionType::create_op(sketch, src_info, attributes); + if(_fuse) + { + ITensorInfo *ans_1_info = FunctionType::create_op(sketch, ans_0_info, attributes); + GpuOutput::create_op(sketch, ans_1_info, dst_info); + } + else + { + GpuOutput::create_op(sketch, ans_0_info, dst_info); + } + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + // Construct user tensors + TensorType t_src{}; + TensorType t_dst{}; + + // Initialize user tensors + t_src.allocator()->init(*src_info); + t_dst.allocator()->init(*dst_info); + + // Allocate and fill user tensors + t_src.allocator()->allocate(); + t_dst.allocator()->allocate(); + + fill(AccessorType(t_src)); + + // Run runtime + runtime.run({ &t_src, &t_dst }); + + return t_dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &shape, ActivationLayerInfo act_info) + { + // Create reference + SimpleTensor<T> src{ shape, _data_type, 1, _quantization_info }; + + // Fill reference + fill(src); + + auto dst = reference::activation_layer<T>(src, act_info, _quantization_info); + return dst; + } + +protected: + QuantizationInfo _quantization_info{}; + ClampAttributes _attributes{}; + bool _fuse{ false }; + DataType _data_type{}; + TensorType _target{}; + SimpleTensor<T> _reference{}; +}; +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_CLAMPFIXTURE_H diff --git a/tests/validation/fixtures/dynamic_fusion/operators/MulFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/MulFixture.h new file mode 100644 index 0000000000..f02aa5e36a --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/operators/MulFixture.h @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_MULFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_MULFIXTURE_H + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" + +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/Globals.h" +#include "tests/validation/reference/PixelWiseMultiplication.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +/* We use a separate test fixture for Multiplication op instead of reusing ElementwiseBinaryFixture to avoid exposing + * the internal enum ElementwiseOp to the public utils/TypePrinters.h as required by the data test case macros + * to print the test data. + */ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionMulValidationFixture : public framework::Fixture +{ +public: + void setup(const TensorShape &shape0, + const TensorShape &shape1, + const TensorShape &shape2, + DataType data_type, + bool is_inplace, + bool fuse_two_ops = false) + { + _data_type = data_type; + _is_inplace = is_inplace; + _fuse = fuse_two_ops; + ARM_COMPUTE_ERROR_ON_MSG(_fuse && shape2.total_size() == 0, "No shape2 provided for fusion of two ops."); + ARM_COMPUTE_ERROR_ON_MSG(_fuse && _is_inplace, "In place for fusing case not supported yet."); + _target = compute_target(shape0, shape1, shape2); + _reference = compute_reference(shape0, shape1, shape2); + } + +protected: + template <typename U> + void fill(U &&tensor, int i) + { + library->fill_tensor_uniform(tensor, i); + } + + TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2) + { + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Fuse first multiplication op + ITensorInfo *lhs_info = context.create_tensor_info(TensorInfo(shape0, 1, _data_type)); + ITensorInfo *rhs_info = context.create_tensor_info(TensorInfo(shape1, 1, _data_type)); + ITensorInfo *dst_info = context.create_tensor_info(); + + ITensorInfo *rhs_info_fuse = nullptr; + + ITensorInfo *ans_info = FunctionType::create_op(sketch, lhs_info, rhs_info); + + if (_fuse) + { + rhs_info_fuse = context.create_tensor_info(TensorInfo(shape2, 1, _data_type)); + ITensorInfo *ans2_info = FunctionType::create_op(sketch, ans_info, rhs_info_fuse); + GpuOutput::create_op(sketch, ans2_info, dst_info); + } + else + { + GpuOutput::create_op(sketch, ans_info, dst_info); + } + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + // (Important) Allocate auxiliary tensor memory if there are any + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + + // Construct user tensors + TensorType t_lhs{}; + TensorType t_rhs{}; + TensorType t_rhs_fuse{}; + TensorType t_dst{}; + + // Initialize user tensors + t_lhs.allocator()->init(*lhs_info); + t_rhs.allocator()->init(*rhs_info); + t_dst.allocator()->init(*dst_info); + if (_fuse) + { + t_rhs_fuse.allocator()->init(*rhs_info_fuse); + } + + // Allocate and fill user tensors + // Instead of using ACL allocator, the user can choose to import memory into the tensors + t_lhs.allocator()->allocate(); + t_rhs.allocator()->allocate(); + t_dst.allocator()->allocate(); + if (_fuse) + { + t_rhs_fuse.allocator()->allocate(); + } + + fill(AccessorType(t_lhs), 0); + fill(AccessorType(t_rhs), 1); + if (_fuse) + { + fill(AccessorType(t_rhs_fuse), 2); + } + + // Run runtime + if (_fuse) + { + runtime.run({&t_lhs, &t_rhs, &t_rhs_fuse, &t_dst}); + } + else + { + runtime.run({&t_lhs, &t_rhs, &t_dst}); + } + + return t_dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2) + { + // Create reference + SimpleTensor<T> ref_lhs{shape0, _data_type, 1, QuantizationInfo()}; + SimpleTensor<T> ref_rhs{shape1, _data_type, 1, QuantizationInfo()}; + SimpleTensor<T> ref_rhs_fuse{shape2, _data_type, 1, QuantizationInfo()}; + + // Fill reference + fill(ref_lhs, 0); + fill(ref_rhs, 1); + SimpleTensor<T> ref_dst = reference::pixel_wise_multiplication<T, T, T>( + ref_lhs, ref_rhs, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_UP, _data_type, + QuantizationInfo()); + if (_fuse) + { + fill(ref_rhs_fuse, 2); + SimpleTensor<T> ref_dst_fuse = reference::pixel_wise_multiplication<T, T, T>( + ref_dst, ref_rhs_fuse, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_UP, _data_type, + QuantizationInfo()); + return ref_dst_fuse; + } + return ref_dst; + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; + DataType _data_type{}; + bool _is_inplace{false}; + bool _fuse{false}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionMulOneOpValidationFixture + : public DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(const TensorShape &shape0, DataType data_type, bool is_inplace) + { + DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>::setup( + shape0, shape0, TensorShape(), data_type, is_inplace); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionMulBroadcastValidationFixture + : public DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, bool is_inplace) + { + DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>::setup( + shape0, shape1, TensorShape(), data_type, is_inplace); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionMulTwoOpsValidationFixture + : public DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(const TensorShape &shape0, + const TensorShape &shape1, + const TensorShape &shape2, + DataType data_type, + bool is_inplace, + bool fuse_two_ops) + { + DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>::setup( + shape0, shape1, shape2, data_type, is_inplace, fuse_two_ops); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_MULFIXTURE_H diff --git a/tests/validation/fixtures/dynamic_fusion/operators/ReshapeFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/ReshapeFixture.h new file mode 100644 index 0000000000..bde3360940 --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/operators/ReshapeFixture.h @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_RESHAPEFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_RESHAPEFIXTURE_H + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/ReshapeAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuReshape.h" + +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/Globals.h" +#include "tests/validation/reference/ReshapeLayer.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuReshapeLayerValidationFixture : public framework::Fixture +{ +public: + void setup(TensorShape input_shape, TensorShape output_shape, DataType data_type) + { + _target = compute_target(input_shape, output_shape, data_type); + _reference = compute_reference(input_shape, output_shape, data_type); + } + +protected: + template <typename U> + void fill(U &&tensor, int i) + { + library->fill_tensor_uniform(tensor, i); + } + + TensorType compute_target(TensorShape &input_shape, TensorShape &output_shape, DataType data_type) + { + // Check if indeed the input shape can be reshape to the output one + ARM_COMPUTE_ASSERT(input_shape.total_size() == output_shape.total_size()); + + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Create sketch tensors + ITensorInfo *src_info = context.create_tensor_info(TensorInfo(input_shape, 1, data_type)); + ITensorInfo *dst_info = context.create_tensor_info(TensorInfo(output_shape, 1, data_type)); + ReshapeAttributes attributes; + attributes.shape(output_shape); + + ITensorInfo *ans_info = FunctionType::create_op(sketch, src_info, attributes); + GpuOutput::create_op(sketch, ans_info, dst_info); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + // (Important) Allocate auxiliary tensor memory if there are any + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + + // Construct user tensors + TensorType t_src{}; + TensorType t_dst{}; + // Initialize user tensors + t_src.allocator()->init(*src_info); + t_dst.allocator()->init(*dst_info); + + // Allocate and fill user tensors + t_src.allocator()->allocate(); + t_dst.allocator()->allocate(); + + fill(AccessorType(t_src), 0); + + // Run runtime + runtime.run({&t_src, &t_dst}); + + return t_dst; + } + + SimpleTensor<T> + compute_reference(const TensorShape &input_shape, const TensorShape &output_shape, DataType data_type) + { + // Create reference + SimpleTensor<T> src{input_shape, data_type}; + + // Fill reference + fill(src, 0); + + return reference::reshape_layer<T>(src, output_shape); + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; +}; +/** [ReshapeLayer fixture] **/ +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_RESHAPEFIXTURE_H diff --git a/tests/validation/fixtures/dynamic_fusion/operators/ResizeFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/ResizeFixture.h new file mode 100644 index 0000000000..711767b66f --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/operators/ResizeFixture.h @@ -0,0 +1,272 @@ +/* +* Copyright (c) 2022-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_RESIZEFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_RESIZEFIXTURE_H + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/ResizeAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/SimpleTensor.h" +#include "tests/validation/reference/Permute.h" +#include "tests/validation/reference/Scale.h" +#include "tests/validation/Validation.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionResizeGenericValidationFixture : public framework::Fixture +{ +public: + void setup(TensorShape shape, + DataType data_type, + QuantizationInfo quantization_info, + DataLayout data_layout, + InterpolationPolicy interpolation_policy, + SamplingPolicy sampling_policy, + bool align_corners, + QuantizationInfo output_quantization_info) + { + _shape = shape; + _interpolation_policy = interpolation_policy; + _sampling_policy = sampling_policy; + _data_type = data_type; + _input_quantization_info = quantization_info; + _output_quantization_info = output_quantization_info; + _align_corners = align_corners; + _data_layout = data_layout; + + ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NHWC); // Dynamic fusion resize supports only NHWC layout + + generate_scale(shape); + + std::mt19937 generator(library->seed()); + std::uniform_int_distribution<uint32_t> distribution_u8(0, 255); + + _target = compute_target(shape); + _reference = compute_reference(shape); + } + +protected: + void generate_scale(const TensorShape &shape) + { + static constexpr float _min_scale{0.25f}; + static constexpr float _max_scale{3.f}; + + constexpr float max_width{8192.0f}; + constexpr float max_height{6384.0f}; + constexpr float min_width{1.f}; + constexpr float min_height{1.f}; + + std::mt19937 generator(library->seed()); + std::uniform_real_distribution<float> distribution_float(_min_scale, _max_scale); + + auto generate = [&](size_t input_size, float min_output, float max_output) -> int + { + const float generated_scale = distribution_float(generator); + const int output_size = static_cast<int>( + utility::clamp(static_cast<float>(input_size) * generated_scale, min_output, max_output)); + return output_size; + }; + + // Input shape is always given in NCHW layout. NHWC is dealt by permute in compute_target() + const int idx_width = get_data_layout_dimension_index(DataLayout::NCHW, DataLayoutDimension::WIDTH); + const int idx_height = get_data_layout_dimension_index(DataLayout::NCHW, DataLayoutDimension::HEIGHT); + + _output_width = generate(shape[idx_width], min_width, max_width); + _output_height = generate(shape[idx_height], min_height, max_height); + } + + template <typename U> + void fill(U &&tensor) + { + if (tensor.data_type() == DataType::F32) + { + std::uniform_real_distribution<float> distribution(-5.0f, 5.0f); + library->fill(tensor, distribution, 0); + } + else if (tensor.data_type() == DataType::F16) + { + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-5.0f, 5.0f}; + library->fill(tensor, distribution, 0); + } + else if (is_data_type_quantized(tensor.data_type())) + { + std::uniform_int_distribution<> distribution(0, 100); + library->fill(tensor, distribution, 0); + } + else + { + library->fill_tensor_uniform(tensor, 0); + } + } + + TensorType compute_target(TensorShape shape) + { + // Our test shapes are assumed in NCHW data layout, thus the permutation + permute(shape, PermutationVector(2U, 0U, 1U)); + + // Create a new workload sketch + CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + GpuWorkloadContext context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Create sketch tensors + ITensorInfo *src_info = context.create_tensor_info(TensorInfo(shape, 1, _data_type, _data_layout)); + src_info->set_quantization_info(_input_quantization_info); + ITensorInfo *dst_info = context.create_tensor_info(); + + ResizeAttributes attributes; + attributes.align_corners(_align_corners) + .sampling_policy(_sampling_policy) + .interpolation_policy(_interpolation_policy) + .output_width(_output_width) + .output_height(_output_height); + + ITensorInfo *scale_result_info = FunctionType::create_op(sketch, src_info, attributes); + GpuOutput::create_op(sketch, scale_result_info, dst_info); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + // (Important) Allocate auxiliary tensor memory if there are any + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + + // Construct user tensors + TensorType t_src{}; + TensorType t_dst{}; + + // Initialize user tensors + t_src.allocator()->init(*src_info); + t_dst.allocator()->init(*dst_info); + + // Allocate and fill user tensors + t_src.allocator()->allocate(); + t_dst.allocator()->allocate(); + + fill(AccessorType(t_src)); + + // Run runtime + runtime.run({&t_src, &t_dst}); + + return t_dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &shape) + { + // Create reference + SimpleTensor<T> src{shape, _data_type, 1, _input_quantization_info}; + + // Reference code is NCHW, so the input shapes are NCHW + const int idx_width = get_data_layout_dimension_index(DataLayout::NCHW, DataLayoutDimension::WIDTH); + const int idx_height = get_data_layout_dimension_index(DataLayout::NCHW, DataLayoutDimension::HEIGHT); + + const float scale_x = static_cast<float>(_output_width) / shape[idx_width]; + const float scale_y = static_cast<float>(_output_height) / shape[idx_height]; + + // Fill reference + fill(src); + + return reference::scale<T>(src, scale_x, scale_y, _interpolation_policy, BorderMode::REPLICATE, + static_cast<T>(0), _sampling_policy, /* ceil_policy_scale */ false, _align_corners, + _output_quantization_info); + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; + TensorShape _shape{}; + InterpolationPolicy _interpolation_policy{}; + SamplingPolicy _sampling_policy{}; + DataType _data_type{}; + DataLayout _data_layout{}; + QuantizationInfo _input_quantization_info{}; + QuantizationInfo _output_quantization_info{}; + bool _align_corners{false}; + int _output_width{0}; + int _output_height{0}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionResizeValidationFixture + : public DynamicFusionResizeGenericValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape shape, + DataType data_type, + DataLayout data_layout, + InterpolationPolicy policy, + SamplingPolicy sampling_policy, + bool align_corners) + { + DynamicFusionResizeGenericValidationFixture<TensorType, AccessorType, FunctionType, T>::setup( + shape, data_type, QuantizationInfo(), data_layout, policy, sampling_policy, align_corners, + QuantizationInfo()); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false> +class DynamicFusionResizeQuantizedValidationFixture + : public DynamicFusionResizeGenericValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape shape, + DataType data_type, + QuantizationInfo quantization_info, + DataLayout data_layout, + InterpolationPolicy policy, + SamplingPolicy sampling_policy, + bool align_corners) + { + DynamicFusionResizeGenericValidationFixture<TensorType, AccessorType, FunctionType, T>::setup( + shape, data_type, quantization_info, data_layout, policy, sampling_policy, align_corners, + quantization_info); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute + +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_RESIZEFIXTURE_H diff --git a/tests/validation/fixtures/dynamic_fusion/operators/SoftmaxFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/SoftmaxFixture.h new file mode 100644 index 0000000000..175d4ff889 --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/operators/SoftmaxFixture.h @@ -0,0 +1,158 @@ +/* +* Copyright (c) 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_SOFTMAXFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_SOFTMAXFIXTURE_H + +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" + +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/SimpleTensor.h" +#include "tests/validation/reference/SoftmaxLayer.h" +#include "tests/validation/Validation.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionSoftmaxValidationGenericFixture : public framework::Fixture +{ +public: + void setup(TensorShape shape, DataType data_type, float beta, size_t axis, bool is_log) + { + _reference = compute_reference(shape, data_type, beta, axis, is_log); + _target = compute_target(shape, data_type, beta, axis, is_log); + } + +protected: + template <typename U> + void fill(U &&tensor) + { + if (tensor.data_type() == DataType::F32) + { + std::uniform_real_distribution<float> distribution(-10.0f, 10.0f); + library->fill(tensor, distribution, 0); + } + else if (tensor.data_type() == DataType::F16) + { + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-10.0f, 10.0f}; + library->fill(tensor, distribution, 0); + } + else if (!is_data_type_quantized(tensor.data_type())) + { + std::uniform_int_distribution<> distribution(0, 100); + library->fill(tensor, distribution, 0); + } + else + { + library->fill_tensor_uniform(tensor, 0); + } + } + + TensorType compute_target(const TensorShape &shape, DataType data_type, float beta, int32_t axis, bool is_log) + { + // Create a new workload sketch + CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + GpuWorkloadContext context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + SoftmaxAttributes softmax_attr{}; + softmax_attr.axis(axis).beta(beta).is_log_softmax(is_log); + ITensorInfo *src_info = context.create_tensor_info(shape, 1, data_type); + ITensorInfo *dst_info = context.create_tensor_info(shape, 1, data_type); + FunctionType::create_op(sketch, src_info, dst_info, softmax_attr); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + // (Important) Allocate auxiliary tensor memory if there are any + // Instead of using ACL allocated memory, the user can choose to import memory into the tensors + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + // Construct user tensors + TensorType src{}; + TensorType dst{}; + + // Initialize user tensors + src.allocator()->init(*src_info); + dst.allocator()->init(*dst_info); + + // Allocate and fill user tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + fill(AccessorType(src)); + + // Run runtime + runtime.run({&src, &dst}); + + return dst; + } + + SimpleTensor<T> + compute_reference(const TensorShape &shape, DataType data_type, float beta, int32_t axis, bool is_log) + { + // Create reference + SimpleTensor<T> src{shape, data_type, 1}; + + // Fill reference + fill(src); + + return reference::softmax_layer<T>(src, beta, axis, is_log); + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionSoftmaxValidationFixture + : public DynamicFusionSoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape shape, DataType data_type, float beta, size_t axis, bool is_log) + { + DynamicFusionSoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + shape, data_type, beta, axis, is_log); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute + +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_SOFTMAXFIXTURE_H diff --git a/tests/validation/gpu/unit/Context.cpp b/tests/validation/gpu/unit/Context.cpp index 06b4a83925..598e219fd9 100644 --- a/tests/validation/gpu/unit/Context.cpp +++ b/tests/validation/gpu/unit/Context.cpp @@ -21,11 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/validation/Validation.h" - -#include "arm_compute/Acl.hpp" +#include "tests/validation/fixtures/UNIT/ContextFixture.h" #include "src/gpu/cl/ClContext.h" @@ -41,67 +37,9 @@ TEST_SUITE(CL) TEST_SUITE(UNIT) TEST_SUITE(Context) -/** Test-case for AclCreateContext and AclDestroy Context - * - * Validate that AclCreateContext can create and destroy a context - * - * Test Steps: - * - Call AclCreateContext with valid target - * - Confirm that context is not nullptr and error code is AclSuccess - * - Destroy context - * - Confirm that AclSuccess is reported - */ -TEST_CASE(SimpleContextCApi, framework::DatasetMode::ALL) -{ - AclContext ctx = nullptr; - ARM_COMPUTE_ASSERT(AclCreateContext(&ctx, AclGpuOcl, nullptr) == AclStatus::AclSuccess); - ARM_COMPUTE_ASSERT(ctx != nullptr); - ARM_COMPUTE_ASSERT(AclDestroyContext(ctx) == AclStatus::AclSuccess); -} - -/** Test-case for Context from the C++ interface - * - * Test Steps: - * - Create a Context obejct - * - Confirm that StatusCode::Success is reported - * - Confirm that equality operator works - * - Confirm that inequality operator works - */ -TEST_CASE(SimpleContextCppApi, framework::DatasetMode::ALL) -{ - acl::StatusCode status = acl::StatusCode::Success; - acl::Context ctx(acl::Target::GpuOcl, &status); - ARM_COMPUTE_ASSERT(status == acl::StatusCode::Success); - - auto ctx_eq = ctx; - ARM_COMPUTE_ASSERT(ctx_eq == ctx); - - acl::Context ctx_ienq(acl::Target::GpuOcl, &status); - ARM_COMPUTE_ASSERT(status == acl::StatusCode::Success); - ARM_COMPUTE_ASSERT(ctx_ienq != ctx); -} - -/** Test-case for CpuCapabilities - * - * Validate that AclCreateContext can create/destroy multiple contexts with different options - * - * Test Steps: - * - Call AclCreateContext with different targets - * - Confirm that AclSuccess is reported - * - Destroy all contexts - * - Confirm that AclSuccess is reported - */ -TEST_CASE(MultipleContexts, framework::DatasetMode::ALL) -{ - const unsigned int num_tests = 5; - std::array<AclContext, num_tests> ctxs{}; - for(unsigned int i = 0; i < num_tests; ++i) - { - ARM_COMPUTE_ASSERT(AclCreateContext(&ctxs[i], AclTarget::AclGpuOcl, nullptr) == AclStatus::AclSuccess); - ARM_COMPUTE_ASSERT(ctxs[i] != nullptr); - ARM_COMPUTE_ASSERT(AclDestroyContext(ctxs[i]) == AclStatus::AclSuccess); - } -} +EMPTY_BODY_FIXTURE_TEST_CASE(SimpleContextCApi, SimpleContextCApiFixture<AclTarget::AclGpuOcl>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(SimpleContextCppApi, SimpleContextCppApiFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(MultipleContexts, MultipleContextsFixture<AclTarget::AclGpuOcl>, framework::DatasetMode::ALL) /** Test-case for MLGO kernel configuration file * @@ -148,9 +86,9 @@ TEST_CASE(CheckMLGO, framework::DatasetMode::ALL) ofs << mlgo_str; ofs.close(); - AclContextOptions opts = acl_default_ctx_options; - opts.kernel_config_file = mlgo_filename.c_str(); - arm_compute::gpu::opencl::ClContext ctx(&opts); + acl::Context::Options opts; + opts.copts.kernel_config_file = mlgo_filename.c_str(); + arm_compute::gpu::opencl::ClContext ctx(&opts.copts); const MLGOHeuristics &heuristics = ctx.mlgo(); diff --git a/tests/validation/gpu/unit/Queue.cpp b/tests/validation/gpu/unit/Queue.cpp new file mode 100644 index 0000000000..8154a7954f --- /dev/null +++ b/tests/validation/gpu/unit/Queue.cpp @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "tests/validation/fixtures/UNIT/QueueFixture.h" + +#include "arm_compute/AclOpenClExt.h" +#include "src/gpu/cl/ClQueue.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +TEST_SUITE(CL) +TEST_SUITE(UNIT) +TEST_SUITE(Queue) + +EMPTY_BODY_FIXTURE_TEST_CASE(CreateQueueWithInvalidContext, CreateQueueWithInvalidContextFixture, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(CreateQueuerWithInvalidOptions, CreateQueuerWithInvalidOptionsFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(DestroyInvalidQueue, DestroyInvalidQueueFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(SimpleQueue, SimpleQueueFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL) + +TEST_CASE(KhrQueuePriorities, framework::DatasetMode::ALL) +{ + acl::StatusCode err = acl::StatusCode::Success; + + acl::Context ctx(acl::Target::GpuOcl, &err); + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); + + acl::Queue queue(ctx, &err); + ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success); + + cl_device_id cl_dev; + auto status = AclGetClDevice(ctx.get(), &cl_dev); + ARM_COMPUTE_ASSERT(status == AclSuccess); + + std::string extensions = cl::Device(cl_dev).getInfo<CL_DEVICE_EXTENSIONS>(); + if(extensions.find("cl_khr_priority_hints") != std::string::npos) + { + cl_int error = CL_SUCCESS; + + cl_context cl_ctx; + auto status = AclGetClContext(ctx.get(), &cl_ctx); + ARM_COMPUTE_ASSERT(status == AclSuccess); + + /* Check a queue with high priority */ + cl_queue_properties queue_properties[] = { CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_HIGH_KHR, 0 }; + cl_command_queue priority_queue = clCreateCommandQueueWithProperties(cl_ctx, cl_dev, queue_properties, &error); + ARM_COMPUTE_ASSERT(error == CL_SUCCESS); + + clReleaseCommandQueue(priority_queue); + } +} + +TEST_SUITE_END() // Queue +TEST_SUITE_END() // UNIT +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/gpu/unit/Tensor.cpp b/tests/validation/gpu/unit/Tensor.cpp new file mode 100644 index 0000000000..18102733ce --- /dev/null +++ b/tests/validation/gpu/unit/Tensor.cpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "tests/validation/fixtures/UNIT/TensorFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +TEST_SUITE(CL) +TEST_SUITE(UNIT) +TEST_SUITE(Tensor) + +EMPTY_BODY_FIXTURE_TEST_CASE(CreateTensorWithInvalidContext, CreateTensorWithInvalidContextFixture, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(CreateTensorWithInvalidDescriptor, CreateTensorWithInvalidDescriptorFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(DestroyInvalidTensor, DestroyInvalidTensorFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(SimpleTensor, SimpleTensorFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(TensorStress, TensorStressFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(MapInvalidTensor, MapInvalidTensorFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(MapAllocatedTensor, MapAllocatedTensorFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(GetSize, TensorSizeFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(GetInvalidSize, InvalidTensorSizeFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(GetDescriptor, DescriptorConversionFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(GetInvalidDescriptor, InvalidDescriptorConversionFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL) + +TEST_SUITE_END() // Tensor +TEST_SUITE_END() // UNIT +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/gpu/unit/TensorPack.cpp b/tests/validation/gpu/unit/TensorPack.cpp new file mode 100644 index 0000000000..b62426d056 --- /dev/null +++ b/tests/validation/gpu/unit/TensorPack.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "tests/validation/fixtures/UNIT/TensorPackFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +TEST_SUITE(CL) +TEST_SUITE(UNIT) +TEST_SUITE(TensorPack) + +EMPTY_BODY_FIXTURE_TEST_CASE(CreateTensorPackWithInvalidContext, CreateTensorPackWithInvalidContextFixture, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(DestroyInvalidTensorPack, DestroyInvalidTensorPackFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(AddInvalidObjectToTensorPack, AddInvalidObjectToTensorPackFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(SimpleTensorPack, SimpleTensorPackFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL) +EMPTY_BODY_FIXTURE_TEST_CASE(MultipleTensorsInPack, MultipleTensorsInPackFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL) + +TEST_SUITE_END() // Tensor +TEST_SUITE_END() // UNIT +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/reference/ActivationLayer.cpp b/tests/validation/reference/ActivationLayer.cpp index 664b969125..2172362bdd 100644 --- a/tests/validation/reference/ActivationLayer.cpp +++ b/tests/validation/reference/ActivationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020,2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #include "ActivationLayer.h" #include "arm_compute/core/Types.h" + #include "tests/validation/Helpers.h" namespace arm_compute @@ -40,7 +41,7 @@ SimpleTensor<T> activation_layer(const SimpleTensor<T> &src, ActivationLayerInfo ARM_COMPUTE_UNUSED(oq_info); // Create reference - SimpleTensor<T> dst{ src.shape(), src.data_type(), 1 }; + SimpleTensor<T> dst{src.shape(), src.data_type(), 1}; // Compute reference const T a(info.a()); @@ -48,7 +49,7 @@ SimpleTensor<T> activation_layer(const SimpleTensor<T> &src, ActivationLayerInfo #if defined(_OPENMP) #pragma omp parallel for #endif /* _OPENMP */ - for(int i = 0; i < src.num_elements(); ++i) + for (int i = 0; i < src.num_elements(); ++i) { dst[i] = activate_float<T>(src[i], a, b, info.activation()); } @@ -57,7 +58,8 @@ SimpleTensor<T> activation_layer(const SimpleTensor<T> &src, ActivationLayerInfo } template <> -SimpleTensor<uint8_t> activation_layer<uint8_t>(const SimpleTensor<uint8_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info) +SimpleTensor<uint8_t> +activation_layer<uint8_t>(const SimpleTensor<uint8_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info) { const QuantizationInfo dst_qinfo = oq_info.empty() ? src.quantization_info() : oq_info; @@ -68,7 +70,8 @@ SimpleTensor<uint8_t> activation_layer<uint8_t>(const SimpleTensor<uint8_t> &src } template <> -SimpleTensor<int8_t> activation_layer<int8_t>(const SimpleTensor<int8_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info) +SimpleTensor<int8_t> +activation_layer<int8_t>(const SimpleTensor<int8_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info) { const QuantizationInfo dst_qinfo = oq_info.empty() ? src.quantization_info() : oq_info; @@ -79,7 +82,8 @@ SimpleTensor<int8_t> activation_layer<int8_t>(const SimpleTensor<int8_t> &src, A } template <> -SimpleTensor<int16_t> activation_layer<int16_t>(const SimpleTensor<int16_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info) +SimpleTensor<int16_t> +activation_layer<int16_t>(const SimpleTensor<int16_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info) { const QuantizationInfo dst_qinfo = oq_info.empty() ? src.quantization_info() : oq_info; @@ -88,9 +92,14 @@ SimpleTensor<int16_t> activation_layer<int16_t>(const SimpleTensor<int16_t> &src SimpleTensor<int16_t> dst = convert_to_symmetric<int16_t>(dst_tmp, dst_qinfo); return dst; } -template SimpleTensor<int32_t> activation_layer(const SimpleTensor<int32_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info); -template SimpleTensor<float> activation_layer(const SimpleTensor<float> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info); -template SimpleTensor<half> activation_layer(const SimpleTensor<half> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info); +template SimpleTensor<int32_t> +activation_layer(const SimpleTensor<int32_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info); +template SimpleTensor<float> +activation_layer(const SimpleTensor<float> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info); +template SimpleTensor<half> +activation_layer(const SimpleTensor<half> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info); +template SimpleTensor<bfloat16> +activation_layer(const SimpleTensor<bfloat16> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/ActivationLayer.h b/tests/validation/reference/ActivationLayer.h index 8aad1af63e..7f896bd696 100644 --- a/tests/validation/reference/ActivationLayer.h +++ b/tests/validation/reference/ActivationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020,2022,2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_ACTIVATION_LAYER_H -#define ARM_COMPUTE_TEST_ACTIVATION_LAYER_H +#ifndef ACL_TESTS_VALIDATION_REFERENCE_ACTIVATIONLAYER_H +#define ACL_TESTS_VALIDATION_REFERENCE_ACTIVATIONLAYER_H #include "tests/SimpleTensor.h" #include "tests/validation/Helpers.h" @@ -40,7 +40,7 @@ inline T activate_float(T x, T a, T b, ActivationLayerInfo::ActivationFunction a { T ret; - switch(activation) + switch (activation) { case ActivationLayerInfo::ActivationFunction::ABS: ret = std::abs(x); @@ -61,13 +61,13 @@ inline T activate_float(T x, T a, T b, ActivationLayerInfo::ActivationFunction a ret = std::min<T>(a, std::max<T>(b, x)); break; case ActivationLayerInfo::ActivationFunction::LEAKY_RELU: - ret = (x > 0) ? x : a * x; + ret = x > static_cast<T>(0) ? x : static_cast<T>(a * x); break; case ActivationLayerInfo::ActivationFunction::SOFT_RELU: - ret = std::log(static_cast<T>(1) + std::exp(x)); + ret = std::log(static_cast<T>(1) + std::exp(static_cast<double>(x))); break; case ActivationLayerInfo::ActivationFunction::ELU: - ret = (x > 0) ? x : a * (std::exp(x) - static_cast<T>(1)); + ret = x > static_cast<T>(0) ? x : static_cast<T>(a * (std::exp(x) - static_cast<T>(1))); break; case ActivationLayerInfo::ActivationFunction::SQRT: ret = std::sqrt(x); @@ -82,7 +82,14 @@ inline T activate_float(T x, T a, T b, ActivationLayerInfo::ActivationFunction a ret = x; break; case ActivationLayerInfo::ActivationFunction::HARD_SWISH: - ret = x * ((std::min(std::max(static_cast<T>(x + 3), static_cast<T>(0.0f)), static_cast<T>(6.0f))) * 0.166666667f); + ret = x * ((std::min(std::max(static_cast<T>(x + 3), static_cast<T>(0.0f)), static_cast<T>(6.0f))) * + 0.166666667f); + break; + case ActivationLayerInfo::ActivationFunction::SWISH: + ret = static_cast<T>(x) / (static_cast<T>(1) + std::exp(-a * x)); + break; + case ActivationLayerInfo::ActivationFunction::GELU: + ret = x * 0.5f * (1 + erf(x / std::sqrt(2.0f))); break; default: ARM_COMPUTE_ERROR("Unsupported activation function"); @@ -93,9 +100,11 @@ inline T activate_float(T x, T a, T b, ActivationLayerInfo::ActivationFunction a } template <typename T> -SimpleTensor<T> activation_layer(const SimpleTensor<T> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info = QuantizationInfo()); +SimpleTensor<T> activation_layer(const SimpleTensor<T> &src, + ActivationLayerInfo info, + const QuantizationInfo &oq_info = QuantizationInfo()); } // namespace reference } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_ACTIVATION_LAYER_H */ +#endif // ACL_TESTS_VALIDATION_REFERENCE_ACTIVATIONLAYER_H diff --git a/tests/validation/reference/BatchToSpaceLayer.cpp b/tests/validation/reference/BatchToSpaceLayer.cpp index 404ee73cac..63d121f59b 100644 --- a/tests/validation/reference/BatchToSpaceLayer.cpp +++ b/tests/validation/reference/BatchToSpaceLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,8 +23,10 @@ */ #include "BatchToSpaceLayer.h" +#include "arm_compute/core/Validate.h" #include "tests/validation/Helpers.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" namespace arm_compute { namespace test @@ -35,32 +37,37 @@ namespace reference { // Batch to Space template <typename T> -SimpleTensor<T> batch_to_space(const SimpleTensor<T> &src, const SimpleTensor<int32_t> &block_shape, const TensorShape &dst_shape) +SimpleTensor<T> batch_to_space(const SimpleTensor<T> &src, const std::vector<int32_t> &block_shape, const CropInfo &crop_info, const TensorShape &dst_shape) { - ARM_COMPUTE_ERROR_ON(block_shape[0] <= 0); - ARM_COMPUTE_ERROR_ON(block_shape[1] <= 0); - SimpleTensor<T> result(dst_shape, src.data_type()); + ARM_COMPUTE_ERROR_ON(block_shape[0] < 1); + ARM_COMPUTE_ERROR_ON(block_shape[1] < 1); + const auto expected_dst_shape = misc::shape_calculator::compute_batch_to_space_shape(DataLayout::NCHW, src.shape(), block_shape[0], block_shape[1], crop_info); + ARM_COMPUTE_ERROR_ON(arm_compute::detail::have_different_dimensions(expected_dst_shape, dst_shape, 0)); + ARM_COMPUTE_UNUSED(expected_dst_shape); - int in_pos = 0; - const auto width_in = static_cast<int>(src.shape()[0]); - const auto height_in = static_cast<int>(src.shape()[1]); - const auto z_in = static_cast<int>(src.shape()[2]); - const auto batch_in = static_cast<int>(src.shape()[3]); + SimpleTensor<T> result(dst_shape, src.data_type()); + int out_pos = 0; + const auto width_out = static_cast<int>(dst_shape[0]); + const auto height_out = static_cast<int>(dst_shape[1]); + const auto z_out = static_cast<int>(dst_shape[2]); + const auto batch_out = static_cast<int>(dst_shape[3]); - for(int batch = 0; batch < batch_in; ++batch) + for(int batch = 0; batch < batch_out; ++batch) { - for(int z = 0; z < z_in; ++z) + for(int z = 0; z < z_out; ++z) { - for(int y = 0; y < height_in; ++y) + for(int y = 0; y < height_out; ++y) { - for(int x = 0; x < width_in; ++x) + for(int x = 0; x < width_out; ++x) { - const int r = src.shape()[3] / (block_shape[0] * block_shape[1]); - const int out_x = (block_shape[0] * x + (batch / r) % block_shape[0]); - const int out_y = (block_shape[1] * y + (batch / r) / block_shape[0]); - const int out_pos = out_x + dst_shape[0] * out_y + z * dst_shape[0] * dst_shape[1] + (batch % r) * dst_shape[0] * dst_shape[1] * dst_shape[2]; - result[out_pos] = src[in_pos]; - ++in_pos; + const int x_c = x + crop_info.left; + const int y_c = y + crop_info.top; + const int in_batch = batch + ((x_c % block_shape[0]) + (y_c % block_shape[1]) * (block_shape[0])) * dst_shape[3]; + const int in_x = x_c / block_shape[0]; + const int in_y = y_c / block_shape[1]; + const int in_pos = in_x + src.shape()[0] * in_y + z * src.shape()[0] * src.shape()[1] + in_batch * src.shape()[0] * src.shape()[1] * src.shape()[2]; + result[out_pos] = src[in_pos]; + ++out_pos; } } } @@ -68,8 +75,8 @@ SimpleTensor<T> batch_to_space(const SimpleTensor<T> &src, const SimpleTensor<in return result; } -template SimpleTensor<float> batch_to_space(const SimpleTensor<float> &src, const SimpleTensor<int32_t> &block_shape, const TensorShape &dst_shape); -template SimpleTensor<half> batch_to_space(const SimpleTensor<half> &src, const SimpleTensor<int32_t> &block_shape, const TensorShape &dst_shape); +template SimpleTensor<float> batch_to_space(const SimpleTensor<float> &src, const std::vector<int32_t> &block_shape, const CropInfo &crop_info, const TensorShape &dst_shape); +template SimpleTensor<half> batch_to_space(const SimpleTensor<half> &src, const std::vector<int32_t> &block_shape, const CropInfo &crop_info, const TensorShape &dst_shape); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/BatchToSpaceLayer.h b/tests/validation/reference/BatchToSpaceLayer.h index 52556cb53f..a37bfc3373 100644 --- a/tests/validation/reference/BatchToSpaceLayer.h +++ b/tests/validation/reference/BatchToSpaceLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2019, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_TEST_BATCH_TO_SPACE_LAYER_H #define ARM_COMPUTE_TEST_BATCH_TO_SPACE_LAYER_H +#include "arm_compute/core/Types.h" #include "tests/SimpleTensor.h" #include "tests/validation/Helpers.h" @@ -36,7 +37,7 @@ namespace validation namespace reference { template <typename T> -SimpleTensor<T> batch_to_space(const SimpleTensor<T> &src, const SimpleTensor<int32_t> &block_shape, const TensorShape &dst_shape); +SimpleTensor<T> batch_to_space(const SimpleTensor<T> &src, const std::vector<int32_t> &block_shape, const CropInfo &crop_info, const TensorShape &dst_shape); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/Box3x3.cpp b/tests/validation/reference/Box3x3.cpp deleted file mode 100644 index ccc7f1b3b9..0000000000 --- a/tests/validation/reference/Box3x3.cpp +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Helpers.h" - -#include "Box3x3.h" -#include "Utils.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<T> box3x3(const SimpleTensor<T> &src, BorderMode border_mode, T constant_border_value) -{ - SimpleTensor<T> dst(src.shape(), src.data_type()); - const std::array<T, 9> filter{ { 1, 1, 1, 1, 1, 1, 1, 1, 1 } }; - const float scale = 1.f / static_cast<float>(filter.size()); - const uint32_t num_elements = src.num_elements(); -#if defined(_OPENMP) - #pragma omp parallel for -#endif /* _OPENMP */ - for(uint32_t element_idx = 0; element_idx < num_elements; ++element_idx) - { - const Coordinates id = index2coord(src.shape(), element_idx); - apply_2d_spatial_filter(id, src, dst, TensorShape(3U, 3U), filter.data(), scale, border_mode, constant_border_value); - } - return dst; -} - -template SimpleTensor<uint8_t> box3x3(const SimpleTensor<uint8_t> &src, BorderMode border_mode, uint8_t constant_border_value); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/CannyEdgeDetector.cpp b/tests/validation/reference/CannyEdgeDetector.cpp deleted file mode 100644 index aa2351ddd8..0000000000 --- a/tests/validation/reference/CannyEdgeDetector.cpp +++ /dev/null @@ -1,254 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "CannyEdgeDetector.h" - -#include "Utils.h" -#include "support/ToolchainSupport.h" -#include "tests/validation/Helpers.h" -#include "tests/validation/reference/Magnitude.h" -#include "tests/validation/reference/NonMaximaSuppression.h" -#include "tests/validation/reference/Phase.h" -#include "tests/validation/reference/Sobel.h" - -#include <cmath> -#include <stack> - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -namespace -{ -const auto MARK_ZERO = 0u; -const auto MARK_MAYBE = 127u; -const auto MARK_EDGE = 255u; - -template <typename T> -void trace_edge(SimpleTensor<T> &dst, const ValidRegion &valid_region) -{ - std::stack<Coordinates> pixels_stack; - for(auto i = 0; i < dst.num_elements(); ++i) - { - if(dst[i] == MARK_EDGE) - { - pixels_stack.push(index2coord(dst.shape(), i)); - } - } - - while(!pixels_stack.empty()) - { - const Coordinates pixel_coord = pixels_stack.top(); - pixels_stack.pop(); - - std::array<Coordinates, 8> neighbours = - { - { - Coordinates(pixel_coord.x() - 1, pixel_coord.y() + 0), - Coordinates(pixel_coord.x() + 1, pixel_coord.y() + 0), - Coordinates(pixel_coord.x() - 1, pixel_coord.y() - 1), - Coordinates(pixel_coord.x() + 1, pixel_coord.y() + 1), - Coordinates(pixel_coord.x() + 0, pixel_coord.y() - 1), - Coordinates(pixel_coord.x() + 0, pixel_coord.y() + 1), - Coordinates(pixel_coord.x() + 1, pixel_coord.y() - 1), - Coordinates(pixel_coord.x() - 1, pixel_coord.y() + 1) - } - }; - - // Mark MAYBE neighbours as edges since they are next to an EDGE - std::for_each(neighbours.begin(), neighbours.end(), [&](Coordinates & coord) - { - if(is_in_valid_region(valid_region, coord)) - { - const size_t pixel_index = coord2index(dst.shape(), coord); - const T pixel = dst[pixel_index]; - if(pixel == MARK_MAYBE) - { - dst[pixel_index] = MARK_EDGE; - pixels_stack.push(coord); - } - } - }); - } - - // Mark all remaining MAYBE pixels as ZERO (not edges) - for(auto i = 0; i < dst.num_elements(); ++i) - { - if(dst[i] == MARK_MAYBE) - { - dst[i] = MARK_ZERO; - } - } -} - -template <typename U, typename T> -SimpleTensor<T> canny_edge_detector_impl(const SimpleTensor<T> &src, int32_t upper, int32_t lower, int gradient_size, MagnitudeType norm_type, - BorderMode border_mode, T constant_border_value) -{ - ARM_COMPUTE_ERROR_ON(gradient_size != 3 && gradient_size != 5 && gradient_size != 7); - ARM_COMPUTE_ERROR_ON(lower < 0 || lower >= upper); - - // Output: T == uint8_t - SimpleTensor<T> dst{ src.shape(), src.data_type() }; - ValidRegion valid_region = shape_to_valid_region(src.shape(), border_mode == BorderMode::UNDEFINED, BorderSize(gradient_size / 2 + 1)); - - // Sobel computation: U == int16_t or int32_t - SimpleTensor<U> gx{}; - SimpleTensor<U> gy{}; - std::tie(gx, gy) = sobel<U>(src, gradient_size, border_mode, constant_border_value, GradientDimension::GRAD_XY); - - using unsigned_U = typename traits::make_unsigned_conditional_t<U>::type; - using promoted_U = typename common_promoted_signed_type<U>::intermediate_type; - - // Gradient magnitude and phase (edge direction) - const DataType mag_data_type = gx.data_type() == DataType::S16 ? DataType::U16 : DataType::U32; - SimpleTensor<unsigned_U> grad_mag{ gx.shape(), mag_data_type }; - SimpleTensor<uint8_t> grad_dir{ gy.shape(), DataType::U8 }; - - for(auto i = 0; i < grad_mag.num_elements(); ++i) - { - double mag = 0.f; - - if(norm_type == MagnitudeType::L2NORM) - { - mag = support::cpp11::round(std::sqrt(static_cast<promoted_U>(gx[i]) * gx[i] + static_cast<promoted_U>(gy[i]) * gy[i])); - } - else // MagnitudeType::L1NORM - { - mag = static_cast<promoted_U>(std::abs(gx[i])) + static_cast<promoted_U>(std::abs(gy[i])); - } - - float angle = 180.f * std::atan2(static_cast<float>(gy[i]), static_cast<float>(gx[i])) / M_PI; - grad_dir[i] = support::cpp11::round(angle < 0.f ? 180 + angle : angle); - grad_mag[i] = saturate_cast<unsigned_U>(mag); - } - - /* - Quantise the phase into 4 directions - 0° dir=0 0.0 <= p < 22.5 or 157.5 <= p < 180 - 45° dir=1 22.5 <= p < 67.5 - 90° dir=2 67.5 <= p < 112.5 - 135° dir=3 112.5 <= p < 157.5 - */ - for(auto i = 0; i < grad_dir.num_elements(); ++i) - { - const auto direction = std::fabs(grad_dir[i]); - grad_dir[i] = (direction < 22.5 || direction >= 157.5) ? 0 : (direction < 67.5) ? 1 : (direction < 112.5) ? 2 : 3; - } - - // Non-maximum suppression - std::vector<int> strong_edges; - const auto upper_thresh = static_cast<uint32_t>(upper); - const auto lower_thresh = static_cast<uint32_t>(lower); - - const auto pixel_at_offset = [&](const SimpleTensor<unsigned_U> &tensor, const Coordinates & coord, int xoffset, int yoffset) - { - return tensor_elem_at(tensor, Coordinates{ coord.x() + xoffset, coord.y() + yoffset }, border_mode, static_cast<unsigned_U>(constant_border_value)); - }; - - for(auto i = 0; i < dst.num_elements(); ++i) - { - const auto coord = index2coord(dst.shape(), i); - if(!is_in_valid_region(valid_region, coord) || grad_mag[i] <= lower_thresh) - { - dst[i] = MARK_ZERO; - continue; - } - - unsigned_U mag_90; - unsigned_U mag90; - switch(grad_dir[i]) - { - case 0: // North/South edge direction, compare against East/West pixels (left & right) - mag_90 = pixel_at_offset(grad_mag, coord, -1, 0); - mag90 = pixel_at_offset(grad_mag, coord, 1, 0); - break; - case 1: // NE/SW edge direction, compare against NW/SE pixels (top-left & bottom-right) - mag_90 = pixel_at_offset(grad_mag, coord, -1, -1); - mag90 = pixel_at_offset(grad_mag, coord, +1, +1); - break; - case 2: // East/West edge direction, compare against North/South pixels (top & bottom) - mag_90 = pixel_at_offset(grad_mag, coord, 0, -1); - mag90 = pixel_at_offset(grad_mag, coord, 0, +1); - break; - case 3: // NW/SE edge direction, compare against NE/SW pixels (top-right & bottom-left) - mag_90 = pixel_at_offset(grad_mag, coord, +1, -1); - mag90 = pixel_at_offset(grad_mag, coord, -1, +1); - break; - default: - ARM_COMPUTE_ERROR("Invalid gradient phase provided"); - break; - } - - // Potential edge if greater than both pixels at +/-90° on either side - if(grad_mag[i] > mag_90 && grad_mag[i] > mag90) - { - // Double thresholding and edge tracing - if(grad_mag[i] > upper_thresh) - { - dst[i] = MARK_EDGE; // Definite edge pixel - strong_edges.emplace_back(i); - } - else - { - dst[i] = MARK_MAYBE; - } - } - else - { - dst[i] = MARK_ZERO; // Since not greater than neighbours - } - } - - // Final edge tracing - trace_edge<T>(dst, valid_region); - return dst; -} -} // namespace - -template <typename T> -SimpleTensor<T> canny_edge_detector(const SimpleTensor<T> &src, - int32_t upper_thresh, int32_t lower_thresh, int gradient_size, MagnitudeType norm_type, - BorderMode border_mode, T constant_border_value) -{ - if(gradient_size < 7) - { - return canny_edge_detector_impl<int16_t>(src, upper_thresh, lower_thresh, gradient_size, norm_type, border_mode, constant_border_value); - } - else - { - return canny_edge_detector_impl<int32_t>(src, upper_thresh, lower_thresh, gradient_size, norm_type, border_mode, constant_border_value); - } -} - -template SimpleTensor<uint8_t> canny_edge_detector(const SimpleTensor<uint8_t> &src, - int32_t upper_thresh, int32_t lower_thresh, int gradient_size, MagnitudeType norm_type, - BorderMode border_mode, uint8_t constant_border_value); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/CannyEdgeDetector.h b/tests/validation/reference/CannyEdgeDetector.h deleted file mode 100644 index e05895ab95..0000000000 --- a/tests/validation/reference/CannyEdgeDetector.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_CANNY_EDGE_DETECTOR_H -#define ARM_COMPUTE_TEST_CANNY_EDGE_DETECTOR_H - -#include "arm_compute/core/Types.h" -#include "tests/SimpleTensor.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<T> canny_edge_detector(const SimpleTensor<T> &src, - int32_t upper_thresh, int32_t lower_thresh, int gradient_size, MagnitudeType norm_type, - BorderMode border_mode, T constant_border_value = 0); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_CANNY_EDGE_DETECTOR_H */ diff --git a/tests/validation/reference/ChannelCombine.cpp b/tests/validation/reference/ChannelCombine.cpp deleted file mode 100644 index dcd4cf551b..0000000000 --- a/tests/validation/reference/ChannelCombine.cpp +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "ChannelCombine.h" - -#include "arm_compute/core/Types.h" -#include "tests/validation/Helpers.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -namespace -{ -template <typename T> -inline std::vector<SimpleTensor<T>> create_image_planes(const TensorShape &shape, Format format) -{ - TensorShape image_shape = adjust_odd_shape(shape, format); - - std::vector<SimpleTensor<T>> image_planes; - - switch(format) - { - case Format::RGB888: - case Format::RGBA8888: - case Format::YUYV422: - case Format::UYVY422: - { - image_planes.emplace_back(image_shape, format); - break; - } - case Format::NV12: - case Format::NV21: - { - TensorShape shape_uv88 = calculate_subsampled_shape(image_shape, Format::UV88); - - image_planes.emplace_back(image_shape, Format::U8); - image_planes.emplace_back(shape_uv88, Format::UV88); - break; - } - case Format::IYUV: - { - TensorShape shape_sub2 = calculate_subsampled_shape(image_shape, Format::IYUV); - - image_planes.emplace_back(image_shape, Format::U8); - image_planes.emplace_back(shape_sub2, Format::U8); - image_planes.emplace_back(shape_sub2, Format::U8); - break; - } - case Format::YUV444: - { - image_planes.emplace_back(image_shape, Format::U8); - image_planes.emplace_back(image_shape, Format::U8); - image_planes.emplace_back(image_shape, Format::U8); - break; - } - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - - return image_planes; -} -} // namespace - -template <typename T> -std::vector<SimpleTensor<T>> channel_combine(const TensorShape &shape, const std::vector<SimpleTensor<T>> &image_planes, Format format) -{ - std::vector<SimpleTensor<T>> dst = create_image_planes<T>(shape, format); - -#if defined(_OPENMP) - #pragma omp parallel for -#endif /* _OPENMP */ - for(unsigned int plane_idx = 0; plane_idx < dst.size(); ++plane_idx) - { - SimpleTensor<T> &dst_tensor = dst[plane_idx]; - const uint32_t num_elements = dst_tensor.num_elements(); - - for(uint32_t element_idx = 0; element_idx < num_elements; ++element_idx) - { - Coordinates coord = index2coord(dst_tensor.shape(), element_idx); - - switch(format) - { - case Format::RGB888: - case Format::RGBA8888: - { - // Copy R/G/B or A channel - for(int channel_idx = 0; channel_idx < dst_tensor.num_channels(); ++channel_idx) - { - const T &src_value = reinterpret_cast<const T *>(image_planes[channel_idx](coord))[0]; - T &dst_value = reinterpret_cast<T *>(dst_tensor(coord))[channel_idx]; - - dst_value = src_value; - } - break; - } - case Format::YUYV422: - case Format::UYVY422: - { - // Find coordinates of the sub-sampled pixel - const Coordinates coord_hori(coord.x() / 2, coord.y()); - - const T &src0 = reinterpret_cast<const T *>(image_planes[0](coord))[0]; - const T &src1 = reinterpret_cast<const T *>(image_planes[1](coord_hori))[0]; - - const int shift = (Format::YUYV422 == format) ? 1 : 0; - T &dst0 = reinterpret_cast<T *>(dst_tensor(coord))[1 - shift]; - T &dst1 = reinterpret_cast<T *>(dst_tensor(coord))[0 + shift]; - - dst0 = src0; - dst1 = src1; - - Coordinates coord2 = index2coord(dst_tensor.shape(), ++element_idx); - - const T &src2 = reinterpret_cast<const T *>(image_planes[0](coord2))[0]; - const T &src3 = reinterpret_cast<const T *>(image_planes[2](coord_hori))[0]; - - T &dst2 = reinterpret_cast<T *>(dst_tensor(coord2))[1 - shift]; - T &dst3 = reinterpret_cast<T *>(dst_tensor(coord2))[0 + shift]; - - dst2 = src2; - dst3 = src3; - - break; - } - case Format::NV12: - case Format::NV21: - { - if(0U == plane_idx) - { - // Get and combine Y channel from plane0 of destination multi-image - dst_tensor[element_idx] = image_planes[0][element_idx]; - } - else - { - const int shift = (Format::NV12 == format) ? 0 : 1; - - // Get U channel from plane1 and V channel from plane2 of the source - const T &src_u0 = reinterpret_cast<const T *>(image_planes[1](coord))[0]; - const T &src_v0 = reinterpret_cast<const T *>(image_planes[2](coord))[0]; - - // Get U and V channel from plane1 of destination multi-image - T &dst_u0 = reinterpret_cast<T *>(dst_tensor(coord))[0 + shift]; - T &dst_v0 = reinterpret_cast<T *>(dst_tensor(coord))[1 - shift]; - - // Combine channel U and V - dst_u0 = src_u0; - dst_v0 = src_v0; - } - - break; - } - case Format::IYUV: - case Format::YUV444: - { - // Get Y/U/V element - const T &src = reinterpret_cast<const T *>(image_planes[plane_idx](coord))[0]; - T &dst = reinterpret_cast<T *>(dst_tensor(coord))[0]; - - // Copy Y/U/V plane - dst = src; - - break; - } - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - } - } - - return dst; -} - -template std::vector<SimpleTensor<uint8_t>> channel_combine(const TensorShape &shape, const std::vector<SimpleTensor<uint8_t>> &image_planes, Format format); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/ChannelCombine.h b/tests/validation/reference/ChannelCombine.h deleted file mode 100644 index 315e2d4dd4..0000000000 --- a/tests/validation/reference/ChannelCombine.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_CHANNEL_COMBINE_H -#define ARM_COMPUTE_TEST_CHANNEL_COMBINE_H - -#include "tests/SimpleTensor.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -std::vector<SimpleTensor<T>> channel_combine(const TensorShape &shape, const std::vector<SimpleTensor<T>> &image_planes, Format format); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_CHANNEL_COMBINE_H */ diff --git a/tests/validation/reference/ChannelExtract.cpp b/tests/validation/reference/ChannelExtract.cpp deleted file mode 100644 index 8674510269..0000000000 --- a/tests/validation/reference/ChannelExtract.cpp +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "ChannelExtract.h" - -#include "arm_compute/core/Types.h" -#include "tests/validation/Helpers.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<uint8_t> channel_extract(const TensorShape &shape, const std::vector<SimpleTensor<T>> &tensor_planes, Format format, Channel channel) -{ - // Find plane and channel index - const unsigned int plane_idx = plane_idx_from_channel(format, channel); - const unsigned int channel_idx = channel_idx_from_format(format, channel); - - // Create dst and get src tensor - SimpleTensor<T> src = tensor_planes[plane_idx]; - SimpleTensor<T> dst{ calculate_subsampled_shape(shape, format, channel), Format::U8 }; - - // Single planar formats with subsampling require a double horizontal step - const int step_x = ((Format::YUYV422 == format || Format::UYVY422 == format) && Channel::Y != channel) ? 2 : 1; - const int width = dst.shape().x(); - const int height = dst.shape().y(); - - // Loop over each pixel and extract channel -#if defined(_OPENMP) - #pragma omp parallel for collapse(2) -#endif /* _OPENMP */ - for(int y = 0; y < height; ++y) - { - for(int x = 0; x < width; ++x) - { - const Coordinates src_coord{ x * step_x, y }; - const Coordinates dst_coord{ x, y }; - - const auto *src_pixel = reinterpret_cast<const T *>(src(src_coord)); - auto *dst_pixel = reinterpret_cast<T *>(dst(dst_coord)); - - dst_pixel[0] = src_pixel[channel_idx]; // NOLINT - } - } - - return dst; -} - -template SimpleTensor<uint8_t> channel_extract(const TensorShape &shape, const std::vector<SimpleTensor<uint8_t>> &tensor_planes, Format format, Channel channel); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/ChannelExtract.h b/tests/validation/reference/ChannelExtract.h deleted file mode 100644 index ce1e6732bd..0000000000 --- a/tests/validation/reference/ChannelExtract.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_CHANNEL_EXTRACT_H -#define ARM_COMPUTE_TEST_CHANNEL_EXTRACT_H - -#include "tests/SimpleTensor.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<uint8_t> channel_extract(const TensorShape &shape, const std::vector<SimpleTensor<T>> &tensor_planes, Format format, Channel channel); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_CHANNEL_EXTRACT_H */ diff --git a/tests/validation/reference/ColorConvert.cpp b/tests/validation/reference/ColorConvert.cpp deleted file mode 100644 index c6a4630999..0000000000 --- a/tests/validation/reference/ColorConvert.cpp +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "ColorConvert.h" - -#include "arm_compute/core/Types.h" -#include "tests/validation/Helpers.h" -#include "tests/validation/reference/ColorConvertHelper.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -namespace -{ -template <typename T> -inline std::vector<SimpleTensor<T>> create_image_planes(const TensorShape &shape, Format format) -{ - TensorShape image_shape = adjust_odd_shape(shape, format); - - std::vector<SimpleTensor<T>> image_planes; - - switch(format) - { - case Format::U8: - case Format::RGB888: - case Format::RGBA8888: - case Format::YUYV422: - case Format::UYVY422: - { - image_planes.emplace_back(image_shape, format); - break; - } - case Format::NV12: - case Format::NV21: - { - TensorShape shape_uv88 = calculate_subsampled_shape(image_shape, Format::UV88); - - image_planes.emplace_back(image_shape, Format::U8); - image_planes.emplace_back(shape_uv88, Format::UV88); - break; - } - case Format::IYUV: - { - TensorShape shape_sub2 = calculate_subsampled_shape(image_shape, Format::IYUV); - - image_planes.emplace_back(image_shape, Format::U8); - image_planes.emplace_back(shape_sub2, Format::U8); - image_planes.emplace_back(shape_sub2, Format::U8); - break; - } - case Format::YUV444: - { - image_planes.emplace_back(image_shape, Format::U8); - image_planes.emplace_back(image_shape, Format::U8); - image_planes.emplace_back(image_shape, Format::U8); - break; - } - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - - return image_planes; -} -} // namespace - -template <typename T> -std::vector<SimpleTensor<T>> color_convert(const TensorShape &shape, const std::vector<SimpleTensor<T>> &tensor_planes, Format src_format, Format dst_format) -{ - std::vector<SimpleTensor<T>> dst = create_image_planes<T>(shape, dst_format); - - switch(src_format) - { - case Format::RGB888: - { - switch(dst_format) - { - case Format::RGBA8888: - colorconvert_helper::detail::colorconvert_rgb_to_rgbx(tensor_planes[0], dst[0]); - break; - case Format::U8: - colorconvert_helper::detail::colorconvert_rgb_to_u8(tensor_planes[0], dst[0]); - break; - case Format::NV12: - colorconvert_helper::detail::colorconvert_rgb_to_nv12(tensor_planes[0], dst); - break; - case Format::IYUV: - colorconvert_helper::detail::colorconvert_rgb_to_iyuv(tensor_planes[0], dst); - break; - case Format::YUV444: - colorconvert_helper::detail::colorconvert_rgb_to_yuv4(tensor_planes[0], dst); - break; - default: - ARM_COMPUTE_ERROR("Not Supported"); - break; - } - break; - } - case Format::RGBA8888: - { - switch(dst_format) - { - case Format::RGB888: - colorconvert_helper::detail::colorconvert_rgbx_to_rgb(tensor_planes[0], dst[0]); - break; - case Format::NV12: - colorconvert_helper::detail::colorconvert_rgb_to_nv12(tensor_planes[0], dst); - break; - case Format::IYUV: - colorconvert_helper::detail::colorconvert_rgb_to_iyuv(tensor_planes[0], dst); - break; - case Format::YUV444: - colorconvert_helper::detail::colorconvert_rgb_to_yuv4(tensor_planes[0], dst); - break; - default: - ARM_COMPUTE_ERROR("Not Supported"); - break; - } - break; - } - case Format::UYVY422: - case Format::YUYV422: - { - switch(dst_format) - { - case Format::RGB888: - case Format::RGBA8888: - colorconvert_helper::detail::colorconvert_yuyv_to_rgb(tensor_planes[0], src_format, dst[0]); - break; - case Format::NV12: - colorconvert_helper::detail::colorconvert_yuyv_to_nv12(tensor_planes[0], src_format, dst); - break; - case Format::IYUV: - colorconvert_helper::detail::colorconvert_yuyv_to_iyuv(tensor_planes[0], src_format, dst); - break; - default: - ARM_COMPUTE_ERROR("Not Supported"); - break; - } - break; - } - case Format::IYUV: - { - switch(dst_format) - { - case Format::RGB888: - case Format::RGBA8888: - colorconvert_helper::detail::colorconvert_iyuv_to_rgb(tensor_planes, dst[0]); - break; - default: - ARM_COMPUTE_ERROR("Not Supported"); - break; - } - break; - } - case Format::NV12: - case Format::NV21: - { - switch(dst_format) - { - case Format::RGB888: - case Format::RGBA8888: - colorconvert_helper::detail::colorconvert_nv12_to_rgb(src_format, tensor_planes, dst[0]); - break; - case Format::IYUV: - colorconvert_helper::detail::colorconvert_nv_to_iyuv(tensor_planes, src_format, dst); - break; - case Format::YUV444: - colorconvert_helper::detail::colorconvert_nv_to_yuv4(tensor_planes, src_format, dst); - break; - default: - ARM_COMPUTE_ERROR("Not Supported"); - break; - } - break; - } - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - return dst; -} - -template std::vector<SimpleTensor<uint8_t>> color_convert(const TensorShape &shape, const std::vector<SimpleTensor<uint8_t>> &tensor_planes, Format src_format, Format dst_format); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/ColorConvert.h b/tests/validation/reference/ColorConvert.h deleted file mode 100644 index 28776cb85f..0000000000 --- a/tests/validation/reference/ColorConvert.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_COLOR_CONVERT_H -#define ARM_COMPUTE_TEST_COLOR_CONVERT_H - -#include "tests/SimpleTensor.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -std::vector<SimpleTensor<T>> color_convert(const TensorShape &shape, const std::vector<SimpleTensor<T>> &tensor_planes, Format src_format, Format dst_format); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_COLOR_CONVERT_H */ diff --git a/tests/validation/reference/Conv3D.cpp b/tests/validation/reference/Conv3D.cpp new file mode 100644 index 0000000000..e4010a507a --- /dev/null +++ b/tests/validation/reference/Conv3D.cpp @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2021, 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Conv3D.h" + +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "support/AclRequires.h" +#include "tests/validation/reference/UtilsQuantizedAsymm.h" + +// Source/Destination Tensor shape indices (N D H W C) +constexpr unsigned int batch_dim = 4u; +constexpr unsigned int depth_dim = 3u; +constexpr unsigned int height_dim = 2u; +constexpr unsigned int width_dim = 1u; +constexpr unsigned int channel_dim = 0u; + +// Weight tensor shape indices (D H W Cin Cout) +constexpr unsigned int weights_depth_dim = 4u; +constexpr unsigned int weights_height_dim = 3u; +constexpr unsigned int weights_width_dim = 2u; +constexpr unsigned int weights_CHin_dim = 1u; +constexpr unsigned int weights_CHout_dim = 0u; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ +namespace +{ +inline bool is_valid_pixel(int i, int min, int max) +{ + return (i >= min && i < max); +} + +// Evaluate the weights against an element in a given tensor. +template < typename T, typename TB, typename std::enable_if < validation::is_floating_point<T>::value &&validation::is_floating_point<TB>::value, int >::type = 0 > +T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const Size3D &dilation, int batch, + int z_start, int y_start, int x_start, int ch_out, UniformQuantizationInfo oq_info) +{ + ARM_COMPUTE_UNUSED(oq_info); + + const unsigned int weights_width = weights.shape()[weights_width_dim]; + const unsigned int weights_height = weights.shape()[weights_height_dim]; + const unsigned int weights_depth = weights.shape()[weights_depth_dim]; + + const unsigned int src_channels = src.shape()[channel_dim]; + const unsigned int src_width = src.shape()[width_dim]; + const unsigned int src_height = src.shape()[height_dim]; + const unsigned int src_depth = src.shape()[depth_dim]; + + T total(0); + for(unsigned int weight_d = 0; weight_d < weights_depth; ++weight_d) + { + const int idx_z = z_start + dilation.depth * weight_d; + for(unsigned int weight_y = 0; weight_y < weights_height; ++weight_y) + { + const int idx_y = y_start + dilation.height * weight_y; + for(unsigned int weight_x = 0; weight_x < weights_width; ++weight_x) + { + const int idx_x = x_start + dilation.width * weight_x; + + //Check if the point is within padding + const bool is_x_valid = is_valid_pixel(idx_x, 0, src_width); + const bool is_y_valid = is_valid_pixel(idx_y, 0, src_height); + const bool is_z_valid = is_valid_pixel(idx_z, 0, src_depth); + const bool is_invalid_pixel = !(is_x_valid && is_y_valid && is_z_valid); + if(is_invalid_pixel) + { + continue; + } + + for(unsigned int ch_in = 0; ch_in < src_channels; ++ch_in) + { + const T *in_ptr = src.data(); + const T *w_ptr = weights.data(); + + const int in_offset = coord2index(src.shape(), Coordinates{ ch_in, idx_x, idx_y, idx_z, batch }); + const int weight_offset = coord2index(weights.shape(), Coordinates{ ch_out, ch_in, weight_x, weight_y, weight_d }); + T input_value = in_ptr[in_offset]; + T weight_value = w_ptr[weight_offset]; + total += (input_value * weight_value); + } + } + } + } + + const TB *b_ptr = bias.data(); + TB bias_value = b_ptr[ch_out]; + + return total + bias_value; +} + +template < typename T, typename TB, ARM_COMPUTE_REQUIRES_TA(std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) > +T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const Size3D &dilation, int batch, + int z_start, int y_start, int x_start, int ch_out, UniformQuantizationInfo oq_info) +{ + const unsigned int weights_width = weights.shape()[weights_width_dim]; + const unsigned int weights_height = weights.shape()[weights_height_dim]; + const unsigned int weights_depth = weights.shape()[weights_depth_dim]; + + const unsigned int src_channels = src.shape()[channel_dim]; + const unsigned int src_width = src.shape()[width_dim]; + const unsigned int src_height = src.shape()[height_dim]; + const unsigned int src_depth = src.shape()[depth_dim]; + + const UniformQuantizationInfo iq_info = src.quantization_info().uniform(); + const UniformQuantizationInfo wq_info = weights.quantization_info().uniform(); + + const int input_offset = -iq_info.offset; + const float input_scale = iq_info.scale; + int weights_offset = -wq_info.offset; + float weights_scale = wq_info.scale; + const int output_offset = oq_info.offset; + const float output_scale = oq_info.scale; + + int output_multiplier = 0; + int output_shift = 0; + const float multiplier = input_scale * weights_scale / output_scale; + arm_compute::quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift); + + int32_t total(0); + for(unsigned int weight_d = 0; weight_d < weights_depth; ++weight_d) + { + const int idx_z = z_start + dilation.depth * weight_d; + for(unsigned int weight_y = 0; weight_y < weights_height; ++weight_y) + { + const int idx_y = y_start + dilation.height * weight_y; + for(unsigned int weight_x = 0; weight_x < weights_width; ++weight_x) + { + const int idx_x = x_start + dilation.width * weight_x; + + //Check if the point is within padding + const bool is_x_valid = is_valid_pixel(idx_x, 0, src_width); + const bool is_y_valid = is_valid_pixel(idx_y, 0, src_height); + const bool is_z_valid = is_valid_pixel(idx_z, 0, src_depth); + const bool is_invalid_pixel = !(is_x_valid && is_y_valid && is_z_valid); + if(is_invalid_pixel) + { + continue; + } + + for(unsigned int ch_in = 0; ch_in < src_channels; ++ch_in) + { + const T *in_ptr = src.data(); + const T *w_ptr = weights.data(); + + const int in_offset = coord2index(src.shape(), Coordinates{ ch_in, idx_x, idx_y, idx_z, batch }); + const int weight_offset = coord2index(weights.shape(), Coordinates{ ch_out, ch_in, weight_x, weight_y, weight_d }); + T input_value = in_ptr[in_offset]; + T weight_value = w_ptr[weight_offset]; + total += ((input_value + input_offset) * (weight_value + weights_offset)); + } + } + } + } + + const TB *b_ptr = bias.data(); + TB bias_value = b_ptr[ch_out]; + + total += bias_value; + + return validation::quantize_down_scale_by_fixedpoint(total, output_multiplier, output_shift, output_offset, + std::numeric_limits<T>::lowest(), std::numeric_limits<T>::max()); +} +} // namespace + +template <typename T, typename TB> +SimpleTensor<T> conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &dst, const Conv3dInfo &conv3d_info) +{ + // Compute reference + const unsigned int batch_size = src.shape()[batch_dim]; + const unsigned int dst_width = dst.shape()[width_dim]; + const unsigned int dst_height = dst.shape()[height_dim]; + const unsigned int dst_depth = dst.shape()[depth_dim]; + const unsigned int src_channels = src.shape()[channel_dim]; + const unsigned int weights_out_ch = weights.shape()[weights_CHout_dim]; + const unsigned int dst_channels = dst.shape()[channel_dim]; + const size_t pad_left = conv3d_info.padding.left; + const size_t pad_top = conv3d_info.padding.top; + const size_t pad_front = conv3d_info.padding.front; + const size_t stride_x = conv3d_info.stride.x(); + const size_t stride_y = conv3d_info.stride.y(); + const size_t stride_z = conv3d_info.stride.z(); + + const TensorShape dst_shape = arm_compute::misc::shape_calculator::compute_conv3d_shape(src.shape(), weights.shape(), conv3d_info); + + ARM_COMPUTE_UNUSED(src_channels, weights_out_ch, dst_channels, dst_shape, weights_CHin_dim); + // Number of batches of source and destination tensors must match. + ARM_COMPUTE_ERROR_ON(src.shape()[batch_dim] != dst.shape()[batch_dim]); + // Input channels in the source and weights must match. + ARM_COMPUTE_ERROR_ON(src_channels != weights.shape()[weights_CHin_dim]); + // Weight channels in the destination and weights must match. + ARM_COMPUTE_ERROR_ON(weights_out_ch != dst_channels); + // Bias must match the number of destination channels. + ARM_COMPUTE_ERROR_ON(bias.shape()[0] != dst_channels); + // Compare given dst tensor shape with expected shape. + ARM_COMPUTE_ERROR_ON(dst.shape() != dst_shape); + + for(unsigned int batch = 0; batch < batch_size; ++batch) + { + for(unsigned int z_out = 0; z_out < dst_depth; ++z_out) + { + const int z_start = (z_out * stride_z) - pad_front; + for(unsigned int y_out = 0; y_out < dst_height; ++y_out) + { + const int y_start = (y_out * stride_y) - pad_top; + for(unsigned int x_out = 0; x_out < dst_width; ++x_out) + { + const int x_start = (x_out * stride_x) - pad_left; + for(unsigned int ch_out = 0; ch_out < dst_channels; ++ch_out) + { + T *out_ptr = dst.data(); + + const int out_offset = coord2index(dst.shape(), Coordinates{ ch_out, x_out, y_out, z_out, batch }); + out_ptr[out_offset] = calculate_conv3d<T, TB>(src, weights, bias, conv3d_info.dilation, batch, z_start, y_start, x_start, ch_out, dst.quantization_info().uniform()); + } + } + } + } + } + return dst; +} + +template SimpleTensor<float> conv3d(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, SimpleTensor<float> &dst, + const Conv3dInfo &conv3d_info); +template SimpleTensor<half> conv3d(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &bias, SimpleTensor<half> &dst, + const Conv3dInfo &conv3d_info); +template SimpleTensor<uint8_t> conv3d(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<uint8_t> &dst, + const Conv3dInfo &conv3d_info); +template SimpleTensor<int8_t> conv3d(const SimpleTensor<int8_t> &src, const SimpleTensor<int8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<int8_t> &dst, + const Conv3dInfo &conv3d_info); +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/reference/Box3x3.h b/tests/validation/reference/Conv3D.h index f377f280af..e3674f4bfb 100644 --- a/tests/validation/reference/Box3x3.h +++ b/tests/validation/reference/Conv3D.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,10 +21,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_BOX3X3_H -#define ARM_COMPUTE_TEST_BOX3X3_H +#ifndef ARM_COMPUTE_TEST_CONV3D_LAYER_H +#define ARM_COMPUTE_TEST_CONV3D_LAYER_H +#include "Utils.h" +#include "arm_compute/runtime/FunctionDescriptors.h" #include "tests/SimpleTensor.h" +#include "tests/validation/Helpers.h" namespace arm_compute { @@ -34,10 +37,11 @@ namespace validation { namespace reference { -template <typename T> -SimpleTensor<T> box3x3(const SimpleTensor<T> &src, BorderMode border_mode, T constant_border_value); +template <typename T, typename TB> +SimpleTensor<T> conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &dst, + const Conv3dInfo &conv3d_info); } // namespace reference } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_BOX3X3_H */ +#endif /* ARM_COMPUTE_TEST_CONV3D_LAYER_H */ diff --git a/tests/validation/reference/Convolution3d.h b/tests/validation/reference/Convolution3d.h index 1666e3857b..b67e88e839 100644 --- a/tests/validation/reference/Convolution3d.h +++ b/tests/validation/reference/Convolution3d.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,7 +25,7 @@ #define ARM_COMPUTE_TEST_VALIDATION_CONVOLUTION_H #include "arm_compute/core/utils/quantization/AsymmHelpers.h" -#include "support/Requires.h" +#include "support/AclRequires.h" #include "tests/validation/Helpers.h" #include "tests/validation/reference/UtilsQuantizedAsymm.h" diff --git a/tests/validation/reference/DFT.cpp b/tests/validation/reference/DFT.cpp index fd126c7d73..2b03c270ac 100644 --- a/tests/validation/reference/DFT.cpp +++ b/tests/validation/reference/DFT.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2020, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -400,10 +400,10 @@ SimpleTensor<T> conv2d_dft(const SimpleTensor<T> &src, const SimpleTensor<T> &w, auto padded_src = pad_layer(src, padding_in); // Flip weights - std::vector<uint32_t> axis_v = { 0, 1 }; - SimpleTensor<uint32_t> axis{ TensorShape(2U), DataType::U32 }; + std::vector<uint32_t> axis_v = { 0, 1 }; + SimpleTensor<int32_t> axis{ TensorShape(2U), DataType::S32 }; std::copy(axis_v.begin(), axis_v.begin() + axis.shape().x(), axis.data()); - auto flipped_w = reverse(w, axis); + auto flipped_w = reverse(w, axis, /* use_inverted_axis */ false); // Pad weights to have the same size as input const PaddingList paddings_w = { { 0, src.shape()[0] - 1 }, { 0, src.shape()[1] - 1 } }; diff --git a/tests/validation/reference/DeconvolutionLayer.cpp b/tests/validation/reference/DeconvolutionLayer.cpp index 891828533f..eeb25fcbe3 100644 --- a/tests/validation/reference/DeconvolutionLayer.cpp +++ b/tests/validation/reference/DeconvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -33,8 +33,8 @@ namespace validation { namespace reference { -template <typename T, typename TB> -SimpleTensor<T> deconvolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &output_shape, +template <typename T, typename TW, typename TB> +SimpleTensor<T> deconvolution_layer(const SimpleTensor<T> &src, const SimpleTensor<TW> &weights, const SimpleTensor<TB> &bias, const TensorShape &output_shape, const PadStrideInfo &info, QuantizationInfo out_qinfo) { // Create reference @@ -99,7 +99,7 @@ SimpleTensor<T> deconvolution_layer(const SimpleTensor<T> &src, const SimpleTens } // Flip weights by 180 degrees - SimpleTensor<T> weights_flipped{ weights.shape(), weights.data_type(), 1, weights.quantization_info() }; + SimpleTensor<TW> weights_flipped{ weights.shape(), weights.data_type(), 1, weights.quantization_info(), weights.data_layout() }; #if defined(_OPENMP) #pragma omp parallel for #endif /* _OPENMP */ @@ -143,6 +143,8 @@ SimpleTensor<T> deconvolution_layer(const SimpleTensor<T> &src, const SimpleTens template SimpleTensor<uint8_t> deconvolution_layer(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, const TensorShape &output_shape, const PadStrideInfo &info, QuantizationInfo out_quant_info); +template SimpleTensor<uint8_t> deconvolution_layer(const SimpleTensor<uint8_t> &src, const SimpleTensor<int8_t> &weights, const SimpleTensor<int32_t> &bias, const TensorShape &output_shape, + const PadStrideInfo &info, QuantizationInfo out_quant_info); template SimpleTensor<int8_t> deconvolution_layer(const SimpleTensor<int8_t> &src, const SimpleTensor<int8_t> &weights, const SimpleTensor<int32_t> &bias, const TensorShape &output_shape, const PadStrideInfo &info, QuantizationInfo out_quant_info); template SimpleTensor<float> deconvolution_layer(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, const TensorShape &output_shape, diff --git a/tests/validation/reference/DeconvolutionLayer.h b/tests/validation/reference/DeconvolutionLayer.h index 07b9a531a7..16f0d9ae59 100644 --- a/tests/validation/reference/DeconvolutionLayer.h +++ b/tests/validation/reference/DeconvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -39,16 +39,16 @@ namespace reference * * src Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. * Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. - * weights The 4d weights with dimensions [width, height, OFM, IFM]. Data type supported: Same as @p input. + * weights The 4d weights with dimensions [width, height, OFM, IFM]. Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED. * bias Optional, ignored if NULL. The biases have one dimension. - * Data type supported: Same as @p input, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type + * Data type supported: Same as @p input, except for input of QASYMM8/QASYMM8_SIGNED types where biases should be of S32 type * output_shape Output tensor shape. The output has the same number of dimensions as the @p input. * info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo. * a The number of zeros added to right and top edges of the input. * */ -template <typename T, typename TB> -SimpleTensor<T> deconvolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &output_shape, const PadStrideInfo &info, +template <typename T, typename TW, typename TB> +SimpleTensor<T> deconvolution_layer(const SimpleTensor<T> &src, const SimpleTensor<TW> &weights, const SimpleTensor<TB> &bias, const TensorShape &output_shape, const PadStrideInfo &info, QuantizationInfo out_qinfo = QuantizationInfo()); } // namespace reference } // namespace validation diff --git a/tests/validation/reference/DepthConvertLayer.cpp b/tests/validation/reference/DepthConvertLayer.cpp index 94c719ade7..3f88897f8e 100644 --- a/tests/validation/reference/DepthConvertLayer.cpp +++ b/tests/validation/reference/DepthConvertLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -165,7 +165,7 @@ template SimpleTensor<half> depth_convert(const SimpleTensor<int32_t> &src, Data template SimpleTensor<float> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); // BFLOAT16 -template SimpleTensor<float> depth_convert(const SimpleTensor<bfloat16> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<bfloat16> depth_convert(const SimpleTensor<bfloat16> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); // F16 template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); @@ -186,6 +186,25 @@ template SimpleTensor<int32_t> depth_convert(const SimpleTensor<float> &src, Dat template SimpleTensor<half> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); template SimpleTensor<bfloat16> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +// S64 +template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int8_t> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int16_t> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int32_t> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<half> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<float> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); + +// U64 +template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int8_t> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int16_t> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int32_t> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<half> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<float> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/DequantizationLayer.cpp b/tests/validation/reference/DequantizationLayer.cpp index 64a89aa6a0..67d69c2c38 100644 --- a/tests/validation/reference/DequantizationLayer.cpp +++ b/tests/validation/reference/DequantizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -59,6 +59,12 @@ TOut dequantize(int16_t val, const UniformQuantizationInfo qinfo, DataType dt) ARM_COMPUTE_UNUSED(dt); return static_cast<TOut>(dequantize_qsymm16(val, qinfo)); } +template <typename TOut> +TOut dequantize(int32_t val, const UniformQuantizationInfo qinfo, DataType dt) +{ + ARM_COMPUTE_UNUSED(dt); + return static_cast<TOut>(dequantize_s32(val, qinfo)); +} } // namespace template <typename TOut, typename TIn> SimpleTensor<TOut> dequantization_layer(const SimpleTensor<TIn> &src) @@ -115,6 +121,7 @@ template SimpleTensor<half> dequantization_layer(const SimpleTensor<int8_t> &src template SimpleTensor<float> dequantization_layer(const SimpleTensor<int8_t> &src); template SimpleTensor<half> dequantization_layer(const SimpleTensor<int16_t> &src); template SimpleTensor<float> dequantization_layer(const SimpleTensor<int16_t> &src); +template SimpleTensor<float> dequantization_layer(const SimpleTensor<int32_t> &src); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/Derivative.cpp b/tests/validation/reference/Derivative.cpp deleted file mode 100644 index c65ebcada5..0000000000 --- a/tests/validation/reference/Derivative.cpp +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "Derivative.h" - -#include "Utils.h" -#include "tests/Types.h" - -#include <array> - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -namespace -{ -const std::array<int8_t, 9> derivative_3_x{ { 0, 0, 0, -1, 0, 1, 0, 0, 0 } }; -const std::array<int8_t, 9> derivative_3_y{ { 0, -1, 0, 0, 0, 0, 0, 1, 0 } }; - -template <typename T> -struct data_type; - -template <> -struct data_type<int16_t> -{ - const static DataType value = DataType::S16; -}; -} // namespace - -template <typename T, typename U> -std::pair<SimpleTensor<T>, SimpleTensor<T>> derivative(const SimpleTensor<U> &src, BorderMode border_mode, uint8_t constant_border_value, GradientDimension gradient_dimension) -{ - const unsigned int filter_size = 3; - - SimpleTensor<T> dst_x(src.shape(), data_type<T>::value, src.num_channels()); - SimpleTensor<T> dst_y(src.shape(), data_type<T>::value, src.num_channels()); - - ValidRegion valid_region = shape_to_valid_region(src.shape(), border_mode == BorderMode::UNDEFINED, BorderSize(filter_size / 2)); - - const uint32_t num_elements = src.num_elements(); -#if defined(_OPENMP) - #pragma omp parallel for -#endif /* _OPENMP */ - for(uint32_t i = 0; i < num_elements; ++i) - { - Coordinates coord = index2coord(src.shape(), i); - - if(!is_in_valid_region(valid_region, coord)) - { - continue; - } - - switch(gradient_dimension) - { - case GradientDimension::GRAD_X: - apply_2d_spatial_filter(coord, src, dst_x, TensorShape{ filter_size, filter_size }, derivative_3_x.data(), 1.f, border_mode, - constant_border_value); - break; - case GradientDimension::GRAD_Y: - apply_2d_spatial_filter(coord, src, dst_y, TensorShape{ filter_size, filter_size }, derivative_3_y.data(), 1.f, border_mode, - constant_border_value); - break; - case GradientDimension::GRAD_XY: - apply_2d_spatial_filter(coord, src, dst_x, TensorShape{ filter_size, filter_size }, derivative_3_x.data(), 1.f, border_mode, - constant_border_value); - apply_2d_spatial_filter(coord, src, dst_y, TensorShape{ filter_size, filter_size }, derivative_3_y.data(), 1.f, border_mode, - constant_border_value); - break; - default: - ARM_COMPUTE_ERROR("Gradient dimension not supported"); - } - } - - return std::make_pair(dst_x, dst_y); -} - -template std::pair<SimpleTensor<int16_t>, SimpleTensor<int16_t>> derivative(const SimpleTensor<uint8_t> &src, BorderMode border_mode, uint8_t constant_border_value, - GradientDimension gradient_dimension); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/Derivative.h b/tests/validation/reference/Derivative.h deleted file mode 100644 index 16f764e90e..0000000000 --- a/tests/validation/reference/Derivative.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_DERIVATIVE_H -#define ARM_COMPUTE_TEST_DERIVATIVE_H - -#include "tests/SimpleTensor.h" -#include "tests/Types.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T, typename U> -std::pair<SimpleTensor<T>, SimpleTensor<T>> derivative(const SimpleTensor<U> &src, BorderMode border_mode, uint8_t constant_border_value, GradientDimension gradient_dimension); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_DERIVATIVE_H */ diff --git a/tests/validation/reference/Dilate.cpp b/tests/validation/reference/Dilate.cpp deleted file mode 100644 index be8ccb6f3a..0000000000 --- a/tests/validation/reference/Dilate.cpp +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "Dilate.h" - -#include "Utils.h" -#include "tests/validation/Helpers.h" - -#include <algorithm> -#include <array> - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<T> dilate(const SimpleTensor<T> &src, BorderMode border_mode, T constant_border_value) -{ - /* - -1 x +1 - -1 [tl][tc][tr] -1 - y [ml][xy][mr] y - +1 [bl][bc][br] +1 - -1 x +1 - dilate: - dst(x, y) = max[ src(x', y') for x-1<=x'<=x+1, y-1<=y'<=y+1 ] = max({tl, tc, tr, ml, xy, mr, bl, bc, br}) - */ - SimpleTensor<T> dst(src.shape(), src.data_type()); - const uint32_t num_elements = src.num_elements(); - -#if defined(_OPENMP) - #pragma omp parallel for -#endif /* _OPENMP */ - for(uint32_t i = 0; i < num_elements; ++i) - { - Coordinates coord = index2coord(src.shape(), i); - const int x = coord.x(); - const int y = coord.y(); - - std::array<T, 9> neighbours = { { 0 } }; - for(int row = y - 1, j = 0; row <= y + 1; ++row) - { - for(int col = x - 1; col <= x + 1; ++col, ++j) - { - coord.set(0, col); - coord.set(1, row); - neighbours[j] = tensor_elem_at(src, coord, border_mode, constant_border_value); - } - } - - dst[i] = *std::max_element(neighbours.cbegin(), neighbours.cend()); - } - - return dst; -} - -template SimpleTensor<uint8_t> dilate(const SimpleTensor<uint8_t> &src, BorderMode border_mode, uint8_t constant_border_value); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/ElementWiseUnary.cpp b/tests/validation/reference/ElementWiseUnary.cpp deleted file mode 100644 index 1d46ed648f..0000000000 --- a/tests/validation/reference/ElementWiseUnary.cpp +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "ElementWiseUnary.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<T> elementwise_unary(const SimpleTensor<T> &src, ElementWiseUnary op) -{ - SimpleTensor<T> dst(src.shape(), src.data_type()); - - for(int i = 0; i < src.num_elements(); ++i) - { - switch(op) - { - case ElementWiseUnary::RSQRT: - dst[i] = 1.f / std::sqrt(src[i]); - break; - case ElementWiseUnary::EXP: - dst[i] = std::exp(src[i]); - break; - case ElementWiseUnary::NEG: - dst[i] = -src[i]; - break; - case ElementWiseUnary::LOG: - dst[i] = std::log(src[i]); - break; - case ElementWiseUnary::ABS: - dst[i] = std::abs(src[i]); - break; - case ElementWiseUnary::SIN: - dst[i] = std::sin(src[i]); - break; - case ElementWiseUnary::ROUND: - dst[i] = arm_compute::support::cpp11::nearbyint(src[i]); - break; - default: - ARM_COMPUTE_ERROR("Not implemented"); - } - } - - return dst; -} - -template SimpleTensor<float> elementwise_unary(const SimpleTensor<float> &src, ElementWiseUnary op); -template SimpleTensor<half> elementwise_unary(const SimpleTensor<half> &src, ElementWiseUnary op); -template SimpleTensor<int32_t> elementwise_unary(const SimpleTensor<int32_t> &src, ElementWiseUnary op); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/ElementwiseOperations.cpp b/tests/validation/reference/ElementwiseOperations.cpp index f22c84e153..edbbab8600 100644 --- a/tests/validation/reference/ElementwiseOperations.cpp +++ b/tests/validation/reference/ElementwiseOperations.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -74,15 +74,6 @@ T arithm_op(ArithmeticOperation op, T src1, T src2, ConvertPolicy convert_policy case ArithmeticOperation::DIV: { val = (static_cast<intermediate_type>(src1) / static_cast<intermediate_type>(src2)); - if(std::is_integral<T>::value) - { - // Implement flooring division - val = (src2 == 0) ? 0 : val; - if(static_cast<int32_t>(src1) % static_cast<int32_t>(src2) != 0 && ((src1 < 0) != (src2 < 0))) - { - --val; - } - } break; } case ArithmeticOperation::POWER: diff --git a/tests/validation/reference/ElementwiseUnary.cpp b/tests/validation/reference/ElementwiseUnary.cpp new file mode 100644 index 0000000000..558f9d24fc --- /dev/null +++ b/tests/validation/reference/ElementwiseUnary.cpp @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2018-2020, 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "ElementwiseUnary.h" +#include "tests/validation/Helpers.h" +#include "utils/TypePrinter.h" +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ +template <typename T> +SimpleTensor<T> elementwise_unary(const SimpleTensor<T> &src, SimpleTensor<T> &dst, ElementWiseUnary op) +{ + for(int i = 0; i < src.num_elements(); ++i) + { + switch(op) + { + case ElementWiseUnary::RSQRT: + dst[i] = 1.f / std::sqrt(src[i]); + break; + case ElementWiseUnary::EXP: + dst[i] = std::exp(src[i]); + break; + case ElementWiseUnary::NEG: + dst[i] = -src[i]; + break; + case ElementWiseUnary::LOG: + dst[i] = std::log(src[i]); + break; + case ElementWiseUnary::ABS: + dst[i] = std::abs(src[i]); + break; + case ElementWiseUnary::SIN: + dst[i] = std::sin(src[i]); + break; + case ElementWiseUnary::ROUND: + dst[i] = arm_compute::support::cpp11::nearbyint(src[i]); + break; + default: + ARM_COMPUTE_ERROR("Not implemented"); + } + } + return dst; +} +template <> +SimpleTensor<int8_t> elementwise_unary(const SimpleTensor<int8_t> &src, SimpleTensor<int8_t> &dst, ElementWiseUnary op) +{ + if(dst.data_type() == DataType::QASYMM8_SIGNED) + { + SimpleTensor<float> src_tmp = convert_from_asymmetric(src); + SimpleTensor<float> dst_tmp(src.shape(), DataType::F32); + for(int i = 0; i < src.num_elements(); ++i) + { + switch(op) + { + case ElementWiseUnary::RSQRT: + if(src_tmp[i] != 0) + { + dst_tmp[i] = 1.f / std::sqrt(src_tmp[i]); + } + else + { + // rsqrt(0) give 'inf' so set to the maximum in int8: 127 + dst_tmp[i] = (127.0f - dst.quantization_info().uniform().offset) * dst.quantization_info().uniform().scale ; + } + break; + + case ElementWiseUnary::LOG: + if(src_tmp[i] != 0) + { + dst_tmp[i] = std::log(src_tmp[i]); + } + else + { + dst_tmp[i] = (-128.0f - dst.quantization_info().uniform().offset) * dst.quantization_info().uniform().scale ; + } + break; + + default: + elementwise_unary(src_tmp, dst_tmp, op); + break; + } + } + dst = convert_to_asymmetric<int8_t>(dst_tmp, dst.quantization_info()); + } + else + { + ARM_COMPUTE_ERROR("Not implemented"); + } + return dst; +} +template <> +SimpleTensor<uint8_t> elementwise_unary(const SimpleTensor<uint8_t> &src, SimpleTensor<uint8_t> &dst, ElementWiseUnary op) +{ + if(dst.data_type() == DataType::QASYMM8) + { + SimpleTensor<float> src_tmp = convert_from_asymmetric(src); + SimpleTensor<float> dst_tmp(src.shape(), DataType::F32); + for(int i = 0; i < src.num_elements(); ++i) + { + switch(op) + { + case ElementWiseUnary::RSQRT: + if(src_tmp[i] != 0) + { + dst_tmp[i] = 1.f / std::sqrt(src_tmp[i]); + } + else + { + // rsqrt(0) give 'inf' so set to the maximum in uint8: 255 + dst_tmp[i] = (255.0f - dst.quantization_info().uniform().offset)* dst.quantization_info().uniform().scale; + } + break; + + case ElementWiseUnary::LOG: + if(src_tmp[i] != 0) + { + dst_tmp[i] = std::log(src_tmp[i]); + } + else + { + dst_tmp[i] = -dst.quantization_info().uniform().offset * dst.quantization_info().uniform().scale; + } + break; + + default: + elementwise_unary(src_tmp, dst_tmp, op); + break; + } + } + dst = convert_to_asymmetric<uint8_t>(dst_tmp, dst.quantization_info()); + } + else + { + ARM_COMPUTE_ERROR("Not implemented"); + } + return dst; +} + +template SimpleTensor<float> elementwise_unary(const SimpleTensor<float> &src, SimpleTensor<float> &dst, ElementWiseUnary op); +template SimpleTensor<half> elementwise_unary(const SimpleTensor<half> &src, SimpleTensor<half> &dst, ElementWiseUnary op); +template SimpleTensor<int32_t> elementwise_unary(const SimpleTensor<int32_t> &src, SimpleTensor<int32_t> &dst, ElementWiseUnary op); + +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/reference/ElementWiseUnary.h b/tests/validation/reference/ElementwiseUnary.h index be4a229a5b..ae7a49bce4 100644 --- a/tests/validation/reference/ElementWiseUnary.h +++ b/tests/validation/reference/ElementwiseUnary.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2019, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -35,7 +35,7 @@ namespace validation namespace reference { template <typename T> -SimpleTensor<T> elementwise_unary(const SimpleTensor<T> &src, ElementWiseUnary op); +SimpleTensor<T> elementwise_unary(const SimpleTensor<T> &src, SimpleTensor<T> &dst, ElementWiseUnary op); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/EqualizeHistogram.cpp b/tests/validation/reference/EqualizeHistogram.cpp deleted file mode 100644 index 8a957d7085..0000000000 --- a/tests/validation/reference/EqualizeHistogram.cpp +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "EqualizeHistogram.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<T> equalize_histogram(const SimpleTensor<T> &src) -{ - const size_t num_bins = 256; // 0-255 inclusive - - std::vector<T> lut(num_bins); - std::vector<uint32_t> hist(num_bins); - std::vector<uint32_t> cd(num_bins); // cumulative distribution - - SimpleTensor<T> dst(src.shape(), src.data_type()); - - // Create the histogram - for(int element_idx = 0; element_idx < src.num_elements(); ++element_idx) - { - hist[src[element_idx]]++; - } - - // Calculate cumulative distribution - std::partial_sum(hist.begin(), hist.end(), cd.begin()); - - // Get the number of pixels that have the lowest non-zero value - const uint32_t cd_min = *std::find_if(hist.begin(), hist.end(), [](const uint32_t &x) - { - return x > 0; - }); - - const size_t total_num_pixels = cd.back(); - - // Single color - create linear distribution - if(total_num_pixels == cd_min) - { - std::iota(lut.begin(), lut.end(), 0); - } - else - { - const float diff = total_num_pixels - cd_min; - - for(size_t i = 0; i < num_bins; ++i) - { - lut[i] = lround((cd[i] - cd_min) / diff * 255.f); - } - } - - // Fill output tensor with equalized values -#if defined(_OPENMP) - #pragma omp parallel for -#endif /* _OPENMP */ - for(int i = 0; i < src.num_elements(); ++i) - { - dst[i] = lut[src[i]]; - } - - return dst; -} - -template SimpleTensor<uint8_t> equalize_histogram(const SimpleTensor<uint8_t> &src); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/EqualizeHistogram.h b/tests/validation/reference/EqualizeHistogram.h deleted file mode 100644 index c79b2131aa..0000000000 --- a/tests/validation/reference/EqualizeHistogram.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_EQUALIZE_HISTOGRAM_H -#define ARM_COMPUTE_TEST_EQUALIZE_HISTOGRAM_H - -#include "tests/SimpleTensor.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<T> equalize_histogram(const SimpleTensor<T> &src); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_EQUALIZE_HISTOGRAM_H */ diff --git a/tests/validation/reference/FastCorners.cpp b/tests/validation/reference/FastCorners.cpp deleted file mode 100644 index 25fbf1b6f2..0000000000 --- a/tests/validation/reference/FastCorners.cpp +++ /dev/null @@ -1,241 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "FastCorners.h" - -#include "Utils.h" -#include "tests/validation/Helpers.h" -#include "tests/validation/reference/NonMaximaSuppression.h" - -#include "tests/framework/Asserts.h" -#include <iomanip> - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -namespace -{ -constexpr unsigned int bresenham_radius = 3; -constexpr unsigned int bresenham_count = 16; - -/* - Offsets of the 16 pixels in the Bresenham circle of radius 3 centered on P - . . . . . . . . . - . . . F 0 1 . . . - . . E . . . 2 . . - . D . . . . . 3 . - . C . . P . . 4 . - . B . . . . . 5 . - . . A . . . 6 . . - . . . 9 8 7 . . . - . . . . . . . . . -*/ -const std::array<std::array<int, 2>, 16> circle_offsets = -{ - { - { { 0, -3 } }, // 0 - pixel #1 - { { 1, -3 } }, // 1 - pixel #2 - { { 2, -2 } }, // 2 - pixel #3 - { { 3, -1 } }, // 3 - pixel #4 - { { 3, 0 } }, // 4 - pixel #5 - { { 3, 1 } }, // 5 - pixel #6 - { { 2, 2 } }, // 6 - pixel #7 - { { 1, 3 } }, // 7 - pixel #8 - { { 0, 3 } }, // 8 - pixel #9 - { { -1, 3 } }, // 9 - pixel #10 - { { -2, 2 } }, // A - pixel #11 - { { -3, 1 } }, // B - pixel #12 - { { -3, 0 } }, // C - pixel #13 - { { -3, -1 } }, // D - pixel #14 - { { -2, -2 } }, // E - pixel #15 - { { -1, -3 } } // F - pixel #16 - } -}; - -/* - FAST-9 bit masks for consecutive points surrounding a corner candidate - Rejection of non-corners is expedited by checking pixels 1, 9, then 5, 13... -*/ -const std::array<uint16_t, 16> fast9_masks = -{ - { - 0x01FF, // 0000 0001 1111 1111 - 0x03FE, // 0000 0011 1111 1110 - 0x07FC, // 0000 0111 1111 1100 - 0x0FF8, // 0000 1111 1111 1000 - 0x1FF0, // 0001 1111 1111 0000 - 0x3FE0, // 0011 1111 1110 0000 - 0x7FC0, // 0111 1111 1100 0000 - 0xFF80, // 1111 1111 1000 0000 - 0xFF01, // 1111 1111 0000 0001 - 0xFE03, // 1111 1110 0000 0011 - 0xFC07, // 1111 1100 0000 0111 - 0xF80F, // 1111 1000 0000 1111 - 0xF01F, // 1111 0000 0001 1111 - 0xE03F, // 1110 0000 0011 1111 - 0xC07F, // 1100 0000 0111 1111 - 0x80FF // 1000 0000 1111 1111 - } -}; - -inline bool in_range(const uint8_t low, const uint8_t high, const uint8_t val) -{ - return low <= val && val <= high; -} - -template <typename T, typename F> -bool is_a_corner(const Coordinates &candidate, const SimpleTensor<T> &src, uint8_t threshold, BorderMode border_mode, T constant_border_value, F intensity_at) -{ - const auto intensity_p = tensor_elem_at(src, candidate, border_mode, constant_border_value); - const auto thresh_bright = intensity_p + threshold; - const auto thresh_dark = intensity_p - threshold; - - // Quicker rejection of non-corner points by checking pixels 1, 9 then 5, 13 around the candidate - const auto p1 = intensity_at(candidate, 0); - const auto p9 = intensity_at(candidate, 8); - const auto p5 = intensity_at(candidate, 4); - const auto p13 = intensity_at(candidate, 12); - - if((in_range(thresh_dark, thresh_bright, p1) && in_range(thresh_dark, thresh_bright, p9)) - || (in_range(thresh_dark, thresh_bright, p5) && in_range(thresh_dark, thresh_bright, p13))) - { - return false; - } - - uint16_t mask_bright = 0; - uint16_t mask_dark = 0; - - // Set bits of the brighter/darker pixels mask accordingly - for(unsigned int n = 0; n < bresenham_count; ++n) - { - T intensity_n = intensity_at(candidate, n); - mask_bright |= (intensity_n > thresh_bright) << n; - mask_dark |= (intensity_n < thresh_dark) << n; - } - - // Mark as corner candidate if brighter/darker pixel sequence satisfies any one of the FAST-9 masks - const auto found = std::find_if(fast9_masks.begin(), fast9_masks.end(), [&](decltype(fast9_masks[0]) mask) - { - return (mask_bright & mask) == mask || (mask_dark & mask) == mask; - }); - - return found != fast9_masks.end(); -} -} // namespace - -template <typename T> -std::vector<KeyPoint> fast_corners(const SimpleTensor<T> &src, float input_thresh, bool suppress_nonmax, BorderMode border_mode, T constant_border_value) -{ - // Get intensity of pixel at given index on the Bresenham circle around a candidate point - const auto intensity_at = [&](const Coordinates & point, const unsigned int idx) - { - const auto offset = circle_offsets[idx]; - Coordinates px{ point.x() + offset[0], point.y() + offset[1] }; - return tensor_elem_at(src, px, border_mode, constant_border_value); - }; - - const auto threshold = static_cast<uint8_t>(input_thresh); - std::vector<KeyPoint> corners; - - // 1. Detect potential corners (the segment test) - std::vector<Coordinates> corner_candidates; - SimpleTensor<uint8_t> scores(src.shape(), DataType::U8); - ValidRegion valid_region = shape_to_valid_region(src.shape(), BorderMode::UNDEFINED == border_mode, BorderSize(bresenham_radius)); - - const uint32_t num_elements = src.num_elements(); - for(uint32_t i = 0; i < num_elements; ++i) - { - Coordinates candidate = index2coord(src.shape(), i); - scores[i] = 0; - if(!is_in_valid_region(valid_region, candidate)) - { - continue; - } - - if(is_a_corner(candidate, src, threshold, border_mode, constant_border_value, intensity_at)) - { - corner_candidates.emplace_back(candidate); - scores[i] = 1; - } - } - - // 2. Calculate corner scores if necessary - if(suppress_nonmax) - { - for(const auto &candidate : corner_candidates) - { - const auto index = coord2index(scores.shape(), candidate); - uint8_t thresh_max = UINT8_MAX; - uint8_t thresh_min = threshold; - uint8_t response = (thresh_min + thresh_max) / 2; - - // Corner score (response) is the largest threshold for which the pixel remains a corner - while(thresh_max - thresh_min > 1) - { - response = (thresh_min + thresh_max) / 2; - if(is_a_corner(candidate, src, response, border_mode, constant_border_value, intensity_at)) - { - thresh_min = response; // raise threshold - } - else - { - thresh_max = response; // lower threshold - } - } - scores[index] = thresh_min; - } - - scores = non_maxima_suppression(scores, border_mode, constant_border_value); - valid_region = shape_to_valid_region(scores.shape(), BorderMode::UNDEFINED == border_mode, BorderSize(bresenham_radius + 1)); - } - - for(const auto &candidate : corner_candidates) - { - const auto index = coord2index(scores.shape(), candidate); - if(scores[index] > 0.f && is_in_valid_region(valid_region, candidate)) - { - KeyPoint corner; - corner.x = candidate.x(); - corner.y = candidate.y(); - corner.strength = scores[index]; - corner.tracking_status = 1; - corner.scale = 0.f; - corner.orientation = 0.f; - corner.error = 0.f; - corners.emplace_back(corner); - } - } - - return corners; -} - -template std::vector<KeyPoint> fast_corners(const SimpleTensor<uint8_t> &src, float threshold, bool suppress_nonmax, BorderMode border_mode, uint8_t constant_border_value); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/FullyConnectedLayer.cpp b/tests/validation/reference/FullyConnectedLayer.cpp index 21333958f8..af30e9ee54 100644 --- a/tests/validation/reference/FullyConnectedLayer.cpp +++ b/tests/validation/reference/FullyConnectedLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -123,7 +123,7 @@ SimpleTensor<T> fully_connected_layer(const SimpleTensor<T> &src, const SimpleTe // Create reference SimpleTensor<T> dst{ TensorShape{ dst_shape }, src.data_type(), 1, out_quant_info }; - // Sanity checks + // Health checks const int num_batch_dimensions = std::max(0, static_cast<int>(dst_shape.num_dimensions()) - 1); const int num_input_dimensions = src.shape().num_dimensions() - num_batch_dimensions; const unsigned int linear_input_size = src.shape().total_size_lower(num_input_dimensions); diff --git a/tests/validation/reference/GEMM.cpp b/tests/validation/reference/GEMM.cpp index 6b3aa390f0..d513343796 100644 --- a/tests/validation/reference/GEMM.cpp +++ b/tests/validation/reference/GEMM.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,6 +25,7 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Types.h" +#include "tests/validation/reference/ArithmeticOperations.h" namespace arm_compute { @@ -35,10 +36,11 @@ namespace validation namespace reference { template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type> -SimpleTensor<T> gemm(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta) +SimpleTensor<T> +gemm(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta) { // Create reference - SimpleTensor<T> dst{ c.shape(), c.data_type(), 1 }; + SimpleTensor<T> dst{c.shape(), c.data_type(), 1}; // Compute reference const int M = a.shape().y(); @@ -50,30 +52,47 @@ SimpleTensor<T> gemm(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const S const int a_stride_z = K * M; const int a_stride_w = K * M * D; - const int b_stride_z = b.shape().num_dimensions() > 2 ? N * K : 0; // Do not slide the matrix B along the 3th dimension in case matrix B has less than 3 dimensions - const int b_stride_w = b.shape().num_dimensions() > 3 ? K * N * D : 0; // Do not slide the matrix B along the 4th dimension in case matrix B has less than 4 dimensions + const int b_stride_z = + b.shape().num_dimensions() > 2 + ? N * K + : 0; // Do not slide the matrix B along the 3th dimension in case matrix B has less than 3 dimensions + int b_stride_w = + b.shape().num_dimensions() > 3 + ? K * N * D + : 0; // Do not slide the matrix B along the 4th dimension in case matrix B has less than 4 dimensions + + // Note: There are 3 gemm types: batched-gemm, multi-gemm, and batched of multi-gemms. The third dimension of tensor b is overloaded when tensor b has exactly 3 dimensions: + // it can be either number of batches or multis. Batched-GEMM computation is detected only when the third dimension of "a" and "c" tensors is 1 and the number of dimensions is 4 + const bool is_batched_gemm = b.shape().num_dimensions() == 3 && a.shape().num_dimensions() == 4 && + c.shape().num_dimensions() == 4 && a.shape()[2] == 1 && c.shape()[2] == 1; + + // Batched-GEMM + if (is_batched_gemm) + { + b_stride_w = b_stride_z; + } const int c_stride_z = N * M; const int c_stride_w = N * M * D; -#if defined(_OPENMP) && !( defined(__arm__) && defined(__ANDROID__)) +#if defined(_OPENMP) && !(defined(__arm__) && defined(__ANDROID__)) #pragma omp parallel for collapse(2) #endif /* _OPENMP */ - for(int w = 0; w < W; ++w) + for (int w = 0; w < W; ++w) { - for(int depth = 0; depth < D; ++depth) + for (int depth = 0; depth < D; ++depth) { const int base_addr_a = depth * a_stride_z + w * a_stride_w; const int base_addr_b = depth * b_stride_z + w * b_stride_w; const int base_addr_c = depth * c_stride_z + w * c_stride_w; - for(int row = 0; row < M; ++row) + for (int row = 0; row < M; ++row) { - for(int col = 0; col < N; ++col) + for (int col = 0; col < N; ++col) { T acc(0); - for(int k = 0; k < K; ++k) + for (int k = 0; k < K; ++k) { acc += a[base_addr_a + k + row * K] * b[base_addr_b + col + k * N]; } @@ -89,11 +108,12 @@ SimpleTensor<T> gemm(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const S } template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type> -SimpleTensor<T> gemm_mixed_precision(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta) +SimpleTensor<T> gemm_mixed_precision( + const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta) { // GEMM mixed-precision combines F32 accumulators with F16 multiplications // Create reference - SimpleTensor<T> dst{ c.shape(), c.data_type(), 1 }; + SimpleTensor<T> dst{c.shape(), c.data_type(), 1}; // Compute reference const int M = a.shape().y(); @@ -105,36 +125,54 @@ SimpleTensor<T> gemm_mixed_precision(const SimpleTensor<T> &a, const SimpleTenso const int a_stride_z = K * M; const int a_stride_w = K * M * D; - const int b_stride_z = b.shape().num_dimensions() > 2 ? N * K : 0; // Do not slide the matrix B along the 3th dimension in case matrix B has less than 3 dimensions - const int b_stride_w = b.shape().num_dimensions() > 3 ? K * N * D : 0; // Do not slide the matrix B along the 4th dimension in case matrix B has less than 4 dimensions + const int b_stride_z = + b.shape().num_dimensions() > 2 + ? N * K + : 0; // Do not slide the matrix B along the 3th dimension in case matrix B has less than 3 dimensions + int b_stride_w = + b.shape().num_dimensions() > 3 + ? K * N * D + : 0; // Do not slide the matrix B along the 4th dimension in case matrix B has less than 4 dimensions + + // Note: There are 3 gemm types: batched-gemm, multi-gemm, and batched of multi-gemms. The third dimension of tensor b is overloaded when tensor b has exactly 3 dimensions: + // it can be either number of batches or multis. Batched-GEMM computation is detected only when the third dimension of "a" and "c" tensors is 1 and the number of dimensions is 4 + const bool is_batched_gemm = b.shape().num_dimensions() == 3 && a.shape().num_dimensions() == 4 && + c.shape().num_dimensions() == 4 && a.shape()[2] == 1 && c.shape()[2] == 1; + + // Batched-GEMM + if (is_batched_gemm) + { + b_stride_w = b_stride_z; + } const int c_stride_z = N * M; const int c_stride_w = N * M * D; -#if defined(_OPENMP) && !( defined(__arm__) && defined(__ANDROID__)) +#if defined(_OPENMP) && !(defined(__arm__) && defined(__ANDROID__)) #pragma omp parallel for collapse(2) #endif /* _OPENMP */ - for(int w = 0; w < W; ++w) + for (int w = 0; w < W; ++w) { - for(int depth = 0; depth < D; ++depth) + for (int depth = 0; depth < D; ++depth) { const int base_addr_a = depth * a_stride_z + w * a_stride_w; const int base_addr_b = depth * b_stride_z + w * b_stride_w; const int base_addr_c = depth * c_stride_z + w * c_stride_w; - for(int row = 0; row < M; ++row) + for (int row = 0; row < M; ++row) { - for(int col = 0; col < N; ++col) + for (int col = 0; col < N; ++col) { float acc(0); - for(int k = 0; k < K; ++k) + for (int k = 0; k < K; ++k) { acc += static_cast<float>(a[base_addr_a + k + row * K] * b[base_addr_b + col + k * N]); } // Finalize the result: alpha * A * B + beta * C - dst[base_addr_c + col + row * N] = static_cast<T>(alpha * acc + beta * c[base_addr_c + col + row * N]); + dst[base_addr_c + col + row * N] = + static_cast<T>(alpha * acc + beta * c[base_addr_c + col + row * N]); } } } @@ -143,8 +181,21 @@ SimpleTensor<T> gemm_mixed_precision(const SimpleTensor<T> &a, const SimpleTenso return dst; } +template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type> +void gemm_accumulate(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta, SimpleTensor<T> &dst) +{ + // Compute reference + SimpleTensor<T> dst_gemm = gemm(a, b, c, alpha, beta); + reference::arithmetic_operation<T>(reference::ArithmeticOperation::ADD, dst, dst_gemm, dst, ConvertPolicy::SATURATE); +} + +template SimpleTensor<bfloat16> gemm(const SimpleTensor<bfloat16> &a, const SimpleTensor<bfloat16> &b, const SimpleTensor<bfloat16> &c, float alpha, float beta); template SimpleTensor<float> gemm(const SimpleTensor<float> &a, const SimpleTensor<float> &b, const SimpleTensor<float> &c, float alpha, float beta); template SimpleTensor<half> gemm(const SimpleTensor<half> &a, const SimpleTensor<half> &b, const SimpleTensor<half> &c, float alpha, float beta); + +template void gemm_accumulate(const SimpleTensor<float> &a, const SimpleTensor<float> &b, const SimpleTensor<float> &c, float alpha, float beta, SimpleTensor<float> &dst); +template void gemm_accumulate(const SimpleTensor<half> &a, const SimpleTensor<half> &b, const SimpleTensor<half> &c, float alpha, float beta, SimpleTensor<half> &dst); + template SimpleTensor<half> gemm_mixed_precision(const SimpleTensor<half> &a, const SimpleTensor<half> &b, const SimpleTensor<half> &c, float alpha, float beta); } // namespace reference } // namespace validation diff --git a/tests/validation/reference/GEMM.h b/tests/validation/reference/GEMM.h index 5feaeda584..1b97570122 100644 --- a/tests/validation/reference/GEMM.h +++ b/tests/validation/reference/GEMM.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2019, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_GEMM_H -#define ARM_COMPUTE_TEST_GEMM_H +#ifndef ACL_TESTS_VALIDATION_REFERENCE_GEMM_H +#define ACL_TESTS_VALIDATION_REFERENCE_GEMM_H #include "tests/SimpleTensor.h" #include "tests/validation/Helpers.h" @@ -41,8 +41,11 @@ SimpleTensor<T> gemm(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const S template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type = 0> SimpleTensor<T> gemm_mixed_precision(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta); +template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type = 0> +void gemm_accumulate(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta, SimpleTensor<T> &dst); + } // namespace reference } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_GEMM_H */ +#endif // ACL_TESTS_VALIDATION_REFERENCE_GEMM_H diff --git a/tests/validation/reference/GEMMLowp.cpp b/tests/validation/reference/GEMMLowp.cpp index 1615b51e73..30c577d850 100644 --- a/tests/validation/reference/GEMMLowp.cpp +++ b/tests/validation/reference/GEMMLowp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #include "GEMMLowp.h" #include "arm_compute/core/Types.h" +#include "tests/validation/reference/ArithmeticOperations.h" #include "tests/validation/reference/UtilsQuantizedAsymm.h" #include "support/ToolchainSupport.h" @@ -230,6 +231,13 @@ SimpleTensor<T_out> gemmlowp_matrix_multiply_core(const SimpleTensor<T_in> &a, c return c; } +template <typename T_out, typename T_in, typename T_in_1> +void gemmlowp_matrix_multiply_core_accumulate(const SimpleTensor<T_in> &a, const SimpleTensor<T_in_1> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset, SimpleTensor<T_out> &dst) +{ + SimpleTensor<T_out> dst_gemm = gemmlowp_matrix_multiply_core<T_out, T_in, T_in_1>(a, b, shape_c, a_offset, b_offset); + reference::arithmetic_operation<T_out>(reference::ArithmeticOperation::ADD, dst, dst_gemm, dst, ConvertPolicy::SATURATE); +} + // used to validate assembly kernels which don't know anything about offsets template <typename T1, typename T2, typename T3> SimpleTensor<T1> gemmlowp(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c) @@ -336,6 +344,8 @@ template SimpleTensor<int8_t> gemmlowp_quantize_down_scale(const SimpleTensor<in std::vector<int32_t> result_shift, int32_t min, int32_t max); template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset); template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset); +template void gemmlowp_matrix_multiply_core_accumulate(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset, SimpleTensor<int32_t> &dst); +template void gemmlowp_matrix_multiply_core_accumulate(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset, SimpleTensor<int32_t> &dst); template SimpleTensor<int32_t> gemmlowp<int32_t, int8_t, int8_t>(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c); template SimpleTensor<int32_t> gemmlowp<int32_t, uint8_t, uint8_t>(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c); template SimpleTensor<int32_t> gemmlowp<int32_t, uint8_t, int8_t>(const SimpleTensor<uint8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c); diff --git a/tests/validation/reference/GEMMLowp.h b/tests/validation/reference/GEMMLowp.h index 99015d71fb..6e471fdad1 100644 --- a/tests/validation/reference/GEMMLowp.h +++ b/tests/validation/reference/GEMMLowp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_GEMMLOWP_H -#define ARM_COMPUTE_TEST_GEMMLOWP_H +#ifndef ACL_TESTS_VALIDATION_REFERENCE_GEMMLOWP_H +#define ACL_TESTS_VALIDATION_REFERENCE_GEMMLOWP_H #include "tests/SimpleTensor.h" #include "tests/validation/Helpers.h" @@ -38,6 +38,9 @@ namespace reference template <typename T1, typename T2, typename T3> SimpleTensor<T1> gemmlowp_matrix_multiply_core(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset); +template <typename T1, typename T2, typename T3> +void gemmlowp_matrix_multiply_core_accumulate(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset, SimpleTensor<T1> &dst_); + template <typename T1, typename T2, typename T3 = T2> SimpleTensor<T1> gemmlowp(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c); @@ -71,4 +74,4 @@ SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<TIn> } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_GEMMLOWP_H */ +#endif // ACL_TESTS_VALIDATION_REFERENCE_GEMMLOWP_H diff --git a/tests/validation/reference/Gather.cpp b/tests/validation/reference/Gather.cpp index 93ac09cf95..c90c04f8cc 100644 --- a/tests/validation/reference/Gather.cpp +++ b/tests/validation/reference/Gather.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2019, 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -39,27 +39,56 @@ namespace reference template <typename T> SimpleTensor<T> gather(const SimpleTensor<T> &src, const SimpleTensor<uint32_t> &indices, uint32_t actual_axis) { - const auto *indices_ptr = static_cast<const uint32_t *>(indices.data()); const TensorShape dst_shape = arm_compute::misc::shape_calculator::compute_gather_shape(src.shape(), indices.shape(), actual_axis); SimpleTensor<T> dst(dst_shape, src.data_type()); + const auto src_ptr = static_cast<const T *>(src.data()); + const auto indices_ptr = static_cast<const uint32_t *>(indices.data()); + const auto dst_ptr = static_cast<T *>(dst.data()); + + const uint32_t index_limit = src.shape()[actual_axis]; + Window win; win.use_tensor_dimensions(dst_shape); - execute_window_loop(win, [&](const Coordinates & id) - { - Coordinates offset; - for(unsigned int dim = 0; dim < id.num_dimensions(); ++dim) + + execute_window_loop(win, [&](const Coordinates &dst_coords) { + const auto dst_addr = coords2index(dst.shape(), dst_coords); + + // Calculate the coordinates of the index value. + Coordinates idx_coords; + + for(size_t i = 0; i < indices.shape().num_dimensions(); ++i) { - if(dim == actual_axis) + idx_coords.set(i, dst_coords[i + actual_axis]); + } + + const auto index = indices_ptr[coords2index(indices.shape(), idx_coords)]; + + if(index < index_limit) + { + // Calculate the coordinates of the source data. + Coordinates src_coords; + + for(size_t i = 0; i < actual_axis; ++i) { - offset.set(dim, indices_ptr[id[dim]]); + src_coords.set(i, dst_coords[i]); } - else + + src_coords.set(actual_axis, index); + + for(size_t i = actual_axis + 1; i < src.shape().num_dimensions(); ++i) { - offset.set(dim, id[dim]); + src_coords.set(i, dst_coords[i + indices.shape().num_dimensions() - 1]); } + + // Copy the data. + const auto src_addr = coords2index(src.shape(), src_coords); + dst_ptr[dst_addr] = src_ptr[src_addr]; + } + else + { + dst_ptr[dst_addr] = 0; } - *reinterpret_cast<T *>(dst(id)) = *reinterpret_cast<const T *>(src(offset)); }); return dst; @@ -72,4 +101,4 @@ template SimpleTensor<uint8_t> gather(const SimpleTensor<uint8_t> &src, const Si } // namespace reference } // namespace validation } // namespace test -} // namespace arm_compute
\ No newline at end of file +} // namespace arm_compute diff --git a/tests/validation/reference/Gaussian3x3.cpp b/tests/validation/reference/Gaussian3x3.cpp deleted file mode 100644 index 2e307e8152..0000000000 --- a/tests/validation/reference/Gaussian3x3.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Helpers.h" - -#include "Gaussian3x3.h" -#include "Utils.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<T> gaussian3x3(const SimpleTensor<T> &src, BorderMode border_mode, T constant_border_value) -{ - SimpleTensor<T> dst(src.shape(), src.data_type()); - const std::array<T, 9> filter{ { 1, 2, 1, 2, 4, 2, 1, 2, 1 } }; - const float scale = 1.f / 16.f; - const uint32_t num_elements = src.num_elements(); - -#if defined(_OPENMP) - #pragma omp parallel for -#endif /* _OPENMP */ - for(uint32_t element_idx = 0; element_idx < num_elements; ++element_idx) - { - const Coordinates id = index2coord(src.shape(), element_idx); - apply_2d_spatial_filter(id, src, dst, TensorShape(3U, 3U), filter.data(), scale, border_mode, constant_border_value); - } - return dst; -} - -template SimpleTensor<uint8_t> gaussian3x3(const SimpleTensor<uint8_t> &src, BorderMode border_mode, uint8_t constant_border_value); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/Gaussian3x3.h b/tests/validation/reference/Gaussian3x3.h deleted file mode 100644 index a433db6693..0000000000 --- a/tests/validation/reference/Gaussian3x3.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_GAUSSIAN3X3_H -#define ARM_COMPUTE_TEST_GAUSSIAN3X3_H - -#include "tests/SimpleTensor.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<T> gaussian3x3(const SimpleTensor<T> &src, BorderMode border_mode, T constant_border_value); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_GAUSSIAN3X3_H */ diff --git a/tests/validation/reference/Gaussian5x5.cpp b/tests/validation/reference/Gaussian5x5.cpp deleted file mode 100644 index 2133d8980e..0000000000 --- a/tests/validation/reference/Gaussian5x5.cpp +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Helpers.h" - -#include "Gaussian5x5.h" -#include "Utils.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<T> gaussian5x5(const SimpleTensor<T> &src, BorderMode border_mode, T constant_border_value) -{ - SimpleTensor<T> dst(src.shape(), src.data_type()); - const std::array<T, 25> filter{ { - 1, 4, 6, 4, 1, - 4, 16, 24, 16, 4, - 6, 24, 36, 24, 6, - 4, 16, 24, 16, 4, - 1, 4, 6, 4, 1 - } }; - const float scale = 1.f / 256.f; - const uint32_t num_elements = src.num_elements(); - -#if defined(_OPENMP) - #pragma omp parallel for -#endif /* _OPENMP */ - for(uint32_t element_idx = 0; element_idx < num_elements; ++element_idx) - { - const Coordinates id = index2coord(src.shape(), element_idx); - apply_2d_spatial_filter(id, src, dst, TensorShape(5U, 5U), filter.data(), scale, border_mode, constant_border_value); - } - return dst; -} - -template SimpleTensor<uint8_t> gaussian5x5(const SimpleTensor<uint8_t> &src, BorderMode border_mode, uint8_t constant_border_value); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/Gaussian5x5.h b/tests/validation/reference/Gaussian5x5.h deleted file mode 100644 index 42920bd4c0..0000000000 --- a/tests/validation/reference/Gaussian5x5.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_GAUSSIAN5X5_H -#define ARM_COMPUTE_TEST_GAUSSIAN5X5_H - -#include "tests/SimpleTensor.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<T> gaussian5x5(const SimpleTensor<T> &src, BorderMode border_mode, T constant_border_value); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_GAUSSIAN5X5_H */ diff --git a/tests/validation/reference/GaussianPyramidHalf.cpp b/tests/validation/reference/GaussianPyramidHalf.cpp deleted file mode 100644 index 5bddd853a0..0000000000 --- a/tests/validation/reference/GaussianPyramidHalf.cpp +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2017-2018 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "GaussianPyramidHalf.h" - -#include "arm_compute/core/Helpers.h" - -#include "Gaussian5x5.h" -#include "Scale.h" -#include "Utils.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -std::vector<SimpleTensor<T>> gaussian_pyramid_half(const SimpleTensor<T> &src, BorderMode border_mode, uint8_t constant_border_value, size_t num_levels) -{ - std::vector<SimpleTensor<T>> dst; - - // Level0 is equal to src - dst.push_back(src); - - for(size_t i = 1; i < num_levels; ++i) - { - // Gaussian Filter - const SimpleTensor<T> out_gaus5x5 = reference::gaussian5x5(dst[i - 1], border_mode, constant_border_value); - - // Scale down by 2 with nearest interpolation - const SimpleTensor<T> out = reference::scale(out_gaus5x5, SCALE_PYRAMID_HALF, SCALE_PYRAMID_HALF, InterpolationPolicy::NEAREST_NEIGHBOR, border_mode, constant_border_value, SamplingPolicy::CENTER, - true); - - dst.push_back(out); - } - - return dst; -} - -template std::vector<SimpleTensor<uint8_t>> gaussian_pyramid_half(const SimpleTensor<uint8_t> &src, BorderMode border_mode, uint8_t constant_border_value, size_t num_levels); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/GaussianPyramidHalf.h b/tests/validation/reference/GaussianPyramidHalf.h deleted file mode 100644 index 225ef00727..0000000000 --- a/tests/validation/reference/GaussianPyramidHalf.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_GAUSSIAN_PYRAMID_HALF_H -#define ARM_COMPUTE_TEST_GAUSSIAN_PYRAMID_HALF_H - -#include "tests/SimpleTensor.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -std::vector<SimpleTensor<T>> gaussian_pyramid_half(const SimpleTensor<T> &src, BorderMode border_mode, uint8_t constant_border_value, size_t num_levels); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_GAUSSIAN_PYRAMID_HALF_H */
\ No newline at end of file diff --git a/tests/validation/reference/HOGDescriptor.cpp b/tests/validation/reference/HOGDescriptor.cpp deleted file mode 100644 index e00beaf5d7..0000000000 --- a/tests/validation/reference/HOGDescriptor.cpp +++ /dev/null @@ -1,264 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "HOGDescriptor.h" - -#include "Derivative.h" -#include "Magnitude.h" -#include "Phase.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -namespace -{ -template <typename T> -void hog_orientation_compute(const SimpleTensor<T> &mag, const SimpleTensor<T> &phase, std::vector<T> &bins, const HOGInfo &hog_info) -{ - const Size2D &cell_size = hog_info.cell_size(); - const size_t num_bins = hog_info.num_bins(); - - float phase_scale = (PhaseType::SIGNED == hog_info.phase_type() ? num_bins / 360.0f : num_bins / 180.0f); - phase_scale *= (PhaseType::SIGNED == hog_info.phase_type() ? 360.0f / 255.0f : 1.0f); - - int row_idx = 0; - for(size_t yc = 0; yc < cell_size.height; ++yc) - { - for(size_t xc = 0; xc < cell_size.width; xc++) - { - const float mag_value = mag[(row_idx + xc)]; - const float phase_value = phase[(row_idx + xc)] * phase_scale + 0.5f; - const float w1 = phase_value - floor(phase_value); - - // The quantised phase is the histogram index [0, num_bins - 1] - // Check limit of histogram index. If hidx == num_bins, hidx = 0 - const auto hidx = static_cast<unsigned int>(phase_value) % num_bins; - - // Weighted vote between 2 bins - bins[hidx] += mag_value * (1.0f - w1); - bins[(hidx + 1) % num_bins] += mag_value * w1; - } - - row_idx += cell_size.width; - } -} - -template <typename T> -void hog_block_normalization_compute(SimpleTensor<T> &block, SimpleTensor<T> &desc, const HOGInfo &hog_info, uint32_t block_idx) -{ - const int num_bins_per_block = desc.num_channels(); - const HOGNormType norm_type = hog_info.normalization_type(); - const Coordinates id = index2coord(desc.shape(), block_idx); - - float sum = 0.0f; - - // Calculate sum - for(int i = 0; i < num_bins_per_block; ++i) - { - const float val = block[i]; - sum += (norm_type == HOGNormType::L1_NORM) ? std::fabs(val) : val * val; - } - - // Calculate normalization scale - float scale = 1.0f / (std::sqrt(sum) + num_bins_per_block * 0.1f); - - if(norm_type == HOGNormType::L2HYS_NORM) - { - // Reset sum - sum = 0.0f; - for(int i = 0; i < num_bins_per_block; ++i) - { - float val = block[i] * scale; - - // Clip scaled input_value if over l2_hyst_threshold - val = fmin(val, hog_info.l2_hyst_threshold()); - sum += val * val; - block[i] = val; - } - - // We use the same constants of OpenCV - scale = 1.0f / (std::sqrt(sum) + 1e-3f); - } - - for(int i = 0; i < num_bins_per_block; ++i) - { - block[i] *= scale; - reinterpret_cast<float *>(desc(id))[i] = block[i]; - } -} -} // namespace - -template <typename T, typename U, typename V> -void hog_orientation_binning(const SimpleTensor<T> &mag, const SimpleTensor<U> &phase, SimpleTensor<V> &hog_space, const HOGInfo &hog_info) -{ - const Size2D &cell_size = hog_info.cell_size(); - - const size_t num_bins = hog_info.num_bins(); - const size_t shape_width = hog_space.shape().x() * hog_info.cell_size().width; - const size_t shape_height = hog_space.shape().y() * hog_info.cell_size().height; - - TensorShape cell_shape(cell_size.width, cell_size.height); - - SimpleTensor<V> mag_cell(cell_shape, DataType::F32); - SimpleTensor<V> phase_cell(cell_shape, DataType::F32); - - int cell_idx = 0; - int y_offset = 0; - - // Traverse shape - for(auto sy = cell_size.height; sy <= shape_height; sy += cell_size.height) - { - int x_offset = 0; - for(auto sx = cell_size.width; sx <= shape_width; sx += cell_size.width) - { - int row_idx = 0; - int elem_idx = 0; - - // Traverse cell - for(auto y = 0u; y < cell_size.height; ++y) - { - for(auto x = 0u; x < cell_size.width; ++x) - { - int shape_idx = x + row_idx + x_offset + y_offset; - mag_cell[elem_idx] = mag[shape_idx]; - phase_cell[elem_idx] = phase[shape_idx]; - elem_idx++; - } - - row_idx += shape_width; - } - - // Partition magnitude values into bins based on phase values - std::vector<V> bins(num_bins); - hog_orientation_compute(mag_cell, phase_cell, bins, hog_info); - - for(size_t i = 0; i < num_bins; ++i) - { - hog_space[cell_idx * num_bins + i] = bins[i]; - } - - x_offset += cell_size.width; - cell_idx++; - } - - y_offset += (cell_size.height * shape_width); - } -} - -template <typename T> -void hog_block_normalization(SimpleTensor<T> &desc, const SimpleTensor<T> &hog_space, const HOGInfo &hog_info) -{ - const Size2D cells_per_block = hog_info.num_cells_per_block(); - const Size2D cells_per_block_stride = hog_info.num_cells_per_block_stride(); - const Size2D &block_size = hog_info.block_size(); - const Size2D &block_stride = hog_info.block_stride(); - const size_t num_bins = hog_info.num_bins(); - - const size_t shape_width = hog_space.shape().x() * hog_info.cell_size().width; - const size_t shape_height = hog_space.shape().y() * hog_info.cell_size().height; - const size_t num_bins_per_block_x = cells_per_block.width * num_bins; - - // Tensor representing single block - SimpleTensor<T> block(TensorShape{ 1u, 1u }, DataType::F32, cells_per_block.area() * num_bins); - - uint32_t block_idx = 0; - int block_y_offset = 0; - - // Traverse shape - for(auto sy = block_size.height; sy <= shape_height; sy += block_stride.height) - { - int block_x_offset = 0; - for(auto sx = block_size.width; sx <= shape_width; sx += block_stride.width) - { - int cell_y_offset = 0; - int elem_idx = 0; - - // Traverse block - for(auto y = 0u; y < cells_per_block.height; ++y) - { - for(auto x = 0u; x < num_bins_per_block_x; ++x) - { - int idx = x + cell_y_offset + block_x_offset + block_y_offset; - block[elem_idx] = hog_space[idx]; - elem_idx++; - } - - cell_y_offset += hog_space.shape().x() * num_bins; - } - - // Normalize block and write to descriptor - hog_block_normalization_compute(block, desc, hog_info, block_idx); - - block_x_offset += cells_per_block_stride.width * num_bins; - block_idx++; - } - - block_y_offset += cells_per_block_stride.height * num_bins * hog_space.shape().x(); - } -} - -template <typename T, typename U> -SimpleTensor<T> hog_descriptor(const SimpleTensor<U> &src, BorderMode border_mode, U constant_border_value, const HOGInfo &hog_info) -{ - SimpleTensor<int16_t> grad_x; - SimpleTensor<int16_t> grad_y; - - // Create tensor info for HOG descriptor - TensorInfo desc_info(hog_info, src.shape().x(), src.shape().y()); - SimpleTensor<T> desc(desc_info.tensor_shape(), DataType::F32, desc_info.num_channels()); - - // Create HOG space tensor (num_cells_x, num_cells_y) - TensorShape hog_space_shape(src.shape().x() / hog_info.cell_size().width, - src.shape().y() / hog_info.cell_size().height); - - // For each cell a histogram with a num_bins is created - TensorInfo info_hog_space(hog_space_shape, hog_info.num_bins(), DataType::F32); - SimpleTensor<T> hog_space(info_hog_space.tensor_shape(), DataType::F32, info_hog_space.num_channels()); - - // Calculate derivative - std::tie(grad_x, grad_y) = derivative<int16_t>(src, border_mode, constant_border_value, GradientDimension::GRAD_XY); - - // For each cell create histogram based on magnitude and phase - hog_orientation_binning(magnitude(grad_x, grad_y, MagnitudeType::L2NORM), - phase(grad_x, grad_y, hog_info.phase_type()), - hog_space, - hog_info); - - // Normalize histograms based on block size - hog_block_normalization(desc, hog_space, hog_info); - - return desc; -} - -template void hog_orientation_binning(const SimpleTensor<int16_t> &mag, const SimpleTensor<uint8_t> &phase, SimpleTensor<float> &hog_space, const HOGInfo &hog_info); -template void hog_block_normalization(SimpleTensor<float> &desc, const SimpleTensor<float> &hog_space, const HOGInfo &hog_info); -template SimpleTensor<float> hog_descriptor(const SimpleTensor<uint8_t> &src, BorderMode border_mode, uint8_t constant_border_value, const HOGInfo &hog_info); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/HOGDetector.cpp b/tests/validation/reference/HOGDetector.cpp deleted file mode 100644 index 798c3fc142..0000000000 --- a/tests/validation/reference/HOGDetector.cpp +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "HOGDetector.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -namespace -{ -/** Computes the number of detection windows to iterate over in the feature vector. */ -Size2D num_detection_windows(const TensorShape &shape, const Size2D &window_step, const HOGInfo &hog_info) -{ - const size_t num_block_strides_width = hog_info.detection_window_size().width / hog_info.block_stride().width; - const size_t num_block_strides_height = hog_info.detection_window_size().height / hog_info.block_stride().height; - - return Size2D{ floor_to_multiple(shape.x() - num_block_strides_width, window_step.width) + window_step.width, - floor_to_multiple(shape.y() - num_block_strides_height, window_step.height) + window_step.height }; -} -} // namespace - -template <typename T> -std::vector<DetectionWindow> hog_detector(const SimpleTensor<T> &src, const std::vector<T> &descriptor, unsigned int max_num_detection_windows, - const HOGInfo &hog_info, const Size2D &detection_window_stride, float threshold, uint16_t idx_class) -{ - ARM_COMPUTE_ERROR_ON_MSG((detection_window_stride.width % hog_info.block_stride().width != 0), - "Detection window stride width must be multiple of block stride width"); - ARM_COMPUTE_ERROR_ON_MSG((detection_window_stride.height % hog_info.block_stride().height != 0), - "Detection window stride height must be multiple of block stride height"); - - // Create vector for identifying each detection window - std::vector<DetectionWindow> windows; - - // Calculate detection window step - const Size2D window_step(detection_window_stride.width / hog_info.block_stride().width, - detection_window_stride.height / hog_info.block_stride().height); - - // Calculate number of detection windows - const Size2D num_windows = num_detection_windows(src.shape(), window_step, hog_info); - - // Calculate detection window and row offsets in feature vector - const size_t src_offset_x = window_step.width * hog_info.num_bins() * hog_info.num_cells_per_block().area(); - const size_t src_offset_y = window_step.height * hog_info.num_bins() * hog_info.num_cells_per_block().area() * src.shape().x(); - const size_t src_offset_row = src.num_channels() * src.shape().x(); - - // Calculate detection window attributes - const Size2D num_block_positions_per_detection_window = hog_info.num_block_positions_per_image(hog_info.detection_window_size()); - const unsigned int num_bins_per_descriptor_x = num_block_positions_per_detection_window.width * src.num_channels(); - const unsigned int num_blocks_per_descriptor_y = num_block_positions_per_detection_window.height; - - ARM_COMPUTE_ERROR_ON((num_bins_per_descriptor_x * num_blocks_per_descriptor_y + 1) != hog_info.descriptor_size()); - - size_t win_id = 0; - - // Traverse feature vector in detection window steps - for(auto win_y = 0u, offset_y = 0u; win_y < num_windows.height; win_y += window_step.height, offset_y += src_offset_y) - { - for(auto win_x = 0u, offset_x = 0u; win_x < num_windows.width; win_x += window_step.width, offset_x += src_offset_x) - { - // Reset the score - float score = 0.0f; - - // Traverse detection window - for(auto y = 0u, offset_row = 0u; y < num_blocks_per_descriptor_y; ++y, offset_row += src_offset_row) - { - const int bin_offset = y * num_bins_per_descriptor_x; - - for(auto x = 0u; x < num_bins_per_descriptor_x; ++x) - { - // Compute Linear SVM - const float a = src[x + offset_x + offset_y + offset_row]; - const float b = descriptor[x + bin_offset]; - score += a * b; - } - } - - // Add the bias. The bias is located at the position (descriptor_size() - 1) - score += descriptor[num_bins_per_descriptor_x * num_blocks_per_descriptor_y]; - - if(score > threshold) - { - DetectionWindow window; - - if(win_id++ < max_num_detection_windows) - { - window.x = win_x * hog_info.block_stride().width; - window.y = win_y * hog_info.block_stride().height; - window.width = hog_info.detection_window_size().width; - window.height = hog_info.detection_window_size().height; - window.idx_class = idx_class; - window.score = score; - - windows.push_back(window); - } - } - } - } - - return windows; -} - -template std::vector<DetectionWindow> hog_detector(const SimpleTensor<float> &src, const std::vector<float> &descriptor, unsigned int max_num_detection_windows, - const HOGInfo &hog_info, const Size2D &detection_window_stride, float threshold, uint16_t idx_class); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/HOGMultiDetection.cpp b/tests/validation/reference/HOGMultiDetection.cpp deleted file mode 100644 index 50d846c0be..0000000000 --- a/tests/validation/reference/HOGMultiDetection.cpp +++ /dev/null @@ -1,279 +0,0 @@ -/* - * Copyright (c) 2017-2018 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "HOGMultiDetection.h" - -#include "Derivative.h" -#include "HOGDescriptor.h" -#include "HOGDetector.h" -#include "Magnitude.h" -#include "Phase.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -namespace -{ -void validate_models(const std::vector<HOGInfo> &models) -{ - ARM_COMPUTE_ERROR_ON(0 == models.size()); - - for(size_t i = 1; i < models.size(); ++i) - { - ARM_COMPUTE_ERROR_ON_MSG(models[0].phase_type() != models[i].phase_type(), - "All HOG parameters must have the same phase type"); - - ARM_COMPUTE_ERROR_ON_MSG(models[0].normalization_type() != models[i].normalization_type(), - "All HOG parameters must have the same normalization_type"); - - ARM_COMPUTE_ERROR_ON_MSG((models[0].l2_hyst_threshold() != models[i].l2_hyst_threshold()) && (models[0].normalization_type() == arm_compute::HOGNormType::L2HYS_NORM), - "All HOG parameters must have the same l2 hysteresis threshold if you use L2 hysteresis normalization type"); - } -} -} // namespace - -void detection_windows_non_maxima_suppression(std::vector<DetectionWindow> &multi_windows, float min_distance) -{ - const size_t num_candidates = multi_windows.size(); - size_t num_detections = 0; - - // Sort by idx_class first and by score second - std::sort(multi_windows.begin(), multi_windows.end(), [](const DetectionWindow & lhs, const DetectionWindow & rhs) - { - if(lhs.idx_class < rhs.idx_class) - { - return true; - } - if(rhs.idx_class < lhs.idx_class) - { - return false; - } - - // idx_classes are equal so compare by score - if(lhs.score > rhs.score) - { - return true; - } - if(rhs.score > lhs.score) - { - return false; - } - - return false; - }); - - const float min_distance_pow2 = min_distance * min_distance; - - // Euclidean distance - for(size_t i = 0; i < num_candidates; ++i) - { - if(0.0f != multi_windows.at(i).score) - { - DetectionWindow cur; - cur.x = multi_windows.at(i).x; - cur.y = multi_windows.at(i).y; - cur.width = multi_windows.at(i).width; - cur.height = multi_windows.at(i).height; - cur.idx_class = multi_windows.at(i).idx_class; - cur.score = multi_windows.at(i).score; - - // Store window - multi_windows.at(num_detections) = cur; - ++num_detections; - - const float xc = cur.x + cur.width * 0.5f; - const float yc = cur.y + cur.height * 0.5f; - - for(size_t k = i + 1; k < (num_candidates) && (cur.idx_class == multi_windows.at(k).idx_class); ++k) - { - const float xn = multi_windows.at(k).x + multi_windows.at(k).width * 0.5f; - const float yn = multi_windows.at(k).y + multi_windows.at(k).height * 0.5f; - - const float dx = std::fabs(xn - xc); - const float dy = std::fabs(yn - yc); - - if(dx < min_distance && dy < min_distance) - { - const float d = dx * dx + dy * dy; - - if(d < min_distance_pow2) - { - // Invalidate detection window - multi_windows.at(k).score = 0.0f; - } - } - } - } - } - - multi_windows.resize(num_detections); -} - -template <typename T> -std::vector<DetectionWindow> hog_multi_detection(const SimpleTensor<T> &src, BorderMode border_mode, T constant_border_value, - const std::vector<HOGInfo> &models, std::vector<std::vector<float>> descriptors, - unsigned int max_num_detection_windows, float threshold, bool non_maxima_suppression, float min_distance) -{ - ARM_COMPUTE_ERROR_ON(descriptors.size() != models.size()); - validate_models(models); - - const size_t width = src.shape().x(); - const size_t height = src.shape().y(); - const size_t num_models = models.size(); - - // Initialize previous values - size_t prev_num_bins = models[0].num_bins(); - Size2D prev_cell_size = models[0].cell_size(); - Size2D prev_block_size = models[0].block_size(); - Size2D prev_block_stride = models[0].block_stride(); - - std::vector<size_t> input_orient_bin; - std::vector<size_t> input_hog_detect; - std::vector<std::pair<size_t, size_t>> input_block_norm; - - input_orient_bin.push_back(0); - input_hog_detect.push_back(0); - input_block_norm.emplace_back(0, 0); - - // Iterate through the number of models and check if orientation binning - // and block normalization steps can be skipped - for(size_t i = 1; i < num_models; ++i) - { - size_t cur_num_bins = models[i].num_bins(); - Size2D cur_cell_size = models[i].cell_size(); - Size2D cur_block_size = models[i].block_size(); - Size2D cur_block_stride = models[i].block_stride(); - - // Check if binning and normalization steps are required - if((cur_num_bins != prev_num_bins) || (cur_cell_size.width != prev_cell_size.width) || (cur_cell_size.height != prev_cell_size.height)) - { - prev_num_bins = cur_num_bins; - prev_cell_size = cur_cell_size; - prev_block_size = cur_block_size; - prev_block_stride = cur_block_stride; - - // Compute orientation binning and block normalization. Update input to process - input_orient_bin.push_back(i); - input_block_norm.emplace_back(i, input_orient_bin.size() - 1); - } - else if((cur_block_size.width != prev_block_size.width) || (cur_block_size.height != prev_block_size.height) || (cur_block_stride.width != prev_block_stride.width) - || (cur_block_stride.height != prev_block_stride.height)) - { - prev_block_size = cur_block_size; - prev_block_stride = cur_block_stride; - - // Compute block normalization. Update input to process - input_block_norm.emplace_back(i, input_orient_bin.size() - 1); - } - - // Update input to process for hog detector - input_hog_detect.push_back(input_block_norm.size() - 1); - } - - size_t num_orient_bin = input_orient_bin.size(); - size_t num_block_norm = input_block_norm.size(); - size_t num_hog_detect = input_hog_detect.size(); - - std::vector<SimpleTensor<float>> hog_spaces(num_orient_bin); - std::vector<SimpleTensor<float>> hog_norm_spaces(num_block_norm); - - // Calculate derivative - SimpleTensor<int16_t> grad_x; - SimpleTensor<int16_t> grad_y; - std::tie(grad_x, grad_y) = derivative<int16_t>(src, border_mode, constant_border_value, GradientDimension::GRAD_XY); - - // Calculate magnitude and phase - SimpleTensor<int16_t> _mag = magnitude(grad_x, grad_y, MagnitudeType::L2NORM); - SimpleTensor<uint8_t> _phase = phase(grad_x, grad_y, models[0].phase_type()); - - // Calculate Tensors for the HOG space and orientation binning - for(size_t i = 0; i < num_orient_bin; ++i) - { - const size_t idx_multi_hog = input_orient_bin[i]; - - const size_t num_bins = models[idx_multi_hog].num_bins(); - const size_t num_cells_x = width / models[idx_multi_hog].cell_size().width; - const size_t num_cells_y = height / models[idx_multi_hog].cell_size().height; - - // TensorShape of hog space - TensorShape hog_space_shape(num_cells_x, num_cells_y); - - // Initialise HOG space - TensorInfo info_hog_space(hog_space_shape, num_bins, DataType::F32); - hog_spaces.at(i) = SimpleTensor<float>(info_hog_space.tensor_shape(), DataType::F32, info_hog_space.num_channels()); - - // For each cell create histogram based on magnitude and phase - hog_orientation_binning(_mag, _phase, hog_spaces[i], models[idx_multi_hog]); - } - - // Calculate Tensors for the normalized HOG space and block normalization - for(size_t i = 0; i < num_block_norm; ++i) - { - const size_t idx_multi_hog = input_block_norm[i].first; - const size_t idx_orient_bin = input_block_norm[i].second; - - // Create tensor info for HOG descriptor - TensorInfo tensor_info(models[idx_multi_hog], src.shape().x(), src.shape().y()); - hog_norm_spaces.at(i) = SimpleTensor<float>(tensor_info.tensor_shape(), DataType::F32, tensor_info.num_channels()); - - // Normalize histograms based on block size - hog_block_normalization(hog_norm_spaces[i], hog_spaces[idx_orient_bin], models[idx_multi_hog]); - } - - std::vector<DetectionWindow> multi_windows; - - // Calculate Detection Windows for HOG detector - for(size_t i = 0; i < num_hog_detect; ++i) - { - const size_t idx_block_norm = input_hog_detect[i]; - - // NOTE: Detection window stride fixed to block stride - const Size2D detection_window_stride = models[i].block_stride(); - - std::vector<DetectionWindow> windows = hog_detector(hog_norm_spaces[idx_block_norm], descriptors[i], - max_num_detection_windows, models[i], detection_window_stride, threshold, i); - - multi_windows.insert(multi_windows.end(), windows.begin(), windows.end()); - } - - // Suppress Non-maxima detection windows - if(non_maxima_suppression) - { - detection_windows_non_maxima_suppression(multi_windows, min_distance); - } - - return multi_windows; -} - -template std::vector<DetectionWindow> hog_multi_detection(const SimpleTensor<uint8_t> &src, BorderMode border_mode, uint8_t constant_border_value, - const std::vector<HOGInfo> &models, std::vector<std::vector<float>> descriptors, - unsigned int max_num_detection_windows, float threshold, bool non_maxima_suppression, float min_distance); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/HarrisCornerDetector.cpp b/tests/validation/reference/HarrisCornerDetector.cpp deleted file mode 100644 index 6c46b3de5d..0000000000 --- a/tests/validation/reference/HarrisCornerDetector.cpp +++ /dev/null @@ -1,205 +0,0 @@ -/* - * Copyright (c) 2017 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "HarrisCornerDetector.h" - -#include "Utils.h" -#include "tests/validation/Helpers.h" -#include "tests/validation/reference/NonMaximaSuppression.h" -#include "tests/validation/reference/Sobel.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -namespace -{ -template <typename T> -std::tuple<SimpleTensor<T>, SimpleTensor<T>, float> compute_sobel(const SimpleTensor<uint8_t> &src, int gradient_size, int block_size, BorderMode border_mode, uint8_t constant_border_value) -{ - SimpleTensor<T> grad_x; - SimpleTensor<T> grad_y; - float norm_factor = 0.f; - - std::tie(grad_x, grad_y) = sobel<T>(src, gradient_size, border_mode, constant_border_value, GradientDimension::GRAD_XY); - - switch(gradient_size) - { - case 3: - norm_factor = 1.f / (4 * 255 * block_size); - break; - case 5: - norm_factor = 1.f / (16 * 255 * block_size); - break; - case 7: - norm_factor = 1.f / (64 * 255 * block_size); - break; - default: - ARM_COMPUTE_ERROR("Gradient size not supported."); - } - - return std::make_tuple(grad_x, grad_y, norm_factor); -} - -template <typename T, typename U> -std::vector<KeyPoint> harris_corner_detector_impl(const SimpleTensor<U> &src, float threshold, float min_dist, float sensitivity, int gradient_size, int block_size, BorderMode border_mode, - U constant_border_value) -{ - ARM_COMPUTE_ERROR_ON(block_size != 3 && block_size != 5 && block_size != 7); - - SimpleTensor<T> grad_x; - SimpleTensor<T> grad_y; - float norm_factor = 0.f; - - // Sobel - std::tie(grad_x, grad_y, norm_factor) = compute_sobel<T>(src, gradient_size, block_size, border_mode, constant_border_value); - - SimpleTensor<float> scores(src.shape(), DataType::F32); - ValidRegion scores_region = shape_to_valid_region(scores.shape(), border_mode == BorderMode::UNDEFINED, BorderSize(gradient_size / 2 + block_size / 2)); - - // Calculate scores - for(int i = 0; i < scores.num_elements(); ++i) - { - Coordinates src_coord = index2coord(src.shape(), i); - Coordinates block_top_left{ src_coord.x() - block_size / 2, src_coord.y() - block_size / 2 }; - Coordinates block_bottom_right{ src_coord.x() + block_size / 2, src_coord.y() + block_size / 2 }; - - if(!is_in_valid_region(scores_region, src_coord)) - { - scores[i] = 0.f; - continue; - } - - float Gx2 = 0.f; - float Gy2 = 0.f; - float Gxy = 0.f; - - // Calculate Gx^2, Gy^2 and Gxy within the given window - for(int y = block_top_left.y(); y <= block_bottom_right.y(); ++y) - { - for(int x = block_top_left.x(); x <= block_bottom_right.x(); ++x) - { - Coordinates block_coord(x, y); - - const float norm_x = tensor_elem_at(grad_x, block_coord, border_mode, static_cast<T>(constant_border_value)) * norm_factor; - const float norm_y = tensor_elem_at(grad_y, block_coord, border_mode, static_cast<T>(constant_border_value)) * norm_factor; - - Gx2 += std::pow(norm_x, 2); - Gy2 += std::pow(norm_y, 2); - Gxy += norm_x * norm_y; - } - } - - const float trace2 = std::pow(Gx2 + Gy2, 2); - const float det = Gx2 * Gy2 - std::pow(Gxy, 2); - const float response = det - sensitivity * trace2; - - if(response > threshold) - { - scores[i] = response; - } - else - { - scores[i] = 0.f; - } - } - - // Suppress non-maxima candidates - SimpleTensor<float> suppressed_scores = non_maxima_suppression(scores, border_mode != BorderMode::UNDEFINED ? BorderMode::CONSTANT : BorderMode::UNDEFINED, 0.f); - ValidRegion suppressed_scores_region = shape_to_valid_region(suppressed_scores.shape(), border_mode == BorderMode::UNDEFINED, BorderSize(gradient_size / 2 + block_size / 2 + 1)); - - // Create vector of candidate corners - std::vector<KeyPoint> corner_candidates; - - for(int i = 0; i < suppressed_scores.num_elements(); ++i) - { - Coordinates coord = index2coord(suppressed_scores.shape(), i); - - if(is_in_valid_region(suppressed_scores_region, coord) && suppressed_scores[i] != 0.f) - { - KeyPoint corner; - corner.x = coord.x(); - corner.y = coord.y(); - corner.tracking_status = 1; - corner.strength = suppressed_scores[i]; - corner.scale = 0.f; - corner.orientation = 0.f; - corner.error = 0.f; - - corner_candidates.emplace_back(corner); - } - } - - // Sort descending by strength - std::sort(corner_candidates.begin(), corner_candidates.end(), [](const KeyPoint & a, const KeyPoint & b) - { - return a.strength > b.strength; - }); - - std::vector<KeyPoint> corners; - corners.reserve(corner_candidates.size()); - - // Only add corner if there is no stronger within min_dist - for(const KeyPoint &point : corner_candidates) - { - const auto strongest = std::find_if(corners.begin(), corners.end(), [&](const KeyPoint & other) - { - return std::sqrt((std::pow(point.x - other.x, 2) + std::pow(point.y - other.y, 2))) < min_dist; - }); - - if(strongest == corners.end()) - { - corners.emplace_back(point); - } - } - - corners.shrink_to_fit(); - - return corners; -} -} // namespace - -template <typename T> -std::vector<KeyPoint> harris_corner_detector(const SimpleTensor<T> &src, float threshold, float min_dist, float sensitivity, int gradient_size, int block_size, BorderMode border_mode, - T constant_border_value) -{ - if(gradient_size < 7) - { - return harris_corner_detector_impl<int16_t>(src, threshold, min_dist, sensitivity, gradient_size, block_size, border_mode, constant_border_value); - } - else - { - return harris_corner_detector_impl<int32_t>(src, threshold, min_dist, sensitivity, gradient_size, block_size, border_mode, constant_border_value); - } -} - -template std::vector<KeyPoint> harris_corner_detector(const SimpleTensor<uint8_t> &src, float threshold, float min_dist, float sensitivity, int gradient_size, int block_size, BorderMode border_mode, - uint8_t constant_border_value); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/Histogram.cpp b/tests/validation/reference/Histogram.cpp deleted file mode 100644 index f9c77108e1..0000000000 --- a/tests/validation/reference/Histogram.cpp +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2017 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "Histogram.h" - -#include "Utils.h" -#include "arm_compute/core/Helpers.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<uint32_t> histogram(const SimpleTensor<T> &src, size_t num_bins, int32_t offset, uint32_t range) -{ - SimpleTensor<uint32_t> dst(TensorShape(num_bins), DataType::U32); - - // Clear the distribution - for(size_t element_idx = 0; element_idx < num_bins; ++element_idx) - { - dst[element_idx] = 0; - } - - // Create the histogram - for(int element_idx = 0; element_idx < src.num_elements(); ++element_idx) - { - if((offset <= src[element_idx]) && (src[element_idx] < (offset + range))) - { - const int index = (src[element_idx] - offset) * num_bins / range; - dst[index]++; - } - } - - return dst; -} - -template SimpleTensor<uint32_t> histogram(const SimpleTensor<uint8_t> &src, size_t num_bins, int32_t offset, uint32_t range); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/Histogram.h b/tests/validation/reference/Histogram.h deleted file mode 100644 index 5f6c7d27a2..0000000000 --- a/tests/validation/reference/Histogram.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_HISTOGRAM_H -#define ARM_COMPUTE_TEST_HISTOGRAM_H - -#include "tests/SimpleTensor.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<uint32_t> histogram(const SimpleTensor<T> &src, size_t num_bins, int32_t offset, uint32_t range); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_HISTOGRAM_H */ diff --git a/tests/validation/reference/IndirectConv2dAddressPrecalculation.cpp b/tests/validation/reference/IndirectConv2dAddressPrecalculation.cpp new file mode 100644 index 0000000000..7500560c91 --- /dev/null +++ b/tests/validation/reference/IndirectConv2dAddressPrecalculation.cpp @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "IndirectConv2dAddressPrecalculation.h" + +#include "arm_compute/core/Types.h" + +#include "tests/validation/Helpers.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ +SimpleTensor<int32_t> indirect_conv2d_addr_precalculation(const TensorShape &shape_conv_src, const TensorShape &shape_conv_wei, const TensorShape &shape_conv_dst, const TensorShape &shape_dst, + const PadStrideInfo &conv_info) +{ + SimpleTensor<int32_t> out{ shape_dst, DataType::S32 }; + + constexpr unsigned int width_idx = 1; + constexpr unsigned int heigh_idx = 2; + + const int src_conv_width = static_cast<int32_t>(shape_conv_src[width_idx]); // NHWC + const int src_conv_height = static_cast<int32_t>(shape_conv_src[heigh_idx]); // NHWC + const int dst_conv_width = static_cast<int32_t>(shape_conv_dst[width_idx]); // NHWC + const int wei_conv_width = static_cast<int32_t>(shape_conv_wei[width_idx]); // NHWC + const int wei_conv_height = static_cast<int32_t>(shape_conv_wei[heigh_idx]); // NHWC + const int dst_width = static_cast<int32_t>(shape_dst[0]); + const int dst_height = static_cast<int32_t>(shape_dst[1]); + const int dst_batch = static_cast<int32_t>(shape_dst[2]); + const int ks = wei_conv_width * wei_conv_height; + const int stride_x = static_cast<int32_t>(conv_info.stride().first); + const int stride_y = static_cast<int32_t>(conv_info.stride().second); + const int pad_left = static_cast<int32_t>(conv_info.pad_left()); + const int pad_top = static_cast<int32_t>(conv_info.pad_top()); + + const int m0 = dst_width / ks; + + for(int z = 0; z < dst_batch; ++z) + { + for(int y = 0; y < dst_height; ++y) + { + const int mout = y * m0; + for(int ki = 0; ki < ks; ++ki) + { + const int xk = ki % wei_conv_width; + const int yk = ki / wei_conv_width; + for(int mi = 0; mi < m0; ++mi) + { + int xi = ((mout + mi) % dst_conv_width) * stride_x; + int yi = ((mout + mi) / dst_conv_width) * stride_y; + xi -= pad_left; + yi -= pad_top; + const int x_s = xi + xk; + const int y_s = yi + yk; + int my = x_s + y_s * src_conv_width; + my = my + z * src_conv_width * src_conv_height; + my = x_s >= 0 ? my : -1; + my = x_s < src_conv_width ? my : -1; + my = y_s >= 0 ? my : -1; + my = y_s < src_conv_height ? my : -1; + + const unsigned int addr_out = mi + ki * m0 + y * (dst_width) + z * (dst_width * dst_height); + out[addr_out] = my; + } + } + } + } + + return out; +} +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute
\ No newline at end of file diff --git a/tests/validation/reference/YOLOLayer.h b/tests/validation/reference/IndirectConv2dAddressPrecalculation.h index 33cf630aca..f4a90dfd9f 100644 --- a/tests/validation/reference/YOLOLayer.h +++ b/tests/validation/reference/IndirectConv2dAddressPrecalculation.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_YOLO_LAYER_H -#define ARM_COMPUTE_TEST_YOLO_LAYER_H +#ifndef ARM_COMPUTE_TEST_INDIRECT_CONV2D_ADDRESS_PRECALCULATION_H +#define ARM_COMPUTE_TEST_INDIRECT_CONV2D_ADDRESS_PRECALCULATION_H #include "tests/SimpleTensor.h" #include "tests/validation/Helpers.h" @@ -35,13 +35,10 @@ namespace validation { namespace reference { -template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type = 0> -SimpleTensor<T> yolo_layer(const SimpleTensor<T> &src, const ActivationLayerInfo &info, int32_t num_classes); - -template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type = 0> -SimpleTensor<T> yolo_layer(const SimpleTensor<T> &src, const ActivationLayerInfo &info, int32_t num_classes); +SimpleTensor<int32_t> indirect_conv2d_addr_precalculation(const TensorShape &shape_conv_src, const TensorShape &shape_conv_wei, const TensorShape &shape_conv_out, const TensorShape &shape_out, + const PadStrideInfo &conv_info); } // namespace reference } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_YOLO_LAYER_H */ +#endif /* ARM_COMPUTE_TEST_INDIRECT_CONV2D_ADDRESS_PRECALCULATION_H */
\ No newline at end of file diff --git a/tests/validation/reference/IntegralImage.cpp b/tests/validation/reference/IntegralImage.cpp deleted file mode 100644 index 0f6a7504fe..0000000000 --- a/tests/validation/reference/IntegralImage.cpp +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2017 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "IntegralImage.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<uint32_t> integral_image(const SimpleTensor<T> &src) -{ - SimpleTensor<uint32_t> dst(src.shape(), DataType::U32); - - // Length of dimensions - const size_t width = src.shape().x(); - const size_t height = src.shape().y(); - const size_t depth = src.shape().total_size_upper(2); - - const size_t image_size = width * height; - - for(size_t z = 0; z < depth; ++z) - { - size_t current_image = z * image_size; - - //First element of each image - dst[current_image] = src[current_image]; - - // First row of each image (add only pixel on the left) - for(size_t x = 1; x < width; ++x) - { - dst[current_image + x] = static_cast<uint32_t>(src[current_image + x]) + dst[current_image + x - 1]; - } - - // Subsequent rows - for(size_t y = 1; y < height; ++y) - { - size_t current_row = current_image + (width * y); - - // First element of each row (add only pixel up) - dst[current_row] = static_cast<uint32_t>(src[current_row]) + dst[current_row - width]; - - // Following row elements - for(size_t x = 1; x < width; ++x) - { - size_t current_pixel = current_row + x; - - // out = in + up(out) + left(out) - up_left(out) - dst[current_pixel] = static_cast<uint32_t>(src[current_pixel]) + dst[current_pixel - 1] - + dst[current_pixel - width] - dst[current_pixel - width - 1]; - } - } - } - - return dst; -} - -template SimpleTensor<uint32_t> integral_image(const SimpleTensor<uint8_t> &src); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/IntegralImage.h b/tests/validation/reference/IntegralImage.h deleted file mode 100644 index 2c9b96a1d6..0000000000 --- a/tests/validation/reference/IntegralImage.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_INTEGRAL_IMAGE_H -#define ARM_COMPUTE_TEST_INTEGRAL_IMAGE_H - -#include "tests/SimpleTensor.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<uint32_t> integral_image(const SimpleTensor<T> &src); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_INTEGRAL_IMAGE_H */ diff --git a/tests/validation/reference/LaplacianPyramid.cpp b/tests/validation/reference/LaplacianPyramid.cpp deleted file mode 100644 index 904b8403b3..0000000000 --- a/tests/validation/reference/LaplacianPyramid.cpp +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2018 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "LaplacianPyramid.h" - -#include "tests/validation/reference/ArithmeticOperations.h" -#include "tests/validation/reference/DepthConvertLayer.h" -#include "tests/validation/reference/Gaussian5x5.h" -#include "tests/validation/reference/GaussianPyramidHalf.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T, typename U> -std::vector<SimpleTensor<U>> laplacian_pyramid(const SimpleTensor<T> &src, SimpleTensor<U> &dst, size_t num_levels, BorderMode border_mode, uint8_t constant_border_value) -{ - std::vector<SimpleTensor<T>> pyramid_conv; - std::vector<SimpleTensor<U>> pyramid_dst; - - // First, a Gaussian pyramid with SCALE_PYRAMID_HALF is created - std::vector<SimpleTensor<T>> gaussian_level_pyramid = reference::gaussian_pyramid_half(src, border_mode, constant_border_value, num_levels); - - // For each level i, the corresponding image Ii is blurred with Gaussian 5x5 - // filter, and the difference between the two images is the corresponding - // level Li of the Laplacian pyramid - for(size_t i = 0; i < num_levels; ++i) - { - const SimpleTensor<T> level_filtered = reference::gaussian5x5(gaussian_level_pyramid[i], border_mode, constant_border_value); - pyramid_conv.push_back(level_filtered); - - const SimpleTensor<U> level_filtered_converted = depth_convert<T, U>(level_filtered, DataType::S16, ConvertPolicy::WRAP, 0); - const SimpleTensor<U> gaussian_level_converted = depth_convert<T, U>(gaussian_level_pyramid[i], DataType::S16, ConvertPolicy::WRAP, 0); - - const SimpleTensor<U> level_sub = reference::arithmetic_operation<U>(reference::ArithmeticOperation::SUB, gaussian_level_converted, level_filtered_converted, dst.data_type(), ConvertPolicy::WRAP); - pyramid_dst.push_back(level_sub); - } - - // Return the lowest resolution image and the pyramid - dst = depth_convert<T, U>(pyramid_conv[num_levels - 1], DataType::S16, ConvertPolicy::WRAP, 0); - - return pyramid_dst; -} - -template std::vector<SimpleTensor<int16_t>> laplacian_pyramid(const SimpleTensor<uint8_t> &src, SimpleTensor<int16_t> &dst, size_t num_levels, BorderMode border_mode, uint8_t constant_border_value); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/LaplacianPyramid.h b/tests/validation/reference/LaplacianPyramid.h deleted file mode 100644 index 0596b81648..0000000000 --- a/tests/validation/reference/LaplacianPyramid.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_LAPLACIAN_PYRAMID_H -#define ARM_COMPUTE_TEST_LAPLACIAN_PYRAMID_H - -#include "tests/SimpleTensor.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T, typename U> -std::vector<SimpleTensor<U>> laplacian_pyramid(const SimpleTensor<T> &src, SimpleTensor<U> &dst, size_t num_levels, BorderMode border_mode, uint8_t constant_border_value); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_LAPLACIAN_PYRAMID_H */ diff --git a/tests/validation/reference/LaplacianReconstruct.cpp b/tests/validation/reference/LaplacianReconstruct.cpp deleted file mode 100644 index 2a0fcc2a65..0000000000 --- a/tests/validation/reference/LaplacianReconstruct.cpp +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2018 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "LaplacianReconstruct.h" - -#include "arm_compute/core/Types.h" -#include "tests/validation/reference/ArithmeticOperations.h" -#include "tests/validation/reference/DepthConvertLayer.h" -#include "tests/validation/reference/Scale.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T, typename U> -SimpleTensor<U> laplacian_reconstruct(const std::vector<SimpleTensor<T>> &pyramid, const SimpleTensor<T> &low_res, BorderMode border_mode, T constant_border_value) -{ - std::vector<SimpleTensor<T>> tmp_pyramid(pyramid); - - const size_t last_level = pyramid.size() - 1; - const DataType data_type = low_res.data_type(); - - // input + L(n-1) - tmp_pyramid[last_level] = reference::arithmetic_operation(reference::ArithmeticOperation::ADD, low_res, pyramid[last_level], data_type, ConvertPolicy::SATURATE); - - // Scale levels n-1 to 1, and add levels n-2 to 0 - for(size_t i = last_level; i-- > 0;) - { - const float scale_x = static_cast<float>(tmp_pyramid[i].shape().x()) / tmp_pyramid[i + 1].shape().x(); - const float scale_y = static_cast<float>(tmp_pyramid[i].shape().y()) / tmp_pyramid[i + 1].shape().y(); - - tmp_pyramid[i] = reference::scale(tmp_pyramid[i + 1], scale_x, scale_y, InterpolationPolicy::NEAREST_NEIGHBOR, - border_mode, constant_border_value, SamplingPolicy::CENTER, false); - - tmp_pyramid[i] = reference::arithmetic_operation(reference::ArithmeticOperation::ADD, tmp_pyramid[i], pyramid[i], data_type, ConvertPolicy::SATURATE); - } - - return reference::depth_convert<T, U>(tmp_pyramid[0], DataType::U8, ConvertPolicy::SATURATE, 0); -} - -template SimpleTensor<uint8_t> laplacian_reconstruct(const std::vector<SimpleTensor<int16_t>> &pyramid, const SimpleTensor<int16_t> &low_res, BorderMode border_mode, int16_t constant_border_value); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/LaplacianReconstruct.h b/tests/validation/reference/LaplacianReconstruct.h deleted file mode 100644 index 8820c92983..0000000000 --- a/tests/validation/reference/LaplacianReconstruct.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_LAPLACIAN_RECONSTRUCT_H -#define ARM_COMPUTE_TEST_LAPLACIAN_RECONSTRUCT_H - -#include "tests/SimpleTensor.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T, typename U> -SimpleTensor<U> laplacian_reconstruct(const std::vector<SimpleTensor<T>> &pyramid, const SimpleTensor<T> &low_res, BorderMode border_mode, T constant_border_value); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_LAPLACIAN_RECONSTRUCT_H */ diff --git a/tests/validation/reference/Magnitude.cpp b/tests/validation/reference/Magnitude.cpp deleted file mode 100644 index 390aaa5d48..0000000000 --- a/tests/validation/reference/Magnitude.cpp +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2017 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "Magnitude.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<T> magnitude(const SimpleTensor<T> &gx, const SimpleTensor<T> &gy, MagnitudeType magnitude_type) -{ - SimpleTensor<T> mag(gx.shape(), gx.data_type()); - - using intermediate_type = typename common_promoted_unsigned_type<T>::intermediate_type; - - for(int i = 0; i < gx.num_elements(); ++i) - { - double val = 0.f; - - if(magnitude_type == MagnitudeType::L1NORM) - { - val = static_cast<intermediate_type>(std::abs(gx[i])) + static_cast<intermediate_type>(std::abs(gy[i])); - } - else // MagnitudeType::L2NORM - { - // Note: kernel saturates to uint32_t instead of intermediate_type for S32 format - auto sum = static_cast<uint32_t>(gx[i] * gx[i]) + static_cast<uint32_t>(gy[i] * gy[i]); - val = std::sqrt(sum) + 0.5f; - } - - mag[i] = saturate_cast<T>(val); - } - - return mag; -} - -template SimpleTensor<int16_t> magnitude(const SimpleTensor<int16_t> &gx, const SimpleTensor<int16_t> &gy, MagnitudeType magnitude_type); -template SimpleTensor<int32_t> magnitude(const SimpleTensor<int32_t> &gx, const SimpleTensor<int32_t> &gy, MagnitudeType magnitude_type); -template SimpleTensor<half_float::half> magnitude(const SimpleTensor<half_float::half> &gx, const SimpleTensor<half_float::half> &gy, MagnitudeType magnitude_type); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/Magnitude.h b/tests/validation/reference/Magnitude.h deleted file mode 100644 index 81db27de20..0000000000 --- a/tests/validation/reference/Magnitude.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_MAGNITUDE_H -#define ARM_COMPUTE_TEST_MAGNITUDE_H - -#include "tests/SimpleTensor.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<T> magnitude(const SimpleTensor<T> &gx, const SimpleTensor<T> &gy, MagnitudeType magnitude_type); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_MAGNITUDE_H */ diff --git a/tests/validation/reference/MeanStdDevNormalizationLayer.cpp b/tests/validation/reference/MeanStdDevNormalizationLayer.cpp index 0a23fa19bb..a7c8a784d9 100644 --- a/tests/validation/reference/MeanStdDevNormalizationLayer.cpp +++ b/tests/validation/reference/MeanStdDevNormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019, 2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -63,6 +63,15 @@ SimpleTensor<T> mean_std_normalization_layer(const SimpleTensor<T> &src, float e return dst; } +template <> +SimpleTensor<uint8_t> mean_std_normalization_layer(const SimpleTensor<uint8_t> &src, float epsilon) +{ + SimpleTensor<float> src_tmp = convert_from_asymmetric(src); + SimpleTensor<float> dst_tmp = mean_std_normalization_layer<float>(src_tmp, epsilon); + SimpleTensor<uint8_t> dst = convert_to_asymmetric<uint8_t>(dst_tmp, src.quantization_info()); + return dst; +} + template SimpleTensor<float> mean_std_normalization_layer(const SimpleTensor<float> &src, float epsilon); template SimpleTensor<half> mean_std_normalization_layer(const SimpleTensor<half> &src, float epsilon); } // namespace reference diff --git a/tests/validation/reference/Median3x3.cpp b/tests/validation/reference/Median3x3.cpp deleted file mode 100644 index 55f5f62292..0000000000 --- a/tests/validation/reference/Median3x3.cpp +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Helpers.h" - -#include "Median3x3.h" -#include "Utils.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -namespace -{ -constexpr unsigned int filter_size = 3; /* Size of the kernel/filter in number of elements. */ -constexpr BorderSize border_size(filter_size / 2); /* Border size of the kernel/filter around its central element. */ -} // namespace - -template <typename T> -SimpleTensor<T> median3x3(const SimpleTensor<T> &src, BorderMode border_mode, T constant_border_value) -{ - SimpleTensor<T> dst(src.shape(), src.data_type()); - const int size_tot_filter = filter_size * filter_size; - const uint32_t num_elements = src.num_elements(); - - for(uint32_t src_idx = 0; src_idx < num_elements; ++src_idx) - { - std::array<T, size_tot_filter> filter_elems = { { 0 } }; - Coordinates id = index2coord(src.shape(), src_idx); - const int x = id.x(); - const int y = id.y(); - - for(int j = y - static_cast<int>(border_size.top), index = 0; j <= y + static_cast<int>(border_size.bottom); ++j) - { - for(int i = x - static_cast<int>(border_size.left); i <= x + static_cast<int>(border_size.right); ++i, ++index) - { - id.set(0, i); - id.set(1, j); - filter_elems[index] = tensor_elem_at(src, id, border_mode, constant_border_value); - } - } - std::sort(filter_elems.begin(), filter_elems.end()); - dst[src_idx] = filter_elems[size_tot_filter / 2]; - } - - return dst; -} - -template SimpleTensor<uint8_t> median3x3(const SimpleTensor<uint8_t> &src, BorderMode border_mode, uint8_t constant_border_value); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/Median3x3.h b/tests/validation/reference/Median3x3.h deleted file mode 100644 index a10f428d0c..0000000000 --- a/tests/validation/reference/Median3x3.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_MEDIAN3X3_H -#define ARM_COMPUTE_TEST_MEDIAN3X3_H - -#include "tests/SimpleTensor.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<T> median3x3(const SimpleTensor<T> &src, BorderMode border_mode, T constant_border_value); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_MEDIAN3X3_H */ diff --git a/tests/validation/reference/NonLinearFilter.cpp b/tests/validation/reference/NonLinearFilter.cpp deleted file mode 100644 index ada8286927..0000000000 --- a/tests/validation/reference/NonLinearFilter.cpp +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "NonLinearFilter.h" -#include "Utils.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<T> non_linear_filter(const SimpleTensor<T> &src, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, BorderMode border_mode, - uint8_t constant_border_value) -{ - SimpleTensor<T> dst(src.shape(), src.data_type()); - - ARM_COMPUTE_ERROR_ON(pattern == MatrixPattern::OTHER && mask == nullptr); - ARM_COMPUTE_UNUSED(pattern); - - using intermediate_type = typename common_promoted_signed_type<T>::intermediate_type; - - const int sq_mask_size = mask_size * mask_size; - const int half_mask_size = mask_size / 2; - std::vector<intermediate_type> vals(sq_mask_size); - intermediate_type current_value = 0; - - const ValidRegion valid_region = shape_to_valid_region(src.shape(), border_mode == BorderMode::UNDEFINED, BorderSize(half_mask_size)); - const uint32_t num_elements = src.num_elements(); - - for(uint32_t element_idx = 0, count = 0, index = 0; element_idx < num_elements; ++element_idx, count = 0, index = 0) - { - Coordinates id = index2coord(src.shape(), element_idx); - if(is_in_valid_region(valid_region, id)) - { - int idx = id.x(); - int idy = id.y(); - for(int y = idy - half_mask_size; y <= idy + half_mask_size; ++y) - { - for(int x = idx - half_mask_size; x <= idx + half_mask_size; ++x, ++index) - { - id.set(0, x); - id.set(1, y); - current_value = tensor_elem_at(src, id, border_mode, constant_border_value); - - if(mask[index] == 255) - { - vals[count] = static_cast<intermediate_type>(current_value); - ++count; - } - } - } - std::sort(vals.begin(), vals.begin() + count); - - ARM_COMPUTE_ERROR_ON(count == 0); - - switch(function) - { - case NonLinearFilterFunction::MIN: - dst[element_idx] = saturate_cast<T>(vals[0]); - break; - case NonLinearFilterFunction::MAX: - dst[element_idx] = saturate_cast<T>(vals[count - 1]); - break; - case NonLinearFilterFunction::MEDIAN: - dst[element_idx] = saturate_cast<T>(vals[count / 2]); - break; - default: - ARM_COMPUTE_ERROR("Unsupported NonLinearFilter function."); - } - } - } - - return dst; -} - -template SimpleTensor<uint8_t> non_linear_filter(const SimpleTensor<uint8_t> &src, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, - BorderMode border_mode, uint8_t constant_border_value); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/NonLinearFilter.h b/tests/validation/reference/NonLinearFilter.h deleted file mode 100644 index ecf6563a15..0000000000 --- a/tests/validation/reference/NonLinearFilter.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_BITWISE_NOT_H -#define ARM_COMPUTE_TEST_BITWISE_NOT_H - -#include "tests/SimpleTensor.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<T> non_linear_filter(const SimpleTensor<T> &src, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, BorderMode border_mode, - uint8_t constant_border_value); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_BITWISE_NOT_H */ diff --git a/tests/validation/reference/OpticalFlow.cpp b/tests/validation/reference/OpticalFlow.cpp deleted file mode 100644 index 0a04214045..0000000000 --- a/tests/validation/reference/OpticalFlow.cpp +++ /dev/null @@ -1,404 +0,0 @@ -/* - * Copyright (c) 2018 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "OpticalFlow.h" - -#include "GaussianPyramidHalf.h" -#include "Scharr.h" -#include "Utils.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -namespace -{ -using KeyPointArray = std::vector<KeyPoint>; -using InternalKeyPointArray = std::vector<InternalKeyPoint>; - -// Constants used for Lucas-Kanade Algorithm -constexpr int W_BITS = 14; -constexpr float D0 = 1 << W_BITS; -constexpr float DETERMINANT_THRESHOLD = 1.0e-07f; -constexpr float EIGENVALUE_THRESHOLD = 1.0e-04f; -constexpr float FLT_SCALE = 1.0f / (1 << 20); - -// Creates an InternalKeyPointArray for tracking non-integral pixel coordinates -InternalKeyPointArray create_internal_keypoints(const KeyPointArray &keypoints) -{ - InternalKeyPointArray internal_keypoints; - - for(auto keypoint : keypoints) - { - InternalKeyPoint internal_keypoint; - - internal_keypoint.x = static_cast<float>(keypoint.x); - internal_keypoint.y = static_cast<float>(keypoint.y); - internal_keypoint.tracking_status = static_cast<bool>(keypoint.tracking_status); - - internal_keypoints.push_back(internal_keypoint); - } - - return internal_keypoints; -} - -// Scale tracked points based on Pyramid level -void scale_tracked_points(size_t level, size_t num_levels, bool use_initial_estimate, - InternalKeyPointArray &old_points_internal, InternalKeyPointArray &new_points_internal, - const KeyPointArray &old_points, const KeyPointArray &new_points_estimates) -{ - if(level == num_levels - 1) // lowest resolution - { - const float scale = std::pow(SCALE_PYRAMID_HALF, level); - - for(size_t i = 0; i < old_points.size(); ++i) - { - old_points_internal.at(i).x = old_points.at(i).x * scale; - old_points_internal.at(i).y = old_points.at(i).y * scale; - old_points_internal.at(i).tracking_status = true; - - InternalKeyPoint keypoint_to_track; - - if(use_initial_estimate) - { - keypoint_to_track.x = new_points_estimates.at(i).x * scale; - keypoint_to_track.y = new_points_estimates.at(i).y * scale; - keypoint_to_track.tracking_status = (new_points_estimates.at(i).tracking_status == 1); - } - else - { - keypoint_to_track.x = old_points_internal.at(i).x; - keypoint_to_track.y = old_points_internal.at(i).y; - keypoint_to_track.tracking_status = true; - } - - new_points_internal.at(i) = keypoint_to_track; - } - } - else - { - for(size_t i = 0; i < old_points.size(); ++i) - { - old_points_internal.at(i).x /= SCALE_PYRAMID_HALF; - old_points_internal.at(i).y /= SCALE_PYRAMID_HALF; - new_points_internal.at(i).x /= SCALE_PYRAMID_HALF; - new_points_internal.at(i).y /= SCALE_PYRAMID_HALF; - } - } -} - -bool is_invalid_keypoint(const InternalKeyPoint &keypoint, const ValidRegion &valid_region, size_t window_dimension) -{ - const int half_window = window_dimension / 2; - const int x = std::floor(keypoint.x); - const int y = std::floor(keypoint.y); - - return (x - half_window < valid_region.start(0)) || (x + half_window >= valid_region.end(0) - 1) || (y - half_window < valid_region.start(1)) || (y + half_window >= valid_region.end(1) - 1); -} - -template <typename T> -constexpr int INT_ROUND(T x, int n) -{ - return (x + (1 << (n - 1))) >> n; -} - -// Return the bilinear value at a specified coordinate with different border modes -template <typename T> -int bilinear_interpolate(const SimpleTensor<T> &in, Coordinates id, float wx, float wy, BorderMode border_mode, T constant_border_value, int scale) -{ - const int level = id.x(); - const int idy = id.y(); - - const float dx = wx; - const float dy = wy; - const float dx_1 = 1.0f - dx; - const float dy_1 = 1.0f - dy; - - const T border_value = constant_border_value; - - id.set(0, level); - id.set(1, idy); - const T tl = tensor_elem_at(in, id, border_mode, border_value); - id.set(0, level + 1); - id.set(1, idy); - const T tr = tensor_elem_at(in, id, border_mode, border_value); - id.set(0, level); - id.set(1, idy + 1); - const T bl = tensor_elem_at(in, id, border_mode, border_value); - id.set(0, level + 1); - id.set(1, idy + 1); - const T br = tensor_elem_at(in, id, border_mode, border_value); - - // weights - const int w00 = roundf(dx_1 * dy_1 * D0); - const int w01 = roundf(dx * dy_1 * D0); - const int w10 = roundf(dx_1 * dy * D0); - const int w11 = D0 - w00 - w01 - w10; - - return static_cast<int>(INT_ROUND(tl * w00 + tr * w01 + bl * w10 + br * w11, scale)); -} - -template <typename T> -std::vector<int> compute_derivative(const SimpleTensor<T> &input, const InternalKeyPoint &keypoint, - BorderMode border_mode, uint8_t constant_border_value, size_t window_dimension, int scale) -{ - std::vector<int> bilinear_values; - - const int half_window = window_dimension / 2; - - float keypoint_int_x = 0; - float keypoint_int_y = 0; - - const float wx = std::modf(keypoint.x, &keypoint_int_x); - const float wy = std::modf(keypoint.y, &keypoint_int_y); - - Coordinates tl_window(static_cast<int>(keypoint_int_x) - half_window, static_cast<int>(keypoint_int_y) - half_window); - Coordinates br_window(static_cast<int>(keypoint_int_x) + half_window, static_cast<int>(keypoint_int_y) + half_window); - - for(int y = tl_window.y(); y <= br_window.y(); ++y) - { - for(int x = tl_window.x(); x <= br_window.x(); ++x) - { - bilinear_values.push_back(bilinear_interpolate(input, Coordinates(x, y), wx, wy, border_mode, static_cast<T>(constant_border_value), scale)); - } - } - - return bilinear_values; -} - -std::tuple<float, float, float> compute_spatial_gradient_matrix(const std::vector<int> &bilinear_ix, const std::vector<int> &bilinear_iy) -{ - ARM_COMPUTE_ERROR_ON(bilinear_ix.size() != bilinear_iy.size()); - - int iA11 = 0; - int iA12 = 0; - int iA22 = 0; - - for(size_t i = 0; i < bilinear_ix.size(); ++i) - { - int ixval = bilinear_ix[i]; - int iyval = bilinear_iy[i]; - - iA11 += ixval * ixval; - iA12 += ixval * iyval; - iA22 += iyval * iyval; - } - - return std::make_tuple(iA11 * FLT_SCALE, iA12 * FLT_SCALE, iA22 * FLT_SCALE); -} - -std::tuple<double, double> compute_temporal_gradient_vector(const std::vector<int> &bilinear_it_old, - const std::vector<int> &bilinear_it_new, - const std::vector<int> &bilinear_ix, - const std::vector<int> &bilinear_iy) -{ - ARM_COMPUTE_ERROR_ON(bilinear_ix.size() != bilinear_iy.size()); - ARM_COMPUTE_ERROR_ON(bilinear_it_old.size() != bilinear_it_new.size()); - - int ib1 = 0; - int ib2 = 0; - - for(size_t i = 0; i < bilinear_ix.size(); ++i) - { - int ixval = bilinear_ix[i]; - int iyval = bilinear_iy[i]; - int ival = bilinear_it_old[i]; - int jval = bilinear_it_new[i]; - - const int diff = jval - ival; - - ib1 += diff * ixval; - ib2 += diff * iyval; - } - - const double b1 = ib1 * FLT_SCALE; - const double b2 = ib2 * FLT_SCALE; - - return std::make_tuple(b1, b2); -} -} // namespace - -template <typename T> -std::vector<KeyPoint> optical_flow(const SimpleTensor<T> &old_input, const SimpleTensor<T> &new_input, - const OpticalFlowParameters ¶ms, size_t num_levels, - const std::vector<KeyPoint> &old_points, const std::vector<KeyPoint> &new_points_estimates, - BorderMode border_mode, uint8_t constant_border_value) -{ - const int filter_size = 3; // scharr filter size - const size_t max_iterations = 1000; // fixed by kernel - const size_t window_dimension = params.window_dimension; - const size_t num_iterations = (params.termination == Termination::TERM_CRITERIA_EPSILON) ? max_iterations : params.num_iterations; - - KeyPointArray new_points(old_points.size()); - - InternalKeyPointArray old_points_internal = create_internal_keypoints(old_points); - InternalKeyPointArray new_points_internal = create_internal_keypoints(new_points_estimates); - - SimpleTensor<int16_t> scharr_gx; - SimpleTensor<int16_t> scharr_gy; - - // Create pyramids - std::vector<SimpleTensor<T>> old_pyramid = gaussian_pyramid_half(old_input, border_mode, constant_border_value, num_levels); - std::vector<SimpleTensor<T>> new_pyramid = gaussian_pyramid_half(new_input, border_mode, constant_border_value, num_levels); - - // Iterate over each level of the pyramid - for(size_t idx = num_levels; idx > 0; --idx) - { - const size_t level = idx - 1; - - // Calculate scharr gradients - std::tie(scharr_gx, scharr_gy) = scharr<int16_t, T>(old_pyramid[level], filter_size, border_mode, constant_border_value, GradientDimension::GRAD_XY); - - scale_tracked_points(level, num_levels, params.use_initial_estimate, old_points_internal, new_points_internal, old_points, new_points_estimates); - - // Calculate valid region based on image dimensions of current pyramid level - const ValidRegion valid_region = shape_to_valid_region(old_pyramid[level].shape(), (border_mode == BorderMode::UNDEFINED), BorderSize(filter_size / 2)); - - for(size_t i = 0; i < old_points.size(); ++i) - { - InternalKeyPoint &old_keypoint = old_points_internal.at(i); - InternalKeyPoint &new_keypoint = new_points_internal.at(i); - - // Helper function for untracking keypoints when on the lowest pyramid level (high resolution) - const auto untrack_keypoint = [&](bool predicate) - { - if(predicate && (level == 0)) - { - new_keypoint.tracking_status = false; - return true; - } - return predicate; - }; - - if(!old_keypoint.tracking_status) - { - continue; - } - - // Check if tracked coordinate is outside image coordinate - if(untrack_keypoint(is_invalid_keypoint(old_keypoint, valid_region, window_dimension))) - { - continue; - } - - // Compute spatial derivative - std::vector<int> bilinear_ix = compute_derivative(scharr_gx, old_keypoint, border_mode, constant_border_value, window_dimension, W_BITS); - std::vector<int> bilinear_iy = compute_derivative(scharr_gy, old_keypoint, border_mode, constant_border_value, window_dimension, W_BITS); - - float A11 = 0.f; - float A12 = 0.f; - float A22 = 0.f; - std::tie(A11, A12, A22) = compute_spatial_gradient_matrix(bilinear_ix, bilinear_iy); - - // Calculate criteria for lost tracking : Matrix A is invertible - // 1. The determinant of the matrix is less than DETERMINANT_THRESHOLD - // 2. The minimum eigenvalue of the matrix is less than EIGENVALUE_THRESHOLD - const float trace_A = A11 + A22; - const float determinant = A11 * A22 - A12 * A12; - const float discriminant = (trace_A * trace_A) - 4.0f * (determinant); - const float eigenvalue_A = (trace_A - std::sqrt(discriminant)) / 2.0f; - - // Divide by window_dimension squared to reduce the floating point accummulation error - const float eigenvalue = eigenvalue_A / (window_dimension * window_dimension); - - // Check if it is a good point to track - if(untrack_keypoint(eigenvalue < EIGENVALUE_THRESHOLD || determinant < DETERMINANT_THRESHOLD)) - { - continue; - } - - float prev_delta_x = 0.f; - float prev_delta_y = 0.f; - - for(size_t j = 0; j < num_iterations; ++j) - { - // Check if tracked coordinate is outside image coordinate - if(untrack_keypoint(is_invalid_keypoint(new_keypoint, valid_region, window_dimension))) - { - break; - } - - // Compute temporal derivative - std::vector<int> bilinear_it_old = compute_derivative(old_pyramid[level], old_keypoint, border_mode, constant_border_value, window_dimension, W_BITS - 5); - std::vector<int> bilinear_it_new = compute_derivative(new_pyramid[level], new_keypoint, border_mode, constant_border_value, window_dimension, W_BITS - 5); - - double b1 = 0.f; - double b2 = 0.f; - std::tie(b1, b2) = compute_temporal_gradient_vector(bilinear_it_old, bilinear_it_new, bilinear_ix, bilinear_iy); - - // Compute motion vector -> A^-1 * -b - const float delta_x = (A12 * b2 - A22 * b1) / determinant; - const float delta_y = (A12 * b1 - A11 * b2) / determinant; - - // Update the new position - new_keypoint.x += delta_x; - new_keypoint.y += delta_y; - - const float magnitude_squared = delta_x * delta_x + delta_y * delta_y; - - // Check if termination criteria is EPSILON and if it is satisfied - if(magnitude_squared <= params.epsilon && (params.termination == Termination::TERM_CRITERIA_EPSILON || params.termination == Termination::TERM_CRITERIA_BOTH)) - { - break; - } - - // Check convergence analyzing the previous delta - if(j > 0 && (std::fabs(delta_x + prev_delta_x) < 0.01f && std::fabs(delta_y + prev_delta_y) < 0.01f)) - { - new_keypoint.x -= delta_x * SCALE_PYRAMID_HALF; - new_keypoint.y -= delta_y * SCALE_PYRAMID_HALF; - - break; - } - - prev_delta_x = delta_x; - prev_delta_y = delta_y; - } - } - } - - // Copy optical flow coordinates to output vector - for(size_t i = 0; i < old_points.size(); ++i) - { - const InternalKeyPoint &new_keypoint = new_points_internal.at(i); - - new_points.at(i).x = roundf(new_keypoint.x); - new_points.at(i).y = roundf(new_keypoint.y); - new_points.at(i).tracking_status = new_keypoint.tracking_status ? 1 : 0; - } - - return new_points; -} - -template std::vector<KeyPoint> optical_flow(const SimpleTensor<uint8_t> &old_input, const SimpleTensor<uint8_t> &new_input, - const OpticalFlowParameters ¶ms, size_t num_levels, - const std::vector<KeyPoint> &old_points, const std::vector<KeyPoint> &new_points_estimates, - BorderMode border_mode, uint8_t constant_border_value); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/OpticalFlow.h b/tests/validation/reference/OpticalFlow.h deleted file mode 100644 index 1bc367ab6a..0000000000 --- a/tests/validation/reference/OpticalFlow.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_OPTICAL_FLOW_H -#define ARM_COMPUTE_TEST_OPTICAL_FLOW_H - -#include "tests/SimpleTensor.h" -#include "tests/Types.h" - -#include <vector> - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -std::vector<KeyPoint> optical_flow(const SimpleTensor<T> &old_input, const SimpleTensor<T> &new_input, - const OpticalFlowParameters ¶ms, size_t num_levels, - const std::vector<KeyPoint> &old_points, const std::vector<KeyPoint> &new_points_estimates, - BorderMode border_mode, uint8_t constant_border_value); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_OPTICAL_FLOW_H */ diff --git a/tests/validation/reference/Permute.cpp b/tests/validation/reference/Permute.cpp index 6f122b1bf5..7aa3011d8f 100644 --- a/tests/validation/reference/Permute.cpp +++ b/tests/validation/reference/Permute.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2019,2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #include "Permute.h" #include "arm_compute/core/Types.h" + #include "tests/validation/Helpers.h" namespace arm_compute @@ -42,11 +43,11 @@ SimpleTensor<T> permute(const SimpleTensor<T> &src, PermutationVector perm) permute(dst_shape, perm); // Create reference - SimpleTensor<T> dst{ dst_shape, src.data_type(), src.num_channels(), src.quantization_info() }; + SimpleTensor<T> dst{dst_shape, src.data_type(), src.num_channels(), src.quantization_info()}; // Compute reference const uint32_t num_elements = src.num_elements(); - for(uint32_t i = 0; i < num_elements; ++i) + for (uint32_t i = 0; i < num_elements; ++i) { const Coordinates src_coords = index2coord(src.shape(), i); Coordinates dst_coords = src_coords; @@ -58,13 +59,14 @@ SimpleTensor<T> permute(const SimpleTensor<T> &src, PermutationVector perm) return dst; } -template SimpleTensor<int8_t> permute(const SimpleTensor<int8_t> &src, PermutationVector perm); -template SimpleTensor<uint8_t> permute(const SimpleTensor<uint8_t> &src, PermutationVector perm); -template SimpleTensor<int16_t> permute(const SimpleTensor<int16_t> &src, PermutationVector perm); +template SimpleTensor<int8_t> permute(const SimpleTensor<int8_t> &src, PermutationVector perm); +template SimpleTensor<uint8_t> permute(const SimpleTensor<uint8_t> &src, PermutationVector perm); +template SimpleTensor<int16_t> permute(const SimpleTensor<int16_t> &src, PermutationVector perm); template SimpleTensor<uint16_t> permute(const SimpleTensor<uint16_t> &src, PermutationVector perm); template SimpleTensor<uint32_t> permute(const SimpleTensor<uint32_t> &src, PermutationVector perm); -template SimpleTensor<float> permute(const SimpleTensor<float> &src, PermutationVector perm); -template SimpleTensor<half> permute(const SimpleTensor<half> &src, PermutationVector perm); +template SimpleTensor<float> permute(const SimpleTensor<float> &src, PermutationVector perm); +template SimpleTensor<half> permute(const SimpleTensor<half> &src, PermutationVector perm); +template SimpleTensor<bfloat16> permute(const SimpleTensor<bfloat16> &src, PermutationVector perm); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/Phase.cpp b/tests/validation/reference/Phase.cpp deleted file mode 100644 index 228f73bb37..0000000000 --- a/tests/validation/reference/Phase.cpp +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2017 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "Phase.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<uint8_t> phase(const SimpleTensor<T> &gx, const SimpleTensor<T> &gy, PhaseType phase_type) -{ - const float PI = std::atan(1) * 4; - SimpleTensor<uint8_t> phase(gx.shape(), DataType::U8); - - if(phase_type == PhaseType::UNSIGNED) // unsigned: map to [0-255) - { - for(int i = 0; i < gx.num_elements(); ++i) - { - float angle_deg = (std::atan2(float(gy[i]), float(gx[i])) / PI) * 180.0f; - phase[i] = (angle_deg < 0.0f) ? 180.f + angle_deg : angle_deg; - } - } - else // signed: map to [0-180) degrees - { - for(int i = 0; i < gx.num_elements(); ++i) - { - float angle_pi = std::atan2(gy[i], gx[i]) / PI; - angle_pi = (angle_pi < 0.0f) ? 2 + angle_pi : angle_pi; - phase[i] = lround(angle_pi * 128) & 0xFFu; - } - } - - return phase; -} - -template SimpleTensor<uint8_t> phase(const SimpleTensor<int16_t> &gx, const SimpleTensor<int16_t> &gy, PhaseType phase_type); -template SimpleTensor<uint8_t> phase(const SimpleTensor<int32_t> &gx, const SimpleTensor<int32_t> &gy, PhaseType phase_type); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/Phase.h b/tests/validation/reference/Phase.h deleted file mode 100644 index 436c280f0a..0000000000 --- a/tests/validation/reference/Phase.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_PHASE_H -#define ARM_COMPUTE_TEST_PHASE_H - -#include "tests/SimpleTensor.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<uint8_t> phase(const SimpleTensor<T> &gx, const SimpleTensor<T> &gy, PhaseType phase_type); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_PHASE_H */ diff --git a/tests/validation/reference/Pooling3dLayer.cpp b/tests/validation/reference/Pooling3dLayer.cpp new file mode 100644 index 0000000000..2e8f3a0b92 --- /dev/null +++ b/tests/validation/reference/Pooling3dLayer.cpp @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "Pooling3dLayer.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "tests/validation/Helpers.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ +using namespace arm_compute::misc::shape_calculator; + +template <typename T> +SimpleTensor<T> pooling_3d_layer_internal(const SimpleTensor<T> &src, const Pooling3dLayerInfo &pool3d_info, SimpleTensor<uint32_t> *indices) +{ + TensorShape pooled_shape = compute_pool3d_shape(src.shape(), pool3d_info); + SimpleTensor<T> dst{ pooled_shape, src.data_type(), 1 }; + + if(indices != nullptr) + { + *indices = SimpleTensor<uint32_t> { pooled_shape, DataType::U32, 1 }; + } + + const int idx_channel = 0; + const int idx_width = 1; + const int idx_height = 2; + const int idx_depth = 3; + const int idx_batch = 4; + + const int pool_size_width = pool3d_info.is_global_pooling ? src.shape()[idx_width] : pool3d_info.pool_size.width; + const int pool_size_height = pool3d_info.is_global_pooling ? src.shape()[idx_height] : pool3d_info.pool_size.height; + const int pool_size_depth = pool3d_info.is_global_pooling ? src.shape()[idx_depth] : pool3d_info.pool_size.depth; + + const int pool_stride_width = static_cast<int>(pool3d_info.stride.width); + const int pool_stride_height = static_cast<int>(pool3d_info.stride.height); + const int pool_stride_depth = static_cast<int>(pool3d_info.stride.depth); + + const int pad_left = static_cast<int>(pool3d_info.padding.left); + const int pad_top = static_cast<int>(pool3d_info.padding.top); + const int pad_front = static_cast<int>(pool3d_info.padding.front); + + const int pad_right = static_cast<int>(pool3d_info.padding.right); + const int pad_bottom = static_cast<int>(pool3d_info.padding.bottom); + const int pad_back = static_cast<int>(pool3d_info.padding.back); + + const int num_channels = static_cast<int>(src.shape()[idx_channel]); + const int num_batches = static_cast<int>(src.shape()[idx_batch]); + + ARM_COMPUTE_ERROR_ON(num_channels != static_cast<int>(dst.shape()[idx_channel])); + ARM_COMPUTE_ERROR_ON(num_batches != static_cast<int>(dst.shape()[idx_batch])); + + const int w_src = static_cast<int>(src.shape()[idx_width]); + const int h_src = static_cast<int>(src.shape()[idx_height]); + const int d_src = static_cast<int>(src.shape()[idx_depth]); + const int w_dst = static_cast<int>(dst.shape()[idx_width]); + const int h_dst = static_cast<int>(dst.shape()[idx_height]); + const int d_dst = static_cast<int>(dst.shape()[idx_depth]); + + const bool exclude_padding = pool3d_info.exclude_padding; + + const int height_stride_src = num_channels * w_src; + const int depth_stride_src = height_stride_src * h_src; + const int batch_stride_src = depth_stride_src * d_src; + const int height_stride_dst = num_channels * w_dst; + const int depth_stride_dst = height_stride_dst * h_dst; + const int batch_stride_dst = depth_stride_dst * d_dst; + + for(int b = 0; b < num_batches; ++b) + { + const int batch_offset_dst = b * batch_stride_dst; + const int batch_offset_src = b * batch_stride_src; + for(int c = 0; c < num_channels; ++c) + { + for(int d = 0; d < d_dst; ++d) + { + const int depth_offset_dst = d * depth_stride_dst; + for(int h = 0; h < h_dst; ++h) + { + const int height_offset_dst = h * height_stride_dst; + for(int w = 0; w < w_dst; ++w) + { + int wstart = w * pool_stride_width - pad_left; + int hstart = h * pool_stride_height - pad_top; + int dstart = d * pool_stride_depth - pad_front; + int wend = std::min(wstart + pool_size_width, w_src + pad_right); + int hend = std::min(hstart + pool_size_height, h_src + pad_bottom); + int dend = std::min(dstart + pool_size_depth, d_src + pad_back); + + // this may not be equal to pool_w * pool_h * pool_d because of + // DimensionRoundingType choice (CEIL) + int pool_size = (dend - dstart) * (hend - hstart) * (wend - wstart); + + // limit [start, end) to [0, w_src) + wstart = std::max(wstart, 0); + hstart = std::max(hstart, 0); + dstart = std::max(dstart, 0); + wend = std::min(wend, w_src); + hend = std::min(hend, h_src); + dend = std::min(dend, d_src); + + auto max_val = -std::numeric_limits<T>::infinity(); + int max_index{ 0 }; + T avg_val = static_cast<T>(0.f); + T l2_val = static_cast<T>(0.f); + + if(exclude_padding) + { + pool_size = (dend - dstart) * (hend - hstart) * (wend - wstart); + } + + for(int z = dstart; z < dend; ++z) + { + const int depth_offset_src = z * depth_stride_src; + for(int y = hstart; y < hend; ++y) + { + const int height_offset_src = y * height_stride_src; + for(int x = wstart; x < wend; ++x) + { + const auto val = static_cast<T>( + src[batch_offset_src + depth_offset_src + height_offset_src + x * num_channels + c]); + if(val > max_val) + { + max_val = val; + max_index = coord2index(src.shape(), Coordinates(c, x, y, z, 0)); + } + + avg_val += val; + l2_val += val * val; + } + } + } + + avg_val /= pool_size; + l2_val = static_cast<T>(std::sqrt(l2_val / pool_size)); + + int dst_index = batch_offset_dst + depth_offset_dst + height_offset_dst + w * num_channels + c; + switch(pool3d_info.pool_type) + { + case PoolingType::MAX: + dst[dst_index] = static_cast<T>(max_val); + break; + case PoolingType::AVG: + dst[dst_index] = static_cast<T>(avg_val); + break; + case PoolingType::L2: + dst[dst_index] = static_cast<T>(l2_val); + break; + default: + ARM_COMPUTE_ERROR("Pooling Type should be either MAX, AVG or L2"); + } + + if(indices != nullptr) + { + (*indices)[dst_index] = max_index; + } + } + } + } + } + } + + return dst; +} + +template SimpleTensor<float> pooling_3d_layer(const SimpleTensor<float> &src, const Pooling3dLayerInfo &pool3d_info, const QuantizationInfo &output_qinfo, SimpleTensor<uint32_t> *indices); +template SimpleTensor<half> pooling_3d_layer(const SimpleTensor<half> &src, const Pooling3dLayerInfo &pool3d_info, const QuantizationInfo &output_qinfo, SimpleTensor<uint32_t> *indices); + +template <typename T> +SimpleTensor<T> pooling_3d_layer(const SimpleTensor<T> &src, const Pooling3dLayerInfo &pool3d_info, const QuantizationInfo &output_qinfo, SimpleTensor<uint32_t> *indices) +{ + ARM_COMPUTE_UNUSED(output_qinfo); + return pooling_3d_layer_internal<T>(src, pool3d_info, indices); +} + +template <> +SimpleTensor<int8_t> pooling_3d_layer<int8_t>(const SimpleTensor<int8_t> &src, const Pooling3dLayerInfo &pool3d_info, const QuantizationInfo &output_qinfo, SimpleTensor<uint32_t> *indices) +{ + SimpleTensor<float> src_tmp = convert_from_asymmetric(src); + SimpleTensor<float> dst_tmp = pooling_3d_layer_internal<float>(src_tmp, pool3d_info, indices); + return convert_to_asymmetric<int8_t>(dst_tmp, output_qinfo); +} + +template <> +SimpleTensor<uint8_t> pooling_3d_layer<uint8_t>(const SimpleTensor<uint8_t> &src, const Pooling3dLayerInfo &pool3d_info, const QuantizationInfo &output_qinfo, SimpleTensor<uint32_t> *indices) +{ + SimpleTensor<float> src_tmp = convert_from_asymmetric(src); + SimpleTensor<float> dst_tmp = pooling_3d_layer_internal<float>(src_tmp, pool3d_info, indices); + return convert_to_asymmetric<uint8_t>(dst_tmp, output_qinfo); +} + +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/reference/HarrisCornerDetector.h b/tests/validation/reference/Pooling3dLayer.h index 2f464749f6..481a0d3024 100644 --- a/tests/validation/reference/HarrisCornerDetector.h +++ b/tests/validation/reference/Pooling3dLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,13 +21,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_HARRIS_CORNER_DETECTOR_H -#define ARM_COMPUTE_TEST_HARRIS_CORNER_DETECTOR_H +#ifndef ARM_COMPUTE_TEST_POOL3D_LAYER_H +#define ARM_COMPUTE_TEST_POOL3D_LAYER_H +#include "Utils.h" #include "arm_compute/core/Types.h" #include "tests/SimpleTensor.h" - -#include <vector> +#include "tests/validation/Helpers.h" namespace arm_compute { @@ -38,11 +38,13 @@ namespace validation namespace reference { template <typename T> -std::vector<KeyPoint> harris_corner_detector(const SimpleTensor<T> &src, - float threshold, float min_dist, float sensitivity, int gradient_size, int block_size, - BorderMode border_mode, T constant_border_value = 0); +SimpleTensor<T> pooling_3d_layer_internal(const SimpleTensor<T> &src, const Pooling3dLayerInfo &pool3d_info, SimpleTensor<uint32_t> *indices = nullptr); + +template <typename T> +SimpleTensor<T> pooling_3d_layer(const SimpleTensor<T> &src, const Pooling3dLayerInfo &pool3d_info, const QuantizationInfo &output_qinfo = QuantizationInfo(), + SimpleTensor<uint32_t> *indices = nullptr); } // namespace reference } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_HARRIS_CORNER_DETECTOR_H */ +#endif /* ARM_COMPUTE_TEST_POOL3D_LAYER_H */ diff --git a/tests/validation/reference/PoolingLayer.cpp b/tests/validation/reference/PoolingLayer.cpp index 5f4edfe49c..bf7bd0c1df 100644 --- a/tests/validation/reference/PoolingLayer.cpp +++ b/tests/validation/reference/PoolingLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -40,7 +40,6 @@ using namespace arm_compute::misc::shape_calculator; template <typename T, typename ACC_T, typename std::enable_if<is_floating_point<T>::value, int>::type> SimpleTensor<T> pooling_layer_internal(const SimpleTensor<T> &src, const PoolingLayerInfo &info, SimpleTensor<uint32_t> *indices, DataLayout data_layout) { - ARM_COMPUTE_ERROR_ON(info.is_global_pooling && (src.shape().x() != src.shape().y())); // Create reference SimpleTensor<T> dst{ compute_pool_shape(TensorInfo(src.shape(), 1, src.data_type()), info), src.data_type(), 1 }; auto pooled_shape = compute_pool_shape(TensorInfo(src.shape(), 1, src.data_type()), info); @@ -84,20 +83,28 @@ SimpleTensor<T> pooling_layer_internal(const SimpleTensor<T> &src, const Pooling { int wstart = w * pool_stride_x - pad_left; int hstart = h * pool_stride_y - pad_top; + + // Used to calculate kernel indices + int kh_start = std::max(0, -hstart); + int kw_start = std::max(0, -wstart); + int max_ker_index{ 0 }; + int wend = std::min(wstart + pool_size_x, w_src); int hend = std::min(hstart + pool_size_y, h_src); wstart = std::max(wstart, 0); hstart = std::max(hstart, 0); - auto max_val = std::numeric_limits<ACC_T>::lowest(); + auto max_val = info.use_inf_as_limit ? -std::numeric_limits<ACC_T>::infinity() : std::numeric_limits<ACC_T>::lowest(); int max_index{ 0 }; - for(int y = hstart; y < hend; ++y) + + for(int y = hstart, kh = kh_start; y < hend; ++y, ++kh) { - for(int x = wstart; x < wend; ++x) + for(int x = wstart, kw = kw_start; x < wend; ++x, ++kw) { const auto val = static_cast<ACC_T>(src[b * z_src * h_src * w_src + r * h_src * w_src + y * w_src + x]); if(val > max_val) { - max_val = val; + max_val = val; + max_ker_index = pool_size_x * (kh) + (kw); if(data_layout == DataLayout::NCHW) { max_index = coord2index(src.shape(), Coordinates(x, y, r, 0)); @@ -113,7 +120,7 @@ SimpleTensor<T> pooling_layer_internal(const SimpleTensor<T> &src, const Pooling dst[b * z_dst * h_dst * w_dst + r * h_dst * w_dst + h * w_dst + w] = static_cast<T>(max_val); if(indices) { - (*indices)[b * z_dst * h_dst * w_dst + r * h_dst * w_dst + h * w_dst + w] = max_index; + (*indices)[b * z_dst * h_dst * w_dst + r * h_dst * w_dst + h * w_dst + w] = (info.use_kernel_indices) ? max_ker_index : max_index; } } } diff --git a/tests/validation/reference/QuantizationLayer.cpp b/tests/validation/reference/QuantizationLayer.cpp index 27665375c3..ad7ba7ac43 100644 --- a/tests/validation/reference/QuantizationLayer.cpp +++ b/tests/validation/reference/QuantizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/tests/validation/reference/ROIPoolingLayer.cpp b/tests/validation/reference/ROIPoolingLayer.cpp new file mode 100644 index 0000000000..8dc3014763 --- /dev/null +++ b/tests/validation/reference/ROIPoolingLayer.cpp @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ROIPoolingLayer.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "tests/validation/Helpers.h" +#include <algorithm> + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ +template <> +SimpleTensor<float> roi_pool_layer(const SimpleTensor<float> &src, const SimpleTensor<uint16_t> &rois, const ROIPoolingLayerInfo &pool_info, const QuantizationInfo &output_qinfo) +{ + ARM_COMPUTE_UNUSED(output_qinfo); + + const size_t num_rois = rois.shape()[1]; + const size_t values_per_roi = rois.shape()[0]; + DataType output_data_type = src.data_type(); + + TensorShape input_shape = src.shape(); + TensorShape output_shape(pool_info.pooled_width(), pool_info.pooled_height(), src.shape()[2], num_rois); + SimpleTensor<float> output(output_shape, output_data_type); + + const int pooled_w = pool_info.pooled_width(); + const int pooled_h = pool_info.pooled_height(); + const float spatial_scale = pool_info.spatial_scale(); + + // get sizes of x and y dimensions in src tensor + const int width = src.shape()[0]; + const int height = src.shape()[1]; + + // Move pointer across the fourth dimension + const size_t input_stride_w = input_shape[0] * input_shape[1] * input_shape[2]; + const size_t output_stride_w = output_shape[0] * output_shape[1] * output_shape[2]; + + const auto *rois_ptr = reinterpret_cast<const uint16_t *>(rois.data()); + + // Iterate through pixel width (X-Axis) + for(size_t pw = 0; pw < num_rois; ++pw) + { + const unsigned int roi_batch = rois_ptr[values_per_roi * pw]; + const auto x1 = rois_ptr[values_per_roi * pw + 1]; + const auto y1 = rois_ptr[values_per_roi * pw + 2]; + const auto x2 = rois_ptr[values_per_roi * pw + 3]; + const auto y2 = rois_ptr[values_per_roi * pw + 4]; + + //Iterate through pixel height (Y-Axis) + for(size_t fm = 0; fm < input_shape[2]; ++fm) + { + // Iterate through regions of interest index + for(size_t py = 0; py < pool_info.pooled_height(); ++py) + { + // Scale ROI + const int roi_anchor_x = support::cpp11::round(x1 * spatial_scale); + const int roi_anchor_y = support::cpp11::round(y1 * spatial_scale); + const int roi_width = std::max(support::cpp11::round((x2 - x1) * spatial_scale), 1.f); + const int roi_height = std::max(support::cpp11::round((y2 - y1) * spatial_scale), 1.f); + + // Iterate over feature map (Z axis) + for(size_t px = 0; px < pool_info.pooled_width(); ++px) + { + auto region_start_x = static_cast<int>(std::floor((static_cast<float>(px) / pooled_w) * roi_width)); + auto region_end_x = static_cast<int>(std::floor((static_cast<float>(px + 1) / pooled_w) * roi_width)); + auto region_start_y = static_cast<int>(std::floor((static_cast<float>(py) / pooled_h) * roi_height)); + auto region_end_y = static_cast<int>(std::floor((static_cast<float>(py + 1) / pooled_h) * roi_height)); + + region_start_x = std::min(std::max(region_start_x + roi_anchor_x, 0), width); + region_end_x = std::min(std::max(region_end_x + roi_anchor_x, 0), width); + region_start_y = std::min(std::max(region_start_y + roi_anchor_y, 0), height); + region_end_y = std::min(std::max(region_end_y + roi_anchor_y, 0), height); + + // Iterate through the pooling region + if((region_end_x <= region_start_x) || (region_end_y <= region_start_y)) + { + /* Assign element in tensor 'output' at coordinates px, py, fm, roi_indx, to 0 */ + auto out_ptr = output.data() + px + py * output_shape[0] + fm * output_shape[0] * output_shape[1] + pw * output_stride_w; + *out_ptr = 0; + } + else + { + float curr_max = -std::numeric_limits<float>::max(); + for(int j = region_start_y; j < region_end_y; ++j) + { + for(int i = region_start_x; i < region_end_x; ++i) + { + /* Retrieve element from input tensor at coordinates(i, j, fm, roi_batch) */ + float in_element = *(src.data() + i + j * input_shape[0] + fm * input_shape[0] * input_shape[1] + roi_batch * input_stride_w); + curr_max = std::max(in_element, curr_max); + } + } + + /* Assign element in tensor 'output' at coordinates px, py, fm, roi_indx, to curr_max */ + auto out_ptr = output.data() + px + py * output_shape[0] + fm * output_shape[0] * output_shape[1] + pw * output_stride_w; + *out_ptr = curr_max; + } + } + } + } + } + + return output; +} + +/* + Template genericised method to allow calling of roi_pooling_layer with quantized 8 bit datatype +*/ +template <> +SimpleTensor<uint8_t> roi_pool_layer(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint16_t> &rois, const ROIPoolingLayerInfo &pool_info, const QuantizationInfo &output_qinfo) +{ + const SimpleTensor<float> src_tmp = convert_from_asymmetric(src); + SimpleTensor<float> dst_tmp = roi_pool_layer<float>(src_tmp, rois, pool_info, output_qinfo); + SimpleTensor<uint8_t> dst = convert_to_asymmetric<uint8_t>(dst_tmp, output_qinfo); + return dst; +} + +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute
\ No newline at end of file diff --git a/tests/validation/reference/FastCorners.h b/tests/validation/reference/ROIPoolingLayer.h index 2c4506de5c..ddbaee2d5e 100644 --- a/tests/validation/reference/FastCorners.h +++ b/tests/validation/reference/ROIPoolingLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,11 +21,12 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_FAST_CORNERS_H -#define ARM_COMPUTE_TEST_FAST_CORNERS_H +#ifndef ARM_COMPUTE_TEST_ROIPOOLLAYER_H +#define ARM_COMPUTE_TEST_ROIPOOLLAYER_H #include "arm_compute/core/Types.h" #include "tests/SimpleTensor.h" +#include "tests/validation/Helpers.h" namespace arm_compute { @@ -36,9 +37,10 @@ namespace validation namespace reference { template <typename T> -std::vector<KeyPoint> fast_corners(const SimpleTensor<T> &src, float input_thresh, bool suppress_nonmax, BorderMode border_mode, T constant_border_value = 0); +SimpleTensor<T> roi_pool_layer(const SimpleTensor<T> &src, const SimpleTensor<uint16_t> &rois, const ROIPoolingLayerInfo &pool_info, const QuantizationInfo &output_qinfo); } // namespace reference } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_FAST_CORNERS_H */ + +#endif /* ARM_COMPUTE_TEST_ROIPOOLLAYER_H */
\ No newline at end of file diff --git a/tests/validation/reference/ReductionOperation.cpp b/tests/validation/reference/ReductionOperation.cpp index ffb79f86c5..c189bc2d47 100644 --- a/tests/validation/reference/ReductionOperation.cpp +++ b/tests/validation/reference/ReductionOperation.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,7 +22,6 @@ * SOFTWARE. */ #include "ReductionOperation.h" - #include "tests/validation/Helpers.h" #include <algorithm> @@ -39,7 +38,7 @@ namespace reference namespace { template <typename T, typename OT> -OT reduce_operation(const T *ptr, int reduce_elements, ReductionOperation op, int stride) +OT reduce_operation(const T *ptr, int reduce_elements, ReductionOperation op, int stride, RoundingPolicy policy) { using type = typename std::remove_cv<OT>::type; T res; @@ -99,7 +98,14 @@ OT reduce_operation(const T *ptr, int reduce_elements, ReductionOperation op, in } if(op == ReductionOperation::MEAN_SUM && reduce_elements > 0) { - int_res /= reduce_elements; + // Only use rounding in aarch64 to be consistent with kernel +#ifdef __aarch64__ + // Divide in float format, then rounded to nearest and implicitly cast back to int + int_res = round(static_cast<float>(int_res) / static_cast<float>(reduce_elements), policy); +#else // defined(__aarch64__) + ARM_COMPUTE_UNUSED(policy); + int_res /= reduce_elements; // Legacy compatibility +#endif // __aarch64 } res = static_cast<type>(int_res); } @@ -175,12 +181,12 @@ OT reduce_operation_arg_min_max(const T *ptr, int reduce_elements, ReductionOper } // namespace template <typename T, typename OT> -SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op) +SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, + DataType output_type, RoundingPolicy policy) { // Create reference - const bool is_arg_min_max = (op == ReductionOperation::ARG_IDX_MIN || op == ReductionOperation::ARG_IDX_MAX); - DataType output_data_type = is_arg_min_max ? DataType::S32 : src.data_type(); - SimpleTensor<OT> dst{ dst_shape, output_data_type, 1, src.quantization_info() }; + const bool is_arg_min_max = (op == ReductionOperation::ARG_IDX_MIN || op == ReductionOperation::ARG_IDX_MAX); + SimpleTensor<OT> dst{ dst_shape, output_type, 1, src.quantization_info() }; const unsigned int src_width = src.shape().x(); const unsigned int src_height = src.shape().y(); const unsigned int src_depth = src.shape().z(); @@ -197,7 +203,7 @@ SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const T const T *src_row_ptr = src.data() + du * reduce_elems; dst[du] = is_arg_min_max ? reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, 1) : - reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, 1); + reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, 1, policy); } } break; @@ -213,7 +219,7 @@ SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const T const T *src_row_ptr = src.data() + in_offset; dst[out_offset] = is_arg_min_max ? reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, src_width) : - reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width); + reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width, policy); } } } @@ -232,7 +238,7 @@ SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const T const T *src_row_ptr = src.data() + in_offset; dst[out_offset] = is_arg_min_max ? reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height) : - reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height); + reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height, policy); } } } @@ -254,7 +260,7 @@ SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const T const T *src_row_ptr = src.data() + in_offset; dst[out_offset] = is_arg_min_max ? reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height * src_depth) : - reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height * src_depth); + reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height * src_depth, policy); } } } @@ -269,74 +275,89 @@ SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const T } template <typename T, typename OT> -SimpleTensor<OT> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info_output) +SimpleTensor<OT> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, + DataType output_type, QuantizationInfo quantization_info_output, RoundingPolicy policy) { ARM_COMPUTE_UNUSED(quantization_info_output); - return compute_reduction_operation<T, OT>(src, dst_shape, axis, op); + return compute_reduction_operation<T, OT>(src, dst_shape, axis, op, output_type, policy); } template <> -SimpleTensor<uint8_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info_output) +SimpleTensor<uint8_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, + DataType output_type, QuantizationInfo quantization_info_output, RoundingPolicy policy) { if(src.data_type() == DataType::QASYMM8) { // If the operation is MEAN_SUM, we can directly use the uint8 implementation without taking into account scale and offset if(op == ReductionOperation::MEAN_SUM && src.quantization_info() == quantization_info_output) { - return compute_reduction_operation<uint8_t, uint8_t>(src, dst_shape, axis, op); + return compute_reduction_operation<uint8_t, uint8_t>(src, dst_shape, axis, op, output_type, policy); } else { SimpleTensor<float> src_f = convert_from_asymmetric(src); - SimpleTensor<float> dst_f = reference::reduction_operation<float, float>(src_f, dst_shape, axis, op); + SimpleTensor<float> dst_f = reference::reduction_operation<float, float>(src_f, dst_shape, axis, op, output_type); return convert_to_asymmetric<uint8_t>(dst_f, quantization_info_output); } } else { - return compute_reduction_operation<uint8_t, uint8_t>(src, dst_shape, axis, op); + return compute_reduction_operation<uint8_t, uint8_t>(src, dst_shape, axis, op, output_type, policy); } } template <> -SimpleTensor<int8_t> reduction_operation(const SimpleTensor<int8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info_output) +SimpleTensor<int8_t> reduction_operation(const SimpleTensor<int8_t> &src, const TensorShape &dst_shape, unsigned int axis, + ReductionOperation op, DataType output_type, QuantizationInfo quantization_info_output, RoundingPolicy policy) { if(src.data_type() == DataType::QASYMM8_SIGNED) { // If the operation is MEAN_SUM, we can directly use the int8 implementation without taking into account scale and offset if(op == ReductionOperation::MEAN_SUM && src.quantization_info() == quantization_info_output) { - return compute_reduction_operation<int8_t, int8_t>(src, dst_shape, axis, op); + return compute_reduction_operation<int8_t, int8_t>(src, dst_shape, axis, op, output_type, policy); } else { SimpleTensor<float> src_f = convert_from_asymmetric(src); - SimpleTensor<float> dst_f = reference::reduction_operation<float, float>(src_f, dst_shape, axis, op); + SimpleTensor<float> dst_f = reference::reduction_operation<float, float>(src_f, dst_shape, axis, op, output_type); return convert_to_asymmetric<int8_t>(dst_f, quantization_info_output); } } else { - return compute_reduction_operation<int8_t, int8_t>(src, dst_shape, axis, op); + return compute_reduction_operation<int8_t, int8_t>(src, dst_shape, axis, op, output_type, policy); } } template SimpleTensor<float> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, - QuantizationInfo quantization_info_output = QuantizationInfo()); + DataType output_type = DataType::S32, QuantizationInfo quantization_info_output = QuantizationInfo(), + RoundingPolicy policy = RoundingPolicy::TO_ZERO); + template SimpleTensor<half> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, - QuantizationInfo quantization_info_output = QuantizationInfo()); + DataType output_type = DataType::S32, + QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO); template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, - QuantizationInfo quantization_info_output = QuantizationInfo()); + DataType output_type = DataType::S32, + QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO); + template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<int32_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, - QuantizationInfo quantization_info_output = QuantizationInfo()); + DataType output_type = DataType::S32, + QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO); template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, - QuantizationInfo quantization_info_output = QuantizationInfo()); + DataType output_type = DataType::S32, + QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO); template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, - QuantizationInfo quantization_info_output = QuantizationInfo()); + DataType output_type = DataType::S32, + QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO); template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<int8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, - QuantizationInfo quantization_info_output = QuantizationInfo()); + DataType output_type = DataType::S32, + QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO); +template SimpleTensor<int64_t> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, + DataType output_type = DataType::S32, QuantizationInfo quantization_info_output = QuantizationInfo(), + RoundingPolicy policy = RoundingPolicy::TO_ZERO); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/ReductionOperation.h b/tests/validation/reference/ReductionOperation.h index 9c9e721b29..fb2e7a7093 100644 --- a/tests/validation/reference/ReductionOperation.h +++ b/tests/validation/reference/ReductionOperation.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_TEST_REDUCTION_OPERATION_H #define ARM_COMPUTE_TEST_REDUCTION_OPERATION_H +#include "arm_compute/core/Rounding.h" #include "tests/SimpleTensor.h" #include "tests/validation/Helpers.h" @@ -36,8 +37,8 @@ namespace validation namespace reference { template <typename T, typename OT> -SimpleTensor<OT> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, - QuantizationInfo quantization_info_output = QuantizationInfo()); +SimpleTensor<OT> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, DataType output_type = DataType::S32, + QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/Remap.cpp b/tests/validation/reference/Remap.cpp deleted file mode 100644 index 5c4d3c186b..0000000000 --- a/tests/validation/reference/Remap.cpp +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "Remap.h" - -#include "Utils.h" -#include "tests/validation/Helpers.h" - -#include <algorithm> -#include <array> - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<T> remap(const SimpleTensor<T> &in, SimpleTensor<float> &map_x, SimpleTensor<float> &map_y, SimpleTensor<T> &valid_mask, InterpolationPolicy policy, BorderMode border_mode, - T constant_border_value) -{ - ARM_COMPUTE_ERROR_ON_MSG(border_mode == BorderMode::REPLICATE, "BorderMode not supported"); - SimpleTensor<T> out(in.shape(), in.data_type()); - ARM_COMPUTE_ERROR_ON(out.num_elements() != map_x.num_elements()); - const int width = in.shape().x(); - const int height = in.shape().y(); - const uint32_t num_elements = out.num_elements(); - for(uint32_t idx = 0; idx < num_elements; idx++) - { - const Coordinates id_out = index2coord(out.shape(), idx); - valid_mask[idx] = 1; - Coordinates src_idx = id_out; // need to setup all coordinates and not just xy - src_idx.set(0, static_cast<int>(std::floor(map_x[idx]))); - src_idx.set(1, static_cast<int>(std::floor(map_y[idx]))); - if((0 <= map_y[idx]) && (map_y[idx] < height) && (0 <= map_x[idx]) && (map_x[idx] < width)) - { - switch(policy) - { - case InterpolationPolicy::NEAREST_NEIGHBOR: - { - out[idx] = tensor_elem_at(in, src_idx, border_mode, constant_border_value); - break; - } - case InterpolationPolicy::BILINEAR: - { - (valid_bilinear_policy(map_x[idx], map_y[idx], width, height, border_mode)) ? - out[idx] = bilinear_policy(in, src_idx, map_x[idx], map_y[idx], border_mode, constant_border_value) : - valid_mask[idx] = 0; - break; - } - case InterpolationPolicy::AREA: - default: - ARM_COMPUTE_ERROR("Interpolation not supported"); - break; - } - } - else - { - if(border_mode == BorderMode::UNDEFINED) - { - valid_mask[idx] = 0; - } - else - { - switch(policy) - { - case InterpolationPolicy::NEAREST_NEIGHBOR: - out[idx] = constant_border_value; - break; - case InterpolationPolicy::BILINEAR: - out[idx] = bilinear_policy(in, src_idx, map_x[idx], map_y[idx], border_mode, constant_border_value); - break; - case InterpolationPolicy::AREA: - default: - break; - } - } - } - } - - return out; -} - -template SimpleTensor<uint8_t> remap(const SimpleTensor<uint8_t> &src, SimpleTensor<float> &map_x, SimpleTensor<float> &map_y, SimpleTensor<uint8_t> &valid_mask, InterpolationPolicy policy, - BorderMode border_mode, - uint8_t constant_border_value); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/Remap.h b/tests/validation/reference/Remap.h deleted file mode 100644 index 0726f75965..0000000000 --- a/tests/validation/reference/Remap.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_REMAP_H -#define ARM_COMPUTE_TEST_REMAP_H - -#include "tests/SimpleTensor.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<T> remap(const SimpleTensor<T> &in, SimpleTensor<float> &map_x, SimpleTensor<float> &map_y, SimpleTensor<T> &valid_mask, InterpolationPolicy policy, BorderMode border_mode, - T constant_border_value = 0); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_REMAP_H */ diff --git a/tests/validation/reference/Reorder.cpp b/tests/validation/reference/Reorder.cpp new file mode 100644 index 0000000000..8abb372596 --- /dev/null +++ b/tests/validation/reference/Reorder.cpp @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Reorder.h" +#include "src/core/NEON/kernels/arm_gemm/utils.hpp" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ + +/* + * Generic transform. + * + * Assuming the untransposed case, this works by first reading <BlockBy> + * consecutive values from the first input row. This same number of values + * are then read from the next <IntBy-1> rows. Now return to the first + * input row and repeat. + * + * Need to cope with the work requested in either dimension not actually + * being a multiple of the block sizes. + */ +template <unsigned int tIntBy, unsigned int BlockBy, bool Transposed, size_t TOutSize, size_t TInSize, typename d_type, arm_gemm::VLType vlt> +struct Transform_ref +{ + template <typename TOut, typename TIn> + static void Transform(TOut &out, const TIn in, const int stride, + const int y0, const int ymax, const int x0, const int xmax) + { + // NOTE: This code is disabled to avoid the call to get_vector_length(), so templated transforms will not be + // correct for SVE. This is not an issue as we have specializations for all SVE cases. + // For SVE cases we multiply the interleave factor by the vector length. + // const unsigned int IntBy = tIntBy * (vlt == VLType::SVE ? get_vector_length<TOut>() / BlockBy : 1); + const unsigned int IntBy = tIntBy; + int out_index = 0; + + const int n_whole_y_blocks = (ymax - y0) / IntBy; + const int y_remainders = (ymax - y0) % IntBy; + const int n_y_blocks = n_whole_y_blocks + (y_remainders ? 1 : 0); + + const int n_whole_x_blocks = (xmax - x0) / BlockBy; + const int x_remainders = (xmax - x0) % BlockBy; + const int n_x_blocks = n_whole_x_blocks + (x_remainders ? 1 : 0); + + // "Y" loop: advance down the rows of the source IntBy rows at a time. + // Set up fill_rows to show the number rows to copy from, and blank_rows + // for the number of blank rows to add. + for(int y_block = 0; y_block < n_y_blocks; y_block++) + { + const int fill_rows = (y_block < n_whole_y_blocks) ? IntBy : y_remainders; + const int blank_rows = IntBy - fill_rows; + + const int y_base = y0 + (y_block * IntBy); + + // So now advance along this block of rows, BlockBy columns at a time. + for(int x_block = 0; x_block < n_x_blocks; x_block++) + { + const int fill_cols = (x_block < n_whole_x_blocks) ? BlockBy : x_remainders; + const int blank_cols = BlockBy - fill_cols; + + const int x_base = x0 + (x_block * BlockBy); + + for(int row = 0; row < fill_rows; row++) + { + for(int col = 0; col < fill_cols; col++) + { + // In-range copy. If it's transposed, we reverse the sense of rows and columns here. + if(Transposed) + { + out[out_index] = in[(x_base + col) * stride + y_base + row]; + out_index++; + } + else + { + out[out_index] = in[(y_base + row) * stride + x_base + col]; + out_index++; + } + } + // "col" tail - row is in range but column is out of range. + for(int col = 0; col < blank_cols; col++) + { + out[out_index] = 0; + out_index++; + } + } + // "row" tail - row is out of range so fill with zeros always. + const d_type zeroval = 0; + const int pads = blank_rows * (fill_cols + blank_cols); + + for(int i = 0; i < pads; i++) + { + out[out_index] = zeroval; + } + + out_index += pads; + } + } + } +}; + +template <typename T> +SimpleTensor<T> reorder_layer(const SimpleTensor<T> &src, const TensorShape &output_shape, WeightFormat output_wf) +{ + SimpleTensor<T> dst{ output_shape, src.data_type() }; + const int cols = src.shape()[0]; + const int rows = src.shape()[1]; + + switch(output_wf) + { + case WeightFormat::OHWIo4: + { + Transform_ref<4, 1, true, sizeof(float), sizeof(float), float, arm_gemm::VLType::None>::Transform<SimpleTensor<T> &, SimpleTensor<T>>(dst, src, rows, 0, rows, 0, cols); + break; + } + case WeightFormat::OHWIo8: + { + Transform_ref<8, 1, true, sizeof(float), sizeof(float), float, arm_gemm::VLType::None>::Transform<SimpleTensor<T> &, SimpleTensor<T>>(dst, src, rows, 0, rows, 0, cols); + break; + } + default: + break; + } + + return dst; +} + +template SimpleTensor<float> reorder_layer(const SimpleTensor<float> &src, const TensorShape &output_shape, WeightFormat output_wf); + +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/reference/Dilate.h b/tests/validation/reference/Reorder.h index 640bc9dd73..94ee5078f8 100644 --- a/tests/validation/reference/Dilate.h +++ b/tests/validation/reference/Reorder.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,10 +21,11 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_DILATE_H -#define ARM_COMPUTE_TEST_DILATE_H +#ifndef ACL_TESTS_VALIDATION_REFERENCE_REORDER +#define ACL_TESTS_VALIDATION_REFERENCE_REORDER #include "tests/SimpleTensor.h" +#include "tests/Types.h" namespace arm_compute { @@ -35,9 +36,9 @@ namespace validation namespace reference { template <typename T> -SimpleTensor<T> dilate(const SimpleTensor<T> &src, BorderMode border_mode, T constant_border_value = 0); +SimpleTensor<T> reorder_layer(const SimpleTensor<T> &src, const TensorShape &output_shape, WeightFormat output_wf); } // namespace reference } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_DILATE_H */ +#endif /* ACL_TESTS_VALIDATION_REFERENCE_REORDER */ diff --git a/tests/validation/reference/ReshapeLayer.cpp b/tests/validation/reference/ReshapeLayer.cpp index daea001be6..30a58dd65b 100644 --- a/tests/validation/reference/ReshapeLayer.cpp +++ b/tests/validation/reference/ReshapeLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Arm Limited. + * Copyright (c) 2017,2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,14 +44,15 @@ SimpleTensor<T> reshape_layer(const SimpleTensor<T> &src, const TensorShape &out return dst; } -template SimpleTensor<uint8_t> reshape_layer(const SimpleTensor<uint8_t> &src, const TensorShape &output_shape); -template SimpleTensor<int8_t> reshape_layer(const SimpleTensor<int8_t> &src, const TensorShape &output_shape); +template SimpleTensor<uint8_t> reshape_layer(const SimpleTensor<uint8_t> &src, const TensorShape &output_shape); +template SimpleTensor<int8_t> reshape_layer(const SimpleTensor<int8_t> &src, const TensorShape &output_shape); template SimpleTensor<uint16_t> reshape_layer(const SimpleTensor<uint16_t> &src, const TensorShape &output_shape); -template SimpleTensor<int16_t> reshape_layer(const SimpleTensor<int16_t> &src, const TensorShape &output_shape); +template SimpleTensor<int16_t> reshape_layer(const SimpleTensor<int16_t> &src, const TensorShape &output_shape); template SimpleTensor<uint32_t> reshape_layer(const SimpleTensor<uint32_t> &src, const TensorShape &output_shape); -template SimpleTensor<int32_t> reshape_layer(const SimpleTensor<int32_t> &src, const TensorShape &output_shape); -template SimpleTensor<half> reshape_layer(const SimpleTensor<half> &src, const TensorShape &output_shape); -template SimpleTensor<float> reshape_layer(const SimpleTensor<float> &src, const TensorShape &output_shape); +template SimpleTensor<int32_t> reshape_layer(const SimpleTensor<int32_t> &src, const TensorShape &output_shape); +template SimpleTensor<half> reshape_layer(const SimpleTensor<half> &src, const TensorShape &output_shape); +template SimpleTensor<float> reshape_layer(const SimpleTensor<float> &src, const TensorShape &output_shape); +template SimpleTensor<bfloat16> reshape_layer(const SimpleTensor<bfloat16> &src, const TensorShape &output_shape); /** [ReshapeLayer] **/ } // namespace reference } // namespace validation diff --git a/tests/validation/reference/Reverse.cpp b/tests/validation/reference/Reverse.cpp index c6c4614278..7924f900d1 100644 --- a/tests/validation/reference/Reverse.cpp +++ b/tests/validation/reference/Reverse.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2020, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -35,8 +35,9 @@ namespace validation namespace reference { template <typename T> -SimpleTensor<T> reverse(const SimpleTensor<T> &src, const SimpleTensor<uint32_t> &axis) +SimpleTensor<T> reverse(const SimpleTensor<T> &src, const SimpleTensor<int32_t> &axis, bool use_inverted_axis) { + ARM_COMPUTE_ERROR_ON(src.shape().num_dimensions() > 4); ARM_COMPUTE_ERROR_ON(axis.shape().num_dimensions() > 1); ARM_COMPUTE_ERROR_ON(axis.shape().x() > 4); @@ -48,10 +49,32 @@ SimpleTensor<T> reverse(const SimpleTensor<T> &src, const SimpleTensor<uint32_t> const unsigned int depth = src.shape()[2]; const unsigned int batches = src.shape()[3]; + const int rank = src.shape().num_dimensions(); + std::array<bool, 4> to_reverse = { { false, false, false, false } }; for(int i = 0; i < axis.num_elements(); ++i) { - to_reverse[axis[i]] = true; + int axis_i = axis[i]; + + // The values of axis tensor must be between [-rank, rank-1]. + if((axis_i < -rank) || (axis_i >= rank)) + { + ARM_COMPUTE_ERROR("the values of the axis tensor must be within [-rank, rank-1]."); + } + + // In case of negative axis value i.e targeted axis(i) = rank + axis(i) + if(axis_i < 0) + { + axis_i = rank + axis_i; + } + + // Reverse ACL axis indices convention i.e. (inverted)axis = (tensor_rank - 1) - axis + if(use_inverted_axis) + { + axis_i = (rank - 1) - axis_i; + } + + to_reverse[axis_i] = true; } const uint32_t num_elements = src.num_elements(); @@ -73,9 +96,9 @@ SimpleTensor<T> reverse(const SimpleTensor<T> &src, const SimpleTensor<uint32_t> return dst; } -template SimpleTensor<uint8_t> reverse(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint32_t> &axis); -template SimpleTensor<half> reverse(const SimpleTensor<half> &src, const SimpleTensor<uint32_t> &axis); -template SimpleTensor<float> reverse(const SimpleTensor<float> &src, const SimpleTensor<uint32_t> &axis); +template SimpleTensor<uint8_t> reverse(const SimpleTensor<uint8_t> &src, const SimpleTensor<int32_t> &axis, bool use_inverted_axis); +template SimpleTensor<half> reverse(const SimpleTensor<half> &src, const SimpleTensor<int32_t> &axis, bool use_inverted_axis); +template SimpleTensor<float> reverse(const SimpleTensor<float> &src, const SimpleTensor<int32_t> &axis, bool use_inverted_axis); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/Reverse.h b/tests/validation/reference/Reverse.h index 4a28da7270..30926b05a5 100644 --- a/tests/validation/reference/Reverse.h +++ b/tests/validation/reference/Reverse.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2019, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_REVERSE_H -#define ARM_COMPUTE_TEST_REVERSE_H +#ifndef ACL_TESTS_VALIDATION_REFERENCE_REVERSE_H +#define ACL_TESTS_VALIDATION_REFERENCE_REVERSE_H #include "tests/SimpleTensor.h" @@ -35,9 +35,9 @@ namespace validation namespace reference { template <typename T> -SimpleTensor<T> reverse(const SimpleTensor<T> &src, const SimpleTensor<uint32_t> &axis); +SimpleTensor<T> reverse(const SimpleTensor<T> &src, const SimpleTensor<int32_t> &axis, bool use_inverted_axis = false); } // namespace reference } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_REVERSE_H */ +#endif // ACL_TESTS_VALIDATION_REFERENCE_REVERSE_H diff --git a/tests/validation/reference/Scale.cpp b/tests/validation/reference/Scale.cpp index 71e98fd776..2f429cb29b 100644 --- a/tests/validation/reference/Scale.cpp +++ b/tests/validation/reference/Scale.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,7 +25,6 @@ #include "Scale.h" #include "Utils.h" -#include "arm_compute/core/utils/misc/Utility.h" #include "src/core/utils/ScaleUtils.h" #include "support/Rounding.h" @@ -183,14 +182,15 @@ SimpleTensor<T> scale_core(const SimpleTensor<T> &in, float scale_x, float scale template <typename T> SimpleTensor<T> scale(const SimpleTensor<T> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value, - SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners) + SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info) { + ARM_COMPUTE_UNUSED(output_quantization_info); return scale_core<T>(src, scale_x, scale_y, policy, border_mode, constant_border_value, sampling_policy, ceil_policy_scale, align_corners); } template <> SimpleTensor<uint8_t> scale(const SimpleTensor<uint8_t> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value, - SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners) + SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info) { SimpleTensor<uint8_t> dst; if(src.quantization_info().uniform().scale != 0.f) @@ -198,7 +198,7 @@ SimpleTensor<uint8_t> scale(const SimpleTensor<uint8_t> &src, float scale_x, flo SimpleTensor<float> src_tmp = convert_from_asymmetric(src); float constant_border_value_f = dequantize_qasymm8(constant_border_value, src.quantization_info()); SimpleTensor<float> dst_tmp = scale_core<float>(src_tmp, scale_x, scale_y, policy, border_mode, constant_border_value_f, sampling_policy, ceil_policy_scale, align_corners); - dst = convert_to_asymmetric<uint8_t>(dst_tmp, src.quantization_info()); + dst = convert_to_asymmetric<uint8_t>(dst_tmp, output_quantization_info); } else { @@ -209,7 +209,7 @@ SimpleTensor<uint8_t> scale(const SimpleTensor<uint8_t> &src, float scale_x, flo template <> SimpleTensor<int8_t> scale(const SimpleTensor<int8_t> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, int8_t constant_border_value, - SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners) + SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info) { SimpleTensor<int8_t> dst; if(src.quantization_info().uniform().scale != 0.f) @@ -217,7 +217,7 @@ SimpleTensor<int8_t> scale(const SimpleTensor<int8_t> &src, float scale_x, float SimpleTensor<float> src_tmp = convert_from_asymmetric(src); float constant_border_value_f = dequantize_qasymm8_signed(constant_border_value, src.quantization_info()); SimpleTensor<float> dst_tmp = scale_core<float>(src_tmp, scale_x, scale_y, policy, border_mode, constant_border_value_f, sampling_policy, ceil_policy_scale, align_corners); - dst = convert_to_asymmetric<int8_t>(dst_tmp, src.quantization_info()); + dst = convert_to_asymmetric<int8_t>(dst_tmp, output_quantization_info); } else { @@ -227,11 +227,11 @@ SimpleTensor<int8_t> scale(const SimpleTensor<int8_t> &src, float scale_x, float } template SimpleTensor<int16_t> scale(const SimpleTensor<int16_t> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, int16_t constant_border_value, - SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners); + SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info); template SimpleTensor<half> scale(const SimpleTensor<half> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, half constant_border_value, - SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners); + SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info); template SimpleTensor<float> scale(const SimpleTensor<float> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, float constant_border_value, - SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners); + SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/Scale.h b/tests/validation/reference/Scale.h index c66af8d94e..c32c07d1c0 100644 --- a/tests/validation/reference/Scale.h +++ b/tests/validation/reference/Scale.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -37,7 +37,7 @@ namespace reference { template <typename T> SimpleTensor<T> scale(const SimpleTensor<T> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value = 0, - SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool ceil_policy_scale = false, bool align_corners = false); + SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool ceil_policy_scale = false, bool align_corners = false, QuantizationInfo output_quantization_info = QuantizationInfo()); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/ScatterLayer.cpp b/tests/validation/reference/ScatterLayer.cpp new file mode 100644 index 0000000000..55c48a9002 --- /dev/null +++ b/tests/validation/reference/ScatterLayer.cpp @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "ScatterLayer.h" +#include "tests/validation/Helpers.h" +#include "arm_compute/core/TensorShape.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ +namespace +{ + +template <typename T> +T reduce_op(const T ¤t,const T &update,const ScatterFunction func) +{ + switch(func) + { + case ScatterFunction::Update: + return update; + break; + case ScatterFunction::Add: + return current + update; + break; + case ScatterFunction::Sub: + return current - update; + break; + case ScatterFunction::Max: + return std::max(current, update); + break; + case ScatterFunction::Min: + return std::min(current, update); + break; + default: + ARM_COMPUTE_ERROR("Unsupported Scatter function"); + break; + } +} + +template float reduce_op(const float ¤t,const float &update,const ScatterFunction func); +template half reduce_op(const half ¤t,const half &update,const ScatterFunction func); +} + +// NOTE: This function expects collapsed tensors as input. +// Batch dims for update/indices tensors should be collapsed into a single dim. +// Data dims should be collapsed into a single dim for both update and src tensors prior to calling this function. +template <typename T> +SimpleTensor<T> scatter_layer_internal(const SimpleTensor<T> &src, const SimpleTensor<T> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info) +{ + // 1. If zero initialization variable is false, copy src data to dst. + SimpleTensor<T> dst{ out_shape, src.data_type(), 1 }; + if(!info.zero_initialization) + { + std::copy_n(src.data(), src.num_elements(), dst.data()); + } + + // Number of elements between each value of the dim being iterated through + const unsigned int data_stride = updates.shape().total_size_lower(updates.shape().num_dimensions() - 1); + const unsigned int no_output_dims = out_shape.num_dimensions(); + + // Calculate output stride at given index for all output dims. + std::vector<unsigned int> out_stride_at_idx(no_output_dims); + for (unsigned int i = 0 ; i < no_output_dims; i++) + { + out_stride_at_idx[i] = out_shape.total_size_lower(i); + } + + const unsigned int indices_x_dim = static_cast<unsigned int>(indices.shape()[0]); + const unsigned int indices_y_dim = static_cast<unsigned int>(indices.shape()[1]); + + // 2. Iterate over indices tensor y-dim and replace sections of dst tensor with relevant areas of update tensor. + for(unsigned int i = 0; i < indices_y_dim; i++) + { + // NOTE : Currently, indices.shape() == [X, Y, 1, 1], where X is the indices dim and Y is the batch dim + // Starting index for both the update and indices tensors. + const unsigned int update_dim_start = i * data_stride; + const unsigned int indices_dim_start = i * indices_x_dim; + bool out_of_bounds = false; + unsigned int out_offset_acc = 0; + + // Iterate over each indices value for the relevant batch and accumulate the offset. + for(unsigned int j = 0; j < indices_x_dim; j++) + { + // Get first index value with i * indices_x_dim (iterating through y-dim/batch idx), then iterate through x dim by adding k + const int index_value = indices[indices_dim_start + j]; + const unsigned int out_dim = no_output_dims - (j+1); // Calculate corresponding output dim to current index value. + if(index_value < static_cast<int>(out_shape[out_dim]) && index_value >= 0) + { + out_offset_acc += (index_value * out_stride_at_idx[out_dim]); // offset accumulation + } + else + { + out_of_bounds = true; + break; + } + } + + // If not out of bounds, copy update tensor elements to output + if(!out_of_bounds) + { + for (unsigned int j = 0 ; j < data_stride; j++) + { + dst[out_offset_acc + j] = reduce_op(dst[out_offset_acc + j], updates[update_dim_start + j], info.func); + } + } + } + return dst; +} + +template <typename T> +SimpleTensor<T> scatter_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info) +{ + return scatter_layer_internal<T>(src, updates, indices, out_shape, info); +} + +template SimpleTensor<float> scatter_layer(const SimpleTensor<float> &src, const SimpleTensor<float> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); +template SimpleTensor<half> scatter_layer(const SimpleTensor<half> &src, const SimpleTensor<half> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); +template SimpleTensor<int32_t> scatter_layer(const SimpleTensor<int32_t> &src, const SimpleTensor<int32_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); +template SimpleTensor<uint32_t> scatter_layer(const SimpleTensor<uint32_t> &src, const SimpleTensor<uint32_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); +template SimpleTensor<int16_t> scatter_layer(const SimpleTensor<int16_t> &src, const SimpleTensor<int16_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); +template SimpleTensor<uint16_t> scatter_layer(const SimpleTensor<uint16_t> &src, const SimpleTensor<uint16_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); +template SimpleTensor<int8_t> scatter_layer(const SimpleTensor<int8_t> &src, const SimpleTensor<int8_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); +template SimpleTensor<uint8_t> scatter_layer(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/reference/HOGDescriptor.h b/tests/validation/reference/ScatterLayer.h index dffeb655b2..97d5e70b0d 100644 --- a/tests/validation/reference/HOGDescriptor.h +++ b/tests/validation/reference/ScatterLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,9 +21,11 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_HOG_DESCRIPTOR_H -#define ARM_COMPUTE_TEST_HOG_DESCRIPTOR_H +#ifndef ACL_TESTS_VALIDATION_REFERENCE_SCATTERLAYER_H +#define ACL_TESTS_VALIDATION_REFERENCE_SCATTERLAYER_H +#include "Utils.h" +#include "arm_compute/function_info/ScatterInfo.h" #include "tests/SimpleTensor.h" namespace arm_compute @@ -34,16 +36,13 @@ namespace validation { namespace reference { -template <typename T, typename U, typename V> -void hog_orientation_binning(const SimpleTensor<T> &mag, const SimpleTensor<U> &phase, SimpleTensor<V> &hog_space, const HOGInfo &hog_info); - template <typename T> -void hog_block_normalization(SimpleTensor<T> &desc, const SimpleTensor<T> &hog_space, const HOGInfo &hog_info); +SimpleTensor<T> scatter_layer_internal(const SimpleTensor<T> &src, const SimpleTensor<T> &update, const SimpleTensor<int32_t> &indices, const TensorShape &shape, const ScatterInfo &info); -template <typename T, typename U> -SimpleTensor<T> hog_descriptor(const SimpleTensor<U> &src, BorderMode border_mode, U constant_border_value, const HOGInfo &hog_info); +template <typename T> +SimpleTensor<T> scatter_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &update, const SimpleTensor<int32_t> &indices, const TensorShape &shape, const ScatterInfo &info); } // namespace reference } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_HOG_DESCRIPTOR_H */ +#endif // ACL_TESTS_VALIDATION_REFERENCE_SCATTERLAYER_H diff --git a/tests/validation/reference/Scharr.cpp b/tests/validation/reference/Scharr.cpp deleted file mode 100644 index e9fbb73d49..0000000000 --- a/tests/validation/reference/Scharr.cpp +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "Scharr.h" - -#include "Utils.h" -#include "tests/validation/Helpers.h" - -#include <array> -#include <map> -#include <utility> - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -namespace -{ -const std::array<int8_t, 9> scharr_3_x{ { -3, 0, 3, -10, 0, 10, -3, 0, 3 } }; -const std::array<int8_t, 9> scharr_3_y{ { -3, -10, -3, 0, 0, 0, 3, 10, 3 } }; - -const std::map<int, std::pair<const int8_t *, const int8_t *>> masks -{ - { 3, { scharr_3_x.data(), scharr_3_y.data() } } -}; - -template <typename T> -struct data_type; - -template <> -struct data_type<int16_t> -{ - const static DataType value = DataType::S16; -}; -} // namespace - -template <typename T, typename U> -std::pair<SimpleTensor<T>, SimpleTensor<T>> scharr(const SimpleTensor<U> &src, int filter_size, BorderMode border_mode, uint8_t constant_border_value, GradientDimension gradient_dimension) -{ - const auto shape_size = static_cast<unsigned int>(filter_size); - - SimpleTensor<T> dst_x(src.shape(), data_type<T>::value, src.num_channels()); - SimpleTensor<T> dst_y(src.shape(), data_type<T>::value, src.num_channels()); - - ValidRegion valid_region = shape_to_valid_region(src.shape(), border_mode == BorderMode::UNDEFINED, BorderSize(filter_size / 2)); - - const uint32_t num_elements = src.num_elements(); - for(uint32_t i = 0; i < num_elements; ++i) - { - Coordinates coord = index2coord(src.shape(), i); - - if(!is_in_valid_region(valid_region, coord)) - { - continue; - } - - switch(gradient_dimension) - { - case GradientDimension::GRAD_X: - apply_2d_spatial_filter(coord, src, dst_x, TensorShape{ shape_size, shape_size }, masks.at(filter_size).first, 1.f, border_mode, constant_border_value); - break; - case GradientDimension::GRAD_Y: - apply_2d_spatial_filter(coord, src, dst_y, TensorShape{ shape_size, shape_size }, masks.at(filter_size).second, 1.f, border_mode, constant_border_value); - break; - case GradientDimension::GRAD_XY: - apply_2d_spatial_filter(coord, src, dst_x, TensorShape{ shape_size, shape_size }, masks.at(filter_size).first, 1.f, border_mode, constant_border_value); - apply_2d_spatial_filter(coord, src, dst_y, TensorShape{ shape_size, shape_size }, masks.at(filter_size).second, 1.f, border_mode, constant_border_value); - break; - default: - ARM_COMPUTE_ERROR("Gradient dimension not supported"); - } - } - - return std::make_pair(dst_x, dst_y); -} - -template std::pair<SimpleTensor<int16_t>, SimpleTensor<int16_t>> scharr(const SimpleTensor<uint8_t> &src, int filter_size, BorderMode border_mode, uint8_t constant_border_value, - GradientDimension gradient_dimension); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/Scharr.h b/tests/validation/reference/Scharr.h deleted file mode 100644 index 42b3202d64..0000000000 --- a/tests/validation/reference/Scharr.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_SCHARR_H -#define ARM_COMPUTE_TEST_SCHARR_H - -#include "tests/SimpleTensor.h" -#include "tests/Types.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T, typename U> -std::pair<SimpleTensor<T>, SimpleTensor<T>> scharr(const SimpleTensor<U> &src, int filter_size, BorderMode border_mode, uint8_t constant_border_value, GradientDimension gradient_dimension); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_SCHARR_H */ diff --git a/tests/validation/reference/Sobel.cpp b/tests/validation/reference/Sobel.cpp deleted file mode 100644 index d9c2532add..0000000000 --- a/tests/validation/reference/Sobel.cpp +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "Sobel.h" - -#include "Utils.h" -#include "tests/validation/Helpers.h" - -#include <array> -#include <map> -#include <utility> - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -namespace -{ -const std::array<int8_t, 9> sobel_3_x{ { -1, 0, 1, -2, 0, 2, -1, 0, 1 } }; -const std::array<int8_t, 9> sobel_3_y{ { -1, -2, -1, 0, 0, 0, 1, 2, 1 } }; - -const std::array<int8_t, 25> sobel_5_x{ { - -1, -2, 0, 2, 1, - -4, -8, 0, 8, 4, - -6, -12, 0, 12, 6, - -4, -8, 0, 8, 4, - -1, -2, 0, 2, 1 - } }; - -const std::array<int8_t, 25> sobel_5_y{ { - -1, -4, -6, -4, -1, - -2, -8, -12, -8, -2, - 0, 0, 0, 0, 0, - 2, 8, 12, 8, 2, - 1, 4, 6, 4, 1 - } }; - -const std::array<int8_t, 49> sobel_7_x{ { - -1, -4, -5, 0, 5, 4, 1, - -6, -24, -30, 0, 30, 24, 6, - -15, -60, -75, 0, 75, 60, 15, - -20, -80, -100, 0, 100, 80, 20, - -15, -60, -75, 0, 75, 60, 15, - -6, -24, -30, 0, 30, 24, 6, - -1, -4, -5, 0, 5, 4, 1 - } }; - -const std::array<int8_t, 49> sobel_7_y{ { - -1, -6, -15, -20, -15, -6, -1, - -4, -24, -60, -80, -60, -24, -4, - -5, -30, -75, -100, -75, -30, -5, - 0, 0, 0, 0, 0, 0, 0, - 5, 30, 75, 100, 75, 30, 5, - 4, 24, 60, 80, 60, 24, 4, - 1, 6, 15, 20, 15, 6, 1 - } }; - -const std::map<int, std::pair<const int8_t *, const int8_t *>> masks -{ - { 3, { sobel_3_x.data(), sobel_3_y.data() } }, - { 5, { sobel_5_x.data(), sobel_5_y.data() } }, - { 7, { sobel_7_x.data(), sobel_7_y.data() } }, -}; - -template <typename T> -struct data_type; - -template <> -struct data_type<int16_t> -{ - const static DataType value = DataType::S16; -}; - -template <> -struct data_type<int> -{ - const static DataType value = DataType::S32; -}; -} // namespace - -template <typename T, typename U> -std::pair<SimpleTensor<T>, SimpleTensor<T>> sobel(const SimpleTensor<U> &src, int filter_size, BorderMode border_mode, uint8_t constant_border_value, GradientDimension gradient_dimension) -{ - SimpleTensor<T> dst_x(src.shape(), data_type<T>::value, src.num_channels()); - SimpleTensor<T> dst_y(src.shape(), data_type<T>::value, src.num_channels()); - - ValidRegion valid_region = shape_to_valid_region(src.shape(), border_mode == BorderMode::UNDEFINED, BorderSize(filter_size / 2)); - - const uint32_t num_elements = src.num_elements(); - for(uint32_t i = 0; i < num_elements; ++i) - { - Coordinates coord = index2coord(src.shape(), i); - - if(!is_in_valid_region(valid_region, coord)) - { - continue; - } - switch(gradient_dimension) - { - case GradientDimension::GRAD_X: - apply_2d_spatial_filter(coord, src, dst_x, TensorShape{ static_cast<unsigned int>(filter_size), static_cast<unsigned int>(filter_size) }, masks.at(filter_size).first, 1.f, border_mode, - constant_border_value); - break; - case GradientDimension::GRAD_Y: - apply_2d_spatial_filter(coord, src, dst_y, TensorShape{ static_cast<unsigned int>(filter_size), static_cast<unsigned int>(filter_size) }, masks.at(filter_size).second, 1.f, border_mode, - constant_border_value); - break; - case GradientDimension::GRAD_XY: - apply_2d_spatial_filter(coord, src, dst_x, TensorShape{ static_cast<unsigned int>(filter_size), static_cast<unsigned int>(filter_size) }, masks.at(filter_size).first, 1.f, border_mode, - constant_border_value); - apply_2d_spatial_filter(coord, src, dst_y, TensorShape{ static_cast<unsigned int>(filter_size), static_cast<unsigned int>(filter_size) }, masks.at(filter_size).second, 1.f, border_mode, - constant_border_value); - break; - default: - ARM_COMPUTE_ERROR("Gradient dimension not supported"); - } - } - - return std::make_pair(dst_x, dst_y); -} - -template std::pair<SimpleTensor<int16_t>, SimpleTensor<int16_t>> sobel(const SimpleTensor<uint8_t> &src, int filter_size, BorderMode border_mode, uint8_t constant_border_value, - GradientDimension gradient_dimension); -template std::pair<SimpleTensor<int>, SimpleTensor<int>> sobel(const SimpleTensor<uint8_t> &src, int filter_size, BorderMode border_mode, uint8_t constant_border_value, - GradientDimension gradient_dimension); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/Sobel.h b/tests/validation/reference/Sobel.h deleted file mode 100644 index 86d6d0bc11..0000000000 --- a/tests/validation/reference/Sobel.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_SOBEL_H -#define ARM_COMPUTE_TEST_SOBEL_H - -#include "arm_compute/core/Types.h" -#include "tests/SimpleTensor.h" -#include "tests/Types.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T, typename U> -std::pair<SimpleTensor<T>, SimpleTensor<T>> sobel(const SimpleTensor<U> &src, int filter_size, BorderMode border_mode, uint8_t constant_border_value, GradientDimension gradient_dimension); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_SOBEL_H */ diff --git a/tests/validation/reference/Threshold.cpp b/tests/validation/reference/Threshold.cpp deleted file mode 100644 index 6bc6cf0b4a..0000000000 --- a/tests/validation/reference/Threshold.cpp +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2017 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal src the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included src all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. src NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER src AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * dst OF OR src CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS src THE - * SOFTWARE. - */ -#include "Threshold.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<T> threshold(const SimpleTensor<T> &src, T threshold, T false_value, T true_value, ThresholdType type, T upper) -{ - SimpleTensor<T> dst(src.shape(), src.data_type()); - - switch(type) - { - case ThresholdType::BINARY: - for(int i = 0; i < src.num_elements(); ++i) - { - dst[i] = ((src[i] > threshold) ? true_value : false_value); - } - break; - case ThresholdType::RANGE: - for(int i = 0; i < src.num_elements(); ++i) - { - if(src[i] > upper) - { - dst[i] = false_value; - } - else if(src[i] < threshold) - { - dst[i] = false_value; - } - else - { - dst[i] = true_value; - } - } - break; - default: - ARM_COMPUTE_ERROR("Thresholding type not recognised"); - break; - } - - return dst; -} - -template SimpleTensor<uint8_t> threshold(const SimpleTensor<uint8_t> &src, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/Threshold.h b/tests/validation/reference/Threshold.h deleted file mode 100644 index bee9531351..0000000000 --- a/tests/validation/reference/Threshold.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_THRESHOLD_H -#define ARM_COMPUTE_TEST_THRESHOLD_H - -#include "tests/SimpleTensor.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<T> threshold(const SimpleTensor<T> &src, T threshold, T false_value, T true_value, ThresholdType type, T upper); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_THRESHOLD_H */ diff --git a/tests/validation/reference/Utils.h b/tests/validation/reference/Utils.h index 8e15faab8f..c83c6ea4b3 100644 --- a/tests/validation/reference/Utils.h +++ b/tests/validation/reference/Utils.h @@ -26,7 +26,6 @@ #include "arm_compute/core/Types.h" #include "tests/Globals.h" -#include "tests/ILutAccessor.h" #include "tests/Types.h" #include <array> @@ -123,26 +122,6 @@ void apply_2d_spatial_filter(Coordinates coord, const SimpleTensor<T> &src, Simp RawTensor transpose(const RawTensor &src, int chunk_width = 1); -/** Fill matrix random. - * - * @param[in,out] matrix Matrix - */ -template <std::size_t SIZE> -inline void fill_warp_matrix(std::array<float, SIZE> &matrix) -{ - std::mt19937 gen(library.get()->seed()); - std::uniform_real_distribution<float> dist(-1, 1); - for(auto &x : matrix) - { - x = dist(gen); - } - if(SIZE == 9) - { - // This is only used in Warp Perspective, we set M[3][3] = 1 so that Z0 is not 0 and we avoid division by 0. - matrix[8] = 1.f; - } -} - bool valid_bilinear_policy(float xn, float yn, int width, int height, BorderMode border_mode); } // namespace validation } // namespace test diff --git a/tests/validation/reference/UtilsQuantizedAsymm.h b/tests/validation/reference/UtilsQuantizedAsymm.h index 25873acc93..e5ecc66545 100644 --- a/tests/validation/reference/UtilsQuantizedAsymm.h +++ b/tests/validation/reference/UtilsQuantizedAsymm.h @@ -32,6 +32,22 @@ namespace test { namespace validation { +namespace +{ +#if __clang__ +// This has been tested on clang 7.0.2 (__clang_major__ == 7 && __clang_minor__ == 0 && __clang_patchlevel__ == 2) +inline int64_t to_int64(int32_t val) +{ + return static_cast<int64_t>(val) | ((val < 0) ? (((1ll << 32) - 1) << 32) : 0); +} +#else // __clang__ +inline int64_t to_int64(int32_t val) +{ + return static_cast<int64_t>(val); +} +#endif // __clang__ +} // namespace + /** Rounded to nearest division by a power-of-two. */ inline int32_t asymm_rounding_divide_by_pow2(int32_t x, int exponent) { @@ -40,15 +56,15 @@ inline int32_t asymm_rounding_divide_by_pow2(int32_t x, int exponent) return (x >> exponent) + ((x & mask) > threshold ? 1 : 0); } -/** Multiplication of two integers. The same as ARMv7 Neon VQRDMULH instruction. */ +/** Multiplication of two integers. The same as ARMv7 Arm® Neon™ VQRDMULH instruction. */ inline int32_t asymm_int_mult(int32_t a, int32_t b) { - bool overflow = a == b && a == std::numeric_limits<int32_t>::min(); - int64_t a_64(a); - int64_t b_64(b); - int64_t ab_64 = a_64 * b_64; - int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30)); - int32_t ab_x2_high32 = static_cast<int32_t>((ab_64 + nudge) / (1ll << 31)); + const bool overflow = a == b && a == std::numeric_limits<int32_t>::min(); + const int64_t a_64 = to_int64(a); + const int64_t b_64 = to_int64(b); + const int64_t ab_64 = a_64 * b_64; + const int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30)); + const int32_t ab_x2_high32 = static_cast<int32_t>((ab_64 + nudge) / (1ll << 31)); return overflow ? std::numeric_limits<int32_t>::max() : ab_x2_high32; } diff --git a/tests/validation/reference/WarpAffine.cpp b/tests/validation/reference/WarpAffine.cpp deleted file mode 100644 index 3580b75d43..0000000000 --- a/tests/validation/reference/WarpAffine.cpp +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "WarpAffine.h" - -#include "Utils.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -bool valid_bilinear_policy(float xn, float yn, int width, int height, BorderMode border_mode) -{ - if(border_mode != BorderMode::UNDEFINED) - { - return true; - } - if((0 <= yn + 1) && (yn + 1 < height) && (0 <= xn + 1) && (xn + 1 < width)) - { - return true; - } - return false; -} - -template <typename T> -SimpleTensor<T> warp_affine(const SimpleTensor<T> &src, SimpleTensor<T> &valid_mask, const float *matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value) -{ - SimpleTensor<T> dst(src.shape(), src.data_type()); - - // x0 = M00 * x + M01 * y + M02 - // y0 = M10 * x + M11 * y + M12 - const float M00 = matrix[0]; - const float M10 = matrix[1]; - const float M01 = matrix[0 + 1 * 2]; - const float M11 = matrix[1 + 1 * 2]; - const float M02 = matrix[0 + 2 * 2]; - const float M12 = matrix[1 + 2 * 2]; - - const int width = src.shape().x(); - const int height = src.shape().y(); - - const uint32_t num_elements = src.num_elements(); - for(uint32_t element_idx = 0; element_idx < num_elements; ++element_idx) - { - valid_mask[element_idx] = 1; - Coordinates id = index2coord(src.shape(), element_idx); - int idx = id.x(); - int idy = id.y(); - - float x0 = M00 * idx + M01 * idy + M02; - float y0 = M10 * idx + M11 * idy + M12; - - id.set(0, static_cast<int>(std::floor(x0))); - id.set(1, static_cast<int>(std::floor(y0))); - if((0 <= y0) && (y0 < height) && (0 <= x0) && (x0 < width)) - { - switch(policy) - { - case InterpolationPolicy::NEAREST_NEIGHBOR: - dst[element_idx] = tensor_elem_at(src, id, border_mode, constant_border_value); - break; - case InterpolationPolicy::BILINEAR: - (valid_bilinear_policy(x0, y0, width, height, border_mode)) ? dst[element_idx] = bilinear_policy(src, id, x0, y0, border_mode, constant_border_value) : - valid_mask[element_idx] = 0; - break; - case InterpolationPolicy::AREA: - default: - ARM_COMPUTE_ERROR("Interpolation not supported"); - } - } - else - { - if(border_mode == BorderMode::UNDEFINED) - { - valid_mask[element_idx] = 0; - } - else - { - switch(policy) - { - case InterpolationPolicy::NEAREST_NEIGHBOR: - if(border_mode == BorderMode::CONSTANT) - { - dst[element_idx] = constant_border_value; - } - else if(border_mode == BorderMode::REPLICATE) - { - id.set(0, std::max(0, std::min(static_cast<int>(x0), width - 1))); - id.set(1, std::max(0, std::min(static_cast<int>(y0), height - 1))); - dst[element_idx] = src[coord2index(src.shape(), id)]; - } - break; - case InterpolationPolicy::BILINEAR: - dst[element_idx] = bilinear_policy(src, id, x0, y0, border_mode, constant_border_value); - break; - case InterpolationPolicy::AREA: - default: - ARM_COMPUTE_ERROR("Interpolation not supported"); - } - } - } - } - - return dst; -} - -template SimpleTensor<uint8_t> warp_affine(const SimpleTensor<uint8_t> &src, SimpleTensor<uint8_t> &valid_mask, const float *matrix, InterpolationPolicy policy, BorderMode border_mode, - uint8_t constant_border_value); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute
\ No newline at end of file diff --git a/tests/validation/reference/WarpAffine.h b/tests/validation/reference/WarpAffine.h deleted file mode 100644 index 90f765c283..0000000000 --- a/tests/validation/reference/WarpAffine.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_WARP_AFFINE_H -#define ARM_COMPUTE_TEST_WARP_AFFINE_H - -#include "tests/SimpleTensor.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<T> warp_affine(const SimpleTensor<T> &src, SimpleTensor<T> &valid_mask, const float *matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_WARP_AFFINE_H */ diff --git a/tests/validation/reference/WarpPerspective.cpp b/tests/validation/reference/WarpPerspective.cpp deleted file mode 100644 index e35d75e6e2..0000000000 --- a/tests/validation/reference/WarpPerspective.cpp +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Helpers.h" - -#include "Utils.h" -#include "WarpPerspective.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<T> warp_perspective(const SimpleTensor<T> &src, SimpleTensor<T> &valid_mask, const float *matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value) -{ - SimpleTensor<T> dst(src.shape(), src.data_type()); - - // x0 = M00 * x + M01 * y + M02 - // y0 = M10 * x + M11 * y + M12 - // z0 = M20 * x + M21 * y + M22 - // xn = x0 / z0 - // yn = y0 / z0 - const float M00 = matrix[0]; - const float M10 = matrix[1]; - const float M20 = matrix[2]; - const float M01 = matrix[0 + 1 * 3]; - const float M11 = matrix[1 + 1 * 3]; - const float M21 = matrix[2 + 1 * 3]; - const float M02 = matrix[0 + 2 * 3]; - const float M12 = matrix[1 + 2 * 3]; - const float M22 = matrix[2 + 2 * 3]; - - const int width = src.shape().x(); - const int height = src.shape().y(); - - const uint32_t num_elements = src.num_elements(); - for(uint32_t element_idx = 0; element_idx < num_elements; ++element_idx) - { - valid_mask[element_idx] = 1; - Coordinates id = index2coord(src.shape(), element_idx); - const int idx = id.x(); - const int idy = id.y(); - const float z0 = M20 * idx + M21 * idy + M22; - - const float x0 = (M00 * idx + M01 * idy + M02); - const float y0 = (M10 * idx + M11 * idy + M12); - - const float xn = x0 / z0; - const float yn = y0 / z0; - id.set(0, static_cast<int>(std::floor(xn))); - id.set(1, static_cast<int>(std::floor(yn))); - if((0 <= yn) && (yn < height) && (0 <= xn) && (xn < width)) - { - switch(policy) - { - case InterpolationPolicy::NEAREST_NEIGHBOR: - dst[element_idx] = tensor_elem_at(src, id, border_mode, constant_border_value); - break; - case InterpolationPolicy::BILINEAR: - (valid_bilinear_policy(xn, yn, width, height, border_mode)) ? dst[element_idx] = bilinear_policy(src, id, xn, yn, border_mode, constant_border_value) : valid_mask[element_idx] = 0; - break; - case InterpolationPolicy::AREA: - default: - ARM_COMPUTE_ERROR("Interpolation not supported"); - break; - } - } - else - { - if(border_mode == BorderMode::UNDEFINED) - { - valid_mask[element_idx] = 0; - } - else - { - switch(policy) - { - case InterpolationPolicy::NEAREST_NEIGHBOR: - if(border_mode == BorderMode::CONSTANT) - { - dst[element_idx] = constant_border_value; - } - else if(border_mode == BorderMode::REPLICATE) - { - id.set(0, std::max(0, std::min(static_cast<int>(xn), width - 1))); - id.set(1, std::max(0, std::min(static_cast<int>(yn), height - 1))); - dst[element_idx] = src[coord2index(src.shape(), id)]; - } - break; - case InterpolationPolicy::BILINEAR: - dst[element_idx] = bilinear_policy(src, id, xn, yn, border_mode, constant_border_value); - break; - case InterpolationPolicy::AREA: - default: - ARM_COMPUTE_ERROR("Interpolation not supported"); - break; - } - } - } - } - return dst; -} - -template SimpleTensor<uint8_t> warp_perspective(const SimpleTensor<uint8_t> &src, SimpleTensor<uint8_t> &valid_mask, const float *matrix, InterpolationPolicy policy, BorderMode border_mode, - uint8_t constant_border_value); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/WarpPerspective.h b/tests/validation/reference/WarpPerspective.h deleted file mode 100644 index 7fcd5ddf7b..0000000000 --- a/tests/validation/reference/WarpPerspective.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_WARP_PERSPECTIVE_H -#define ARM_COMPUTE_TEST_WARP_PERSPECTIVE_H - -#include "tests/SimpleTensor.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<T> warp_perspective(const SimpleTensor<T> &src, SimpleTensor<T> &valid_mask, const float *matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_WARP_PERSPECTIVE_H */ diff --git a/tests/validation/reference/YOLOLayer.cpp b/tests/validation/reference/YOLOLayer.cpp deleted file mode 100644 index fbc81f1af9..0000000000 --- a/tests/validation/reference/YOLOLayer.cpp +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "YOLOLayer.h" - -#include "ActivationLayer.h" - -#include "arm_compute/core/Types.h" -#include "tests/validation/Helpers.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type> -SimpleTensor<T> yolo_layer(const SimpleTensor<T> &src, const ActivationLayerInfo &info, int32_t num_classes) -{ - // Create reference - SimpleTensor<T> dst{ src.shape(), src.data_type() }; - - // Compute reference - const T a(info.a()); - const T b(info.b()); - - const uint32_t num_elements = src.num_elements(); -#if defined(_OPENMP) - #pragma omp parallel for -#endif /* _OPENMP */ - for(uint32_t i = 0; i < num_elements; ++i) - { - const size_t z = index2coord(dst.shape(), i).z() % (num_classes + 5); - - if(z != 2 && z != 3) - { - dst[i] = activate_float<T>(src[i], a, b, info.activation()); - } - else - { - dst[i] = src[i]; - } - } - - return dst; -} - -template <> -SimpleTensor<uint8_t> yolo_layer<uint8_t>(const SimpleTensor<uint8_t> &src, const ActivationLayerInfo &info, int32_t num_classes) -{ - SimpleTensor<float> src_tmp = convert_from_asymmetric(src); - SimpleTensor<float> dst_tmp = yolo_layer<float>(src_tmp, info, num_classes); - SimpleTensor<uint8_t> dst = convert_to_asymmetric<uint8_t>(dst_tmp, src.quantization_info()); - return dst; -} - -template SimpleTensor<float> yolo_layer(const SimpleTensor<float> &src, const ActivationLayerInfo &info, int32_t num_classes); -template SimpleTensor<half> yolo_layer(const SimpleTensor<half> &src, const ActivationLayerInfo &info, int32_t num_classes); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute |