From ee939fb58e3fc50ae7c92c895f8abd1dd9f20eb3 Mon Sep 17 00:00:00 2001
From: Luca Foschiani
Date: Tue, 28 Jan 2020 10:38:07 +0000
Subject: COMPMID-2774: Add support for QASYMM8_SIGNED in NEReductionOperation,
 NEReduceMean and NEArgMinMaxLayer

Signed-off-by: Luca Foschiani
Change-Id: Icf198a983c8ce2c6cd8451a1190bb99115eac3af
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2652
Tested-by: Arm Jenkins
Reviewed-by: Michele Di Giorgio
Reviewed-by: Giorgio Arena
Comments-Addressed: Arm Jenkins
---
 .../core/NEON/kernels/NEReductionOperationKernel.h |   6 +-
 arm_compute/core/NEON/wrapper/intrinsics/cvt.h     |  61 ++++++
 .../core/NEON/wrapper/intrinsics/intrinsics.h      |   1 +
 arm_compute/core/Utils.h                           |   6 +
 .../runtime/NEON/functions/NEArgMinMaxLayer.h      |   6 +-
 arm_compute/runtime/NEON/functions/NEReduceMean.h  |   6 +-
 .../runtime/NEON/functions/NEReductionOperation.h  |   6 +-
 .../NEON/kernels/NEReductionOperationKernel.cpp    | 235 ++++++++++++---------
 src/runtime/NEON/functions/NEReduceMean.cpp        |   4 +-
 .../NEON/functions/NEReductionOperation.cpp        |  46 +---
 tests/validation/NEON/ArgMinMax.cpp                |  24 +++
 tests/validation/NEON/ReduceMean.cpp               |  29 ++-
 tests/validation/NEON/ReductionOperation.cpp       |  39 +++-
 tests/validation/fixtures/ArgMinMaxFixture.h       |  10 +-
 .../fixtures/ReductionOperationFixture.h           |  31 ++-
 tests/validation/reference/ReductionOperation.cpp  |   1 +
 16 files changed, 335 insertions(+), 176 deletions(-)
 create mode 100644 arm_compute/core/NEON/wrapper/intrinsics/cvt.h

diff --git a/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h b/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h
index 36792abee3..28cca4987b 100644
--- a/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h
+++ b/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -59,7 +59,7 @@ public:
    /** Set the source, destination of the kernel
     *
-    * @param[in]  input  Source tensor. Data type supported: QASYMM8/F16/F32. Data layouts supported: NCHW.
+    * @param[in]  input  Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32. Data layouts supported: NCHW.
     * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input, S32 for ARG_MIN/ARG_MAX.
     *                    Output will have the same number of dimensions as input.
     * @param[in]  axis   Axis along which to reduce. Supported reduction axis : 0
@@ -69,7 +69,7 @@ public:
    /** Static function to check if given info will lead to a valid configuration of @ref NEReductionOperationKernel.
     *
-    * @param[in] input  Source tensor info. Data type supported: QASYMM8/F16/F32. Data layouts supported: NCHW.
+    * @param[in] input  Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32. Data layouts supported: NCHW.
     * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input, S32 for ARG_MIN/ARG_MAX.
     *                   Output will have the same number of dimensions as input.
     * @param[in] axis   Axis along which to reduce. Supported reduction axis : 0
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/cvt.h b/arm_compute/core/NEON/wrapper/intrinsics/cvt.h
new file mode 100644
index 0000000000..1f22e09a11
--- /dev/null
+++ b/arm_compute/core/NEON/wrapper/intrinsics/cvt.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_WRAPPER_CVT_H
+#define ARM_COMPUTE_WRAPPER_CVT_H
+
+#include <arm_neon.h>
+
+namespace arm_compute
+{
+namespace wrapper
+{
+#define VCVT_TO_F32_IMPL(ptype, vtype, prefix, postfix1, postfix2)                   \
+    template <typename T>                                                            \
+    inline typename std::enable_if<std::is_same<T, float>::value, float32x4_t>::type \
+    vcvt(const vtype &a)                                                             \
+    {                                                                                 \
+        return prefix##_##postfix1##_##postfix2(a);                                  \
+    }
+
+VCVT_TO_F32_IMPL(float32x4_t, uint32x4_t, vcvtq, f32, u32)
+VCVT_TO_F32_IMPL(float32x4_t, int32x4_t, vcvtq, f32, s32)
+#undef VCVT_TO_F32_IMPL
+
+template <typename T>
+inline typename std::enable_if<std::is_same<T, uint8_t>::value, uint32x4_t>::type
+vcvt(const float32x4_t &a)
+{
+    return vcvtq_u32_f32(a);
+}
+
+template <typename T>
+inline typename std::enable_if<std::is_same<T, int8_t>::value, int32x4_t>::type
+vcvt(const float32x4_t &a)
+{
+    return vcvtq_s32_f32(a);
+}
+
+} // namespace wrapper
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_WRAPPER_CVT_H */
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h b/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h
index a7af352c76..51b1fcc1bd 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h
@@ -33,6 +33,7 @@
 #include "arm_compute/core/NEON/wrapper/intrinsics/cgt.h"
 #include "arm_compute/core/NEON/wrapper/intrinsics/clt.h"
 #include "arm_compute/core/NEON/wrapper/intrinsics/combine.h"
+#include "arm_compute/core/NEON/wrapper/intrinsics/cvt.h"
 #include "arm_compute/core/NEON/wrapper/intrinsics/div.h"
 #include "arm_compute/core/NEON/wrapper/intrinsics/dup_n.h"
 #include "arm_compute/core/NEON/wrapper/intrinsics/eor.h"
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h
index 7ab78be908..4a3b01d21f 100644
--- a/arm_compute/core/Utils.h
+++ b/arm_compute/core/Utils.h
@@ -596,6 +596,12 @@ inline std::tuple<PixelValue, PixelValue> get_min_max(DataType dt)
             max = PixelValue(std::numeric_limits<int32_t>::max());
             break;
         }
+        case DataType::F16:
+        {
+            min = PixelValue(std::numeric_limits<half>::lowest());
+            max = PixelValue(std::numeric_limits<half>::max());
+            break;
+        }
         case DataType::F32:
         {
             min = PixelValue(std::numeric_limits<float>::lowest());
diff --git a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
index e4a7b94a7a..c50f358d1f 100644
--- a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -54,7 +54,7 @@ public: NEArgMinMaxLayer(std::shared_ptr memory_manager = nullptr); /** Set the input and output tensors. * - * @param[in] input Input source tensor. Data types supported: QASYMM8/S32/F16/F32. + * @param[in] input Input source tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/S32/F16/F32. * @param[in] axis Axis to find max/min index. * @param[out] output Output source tensor. Data types supported: U32/S32. * @param[in] op Operation to perform: min or max @@ -62,7 +62,7 @@ public: void configure(ITensor *input, int axis, ITensor *output, const ReductionOperation &op); /** Static function to check if given info will lead to a valid configuration of @ref NEArgMinMaxLayer * - * @param[in] input Input source tensor info. Data types supported: QASYMM8/S32/F16/F32. + * @param[in] input Input source tensor info. Data types supported: QASYMM8_SIGNED/QASYMM8/S32/F16/F32. * @param[in] axis Axis to find max/min index. * @param[in] output Output source tensor info. Data types supported: U32/S32. * @param[in] op Operation to perform: min or max diff --git a/arm_compute/runtime/NEON/functions/NEReduceMean.h b/arm_compute/runtime/NEON/functions/NEReduceMean.h index 69804b1fd8..3c7cc21929 100644 --- a/arm_compute/runtime/NEON/functions/NEReduceMean.h +++ b/arm_compute/runtime/NEON/functions/NEReduceMean.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -45,7 +45,7 @@ public: * * @note Supported tensor rank: up to 4 * - * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32 + * @param[in] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32 * @param[in] reduction_axis Reduction axis vector. * @param[in] keep_dims If positive, retains reduced dimensions with length 1. * @param[out] output Destination tensor. Data type supported: Same as @p input @@ -54,7 +54,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEReduceMean * - * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32 + * @param[in] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32 * @param[in] reduction_axis Reduction axis vector. * @param[in] keep_dims If positive, retains reduced dimensions with length 1. * @param[in] output Destination tensor. Data type supported: Same as @p input diff --git a/arm_compute/runtime/NEON/functions/NEReductionOperation.h b/arm_compute/runtime/NEON/functions/NEReductionOperation.h index 24142315f4..abda4159ba 100644 --- a/arm_compute/runtime/NEON/functions/NEReductionOperation.h +++ b/arm_compute/runtime/NEON/functions/NEReductionOperation.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -49,7 +49,7 @@ public: NEReductionOperation(std::shared_ptr memory_manager = nullptr); /** Set the input and output tensors. * - * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. Data layouts supported: NCHW. (Written to only for border_size != 0) + * @param[in] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32. Data layouts supported: NCHW. (Written to only for border_size != 0) * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. * @param[in] axis Dimension along which to reduce. 
Supported reduction axis : 0 * @param[in] op Reduction operation to perform. @@ -59,7 +59,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEReductionOperation. * - * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32. Data layouts supported: NCHW. (Written to only for border_size != 0) + * @param[in] input Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32. Data layouts supported: NCHW. (Written to only for border_size != 0) * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input. * @param[in] axis Dimension along which to reduce. Supported reduction axis : 0 * @param[in] op Reduction operation to perform. diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp index da82bc2f6f..e2dee67d01 100644 --- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp +++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp @@ -31,6 +31,7 @@ #include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/NEON/NEMath.h" #include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/SaturateCast.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" @@ -42,6 +43,22 @@ namespace arm_compute { namespace { +// Helper function that calls vqmovun/vqmvn, vcombine and vstore, allows templating of RedOpYZW_quantized +template +void combine_and_store(int16x8_t t1, int16x8_t t2, Iterator &output) +{ + if(std::is_same::value) + { + auto res = wrapper::vcombine(wrapper::vqmovun(t1), wrapper::vqmovun(t2)); + wrapper::vstore(output.ptr(), res); + } + else + { + auto res = wrapper::vcombine(wrapper::vqmovn(t1), wrapper::vqmovn(t2)); + wrapper::vstore(reinterpret_cast(output.ptr()), res); + } +} + template uint32x4x4_t calculate_index(uint32_t idx, T a, T b, uint32x4x4_t c, ReductionOperation op, int axis) { @@ -65,8 +82,8 @@ uint32x4x4_t calculate_index(uint32_t idx, T a, T b, uint32x4x4_t c, ReductionOp return res; } -template <> -uint32x4x4_t calculate_index(uint32_t idx, uint8x16_t a, uint8x16_t b, uint32x4x4_t c, ReductionOperation op, int axis) +template +uint32x4x4_t calculate_index_quantized(uint32_t idx, T a, T b, uint32x4x4_t c, ReductionOperation op, int axis) { uint32x4x4_t mask{ { 0 } }; uint8x16_t mask_u8{ 0 }; @@ -112,32 +129,49 @@ uint32x4x4_t calculate_index(uint32_t idx, uint8x16_t a, uint8x16_t b, uint32x4x } // Helper function to calculate the minimum value of the input vector. All the elements in the output vector contain the min value. -float32x2_t calculate_min(float32x4_t in) +template +inline typename std::enable_if < std::is_same::value || std::is_same::value, + typename std::conditional::value, float32x2_t, int32x2_t>::type >::type + calculate_min(T in) { auto pmin = wrapper::vpmin(wrapper::vgethigh(in), wrapper::vgetlow(in)); return wrapper::vpmin(pmin, pmin); } -// Helper function to calculate the maximum value of the input vector. All the elements in the output vector contain the max value. -float32x2_t calculate_max(float32x4_t in) -{ - auto pmax = wrapper::vpmax(wrapper::vgethigh(in), wrapper::vgetlow(in)); - return wrapper::vpmax(pmax, pmax); -} // Helper function to calculate the minimum value of the input vector. All the elements in the output vector contain the min value. 
-int32x2_t calculate_min(int32x4_t in) +template +inline typename std::enable_if < std::is_same::value || std::is_same::value, + typename std::conditional::value, uint8x8_t, int8x8_t>::type >::type + calculate_min(T in) { auto pmin = wrapper::vpmin(wrapper::vgethigh(in), wrapper::vgetlow(in)); + pmin = wrapper::vpmin(pmin, pmin); + pmin = wrapper::vpmin(pmin, pmin); return wrapper::vpmin(pmin, pmin); } // Helper function to calculate the maximum value of the input vector. All the elements in the output vector contain the max value. -int32x2_t calculate_max(int32x4_t in) +template +inline typename std::enable_if < std::is_same::value || std::is_same::value, + typename std::conditional::value, float32x2_t, int32x2_t>::type >::type + calculate_max(T in) { auto pmax = wrapper::vpmax(wrapper::vgethigh(in), wrapper::vgetlow(in)); return wrapper::vpmax(pmax, pmax); } +// Helper function to calculate the maximum value of the input vector. All the elements in the output vector contain the max value. +template +inline typename std::enable_if < std::is_same::value || std::is_same::value, + typename std::conditional::value, uint8x8_t, int8x8_t>::type >::type + calculate_max(T in) +{ + auto pmax = wrapper::vpmax(wrapper::vgethigh(in), wrapper::vgetlow(in)); + pmax = wrapper::vpmax(pmax, pmax); + pmax = wrapper::vpmax(pmax, pmax); + return wrapper::vpmax(pmax, pmax); +} + template uint32_t calculate_vector_index(uint32x4x4_t vec_res_idx, T vec_res_value, ReductionOperation op) { @@ -165,25 +199,8 @@ uint32_t calculate_vector_index(uint32x4x4_t vec_res_idx, T vec_res_value, Reduc return (res - 0xFFFFFFFF); } -// Helper function to calculate the minimum value of the input vector. All the elements in the output vector contain the min value. -inline uint8x8_t calculate_min(uint8x16_t in) -{ - auto pmin = wrapper::vpmin(wrapper::vgethigh(in), wrapper::vgetlow(in)); - pmin = wrapper::vpmin(pmin, pmin); - pmin = wrapper::vpmin(pmin, pmin); - return wrapper::vpmin(pmin, pmin); -} -// Helper function to calculate the maximum value of the input vector. All the elements in the output vector contain the max value. 
-inline uint8x8_t calculate_max(uint8x16_t in) -{ - auto pmax = wrapper::vpmax(wrapper::vgethigh(in), wrapper::vgetlow(in)); - pmax = wrapper::vpmax(pmax, pmax); - pmax = wrapper::vpmax(pmax, pmax); - return wrapper::vpmax(pmax, pmax); -} - -template <> -uint32_t calculate_vector_index(uint32x4x4_t vec_res_idx, uint8x16_t vec_res_value, ReductionOperation op) +template +uint32_t calculate_vector_index_quantized(uint32x4x4_t vec_res_idx, T vec_res_value, ReductionOperation op) { uint32x4x4_t res_idx_mask{ { 0 } }; uint32x4_t mask_ones = vdupq_n_u32(0xFFFFFFFF); @@ -228,6 +245,7 @@ uint32_t calculate_vector_index(uint32x4x4_t vec_res_idx, uint8x16_t vec_res_val return (res - 0xFFFFFFFF); } + #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC template <> uint32x4x4_t calculate_index(uint32_t idx, float16x8_t a, float16x8_t b, uint32x4x4_t c, ReductionOperation op, int axis) @@ -540,35 +558,38 @@ struct RedOpX } }; -struct RedOpX_qasymm8 +template +struct RedOpX_quantized { inline void operator()(Iterator &input, Iterator &output, Window &in_slice, Window &out_slice, const TensorInfo &in_info, const ReductionOperation op) { ARM_COMPUTE_UNUSED(out_slice); + using PromotedType = typename wrapper::traits::promote::type>::type; + const UniformQuantizationInfo iq_info = in_info.quantization_info().uniform(); - auto vec_res_value1 = vdupq_n_u32(static_cast(0.f)); - auto vec_res_value2 = vdupq_n_u32(static_cast(0.f)); - auto vec_res_value3 = vdupq_n_u32(static_cast(0.f)); - auto vec_res_value4 = vdupq_n_u32(static_cast(0.f)); + auto vec_res_value1 = wrapper::vdup_n(static_cast(0.f), wrapper::traits::vector_128_tag{}); + auto vec_res_value2 = wrapper::vdup_n(static_cast(0.f), wrapper::traits::vector_128_tag{}); + auto vec_res_value3 = wrapper::vdup_n(static_cast(0.f), wrapper::traits::vector_128_tag{}); + auto vec_res_value4 = wrapper::vdup_n(static_cast(0.f), wrapper::traits::vector_128_tag{}); auto vec_res_value1_f = vdupq_n_f32(static_cast(1.f)); auto vec_res_value2_f = vdupq_n_f32(static_cast(1.f)); auto vec_res_value3_f = vdupq_n_f32(static_cast(1.f)); auto vec_res_value4_f = vdupq_n_f32(static_cast(1.f)); - uint8x16_t vec_res_value = { 0 }; + typename wrapper::traits::neon_vector::type vec_res_value = { 0 }; if(op == ReductionOperation::ARG_IDX_MAX || op == ReductionOperation::ARG_IDX_MIN || op == ReductionOperation::MIN || op == ReductionOperation::MAX) { - vec_res_value = wrapper::vdup_n(*input.ptr(), wrapper::traits::vector_128_tag{}); + vec_res_value = wrapper::vdup_n(*reinterpret_cast(input.ptr()), wrapper::traits::vector_128_tag{}); } uint32x4x4_t vec_res_idx{ { 0 } }; execute_window_loop(in_slice, [&](const Coordinates & id) { - const auto vec_elements = wrapper::vloadq(input.ptr()); + const auto vec_elements = wrapper::vloadq(reinterpret_cast(input.ptr())); switch(op) { case ReductionOperation::SUM: @@ -593,18 +614,18 @@ struct RedOpX_qasymm8 const auto offset32x4f_4 = vdupq_n_f32(iq_info.offset); const auto scale32x4f_4 = vdupq_n_f32(iq_info.scale); - const auto temp16x8t_1 = vmovl_u8(vget_low_u8(vec_elements)); - const auto temp16x8t_2 = vmovl_u8(vget_high_u8(vec_elements)); + const auto temp16x8t_1 = wrapper::vmovl(wrapper::vgetlow(vec_elements)); + const auto temp16x8t_2 = wrapper::vmovl(wrapper::vgethigh(vec_elements)); - const auto temp32x4t_1 = vmovl_u16(vget_low_u16(temp16x8t_1)); - const auto temp32x4t_2 = vmovl_u16(vget_high_u16(temp16x8t_1)); - const auto temp32x4t_3 = vmovl_u16(vget_low_u16(temp16x8t_2)); - const auto temp32x4t_4 = vmovl_u16(vget_high_u16(temp16x8t_2)); + const auto 
temp32x4t_1 = wrapper::vmovl(wrapper::vgetlow(temp16x8t_1)); + const auto temp32x4t_2 = wrapper::vmovl(wrapper::vgethigh(temp16x8t_1)); + const auto temp32x4t_3 = wrapper::vmovl(wrapper::vgetlow(temp16x8t_2)); + const auto temp32x4t_4 = wrapper::vmovl(wrapper::vgethigh(temp16x8t_2)); - auto temp32x4f_1 = vcvtq_f32_u32(temp32x4t_1); - auto temp32x4f_2 = vcvtq_f32_u32(temp32x4t_2); - auto temp32x4f_3 = vcvtq_f32_u32(temp32x4t_3); - auto temp32x4f_4 = vcvtq_f32_u32(temp32x4t_4); + auto temp32x4f_1 = wrapper::vcvt(temp32x4t_1); + auto temp32x4f_2 = wrapper::vcvt(temp32x4t_2); + auto temp32x4f_3 = wrapper::vcvt(temp32x4t_3); + auto temp32x4f_4 = wrapper::vcvt(temp32x4t_4); //de-quantize vec_elements temp32x4f_1 = vmulq_f32(vsubq_f32(temp32x4f_1, offset32x4f_4), scale32x4f_4); @@ -621,14 +642,14 @@ struct RedOpX_qasymm8 case ReductionOperation::ARG_IDX_MIN: { auto temp_vec_res_value = wrapper::vmin(vec_elements, vec_res_value); - vec_res_idx = calculate_index(id.x(), temp_vec_res_value, vec_res_value, vec_res_idx, op, 0); + vec_res_idx = calculate_index_quantized(id.x(), temp_vec_res_value, vec_res_value, vec_res_idx, op, 0); vec_res_value = temp_vec_res_value; break; } case ReductionOperation::ARG_IDX_MAX: { auto temp_vec_res_value = wrapper::vmax(vec_elements, vec_res_value); - vec_res_idx = calculate_index(id.x(), temp_vec_res_value, vec_res_value, vec_res_idx, op, 0); + vec_res_idx = calculate_index_quantized(id.x(), temp_vec_res_value, vec_res_value, vec_res_idx, op, 0); vec_res_value = temp_vec_res_value; break; } @@ -653,18 +674,18 @@ struct RedOpX_qasymm8 case ReductionOperation::ARG_IDX_MIN: case ReductionOperation::ARG_IDX_MAX: { - auto res = calculate_vector_index(vec_res_idx, vec_res_value, op); - *(reinterpret_cast(output.ptr())) = res; + auto res = calculate_vector_index_quantized(vec_res_idx, vec_res_value, op); + *(reinterpret_cast(output.ptr())) = res; break; } case ReductionOperation::MIN: { - *(output.ptr()) = static_cast(wrapper::vgetlane(calculate_min(vec_res_value), 0)); + *(output.ptr()) = static_cast(wrapper::vgetlane(calculate_min(vec_res_value), 0)); break; } case ReductionOperation::MAX: { - *(output.ptr()) = static_cast(wrapper::vgetlane(calculate_max(vec_res_value), 0)); + *(output.ptr()) = static_cast(wrapper::vgetlane(calculate_max(vec_res_value), 0)); break; } case ReductionOperation::PROD: @@ -679,8 +700,16 @@ struct RedOpX_qasymm8 res *= wrapper::vgetlane(carry_res, 3); //re-quantize result - res = quantize_qasymm8(res, iq_info); - *(output.ptr()) = static_cast(res); + if(std::is_same::value) + { + res = quantize_qasymm8(res, iq_info); + } + else + { + res = quantize_qasymm8_signed(res, iq_info); + } + + *reinterpret_cast(output.ptr()) = static_cast(res); break; } default: @@ -703,7 +732,7 @@ struct RedOpX_qasymm8 res -= (in_info.dimension(0) - 1) * iq_info.offset; } - *(output.ptr()) = utils::cast::saturate_cast(res); + *reinterpret_cast(output.ptr()) = utils::cast::saturate_cast(res); } } } @@ -873,33 +902,36 @@ struct RedOpYZW_complex } }; -struct RedOpYZW_qasymm8 +template +struct RedOpYZW_quantized { inline void operator()(Iterator &input, Iterator &output, Window &in_slice, Window &out_slice, const TensorInfo &in_info, int axis, const ReductionOperation op) { ARM_COMPUTE_UNUSED(out_slice); + using PromotedType = typename wrapper::traits::promote::type>::type; + const UniformQuantizationInfo iq_info = in_info.quantization_info().uniform(); execute_window_loop(in_slice, [&](const Coordinates &) { uint32x4x4_t vec_res_idx{ { 0 } }; - auto vec_res_value1 = 
wrapper::vdup_n(static_cast(0), wrapper::traits::vector_128_tag{}); - auto vec_res_value2 = wrapper::vdup_n(static_cast(0), wrapper::traits::vector_128_tag{}); - auto vec_res_value3 = wrapper::vdup_n(static_cast(0), wrapper::traits::vector_128_tag{}); - auto vec_res_value4 = wrapper::vdup_n(static_cast(0), wrapper::traits::vector_128_tag{}); + auto vec_res_value1 = wrapper::vdup_n(static_cast(0), wrapper::traits::vector_128_tag{}); + auto vec_res_value2 = wrapper::vdup_n(static_cast(0), wrapper::traits::vector_128_tag{}); + auto vec_res_value3 = wrapper::vdup_n(static_cast(0), wrapper::traits::vector_128_tag{}); + auto vec_res_value4 = wrapper::vdup_n(static_cast(0), wrapper::traits::vector_128_tag{}); auto vec_res_value1_f = wrapper::vdup_n(static_cast(1), wrapper::traits::vector_128_tag{}); auto vec_res_value2_f = wrapper::vdup_n(static_cast(1), wrapper::traits::vector_128_tag{}); auto vec_res_value3_f = wrapper::vdup_n(static_cast(1), wrapper::traits::vector_128_tag{}); auto vec_res_value4_f = wrapper::vdup_n(static_cast(1), wrapper::traits::vector_128_tag{}); - auto vec_res_value = wrapper::vloadq(input.ptr()); + auto vec_res_value = wrapper::vloadq(reinterpret_cast(input.ptr())); for(unsigned int index_dim = 0; index_dim < in_info.dimension(axis); ++index_dim) { - const uint8_t *in_ptr = input.ptr() + in_info.strides_in_bytes()[axis] * index_dim; - const auto vec_elements = wrapper::vloadq(in_ptr); + const T *in_ptr = reinterpret_cast(input.ptr()) + in_info.strides_in_bytes()[axis] * index_dim; + const auto vec_elements = wrapper::vloadq(in_ptr); switch(op) { case ReductionOperation::SUM: @@ -932,10 +964,10 @@ struct RedOpYZW_qasymm8 const auto temp32x4t_3 = wrapper::vmovl(wrapper::vgetlow(temp16x8t_2)); const auto temp32x4t_4 = wrapper::vmovl(wrapper::vgethigh(temp16x8t_2)); - auto temp32x4f_1 = vcvtq_f32_u32(temp32x4t_1); - auto temp32x4f_2 = vcvtq_f32_u32(temp32x4t_2); - auto temp32x4f_3 = vcvtq_f32_u32(temp32x4t_3); - auto temp32x4f_4 = vcvtq_f32_u32(temp32x4t_4); + auto temp32x4f_1 = wrapper::vcvt(temp32x4t_1); + auto temp32x4f_2 = wrapper::vcvt(temp32x4t_2); + auto temp32x4f_3 = wrapper::vcvt(temp32x4t_3); + auto temp32x4f_4 = wrapper::vcvt(temp32x4t_4); //de-quantize vec_elements temp32x4f_1 = wrapper::vmul(wrapper::vsub(temp32x4f_1, offset32x4f_4), scale32x4f_4); @@ -952,14 +984,14 @@ struct RedOpYZW_qasymm8 case ReductionOperation::ARG_IDX_MIN: { auto temp_vec_res_value = wrapper::vmin(vec_elements, vec_res_value); - vec_res_idx = calculate_index(index_dim, temp_vec_res_value, vec_res_value, vec_res_idx, op, axis); + vec_res_idx = calculate_index_quantized(index_dim, temp_vec_res_value, vec_res_value, vec_res_idx, op, axis); vec_res_value = temp_vec_res_value; break; } case ReductionOperation::ARG_IDX_MAX: { auto temp_vec_res_value = wrapper::vmax(vec_elements, vec_res_value); - vec_res_idx = calculate_index(index_dim, temp_vec_res_value, vec_res_value, vec_res_idx, op, axis); + vec_res_idx = calculate_index_quantized(index_dim, temp_vec_res_value, vec_res_value, vec_res_idx, op, axis); vec_res_value = temp_vec_res_value; break; } @@ -981,15 +1013,15 @@ struct RedOpYZW_qasymm8 if(op == ReductionOperation::MEAN_SUM) { const auto vec_width_inv = wrapper::vinv(wrapper::vdup_n(static_cast(in_info.dimension(axis)), wrapper::traits::vector_128_tag{})); - vec_res_value1_f = wrapper::vmul(vcvtq_f32_u32(vec_res_value1), vec_width_inv); - vec_res_value2_f = wrapper::vmul(vcvtq_f32_u32(vec_res_value2), vec_width_inv); - vec_res_value3_f = wrapper::vmul(vcvtq_f32_u32(vec_res_value3), 
vec_width_inv); - vec_res_value4_f = wrapper::vmul(vcvtq_f32_u32(vec_res_value4), vec_width_inv); - - vec_res_value1 = vcvtq_u32_f32(vec_res_value1_f); - vec_res_value2 = vcvtq_u32_f32(vec_res_value2_f); - vec_res_value3 = vcvtq_u32_f32(vec_res_value3_f); - vec_res_value4 = vcvtq_u32_f32(vec_res_value4_f); + vec_res_value1_f = wrapper::vmul(wrapper::vcvt(vec_res_value1), vec_width_inv); + vec_res_value2_f = wrapper::vmul(wrapper::vcvt(vec_res_value2), vec_width_inv); + vec_res_value3_f = wrapper::vmul(wrapper::vcvt(vec_res_value3), vec_width_inv); + vec_res_value4_f = wrapper::vmul(wrapper::vcvt(vec_res_value4), vec_width_inv); + + vec_res_value1 = wrapper::vcvt(vec_res_value1_f); + vec_res_value2 = wrapper::vcvt(vec_res_value2_f); + vec_res_value3 = wrapper::vcvt(vec_res_value3_f); + vec_res_value4 = wrapper::vcvt(vec_res_value4_f); } else if(op == ReductionOperation::PROD) { @@ -1002,10 +1034,10 @@ struct RedOpYZW_qasymm8 vec_res_value3_f = wrapper::vadd(wrapper::vmul(vec_res_value3_f, iscale32x4f_4), offset32x4f_4); vec_res_value4_f = wrapper::vadd(wrapper::vmul(vec_res_value4_f, iscale32x4f_4), offset32x4f_4); - vec_res_value1 = vcvtq_u32_f32(vec_res_value1_f); - vec_res_value2 = vcvtq_u32_f32(vec_res_value2_f); - vec_res_value3 = vcvtq_u32_f32(vec_res_value3_f); - vec_res_value4 = vcvtq_u32_f32(vec_res_value4_f); + vec_res_value1 = wrapper::vcvt(vec_res_value1_f); + vec_res_value2 = wrapper::vcvt(vec_res_value2_f); + vec_res_value3 = wrapper::vcvt(vec_res_value3_f); + vec_res_value4 = wrapper::vcvt(vec_res_value4_f); } if(op == ReductionOperation::ARG_IDX_MIN || op == ReductionOperation::ARG_IDX_MAX) @@ -1017,7 +1049,7 @@ struct RedOpYZW_qasymm8 } else if(op == ReductionOperation::ARG_IDX_MIN) { - wrapper::vstore(output.ptr(), vec_res_value); + wrapper::vstore(reinterpret_cast(output.ptr()), vec_res_value); } else { @@ -1026,10 +1058,10 @@ struct RedOpYZW_qasymm8 // Subtract offsets auto offsets = vdupq_n_s32((in_info.dimension(axis) - 1) * iq_info.offset); - auto vec_res_s_value1 = vreinterpretq_s32_u32(vec_res_value1); - auto vec_res_s_value2 = vreinterpretq_s32_u32(vec_res_value2); - auto vec_res_s_value3 = vreinterpretq_s32_u32(vec_res_value3); - auto vec_res_s_value4 = vreinterpretq_s32_u32(vec_res_value4); + auto vec_res_s_value1 = wrapper::vreinterpret(vec_res_value1); + auto vec_res_s_value2 = wrapper::vreinterpret(vec_res_value2); + auto vec_res_s_value3 = wrapper::vreinterpret(vec_res_value3); + auto vec_res_s_value4 = wrapper::vreinterpret(vec_res_value4); vec_res_s_value1 = wrapper::vsub(vec_res_s_value1, offsets); vec_res_s_value2 = wrapper::vsub(vec_res_s_value2, offsets); @@ -1038,15 +1070,16 @@ struct RedOpYZW_qasymm8 const auto temp16x8t_1 = wrapper::vcombine(wrapper::vqmovn(vec_res_s_value1), wrapper::vqmovn(vec_res_s_value2)); const auto temp16x8t_2 = wrapper::vcombine(wrapper::vqmovn(vec_res_s_value3), wrapper::vqmovn(vec_res_s_value4)); - auto res = wrapper::vcombine(wrapper::vqmovun(temp16x8t_1), wrapper::vqmovun(temp16x8t_2)); - wrapper::vstore(output.ptr(), res); + + combine_and_store(temp16x8t_1, temp16x8t_2, output); } else { const auto temp16x8t_1 = wrapper::vcombine(wrapper::vqmovn(vec_res_value1), wrapper::vqmovn(vec_res_value2)); const auto temp16x8t_2 = wrapper::vcombine(wrapper::vqmovn(vec_res_value3), wrapper::vqmovn(vec_res_value4)); auto res = wrapper::vcombine(wrapper::vqmovn(temp16x8t_1), wrapper::vqmovn(temp16x8t_2)); - wrapper::vstore(output.ptr(), res); + + wrapper::vstore(reinterpret_cast(output.ptr()), res); } } @@ -1088,7 +1121,9 @@ void 
reduce_op(const Window &window, const ITensor *input, ITensor *output, unsi switch(input->info()->data_type()) { case DataType::QASYMM8: - return Reducer::reduceX(window, input, output, RedOpX_qasymm8(), op); + return Reducer>::reduceX(window, input, output, RedOpX_quantized(), op); + case DataType::QASYMM8_SIGNED: + return Reducer>::reduceX(window, input, output, RedOpX_quantized(), op); #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: return Reducer>::reduceX(window, input, output, RedOpX(), op); @@ -1104,7 +1139,9 @@ void reduce_op(const Window &window, const ITensor *input, ITensor *output, unsi switch(input->info()->data_type()) { case DataType::QASYMM8: - return Reducer::reduceY(window, input, output, RedOpYZW_qasymm8(), op); + return Reducer>::reduceY(window, input, output, RedOpYZW_quantized(), op); + case DataType::QASYMM8_SIGNED: + return Reducer>::reduceY(window, input, output, RedOpYZW_quantized(), op); #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: return Reducer>::reduceY(window, input, output, RedOpYZW(), op); @@ -1120,7 +1157,9 @@ void reduce_op(const Window &window, const ITensor *input, ITensor *output, unsi switch(input->info()->data_type()) { case DataType::QASYMM8: - return Reducer::reduceZ(window, input, output, RedOpYZW_qasymm8(), op); + return Reducer>::reduceZ(window, input, output, RedOpYZW_quantized(), op); + case DataType::QASYMM8_SIGNED: + return Reducer>::reduceZ(window, input, output, RedOpYZW_quantized(), op); #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: return Reducer>::reduceZ(window, input, output, RedOpYZW(), op); @@ -1136,7 +1175,9 @@ void reduce_op(const Window &window, const ITensor *input, ITensor *output, unsi switch(input->info()->data_type()) { case DataType::QASYMM8: - return Reducer::reduceW(window, input, output, RedOpYZW_qasymm8(), op); + return Reducer>::reduceW(window, input, output, RedOpYZW_quantized(), op); + case DataType::QASYMM8_SIGNED: + return Reducer>::reduceW(window, input, output, RedOpYZW_quantized(), op); #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: return Reducer>::reduceW(window, input, output, RedOpYZW(), op); @@ -1162,7 +1203,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, u if(input->num_channels() == 1) { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::S32, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::S32, DataType::F16, DataType::F32); } else { diff --git a/src/runtime/NEON/functions/NEReduceMean.cpp b/src/runtime/NEON/functions/NEReduceMean.cpp index 72c63a8e30..5c936af562 100644 --- a/src/runtime/NEON/functions/NEReduceMean.cpp +++ b/src/runtime/NEON/functions/NEReduceMean.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -45,7 +45,7 @@ Status validate_config(const ITensorInfo *input, const Coordinates &reduction_ax ARM_COMPUTE_UNUSED(keep_dims); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON(reduction_axis.num_dimensions() < 1); ARM_COMPUTE_RETURN_ERROR_ON(reduction_axis.num_dimensions() > input->num_dimensions()); diff --git a/src/runtime/NEON/functions/NEReductionOperation.cpp b/src/runtime/NEON/functions/NEReductionOperation.cpp index 9d29ee007c..80ebe6731a 100644 --- a/src/runtime/NEON/functions/NEReductionOperation.cpp +++ b/src/runtime/NEON/functions/NEReductionOperation.cpp @@ -142,54 +142,12 @@ void NEReductionOperation::configure(ITensor *input, ITensor *output, unsigned i } case ReductionOperation::MIN: { - switch(input->info()->data_type()) - { - case DataType::F32: - { - pixelValue = PixelValue(std::numeric_limits::max()); - break; - } - case DataType::F16: - { - pixelValue = PixelValue(static_cast(65504.0f)); - break; - } - case DataType::QASYMM8: - { - pixelValue = std::get<1>(get_min_max(input->info()->data_type())); - break; - } - default: - { - ARM_COMPUTE_ERROR("Unsupported DataType"); - } - } + pixelValue = std::get<1>(get_min_max(input->info()->data_type())); break; } case ReductionOperation::MAX: { - switch(input->info()->data_type()) - { - case DataType::F32: - { - pixelValue = PixelValue(-std::numeric_limits::max()); - break; - } - case DataType::F16: - { - pixelValue = PixelValue(static_cast(-65504.0f)); - break; - } - case DataType::QASYMM8: - { - pixelValue = std::get<0>(get_min_max(input->info()->data_type())); - break; - } - default: - { - ARM_COMPUTE_ERROR("Unsupported DataType"); - } - } + pixelValue = std::get<0>(get_min_max(input->info()->data_type())); break; } case ReductionOperation::ARG_IDX_MAX: diff --git a/tests/validation/NEON/ArgMinMax.cpp b/tests/validation/NEON/ArgMinMax.cpp index ec90ab0db0..e7ab4a4bbf 100644 --- a/tests/validation/NEON/ArgMinMax.cpp +++ b/tests/validation/NEON/ArgMinMax.cpp @@ -163,6 +163,30 @@ FIXTURE_DATA_TEST_CASE(RunLarge, validate(Accessor(_target), _reference); } TEST_SUITE_END() // QASYMM8 + +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunSmall, + NEArgMinMaxQuantizedValidationFixture, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), + framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 127.f, 20) }))) +{ + // Validate output + validate(Accessor(_target), _reference); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, + NEArgMinMaxQuantizedValidationFixture, + framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), + framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 127.f, 20) }))) +{ + // Validate 
output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() // QASYMM8_SIGNED TEST_SUITE_END() // ArgMinMax TEST_SUITE_END() // NEON } // namespace validation diff --git a/tests/validation/NEON/ReduceMean.cpp b/tests/validation/NEON/ReduceMean.cpp index 782b97220e..fade3613da 100644 --- a/tests/validation/NEON/ReduceMean.cpp +++ b/tests/validation/NEON/ReduceMean.cpp @@ -44,9 +44,9 @@ namespace { constexpr AbsoluteTolerance tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for 32-bit floating-point type */ #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -constexpr AbsoluteTolerance tolerance_f16(0.03f); /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */ -#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -constexpr AbsoluteTolerance tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for 8-bit asymmetric quantized type */ +constexpr AbsoluteTolerance tolerance_f16(0.03f); /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */ +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +constexpr AbsoluteTolerance tolerance_quantized(1); /**< Tolerance value for comparing reference's output against implementation's output for 8-bit asymmetric quantized type */ const auto axis_keep = combine(framework::dataset::make("Axis", { Coordinates(0), Coordinates(1, 0), Coordinates(1, 2), Coordinates(0, 2), Coordinates(1, 3), Coordinates(0, 1, 2, 3) }), framework::dataset::make("KeepDims", { true })); @@ -162,7 +162,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8)), concat(axis_keep, axis_drop)), framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255, 5) }))) { // Validate output - validate(Accessor(_target), _reference, tolerance_qasymm8); + validate(Accessor(_target), _reference, tolerance_quantized); } FIXTURE_DATA_TEST_CASE(RunLarge, @@ -171,9 +171,28 @@ FIXTURE_DATA_TEST_CASE(RunLarge, combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8)), concat(axis_keep, axis_drop)), framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255, 5) }))) { // Validate output - validate(Accessor(_target), _reference, tolerance_qasymm8); + validate(Accessor(_target), _reference, tolerance_quantized); } TEST_SUITE_END() // QASYMM8 + +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunSmall, + NEReduceMeanQuantizedFixture, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), concat(axis_keep, axis_drop)), framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 127, 0) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_quantized); +} +FIXTURE_DATA_TEST_CASE(RunLarge, + NEReduceMeanQuantizedFixture, + framework::DatasetMode::NIGHTLY, + combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), concat(axis_keep, axis_drop)), framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 127, 0) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_quantized); +} +TEST_SUITE_END() // QASYMM8_SIGNED TEST_SUITE_END() // Quantized TEST_SUITE_END() // ReduceMean 
TEST_SUITE_END() // NEON diff --git a/tests/validation/NEON/ReductionOperation.cpp b/tests/validation/NEON/ReductionOperation.cpp index 93f1a80735..cd96a6abcc 100644 --- a/tests/validation/NEON/ReductionOperation.cpp +++ b/tests/validation/NEON/ReductionOperation.cpp @@ -44,9 +44,11 @@ namespace { /** Tolerance for float operations */ AbsoluteTolerance tolerance_f32(0.0001f); -RelativeTolerance rel_tolerance_f32(0.00001f); +RelativeTolerance rel_tolerance_f32(0.0001f); +AbsoluteTolerance tolerance_f16(0.1f); +RelativeTolerance rel_tolerance_f16(0.1f); /** Tolerance for quantized operations */ -RelativeTolerance tolerance_qasymm8(1); +RelativeTolerance tolerance_quantized(1.f); const auto ReductionOperations = framework::dataset::make("ReductionOperation", { @@ -58,7 +60,7 @@ const auto ReductionOperations = framework::dataset::make("ReductionOperation", const auto QuantizationInfos = framework::dataset::make("QuantizationInfo", { - QuantizationInfo(1.f / 128, 10), + QuantizationInfo(1.f / 117, 10), // Numbers chosen so that the quantized values are in range of qasymm8_signed data type QuantizationInfo(1.f / 64, 5), QuantizationInfo(1.f / 32, 2) }); @@ -123,6 +125,23 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEReductionOperationFixture, framework:: } TEST_SUITE_END() // FP32 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, NEReductionOperationFixture, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F16)), Axises), ReductionOperations), KeepDims)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEReductionOperationFixture, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::F16)), Axises), ReductionOperations), KeepDims)) +{ + // Validate output + validate(Accessor(_target), _reference, rel_tolerance_f16, 0, tolerance_f16); +} +TEST_SUITE_END() // FP16 +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + template using NEReductionOperationQuantizedFixture = ReductionOperationQuantizedFixture; @@ -134,10 +153,22 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEReductionOperationQuantizedFixture, KeepDims)) { // Validate output - validate(Accessor(_target), _reference, tolerance_qasymm8); + validate(Accessor(_target), _reference, tolerance_quantized); } TEST_SUITE_END() // QASYMM8 +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunSmall, NEReductionOperationQuantizedFixture, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), Axises), + ReductionOperations), + QuantizationInfos), + KeepDims)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_quantized); +} +TEST_SUITE_END() // QASYMM8_SIGNED + TEST_SUITE_END() // ReductionOperation TEST_SUITE_END() // NEON } // namespace validation diff --git a/tests/validation/fixtures/ArgMinMaxFixture.h b/tests/validation/fixtures/ArgMinMaxFixture.h index a4d03fba02..2932ba4508 100644 --- a/tests/validation/fixtures/ArgMinMaxFixture.h +++ b/tests/validation/fixtures/ArgMinMaxFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -80,6 +80,14 @@ protected: library->fill(tensor, distribution, 0); break; } + case DataType::QASYMM8_SIGNED: + { + std::pair bounds = get_quantized_qasymm8_signed_bounds(tensor.quantization_info(), -1.0f, 1.0f); + std::uniform_int_distribution distribution(bounds.first, bounds.second); + + library->fill(tensor, distribution, 0); + break; + } default: ARM_COMPUTE_ERROR("DataType for Elementwise Negation Not implemented"); } diff --git a/tests/validation/fixtures/ReductionOperationFixture.h b/tests/validation/fixtures/ReductionOperationFixture.h index 2802cd4c0a..a93bf49afd 100644 --- a/tests/validation/fixtures/ReductionOperationFixture.h +++ b/tests/validation/fixtures/ReductionOperationFixture.h @@ -61,22 +61,31 @@ protected: template void fill(U &&tensor) { - if(tensor.data_type() == DataType::QASYMM8) + if(!is_data_type_quantized(tensor.data_type())) { - std::pair bounds = get_quantized_bounds(tensor.quantization_info(), -1.0f, 1.0f); - std::uniform_int_distribution distribution(bounds.first, bounds.second); - library->fill(tensor, distribution, 0); - } - else if(tensor.data_type() == DataType::QASYMM8_SIGNED) - { - std::pair bounds = get_quantized_qasymm8_signed_bounds(tensor.quantization_info(), -1.0f, 1.0f); - std::uniform_int_distribution distribution(bounds.first, bounds.second); + std::uniform_real_distribution<> distribution(-1.0f, 1.0f); library->fill(tensor, distribution, 0); } else { - std::uniform_real_distribution<> distribution(-1.0f, 1.0f); - library->fill(tensor, distribution, 0); + if(tensor.data_type() == DataType::QASYMM8) + { + std::pair bounds = get_quantized_bounds(tensor.quantization_info(), -1.0f, 1.0f); + std::uniform_int_distribution distribution(bounds.first, bounds.second); + + library->fill(tensor, distribution, 0); + } + else if(tensor.data_type() == DataType::QASYMM8_SIGNED) + { + std::pair bounds = get_quantized_qasymm8_signed_bounds(tensor.quantization_info(), -1.0f, 1.0f); + std::uniform_int_distribution distribution(bounds.first, bounds.second); + + library->fill(tensor, distribution, 0); + } + else + { + ARM_COMPUTE_ERROR("Not supported"); + } } } diff --git a/tests/validation/reference/ReductionOperation.cpp b/tests/validation/reference/ReductionOperation.cpp index a0b6a8eaed..68352cc645 100644 --- a/tests/validation/reference/ReductionOperation.cpp +++ b/tests/validation/reference/ReductionOperation.cpp @@ -327,6 +327,7 @@ template SimpleTensor reduction_operation(const SimpleTensor &sr template SimpleTensor reduction_operation(const SimpleTensor &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op); template SimpleTensor reduction_operation(const SimpleTensor &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op); template SimpleTensor reduction_operation(const SimpleTensor &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op); +template SimpleTensor reduction_operation(const SimpleTensor &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op); } // namespace reference } // namespace validation -- cgit v1.2.1
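A note on the type machinery in RedOpX_quantized and RedOpYZW_quantized above: the accumulator type is derived by promoting the element type twice, so one template body serves both QASYMM8 (uint8_t, accumulated in uint32x4_t) and QASYMM8_SIGNED (int8_t, accumulated in int32x4_t). A minimal standalone sketch of that promotion ladder follows; the struct below only mirrors the role of wrapper::traits::promote, it is not the library's definition:

    #include <cstdint>

    // Each promotion step widens an integer element type by one rank.
    template <typename T> struct promote;
    template <> struct promote<uint8_t>  { using type = uint16_t; };
    template <> struct promote<uint16_t> { using type = uint32_t; };
    template <> struct promote<int8_t>   { using type = int16_t;  };
    template <> struct promote<int16_t>  { using type = int32_t;  };

    // Promoting twice yields the 32-bit accumulator the kernels need:
    // uint8_t -> uint16_t -> uint32_t, and int8_t -> int16_t -> int32_t.
    template <typename T>
    using promote_2x = typename promote<typename promote<T>::type>::type;

    static_assert(sizeof(promote_2x<uint8_t>) == 4, "u8 accumulates in u32");
    static_assert(sizeof(promote_2x<int8_t>) == 4, "s8 accumulates in s32");

Together with the vcvt overloads from the new cvt.h, this is what removes the explicit vcvtq_f32_u32/vcvtq_u32_f32 calls that previously pinned the old RedOpX_qasymm8 to unsigned data.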
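The calculate_min/calculate_max helpers reduce a whole register to a single value by repeated pairwise folding; for 16-lane 8-bit vectors this takes four vpmin/vpmax steps, since each fold halves the number of distinct candidates. A standalone sketch of the signed-minimum case (assumes an AArch64 toolchain with <arm_neon.h>; horizontal_min is a hypothetical name, not a library symbol):

    #include <arm_neon.h>

    // Fold a 16-lane vector down to its minimum: 16 -> 8 -> 4 -> 2 -> 1
    // distinct candidates; after the last step every lane holds the global
    // minimum, so any lane can be read out.
    inline int8x8_t horizontal_min(int8x16_t in)
    {
        int8x8_t pmin = vpmin_s8(vget_high_s8(in), vget_low_s8(in));
        pmin          = vpmin_s8(pmin, pmin);
        pmin          = vpmin_s8(pmin, pmin);
        return vpmin_s8(pmin, pmin);
    }

    // Scalar extraction then reads lane 0, e.g.:
    //   int8_t m = vget_lane_s8(horizontal_min(v), 0);

This is why the kernel's MIN/MAX epilogue can take lane 0 unconditionally after calling the helper.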
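Finally, the offset arithmetic in the quantized epilogues deserves a gloss. With affine quantization, value = scale * (q - offset), so summing N raw quantized values accumulates N copies of the offset where a well-formed quantized result should carry exactly one; hence the kernel's "res -= (in_info.dimension(0) - 1) * iq_info.offset" after SUM. For MEAN_SUM, dividing the raw sum by N already leaves a single offset in place, which is why only "res /= in_info.dimension(0)" is needed there. A scalar model of the SUM correction (quantized_sum is a hypothetical helper, not library code):

    #include <cstdint>
    #include <vector>

    // Sum in the quantized domain: value_i = scale * (q_i - offset), so
    // sum(value_i) = scale * (sum(q_i) - N * offset). A quantized result q_s
    // must satisfy scale * (q_s - offset) = sum(value_i), giving
    // q_s = sum(q_i) - (N - 1) * offset.
    int32_t quantized_sum(const std::vector<int8_t> &q, int32_t offset)
    {
        int32_t acc = 0;
        for(int8_t v : q)
        {
            acc += v;
        }
        return acc - (static_cast<int32_t>(q.size()) - 1) * offset;
    }

The saturate_cast back to the element type then clamps q_s into the 8-bit range, and the rounding inherent in requantization is what the quantized tests above absorb with their absolute tolerance of 1.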