From b9626ab169a168a7c1ca57edd1996e1e80938bf1 Mon Sep 17 00:00:00 2001
From: Michalis Spyrou
Date: Mon, 13 May 2019 17:41:01 +0100
Subject: COMPMID-2243 ArgMinMaxLayer: support new datatypes

Change-Id: I846e833e0c94090cbbdcd6aee6061cea8295f4f9
Signed-off-by: Michalis Spyrou
Reviewed-on: https://review.mlplatform.org/c/1131
Reviewed-by: Giuseppe Rossini
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
---
 .../core/CL/kernels/CLReductionOperationKernel.h   |  6 +-
 arm_compute/core/NEON/wrapper/intrinsics/inv.h     | 10 +++-
 .../runtime/NEON/functions/NEArgMinMaxLayer.h      |  4 +-
 src/core/CL/cl_kernels/reduction_operation.cl      | 67 +++++++++++++---------
 src/core/CL/kernels/CLReductionOperationKernel.cpp |  6 +-
 .../NEON/kernels/NEReductionOperationKernel.cpp    | 25 +++++---
 tests/validation/CL/ArgMinMax.cpp                  | 13 ++++-
 tests/validation/NEON/ArgMinMax.cpp                | 21 +++++++
 tests/validation/fixtures/ArgMinMaxFixture.h       | 34 +++++++----
 tests/validation/reference/ReductionOperation.cpp  |  1 +
 10 files changed, 133 insertions(+), 54 deletions(-)

diff --git a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h b/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
index b5b90a15ce..aba11e1ad1 100644
--- a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
+++ b/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,7 +50,7 @@ public:
     /** Set the input and output tensors.
      *
-     * @param[in]  input  Source tensor. Data types supported: QASYMM8/F16/F32.
+     * @param[in]  input  Source tensor. Data types supported: QASYMM8/S32/F16/F32.
      * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
      *                    Output will have the same number of dimensions as input.
      * @param[in]  axis   Axis along which to reduce. Supported reduction axis : 0,1,2,3
@@ -61,7 +61,7 @@ public:
     /** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperationKernel.
      *
-     * @param[in] input  Source tensor info. Data types supported: QASYMM8/F16/F32.
+     * @param[in] input  Source tensor info. Data types supported: QASYMM8/S32/F16/F32.
      * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input.
      *                   Output will have the same number of dimensions as input.
      * @param[in] axis   Axis along which to reduce. Supported reduction axis : 0,1,2,3
diff --git a/arm_compute/core/NEON/wrapper/intrinsics/inv.h b/arm_compute/core/NEON/wrapper/intrinsics/inv.h
index a86a9d4671..acb2c91feb 100644
--- a/arm_compute/core/NEON/wrapper/intrinsics/inv.h
+++ b/arm_compute/core/NEON/wrapper/intrinsics/inv.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -37,12 +37,20 @@ namespace wrapper
         return prefix##_##postfix(a); \
     }
 
+#define VINV_IMPL_INT(vtype, prefix, postfix) \
+    inline vtype vinv(const vtype &a)         \
+    {                                         \
+        ARM_COMPUTE_ERROR("Not supported");   \
+    }
+
 VINV_IMPL(float32x2_t, vinv, f32)
+VINV_IMPL_INT(int32x2_t, vinv, s32)
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 VINV_IMPL(float16x4_t, vinv, f16)
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
 VINV_IMPL(float32x4_t, vinvq, f32)
+VINV_IMPL_INT(int32x4_t, vinvq, s32)
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 VINV_IMPL(float16x8_t, vinvq, f16)
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
diff --git a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
index 87d77a5e13..55b39e45ec 100644
--- a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
@@ -48,7 +48,7 @@ public:
     NEArgMinMaxLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
     /** Set the input and output tensors.
      *
-     * @param[in]  input  Input source tensor. Data types supported: F16/F32.
+     * @param[in]  input  Input source tensor. Data types supported: QASYMM8/S32/F16/F32.
      * @param[in]  axis   Axis to find max/min index.
      * @param[out] output Output source tensor. Data types supported: U32.
      * @param[in]  op     Operation to perform: min or max
@@ -56,7 +56,7 @@ public:
     void configure(ITensor *input, int axis, ITensor *output, const ReductionOperation &op);
     /** Static function to check if given info will lead to a valid configuration of @ref NEArgMinMaxLayer
      *
-     * @param[in] input  Input source tensor info. Data types supported: F16/F32.
+     * @param[in] input  Input source tensor info. Data types supported: QASYMM8/S32/F16/F32.
      * @param[in] axis   Axis to find max/min index.
     * @param[in] output Output source tensor info. Data types supported: U32.
     * @param[in] op     Operation to perform: min or max
diff --git a/src/core/CL/cl_kernels/reduction_operation.cl b/src/core/CL/cl_kernels/reduction_operation.cl
index 2651123cf5..749e3cdaa3 100644
--- a/src/core/CL/cl_kernels/reduction_operation.cl
+++ b/src/core/CL/cl_kernels/reduction_operation.cl
@@ -23,6 +23,19 @@
  */
 #include "helpers.h"
 
+#if FLOAT_DATA_TYPE
+#define ISGREATER(x, y) isgreater(x, y)
+#define ISLESS(x, y) isless(x, y)
+#else // !FLOAT_DATA_TYPE
+#if defined(WIDTH)
+#define ISGREATER(x, y) (x > y) ? 1 : 0
+#define ISLESS(x, y) (x < y) ? 1 : 0
+#else // !defined(WIDTH)
+#define ISGREATER(x, y) select((int16)0, (int16)-1, x > y)
+#define ISLESS(x, y) select((int16)0, (int16)-1, x < y)
+#endif // defined(WIDTH)
+#endif // FLOAT_DATA_TYPE
+
 /** Calculate square sum of a vector
  *
  * @param[in] input Pointer to the first pixel.
@@ -124,9 +137,9 @@ __kernel void reduction_operation_x(
         {
 #if defined(PROD)
             local_results[lid] *= local_results[lid + i];
-#else //!defined(PROD)
+#else // !defined(PROD)
            local_results[lid] += local_results[lid + i];
-#endif //defined(PROD)
+#endif // defined(PROD)
         }
         barrier(CLK_LOCAL_MEM_FENCE);
     }
@@ -138,7 +151,7 @@ __kernel void reduction_operation_x(
         {
             local_results[0] /= WIDTH;
         }
-#endif /* defined(MEAN) && defined(WIDTH) */
+#endif // defined(MEAN) && defined(WIDTH)
         ((__global DATA_TYPE *)offset(&partial_res, get_group_id(0), y))[0] = local_results[0];
     }
 }
@@ -153,7 +166,7 @@ __kernel void reduction_operation_x(
  * @note The product flag must be passed at compile time using -DPROD if we want to compute the product, otherwise sum will be used
  * @note In case of ARG_MIN and ARG_MAX the condition data type must be passed at compile time using -DCOND_DATA_TYPE e.g. -DCOND_DATA_TYPE=short
  *
- * @param[in] src_ptr                              Pointer to the source tensor. Supported data types: F16/F32 and QASYMM8 for operation MEAN
+ * @param[in] src_ptr                              Pointer to the source tensor. Supported data types: S32/F16/F32 and QASYMM8 for operation MEAN
  * @param[in] src_stride_x                         Stride of the source tensor in X dimension (in bytes)
  * @param[in] src_step_x                           src_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in] src_offset_first_element_in_bytes    The offset of the first element in the source tensor
@@ -179,11 +192,11 @@ __kernel void reduction_operation_non_parallel_x(
     {
         DATA_TYPE_PROMOTED in = *((__global DATA_TYPE *)vector_offset(&src, x));
 #if defined(ARG_MAX)
-        indx = select(indx, x, isgreater(in, res));
-        res  = select(res, in, CONVERT(isgreater(in, res), COND_DATA_TYPE));
+        indx = select(indx, x, ISGREATER(in, res));
+        res  = select(res, in, CONVERT(ISGREATER(in, res), COND_DATA_TYPE));
 #elif defined(ARG_MIN)
-        indx = select(indx, x, isless(in, res));
-        res  = select(res, in, CONVERT(isless(in, res), COND_DATA_TYPE));
+        indx = select(indx, x, ISLESS(in, res));
+        res  = select(res, in, CONVERT(ISLESS(in, res), COND_DATA_TYPE));
 #else // !(defined(ARG_MAX) || defined(ARG_MIN))
         res += in;
 #endif // defined(ARG_MAX) || defined(ARG_MIN)
@@ -199,7 +212,7 @@ __kernel void reduction_operation_non_parallel_x(
     *((__global uchar *)output.ptr) = convert_uchar(res);
 #endif // defined(ARG_MAX) || defined(ARG_MIN)
 }
-#endif /* defined(WIDTH) */
+#endif // defined(WIDTH)
 
 #if defined(HEIGHT)
 /** This kernel performs reduction on y-axis.
@@ -207,7 +220,7 @@ __kernel void reduction_operation_non_parallel_x(
  *
  * @note The input data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
  * @note The height size must be passed at compile time using -DHEIGHT e.g. -DHEIGHT=128
  *
- * @param[in] src_ptr                           Pointer to the source tensor. Supported data types: QASYMM8/F16/F32
+ * @param[in] src_ptr                           Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
  * @param[in] src_stride_x                      Stride of the source tensor in X dimension (in bytes)
  * @param[in] src_step_x                        src_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in] src_stride_y                      Stride of the source tensor in Y dimension (in bytes)
@@ -243,22 +256,22 @@ __kernel void reduction_operation_y(
         VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
         in = CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, y)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
 
 #if defined(ARG_MAX)
-        uint16 cond_conv = CONVERT(isgreater(in, res), uint16);
+        uint16 cond_conv = CONVERT(ISGREATER(in, res), uint16);
         indx             = select(indx, y, cond_conv);
-        res              = select(res, in, isgreater(in, res));
+        res              = select(res, in, ISGREATER(in, res));
 #elif defined(ARG_MIN)
-        uint16 cond_conv = CONVERT(isless(in, res), uint16);
+        uint16 cond_conv = CONVERT(ISLESS(in, res), uint16);
         indx             = select(indx, y, cond_conv);
-        res              = select(res, in, isless(in, res));
+        res              = select(res, in, ISLESS(in, res));
 #else // !(defined(ARG_MAX) || defined(ARG_MIN))
 #if defined(SUM_SQUARE)
         in *= in;
 #endif // defined(SUM_SQUARE)
 #if defined(PROD)
         res *= in;
-#else //!defined(PROD)
+#else // !defined(PROD)
         res += in;
-#endif //defined(PROD)
+#endif // defined(PROD)
 #endif // defined(ARG_MAX) || defined(ARG_MIN)
     }
 
@@ -272,7 +285,7 @@ __kernel void reduction_operation_y(
     vstore16(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
 #endif // defined(ARG_MAX) || defined(ARG_MIN)
 }
-#endif /* defined(HEIGHT) */
+#endif // defined(HEIGHT)
 
 #if defined(DEPTH)
 /** This kernel performs reduction on z-axis.
@@ -280,7 +293,7 @@ __kernel void reduction_operation_y(
  *
  * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
  * @note The depth size must be passed at compile time using -DDEPTH e.g. -DDEPTH=128
  *
- * @param[in] input_ptr                            Pointer to the source tensor. Supported data types: QASYMM8/F16/F32
+ * @param[in] input_ptr                            Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
  * @param[in] input_stride_x                       Stride of the source tensor in X dimension (in bytes)
  * @param[in] input_step_x                         input_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in] input_stride_y                       Stride of the source tensor in Y dimension (in bytes)
@@ -330,13 +343,13 @@ __kernel void reduction_operation_z(
 #endif // defined(COMPLEX)
 
 #if defined(ARG_MAX)
-        uint16 cond_conv = CONVERT(isgreater(in, res), uint16);
+        uint16 cond_conv = CONVERT(ISGREATER(in, res), uint16);
         indx             = select(indx, z, cond_conv);
-        res              = select(res, in, isgreater(in, res));
+        res              = select(res, in, ISGREATER(in, res));
 #elif defined(ARG_MIN)
-        uint16 cond_conv = CONVERT(isless(in, res), uint16);
+        uint16 cond_conv = CONVERT(ISLESS(in, res), uint16);
         indx             = select(indx, z, cond_conv);
-        res              = select(res, in, isless(in, res));
+        res              = select(res, in, ISLESS(in, res));
 #else // !(defined(ARG_MAX) || defined(ARG_MIN))
 #if defined(SUM_SQUARE)
         in *= in;
@@ -374,7 +387,7 @@ __kernel void reduction_operation_z(
  * @note The batch size must be passed at compile time using -DBATCH e.g. -DBATCH=128
  * @note The depth size must be passed at compile time using -DDEPTH e.g. -DDEPTH=128
  *
- * @param[in] input_ptr                            Pointer to the source tensor. Supported data types: QASYMM8/F16/F32
+ * @param[in] input_ptr                            Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
  * @param[in] input_stride_x                       Stride of the source tensor in X dimension (in bytes)
  * @param[in] input_step_x                         input_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in] input_stride_y                       Stride of the source tensor in Y dimension (in bytes)
@@ -419,13 +432,13 @@ __kernel void reduction_operation_w(
         in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, w)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
 
 #if defined(ARG_MAX)
-        uint16 cond_conv = CONVERT(isgreater(in, res), uint16);
+        uint16 cond_conv = CONVERT(ISGREATER(in, res), uint16);
         indx             = select(indx, w, cond_conv);
-        res              = select(res, in, isgreater(in, res));
+        res              = select(res, in, ISGREATER(in, res));
 #elif defined(ARG_MIN)
-        uint16 cond_conv = CONVERT(isless(in, res), uint16);
+        uint16 cond_conv = CONVERT(ISLESS(in, res), uint16);
         indx             = select(indx, w, cond_conv);
-        res              = select(res, in, isless(in, res));
+        res              = select(res, in, ISLESS(in, res));
 #else // !(defined(ARG_MAX) || defined(ARG_MIN))
 #if defined(SUM_SQUARE)
         in *= in;
diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp
index db4850f14e..cb57070612 100644
--- a/src/core/CL/kernels/CLReductionOperationKernel.cpp
+++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp
@@ -49,7 +49,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, u
     ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
     if(input->num_channels() == 1)
     {
-        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
+        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::S32, DataType::F16, DataType::F32);
     }
     else
     {
@@ -160,8 +160,10 @@ void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *ou
     {
         data_type_promoted = "uint";
     }
+
     build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
     build_opts.add_option("-DDATA_TYPE_PROMOTED=" + data_type_promoted);
+    build_opts.add_option_if(is_data_type_float(input->info()->data_type()), "-DFLOAT_DATA_TYPE");
     build_opts.add_option_if(op == ReductionOperation::SUM_SQUARE, "-DSUM_SQUARE");
     build_opts.add_option_if(op == ReductionOperation::MEAN_SUM, "-DMEAN");
     build_opts.add_option_if(op == ReductionOperation::ARG_IDX_MAX, "-DARG_MAX");
@@ -199,7 +201,7 @@ void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *ou
     if(is_serial_op)
     {
         build_opts.add_option("-DWIDTH=" + support::cpp11::to_string(input->info()->dimension(0)));
-        build_opts.add_option_if_else(_input->info()->data_type() == DataType::F32, "-DCOND_DATA_TYPE=int", "-DCOND_DATA_TYPE=short");
+        build_opts.add_option_if_else(_input->info()->data_type() == DataType::F16, "-DCOND_DATA_TYPE=short", "-DCOND_DATA_TYPE=int");
         kernel_axis_name = "non_parallel_x";
     }
     else
diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
index aa20d1f40d..5f0a4dd371 100644
--- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
@@ -41,7 +41,8 @@ namespace arm_compute
 {
 namespace
 {
-uint32x4x4_t calculate_index(uint32_t idx, float32x4_t a, float32x4_t b, uint32x4x4_t c, ReductionOperation op, int axis)
+template <typename T>
+uint32x4x4_t calculate_index(uint32_t idx, T a, T b, uint32x4x4_t c, ReductionOperation op, int axis)
 {
     uint32x4_t mask{ 0 };
     if(op == ReductionOperation::ARG_IDX_MIN)
@@ -107,8 +108,8 @@ uint32x4x4_t calculate_index(uint32_t idx, uint8x16_t a, uint8x16_t b, uint32x4x
     return res;
 }
-
-uint32_t calculate_vector_index(uint32x4x4_t vec_res_idx, float32x4_t vec_res_value, ReductionOperation op)
+template <typename T>
+uint32_t calculate_vector_index(uint32x4x4_t vec_res_idx, T vec_res_value, ReductionOperation op)
 {
     uint32x4_t res_idx_mask{ 0 };
     uint32x4_t mask_ones = vdupq_n_u32(0xFFFFFFFF);
@@ -124,7 +125,7 @@ uint32_t calculate_vector_index(uint32x4x4_t vec_res_idx, float32x4_t vec_res_va
     {
         auto pmax    = wrapper::vpmax(wrapper::vgethigh(vec_res_value), wrapper::vgetlow(vec_res_value));
         pmax         = wrapper::vpmax(pmax, pmax);
-        auto mask    = vceqq_f32(vec_res_value, wrapper::vcombine(pmax, pmax));
+        auto mask    = wrapper::vceq(vec_res_value, wrapper::vcombine(pmax, pmax));
         res_idx_mask = wrapper::vand(vec_res_idx.val[0], mask);
     }
@@ -394,14 +395,14 @@ struct RedOpX
             case ReductionOperation::ARG_IDX_MIN:
             {
                 auto temp_vec_res_value = wrapper::vmin(vec_elements, vec_res_value);
-                vec_res_idx   = calculate_index(id.x(), temp_vec_res_value, vec_res_value, vec_res_idx, op, 0);
+                vec_res_idx   = calculate_index<decltype(vec_res_value)>(id.x(), temp_vec_res_value, vec_res_value, vec_res_idx, op, 0);
                 vec_res_value = temp_vec_res_value;
                 break;
             }
             case ReductionOperation::ARG_IDX_MAX:
             {
                 auto temp_vec_res_value = wrapper::vmax(vec_elements, vec_res_value);
-                vec_res_idx   = calculate_index(id.x(), temp_vec_res_value, vec_res_value, vec_res_idx, op, 0);
+                vec_res_idx   = calculate_index<decltype(vec_res_value)>(id.x(), temp_vec_res_value, vec_res_value, vec_res_idx, op, 0);
                 vec_res_value = temp_vec_res_value;
                 break;
             }
@@ -446,7 +447,7 @@ struct RedOpX
             case ReductionOperation::ARG_IDX_MIN:
             case ReductionOperation::ARG_IDX_MAX:
             {
-                auto res = calculate_vector_index(vec_res_idx, vec_res_value, op);
+                auto res = calculate_vector_index<decltype(vec_res_value)>(vec_res_idx, vec_res_value, op);
                 *(reinterpret_cast<uint32_t *>(output.ptr())) = res;
                 break;
             }
@@ -943,6 +944,8 @@ void reduce_op(const Window &window, const ITensor *input, ITensor *output, unsi
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
                 case DataType::F32:
                     return Reducer<RedOpX<float, 4>>::reduceX(window, input, output, RedOpX<float, 4>(), op);
+                case DataType::S32:
+                    return Reducer<RedOpX<int32_t, 4>>::reduceX(window, input, output, RedOpX<int32_t, 4>(), op);
                 default:
                     ARM_COMPUTE_ERROR("Not supported");
             }
@@ -957,6 +960,8 @@ void reduce_op(const Window &window, const ITensor *input, ITensor *output, unsi
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
                 case DataType::F32:
                     return Reducer<RedOpYZW<float, 4>>::reduceY(window, input, output, RedOpYZW<float, 4>(), op);
+                case DataType::S32:
+                    return Reducer<RedOpYZW<int32_t, 4>>::reduceY(window, input, output, RedOpYZW<int32_t, 4>(), op);
                 default:
                     ARM_COMPUTE_ERROR("Not supported");
             }
@@ -971,6 +976,8 @@ void reduce_op(const Window &window, const ITensor *input, ITensor *output, unsi
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
                 case DataType::F32:
                     return Reducer<RedOpYZW<float, 4>>::reduceZ(window, input, output, RedOpYZW<float, 4>(), op);
+                case DataType::S32:
+                    return Reducer<RedOpYZW<int32_t, 4>>::reduceZ(window, input, output, RedOpYZW<int32_t, 4>(), op);
                 default:
                     ARM_COMPUTE_ERROR("Not supported");
             }
@@ -985,6 +992,8 @@ void reduce_op(const Window &window, const ITensor *input, ITensor *output, unsi
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
                 case DataType::F32:
                     return Reducer<RedOpYZW<float, 4>>::reduceW(window, input, output, RedOpYZW<float, 4>(), op);
+                case DataType::S32:
+                    return Reducer<RedOpYZW<int32_t, 4>>::reduceW(window, input, output, RedOpYZW<int32_t, 4>(), op);
                 default:
                     ARM_COMPUTE_ERROR("Not supported");
             }
@@ -1002,7 +1011,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, u
 
     if(input->num_channels() == 1)
     {
-        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
+        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::S32, DataType::F16, DataType::F32);
     }
     else
     {
diff --git a/tests/validation/CL/ArgMinMax.cpp b/tests/validation/CL/ArgMinMax.cpp
index 0b873945d3..6de09bed25 100644
--- a/tests/validation/CL/ArgMinMax.cpp
+++ b/tests/validation/CL/ArgMinMax.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -90,6 +90,17 @@ DATA_TEST_CASE(Configuration,
 template <typename T>
 using CLArgMinMaxValidationFixture = ArgMinMaxValidationFixture<CLTensor, CLAccessor, CLArgMinMaxLayer, T>;
 
+TEST_SUITE(S32)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+                       CLArgMinMaxValidationFixture<int32_t>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::S32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // S32
+
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
 FIXTURE_DATA_TEST_CASE(RunSmall,
diff --git a/tests/validation/NEON/ArgMinMax.cpp b/tests/validation/NEON/ArgMinMax.cpp
index d3f70e6424..71fb39a30d 100644
--- a/tests/validation/NEON/ArgMinMax.cpp
+++ b/tests/validation/NEON/ArgMinMax.cpp
@@ -22,6 +22,7 @@
  * SOFTWARE.
  */
 #include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/Traits.h"
 #include "arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h"
 #include "arm_compute/runtime/Tensor.h"
 #include "arm_compute/runtime/TensorAllocator.h"
@@ -90,6 +91,26 @@ DATA_TEST_CASE(Configuration,
 template <typename T>
 using NEArgMinMaxValidationFixture = ArgMinMaxValidationFixture<Tensor, Accessor, NEArgMinMaxLayer, T>;
 
+TEST_SUITE(S32)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+                       NEArgMinMaxValidationFixture<int32_t>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::S32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge,
+                       NEArgMinMaxValidationFixture<int32_t>,
+                       framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::S32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // S32
+
 TEST_SUITE(Float)
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 TEST_SUITE(FP16)
diff --git a/tests/validation/fixtures/ArgMinMaxFixture.h b/tests/validation/fixtures/ArgMinMaxFixture.h
index e263b25bf2..ed6b51abe5 100644
--- a/tests/validation/fixtures/ArgMinMaxFixture.h
+++ b/tests/validation/fixtures/ArgMinMaxFixture.h
@@ -56,17 +56,31 @@ protected:
     template <typename U>
     void fill(U &&tensor)
     {
-        if(!is_data_type_quantized(tensor.data_type()))
+        switch(tensor.data_type())
         {
-            std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
-            library->fill(tensor, distribution, 0);
-        }
-        else
-        {
-            std::pair<int, int> bounds = get_quantized_bounds(tensor.quantization_info(), -1.0f, 1.0f);
-            std::uniform_int_distribution<uint8_t> distribution(bounds.first, bounds.second);
-
-            library->fill(tensor, distribution, 0);
+            case DataType::F32:
+            case DataType::F16:
+            {
+                std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+                library->fill(tensor, distribution, 0);
+                break;
+            }
+            case DataType::S32:
+            {
+                std::uniform_int_distribution<int32_t> distribution(-100, 100);
+                library->fill(tensor, distribution, 0);
+                break;
+            }
+            case DataType::QASYMM8:
+            {
+                std::pair<int, int> bounds = get_quantized_bounds(tensor.quantization_info(), -1.0f, 1.0f);
+                std::uniform_int_distribution<uint8_t> distribution(bounds.first, bounds.second);
+
+                library->fill(tensor, distribution, 0);
+                break;
+            }
+            default:
+                ARM_COMPUTE_ERROR("DataType for ArgMinMax not implemented");
         }
     }
diff --git a/tests/validation/reference/ReductionOperation.cpp b/tests/validation/reference/ReductionOperation.cpp
index fb7a6d6997..c7624a4628 100644
--- a/tests/validation/reference/ReductionOperation.cpp
+++ b/tests/validation/reference/ReductionOperation.cpp
@@ -238,6 +238,7 @@ template SimpleTensor<uint32_t> reduction_operation(const SimpleTensor<float> &src,
 template SimpleTensor<uint32_t> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
 template SimpleTensor<uint32_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
+template SimpleTensor<uint32_t> reduction_operation(const SimpleTensor<int32_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
 template SimpleTensor<float> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
 template SimpleTensor<half> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op);
--
cgit v1.2.1
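
Usage note (illustrative, not part of the commit): with this patch applied, NEArgMinMaxLayer accepts S32 input directly. The sketch below is a minimal example under stated assumptions: the tensor names, shape, and main() harness are invented for illustration, and it assumes configure() auto-initializes an empty output tensor with U32 index metadata, as ACL NEON functions generally do.

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Hypothetical 2D S32 input: width 8, height 4.
        Tensor input{};
        Tensor output{};
        input.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::S32));

        // Index of the maximum element along axis 0; the output holds U32 indices.
        NEArgMinMaxLayer argmax{};
        argmax.configure(&input, 0 /* axis */, &output, ReductionOperation::ARG_IDX_MAX);

        input.allocator()->allocate();
        output.allocator()->allocate();
        // ... fill input via input.buffer(), then:
        argmax.run();
        return 0;
    }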