From 25ef7217ec4e13682bf37c87c0c6075a799ba1c0 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Tue, 2 Jun 2020 23:00:41 +0100 Subject: COMPMID-3180: Remove padding from NEThreshold - Removes padding from NEThresholdKernel - Alters configuration interface to use a descriptor Change-Id: I394d5e1375454813856d9d206e61dc9a87c2cadc Signed-off-by: Georgios Pinitas Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3300 Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- arm_compute/core/CL/kernels/CLThresholdKernel.h | 30 ++-- arm_compute/core/KernelDescriptors.h | 24 +++ arm_compute/core/NEON/kernels/NEThresholdKernel.h | 39 +++-- arm_compute/core/NEON/wrapper/intrinsics/cle.h | 64 ++++++++ .../core/NEON/wrapper/intrinsics/intrinsics.h | 1 + arm_compute/runtime/CL/functions/CLThreshold.h | 22 +-- arm_compute/runtime/NEON/functions/NEThreshold.h | 21 ++- docs/00_introduction.dox | 6 + src/core/CL/kernels/CLThresholdKernel.cpp | 25 ++- src/core/NEON/kernels/NEThresholdKernel.cpp | 171 ++++++++++++++++----- src/runtime/CL/functions/CLThreshold.cpp | 17 +- src/runtime/NEON/functions/NEThreshold.cpp | 17 +- tests/validation/CL/Threshold.cpp | 4 +- tests/validation/NEON/Threshold.cpp | 9 +- tests/validation/fixtures/ThresholdFixture.h | 4 +- 15 files changed, 330 insertions(+), 124 deletions(-) create mode 100644 arm_compute/core/NEON/wrapper/intrinsics/cle.h diff --git a/arm_compute/core/CL/kernels/CLThresholdKernel.h b/arm_compute/core/CL/kernels/CLThresholdKernel.h index 3db48706a3..6dc218b16e 100644 --- a/arm_compute/core/CL/kernels/CLThresholdKernel.h +++ b/arm_compute/core/CL/kernels/CLThresholdKernel.h @@ -25,45 +25,33 @@ #define ARM_COMPUTE_CLTHRESHOLDKERNEL_H #include "arm_compute/core/CL/ICLSimple2DKernel.h" +#include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Types.h" -#include - namespace arm_compute { +// Forward declarations class ICLTensor; -/** Interface for the thresholding kernel. - * - */ +/** Interface for the thresholding kernel. */ class CLThresholdKernel : public ICLSimple2DKernel { public: /**Initialise the kernel's input, output and threshold parameters. * - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] threshold Threshold. When the threshold type is RANGE, this is used as the lower threshold. - * @param[in] false_value value to set when the condition is not respected. - * @param[in] true_value value to set when the condition is respected. - * @param[in] type Thresholding type. Either RANGE or BINARY. - * @param[in] upper Upper threshold. Only used when the thresholding type is RANGE. + * @param[in] input An input tensor. Data types supported: U8 + * @param[out] output The output tensor. Data types supported: U8. + * @param[in] info Threshold descriptor */ - void configure(const ICLTensor *input, ICLTensor *output, uint8_t threshold, - uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper); + void configure(const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info); /**Initialise the kernel's input, output and threshold parameters. * * @param[in] compile_context The compile context to be used. * @param[in] input An input tensor. Data types supported: U8 * @param[out] output The output tensor. Data types supported: U8. - * @param[in] threshold Threshold. When the threshold type is RANGE, this is used as the lower threshold. - * @param[in] false_value value to set when the condition is not respected. - * @param[in] true_value value to set when the condition is respected. - * @param[in] type Thresholding type. Either RANGE or BINARY. - * @param[in] upper Upper threshold. Only used when the thresholding type is RANGE. + * @param[in] info Threshold descriptor */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, uint8_t threshold, - uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper); + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info); }; } // namespace arm_compute #endif /*ARM_COMPUTE_NETHRESHOLDKERNEL_H */ diff --git a/arm_compute/core/KernelDescriptors.h b/arm_compute/core/KernelDescriptors.h index de08288dec..79aa450310 100644 --- a/arm_compute/core/KernelDescriptors.h +++ b/arm_compute/core/KernelDescriptors.h @@ -203,5 +203,29 @@ struct ScaleKernelInfo bool use_padding; /**< Indication of using padding */ bool align_corners; /**< Align corners of input and output */ }; + +struct ThresholdKernelInfo +{ + /** Default constructor */ + ThresholdKernelInfo() = default; + /** Constructor + * + * @param[in] threshold Threshold. When the threshold type is RANGE, this is used as the lower threshold. + * @param[in] false_value value to set when the condition is not respected. + * @param[in] true_value value to set when the condition is respected. + * @param[in] type Thresholding type. Either RANGE or BINARY. + * @param[in] upper Upper threshold. Only used when the thresholding type is RANGE. + */ + ThresholdKernelInfo(uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper) + : threshold(threshold), false_value(false_value), true_value(true_value), type(type), upper(upper) + { + } + + uint8_t threshold{ 0 }; + uint8_t false_value{ 0 }; + uint8_t true_value{ 0 }; + ThresholdType type{ ThresholdType::BINARY }; + uint8_t upper{ 0 }; +}; } // namespace arm_compute #endif /* ARM_COMPUTE_CORE_KERNEL_DESCRIPTORS_H */ diff --git a/arm_compute/core/NEON/kernels/NEThresholdKernel.h b/arm_compute/core/NEON/kernels/NEThresholdKernel.h index a6d1e9071c..fc97c3a75f 100644 --- a/arm_compute/core/NEON/kernels/NEThresholdKernel.h +++ b/arm_compute/core/NEON/kernels/NEThresholdKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -24,18 +24,15 @@ #ifndef ARM_COMPUTE_NETHRESHOLDKERNEL_H #define ARM_COMPUTE_NETHRESHOLDKERNEL_H +#include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Types.h" -#include - namespace arm_compute { class ITensor; -/** Interface for the thresholding kernel - * - */ +/** Interface for the thresholding kernel */ class NEThresholdKernel : public INEKernel { public: @@ -53,15 +50,20 @@ public: NEThresholdKernel &operator=(const NEThresholdKernel &) = delete; /** Initialise the kernel's input, output and threshold parameters. * - * @param[in] input An input tensor. Data type supported: U8 - * @param[out] output The output tensor. Data type supported: U8. - * @param[in] threshold Threshold. When the threhold type is RANGE, this is used as the lower threshold. - * @param[in] false_value value to set when the condition is not respected. - * @param[in] true_value value to set when the condition is respected. - * @param[in] type Thresholding type. Either RANGE or BINARY. - * @param[in] upper Upper threshold. Only used when the thresholding type is RANGE. + * @param[in] input An input tensor. Data type supported: U8 + * @param[out] output The output tensor. Data type supported: U8. + * @param[in] info Threshold kernel descriptor + */ + void configure(const ITensor *input, ITensor *output, const ThresholdKernelInfo &info); + /** Static function to check if given info will lead to a valid configuration of @ref NEThresholdKernel + * + * @param[in] input Input tensor info. Data type supported: U8 + * @param[in] output Output tensor info. Data type supported: U8 + * @param[in] info Threshold kernel descriptor + * + * @return A status containing an error code in case of failure */ - void configure(const ITensor *input, ITensor *output, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper); + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ThresholdKernelInfo &info); // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; @@ -74,12 +76,9 @@ private: void (NEThresholdKernel::*_func)(const Window &window); - const ITensor *_input; /**< Input */ - ITensor *_output; /**< Output */ - uint8_t _threshold; - uint8_t _false_value; - uint8_t _true_value; - uint8_t _upper; + const ITensor *_input; /**< Input */ + ITensor *_output; /**< Output */ + ThresholdKernelInfo _info; /**< Threshold descriptor */ }; } // namespace arm_compute #endif /*ARM_COMPUTE_NETHRESHOLDKERNEL_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/cle.h b/arm_compute/core/NEON/wrapper/intrinsics/cle.h new file mode 100644 index 0000000000..83c92d6891 --- /dev/null +++ b/arm_compute/core/NEON/wrapper/intrinsics/cle.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2020 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_WRAPPER_CLE_H +#define ARM_COMPUTE_WRAPPER_CLE_H + +#include + +namespace arm_compute +{ +namespace wrapper +{ +#define VCLE_IMPL(stype, vtype, rtype, prefix, postfix) \ + inline rtype vcle(const vtype &a, const vtype &b) \ + { \ + return prefix##_##postfix(a, b); \ + } + +VCLE_IMPL(uint8_t, uint8x8_t, uint8x8_t, vcle, u8) +VCLE_IMPL(int8_t, int8x8_t, uint8x8_t, vcle, s8) +VCLE_IMPL(uint16_t, uint16x4_t, uint16x4_t, vcle, u16) +VCLE_IMPL(int16_t, int16x4_t, uint16x4_t, vcle, s16) +VCLE_IMPL(uint32_t, uint32x2_t, uint32x2_t, vcle, u32) +VCLE_IMPL(int32_t, int32x2_t, uint32x2_t, vcle, s32) +VCLE_IMPL(float32x2_t, float32x2_t, uint32x2_t, vcle, f32) +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +VCLE_IMPL(float16x4_t, float16x4_t, uint16x4_t, vcle, f16) +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + +VCLE_IMPL(uint8_t, uint8x16_t, uint8x16_t, vcleq, u8) +VCLE_IMPL(int8_t, int8x16_t, uint8x16_t, vcleq, s8) +VCLE_IMPL(uint16_t, uint16x8_t, uint16x8_t, vcleq, u16) +VCLE_IMPL(int16_t, int16x8_t, uint16x8_t, vcleq, s16) +VCLE_IMPL(uint32_t, uint32x4_t, uint32x4_t, vcleq, u32) +VCLE_IMPL(int32_t, int32x4_t, uint32x4_t, vcleq, s32) +VCLE_IMPL(float32x4_t, float32x4_t, uint32x4_t, vcleq, f32) +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +VCLE_IMPL(float16x8_t, float16x8_t, uint16x8_t, vcleq, f16) +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + +#undef VCLE_IMPL +} // namespace wrapper +} // namespace arm_compute +#endif /* ARM_COMPUTE_WRAPPER_CLE_H */ diff --git a/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h b/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h index 1150daa073..14c5d615be 100644 --- a/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h +++ b/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h @@ -31,6 +31,7 @@ #include "arm_compute/core/NEON/wrapper/intrinsics/ceq.h" #include "arm_compute/core/NEON/wrapper/intrinsics/cge.h" #include "arm_compute/core/NEON/wrapper/intrinsics/cgt.h" +#include "arm_compute/core/NEON/wrapper/intrinsics/cle.h" #include "arm_compute/core/NEON/wrapper/intrinsics/clt.h" #include "arm_compute/core/NEON/wrapper/intrinsics/combine.h" #include "arm_compute/core/NEON/wrapper/intrinsics/cvt.h" diff --git a/arm_compute/runtime/CL/functions/CLThreshold.h b/arm_compute/runtime/CL/functions/CLThreshold.h index d8ae6fbb34..e0449a047d 100644 --- a/arm_compute/runtime/CL/functions/CLThreshold.h +++ b/arm_compute/runtime/CL/functions/CLThreshold.h @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_CLTHRESHOLD_H #define ARM_COMPUTE_CLTHRESHOLD_H +#include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/ICLSimpleFunction.h" @@ -31,6 +32,7 @@ namespace arm_compute { +// Forward declarations class ICLTensor; /** Basic function to run @ref CLThresholdKernel */ @@ -47,23 +49,25 @@ public: * @param[in] type Thresholding type. Can either be BINARY or RANGE. * @param[in] upper Upper threshold. Only used with RANGE thresholding */ + ARM_COMPUTE_DEPRECATED_REL(20.08) void configure(const ICLTensor *input, ICLTensor *output, uint8_t threshold, uint8_t false_value = 0, uint8_t true_value = 0, ThresholdType type = ThresholdType::BINARY, uint8_t upper = 0); + /** Initialise the function's source, destination, thresholds and threshold type + * + * @param[in] input First tensor input. Data types supported: U8. + * @param[out] output Output tensor. Data types supported: U8. + * @param[in] info Threshold descriptor + */ + void configure(const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info); /** Initialise the function's source, destination, thresholds and threshold type * * @param[in] compile_context The compile context to be used. * @param[in] input First tensor input. Data types supported: U8. * @param[out] output Output tensor. Data types supported: U8. - * @param[in] threshold Threshold. If upper threshold is specified, this will be used as the lower threshold. - * @param[in] false_value Value to assign when the condition is false. - * @param[in] true_value value to assign when the condition is true. - * @param[in] type Thresholding type. Can either be BINARY or RANGE. - * @param[in] upper Upper threshold. Only used with RANGE thresholding + * @param[in] info Threshold descriptor */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, uint8_t threshold, - uint8_t false_value = 0, uint8_t true_value = 0, - ThresholdType type = ThresholdType::BINARY, uint8_t upper = 0); + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info); }; -} +} // namespace arm_compute #endif /*ARM_COMPUTE_CLTHRESHOLD_H */ diff --git a/arm_compute/runtime/NEON/functions/NEThreshold.h b/arm_compute/runtime/NEON/functions/NEThreshold.h index c955283e9e..e4f3ce34a4 100644 --- a/arm_compute/runtime/NEON/functions/NEThreshold.h +++ b/arm_compute/runtime/NEON/functions/NEThreshold.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_NETHRESHOLD_H #define ARM_COMPUTE_NETHRESHOLD_H +#include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" @@ -31,6 +32,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Basic function to run @ref NEThresholdKernel */ @@ -47,8 +49,25 @@ public: * @param[in] type Thresholding type. Can either be BINARY or RANGE. * @param[in] upper Upper threshold. Only used with RANGE thresholding */ + ARM_COMPUTE_DEPRECATED_REL(20.08) void configure(const ITensor *input, ITensor *output, uint8_t threshold, uint8_t false_value = 0, uint8_t true_value = 0, ThresholdType type = ThresholdType::BINARY, uint8_t upper = 0); + /** Initialise the function's source, destination, thresholds and threshold type + * + * @param[in] input First tensor input. Data type supported: U8. + * @param[out] output Output tensor. Data type supported: U8. + * @param[in] info Threshold descriptor + */ + void configure(const ITensor *input, ITensor *output, const ThresholdKernelInfo &info); + /** Static function to check if given info will lead to a valid configuration of @ref NEThreshold + * + * @param[in] input First tensor input. Data type supported: U8. + * @param[in] output Output tensor. Data type supported: U8. + * @param[in] info Threshold descriptor. + * + * @return A status, containing an error code in case of failure + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ThresholdKernelInfo &info); }; } // namespace arm_compute #endif /*ARM_COMPUTE_NETHRESHOLD_H */ diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox index f5b8825558..9833b1a2d1 100644 --- a/docs/00_introduction.dox +++ b/docs/00_introduction.dox @@ -237,6 +237,12 @@ If there is more than one release in a month then an extra sequential number is @subsection S2_2_changelog Changelog +v20.08 Public major release + - Various bug fixes. + - Various optimisations. + - Deprecated interfaces + - Non-descriptor based interfaces for @ref NEThreshold, @ref CLThreshold + v20.05 Public major release - Various bug fixes. - Various optimisations. diff --git a/src/core/CL/kernels/CLThresholdKernel.cpp b/src/core/CL/kernels/CLThresholdKernel.cpp index 4f984632bc..4ca0bbd0e6 100644 --- a/src/core/CL/kernels/CLThresholdKernel.cpp +++ b/src/core/CL/kernels/CLThresholdKernel.cpp @@ -32,16 +32,14 @@ #include -using namespace arm_compute; - -void CLThresholdKernel::configure(const ICLTensor *input, ICLTensor *output, uint8_t threshold, - uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper) +namespace arm_compute +{ +void CLThresholdKernel::configure(const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info) { - configure(CLKernelLibrary::get().get_compile_context(), input, output, threshold, false_value, true_value, type, upper); + configure(CLKernelLibrary::get().get_compile_context(), input, output, info); } -void CLThresholdKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, uint8_t threshold, - uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper) +void CLThresholdKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); @@ -49,7 +47,7 @@ void CLThresholdKernel::configure(const CLCompileContext &compile_context, const // Construct kernel name std::string kernel_name = "threshold"; - switch(type) + switch(info.type) { case ThresholdType::BINARY: kernel_name += "_binary"; @@ -67,16 +65,17 @@ void CLThresholdKernel::configure(const CLCompileContext &compile_context, const // Set arguments unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters - _kernel.setArg(idx++, false_value); - _kernel.setArg(idx++, true_value); - _kernel.setArg(idx++, threshold); + _kernel.setArg(idx++, info.false_value); + _kernel.setArg(idx++, info.true_value); + _kernel.setArg(idx++, info.threshold); - if(ThresholdType::RANGE == type) + if(ThresholdType::RANGE == info.type) { - _kernel.setArg(idx++, upper); + _kernel.setArg(idx++, info.upper); } // Make sure _kernel is initialized before calling the parent's configure constexpr unsigned int num_elems_processed_per_iteration = 16; ICLSimple2DKernel::configure(input, output, num_elems_processed_per_iteration); } +} // namespace arm_compute diff --git a/src/core/NEON/kernels/NEThresholdKernel.cpp b/src/core/NEON/kernels/NEThresholdKernel.cpp index 5c3b2a7540..b8adc15e77 100644 --- a/src/core/NEON/kernels/NEThresholdKernel.cpp +++ b/src/core/NEON/kernels/NEThresholdKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -28,30 +28,60 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Validate.h" -#include +#include "arm_compute/core/NEON/wrapper/wrapper.h" namespace arm_compute { -class Coordinates; +namespace +{ +Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ThresholdKernelInfo &info) +{ + ARM_COMPUTE_UNUSED(info); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); + + // Checks performed when output is configured + if((output != nullptr) && (output->total_size() != 0)) + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + } + + return Status{}; +} + +std::pair validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) +{ + // Configure kernel window + Window win = calculate_max_window(*input, Steps()); + + // Output auto inizialitation if not yet initialized + auto_init_if_empty(*output, *input->clone()); + + // NEThresholdKernel doesn't need padding so update_window_and_padding() can be skipped + Coordinates coord; + coord.set_num_dimensions(output->num_dimensions()); + output->set_valid_region(ValidRegion(coord, output->tensor_shape())); + + return std::make_pair(Status{}, win); +} +} // namespace NEThresholdKernel::NEThresholdKernel() - : _func(nullptr), _input(nullptr), _output(nullptr), _threshold(0), _false_value(0), _true_value(0), _upper(0) + : _func(nullptr), _input(nullptr), _output(nullptr), _info() { } -void NEThresholdKernel::configure(const ITensor *input, ITensor *output, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper) +void NEThresholdKernel::configure(const ITensor *input, ITensor *output, const ThresholdKernelInfo &info) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), info)); - _input = input; - _output = output; - _threshold = threshold; - _false_value = false_value; - _true_value = true_value; - _upper = upper; + _input = input; + _output = output; + _info = info; - switch(type) + switch(_info.type) { case ThresholdType::BINARY: _func = &NEThresholdKernel::run_binary; @@ -64,54 +94,111 @@ void NEThresholdKernel::configure(const ITensor *input, ITensor *output, uint8_t break; } - constexpr unsigned int num_elems_processed_per_iteration = 16; + // Configure kernel window + auto win_config = validate_and_configure_window(input->info(), output->info()); + ARM_COMPUTE_ERROR_THROW_ON(win_config.first); + ICPPKernel::configure(win_config.second); +} - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration), output_access); - output_access.set_valid_region(win, input->info()->valid_region()); +Status NEThresholdKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ThresholdKernelInfo &info) +{ + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, info)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get()).first); - INEKernel::configure(win); + return Status{}; } inline void NEThresholdKernel::run_binary(const Window &window) { - const uint8x16_t threshold = vdupq_n_u8(_threshold); - const uint8x16_t true_value = vdupq_n_u8(_true_value); - const uint8x16_t false_value = vdupq_n_u8(_false_value); + /** NEON vector tag type. */ + using Type = uint8_t; + using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t; - Iterator input(_input, window); - Iterator output(_output, window); + const int window_step_x = 16 / sizeof(Type); + const auto window_start_x = static_cast(window.x().start()); + const auto window_end_x = static_cast(window.x().end()); - execute_window_loop(window, [&](const Coordinates &) - { - const uint8x16_t data = vld1q_u8(input.ptr()); - const uint8x16_t mask = vcgtq_u8(data, threshold); + Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); + win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); + + const uint8_t threshold = _info.threshold; + const uint8_t true_value = _info.true_value; + const uint8_t false_value = _info.false_value; - vst1q_u8(output.ptr(), vbslq_u8(mask, true_value, false_value)); + const auto vthreshold = wrapper::vdup_n(threshold, ExactTagType{}); + const auto vtrue_value = wrapper::vdup_n(true_value, ExactTagType{}); + const auto vfalse_value = wrapper::vdup_n(false_value, ExactTagType{}); + + Iterator input(_input, win_collapsed); + Iterator output(_output, win_collapsed); + + execute_window_loop(win_collapsed, [&](const Coordinates &) + { + const auto input_ptr = reinterpret_cast(input.ptr()); + const auto output_ptr = reinterpret_cast(output.ptr()); + + int x = window_start_x; + for(; x <= (window_end_x - window_step_x); x += window_step_x) + { + const auto vdata = wrapper::vloadq(input_ptr + x); + const auto vmask = wrapper::vcgt(vdata, vthreshold); + wrapper::vstore(output_ptr + x, wrapper::vbsl(vmask, vtrue_value, vfalse_value)); + } + + for(; x < window_end_x; ++x) + { + const Type data = *(reinterpret_cast(input_ptr + x)); + *(output_ptr + x) = (data > threshold) ? true_value : false_value; + } }, input, output); } inline void NEThresholdKernel::run_range(const Window &window) { - const uint8x16_t lower_threshold = vdupq_n_u8(_threshold); - const uint8x16_t upper_threshold = vdupq_n_u8(_upper); - const uint8x16_t true_value = vdupq_n_u8(_true_value); - const uint8x16_t false_value = vdupq_n_u8(_false_value); + /** NEON vector tag type. */ + using Type = uint8_t; + using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t; - Iterator input(_input, window); - Iterator output(_output, window); + const int window_step_x = 16 / sizeof(Type); + const auto window_start_x = static_cast(window.x().start()); + const auto window_end_x = static_cast(window.x().end()); - execute_window_loop(window, [&](const Coordinates &) - { - const uint8x16_t data = vld1q_u8(input.ptr()); + Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); + win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); + + const uint8_t lower_threshold = _info.threshold; + const uint8_t upper_threshold = _info.upper; + const uint8_t true_value = _info.true_value; + const uint8_t false_value = _info.false_value; - uint8x16_t mask = vcleq_u8(data, upper_threshold); + const auto vlower_threshold = wrapper::vdup_n(lower_threshold, ExactTagType{}); + const auto vupper_threshold = wrapper::vdup_n(upper_threshold, ExactTagType{}); + const auto vtrue_value = wrapper::vdup_n(true_value, ExactTagType{}); + const auto vfalse_value = wrapper::vdup_n(false_value, ExactTagType{}); - mask = vandq_u8(vcgeq_u8(data, lower_threshold), mask); + Iterator input(_input, win_collapsed); + Iterator output(_output, win_collapsed); - vst1q_u8(output.ptr(), vbslq_u8(mask, true_value, false_value)); + execute_window_loop(win_collapsed, [&](const Coordinates &) + { + const auto input_ptr = reinterpret_cast(input.ptr()); + const auto output_ptr = reinterpret_cast(output.ptr()); + + int x = window_start_x; + for(; x <= (window_end_x - window_step_x); x += window_step_x) + { + const auto vdata = wrapper::vloadq(input_ptr + x); + auto vmask = wrapper::vcle(vdata, vupper_threshold); + vmask = wrapper::vand(wrapper::vcge(vdata, vlower_threshold), vmask); + wrapper::vstore(output_ptr + x, wrapper::vbsl(vmask, vtrue_value, vfalse_value)); + } + + for(; x < window_end_x; ++x) + { + const Type data = *(reinterpret_cast(input_ptr + x)); + *(output_ptr + x) = (data <= upper_threshold && data >= lower_threshold) ? true_value : false_value; + } }, input, output); } diff --git a/src/runtime/CL/functions/CLThreshold.cpp b/src/runtime/CL/functions/CLThreshold.cpp index 57c92724fa..2a4221f590 100644 --- a/src/runtime/CL/functions/CLThreshold.cpp +++ b/src/runtime/CL/functions/CLThreshold.cpp @@ -28,17 +28,22 @@ #include -using namespace arm_compute; - +namespace arm_compute +{ void CLThreshold::configure(const ICLTensor *input, ICLTensor *output, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper) { - configure(CLKernelLibrary::get().get_compile_context(), input, output, threshold, false_value, true_value, type, upper); + configure(CLKernelLibrary::get().get_compile_context(), input, output, ThresholdKernelInfo(threshold, false_value, true_value, type, upper)); +} + +void CLThreshold::configure(const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info) +{ + configure(CLKernelLibrary::get().get_compile_context(), input, output, info); } -void CLThreshold::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, - uint8_t upper) +void CLThreshold::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info) { auto k = arm_compute::support::cpp14::make_unique(); - k->configure(compile_context, input, output, threshold, false_value, true_value, type, upper); + k->configure(compile_context, input, output, info); _kernel = std::move(k); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEThreshold.cpp b/src/runtime/NEON/functions/NEThreshold.cpp index f4fd85722c..54481567f3 100644 --- a/src/runtime/NEON/functions/NEThreshold.cpp +++ b/src/runtime/NEON/functions/NEThreshold.cpp @@ -28,11 +28,22 @@ #include -using namespace arm_compute; - +namespace arm_compute +{ void NEThreshold::configure(const ITensor *input, ITensor *output, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper) +{ + configure(input, output, ThresholdKernelInfo(threshold, false_value, true_value, type, upper)); +} + +void NEThreshold::configure(const ITensor *input, ITensor *output, const ThresholdKernelInfo &info) { auto k = arm_compute::support::cpp14::make_unique(); - k->configure(input, output, threshold, false_value, true_value, type, upper); + k->configure(input, output, info); _kernel = std::move(k); } + +Status NEThreshold::validate(const ITensorInfo *input, const ITensorInfo *output, const ThresholdKernelInfo &info) +{ + return NEThresholdKernel::validate(input, output, info); +} +} // namespace arm_compute diff --git a/tests/validation/CL/Threshold.cpp b/tests/validation/CL/Threshold.cpp index 9c68ffe34c..ec4baf4265 100644 --- a/tests/validation/CL/Threshold.cpp +++ b/tests/validation/CL/Threshold.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -52,7 +52,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datas // Create and configure function CLThreshold thrsh; - thrsh.configure(&src, &dst, threshold, false_value, true_value, type, upper); + thrsh.configure(&src, &dst, ThresholdKernelInfo(threshold, false_value, true_value, type, upper)); // Validate valid region const ValidRegion valid_region = shape_to_valid_region(shape); diff --git a/tests/validation/NEON/Threshold.cpp b/tests/validation/NEON/Threshold.cpp index 7cddf7ccb7..fd6d936050 100644 --- a/tests/validation/NEON/Threshold.cpp +++ b/tests/validation/NEON/Threshold.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -53,16 +53,15 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datas // Create and configure function NEThreshold thrsh; - thrsh.configure(&src, &dst, threshold, false_value, true_value, type, upper); + thrsh.configure(&src, &dst, ThresholdKernelInfo(threshold, false_value, true_value, type, upper)); // Validate valid region const ValidRegion valid_region = shape_to_valid_region(shape); validate(dst.info()->valid_region(), valid_region); // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); + validate(src.info()->padding(), PaddingSize()); + validate(dst.info()->padding(), PaddingSize()); } template diff --git a/tests/validation/fixtures/ThresholdFixture.h b/tests/validation/fixtures/ThresholdFixture.h index 9a92175728..1aef6fa4ee 100644 --- a/tests/validation/fixtures/ThresholdFixture.h +++ b/tests/validation/fixtures/ThresholdFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -65,7 +65,7 @@ protected: // Create and configure function FunctionType thrsh; - thrsh.configure(&src, &dst, threshold, false_value, true_value, type, upper); + thrsh.configure(&src, &dst, ThresholdKernelInfo(threshold, false_value, true_value, type, upper)); ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); -- cgit v1.2.1