aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/NEThresholdKernel.cpp
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2020-06-02 23:00:41 +0100
committerGeorgios Pinitas <georgios.pinitas@arm.com>2020-06-08 16:42:03 +0000
commit25ef7217ec4e13682bf37c87c0c6075a799ba1c0 (patch)
tree8ed3e4d7b811a36322560298ab6c8a0484d10ab8 /src/core/NEON/kernels/NEThresholdKernel.cpp
parentebfb2f8701c131294b3c5c1e36547fa3658d09dd (diff)
downloadComputeLibrary-25ef7217ec4e13682bf37c87c0c6075a799ba1c0.tar.gz
COMPMID-3180: Remove padding from NEThreshold
- Removes padding from NEThresholdKernel - Alters configuration interface to use a descriptor Change-Id: I394d5e1375454813856d9d206e61dc9a87c2cadc Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3300 Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEThresholdKernel.cpp')
-rw-r--r--src/core/NEON/kernels/NEThresholdKernel.cpp171
1 files changed, 129 insertions, 42 deletions
diff --git a/src/core/NEON/kernels/NEThresholdKernel.cpp b/src/core/NEON/kernels/NEThresholdKernel.cpp
index 5c3b2a7540..b8adc15e77 100644
--- a/src/core/NEON/kernels/NEThresholdKernel.cpp
+++ b/src/core/NEON/kernels/NEThresholdKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,30 +28,60 @@
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Validate.h"
-#include <arm_neon.h>
+#include "arm_compute/core/NEON/wrapper/wrapper.h"
namespace arm_compute
{
-class Coordinates;
+namespace
+{
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ThresholdKernelInfo &info)
+{
+ ARM_COMPUTE_UNUSED(info);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
+
+ // Checks performed when output is configured
+ if((output != nullptr) && (output->total_size() != 0))
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+ }
+
+ return Status{};
+}
+
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
+{
+ // Configure kernel window
+ Window win = calculate_max_window(*input, Steps());
+
+ // Output auto inizialitation if not yet initialized
+ auto_init_if_empty(*output, *input->clone());
+
+ // NEThresholdKernel doesn't need padding so update_window_and_padding() can be skipped
+ Coordinates coord;
+ coord.set_num_dimensions(output->num_dimensions());
+ output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
+
+ return std::make_pair(Status{}, win);
+}
+} // namespace
NEThresholdKernel::NEThresholdKernel()
- : _func(nullptr), _input(nullptr), _output(nullptr), _threshold(0), _false_value(0), _true_value(0), _upper(0)
+ : _func(nullptr), _input(nullptr), _output(nullptr), _info()
{
}
-void NEThresholdKernel::configure(const ITensor *input, ITensor *output, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper)
+void NEThresholdKernel::configure(const ITensor *input, ITensor *output, const ThresholdKernelInfo &info)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), info));
- _input = input;
- _output = output;
- _threshold = threshold;
- _false_value = false_value;
- _true_value = true_value;
- _upper = upper;
+ _input = input;
+ _output = output;
+ _info = info;
- switch(type)
+ switch(_info.type)
{
case ThresholdType::BINARY:
_func = &NEThresholdKernel::run_binary;
@@ -64,54 +94,111 @@ void NEThresholdKernel::configure(const ITensor *input, ITensor *output, uint8_t
break;
}
- constexpr unsigned int num_elems_processed_per_iteration = 16;
+ // Configure kernel window
+ auto win_config = validate_and_configure_window(input->info(), output->info());
+ ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+ ICPPKernel::configure(win_config.second);
+}
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
- update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration), output_access);
- output_access.set_valid_region(win, input->info()->valid_region());
+Status NEThresholdKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ThresholdKernelInfo &info)
+{
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, info));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get()).first);
- INEKernel::configure(win);
+ return Status{};
}
inline void NEThresholdKernel::run_binary(const Window &window)
{
- const uint8x16_t threshold = vdupq_n_u8(_threshold);
- const uint8x16_t true_value = vdupq_n_u8(_true_value);
- const uint8x16_t false_value = vdupq_n_u8(_false_value);
+ /** NEON vector tag type. */
+ using Type = uint8_t;
+ using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<Type, wrapper::traits::BitWidth::W128>;
- Iterator input(_input, window);
- Iterator output(_output, window);
+ const int window_step_x = 16 / sizeof(Type);
+ const auto window_start_x = static_cast<int>(window.x().start());
+ const auto window_end_x = static_cast<int>(window.x().end());
- execute_window_loop(window, [&](const Coordinates &)
- {
- const uint8x16_t data = vld1q_u8(input.ptr());
- const uint8x16_t mask = vcgtq_u8(data, threshold);
+ Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
+ win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ const uint8_t threshold = _info.threshold;
+ const uint8_t true_value = _info.true_value;
+ const uint8_t false_value = _info.false_value;
- vst1q_u8(output.ptr(), vbslq_u8(mask, true_value, false_value));
+ const auto vthreshold = wrapper::vdup_n(threshold, ExactTagType{});
+ const auto vtrue_value = wrapper::vdup_n(true_value, ExactTagType{});
+ const auto vfalse_value = wrapper::vdup_n(false_value, ExactTagType{});
+
+ Iterator input(_input, win_collapsed);
+ Iterator output(_output, win_collapsed);
+
+ execute_window_loop(win_collapsed, [&](const Coordinates &)
+ {
+ const auto input_ptr = reinterpret_cast<const Type *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<Type *>(output.ptr());
+
+ int x = window_start_x;
+ for(; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const auto vdata = wrapper::vloadq(input_ptr + x);
+ const auto vmask = wrapper::vcgt(vdata, vthreshold);
+ wrapper::vstore(output_ptr + x, wrapper::vbsl(vmask, vtrue_value, vfalse_value));
+ }
+
+ for(; x < window_end_x; ++x)
+ {
+ const Type data = *(reinterpret_cast<const Type *>(input_ptr + x));
+ *(output_ptr + x) = (data > threshold) ? true_value : false_value;
+ }
},
input, output);
}
inline void NEThresholdKernel::run_range(const Window &window)
{
- const uint8x16_t lower_threshold = vdupq_n_u8(_threshold);
- const uint8x16_t upper_threshold = vdupq_n_u8(_upper);
- const uint8x16_t true_value = vdupq_n_u8(_true_value);
- const uint8x16_t false_value = vdupq_n_u8(_false_value);
+ /** NEON vector tag type. */
+ using Type = uint8_t;
+ using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<Type, wrapper::traits::BitWidth::W128>;
- Iterator input(_input, window);
- Iterator output(_output, window);
+ const int window_step_x = 16 / sizeof(Type);
+ const auto window_start_x = static_cast<int>(window.x().start());
+ const auto window_end_x = static_cast<int>(window.x().end());
- execute_window_loop(window, [&](const Coordinates &)
- {
- const uint8x16_t data = vld1q_u8(input.ptr());
+ Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
+ win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ const uint8_t lower_threshold = _info.threshold;
+ const uint8_t upper_threshold = _info.upper;
+ const uint8_t true_value = _info.true_value;
+ const uint8_t false_value = _info.false_value;
- uint8x16_t mask = vcleq_u8(data, upper_threshold);
+ const auto vlower_threshold = wrapper::vdup_n(lower_threshold, ExactTagType{});
+ const auto vupper_threshold = wrapper::vdup_n(upper_threshold, ExactTagType{});
+ const auto vtrue_value = wrapper::vdup_n(true_value, ExactTagType{});
+ const auto vfalse_value = wrapper::vdup_n(false_value, ExactTagType{});
- mask = vandq_u8(vcgeq_u8(data, lower_threshold), mask);
+ Iterator input(_input, win_collapsed);
+ Iterator output(_output, win_collapsed);
- vst1q_u8(output.ptr(), vbslq_u8(mask, true_value, false_value));
+ execute_window_loop(win_collapsed, [&](const Coordinates &)
+ {
+ const auto input_ptr = reinterpret_cast<const Type *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<Type *>(output.ptr());
+
+ int x = window_start_x;
+ for(; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const auto vdata = wrapper::vloadq(input_ptr + x);
+ auto vmask = wrapper::vcle(vdata, vupper_threshold);
+ vmask = wrapper::vand(wrapper::vcge(vdata, vlower_threshold), vmask);
+ wrapper::vstore(output_ptr + x, wrapper::vbsl(vmask, vtrue_value, vfalse_value));
+ }
+
+ for(; x < window_end_x; ++x)
+ {
+ const Type data = *(reinterpret_cast<const Type *>(input_ptr + x));
+ *(output_ptr + x) = (data <= upper_threshold && data >= lower_threshold) ? true_value : false_value;
+ }
},
input, output);
}