aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp
diff options
context:
space:
mode:
author: Pablo Marquez Tello <pablo.tello@arm.com> 2023-08-03 14:47:31 +0100
committer: Pablo Marquez Tello <pablo.tello@arm.com> 2023-08-08 15:49:54 +0000
commit: 29e27b0544d99e5d98f044a9e606db8abcfb8900 (patch)
tree: 3749d3f3640d55fceda4dcd04a2916c87414b045 /src/runtime/NEON/functions/NEArgMinMaxLayer.cpp
parent: 66b4a6a8ca1ee55e5b7f05bae2543cf99fe22d6d (diff)
download: ComputeLibrary-29e27b0544d99e5d98f044a9e606db8abcfb8900.tar.gz
Add support for S64 output in NEArgMinMaxLayer
* NEArgMinMaxLayer uses NEReductionOperation to compute its result in S32
* We need to call NECast to convert from S32 to S64
* Resolves MLCE-1089

Change-Id: I6fded869b6076d7af1b9b3e70eb384f4ee82fd8a
Signed-off-by: Pablo Marquez Tello <pablo.tello@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10054
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime/NEON/functions/NEArgMinMaxLayer.cpp')
-rw-r--r-- src/runtime/NEON/functions/NEArgMinMaxLayer.cpp 44
1 files changed, 38 insertions, 6 deletions
diff --git a/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp b/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp
index 3876ae6e87..3ac127b02e 100644
--- a/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp
+++ b/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,22 +29,49 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/NEON/functions/NECast.h"
+#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
+#include "arm_compute/runtime/Tensor.h"
#include "src/common/utils/Log.h"
#include "src/core/NEON/kernels/NEReductionOperationKernel.h"
namespace arm_compute
{
+struct NEArgMinMaxLayer::Impl
+{
+ MemoryGroup memory_group{};
+ std::shared_ptr<IMemoryManager> memory_manager{};
+ std::unique_ptr<NEReductionOperation> reduction_function{};
+ std::unique_ptr<NECast> cast_function{};
+ std::unique_ptr<Tensor> tmp_reduction_result{};
+};
+
NEArgMinMaxLayer::~NEArgMinMaxLayer() = default;
NEArgMinMaxLayer::NEArgMinMaxLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _reduction_function(std::make_unique<NEReductionOperation>())
+ : _impl(std::make_unique<Impl>())
{
- ARM_COMPUTE_UNUSED(memory_manager);
+ _impl->memory_manager = std::move(memory_manager);
}
+
void NEArgMinMaxLayer::configure(ITensor *input, int axis, ITensor *output, const ReductionOperation &op)
{
ARM_COMPUTE_LOG_PARAMS(input, axis, output, op);
- _reduction_function->configure(input, output, axis, op, false);
+ _impl->reduction_function = std::make_unique<NEReductionOperation>();
+ if(output->info() && (output->info()->data_type() == DataType::S64 || output->info()->data_type() == DataType::U64))
+ {
+ _impl->memory_group = MemoryGroup(std::move(_impl->memory_manager));
+ _impl->cast_function = std::make_unique<NECast>();
+ _impl->tmp_reduction_result = std::make_unique<Tensor>();
+ _impl->reduction_function->configure(input, _impl->tmp_reduction_result.get(), axis, op, false);
+ _impl->cast_function->configure(_impl->tmp_reduction_result.get(), output, ConvertPolicy::SATURATE);
+ _impl->memory_group.manage(_impl->tmp_reduction_result.get());
+ _impl->tmp_reduction_result->allocator()->allocate();
+ }
+ else
+ {
+ _impl->reduction_function->configure(input, output, axis, op, false);
+ }
}
Status NEArgMinMaxLayer::validate(const ITensorInfo *input, int axis, const ITensorInfo *output, const ReductionOperation &op)
@@ -55,7 +82,12 @@ Status NEArgMinMaxLayer::validate(const ITensorInfo *input, int axis, const ITen
void NEArgMinMaxLayer::run()
{
- _reduction_function->run();
+ MemoryGroupResourceScope scope_mg(_impl->memory_group);
+ _impl->reduction_function->run();
+ if(_impl->tmp_reduction_result != nullptr)
+ {
+ _impl->cast_function->run();
+ }
}
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute