aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMichele Di Giorgio <michele.digiorgio@arm.com>2019-09-06 17:51:37 +0100
committerMichele Di Giorgio <michele.digiorgio@arm.com>2019-09-10 09:14:34 +0000
commit70ad61972d4e7b5ff69e9f3b2924de0df462e6ee (patch)
tree554f2da2c6c56ab4d9d64928483cc29f3ff87595 /src
parent9032ee32da54804806a3f26cbbf5a62b3c764f72 (diff)
downloadComputeLibrary-70ad61972d4e7b5ff69e9f3b2924de0df462e6ee.tar.gz
COMPMID-2635: Add support for QASYMM8 in CPPBoxWithNonMaximaSuppressionLimit
Change-Id: Ife35cf865e6573ff7f921eb0b39af89dbf0f5dda Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-on: https://review.mlplatform.org/c/1873 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Pablo Marquez <pablo.tello@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src')
-rw-r--r--src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp1
-rw-r--r--src/runtime/CPP/functions/CPPBoxWithNonMaximaSuppressionLimit.cpp226
2 files changed, 220 insertions, 7 deletions
diff --git a/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp b/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp
index 02150ff275..62568b4b45 100644
--- a/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp
+++ b/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp
@@ -351,6 +351,7 @@ void CPPBoxWithNonMaximaSuppressionLimitKernel::configure(const ITensor *scores_
{
ARM_COMPUTE_ERROR_ON_NULLPTR(scores_in, boxes_in, scores_out, boxes_out, classes);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(scores_in, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(scores_in, boxes_in, scores_out);
const unsigned int num_classes = scores_in->info()->dimension(0);
ARM_COMPUTE_UNUSED(num_classes);
diff --git a/src/runtime/CPP/functions/CPPBoxWithNonMaximaSuppressionLimit.cpp b/src/runtime/CPP/functions/CPPBoxWithNonMaximaSuppressionLimit.cpp
index 2e10152793..158f45a320 100644
--- a/src/runtime/CPP/functions/CPPBoxWithNonMaximaSuppressionLimit.cpp
+++ b/src/runtime/CPP/functions/CPPBoxWithNonMaximaSuppressionLimit.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,14 +24,226 @@
#include "arm_compute/runtime/CPP/functions/CPPBoxWithNonMaximaSuppressionLimit.h"
#include "arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h"
-#include "support/ToolchainSupport.h"
+#include "arm_compute/runtime/Scheduler.h"
-using namespace arm_compute;
+namespace arm_compute
+{
+namespace
+{
+void dequantize_tensor(const ITensor *input, ITensor *output, DataType data_type)
+{
+ const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform();
+
+ Window window;
+ window.use_tensor_dimensions(input->info()->tensor_shape());
+ Iterator input_it(input, window);
+ Iterator output_it(output, window);
+
+ switch(data_type)
+ {
+ case DataType::QASYMM8:
+ execute_window_loop(window, [&](const Coordinates &)
+ {
+ *reinterpret_cast<float *>(output_it.ptr()) = dequantize(*reinterpret_cast<const uint8_t *>(input_it.ptr()), qinfo.scale, qinfo.offset);
+ },
+ input_it, output_it);
+ break;
+ case DataType::QASYMM16:
+ execute_window_loop(window, [&](const Coordinates &)
+ {
+ *reinterpret_cast<float *>(output_it.ptr()) = dequantize(*reinterpret_cast<const uint16_t *>(input_it.ptr()), qinfo.scale, qinfo.offset);
+ },
+ input_it, output_it);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unsupported data type");
+ }
+}
+
+void quantize_tensor(const ITensor *input, ITensor *output, DataType data_type)
+{
+ const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform();
+
+ Window window;
+ window.use_tensor_dimensions(input->info()->tensor_shape());
+ Iterator input_it(input, window);
+ Iterator output_it(output, window);
+
+ switch(data_type)
+ {
+ case DataType::QASYMM8:
+ execute_window_loop(window, [&](const Coordinates &)
+ {
+ *reinterpret_cast<uint8_t *>(output_it.ptr()) = quantize_qasymm8(*reinterpret_cast<const float *>(input_it.ptr()), qinfo);
+ },
+ input_it, output_it);
+ break;
+ case DataType::QASYMM16:
+ execute_window_loop(window, [&](const Coordinates &)
+ {
+ *reinterpret_cast<uint16_t *>(output_it.ptr()) = quantize_qasymm16(*reinterpret_cast<const float *>(input_it.ptr()), qinfo);
+ },
+ input_it, output_it);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unsupported data type");
+ }
+}
+} // namespace
+
+CPPBoxWithNonMaximaSuppressionLimit::CPPBoxWithNonMaximaSuppressionLimit(std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)),
+ _box_with_nms_limit_kernel(),
+ _scores_in(),
+ _boxes_in(),
+ _batch_splits_in(),
+ _scores_out(),
+ _boxes_out(),
+ _classes(),
+ _batch_splits_out(),
+ _keeps(),
+ _keeps_size(),
+ _scores_in_f32(),
+ _boxes_in_f32(),
+ _batch_splits_in_f32(),
+ _scores_out_f32(),
+ _boxes_out_f32(),
+ _classes_f32(),
+ _batch_splits_out_f32(),
+ _keeps_f32(),
+ _keeps_size_f32(),
+ _is_qasymm8(false)
+{
+}
void CPPBoxWithNonMaximaSuppressionLimit::configure(const ITensor *scores_in, const ITensor *boxes_in, const ITensor *batch_splits_in, ITensor *scores_out, ITensor *boxes_out, ITensor *classes,
ITensor *batch_splits_out, ITensor *keeps, ITensor *keeps_size, const BoxNMSLimitInfo info)
{
- auto k = arm_compute::support::cpp14::make_unique<CPPBoxWithNonMaximaSuppressionLimitKernel>();
- k->configure(scores_in, boxes_in, batch_splits_in, scores_out, boxes_out, classes, batch_splits_out, keeps, keeps_size, info);
- _kernel = std::move(k);
-} \ No newline at end of file
+ ARM_COMPUTE_ERROR_ON_NULLPTR(scores_in, boxes_in, batch_splits_in, scores_out, boxes_out, classes);
+
+ _is_qasymm8 = scores_in->info()->data_type() == DataType::QASYMM8;
+
+ _scores_in = scores_in;
+ _boxes_in = boxes_in;
+ _batch_splits_in = batch_splits_in;
+ _scores_out = scores_out;
+ _boxes_out = boxes_out;
+ _classes = classes;
+ _batch_splits_out = batch_splits_out;
+ _keeps = keeps;
+ _keeps_size = keeps_size;
+
+ if(_is_qasymm8)
+ {
+ // Manage intermediate buffers
+ _memory_group.manage(&_scores_in_f32);
+ _memory_group.manage(&_boxes_in_f32);
+ _memory_group.manage(&_batch_splits_in_f32);
+ _memory_group.manage(&_scores_out_f32);
+ _memory_group.manage(&_boxes_out_f32);
+ _memory_group.manage(&_classes_f32);
+ _scores_in_f32.allocator()->init(scores_in->info()->clone()->set_data_type(DataType::F32));
+ _boxes_in_f32.allocator()->init(boxes_in->info()->clone()->set_data_type(DataType::F32));
+ _batch_splits_in_f32.allocator()->init(batch_splits_in->info()->clone()->set_data_type(DataType::F32));
+ _scores_out_f32.allocator()->init(scores_out->info()->clone()->set_data_type(DataType::F32));
+ _boxes_out_f32.allocator()->init(boxes_out->info()->clone()->set_data_type(DataType::F32));
+ _classes_f32.allocator()->init(classes->info()->clone()->set_data_type(DataType::F32));
+ if(batch_splits_out != nullptr)
+ {
+ _memory_group.manage(&_batch_splits_out_f32);
+ _batch_splits_out_f32.allocator()->init(batch_splits_out->info()->clone()->set_data_type(DataType::F32));
+ }
+ if(keeps != nullptr)
+ {
+ _memory_group.manage(&_keeps_f32);
+ _keeps_f32.allocator()->init(keeps->info()->clone()->set_data_type(DataType::F32));
+ }
+ if(keeps_size != nullptr)
+ {
+ _memory_group.manage(&_keeps_size_f32);
+ _keeps_size_f32.allocator()->init(keeps_size->info()->clone()->set_data_type(DataType::F32));
+ }
+
+ _box_with_nms_limit_kernel.configure(&_scores_in_f32, &_boxes_in_f32, &_batch_splits_in_f32, &_scores_out_f32, &_boxes_out_f32, &_classes_f32,
+ (batch_splits_out != nullptr) ? &_batch_splits_out_f32 : nullptr, (keeps != nullptr) ? &_keeps_f32 : nullptr,
+ (keeps_size != nullptr) ? &_keeps_size_f32 : nullptr, info);
+ }
+ else
+ {
+ _box_with_nms_limit_kernel.configure(scores_in, boxes_in, batch_splits_in, scores_out, boxes_out, classes, batch_splits_out, keeps, keeps_size, info);
+ }
+
+ if(_is_qasymm8)
+ {
+ _scores_in_f32.allocator()->allocate();
+ _boxes_in_f32.allocator()->allocate();
+ _batch_splits_in_f32.allocator()->allocate();
+ _scores_out_f32.allocator()->allocate();
+ _boxes_out_f32.allocator()->allocate();
+ _classes_f32.allocator()->allocate();
+ if(batch_splits_out != nullptr)
+ {
+ _batch_splits_out_f32.allocator()->allocate();
+ }
+ if(keeps != nullptr)
+ {
+ _keeps_f32.allocator()->allocate();
+ }
+ if(keeps_size != nullptr)
+ {
+ _keeps_size_f32.allocator()->allocate();
+ }
+ }
+}
+
+Status validate(const ITensorInfo *scores_in, const ITensorInfo *boxes_in, const ITensorInfo *batch_splits_in, const ITensorInfo *scores_out, const ITensorInfo *boxes_out, const ITensorInfo *classes,
+ const ITensorInfo *batch_splits_out, const ITensorInfo *keeps, const ITensorInfo *keeps_size, const BoxNMSLimitInfo info)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(scores_in, boxes_in, batch_splits_in, scores_out, boxes_out, classes);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(scores_in, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
+
+ const bool is_qasymm8 = scores_in->data_type() == DataType::QASYMM8;
+ if(is_qasymm8)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(boxes_in, 1, DataType::QASYMM16);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(boxes_in, boxes_out);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(boxes_in, boxes_out);
+ const UniformQuantizationInfo boxes_qinfo = boxes_in->quantization_info().uniform();
+ ARM_COMPUTE_RETURN_ERROR_ON(boxes_qinfo.scale != 0.125f);
+ ARM_COMPUTE_RETURN_ERROR_ON(boxes_qinfo.offset != 0);
+ }
+
+ return Status{};
+}
+
+void CPPBoxWithNonMaximaSuppressionLimit::run()
+{
+ if(_is_qasymm8)
+ {
+ dequantize_tensor(_scores_in, &_scores_in_f32, _scores_in->info()->data_type());
+ dequantize_tensor(_boxes_in, &_boxes_in_f32, _boxes_in->info()->data_type());
+ dequantize_tensor(_batch_splits_in, &_batch_splits_in_f32, _batch_splits_in->info()->data_type());
+ }
+
+ Scheduler::get().schedule(&_box_with_nms_limit_kernel, Window::DimY);
+
+ if(_is_qasymm8)
+ {
+ quantize_tensor(&_scores_out_f32, _scores_out, _scores_out->info()->data_type());
+ quantize_tensor(&_boxes_out_f32, _boxes_out, _boxes_out->info()->data_type());
+ quantize_tensor(&_classes_f32, _classes, _classes->info()->data_type());
+ if(_batch_splits_out != nullptr)
+ {
+ quantize_tensor(&_batch_splits_out_f32, _batch_splits_out, _batch_splits_out->info()->data_type());
+ }
+ if(_keeps != nullptr)
+ {
+ quantize_tensor(&_keeps_f32, _keeps, _keeps->info()->data_type());
+ }
+ if(_keeps_size != nullptr)
+ {
+ quantize_tensor(&_keeps_size_f32, _keeps_size, _keeps_size->info()->data_type());
+ }
+ }
+}
+} // namespace arm_compute