aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author George Wort <george.wort@arm.com> 2019-01-16 11:39:57 +0000
committer George Wort <george.wort@arm.com> 2019-01-23 15:15:20 +0000
commit dc9b52867b26e59ba34365f53820ddd7edf01ded (patch)
tree f5fbd33c9bc04c1c46a37b975e5d07e3045e1b34
parent 1d4f3853dfd16f55338d772ad757db0ee8710d78 (diff)
download ComputeLibrary-dc9b52867b26e59ba34365f53820ddd7edf01ded.tar.gz
COMPMID-1795: Add support for NHWC in CLGenerateProposalsLayer
Change-Id: Idd805513783fa1323e239eac8a899d8ab04fb14f
Reviewed-on: https://review.mlplatform.org/536
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
-rw-r--r-- arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h | 4
-rw-r--r-- src/runtime/CL/functions/CLGenerateProposalsLayer.cpp | 77
-rw-r--r-- tests/validation/CL/GenerateProposalsLayer.cpp | 47
3 files changed, 96 insertions, 32 deletions
diff --git a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
index 343229fe10..02f5bd1ea8 100644
--- a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -124,6 +124,8 @@ private:
// CPP kernels
CPPBoxWithNonMaximaSuppressionLimitKernel _cpp_nms_kernel;
+ bool _is_nhwc;
+
// Temporary tensors
CLTensor _deltas_permuted;
CLTensor _deltas_flattened;
diff --git a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
index 5dd120277a..c25a6c616e 100644
--- a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
+++ b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,6 +40,7 @@ CLGenerateProposalsLayer::CLGenerateProposalsLayer(std::shared_ptr<IMemoryManage
_memset_kernel(),
_padded_copy_kernel(),
_cpp_nms_kernel(),
+ _is_nhwc(false),
_deltas_permuted(),
_deltas_flattened(),
_scores_permuted(),
@@ -60,10 +61,11 @@ void CLGenerateProposalsLayer::configure(const ICLTensor *scores, const ICLTenso
ARM_COMPUTE_ERROR_ON_NULLPTR(scores, deltas, anchors, proposals, scores_out, num_valid_proposals);
ARM_COMPUTE_ERROR_THROW_ON(CLGenerateProposalsLayer::validate(scores->info(), deltas->info(), anchors->info(), proposals->info(), scores_out->info(), num_valid_proposals->info(), info));
+ _is_nhwc = scores->info()->data_layout() == DataLayout::NHWC;
const DataType data_type = deltas->info()->data_type();
- const int num_anchors = scores->info()->dimension(2);
- const int feat_width = scores->info()->dimension(0);
- const int feat_height = scores->info()->dimension(1);
+ const int num_anchors = scores->info()->dimension(get_data_layout_dimension_index(scores->info()->data_layout(), DataLayoutDimension::CHANNEL));
+ const int feat_width = scores->info()->dimension(get_data_layout_dimension_index(scores->info()->data_layout(), DataLayoutDimension::WIDTH));
+ const int feat_height = scores->info()->dimension(get_data_layout_dimension_index(scores->info()->data_layout(), DataLayoutDimension::HEIGHT));
const int total_num_anchors = num_anchors * feat_width * feat_height;
const int pre_nms_topN = info.pre_nms_topN();
const int post_nms_topN = info.post_nms_topN();
@@ -77,21 +79,37 @@ void CLGenerateProposalsLayer::configure(const ICLTensor *scores, const ICLTenso
_deltas_flattened.allocator()->init(TensorInfo(flatten_shape_deltas, 1, data_type));
// Permute and reshape deltas
- _memory_group.manage(&_deltas_permuted);
- _memory_group.manage(&_deltas_flattened);
- _permute_deltas_kernel.configure(deltas, &_deltas_permuted, PermutationVector{ 2, 0, 1 });
- _flatten_deltas_kernel.configure(&_deltas_permuted, &_deltas_flattened);
- _deltas_permuted.allocator()->allocate();
+ if(!_is_nhwc)
+ {
+ _memory_group.manage(&_deltas_permuted);
+ _memory_group.manage(&_deltas_flattened);
+ _permute_deltas_kernel.configure(deltas, &_deltas_permuted, PermutationVector{ 2, 0, 1 });
+ _flatten_deltas_kernel.configure(&_deltas_permuted, &_deltas_flattened);
+ _deltas_permuted.allocator()->allocate();
+ }
+ else
+ {
+ _memory_group.manage(&_deltas_flattened);
+ _flatten_deltas_kernel.configure(deltas, &_deltas_flattened);
+ }
const TensorShape flatten_shape_scores(1, total_num_anchors);
_scores_flattened.allocator()->init(TensorInfo(flatten_shape_scores, 1, data_type));
// Permute and reshape scores
- _memory_group.manage(&_scores_permuted);
- _memory_group.manage(&_scores_flattened);
- _permute_scores_kernel.configure(scores, &_scores_permuted, PermutationVector{ 2, 0, 1 });
- _flatten_scores_kernel.configure(&_scores_permuted, &_scores_flattened);
- _scores_permuted.allocator()->allocate();
+ if(!_is_nhwc)
+ {
+ _memory_group.manage(&_scores_permuted);
+ _memory_group.manage(&_scores_flattened);
+ _permute_scores_kernel.configure(scores, &_scores_permuted, PermutationVector{ 2, 0, 1 });
+ _flatten_scores_kernel.configure(&_scores_permuted, &_scores_flattened);
+ _scores_permuted.allocator()->allocate();
+ }
+ else
+ {
+ _memory_group.manage(&_scores_flattened);
+ _flatten_scores_kernel.configure(scores, &_scores_flattened);
+ }
// Bounding box transform
_memory_group.manage(&_all_proposals);
@@ -141,11 +159,12 @@ Status CLGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITens
const ITensorInfo *num_valid_proposals, const GenerateProposalsInfo &info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(scores, deltas, anchors, proposals, scores_out, num_valid_proposals);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(scores, DataLayout::NCHW);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(scores, DataLayout::NCHW, DataLayout::NHWC);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(scores, deltas);
- const int num_anchors = scores->dimension(2);
- const int feat_width = scores->dimension(0);
- const int feat_height = scores->dimension(1);
+ const int num_anchors = scores->dimension(get_data_layout_dimension_index(scores->data_layout(), DataLayoutDimension::CHANNEL));
+ const int feat_width = scores->dimension(get_data_layout_dimension_index(scores->data_layout(), DataLayoutDimension::WIDTH));
+ const int feat_height = scores->dimension(get_data_layout_dimension_index(scores->data_layout(), DataLayoutDimension::HEIGHT));
const int num_images = scores->dimension(3);
const int total_num_anchors = num_anchors * feat_width * feat_height;
const int values_per_roi = info.values_per_roi();
@@ -156,14 +175,21 @@ Status CLGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITens
ARM_COMPUTE_RETURN_ON_ERROR(CLComputeAllAnchorsKernel::validate(anchors, &all_anchors_info, ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale())));
TensorInfo deltas_permuted_info = deltas->clone()->set_tensor_shape(TensorShape(values_per_roi * num_anchors, feat_width, feat_height)).set_is_resizable(true);
- ARM_COMPUTE_RETURN_ON_ERROR(CLPermuteKernel::validate(deltas, &deltas_permuted_info, PermutationVector{ 2, 0, 1 }));
+ TensorInfo scores_permuted_info = scores->clone()->set_tensor_shape(TensorShape(num_anchors, feat_width, feat_height)).set_is_resizable(true);
+ if(scores->data_layout() == DataLayout::NHWC)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(deltas, &deltas_permuted_info);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(scores, &scores_permuted_info);
+ }
+ else
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(CLPermuteKernel::validate(deltas, &deltas_permuted_info, PermutationVector{ 2, 0, 1 }));
+ ARM_COMPUTE_RETURN_ON_ERROR(CLPermuteKernel::validate(scores, &scores_permuted_info, PermutationVector{ 2, 0, 1 }));
+ }
TensorInfo deltas_flattened_info(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true));
ARM_COMPUTE_RETURN_ON_ERROR(CLReshapeLayerKernel::validate(&deltas_permuted_info, &deltas_flattened_info));
- TensorInfo scores_permuted_info = scores->clone()->set_tensor_shape(TensorShape(num_anchors, feat_width, feat_height)).set_is_resizable(true);
- ARM_COMPUTE_RETURN_ON_ERROR(CLPermuteKernel::validate(scores, &scores_permuted_info, PermutationVector{ 2, 0, 1 }));
-
TensorInfo scores_flattened_info(deltas->clone()->set_tensor_shape(TensorShape(1, total_num_anchors)).set_is_resizable(true));
TensorInfo proposals_4_roi_values(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true));
@@ -236,9 +262,12 @@ void CLGenerateProposalsLayer::run()
CLScheduler::get().enqueue(_compute_anchors_kernel, false);
// Transpose and reshape the inputs
- CLScheduler::get().enqueue(_permute_deltas_kernel, false);
+ if(!_is_nhwc)
+ {
+ CLScheduler::get().enqueue(_permute_deltas_kernel, false);
+ CLScheduler::get().enqueue(_permute_scores_kernel, false);
+ }
CLScheduler::get().enqueue(_flatten_deltas_kernel, false);
- CLScheduler::get().enqueue(_permute_scores_kernel, false);
CLScheduler::get().enqueue(_flatten_scores_kernel, false);
// Build the boxes
diff --git a/tests/validation/CL/GenerateProposalsLayer.cpp b/tests/validation/CL/GenerateProposalsLayer.cpp
index b4772fcf79..ac2d0ef969 100644
--- a/tests/validation/CL/GenerateProposalsLayer.cpp
+++ b/tests/validation/CL/GenerateProposalsLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,6 +24,7 @@
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/functions/CLComputeAllAnchors.h"
#include "arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h"
+#include "arm_compute/runtime/CL/functions/CLPermute.h"
#include "arm_compute/runtime/CL/functions/CLSlice.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/CLArrayAccessor.h"
@@ -48,6 +49,31 @@ inline void fill_tensor(U &&tensor, const std::vector<T> &v)
std::memcpy(tensor.data(), v.data(), sizeof(T) * v.size());
}
+template <typename T> // Test helper overload for CLAccessor: copies the flat vector v into the tensor, branching on the tensor's data layout.
+inline void fill_tensor(CLAccessor &&tensor, const std::vector<T> &v)
+{
+ if(tensor.data_layout() == DataLayout::NCHW) // NCHW: v is copied verbatim, no element reordering.
+ {
+ std::memcpy(tensor.data(), v.data(), sizeof(T) * v.size()); // Copies sizeof(T) * v.size() bytes; nothing here checks that against the tensor's buffer size.
+ }
+ else // Every non-NCHW layout takes this path; presumably only NHWC reaches it (the shape indexing below matches channel-first dimension order) - confirm against callers.
+ {
+ const int channels = tensor.shape()[0]; // Dimension 0 is read as the channel count.
+ const int width = tensor.shape()[1]; // Dimension 1 is read as the width.
+ const int height = tensor.shape()[2]; // Dimension 2 is read as the height; any higher dimensions are not visited.
+ for(int x = 0; x < width; ++x)
+ {
+ for(int y = 0; y < height; ++y)
+ {
+ for(int c = 0; c < channels; ++c) // Element-wise copy: destination coordinate (c, x, y) takes source index x + y * width + c * height * width, i.e. v is read width-fastest, channel-slowest.
+ {
+ *(reinterpret_cast<T *>(tensor(Coordinates(c, x, y)))) = *(reinterpret_cast<const T *>(v.data() + x + y * width + c * height * width)); // No bounds check on v: assumes v.size() >= channels * width * height - TODO confirm.
+ }
+ }
+ }
+ }
+}
+
const auto ComputeAllInfoDataset = framework::dataset::make("ComputeAllInfo",
{
ComputeAnchorsInfo(10U, 10U, 1. / 16.f),
@@ -165,8 +191,9 @@ DATA_TEST_CASE(IntegrationTestCaseAllAnchors, framework::DatasetMode::ALL, frame
validate(CLAccessor(all_anchors), anchors_expected);
}
-DATA_TEST_CASE(IntegrationTestCaseGenerateProposals, framework::DatasetMode::ALL, framework::dataset::make("DataType", { DataType::F32 }),
- data_type)
+DATA_TEST_CASE(IntegrationTestCaseGenerateProposals, framework::DatasetMode::ALL, combine(framework::dataset::make("DataType", { DataType::F32 }),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ data_type, data_layout)
{
const int values_per_roi = 4;
const int num_anchors = 2;
@@ -260,9 +287,17 @@ DATA_TEST_CASE(IntegrationTestCaseGenerateProposals, framework::DatasetMode::ALL
8.91025957e-06f
});
+ TensorShape scores_shape = TensorShape(feature_width, feature_height, num_anchors);
+ TensorShape deltas_shape = TensorShape(feature_width, feature_height, values_per_roi * num_anchors);
+ if(data_layout == DataLayout::NHWC)
+ {
+ permute(scores_shape, PermutationVector(2U, 0U, 1U));
+ permute(deltas_shape, PermutationVector(2U, 0U, 1U));
+ }
+
// Inputs
- CLTensor scores = create_tensor<CLTensor>(TensorShape(feature_width, feature_height, num_anchors), data_type);
- CLTensor bbox_deltas = create_tensor<CLTensor>(TensorShape(feature_width, feature_height, values_per_roi * num_anchors), data_type);
+ CLTensor scores = create_tensor<CLTensor>(scores_shape, data_type, 1, QuantizationInfo(), data_layout);
+ CLTensor bbox_deltas = create_tensor<CLTensor>(deltas_shape, data_type, 1, QuantizationInfo(), data_layout);
CLTensor anchors = create_tensor<CLTensor>(TensorShape(values_per_roi, num_anchors), data_type);
// Outputs
@@ -282,7 +317,6 @@ DATA_TEST_CASE(IntegrationTestCaseGenerateProposals, framework::DatasetMode::ALL
proposals.allocator()->allocate();
num_valid_proposals.allocator()->allocate();
scores_out.allocator()->allocate();
-
// Fill inputs
fill_tensor(CLAccessor(scores), scores_vector);
fill_tensor(CLAccessor(bbox_deltas), bbx_vector);
@@ -290,7 +324,6 @@ DATA_TEST_CASE(IntegrationTestCaseGenerateProposals, framework::DatasetMode::ALL
// Run operator
generate_proposals.run();
-
// Gather num_valid_proposals
num_valid_proposals.map();
const uint32_t N = *reinterpret_cast<uint32_t *>(num_valid_proposals.ptr_to_element(Coordinates(0, 0)));