From dc9b52867b26e59ba34365f53820ddd7edf01ded Mon Sep 17 00:00:00 2001 From: George Wort Date: Wed, 16 Jan 2019 11:39:57 +0000 Subject: COMPMID-1795: Add support for NHWC in CLGenerateProposalsLayer Change-Id: Idd805513783fa1323e239eac8a899d8ab04fb14f Reviewed-on: https://review.mlplatform.org/536 Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio --- .../CL/functions/CLGenerateProposalsLayer.h | 4 +- .../CL/functions/CLGenerateProposalsLayer.cpp | 77 +++++++++++++++------- tests/validation/CL/GenerateProposalsLayer.cpp | 47 +++++++++++-- 3 files changed, 96 insertions(+), 32 deletions(-) diff --git a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h index 343229fe10..02f5bd1ea8 100644 --- a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h +++ b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -124,6 +124,8 @@ private: // CPP kernels CPPBoxWithNonMaximaSuppressionLimitKernel _cpp_nms_kernel; + bool _is_nhwc; + // Temporary tensors CLTensor _deltas_permuted; CLTensor _deltas_flattened; diff --git a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp index 5dd120277a..c25a6c616e 100644 --- a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp +++ b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -40,6 +40,7 @@ CLGenerateProposalsLayer::CLGenerateProposalsLayer(std::shared_ptrinfo(), deltas->info(), anchors->info(), proposals->info(), scores_out->info(), num_valid_proposals->info(), info)); + _is_nhwc = scores->info()->data_layout() == DataLayout::NHWC; const DataType data_type = deltas->info()->data_type(); - const int num_anchors = scores->info()->dimension(2); - const int feat_width = scores->info()->dimension(0); - const int feat_height = scores->info()->dimension(1); + const int num_anchors = scores->info()->dimension(get_data_layout_dimension_index(scores->info()->data_layout(), DataLayoutDimension::CHANNEL)); + const int feat_width = scores->info()->dimension(get_data_layout_dimension_index(scores->info()->data_layout(), DataLayoutDimension::WIDTH)); + const int feat_height = scores->info()->dimension(get_data_layout_dimension_index(scores->info()->data_layout(), DataLayoutDimension::HEIGHT)); const int total_num_anchors = num_anchors * feat_width * feat_height; const int pre_nms_topN = info.pre_nms_topN(); const int post_nms_topN = info.post_nms_topN(); @@ -77,21 +79,37 @@ void CLGenerateProposalsLayer::configure(const ICLTensor *scores, const ICLTenso _deltas_flattened.allocator()->init(TensorInfo(flatten_shape_deltas, 1, data_type)); // Permute and reshape deltas - _memory_group.manage(&_deltas_permuted); - _memory_group.manage(&_deltas_flattened); - _permute_deltas_kernel.configure(deltas, &_deltas_permuted, PermutationVector{ 2, 0, 1 }); - _flatten_deltas_kernel.configure(&_deltas_permuted, &_deltas_flattened); - _deltas_permuted.allocator()->allocate(); + if(!_is_nhwc) + { + _memory_group.manage(&_deltas_permuted); + _memory_group.manage(&_deltas_flattened); + _permute_deltas_kernel.configure(deltas, &_deltas_permuted, PermutationVector{ 2, 0, 1 }); + _flatten_deltas_kernel.configure(&_deltas_permuted, &_deltas_flattened); + _deltas_permuted.allocator()->allocate(); + } + else + { + _memory_group.manage(&_deltas_flattened); + _flatten_deltas_kernel.configure(deltas, &_deltas_flattened); + } const TensorShape flatten_shape_scores(1, total_num_anchors); _scores_flattened.allocator()->init(TensorInfo(flatten_shape_scores, 1, data_type)); // Permute and reshape scores - _memory_group.manage(&_scores_permuted); - _memory_group.manage(&_scores_flattened); - _permute_scores_kernel.configure(scores, &_scores_permuted, PermutationVector{ 2, 0, 1 }); - _flatten_scores_kernel.configure(&_scores_permuted, &_scores_flattened); - _scores_permuted.allocator()->allocate(); + if(!_is_nhwc) + { + _memory_group.manage(&_scores_permuted); + _memory_group.manage(&_scores_flattened); + _permute_scores_kernel.configure(scores, &_scores_permuted, PermutationVector{ 2, 0, 1 }); + _flatten_scores_kernel.configure(&_scores_permuted, &_scores_flattened); + _scores_permuted.allocator()->allocate(); + } + else + { + _memory_group.manage(&_scores_flattened); + _flatten_scores_kernel.configure(scores, &_scores_flattened); + } // Bounding box transform _memory_group.manage(&_all_proposals); @@ -141,11 +159,12 @@ Status CLGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITens const ITensorInfo *num_valid_proposals, const GenerateProposalsInfo &info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(scores, deltas, anchors, proposals, scores_out, num_valid_proposals); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(scores, DataLayout::NCHW); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(scores, DataLayout::NCHW, DataLayout::NHWC); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(scores, deltas); - const int num_anchors = scores->dimension(2); - const int feat_width = scores->dimension(0); - const int feat_height = scores->dimension(1); + const int num_anchors = scores->dimension(get_data_layout_dimension_index(scores->data_layout(), DataLayoutDimension::CHANNEL)); + const int feat_width = scores->dimension(get_data_layout_dimension_index(scores->data_layout(), DataLayoutDimension::WIDTH)); + const int feat_height = scores->dimension(get_data_layout_dimension_index(scores->data_layout(), DataLayoutDimension::HEIGHT)); const int num_images = scores->dimension(3); const int total_num_anchors = num_anchors * feat_width * feat_height; const int values_per_roi = info.values_per_roi(); @@ -156,14 +175,21 @@ Status CLGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITens ARM_COMPUTE_RETURN_ON_ERROR(CLComputeAllAnchorsKernel::validate(anchors, &all_anchors_info, ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale()))); TensorInfo deltas_permuted_info = deltas->clone()->set_tensor_shape(TensorShape(values_per_roi * num_anchors, feat_width, feat_height)).set_is_resizable(true); - ARM_COMPUTE_RETURN_ON_ERROR(CLPermuteKernel::validate(deltas, &deltas_permuted_info, PermutationVector{ 2, 0, 1 })); + TensorInfo scores_permuted_info = scores->clone()->set_tensor_shape(TensorShape(num_anchors, feat_width, feat_height)).set_is_resizable(true); + if(scores->data_layout() == DataLayout::NHWC) + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(deltas, &deltas_permuted_info); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(scores, &scores_permuted_info); + } + else + { + ARM_COMPUTE_RETURN_ON_ERROR(CLPermuteKernel::validate(deltas, &deltas_permuted_info, PermutationVector{ 2, 0, 1 })); + ARM_COMPUTE_RETURN_ON_ERROR(CLPermuteKernel::validate(scores, &scores_permuted_info, PermutationVector{ 2, 0, 1 })); + } TensorInfo deltas_flattened_info(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true)); ARM_COMPUTE_RETURN_ON_ERROR(CLReshapeLayerKernel::validate(&deltas_permuted_info, &deltas_flattened_info)); - TensorInfo scores_permuted_info = scores->clone()->set_tensor_shape(TensorShape(num_anchors, feat_width, feat_height)).set_is_resizable(true); - ARM_COMPUTE_RETURN_ON_ERROR(CLPermuteKernel::validate(scores, &scores_permuted_info, PermutationVector{ 2, 0, 1 })); - TensorInfo scores_flattened_info(deltas->clone()->set_tensor_shape(TensorShape(1, total_num_anchors)).set_is_resizable(true)); TensorInfo proposals_4_roi_values(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true)); @@ -236,9 +262,12 @@ void CLGenerateProposalsLayer::run() CLScheduler::get().enqueue(_compute_anchors_kernel, false); // Transpose and reshape the inputs - CLScheduler::get().enqueue(_permute_deltas_kernel, false); + if(!_is_nhwc) + { + CLScheduler::get().enqueue(_permute_deltas_kernel, false); + CLScheduler::get().enqueue(_permute_scores_kernel, false); + } CLScheduler::get().enqueue(_flatten_deltas_kernel, false); - CLScheduler::get().enqueue(_permute_scores_kernel, false); CLScheduler::get().enqueue(_flatten_scores_kernel, false); // Build the boxes diff --git a/tests/validation/CL/GenerateProposalsLayer.cpp b/tests/validation/CL/GenerateProposalsLayer.cpp index b4772fcf79..ac2d0ef969 100644 --- a/tests/validation/CL/GenerateProposalsLayer.cpp +++ b/tests/validation/CL/GenerateProposalsLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/functions/CLComputeAllAnchors.h" #include "arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h" +#include "arm_compute/runtime/CL/functions/CLPermute.h" #include "arm_compute/runtime/CL/functions/CLSlice.h" #include "tests/CL/CLAccessor.h" #include "tests/CL/CLArrayAccessor.h" @@ -48,6 +49,31 @@ inline void fill_tensor(U &&tensor, const std::vector &v) std::memcpy(tensor.data(), v.data(), sizeof(T) * v.size()); } +template +inline void fill_tensor(CLAccessor &&tensor, const std::vector &v) +{ + if(tensor.data_layout() == DataLayout::NCHW) + { + std::memcpy(tensor.data(), v.data(), sizeof(T) * v.size()); + } + else + { + const int channels = tensor.shape()[0]; + const int width = tensor.shape()[1]; + const int height = tensor.shape()[2]; + for(int x = 0; x < width; ++x) + { + for(int y = 0; y < height; ++y) + { + for(int c = 0; c < channels; ++c) + { + *(reinterpret_cast(tensor(Coordinates(c, x, y)))) = *(reinterpret_cast(v.data() + x + y * width + c * height * width)); + } + } + } + } +} + const auto ComputeAllInfoDataset = framework::dataset::make("ComputeAllInfo", { ComputeAnchorsInfo(10U, 10U, 1. / 16.f), @@ -165,8 +191,9 @@ DATA_TEST_CASE(IntegrationTestCaseAllAnchors, framework::DatasetMode::ALL, frame validate(CLAccessor(all_anchors), anchors_expected); } -DATA_TEST_CASE(IntegrationTestCaseGenerateProposals, framework::DatasetMode::ALL, framework::dataset::make("DataType", { DataType::F32 }), - data_type) +DATA_TEST_CASE(IntegrationTestCaseGenerateProposals, framework::DatasetMode::ALL, combine(framework::dataset::make("DataType", { DataType::F32 }), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + data_type, data_layout) { const int values_per_roi = 4; const int num_anchors = 2; @@ -260,9 +287,17 @@ DATA_TEST_CASE(IntegrationTestCaseGenerateProposals, framework::DatasetMode::ALL 8.91025957e-06f }); + TensorShape scores_shape = TensorShape(feature_width, feature_height, num_anchors); + TensorShape deltas_shape = TensorShape(feature_width, feature_height, values_per_roi * num_anchors); + if(data_layout == DataLayout::NHWC) + { + permute(scores_shape, PermutationVector(2U, 0U, 1U)); + permute(deltas_shape, PermutationVector(2U, 0U, 1U)); + } + // Inputs - CLTensor scores = create_tensor(TensorShape(feature_width, feature_height, num_anchors), data_type); - CLTensor bbox_deltas = create_tensor(TensorShape(feature_width, feature_height, values_per_roi * num_anchors), data_type); + CLTensor scores = create_tensor(scores_shape, data_type, 1, QuantizationInfo(), data_layout); + CLTensor bbox_deltas = create_tensor(deltas_shape, data_type, 1, QuantizationInfo(), data_layout); CLTensor anchors = create_tensor(TensorShape(values_per_roi, num_anchors), data_type); // Outputs @@ -282,7 +317,6 @@ DATA_TEST_CASE(IntegrationTestCaseGenerateProposals, framework::DatasetMode::ALL proposals.allocator()->allocate(); num_valid_proposals.allocator()->allocate(); scores_out.allocator()->allocate(); - // Fill inputs fill_tensor(CLAccessor(scores), scores_vector); fill_tensor(CLAccessor(bbox_deltas), bbx_vector); @@ -290,7 +324,6 @@ DATA_TEST_CASE(IntegrationTestCaseGenerateProposals, framework::DatasetMode::ALL // Run operator generate_proposals.run(); - // Gather num_valid_proposals num_valid_proposals.map(); const uint32_t N = *reinterpret_cast(num_valid_proposals.ptr_to_element(Coordinates(0, 0))); -- cgit v1.2.1