diff options
author | Michalis Spyrou <michalis.spyrou@arm.com> | 2020-05-21 15:02:36 +0100 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2020-06-16 11:42:09 +0000 |
commit | bcd2352d7fd99a2f6aab220fa0c3b3f3119a1a4c (patch) | |
tree | a3e1880071bca828b1c58be71805ccce4b205e53 /src/runtime/NEON/functions | |
parent | eae658453199d67a41deccbeb78e55b8eea9e966 (diff) | |
download | ComputeLibrary-bcd2352d7fd99a2f6aab220fa0c3b3f3119a1a4c.tar.gz |
COMPMID-3391: Implement Async interfaces
Change-Id: I8168cea5056ff48a0253ebb8c88ea549a3ea69a2
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3335
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/runtime/NEON/functions')
-rw-r--r-- | src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp | 20 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEReductionOperation.cpp | 8 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEReshapeLayer.cpp | 38 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NESoftmaxLayer.cpp | 27 |
4 files changed, 63 insertions, 30 deletions
diff --git a/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp b/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp index 82880bac85..dabbebacb4 100644 --- a/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp +++ b/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp @@ -31,9 +31,9 @@ namespace arm_compute NEGenerateProposalsLayer::NEGenerateProposalsLayer(std::shared_ptr<IMemoryManager> memory_manager) : _memory_group(memory_manager), _permute_deltas_kernel(), - _flatten_deltas_kernel(), + _flatten_deltas(), _permute_scores_kernel(), - _flatten_scores_kernel(), + _flatten_scores(), _compute_anchors_kernel(), _bounding_box_kernel(), _pad_kernel(), @@ -95,12 +95,12 @@ void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *d { _memory_group.manage(&_deltas_permuted); _permute_deltas_kernel.configure(deltas, &_deltas_permuted, PermutationVector{ 2, 0, 1 }); - _flatten_deltas_kernel.configure(&_deltas_permuted, &_deltas_flattened); + _flatten_deltas.configure(&_deltas_permuted, &_deltas_flattened); _deltas_permuted.allocator()->allocate(); } else { - _flatten_deltas_kernel.configure(deltas, &_deltas_flattened); + _flatten_deltas.configure(deltas, &_deltas_flattened); } const TensorShape flatten_shape_scores(1, total_num_anchors); @@ -112,12 +112,12 @@ void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *d { _memory_group.manage(&_scores_permuted); _permute_scores_kernel.configure(scores, &_scores_permuted, PermutationVector{ 2, 0, 1 }); - _flatten_scores_kernel.configure(&_scores_permuted, &_scores_flattened); + _flatten_scores.configure(&_scores_permuted, &_scores_flattened); _scores_permuted.allocator()->allocate(); } else { - _flatten_scores_kernel.configure(scores, &_scores_flattened); + _flatten_scores.configure(scores, &_scores_flattened); } Tensor *anchors_to_use = &_all_anchors; @@ -244,12 +244,12 @@ Status NEGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITens } TensorInfo deltas_flattened_info(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true)); - ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayerKernel::validate(&deltas_permuted_info, &deltas_flattened_info)); + ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayer::validate(&deltas_permuted_info, &deltas_flattened_info)); TensorInfo scores_flattened_info(scores->clone()->set_tensor_shape(TensorShape(1, total_num_anchors)).set_is_resizable(true)); TensorInfo proposals_4_roi_values(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true)); - ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayerKernel::validate(&scores_permuted_info, &scores_flattened_info)); + ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayer::validate(&scores_permuted_info, &scores_flattened_info)); TensorInfo *proposals_4_roi_values_to_use = &proposals_4_roi_values; TensorInfo proposals_4_roi_values_quantized(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true)); @@ -327,8 +327,8 @@ void NEGenerateProposalsLayer::run() NEScheduler::get().schedule(&_permute_scores_kernel, Window::DimY); } - NEScheduler::get().schedule(&_flatten_deltas_kernel, Window::DimY); - NEScheduler::get().schedule(&_flatten_scores_kernel, Window::DimY); + _flatten_deltas.run(); + _flatten_scores.run(); if(_is_qasymm8) { diff --git a/src/runtime/NEON/functions/NEReductionOperation.cpp b/src/runtime/NEON/functions/NEReductionOperation.cpp index 80ebe6731a..a895147cc9 100644 --- a/src/runtime/NEON/functions/NEReductionOperation.cpp +++ b/src/runtime/NEON/functions/NEReductionOperation.cpp @@ -54,7 +54,7 @@ size_t reduction_window_split_dimension(unsigned int axis) } // namespace NEReductionOperation::NEReductionOperation(std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(memory_manager), _reduction_kernel(), _fill_border_kernel(), _reshape_kernel(), _output_internal(), _window_split(0), _reduction_axis(), _is_reshape_required(false) + : _memory_group(memory_manager), _reduction_kernel(), _fill_border_kernel(), _reshape(), _output_internal(), _window_split(0), _reduction_axis(), _is_reshape_required(false) { } @@ -91,7 +91,7 @@ Status NEReductionOperation::validate(const ITensorInfo *input, const ITensorInf if(is_reshape_required) { - ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayerKernel::validate(output_internal, output)); + ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayer::validate(output_internal, output)); } return Status{}; @@ -171,7 +171,7 @@ void NEReductionOperation::configure(ITensor *input, ITensor *output, unsigned i if(_is_reshape_required) { - _reshape_kernel.configure(output_internal, output); + _reshape.configure(output_internal, output); _output_internal.allocator()->allocate(); } } @@ -185,7 +185,7 @@ void NEReductionOperation::run() NEScheduler::get().schedule(&_reduction_kernel, _window_split); if(_is_reshape_required) { - NEScheduler::get().schedule(&_reshape_kernel, Window::DimY); + _reshape.run(); } } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEReshapeLayer.cpp b/src/runtime/NEON/functions/NEReshapeLayer.cpp index 0a9f42d510..680abef026 100644 --- a/src/runtime/NEON/functions/NEReshapeLayer.cpp +++ b/src/runtime/NEON/functions/NEReshapeLayer.cpp @@ -25,13 +25,17 @@ #include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h" #include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" +#include "arm_compute/runtime/Types.h" #include "support/MemorySupport.h" #include <utility> namespace arm_compute { -void NEReshapeLayer::configure(const ITensor *input, ITensor *output) +namespace experimental +{ +void NEReshapeLayer::configure(const ITensorInfo *input, ITensorInfo *output) { auto k = arm_compute::support::cpp14::make_unique<NEReshapeLayerKernel>(); k->configure(input, output); @@ -40,9 +44,41 @@ void NEReshapeLayer::configure(const ITensor *input, ITensor *output) Status NEReshapeLayer::validate(const ITensorInfo *input, const ITensorInfo *output) { + return arm_compute::NEReshapeLayer::validate(input, output); +} + +MemoryRequirements NEReshapeLayer::workspace() const +{ + return MemoryRequirements{}; +} +} // namespace experimental + +void NEReshapeLayer::configure(const ITensor *input, ITensor *output) +{ + _input = input; + _output = output; + + auto k = arm_compute::support::cpp14::make_unique<NEReshapeLayerKernel>(); + k->configure(input->info(), output->info()); + _kernel = std::move(k); +} + +Status NEReshapeLayer::validate(const ITensorInfo *input, const ITensorInfo *output) +{ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayerKernel::validate(input, output)); return Status{}; } + +void NEReshapeLayer::run() +{ + InputOperatorTensors src_0 = std::make_pair(TensorType::ACL_SRC, _input); + OutputOperatorTensors dst_0 = std::make_pair(TensorType::ACL_DST, _output); + + std::vector<InputOperatorTensors *> inputs = { &src_0 }; + std::vector<OutputOperatorTensors *> outputs = { &dst_0 }; + + NEScheduler::get().schedule_op(_kernel.get(), Window::DimY, inputs, outputs); +} } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NESoftmaxLayer.cpp b/src/runtime/NEON/functions/NESoftmaxLayer.cpp index 5509edec87..5cd6a550af 100644 --- a/src/runtime/NEON/functions/NESoftmaxLayer.cpp +++ b/src/runtime/NEON/functions/NESoftmaxLayer.cpp @@ -32,8 +32,8 @@ namespace arm_compute { template <bool IS_LOG> NESoftmaxLayerGeneric<IS_LOG>::NESoftmaxLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(std::move(memory_manager)), _max_kernel(), _softmax_kernel(), _flat_or_reshape_kernel_ptr(nullptr), _fill_border_kernel(), _reshape_kernel(), _max(), _tmp(), _input_flattened(), - _output_flattened(), _needs_flattening(false) + : _memory_group(std::move(memory_manager)), _max_kernel(), _softmax_kernel(), _flat_or_reshape_ptr(nullptr), _fill_border_kernel(), _reshape(), _max(), _tmp(), _input_flattened(), _output_flattened(), + _needs_flattening(false) { } @@ -46,23 +46,20 @@ void NESoftmaxLayerGeneric<IS_LOG>::configure_reshape_input_kernel(const ITensor // Initialize the flat input _input_flattened.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_flatten)); - // If we need to flatten the input, we can use NEFlattenKernel or NEReshapeKernel - // If the number of reduced axes is 3 (max dimension), which means collapsing all axes except the batch axis, we use NEFlattenKernel. - // In all other cases we have to use NEReshapeKernel // Note that the "other cases" include both: // 1. first_n_reduce_axes < 3: Reduce the first 1 (no need to reduce) or 2 dimensions (inclusive) // 2. first_n_reduce_axes == 4: Reduce all 4 dimensions. This can only be handled by NEReshapeKernel instead of NEFlattenKernel. if(first_n_reduce_axes == 3) { - auto flatten_kernel_ptr = support::cpp14::make_unique<NEFlattenLayerKernel>(); + auto flatten_kernel_ptr = support::cpp14::make_unique<NEFlattenLayer>(); flatten_kernel_ptr->configure(input, &_input_flattened); - _flat_or_reshape_kernel_ptr = std::move(flatten_kernel_ptr); + _flat_or_reshape_ptr = std::move(flatten_kernel_ptr); } else { - auto reshape_kernel_ptr = support::cpp14::make_unique<NEReshapeLayerKernel>(); + auto reshape_kernel_ptr = support::cpp14::make_unique<NEReshapeLayer>(); reshape_kernel_ptr->configure(input, &_input_flattened); - _flat_or_reshape_kernel_ptr = std::move(reshape_kernel_ptr); + _flat_or_reshape_ptr = std::move(reshape_kernel_ptr); } // We need to init the output tensor here. Indeed, the reshape kernel expects @@ -127,7 +124,7 @@ void NESoftmaxLayerGeneric<IS_LOG>::configure(ITensor *input, ITensor *output, f _input_flattened.allocator()->allocate(); // Reshape the flat output into the requested (4D) output - _reshape_kernel.configure(&_output_flattened, output); + _reshape.configure(&_output_flattened, output); // Allocate the intermediate flat tensors _output_flattened.allocator()->allocate(); @@ -174,11 +171,11 @@ Status NESoftmaxLayerGeneric<IS_LOG>::validate(const ITensorInfo *input, const I if(first_n_reduce_axes == 3) { - ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayerKernel::validate(input, &tensor_info_flat)); + ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayer::validate(input, &tensor_info_flat)); } else { - ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayerKernel::validate(input, &tensor_info_flat)); + ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayer::validate(input, &tensor_info_flat)); } } @@ -195,7 +192,7 @@ void NESoftmaxLayerGeneric<IS_LOG>::run() if(_needs_flattening) { - NEScheduler::get().schedule(_flat_or_reshape_kernel_ptr.get(), Window::DimY); + _flat_or_reshape_ptr->run(); } NEScheduler::get().schedule(&_fill_border_kernel, Window::DimY); @@ -204,11 +201,11 @@ void NESoftmaxLayerGeneric<IS_LOG>::run() if(_needs_flattening) { - NEScheduler::get().schedule(&_reshape_kernel, Window::DimY); + _reshape.run(); } } template class NESoftmaxLayerGeneric<false>; template class NESoftmaxLayerGeneric<true>; -} // namespace arm_compute
\ No newline at end of file +} // namespace arm_compute |