From 09f24975437e2e141ba51a07055a9372b0d173a2 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Fri, 17 May 2019 18:14:40 +0100 Subject: COMPMID-2109: Remove CL/NE Width/Depth ConcatenateLayer functions. Change-Id: Icbda771abffbb45d4ed0958933c60ff9ace01314 Signed-off-by: Georgios Pinitas Reviewed-on: https://review.mlplatform.org/c/1178 Reviewed-by: Gian Marco Iodice Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- src/graph/backends/GLES/GCFunctionsFactory.cpp | 41 +----- src/runtime/CL/functions/CLConcatenateLayer.cpp | 6 +- .../CL/functions/CLDepthConcatenateLayer.cpp | 107 --------------- src/runtime/CL/functions/CLLSTMLayer.cpp | 4 +- .../CL/functions/CLWidthConcatenateLayer.cpp | 143 --------------------- .../functions/GCDepthConcatenateLayer.cpp | 75 ----------- src/runtime/NEON/functions/NEConcatenateLayer.cpp | 31 ++++- .../NEON/functions/NEDepthConcatenateLayer.cpp | 108 ---------------- src/runtime/NEON/functions/NELSTMLayer.cpp | 27 ++-- .../NEON/functions/NEWidthConcatenateLayer.cpp | 117 ----------------- 10 files changed, 50 insertions(+), 609 deletions(-) delete mode 100644 src/runtime/CL/functions/CLDepthConcatenateLayer.cpp delete mode 100644 src/runtime/CL/functions/CLWidthConcatenateLayer.cpp delete mode 100755 src/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.cpp delete mode 100644 src/runtime/NEON/functions/NEDepthConcatenateLayer.cpp delete mode 100644 src/runtime/NEON/functions/NEWidthConcatenateLayer.cpp (limited to 'src') diff --git a/src/graph/backends/GLES/GCFunctionsFactory.cpp b/src/graph/backends/GLES/GCFunctionsFactory.cpp index 0de58f5c28..13543dbf15 100644 --- a/src/graph/backends/GLES/GCFunctionsFactory.cpp +++ b/src/graph/backends/GLES/GCFunctionsFactory.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -68,43 +68,6 @@ struct GCEltwiseFunctions namespace detail { -// Specialize functions -template <> -std::unique_ptr create_concatenate_layer(ConcatenateLayerNode &node) -{ - ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating Concatenate node with ID : " << node.id() << " and Name: " << node.name() << std::endl); - ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1); - - // Return nullptr if depth concatenate is switched off - if(!node.is_enabled()) - { - return nullptr; - } - - // Extract IO and info - std::vector inputs; - for(unsigned int i = 0; i < node.num_inputs(); ++i) - { - inputs.push_back(get_backing_tensor(node.input(i))); - } - typename GCTargetInfo::TensorType *output = get_backing_tensor(node.output(0)); - - // Create and configure function - auto func = support::cpp14::make_unique(); - func->configure(inputs, output); - - // Log info - ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " - << node.name() - << " Target " << GCTargetInfo::TargetType - << " Data Type: " << output->info()->data_type() - << " Shape: " << output->info()->tensor_shape() - << " Num Inputs: " << inputs.size() - << std::endl); - - return std::move(func); -} - template <> std::unique_ptr create_convolution_layer(ConvolutionLayerNode &node, GraphContext &ctx) { @@ -282,7 +245,7 @@ std::unique_ptr GCFunctionFactory::create(INode *node, GraphContext & case NodeType::ConvolutionLayer: return detail::create_convolution_layer(*polymorphic_downcast(node), ctx); case NodeType::ConcatenateLayer: - return detail::create_concatenate_layer(*polymorphic_downcast(node)); + return detail::create_concatenate_layer(*polymorphic_downcast(node)); case NodeType::DepthwiseConvolutionLayer: return detail::create_depthwise_convolution_layer(*polymorphic_downcast(node)); case NodeType::EltwiseLayer: diff --git a/src/runtime/CL/functions/CLConcatenateLayer.cpp b/src/runtime/CL/functions/CLConcatenateLayer.cpp index b8224d2cce..0594a17a7a 100644 --- a/src/runtime/CL/functions/CLConcatenateLayer.cpp +++ b/src/runtime/CL/functions/CLConcatenateLayer.cpp @@ -23,11 +23,13 @@ */ #include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h" +#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h" #include "arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h" +#include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h" +#include "arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h" +#include "arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h" -#include "arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Error.h" diff --git a/src/runtime/CL/functions/CLDepthConcatenateLayer.cpp b/src/runtime/CL/functions/CLDepthConcatenateLayer.cpp deleted file mode 100644 index f687e54552..0000000000 --- a/src/runtime/CL/functions/CLDepthConcatenateLayer.cpp +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h" - -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "support/ToolchainSupport.h" - -using namespace arm_compute; - -CLDepthConcatenateLayer::CLDepthConcatenateLayer() // NOLINT - : _concat_kernels_vector(), - _border_handlers_vector(), - _num_inputs(0) -{ -} - -void CLDepthConcatenateLayer::configure(const std::vector &inputs_vector, ICLTensor *output) // NOLINT -{ - _num_inputs = inputs_vector.size(); - - std::vector inputs_vector_info; - for(unsigned int i = 0; i < _num_inputs; i++) - { - inputs_vector_info.emplace_back(inputs_vector.at(i)->info()); - } - - _concat_kernels_vector.resize(_num_inputs); - _border_handlers_vector.resize(_num_inputs); - - TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector_info, Window::DimZ); - - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type()); - ARM_COMPUTE_ERROR_THROW_ON(CLDepthConcatenateLayer::validate(inputs_vector_info, output->info())); - - unsigned int depth_offset = 0; - for(unsigned int i = 0; i < _num_inputs; i++) - { - _concat_kernels_vector[i].configure(inputs_vector.at(i), depth_offset, output); - _border_handlers_vector[i].configure(inputs_vector.at(i), _concat_kernels_vector[i].border_size(), BorderMode::CONSTANT, PixelValue()); - - depth_offset += inputs_vector.at(i)->info()->dimension(2); - } - - // Set valid region from shape - output->info()->set_valid_region(ValidRegion(Coordinates(), output_shape)); -} - -Status CLDepthConcatenateLayer::validate(const std::vector &inputs_vector, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); - ARM_COMPUTE_RETURN_ERROR_ON(inputs_vector.size() < 2); - - // Output auto inizialitation if not yet initialized - TensorInfo tmp_output_info = *output->clone(); - TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, Window::DimZ); - auto_init_if_empty(tmp_output_info, output_shape, 1, 
inputs_vector[0]->data_type()); - - unsigned int depth_offset = 0; - for(const auto &input : inputs_vector) - { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); - ARM_COMPUTE_RETURN_ON_ERROR(CLDepthConcatenateLayerKernel::validate(input, depth_offset, &tmp_output_info)); - depth_offset += input->dimension(2); - } - - return Status{}; -} - -void CLDepthConcatenateLayer::run() -{ - cl::CommandQueue q = CLScheduler::get().queue(); - - for(unsigned i = 0; i < _num_inputs; i++) - { - CLScheduler::get().enqueue(_border_handlers_vector[i], false); - CLScheduler::get().enqueue(_concat_kernels_vector[i], true); - } -} diff --git a/src/runtime/CL/functions/CLLSTMLayer.cpp b/src/runtime/CL/functions/CLLSTMLayer.cpp index 4606a66bf2..85a81a8cd4 100644 --- a/src/runtime/CL/functions/CLLSTMLayer.cpp +++ b/src/runtime/CL/functions/CLLSTMLayer.cpp @@ -316,7 +316,7 @@ void CLLSTMLayer::configure(const ICLTensor *input, scratch_inputs.emplace_back(&_cell_state_out1); scratch_inputs.emplace_back(forget_gate_out); scratch_inputs.emplace_back(output_gate_out); - _concat_scratch_buffer.configure(scratch_inputs, scratch_buffer); + _concat_scratch_buffer.configure(scratch_inputs, scratch_buffer, Window::DimX); input_gate_out->allocator()->allocate(); _cell_state_out1.allocator()->allocate(); forget_gate_out->allocator()->allocate(); @@ -497,7 +497,7 @@ Status CLLSTMLayer::validate(const ITensorInfo *input, inputs_vector_info_raw.push_back(&forget_gate); inputs_vector_info_raw.push_back(&output_gate_tmp); - ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenateLayer::validate(inputs_vector_info_raw, scratch_buffer)); + ARM_COMPUTE_RETURN_ON_ERROR(CLConcatenateLayer::validate(inputs_vector_info_raw, scratch_buffer, Window::DimX)); return Status{}; } diff --git a/src/runtime/CL/functions/CLWidthConcatenateLayer.cpp b/src/runtime/CL/functions/CLWidthConcatenateLayer.cpp deleted file mode 100644 index a8667c3138..0000000000 --- a/src/runtime/CL/functions/CLWidthConcatenateLayer.cpp +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h" - -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "support/ToolchainSupport.h" - -using namespace arm_compute; - -CLWidthConcatenateLayer::CLWidthConcatenateLayer() // NOLINT - : _concat_kernels_vector(), - _concat_x2_kernel(), - _concat_x4_kernel(), - _num_inputs(0) -{ -} - -Status CLWidthConcatenateLayer::validate(const std::vector &inputs_vector, const ITensorInfo *output) // NOLINT -{ - const unsigned int num_inputs = inputs_vector.size(); - - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); - ARM_COMPUTE_RETURN_ERROR_ON(num_inputs < 2); - - // Output auto inizialitation if not yet initialized - TensorInfo tmp_output_info = *output->clone(); - const TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, Window::DimX); - auto_init_if_empty(tmp_output_info, output_shape, 1, inputs_vector[0]->data_type()); - - switch(num_inputs) - { - case 2: - // Validate WidthConcatenate2Tensors kernels if there are 2 inputs - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(inputs_vector[0], inputs_vector[1]); - ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenate2TensorsKernel::validate(inputs_vector[0], inputs_vector[1], &tmp_output_info)); - break; - case 4: - // Validate WidthConcatenate4Tensors kernels if there are 4 inputs - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(inputs_vector[0], inputs_vector[1], inputs_vector[2], inputs_vector[3]); - ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenate4TensorsKernel::validate(inputs_vector[0], inputs_vector[1], inputs_vector[2], inputs_vector[3], &tmp_output_info)); - break; - default: - unsigned int width_offset = 0; - // Validate generic case of WidthConcatenate kernel - for(const auto &input : inputs_vector) - { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); - ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenateLayerKernel::validate(input, width_offset, &tmp_output_info)); - width_offset += input->dimension(0); - } - break; - } - - return Status{}; -} - -void CLWidthConcatenateLayer::configure(std::vector inputs_vector, ICLTensor *output) // NOLINT -{ - _num_inputs = inputs_vector.size(); - - std::vector inputs_vector_info; - for(unsigned int i = 0; i < _num_inputs; i++) - { - inputs_vector_info.emplace_back(inputs_vector.at(i)->info()); - } - const TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, Window::DimX); - - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type()); - - ARM_COMPUTE_ERROR_THROW_ON(CLWidthConcatenateLayer::validate(inputs_vector_info, output->info())); - - switch(_num_inputs) - { - case 2: - // Configure WidthConcatenate2Tensors kernel - _concat_x2_kernel.configure(inputs_vector.at(0), inputs_vector.at(1), output); - break; - case 4: - // Configure WidthConcatenate4Tensors kernel - _concat_x4_kernel.configure(inputs_vector.at(0), inputs_vector.at(1), inputs_vector.at(2), inputs_vector.at(3), output); - break; - default: - // Configure generic case WidthConcatenate kernels - _concat_kernels_vector.resize(_num_inputs); - - unsigned int width_offset = 0; - for(unsigned int i = 0; i < _num_inputs; ++i) - { - 
_concat_kernels_vector[i].configure(inputs_vector.at(i), width_offset, output); - width_offset += inputs_vector.at(i)->info()->dimension(0); - } - break; - } -} - -void CLWidthConcatenateLayer::run() -{ - cl::CommandQueue q = CLScheduler::get().queue(); - - switch(_num_inputs) - { - case 2: - CLScheduler::get().enqueue(_concat_x2_kernel, true); - break; - case 4: - CLScheduler::get().enqueue(_concat_x4_kernel, true); - break; - default: - for(unsigned int i = 0; i < _num_inputs; ++i) - { - CLScheduler::get().enqueue(_concat_kernels_vector[i], true); - } - break; - } -} diff --git a/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.cpp deleted file mode 100755 index b89aafa2e5..0000000000 --- a/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" -#include "support/ToolchainSupport.h" - -using namespace arm_compute; - -GCDepthConcatenateLayer::GCDepthConcatenateLayer() //NOLINT - : _concat_kernels_vector(), - _border_handlers_vector(), - _num_inputs(0) -{ -} - -void GCDepthConcatenateLayer::configure(std::vector inputs_vector, IGCTensor *output) //NOLINT -{ - ARM_COMPUTE_ERROR_ON(inputs_vector.size() < 2); - - _num_inputs = inputs_vector.size(); - - unsigned int depth_offset = 0; - - _concat_kernels_vector.reserve(_num_inputs); - _border_handlers_vector.reserve(_num_inputs); - - for(unsigned int i = 0; i < _num_inputs; i++) - { - auto concat_kernel = support::cpp14::make_unique(); - auto border_kernel = support::cpp14::make_unique(); - - concat_kernel->configure(inputs_vector.at(i), depth_offset, output); - border_kernel->configure(inputs_vector.at(i), concat_kernel->border_size(), BorderMode::CONSTANT, PixelValue()); - _concat_kernels_vector.emplace_back(std::move(concat_kernel)); - _border_handlers_vector.emplace_back(std::move(border_kernel)); - - depth_offset += inputs_vector.at(i)->info()->dimension(2); - } -} - -void GCDepthConcatenateLayer::run() -{ - for(unsigned i = 0; i < _num_inputs; i++) - { - GCScheduler::get().dispatch(*_border_handlers_vector[i].get(), false); - GCScheduler::get().memory_barrier(); - GCScheduler::get().dispatch(*_concat_kernels_vector[i].get(), true); - } -} diff --git a/src/runtime/NEON/functions/NEConcatenateLayer.cpp b/src/runtime/NEON/functions/NEConcatenateLayer.cpp index 71af560fb0..d338493e51 100644 --- a/src/runtime/NEON/functions/NEConcatenateLayer.cpp +++ b/src/runtime/NEON/functions/NEConcatenateLayer.cpp @@ -23,8 +23,9 @@ */ #include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h" -#include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h" -#include "arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h" +#include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" @@ -44,7 +45,28 @@ NEConcatenateLayer::NEConcatenateLayer() { } -void NEConcatenateLayer::configure(const std::vector &inputs_vector, ITensor *output, size_t axis) +void NEConcatenateLayer::configure(std::vector inputs_vector, ITensor *output, size_t axis) +{ + configure_internal(std::move(inputs_vector), output, axis); +} + +void NEConcatenateLayer::configure(std::vector inputs_vector, ITensor *output, size_t axis) +{ + configure_internal(std::move(inputs_vector), output, axis); +} + +Status NEConcatenateLayer::validate(const std::vector &inputs_vector, const ITensorInfo *output, size_t axis) +{ + return validate_internal(inputs_vector, output, axis); +} + +Status NEConcatenateLayer::validate(const std::vector &inputs_vector, const ITensorInfo *output, size_t axis) +{ + return validate_internal(inputs_vector, output, axis); +} + +template +void NEConcatenateLayer::configure_internal(std::vector &&inputs_vector, ITensor *output, size_t axis) { ARM_COMPUTE_ERROR_ON(output == nullptr); _axis = axis; @@ -97,7 +119,8 @@ void 
NEConcatenateLayer::configure(const std::vector &inputs_vector, } } -Status NEConcatenateLayer::validate(const std::vector &inputs_vector, const ITensorInfo *output, size_t axis) +template +Status NEConcatenateLayer::validate_internal(const std::vector &inputs_vector, const ITensorInfo *output, size_t axis) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); ARM_COMPUTE_RETURN_ERROR_ON(inputs_vector.size() < 2); diff --git a/src/runtime/NEON/functions/NEDepthConcatenateLayer.cpp b/src/runtime/NEON/functions/NEDepthConcatenateLayer.cpp deleted file mode 100644 index 8f070a2d7d..0000000000 --- a/src/runtime/NEON/functions/NEDepthConcatenateLayer.cpp +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "support/ToolchainSupport.h" - -using namespace arm_compute; - -NEDepthConcatenateLayer::NEDepthConcatenateLayer() // NOLINT - : _inputs_vector(), - _concat_kernels_vector(), - _border_handlers_vector(), - _num_inputs(0) -{ -} - -void NEDepthConcatenateLayer::configure(const std::vector &inputs_vector, ITensor *output) // NOLINT -{ - _num_inputs = inputs_vector.size(); - - std::vector inputs_vector_info; - for(unsigned int i = 0; i < _num_inputs; i++) - { - inputs_vector_info.emplace_back(inputs_vector.at(i)->info()); - } - TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector_info, Window::DimZ); - - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type()); - ARM_COMPUTE_ERROR_THROW_ON(NEDepthConcatenateLayer::validate(inputs_vector_info, output->info())); - - unsigned int depth_offset = 0; - _concat_kernels_vector.reserve(_num_inputs); - _border_handlers_vector.reserve(_num_inputs); - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto concat_kernel = support::cpp14::make_unique(); - auto border_kernel = support::cpp14::make_unique(); - concat_kernel->configure(inputs_vector.at(i), depth_offset, output); - border_kernel->configure(inputs_vector.at(i), concat_kernel->border_size(), BorderMode::CONSTANT, PixelValue(static_cast(0.f))); - _border_handlers_vector.emplace_back(std::move(border_kernel)); - _concat_kernels_vector.emplace_back(std::move(concat_kernel)); - - depth_offset += inputs_vector.at(i)->info()->dimension(2); - } - - // Set valid region from shape - output->info()->set_valid_region(ValidRegion(Coordinates(), output_shape)); -} - -Status NEDepthConcatenateLayer::validate(const std::vector &inputs_vector, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); - ARM_COMPUTE_RETURN_ERROR_ON(inputs_vector.size() < 2); - - // Output auto inizialitation if not yet initialized - TensorInfo tmp_output_info = *output->clone(); - TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, Window::DimZ); - auto_init_if_empty(tmp_output_info, output_shape, 1, inputs_vector[0]->data_type()); - - unsigned int depth_offset = 0; - for(const auto &input : inputs_vector) - { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); - ARM_COMPUTE_RETURN_ON_ERROR(NEDepthConcatenateLayerKernel::validate(input, depth_offset, &tmp_output_info)); - depth_offset += input->dimension(2); - } - - return Status{}; -} - -void NEDepthConcatenateLayer::run() -{ - for(unsigned i = 0; i < _num_inputs; ++i) - { - NEScheduler::get().schedule(_border_handlers_vector[i].get(), Window::DimX); - NEScheduler::get().schedule(_concat_kernels_vector[i].get(), Window::DimX); - } -} diff --git a/src/runtime/NEON/functions/NELSTMLayer.cpp b/src/runtime/NEON/functions/NELSTMLayer.cpp index 3d3c6a12fa..42b805794b 100644 --- a/src/runtime/NEON/functions/NELSTMLayer.cpp +++ b/src/runtime/NEON/functions/NELSTMLayer.cpp @@ -107,14 +107,14 @@ void NELSTMLayer::configure(const ITensor *input, 
inputs_vector.emplace_back(output_state_in); _memory_group.manage(&_forget_gate_out2); - _concat_inputs_forget_gate.configure(inputs_vector, &_forget_gate_out2); + _concat_inputs_forget_gate.configure(inputs_vector, &_forget_gate_out2, Window::DimX); std::vector weights_vector; weights_vector.emplace_back(input_to_forget_weights); weights_vector.emplace_back(recurrent_to_forget_weights); - _concat_weights_forget_gate.configure(weights_vector, &_forget_gate_out6); + _concat_weights_forget_gate.configure(weights_vector, &_forget_gate_out6, Window::DimX); _memory_group.manage(&_forget_gate_out5); _fully_connected_forget_gate.configure(&_forget_gate_out2, &_forget_gate_out6, forget_gate_bias, &_forget_gate_out5); @@ -165,7 +165,7 @@ void NELSTMLayer::configure(const ITensor *input, lstm_weights.emplace_back(lstm_params.input_to_input_weights()); lstm_weights.emplace_back(lstm_params.recurrent_to_input_weights()); - _concat_weights_input_gate.configure(lstm_weights, &_input_gate_out2); + _concat_weights_input_gate.configure(lstm_weights, &_input_gate_out2, Window::DimX); _memory_group.manage(&_input_gate_out1); _memory_group.manage(&_input_gate_out4); @@ -234,7 +234,7 @@ void NELSTMLayer::configure(const ITensor *input, in_out_weights.emplace_back(input_to_output_weights); in_out_weights.emplace_back(recurrent_to_output_weights); - _concat_weights_output.configure(in_out_weights, &_output2); + _concat_weights_output.configure(in_out_weights, &_output2, Window::DimX); _memory_group.manage(&_output1); _memory_group.manage(&_output4); @@ -308,7 +308,7 @@ void NELSTMLayer::configure(const ITensor *input, scratch_inputs.emplace_back(&_cell_state_out1); scratch_inputs.emplace_back(forget_gate_out); scratch_inputs.emplace_back(output_gate_out); - _concat_scratch_buffer.configure(scratch_inputs, scratch_buffer); + _concat_scratch_buffer.configure(scratch_inputs, scratch_buffer, Window::DimX); input_gate_out->allocator()->allocate(); _cell_state_out1.allocator()->allocate(); forget_gate_out->allocator()->allocate(); @@ -383,8 +383,9 @@ Status NELSTMLayer::validate(const ITensorInfo *input, std::vector inputs_vector; inputs_vector.emplace_back(input); inputs_vector.emplace_back(output_state_in); - TensorInfo forget_gate_concat; - ARM_COMPUTE_RETURN_ON_ERROR(NEWidthConcatenateLayer::validate(inputs_vector, &forget_gate_concat)); + const TensorShape concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, 0); + TensorInfo forget_gate_concat = TensorInfo(concat_shape, 1, input->data_type()); + ARM_COMPUTE_RETURN_ON_ERROR(NEConcatenateLayer::validate(inputs_vector, &forget_gate_concat, Window::DimX)); // Validate forget gate ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayer::validate(input, input_to_forget_weights, forget_gate_bias, &forget_gate)); @@ -409,8 +410,9 @@ Status NELSTMLayer::validate(const ITensorInfo *input, std::vector lstm_weights; lstm_weights.emplace_back(lstm_params.input_to_input_weights()); lstm_weights.emplace_back(lstm_params.recurrent_to_input_weights()); - TensorInfo lstm_gate_concat; - ARM_COMPUTE_RETURN_ON_ERROR(NEWidthConcatenateLayer::validate(lstm_weights, &lstm_gate_concat)); + TensorShape lstm_weights_concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(lstm_weights, 0); + TensorInfo lstm_gate_concat = TensorInfo(lstm_weights_concat_shape, 1, input->data_type()); + ARM_COMPUTE_RETURN_ON_ERROR(NEConcatenateLayer::validate(lstm_weights, &lstm_gate_concat, Window::DimX)); 
ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayer::validate(input, lstm_params.input_to_input_weights(), lstm_params.input_gate_bias(), &input_gate)); if(lstm_params.has_peephole_opt()) @@ -445,8 +447,9 @@ Status NELSTMLayer::validate(const ITensorInfo *input, std::vector in_out_weights; in_out_weights.emplace_back(input_to_output_weights); in_out_weights.emplace_back(recurrent_to_output_weights); - TensorInfo in_out_gate_concat; - ARM_COMPUTE_RETURN_ON_ERROR(NEWidthConcatenateLayer::validate(in_out_weights, &in_out_gate_concat)); + TensorShape in_out_weights_concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(in_out_weights, 0); + TensorInfo in_out_gate_concat = TensorInfo(in_out_weights_concat_shape, 1, input->data_type()); + ARM_COMPUTE_RETURN_ON_ERROR(NEConcatenateLayer::validate(in_out_weights, &in_out_gate_concat, Window::DimX)); ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayer::validate(input, input_to_output_weights, output_gate_bias, &output_gate_tmp)); @@ -485,7 +488,7 @@ Status NELSTMLayer::validate(const ITensorInfo *input, inputs_vector_info_raw.push_back(&forget_gate); inputs_vector_info_raw.push_back(&output_gate_tmp); - ARM_COMPUTE_RETURN_ON_ERROR(NEWidthConcatenateLayer::validate(inputs_vector_info_raw, scratch_buffer)); + ARM_COMPUTE_RETURN_ON_ERROR(NEConcatenateLayer::validate(inputs_vector_info_raw, scratch_buffer, Window::DimX)); return Status{}; } diff --git a/src/runtime/NEON/functions/NEWidthConcatenateLayer.cpp b/src/runtime/NEON/functions/NEWidthConcatenateLayer.cpp deleted file mode 100644 index 25b5216305..0000000000 --- a/src/runtime/NEON/functions/NEWidthConcatenateLayer.cpp +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/Tensor.h" -#include "support/ToolchainSupport.h" - -using namespace arm_compute; - -NEWidthConcatenateLayer::NEWidthConcatenateLayer() - : _concat_kernels_vector(), - _num_inputs(0) -{ -} - -template -inline Status NEWidthConcatenateLayer::validate_internal(const std::vector &inputs_vector, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); - ARM_COMPUTE_RETURN_ERROR_ON(inputs_vector.size() < 2); - - // Output auto inizialitation if not yet initialized - TensorInfo tmp_output_info = *output->clone(); - TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, Window::DimX); - auto_init_if_empty(tmp_output_info, output_shape, 1, inputs_vector[0]->data_type()); - - unsigned int width_offset = 0; - for(const auto &input : inputs_vector) - { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); - ARM_COMPUTE_RETURN_ON_ERROR(NEWidthConcatenateLayerKernel::validate(input, width_offset, &tmp_output_info)); - width_offset += input->dimension(0); - } - - return Status{}; -} -template -inline void NEWidthConcatenateLayer::configure_internal(std::vector &&inputs_vector, ITensor *output) -{ - _num_inputs = inputs_vector.size(); - - std::vector inputs_vector_info; - for(unsigned int i = 0; i < _num_inputs; ++i) - { - inputs_vector_info.emplace_back(inputs_vector.at(i)->info()); - } - TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, Window::DimX); - - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type()); - ARM_COMPUTE_ERROR_THROW_ON(NEWidthConcatenateLayer::validate(inputs_vector_info, output->info())); - - unsigned int width_offset = 0; - - _concat_kernels_vector.resize(_num_inputs); - - for(unsigned int i = 0; i < _num_inputs; ++i) - { - _concat_kernels_vector[i].configure(inputs_vector.at(i), width_offset, output); - width_offset += inputs_vector.at(i)->info()->dimension(0); - } -} - -void NEWidthConcatenateLayer::configure(std::vector inputs_vector, ITensor *output) -{ - configure_internal(std::move(inputs_vector), output); -} - -void NEWidthConcatenateLayer::configure(std::vector inputs_vector, ITensor *output) -{ - configure_internal(std::move(inputs_vector), output); -} - -Status NEWidthConcatenateLayer::validate(const std::vector &inputs_vector, const ITensorInfo *output) -{ - return validate_internal(inputs_vector, output); -} - -Status NEWidthConcatenateLayer::validate(const std::vector &inputs_vector, const ITensorInfo *output) -{ - return validate_internal(inputs_vector, output); -} - -void NEWidthConcatenateLayer::run() -{ - for(unsigned i = 0; i < _num_inputs; ++i) - { - NEScheduler::get().schedule(&_concat_kernels_vector[i], Window::DimY); - } -} -- cgit v1.2.1
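
Migration note for downstream users: with CLWidthConcatenateLayer, CLDepthConcatenateLayer, NEWidthConcatenateLayer and NEDepthConcatenateLayer removed, call sites switch to the generic CLConcatenateLayer/NEConcatenateLayer and pass the concatenation axis explicitly, exactly as the LSTM changes above do. A minimal NEON sketch follows; it assumes the tensors are already allocated and initialised, and the helper function and tensor names are illustrative only, not taken from this patch.

    #include <vector>

    #include "arm_compute/core/ITensor.h"
    #include "arm_compute/core/Window.h"
    #include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"

    using namespace arm_compute;

    // Illustrative helper: concatenate two already-allocated tensors along width and depth.
    void concat_after_compmid_2109(ITensor *a, ITensor *b, ITensor *out_width, ITensor *out_depth)
    {
        std::vector<ITensor *> inputs = { a, b };

        // Was: NEWidthConcatenateLayer::configure(inputs, out_width);
        // Now: the axis is passed explicitly; Window::DimX (= 0) is the width dimension.
        NEConcatenateLayer width_concat;
        width_concat.configure(inputs, out_width, Window::DimX);
        width_concat.run();

        // Was: NEDepthConcatenateLayer::configure(inputs, out_depth);
        // Now: Window::DimZ (= 2) selects the depth/channel dimension.
        NEConcatenateLayer depth_concat;
        depth_concat.configure(inputs, out_depth, Window::DimZ);
        depth_concat.run();
    }

The CL path is analogous: CLConcatenateLayer::configure(inputs, output, Window::DimX), as used for the LSTM scratch-buffer concatenation in this patch.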