diff options
19 files changed, 611 insertions, 108 deletions
diff --git a/arm_compute/core/NEON/NEKernels.h b/arm_compute/core/NEON/NEKernels.h index 5b1b701a9d..1ce3821e81 100644 --- a/arm_compute/core/NEON/NEKernels.h +++ b/arm_compute/core/NEON/NEKernels.h @@ -89,6 +89,7 @@ #include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h" #include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h" #include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h" +#include "arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h" #include "arm_compute/core/NEON/kernels/NEHistogramKernel.h" #include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" #include "arm_compute/core/NEON/kernels/NEIntegralImageKernel.h" diff --git a/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h new file mode 100644 index 0000000000..540de68cb6 --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H__ +#define __ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the height concatenate kernel. + * The input tensor will be concatenated into the output tensor. + */ +class NEHeightConcatenateLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEHeightConcatenateLayerKernel"; + } + /** Default constructor */ + NEHeightConcatenateLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHeightConcatenateLayerKernel(const NEHeightConcatenateLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHeightConcatenateLayerKernel &operator=(const NEHeightConcatenateLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEHeightConcatenateLayerKernel(NEHeightConcatenateLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEHeightConcatenateLayerKernel &operator=(NEHeightConcatenateLayerKernel &&) = default; + /** Default destructor */ + ~NEHeightConcatenateLayerKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] height_offset The starting offset on the Y axis for the output tensor. + * @param[in,out] output Output tensor. Data types supported: Same as @p input. 
+ * + */ + void configure(const ITensor *input, unsigned int height_offset, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEHeightConcatenateLayerKernel + * + * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] height_offset The starting offset on the Y axis for the output tensor. + * @param[in] output Output tensor info. Data types supported: Same as @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + const ITensor *_input; + ITensor *_output; + unsigned int _height_offset; +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H__ */ diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h index 9d36405041..6782cda7fe 100644 --- a/arm_compute/core/utils/misc/ShapeCalculator.h +++ b/arm_compute/core/utils/misc/ShapeCalculator.h @@ -1222,30 +1222,30 @@ inline TensorShape calculate_depth_concatenate_shape(const std::vector<T *> &inp return out_shape; } -/** Calculate the width concatenate output shape of a vector of tensors +/** Calculate the concatenate output shape of the concatenate operation along a single axis * - * @param[in] inputs_vector Vector containing the shapes of the inputs + * @param[in] input Vector containing the shapes of the inputs + * @param[in] axis Axis along which to concatenate the input tensors * * @return the calculated shape */ template <typename T> -inline TensorShape calculate_width_concatenate_shape(const std::vector<T *> &inputs_vector) +inline TensorShape calculate_concatenate_shape(const std::vector<T *> &input, size_t axis) { - TensorShape out_shape = extract_shape(inputs_vector[0]); + TensorShape out_shape = 
extract_shape(input[0]); - size_t width = 0; - for(const auto &tensor : inputs_vector) + size_t new_size = 0; + for(const auto &tensor : input) { ARM_COMPUTE_ERROR_ON(tensor == nullptr); const TensorShape shape = extract_shape(tensor); - width += shape.x(); + new_size += shape[axis]; } - out_shape.set(0, width); + out_shape.set(axis, new_size); return out_shape; } - /** Calculate the stack output shape of a tensor * * @param[in] a Input tensor info diff --git a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h index 2cdc720fb6..b70d6ebc7c 100644 --- a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -26,6 +26,7 @@ #include "arm_compute/runtime/IFunction.h" +#include "arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h" #include "arm_compute/core/Types.h" #include <memory> @@ -41,6 +42,7 @@ class Status; /** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels: * * -# @ref NEWidthConcatenateLayer (if underlying concatenation axis is 0). + * -# @ref NEHeightConcatenateLayerKernel (if underlying concatenation axis is 1). * -# @ref NEDepthConcatenateLayer (if underlying concatenation axis is 2). */ class NEConcatenateLayer : public IFunction @@ -51,21 +53,21 @@ public: /** Initialise the kernel's inputs vector and output. * * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. - * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayer and @ref NEDepthConcatenateLayer. + * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayer, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayer. 
* * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32. * @param[out] output Output tensor. Data types supported: Same as @p input. - * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0 and 2. + * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1 and 2. */ void configure(const std::vector<ITensor *> &inputs_vector, ITensor *output, DataLayoutDimension axis); /** Static function to check if given info will lead to a valid configuration of @ref NEConcatenateLayer * * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. - * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayer and @ref NEDepthConcatenateLayer. + * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayer, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayer. * * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/F16/F32. * @param[in] output Output tensor info. Data types supported: Same as @p input. - * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0 and 2. + * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1 and 2. 
* * @return a status */ @@ -75,7 +77,13 @@ public: void run() override; private: - std::unique_ptr<IFunction> _concat_function; + void configure_h_concatenate(std::vector<ITensor *> inputs_vector, ITensor *output); + static Status validate_h_concatenate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output); + + std::unique_ptr<IFunction> _concat_function; + std::unique_ptr<NEHeightConcatenateLayerKernel[]> _hconcat_kernels; + unsigned int _num_inputs; + unsigned int _axis; }; -} +} // namespace arm_compute #endif /* __ARM_COMPUTE_NECONCATENATELAYER_H__ */ diff --git a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp new file mode 100644 index 0000000000..b8e204cfd8 --- /dev/null +++ b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/IAccessWindow.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/NEON/NEAsymm.h" +#include "arm_compute/core/NEON/wrapper/wrapper.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include <cstdint> + +using namespace arm_compute; + +namespace +{ +std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) +{ + const unsigned int num_elems_processed_per_iteration = 16 / output->element_size(); + + // The window needs to be based on input as we copy all the widths of input + Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); + AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); + bool window_changed = update_window_and_padding(win, input_access, output_access); + + Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; + return std::make_pair(err, win); +} + +Status validate_arguments(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); + // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. 
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, + DataType::U8, DataType::S8, DataType::QASYMM8, + DataType::U16, DataType::S16, DataType::F16, + DataType::U32, DataType::S32, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimX) != output->dimension(Window::DimX)); + ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimY) + height_offset > output->dimension(Window::DimY)); + for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i) + { + ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(i) != output->dimension(i)); + } + + return Status{}; +} +} // namespace + +NEHeightConcatenateLayerKernel::NEHeightConcatenateLayerKernel() + : _input(nullptr), _output(nullptr), _height_offset(0) +{ +} + +void NEHeightConcatenateLayerKernel::configure(const ITensor *input, unsigned int height_offset, ITensor *output) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), height_offset, output->info())); + + _input = input; + _output = output; + _height_offset = height_offset; + + // Configure kernel window + auto win_config = validate_and_configure_window(input->info(), output->info()); + ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); + + INEKernel::configure(std::get<1>(win_config)); +} + +Status NEHeightConcatenateLayerKernel::validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output) +{ + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, height_offset, output)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get()).first); + return Status{}; +} + +void NEHeightConcatenateLayerKernel::run(const Window &window, const ThreadInfo &info) +{ + ARM_COMPUTE_UNUSED(info); + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); + + // Offset output pointer to the correct position + 
uint8_t *output_ptr = _output->buffer() + _output->info()->offset_first_element_in_bytes() + _height_offset * _output->info()->strides_in_bytes()[Window::DimY]; + + // Create iterators + Iterator input(_input, window); + Iterator output(_output, window); + const DataType dt = _input->info()->data_type(); + const QuantizationInfo &input_qinfo = _input->info()->quantization_info(); + const QuantizationInfo &output_qinfo = _output->info()->quantization_info(); + if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo) + { + execute_window_loop(window, [&](const Coordinates &) + { + vst1q_u8(output_ptr + output.offset(), vquantize(vdequantize(vld1q_u8(input.ptr()), input_qinfo), output_qinfo)); + }, + input, output); + } + else + { + execute_window_loop(window, [&](const Coordinates &) + { + const auto in_ptr = input.ptr(); + const auto out_ptr = output_ptr + output.offset(); + + wrapper::vstore(out_ptr, wrapper::vloadq(in_ptr)); + }, + input, output); + } +} diff --git a/src/graph/nodes/ConcatenateLayerNode.cpp b/src/graph/nodes/ConcatenateLayerNode.cpp index ade3f6e1a9..3ce09d0073 100644 --- a/src/graph/nodes/ConcatenateLayerNode.cpp +++ b/src/graph/nodes/ConcatenateLayerNode.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -71,10 +71,9 @@ TensorDescriptor ConcatenateLayerNode::compute_output_descriptor(const std::vect shapes.emplace_back(&input_descriptor.shape); } - // Calculate output shape - if(axis_idx == 0) + if(axis_idx < 2) { - output_descriptor.shape = arm_compute::misc::shape_calculator::calculate_width_concatenate_shape(shapes); + output_descriptor.shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(shapes, axis_idx); } else if(axis_idx == 2) { @@ -138,4 +137,4 @@ void ConcatenateLayerNode::accept(INodeVisitor &v) v.visit(*this); } } // namespace graph -} // namespace arm_compute
\ No newline at end of file +} // namespace arm_compute diff --git a/src/runtime/CL/functions/CLWidthConcatenateLayer.cpp b/src/runtime/CL/functions/CLWidthConcatenateLayer.cpp index d0801a6768..6e42377a07 100644 --- a/src/runtime/CL/functions/CLWidthConcatenateLayer.cpp +++ b/src/runtime/CL/functions/CLWidthConcatenateLayer.cpp @@ -51,7 +51,7 @@ Status CLWidthConcatenateLayer::validate(const std::vector<ITensorInfo *> &input // Output auto inizialitation if not yet initialized TensorInfo tmp_output_info = *output->clone(); - const TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_width_concatenate_shape(inputs_vector); + const TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, Window::DimX); auto_init_if_empty(tmp_output_info, output_shape, 1, inputs_vector[0]->data_type()); switch(num_inputs) @@ -90,7 +90,7 @@ void CLWidthConcatenateLayer::configure(std::vector<ICLTensor *> inputs_vector, { inputs_vector_info.emplace_back(inputs_vector.at(i)->info()); } - const TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_width_concatenate_shape(inputs_vector); + const TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, Window::DimX); // Output auto inizialitation if not yet initialized auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type()); diff --git a/src/runtime/NEON/functions/NEConcatenateLayer.cpp b/src/runtime/NEON/functions/NEConcatenateLayer.cpp index 21ab47d3fe..f764a126a0 100644 --- a/src/runtime/NEON/functions/NEConcatenateLayer.cpp +++ b/src/runtime/NEON/functions/NEConcatenateLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,6 +26,9 @@ #include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h" #include "arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" + #include "arm_compute/core/Error.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" @@ -35,15 +38,66 @@ namespace arm_compute { NEConcatenateLayer::NEConcatenateLayer() - : _concat_function(nullptr) + : _concat_function(nullptr), + _hconcat_kernels(), + _num_inputs(0), + _axis(Window::DimX) { } +Status NEConcatenateLayer::validate_h_concatenate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); + ARM_COMPUTE_RETURN_ERROR_ON(inputs_vector.size() < 2); + + // Output auto inizialitation if not yet initialized + TensorInfo tmp_output_info = *output->clone(); + TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, Window::DimY); + auto_init_if_empty(tmp_output_info, output_shape, 1, inputs_vector[0]->data_type()); + + unsigned int offset = 0; + for(const auto &input : inputs_vector) + { + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); + ARM_COMPUTE_RETURN_ON_ERROR(NEHeightConcatenateLayerKernel::validate(input, offset, &tmp_output_info)); + offset += input->dimension(Window::DimY); + } + + return Status{}; +} + +void NEConcatenateLayer::configure_h_concatenate(std::vector<ITensor *> inputs_vector, ITensor *output) +{ + _num_inputs = inputs_vector.size(); + + std::vector<ITensorInfo *> inputs_vector_info; + for(unsigned int i = 0; i < _num_inputs; ++i) + { + ARM_COMPUTE_ERROR_ON_NULLPTR(inputs_vector.at(i)); + inputs_vector_info.emplace_back(inputs_vector.at(i)->info()); + } + TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, Window::DimY); + + // Output auto 
inizialitation if not yet initialized + auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type()); + ARM_COMPUTE_ERROR_THROW_ON(validate_h_concatenate(inputs_vector_info, output->info())); + + unsigned int offset = 0; + + _hconcat_kernels = arm_compute::support::cpp14::make_unique<NEHeightConcatenateLayerKernel[]>(_num_inputs); + + for(unsigned int i = 0; i < _num_inputs; ++i) + { + _hconcat_kernels[i].configure(inputs_vector.at(i), offset, output); + offset += inputs_vector.at(i)->info()->dimension(Window::DimY); + } +} + void NEConcatenateLayer::configure(const std::vector<ITensor *> &inputs_vector, ITensor *output, DataLayoutDimension axis) { ARM_COMPUTE_ERROR_ON(output == nullptr); - - switch(get_data_layout_dimension_index(output->info()->data_layout(), axis)) + _axis = get_data_layout_dimension_index(output->info()->data_layout(), axis); + switch(_axis) { case 0: { @@ -52,6 +106,11 @@ void NEConcatenateLayer::configure(const std::vector<ITensor *> &inputs_vector, _concat_function = std::move(func); break; } + case 1: + { + configure_h_concatenate(inputs_vector, output); + break; + } case 2: { auto func = support::cpp14::make_unique<NEDepthConcatenateLayer>(); @@ -73,6 +132,9 @@ Status NEConcatenateLayer::validate(const std::vector<ITensorInfo *> &inputs_vec case 0: ARM_COMPUTE_RETURN_ON_ERROR(NEWidthConcatenateLayer::validate(inputs_vector, output)); break; + case 1: + ARM_COMPUTE_RETURN_ON_ERROR(NEConcatenateLayer::validate_h_concatenate(inputs_vector, output)); + break; case 2: ARM_COMPUTE_RETURN_ON_ERROR(NEDepthConcatenateLayer::validate(inputs_vector, output)); break; @@ -84,7 +146,28 @@ Status NEConcatenateLayer::validate(const std::vector<ITensorInfo *> &inputs_vec void NEConcatenateLayer::run() { - ARM_COMPUTE_ERROR_ON(_concat_function == nullptr); - _concat_function->run(); + switch(_axis) + { + case 0: + case 2: + { + ARM_COMPUTE_ERROR_ON(_concat_function == nullptr); + _concat_function->run(); + break; + } + case 1: 
+ { + for(unsigned i = 0; i < _num_inputs; ++i) + { + NEScheduler::get().schedule(_hconcat_kernels.get() + i, Window::DimY); + } + break; + } + default: + { + ARM_COMPUTE_ERROR("Axis not supported."); + break; + } + } } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEWidthConcatenateLayer.cpp b/src/runtime/NEON/functions/NEWidthConcatenateLayer.cpp index 17c352b8f3..9fce13cbd7 100644 --- a/src/runtime/NEON/functions/NEWidthConcatenateLayer.cpp +++ b/src/runtime/NEON/functions/NEWidthConcatenateLayer.cpp @@ -48,7 +48,7 @@ inline Status NEWidthConcatenateLayer::validate_internal(const std::vector<Tenso // Output auto inizialitation if not yet initialized TensorInfo tmp_output_info = *output->clone(); - TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_width_concatenate_shape(inputs_vector); + TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, Window::DimX); auto_init_if_empty(tmp_output_info, output_shape, 1, inputs_vector[0]->data_type()); unsigned int width_offset = 0; @@ -71,7 +71,7 @@ inline void NEWidthConcatenateLayer::configure_internal(std::vector<TensorType * { inputs_vector_info.emplace_back(inputs_vector.at(i)->info()); } - TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_width_concatenate_shape(inputs_vector); + TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, Window::DimX); // Output auto inizialitation if not yet initialized auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type()); diff --git a/tests/datasets/ShapeDatasets.h b/tests/datasets/ShapeDatasets.h index fddc7731fe..b6923c15c9 100644 --- a/tests/datasets/ShapeDatasets.h +++ b/tests/datasets/ShapeDatasets.h @@ -732,11 +732,11 @@ public: } }; -/** Data set containing tensor shapes for WidthConcatenateLayer. 
*/ -class WidthConcatenateLayerShapes final : public ShapeDataset +/** Data set containing tensor shapes for ConcatenateLayer. */ +class ConcatenateLayerShapes final : public ShapeDataset { public: - WidthConcatenateLayerShapes() + ConcatenateLayerShapes() : ShapeDataset("Shape", { TensorShape{ 232U, 65U, 3U }, diff --git a/tests/validation/CL/WidthConcatenateLayer.cpp b/tests/validation/CL/WidthConcatenateLayer.cpp index 647e0413a1..493320b9ad 100644 --- a/tests/validation/CL/WidthConcatenateLayer.cpp +++ b/tests/validation/CL/WidthConcatenateLayer.cpp @@ -24,14 +24,14 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h" +#include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h" #include "tests/CL/CLAccessor.h" #include "tests/datasets/ShapeDatasets.h" #include "tests/framework/Asserts.h" #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" #include "tests/validation/Validation.h" -#include "tests/validation/fixtures/WidthConcatenateLayerFixture.h" +#include "tests/validation/fixtures/ConcatenateLayerFixture.h" namespace arm_compute { @@ -72,8 +72,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( inputs_vector_info_raw.emplace_back(&input); } - bool is_valid = bool(CLWidthConcatenateLayer::validate(inputs_vector_info_raw, - &output_info.clone()->set_is_resizable(false))); + bool is_valid = bool(CLConcatenateLayer::validate(inputs_vector_info_raw,&output_info.clone()->set_is_resizable(false),DataLayoutDimension::WIDTH )); ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); } // clang-format on @@ -93,26 +92,30 @@ TEST_CASE(Configuration, framework::DatasetMode::ALL) ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); // Create and configure function - CLWidthConcatenateLayer concat_layer; + CLConcatenateLayer 
concat_layer; - concat_layer.configure({ &src1, &src2, &src3 }, &dst); + concat_layer.configure({ &src1, &src2, &src3 }, &dst, DataLayoutDimension::WIDTH); } template <typename T> -using CLWidthConcatenateLayerFixture = WidthConcatenateLayerValidationFixture<CLTensor, ICLTensor, CLAccessor, CLWidthConcatenateLayer, T>; +using CLWidthConcatenateLayerFixture = ConcatenateLayerValidationFixture<CLTensor, ICLTensor, CLAccessor, CLConcatenateLayer, T>; TEST_SUITE(Float) TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLWidthConcatenateLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()), +FIXTURE_DATA_TEST_CASE(RunSmall, CLWidthConcatenateLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()), framework::dataset::make("DataType", - DataType::F16))) + DataType::F16)), + framework::dataset::make("Axis", 0))) + { // Validate output validate(CLAccessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLWidthConcatenateLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(concat(datasets::Large2DShapes(), datasets::Small4DShapes()), - framework::dataset::make("DataType", - DataType::F16))) +FIXTURE_DATA_TEST_CASE(RunLarge, CLWidthConcatenateLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(concat(datasets::Large2DShapes(), datasets::Small4DShapes()), + framework::dataset::make("DataType", + DataType::F16)), + framework::dataset::make("Axis", 0))) + { // Validate output validate(CLAccessor(_target), _reference); @@ -120,15 +123,18 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLWidthConcatenateLayerFixture<half>, framework TEST_SUITE_END() TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, CLWidthConcatenateLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()), +FIXTURE_DATA_TEST_CASE(RunSmall, CLWidthConcatenateLayerFixture<float>, 
framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()), framework::dataset::make("DataType", - DataType::F32))) + DataType::F32)), + framework::dataset::make("Axis", 0))) + { // Validate output validate(CLAccessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLWidthConcatenateLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::WidthConcatenateLayerShapes(), framework::dataset::make("DataType", - DataType::F32))) +FIXTURE_DATA_TEST_CASE(RunLarge, CLWidthConcatenateLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(), framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("Axis", 0))) { // Validate output validate(CLAccessor(_target), _reference); @@ -138,15 +144,17 @@ TEST_SUITE_END() TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) -FIXTURE_DATA_TEST_CASE(RunSmall, CLWidthConcatenateLayerFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()), +FIXTURE_DATA_TEST_CASE(RunSmall, CLWidthConcatenateLayerFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()), framework::dataset::make("DataType", - DataType::QASYMM8))) + DataType::QASYMM8)), + framework::dataset::make("Axis", 0))) { // Validate output validate(CLAccessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLWidthConcatenateLayerFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::WidthConcatenateLayerShapes(), framework::dataset::make("DataType", - DataType::QASYMM8))) +FIXTURE_DATA_TEST_CASE(RunLarge, CLWidthConcatenateLayerFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(), framework::dataset::make("DataType", + DataType::QASYMM8)), + framework::dataset::make("Axis", 0))) { // Validate output validate(CLAccessor(_target), 
_reference); diff --git a/tests/validation/Helpers.cpp b/tests/validation/Helpers.cpp index 11c454ea67..e9612f2223 100644 --- a/tests/validation/Helpers.cpp +++ b/tests/validation/Helpers.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -98,17 +98,18 @@ TensorShape calculate_depth_concatenate_shape(const std::vector<TensorShape> &in return out_shape; } -TensorShape calculate_width_concatenate_shape(const std::vector<TensorShape> &input_shapes) +TensorShape calculate_concatenate_shape(const std::vector<TensorShape> &input_shapes, size_t axis) { ARM_COMPUTE_ERROR_ON(input_shapes.empty()); - TensorShape out_shape = input_shapes[0]; + ARM_COMPUTE_ERROR_ON(axis >= out_shape.num_dimensions()); - int width = std::accumulate(input_shapes.begin(), input_shapes.end(), 0, [](int sum, const TensorShape & shape) + const int new_size = std::accumulate(input_shapes.begin(), input_shapes.end(), 0, [&](int sum, const TensorShape & shape) { - return sum + shape.x(); + ARM_COMPUTE_ERROR_ON(axis >= shape.num_dimensions()); + return sum + shape[axis]; }); - out_shape.set(0, width); + out_shape.set(axis, new_size); return out_shape; } diff --git a/tests/validation/Helpers.h b/tests/validation/Helpers.h index 4d1d21440d..2e8c667a41 100644 --- a/tests/validation/Helpers.h +++ b/tests/validation/Helpers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -128,13 +128,14 @@ void fill_mask_from_pattern(uint8_t *mask, int cols, int rows, MatrixPattern pat */ TensorShape calculate_depth_concatenate_shape(const std::vector<TensorShape> &input_shapes); -/** Calculate output tensor shape give a vector of input tensor to concatenate +/** Calculate output tensor shape for the concatenate operation along a given axis * * @param[in] input_shapes Shapes of the tensors to concatenate across width. 
+ * @param[in] axis Axis to use for the concatenate operation * * @return The shape of output concatenated tensor. */ -TensorShape calculate_width_concatenate_shape(const std::vector<TensorShape> &input_shapes); +TensorShape calculate_concatenate_shape(const std::vector<TensorShape> &input_shapes, size_t axis); /** Parameters of Harris Corners algorithm. */ struct HarrisCornersParameters diff --git a/tests/validation/NEON/HeightConcatenateLayer.cpp b/tests/validation/NEON/HeightConcatenateLayer.cpp new file mode 100644 index 0000000000..f5400f9246 --- /dev/null +++ b/tests/validation/NEON/HeightConcatenateLayer.cpp @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" +#include "tests/NEON/Accessor.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/ConcatenateLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +TEST_SUITE(NEON) +TEST_SUITE(HeightConcatenateLayer) + +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( + framework::dataset::make("InputInfo1", { TensorInfo(TensorShape(23U, 15U, 5U), 1, DataType::F32), // Mismatching data type input/output + TensorInfo(TensorShape(22U, 27U, 5U), 1, DataType::F32), // Mismatching y dimension + TensorInfo(TensorShape(11U, 25U, 5U), 1, DataType::F32), // Mismatching total height + TensorInfo(TensorShape(16U, 25U, 5U), 1, DataType::F32) + }), + framework::dataset::make("InputInfo2", { TensorInfo(TensorShape(23U, 15U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(22U, 127U, 5U), 1, DataType::F32), + TensorInfo(TensorShape(11U, 26U, 5U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 25U, 5U), 1, DataType::F32) + })), + framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(23U, 30U, 5U), 1, DataType::F16), + TensorInfo(TensorShape(22U, 12U, 5U), 1, DataType::F32), + TensorInfo(TensorShape(11U, 7U, 5U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 50U, 5U), 1, DataType::F32) + })), + framework::dataset::make("Expected", { false, false, false, true })), + input_info1, input_info2, output_info,expected) +{ + std::vector<TensorInfo> inputs_vector_info; + inputs_vector_info.emplace_back(std::move(input_info1)); + inputs_vector_info.emplace_back(std::move(input_info2)); + + std::vector<ITensorInfo 
*> inputs_vector_info_raw; + for(auto &input : inputs_vector_info) + { + inputs_vector_info_raw.emplace_back(&input); + } + + bool is_valid = bool(NEConcatenateLayer::validate(inputs_vector_info_raw, &output_info.clone()->set_is_resizable(false), DataLayoutDimension::HEIGHT)); + ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); +} +// clang-format on +// *INDENT-ON* + +template <typename T> +using NEHeightConcatenateLayerFixture = ConcatenateLayerValidationFixture<Tensor, ITensor, Accessor, NEConcatenateLayer, T>; + +TEST_SUITE(Float) +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, NEHeightConcatenateLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("Axis", 1))) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEHeightConcatenateLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(), framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("Axis", 1))) + +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() // FP32 +TEST_SUITE_END() // Float + +TEST_SUITE(Quantized) +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, NEHeightConcatenateLayerFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()), + framework::dataset::make("DataType", + DataType::QASYMM8)), + framework::dataset::make("Axis", 1))) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEHeightConcatenateLayerFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(), + framework::dataset::make("DataType", + DataType::QASYMM8)), + framework::dataset::make("Axis", 1))) +{ + // Validate 
output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() // QASYMM8 +TEST_SUITE_END() // Quantized + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/NEON/WidthConcatenateLayer.cpp b/tests/validation/NEON/WidthConcatenateLayer.cpp index 6e94e92d05..dba14ebb35 100644 --- a/tests/validation/NEON/WidthConcatenateLayer.cpp +++ b/tests/validation/NEON/WidthConcatenateLayer.cpp @@ -22,7 +22,7 @@ * SOFTWARE. */ #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h" +#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" #include "tests/NEON/Accessor.h" @@ -31,7 +31,7 @@ #include "tests/framework/Macros.h" #include "tests/framework/datasets/Datasets.h" #include "tests/validation/Validation.h" -#include "tests/validation/fixtures/WidthConcatenateLayerFixture.h" +#include "tests/validation/fixtures/ConcatenateLayerFixture.h" namespace arm_compute { @@ -72,27 +72,30 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( inputs_vector_info_raw.emplace_back(&input); } - bool is_valid = bool(NEWidthConcatenateLayer::validate(inputs_vector_info_raw, - &output_info.clone()->set_is_resizable(false))); + bool is_valid = bool(NEConcatenateLayer::validate(inputs_vector_info_raw, + &output_info.clone()->set_is_resizable(false),DataLayoutDimension::WIDTH)); ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); } // clang-format on // *INDENT-ON* - template <typename T> -using NEWidthConcatenateLayerFixture = WidthConcatenateLayerValidationFixture<Tensor, ITensor, Accessor, NEWidthConcatenateLayer, T>; +using NEWidthConcatenateLayerFixture = ConcatenateLayerValidationFixture<Tensor, ITensor, Accessor, NEConcatenateLayer, T>; TEST_SUITE(Float) TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, 
NEWidthConcatenateLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()), +FIXTURE_DATA_TEST_CASE(RunSmall, NEWidthConcatenateLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()), framework::dataset::make("DataType", - DataType::F32))) + DataType::F32)), + framework::dataset::make("Axis", 0))) + { // Validate output validate(Accessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEWidthConcatenateLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::WidthConcatenateLayerShapes(), framework::dataset::make("DataType", - DataType::F32))) +FIXTURE_DATA_TEST_CASE(RunLarge, NEWidthConcatenateLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(), framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("Axis", 0))) + { // Validate output validate(Accessor(_target), _reference); @@ -102,15 +105,19 @@ TEST_SUITE_END() TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) -FIXTURE_DATA_TEST_CASE(RunSmall, NEWidthConcatenateLayerFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()), +FIXTURE_DATA_TEST_CASE(RunSmall, NEWidthConcatenateLayerFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()), framework::dataset::make("DataType", - DataType::QASYMM8))) + DataType::QASYMM8)), + framework::dataset::make("Axis", 0))) + { // Validate output validate(Accessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEWidthConcatenateLayerFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::WidthConcatenateLayerShapes(), framework::dataset::make("DataType", - DataType::QASYMM8))) +FIXTURE_DATA_TEST_CASE(RunLarge, NEWidthConcatenateLayerFixture<uint8_t>, framework::DatasetMode::NIGHTLY, 
combine(combine(datasets::ConcatenateLayerShapes(), framework::dataset::make("DataType", + DataType::QASYMM8)), + framework::dataset::make("Axis", 0))) + { // Validate output validate(Accessor(_target), _reference); diff --git a/tests/validation/fixtures/WidthConcatenateLayerFixture.h b/tests/validation/fixtures/ConcatenateLayerFixture.h index 47a03ed865..db09957c09 100644 --- a/tests/validation/fixtures/WidthConcatenateLayerFixture.h +++ b/tests/validation/fixtures/ConcatenateLayerFixture.h @@ -33,7 +33,7 @@ #include "tests/framework/Asserts.h" #include "tests/framework/Fixture.h" #include "tests/validation/Helpers.h" -#include "tests/validation/reference/WidthConcatenateLayer.h" +#include "tests/validation/reference/ConcatenateLayer.h" #include <random> @@ -44,11 +44,11 @@ namespace test namespace validation { template <typename TensorType, typename ITensorType, typename AccessorType, typename FunctionType, typename T> -class WidthConcatenateLayerValidationFixture : public framework::Fixture +class ConcatenateLayerValidationFixture : public framework::Fixture { public: template <typename...> - void setup(TensorShape shape, DataType data_type) + void setup(TensorShape shape, DataType data_type, unsigned int axis) { // Create input shapes std::mt19937 gen(library->seed()); @@ -78,12 +78,12 @@ public: { // Decrease the dimension by a small percentage. Don't increase // as that could make tensor too large. 
- s.set(0, s[0] + 2 * static_cast<int>(s[0] * change_dis(gen))); + s.set(axis, s[axis] + 2 * static_cast<int>(s[axis] * change_dis(gen))); } } - _target = compute_target(shapes, qinfo, data_type); - _reference = compute_reference(shapes, qinfo, data_type); + _target = compute_target(shapes, qinfo, data_type, axis); + _reference = compute_reference(shapes, qinfo, data_type, axis); } protected: @@ -93,7 +93,7 @@ protected: library->fill_tensor_uniform(tensor, i); } - TensorType compute_target(std::vector<TensorShape> shapes, const std::vector<QuantizationInfo> &qinfo, DataType data_type) + TensorType compute_target(const std::vector<TensorShape> &shapes, const std::vector<QuantizationInfo> &qinfo, DataType data_type, unsigned int axis) { std::vector<TensorType> srcs; std::vector<ITensorType *> src_ptrs; @@ -107,13 +107,26 @@ protected: src_ptrs.emplace_back(&srcs.back()); } - TensorShape dst_shape = misc::shape_calculator::calculate_width_concatenate_shape(src_ptrs); - - TensorType dst = create_tensor<TensorType>(dst_shape, data_type, 1, qinfo[shapes.size()]); + const TensorShape dst_shape = misc::shape_calculator::calculate_concatenate_shape(src_ptrs, axis); + TensorType dst = create_tensor<TensorType>(dst_shape, data_type, 1, qinfo[shapes.size()]); // Create and configure function - FunctionType width_concat; - width_concat.configure(src_ptrs, &dst); + FunctionType concat; + switch(axis) + { + case 0: + concat.configure(src_ptrs, &dst, DataLayoutDimension::WIDTH); + break; + case 1: + concat.configure(src_ptrs, &dst, DataLayoutDimension::HEIGHT); + break; + case 2: + concat.configure(src_ptrs, &dst, DataLayoutDimension::CHANNEL); + break; + default: + ARM_COMPUTE_ERROR("Not supported"); + break; + } for(auto &src : srcs) { @@ -140,12 +153,12 @@ protected: } // Compute function - width_concat.run(); + concat.run(); return dst; } - SimpleTensor<T> compute_reference(std::vector<TensorShape> shapes, const std::vector<QuantizationInfo> &qinfo, DataType data_type) + 
SimpleTensor<T> compute_reference(const std::vector<TensorShape> &shapes, const std::vector<QuantizationInfo> &qinfo, DataType data_type, unsigned int axis) { std::vector<SimpleTensor<T>> srcs; @@ -156,10 +169,9 @@ protected: fill(srcs.back(), j); } - const TensorShape dst_shape = calculate_width_concatenate_shape(shapes); + const TensorShape dst_shape = calculate_concatenate_shape(shapes, axis); SimpleTensor<T> dst{ dst_shape, data_type, 1, qinfo[shapes.size()] }; - - return reference::widthconcatenate_layer<T>(srcs, dst); + return reference::concatenate_layer<T>(srcs, dst, axis); } TensorType _target{}; diff --git a/tests/validation/fixtures/LSTMLayerFixture.h b/tests/validation/fixtures/LSTMLayerFixture.h index b30f1e534b..2cf83b8b3d 100644 --- a/tests/validation/fixtures/LSTMLayerFixture.h +++ b/tests/validation/fixtures/LSTMLayerFixture.h @@ -29,11 +29,11 @@ #include "tests/framework/Fixture.h" #include "tests/validation/reference/ActivationLayer.h" #include "tests/validation/reference/ArithmeticOperations.h" +#include "tests/validation/reference/ConcatenateLayer.h" #include "tests/validation/reference/FullyConnectedLayer.h" #include "tests/validation/reference/GEMM.h" #include "tests/validation/reference/PixelWiseMultiplication.h" #include "tests/validation/reference/Transpose.h" -#include "tests/validation/reference/WidthConcatenateLayer.h" namespace arm_compute { @@ -415,7 +415,7 @@ protected: scratch_inputs.emplace_back(std::move(cell_state_out)); scratch_inputs.emplace_back(std::move(forget_gate)); scratch_inputs.emplace_back(std::move(output)); - scratch = reference::widthconcatenate_layer(scratch_inputs, scratch); + scratch = reference::concatenate_layer(scratch_inputs, scratch, Window::DimX); _reference_scratch = std::move(scratch); return output_state_out; } diff --git a/tests/validation/reference/WidthConcatenateLayer.cpp b/tests/validation/reference/ConcatenateLayer.cpp index 38543393ce..1440878829 100644 --- 
a/tests/validation/reference/WidthConcatenateLayer.cpp +++ b/tests/validation/reference/ConcatenateLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -21,9 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "WidthConcatenateLayer.h" +#include "ConcatenateLayer.h" #include "tests/validation/Helpers.h" +#include "tests/validation/reference/Permute.h" namespace arm_compute { @@ -33,24 +34,22 @@ namespace validation { namespace reference { +namespace +{ template <typename T> SimpleTensor<T> widthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs, SimpleTensor<T> &dst) { // Create reference std::vector<TensorShape> shapes; - for(const auto &src : srcs) { shapes.emplace_back(src.shape()); } - // Compute reference int width_offset = 0; const int width_out = dst.shape().x(); - // Set output tensor to 0 std::fill_n(dst.data(), dst.num_elements(), 0); - for(const auto &src : srcs) { ARM_COMPUTE_ERROR_ON(width_offset >= width_out); @@ -89,13 +88,43 @@ SimpleTensor<T> widthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs, } width_offset += width; } - return dst; } template SimpleTensor<float> widthconcatenate_layer(const std::vector<SimpleTensor<float>> &srcs, SimpleTensor<float> &dst); template SimpleTensor<half> widthconcatenate_layer(const std::vector<SimpleTensor<half>> &srcs, SimpleTensor<half> &dst); template SimpleTensor<uint8_t> widthconcatenate_layer(const std::vector<SimpleTensor<uint8_t>> &srcs, SimpleTensor<uint8_t> &dst); +} // namespace + +template <typename T> +SimpleTensor<T> concatenate_layer(std::vector<SimpleTensor<T>> &srcs, SimpleTensor<T> &dst, unsigned int axis) +{ + switch(axis) + { + case Window::DimX: + { + return widthconcatenate_layer(srcs, dst); + } + case Window::DimY: + { + for(auto &t : srcs) + { + t = reference::permute<T>(t, PermutationVector(1U, 0U)); + } + dst = 
reference::permute<T>(dst, PermutationVector(1U, 0U)); + return reference::permute<T>(widthconcatenate_layer(srcs, dst), PermutationVector(1U, 0U)); + } + default: + { + ARM_COMPUTE_ERROR("Not supported"); + return dst; + } + } +} + +template SimpleTensor<float> concatenate_layer(std::vector<SimpleTensor<float>> &srcs, SimpleTensor<float> &dst, unsigned int axis); +template SimpleTensor<half> concatenate_layer(std::vector<SimpleTensor<half>> &srcs, SimpleTensor<half> &dst, unsigned int axis); +template SimpleTensor<uint8_t> concatenate_layer(std::vector<SimpleTensor<uint8_t>> &srcs, SimpleTensor<uint8_t> &dst, unsigned int axis); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/WidthConcatenateLayer.h b/tests/validation/reference/ConcatenateLayer.h index 0f1f428f10..14fd097eee 100644 --- a/tests/validation/reference/WidthConcatenateLayer.h +++ b/tests/validation/reference/ConcatenateLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef __ARM_COMPUTE_TEST_WIDTHCONCATENATE_LAYER_H__ -#define __ARM_COMPUTE_TEST_WIDTHCONCATENATE_LAYER_H__ +#ifndef __ARM_COMPUTE_TEST_CONCATENATE_LAYER_H__ +#define __ARM_COMPUTE_TEST_CONCATENATE_LAYER_H__ #include "tests/SimpleTensor.h" @@ -37,9 +37,9 @@ namespace validation namespace reference { template <typename T> -SimpleTensor<T> widthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs, SimpleTensor<T> &dst); +SimpleTensor<T> concatenate_layer(std::vector<SimpleTensor<T>> &srcs, SimpleTensor<T> &dst, unsigned int axis); } // namespace reference } // namespace validation } // namespace test } // namespace arm_compute -#endif /* __ARM_COMPUTE_TEST_WIDTHCONCATENATE_LAYER_H__ */ +#endif /* __ARM_COMPUTE_TEST_CONCATENATE_LAYER_H__ */ |