From 338595bca8ab60492f10626860acb1ab3722b1ce Mon Sep 17 00:00:00 2001 From: Vidhya Sudhan Loganathan Date: Fri, 28 Jun 2019 14:09:53 +0100 Subject: COMPMID-2234 : Add support for axis 3 in NE/CLConcatenateLayer Change-Id: Ic86f89ece3afe72809bc69c6de6fee7d21daa1d4 Signed-off-by: Vidhya Sudhan Loganathan Reviewed-on: https://review.mlplatform.org/c/1440 Comments-Addressed: Arm Jenkins Reviewed-by: Gian Marco Iodice Tested-by: Arm Jenkins --- arm_compute/core/CL/CLKernels.h | 1 + .../CL/kernels/CLBatchConcatenateLayerKernel.h | 83 ++++++++++ arm_compute/core/NEON/NEKernels.h | 1 + .../NEON/kernels/NEBatchConcatenateLayerKernel.h | 90 ++++++++++ .../runtime/CL/functions/CLConcatenateLayer.h | 5 +- .../runtime/NEON/functions/NEConcatenateLayer.h | 5 +- src/core/CL/CLKernelLibrary.cpp | 2 +- src/core/CL/cl_kernels/concatenate.cl | 2 +- .../CL/kernels/CLBatchConcatenateLayerKernel.cpp | 168 +++++++++++++++++++ .../CL/kernels/CLDepthConcatenateLayerKernel.cpp | 2 +- .../NEON/kernels/NEBatchConcatenateLayerKernel.cpp | 181 +++++++++++++++++++++ src/runtime/CL/functions/CLConcatenateLayer.cpp | 21 +++ src/runtime/NEON/functions/NEConcatenateLayer.cpp | 13 ++ tests/validation/CL/BatchConcatenateLayer.cpp | 170 +++++++++++++++++++ tests/validation/NEON/BatchConcatenateLayer.cpp | 154 ++++++++++++++++++ tests/validation/reference/ConcatenateLayer.cpp | 10 ++ 16 files changed, 901 insertions(+), 7 deletions(-) create mode 100644 arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h create mode 100644 arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h create mode 100644 src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp create mode 100644 src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp create mode 100644 tests/validation/CL/BatchConcatenateLayer.cpp create mode 100644 tests/validation/NEON/BatchConcatenateLayer.cpp diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h index cd5612c9ae..8fbc4770b0 100644 --- a/arm_compute/core/CL/CLKernels.h +++ b/arm_compute/core/CL/CLKernels.h @@ -28,6 +28,7 @@ #include "arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h" #include "arm_compute/core/CL/kernels/CLAccumulateKernel.h" #include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h" +#include "arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h" #include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h" #include "arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h" #include "arm_compute/core/CL/kernels/CLBitwiseAndKernel.h" diff --git a/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h new file mode 100644 index 0000000000..69571ad499 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H__ +#define __ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the batch concatenate kernel. + * The input tensor will be concatenated into the output tensor. + */ +class CLBatchConcatenateLayerKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLBatchConcatenateLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBatchConcatenateLayerKernel(const CLBatchConcatenateLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLBatchConcatenateLayerKernel &operator=(const CLBatchConcatenateLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLBatchConcatenateLayerKernel(CLBatchConcatenateLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLBatchConcatenateLayerKernel &operator=(CLBatchConcatenateLayerKernel &&) = default; + /** Default destructor */ + ~CLBatchConcatenateLayerKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] batch_offset The offset on axis # 3. + * @param[in,out] output Output tensor. Data types supported: Same as @p input. + * + * @note: The output tensor's low two dimensions can't be smaller than the input one's. + * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. + * + */ + void configure(const ICLTensor *input, unsigned int batch_offset, ICLTensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref CLBatchConcatenateLayerKernel + * + * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] batch_offset The offset on axis # 3. + * @param[in] output Output tensor info. Data types supported: Same as @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + unsigned int _batch_offset; +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/NEKernels.h b/arm_compute/core/NEON/NEKernels.h index 4023d82107..e41f299611 100644 --- a/arm_compute/core/NEON/NEKernels.h +++ b/arm_compute/core/NEON/NEKernels.h @@ -30,6 +30,7 @@ #include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h" #include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" #include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h" +#include "arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h" #include "arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h" #include "arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h" #include "arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h" diff --git a/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h new file mode 100644 index 0000000000..edd9470a3c --- /dev/null +++ b/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H__ +#define __ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H__ + +#include "arm_compute/core/NEON/INEKernel.h" + +namespace arm_compute +{ +class ITensor; + +/** Interface for the batch concatenate kernel. + * The input tensor will be concatenated into the output tensor. + */ +class NEBatchConcatenateLayerKernel : public INEKernel +{ +public: + const char *name() const override + { + return "NEBatchConcatenateLayerKernel"; + } + /** Default constructor */ + NEBatchConcatenateLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchConcatenateLayerKernel(const NEBatchConcatenateLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchConcatenateLayerKernel &operator=(const NEBatchConcatenateLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + NEBatchConcatenateLayerKernel(NEBatchConcatenateLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + NEBatchConcatenateLayerKernel &operator=(NEBatchConcatenateLayerKernel &&) = default; + /** Default destructor */ + ~NEBatchConcatenateLayerKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] batch_offset The offset on axis # 3. + * @param[in,out] output Output tensor. Data types supported: Same as @p input. + * + * @note: The output tensor's low two dimensions can't be smaller than the input one's. + * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. + * + */ + void configure(const ITensor *input, unsigned int batch_offset, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEBatchConcatenateLayerKernel + * + * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] batch_offset The offset on axis # 3. + * @param[in] output Output tensor info. Data types supported: Same as @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + +private: + using BatchConcatFunction = void(const ITensor *in, ITensor *out, int batch_offset, const Window &window); + +private: + BatchConcatFunction *_func; + const ITensor *_input; + ITensor *_output; + unsigned int _batch_offset; +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h index c56fc117b9..b69930c7d3 100644 --- a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h +++ b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h @@ -44,6 +44,7 @@ class Status; * -# @ref CLWidthConcatenateLayerKernel (if underlying concatenation axis is 0). * -# @ref CLHeightConcatenateLayerKernel (if underlying concatenation axis is 1). * -# @ref CLDepthConcatenateLayerKernel (if underlying concatenation axis is 2). + * -# @ref CLBatchConcatenateLayerKernel (if underlying concatenation axis is 3). */ class CLConcatenateLayer : public IFunction { @@ -57,7 +58,7 @@ public: * * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32. * @param[out] output Output tensor. Data types supported: Same as @p input. - * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1 and 2. + * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. */ void configure(const std::vector &inputs_vector, ICLTensor *output, size_t axis); /** Static function to check if given info will lead to a valid configuration of @ref CLConcatenateLayer @@ -67,7 +68,7 @@ public: * * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/F16/F32. * @param[in] output Output tensor info. Data types supported: Same as @p input. - * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1 and 2. + * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. * * @return a status */ diff --git a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h index 8c97efc4f0..953e3fa641 100644 --- a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h @@ -45,6 +45,7 @@ class Status; * -# @ref NEWidthConcatenateLayerKernel (if underlying concatenation axis is 0). * -# @ref NEHeightConcatenateLayerKernel (if underlying concatenation axis is 1). * -# @ref NEDepthConcatenateLayerKernel (if underlying concatenation axis is 2). + * -# @ref NEBatchConcatenateLayerKernel (if underlying concatenation axis is 3). */ class NEConcatenateLayer : public IFunction { @@ -58,7 +59,7 @@ public: * * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32. * @param[out] output Output tensor. Data types supported: Same as @p input. - * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1 and 2. + * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. */ void configure(std::vector inputs_vector, ITensor *output, size_t axis); void configure(std::vector inputs_vector, ITensor *output, size_t axis); @@ -69,7 +70,7 @@ public: * * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/F16/F32. * @param[in] output Output tensor info. Data types supported: Same as @p input. - * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1 and 2. + * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. * * @return a status */ diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp index c0875bebcd..db57bb93a6 100644 --- a/src/core/CL/CLKernelLibrary.cpp +++ b/src/core/CL/CLKernelLibrary.cpp @@ -188,7 +188,7 @@ const std::map CLKernelLibrary::_kernel_program_map = { "compare_less_quantized", "comparisons.cl" }, { "compare_lessequal", "comparisons.cl" }, { "compare_lessequal_quantized", "comparisons.cl" }, - { "concatenate_depth", "concatenate.cl" }, + { "concatenate", "concatenate.cl" }, { "concatenate_width", "concatenate.cl" }, { "concatenate_height", "concatenate.cl" }, { "concatenate_width_x2", "concatenate.cl" }, diff --git a/src/core/CL/cl_kernels/concatenate.cl b/src/core/CL/cl_kernels/concatenate.cl index e365683958..5ccf746a4e 100644 --- a/src/core/CL/cl_kernels/concatenate.cl +++ b/src/core/CL/cl_kernels/concatenate.cl @@ -406,7 +406,7 @@ __kernel void concatenate_height( * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor * @param[in] offsets The offsets to the first valid element of the output tensor in bytes */ -__kernel void concatenate_depth( +__kernel void concatenate( TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(dst), int offset) diff --git a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp new file mode 100644 index 0000000000..86bf366346 --- /dev/null +++ b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h" + +#include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/CLValidate.h" +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/IAccessWindow.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Window.h" + +#include "support/ToolchainSupport.h" + +#include + +using namespace arm_compute; + +namespace +{ +std::pair validate_and_configure_window(ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output) +{ + ARM_COMPUTE_UNUSED(batch_offset); + + const unsigned int num_elems_processed_per_iteration = 16 / input->element_size(); + + // The window needs to be based on output, except for the batch size + Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); + // The total batch size is the concatenation of the batch size of the inputs + win.set(3, Window::Dimension(0, input->tensor_shape()[3], 1)); + + AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); + bool window_changed = update_window_and_padding(win, input_access, output_access); + output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape())); + + Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; + return std::make_pair(err, win); +} +Status validate_arguments(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, + DataType::U16, DataType::S16, + DataType::U32, DataType::S32, + DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + + ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimX) != output->dimension(Window::DimX)); + ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimY) != output->dimension(Window::DimY)); + ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimZ) != output->dimension(Window::DimZ)); + ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(3) + batch_offset > output->dimension(3)); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(4, input, output); + + return Status{}; +} +} // namespace + +CLBatchConcatenateLayerKernel::CLBatchConcatenateLayerKernel() + : _input(nullptr), _output(nullptr), _batch_offset(0) +{ +} + +void CLBatchConcatenateLayerKernel::configure(const ICLTensor *input, unsigned int batch_offset, ICLTensor *output) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), batch_offset, output->info())); + + _input = input; + _output = output; + _batch_offset = batch_offset; + + const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size(); + + // Add build options + CLBuildOptions build_opts; + build_opts.add_option("-DDATA_TYPE=" + get_underlying_cl_type_from_data_type(input->info()->data_type())); + build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); + if(is_data_type_quantized_asymmetric(input->info()->data_type()) && input->info()->quantization_info() != output->info()->quantization_info()) + { + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset)); + build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); + build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); + } + + // Create kernel + _kernel = static_cast(CLKernelLibrary::get().create_kernel("concatenate", build_opts.options())); + + // Configure kernel window + auto win_config = validate_and_configure_window(input->info(), batch_offset, output->info()); + ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); + + ICLKernel::configure_internal(std::get<1>(win_config)); + // Set config_id for enabling LWS tuning + _config_id = "concatenate_"; + _config_id += support::cpp11::to_string(3); + _config_id += "_"; + _config_id += support::cpp11::to_string(batch_offset); + _config_id += "_"; + _config_id += support::cpp11::to_string(input->info()->dimension(0)); + _config_id += "_"; + _config_id += support::cpp11::to_string(input->info()->dimension(1)); + _config_id += "_"; + _config_id += support::cpp11::to_string(input->info()->dimension(2)); + _config_id += "_"; + _config_id += support::cpp11::to_string(input->info()->dimension(3)); +} + +Status CLBatchConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *input, + unsigned int batch_offset, + const arm_compute::ITensorInfo *output) +{ + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, batch_offset, output)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), batch_offset, output->clone().get()).first); + return Status{}; +} + +void CLBatchConcatenateLayerKernel::run(const Window &window, cl::CommandQueue &queue) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); + + Window slice = window.first_slice_window_3D(); + + const int offset_to_first_elements_in_bytes = _batch_offset * _output->info()->strides_in_bytes()[3]; + + unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the input and output parameters + _kernel.setArg(idx, offset_to_first_elements_in_bytes); + + do + { + unsigned int idx = 0; + add_3D_tensor_argument(idx, _input, slice); + add_3D_tensor_argument(idx, _output, slice); + enqueue(queue, *this, slice, lws_hint()); + } + while(window.slide_window_slice_3D(slice)); +} diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp index 5e1bbe944f..40b633b273 100644 --- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp @@ -109,7 +109,7 @@ void CLDepthConcatenateLayerKernel::configure(const ICLTensor *input, unsigned i } // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("concatenate_depth", build_opts.options())); + _kernel = static_cast(CLKernelLibrary::get().create_kernel("concatenate", build_opts.options())); // Configure kernel window auto win_config = validate_and_configure_window(input->info(), depth_offset, output->info()); diff --git a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp new file mode 100644 index 0000000000..4263892c50 --- /dev/null +++ b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/IAccessWindow.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/NEON/NEAsymm.h" +#include "arm_compute/core/NEON/NEFixedPoint.h" +#include "arm_compute/core/NEON/wrapper/wrapper.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include + +using namespace arm_compute; + +namespace +{ +template +void batch_concat(const ITensor *in, ITensor *out, int batch_offset, const Window &window) +{ + // Offset input + uint8_t *input_ptr = in->buffer() + in->info()->offset_first_element_in_bytes(); + + // Offset output + uint8_t *output_ptr = out->buffer() + out->info()->offset_first_element_in_bytes() + batch_offset * out->info()->strides_in_bytes()[3]; + + Iterator input(in, window); + Iterator output(out, window); + + const DataType dt = in->info()->data_type(); + const UniformQuantizationInfo input_qinfo = in->info()->quantization_info().uniform(); + const UniformQuantizationInfo output_qinfo = out->info()->quantization_info().uniform(); + if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo) + { + execute_window_loop(window, [&](const Coordinates &) + { + const auto in_ptr = reinterpret_cast(input_ptr + input.offset()); + const auto out_ptr = reinterpret_cast(output_ptr + output.offset()); + vst1q_u8(out_ptr, vquantize(vdequantize(vld1q_u8(in_ptr), input_qinfo), output_qinfo)); + }, + input, output); + } + else + { + execute_window_loop(window, [&](const Coordinates &) + { + const auto in_ptr = reinterpret_cast(input_ptr + input.offset()); + const auto out_ptr = reinterpret_cast(output_ptr + output.offset()); + + wrapper::vstore(out_ptr, wrapper::vloadq(in_ptr)); + }, + input, output); + } +} + +std::pair validate_and_configure_window(ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output) +{ + ARM_COMPUTE_UNUSED(batch_offset); + + const unsigned int num_elems_processed_per_iteration = 16 / input->element_size(); + + // The window needs to be based on input as we copy all the batchs of input + Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); + win.set(3, Window::Dimension(0, input->tensor_shape()[3], 1)); + + AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); + bool window_changed = update_window_and_padding(win, input_access, output_access); + output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape())); + + Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; + return std::make_pair(err, win); +} + +Status validate_arguments(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); + //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, + DataType::U16, DataType::S16, + DataType::U32, DataType::S32, + DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + + ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimX) != output->dimension(Window::DimX)); + ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimY) != output->dimension(Window::DimY)); + ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimZ) != output->dimension(Window::DimZ)); + ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(3) + batch_offset > output->dimension(3)); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(4, input, output); + + return Status{}; +} +} // namespace + +NEBatchConcatenateLayerKernel::NEBatchConcatenateLayerKernel() + : _func(nullptr), _input(nullptr), _output(nullptr), _batch_offset(0) +{ +} + +void NEBatchConcatenateLayerKernel::configure(const ITensor *input, unsigned int batch_offset, ITensor *output) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), batch_offset, output->info())); + + _func = nullptr; + _input = input; + _output = output; + _batch_offset = batch_offset; + + switch(input->info()->data_type()) + { + case DataType::S8: + case DataType::U8: + case DataType::QASYMM8: + _func = &batch_concat; + break; + case DataType::S16: + case DataType::U16: + case DataType::F16: + _func = &batch_concat; + break; + case DataType::S32: + case DataType::U32: + case DataType::F32: + _func = &batch_concat; + break; + default: + ARM_COMPUTE_ERROR("Unsupported data type."); + } + + // Configure kernel window + auto win_config = validate_and_configure_window(input->info(), batch_offset, output->info()); + ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); + + INEKernel::configure(std::get<1>(win_config)); +} + +Status NEBatchConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *input, + unsigned int batch_offset, + const arm_compute::ITensorInfo *output) +{ + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, batch_offset, output)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), batch_offset, output->clone().get()).first); + return Status{}; +} + +void NEBatchConcatenateLayerKernel::run(const Window &window, const ThreadInfo &info) +{ + ARM_COMPUTE_UNUSED(info); + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); + ARM_COMPUTE_ERROR_ON(_func == nullptr); + + (*_func)(_input, _output, _batch_offset, window); +} diff --git a/src/runtime/CL/functions/CLConcatenateLayer.cpp b/src/runtime/CL/functions/CLConcatenateLayer.cpp index 0594a17a7a..1d396f5ebf 100644 --- a/src/runtime/CL/functions/CLConcatenateLayer.cpp +++ b/src/runtime/CL/functions/CLConcatenateLayer.cpp @@ -23,6 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h" +#include "arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h" #include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h" #include "arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h" #include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h" @@ -124,6 +125,17 @@ void CLConcatenateLayer::configure(const std::vector &inputs_vector } break; } + case 3: + { + for(unsigned int i = 0; i < _num_inputs; ++i) + { + auto kernel = support::cpp14::make_unique(); + kernel->configure(inputs_vector.at(i), offset, output); + offset += inputs_vector.at(i)->info()->dimension(_axis); + _concat_kernels.emplace_back(std::move(kernel)); + } + break; + } default: ARM_COMPUTE_ERROR("Axis not supported"); } @@ -184,6 +196,15 @@ Status CLConcatenateLayer::validate(const std::vector &inputs_vec } break; } + case 3: + { + for(const auto &input : inputs_vector) + { + ARM_COMPUTE_RETURN_ON_ERROR(CLBatchConcatenateLayerKernel::validate(input, offset, output)); + offset += input->dimension(axis); + } + break; + } default: ARM_COMPUTE_ERROR("Axis not supported"); } diff --git a/src/runtime/NEON/functions/NEConcatenateLayer.cpp b/src/runtime/NEON/functions/NEConcatenateLayer.cpp index d338493e51..9a70d32843 100644 --- a/src/runtime/NEON/functions/NEConcatenateLayer.cpp +++ b/src/runtime/NEON/functions/NEConcatenateLayer.cpp @@ -23,6 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h" +#include "arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h" #include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h" #include "arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h" #include "arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h" @@ -112,6 +113,13 @@ void NEConcatenateLayer::configure_internal(std::vector &&inputs_v _concat_kernels.emplace_back(std::move(kernel)); break; } + case 3: + { + auto kernel = support::cpp14::make_unique(); + kernel->configure(inputs_vector.at(i), offset, output); + _concat_kernels.emplace_back(std::move(kernel)); + break; + } default: ARM_COMPUTE_ERROR("Axis not supported"); } @@ -146,6 +154,11 @@ Status NEConcatenateLayer::validate_internal(const std::vector ARM_COMPUTE_RETURN_ON_ERROR(NEDepthConcatenateLayerKernel::validate(input, offset, output)); break; } + case 3: + { + ARM_COMPUTE_RETURN_ON_ERROR(NEBatchConcatenateLayerKernel::validate(input, offset, output)); + break; + } default: ARM_COMPUTE_ERROR("Axis not supported"); } diff --git a/tests/validation/CL/BatchConcatenateLayer.cpp b/tests/validation/CL/BatchConcatenateLayer.cpp new file mode 100644 index 0000000000..b789569155 --- /dev/null +++ b/tests/validation/CL/BatchConcatenateLayer.cpp @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h" +#include "tests/CL/CLAccessor.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/ConcatenateLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +TEST_SUITE(CL) +TEST_SUITE(BatchConcatenateLayer) + +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( + framework::dataset::make("InputInfo1", { TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F32), // Mismatching data type input/output + TensorInfo(TensorShape(20U, 27U, 4U, 4U), 1, DataType::F32), // Mismatching x dimension + TensorInfo(TensorShape(23U, 26U, 4U, 3U), 1, DataType::F32), // Mismatching y dim + TensorInfo(TensorShape(23U, 27U, 4U, 3U), 1, DataType::F32), // Mismatching z dim + TensorInfo(TensorShape(16U, 27U, 3U, 6U), 1, DataType::F32) + }), + framework::dataset::make("InputInfo2", { TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(23U, 27U, 4U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(23U, 27U, 4U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(23U, 27U, 3U, 3U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 27U, 3U, 6U), 1, DataType::F32) + })), + framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F16), + TensorInfo(TensorShape(23U, 12U, 4U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(23U, 27U, 4U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(23U, 20U, 4U, 3U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 27U, 3U, 12U), 1, DataType::F32) + })), + framework::dataset::make("Expected", { false, false, false, false, true })), + input_info1, input_info2, output_info,expected) +{ + std::vector inputs_vector_info; + inputs_vector_info.emplace_back(std::move(input_info1)); + inputs_vector_info.emplace_back(std::move(input_info2)); + + std::vector inputs_vector_info_raw; + inputs_vector_info_raw.reserve(inputs_vector_info.size()); + for(auto &input : inputs_vector_info) + { + inputs_vector_info_raw.emplace_back(&input); + } + + bool is_valid = bool(CLConcatenateLayer::validate(inputs_vector_info_raw, &output_info.clone()->set_is_resizable(false), 3)); + ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); +} +// clang-format on +// *INDENT-ON* + +TEST_CASE(Configuration, framework::DatasetMode::ALL) +{ + // Create tensors + CLTensor src1 = create_tensor(TensorShape(128U, 32U, 32U), DataType::F32, 1); + CLTensor src2 = create_tensor(TensorShape(128U, 32U, 32U), DataType::F32, 1); + CLTensor src3 = create_tensor(TensorShape(128U, 32U, 32U), DataType::F32, 1); + CLTensor dst; + + ARM_COMPUTE_EXPECT(src1.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(src2.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(src3.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Create and configure function + CLConcatenateLayer concat_layer; + + concat_layer.configure({ &src1, &src2, &src3 }, &dst, 3); +} +template +using CLBatchConcatenateLayerFixture = ConcatenateLayerValidationFixture; + +TEST_SUITE(Float) +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, CLBatchConcatenateLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small3DShapes(), datasets::Tiny4DShapes()), + framework::dataset::make("DataType", + DataType::F16)), + framework::dataset::make("Axis", 3))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, CLBatchConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(concat(datasets::Large3DShapes(), datasets::Small4DShapes()), + framework::dataset::make("DataType", + DataType::F16)), + framework::dataset::make("Axis", 3))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, CLBatchConcatenateLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small3DShapes(), datasets::Tiny4DShapes()), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("Axis", 3))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, CLBatchConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(), framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("Axis", 3))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE(Quantized) +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, CLBatchConcatenateLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small3DShapes(), datasets::Tiny4DShapes()), + framework::dataset::make("DataType", + DataType::QASYMM8)), + framework::dataset::make("Axis", 3))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, CLBatchConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(), framework::dataset::make("DataType", + DataType::QASYMM8)), + framework::dataset::make("Axis", 3))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/NEON/BatchConcatenateLayer.cpp b/tests/validation/NEON/BatchConcatenateLayer.cpp new file mode 100644 index 0000000000..f95663dbd3 --- /dev/null +++ b/tests/validation/NEON/BatchConcatenateLayer.cpp @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" +#include "tests/NEON/Accessor.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/ConcatenateLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +TEST_SUITE(NEON) +TEST_SUITE(BatchConcatenateLayer) + +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( + framework::dataset::make("InputInfo1", { TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F32), // Mismatching data type input/output + TensorInfo(TensorShape(20U, 27U, 4U, 4U), 1, DataType::F32), // Mismatching x dimension + TensorInfo(TensorShape(23U, 26U, 4U, 3U), 1, DataType::F32), // Mismatching y dim + TensorInfo(TensorShape(23U, 27U, 4U, 3U), 1, DataType::F32), // Mismatching z dim + TensorInfo(TensorShape(16U, 27U, 3U, 6U), 1, DataType::F32) + }), + framework::dataset::make("InputInfo2", { TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(23U, 27U, 4U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(23U, 27U, 4U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(23U, 27U, 3U, 3U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 27U, 3U, 6U), 1, DataType::F32) + })), + framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F16), + TensorInfo(TensorShape(23U, 12U, 4U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(23U, 27U, 4U, 4U), 1, DataType::F32), + TensorInfo(TensorShape(23U, 20U, 4U, 3U), 1, DataType::F32), + TensorInfo(TensorShape(16U, 27U, 3U, 12U), 1, DataType::F32) + })), + framework::dataset::make("Expected", { false, false, false, false, true })), + input_info1, input_info2, output_info,expected) +{ + std::vector inputs_vector_info; + inputs_vector_info.emplace_back(std::move(input_info1)); + inputs_vector_info.emplace_back(std::move(input_info2)); + + std::vector inputs_vector_info_raw; + inputs_vector_info_raw.reserve(inputs_vector_info.size()); + for(auto &input : inputs_vector_info) + { + inputs_vector_info_raw.emplace_back(&input); + } + + bool is_valid = bool(NEConcatenateLayer::validate(inputs_vector_info_raw, &output_info.clone()->set_is_resizable(false), 3)); + ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); +} +// clang-format on +// *INDENT-ON* + +template +using NEBatchConcatenateLayerFixture = ConcatenateLayerValidationFixture; + +TEST_SUITE(Float) +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchConcatenateLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()), + framework::dataset::make("DataType", + DataType::F16)), + framework::dataset::make("Axis", 3))) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEBatchConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(), framework::dataset::make("DataType", + DataType::F16)), + framework::dataset::make("Axis", 3))) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchConcatenateLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small3DShapes(), datasets::Tiny4DShapes()), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("Axis", 3))) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEBatchConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(), framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("Axis", 3))) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE(Quantized) +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchConcatenateLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small3DShapes(), datasets::Tiny4DShapes()), + framework::dataset::make("DataType", + DataType::QASYMM8)), + framework::dataset::make("Axis", 3))) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEBatchConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(), + framework::dataset::make("DataType", + DataType::QASYMM8)), + framework::dataset::make("Axis", 3))) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/reference/ConcatenateLayer.cpp b/tests/validation/reference/ConcatenateLayer.cpp index 6c90d74a0f..aa74ca2474 100644 --- a/tests/validation/reference/ConcatenateLayer.cpp +++ b/tests/validation/reference/ConcatenateLayer.cpp @@ -127,6 +127,16 @@ SimpleTensor concatenate_layer(std::vector> &srcs, SimpleTens dst = reference::permute(dst, PermutationVector(2U, 1U, 0U)); return reference::permute(widthconcatenate_layer(srcs, dst), PermutationVector(2U, 1U, 0U)); } + case 3: + { + for(auto &t : srcs) + { + t = reference::permute(t, PermutationVector(3U, 2U, 1U, 0U)); + } + dst = reference::permute(dst, PermutationVector(3U, 2U, 1U, 0U)); + auto ret = reference::permute(widthconcatenate_layer(srcs, dst), PermutationVector(3U, 2U, 1U, 0U)); + return ret; + } default: { ARM_COMPUTE_ERROR("Not supported"); -- cgit v1.2.1