diff options
author | Michele Di Giorgio <michele.digiorgio@arm.com> | 2021-01-18 21:15:59 +0000 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2021-01-20 16:28:27 +0000 |
commit | 7d61ff041826782d14e67b7f5b7a2864905ff38b (patch) | |
tree | 2e69c8a5fdabc6717b0691acdbbe7374d856902f /src | |
parent | da6a6eb3bc06ce8869ae3290853970d4c0ce412e (diff) | |
download | ComputeLibrary-7d61ff041826782d14e67b7f5b7a2864905ff38b.tar.gz |
Make all CL Concatenate kernels and functions state-less
Resolves COMPMID-3995
Change-Id: I84172bed20924f1d9ae3b4d14d7b321e9494296e
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4887
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src')
25 files changed, 1163 insertions, 925 deletions
diff --git a/src/core/CL/CLKernels.h b/src/core/CL/CLKernels.h index f23871d4db..11f1d2d7cf 100644 --- a/src/core/CL/CLKernels.h +++ b/src/core/CL/CLKernels.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,7 +29,6 @@ #include "src/core/CL/kernels/CLAccumulateKernel.h" #include "src/core/CL/kernels/CLActivationLayerKernel.h" #include "src/core/CL/kernels/CLArgMinMaxLayerKernel.h" -#include "src/core/CL/kernels/CLBatchConcatenateLayerKernel.h" #include "src/core/CL/kernels/CLBatchNormalizationLayerKernel.h" #include "src/core/CL/kernels/CLBatchToSpaceLayerKernel.h" #include "src/core/CL/kernels/CLBitwiseKernel.h" @@ -48,7 +47,6 @@ #include "src/core/CL/kernels/CLCropKernel.h" #include "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h" #include "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h" -#include "src/core/CL/kernels/CLDepthConcatenateLayerKernel.h" #include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" #include "src/core/CL/kernels/CLDepthToSpaceLayerKernel.h" #include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h" @@ -92,7 +90,6 @@ #include "src/core/CL/kernels/CLHOGDescriptorKernel.h" #include "src/core/CL/kernels/CLHOGDetectorKernel.h" #include "src/core/CL/kernels/CLHarrisCornersKernel.h" -#include "src/core/CL/kernels/CLHeightConcatenateLayerKernel.h" #include "src/core/CL/kernels/CLHistogramKernel.h" #include "src/core/CL/kernels/CLIm2ColKernel.h" #include "src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h" @@ -144,9 +141,6 @@ #include "src/core/CL/kernels/CLWarpAffineKernel.h" #include "src/core/CL/kernels/CLWarpPerspectiveKernel.h" #include "src/core/CL/kernels/CLWeightsReshapeKernel.h" -#include "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h" -#include "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h" -#include "src/core/CL/kernels/CLWidthConcatenateLayerKernel.h" #include 
"src/core/CL/kernels/CLWinogradFilterTransformKernel.h" #include "src/core/CL/kernels/CLWinogradInputTransformKernel.h" #include "src/core/CL/kernels/CLWinogradOutputTransformKernel.h" diff --git a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.h b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.h deleted file mode 100644 index 54a89eb243..0000000000 --- a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H -#define ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the batch concatenate kernel. - * The input tensor will be concatenated into the output tensor. 
- */ -class CLBatchConcatenateLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLBatchConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBatchConcatenateLayerKernel(const CLBatchConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBatchConcatenateLayerKernel &operator=(const CLBatchConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLBatchConcatenateLayerKernel(CLBatchConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLBatchConcatenateLayerKernel &operator=(CLBatchConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~CLBatchConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: All. - * @param[in] batch_offset The offset on axis # 3. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. - * - * @note: The output tensor's low two dimensions can't be smaller than the input one's. - * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. - * - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLBatchConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: All. - * @param[in] batch_offset The offset on axis # 3. - * @param[in] output Output tensor info. Data types supported: Same as @p input. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; - -private: - unsigned int _batch_offset; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H */ diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.h b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.h deleted file mode 100644 index 6c73bd4bf4..0000000000 --- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H -#define ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -/** Interface for the depth concatenate kernel. - * The input tensor will be concatenated into the output tensor. - */ -class CLDepthConcatenateLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLDepthConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthConcatenateLayerKernel(const CLDepthConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthConcatenateLayerKernel &operator=(const CLDepthConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLDepthConcatenateLayerKernel(CLDepthConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLDepthConcatenateLayerKernel &operator=(CLDepthConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~CLDepthConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] depth_offset The offset on the Z axis. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. - * - * @note: The output tensor's low two dimensions can't be smaller than the input one's. - * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. - * - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConcatenateLayerKernel - * - * @param[in] input Input tensor info. 
Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[in] depth_offset The offset on the Z axis. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; - -private: - unsigned int _depth_offset; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H */ diff --git a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.h b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.h deleted file mode 100644 index f4cb627052..0000000000 --- a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H -#define ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -/** Interface for the height concatenate kernel. - * The input tensor will be concatenated into the output tensor. - */ -class CLHeightConcatenateLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLHeightConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHeightConcatenateLayerKernel(const CLHeightConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHeightConcatenateLayerKernel &operator=(const CLHeightConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLHeightConcatenateLayerKernel(CLHeightConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLHeightConcatenateLayerKernel &operator=(CLHeightConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~CLHeightConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: All. - * @param[in] height_offset The starting offset on the Y axis for the output tensor. - * @param[out] output Output tensor. Data types supported: Same as @p input. 
- * - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int height_offset, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLHeightConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: All. - * @param[in] height_offset The starting offset on the Y axis for the output tensor. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; - -private: - unsigned int _height_offset; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h deleted file mode 100644 index 2af89e12eb..0000000000 --- a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H -#define ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -/** Interface for the width concatenate kernel of 2 tensors. - * The input1 and input2 tensors will be concatenated into the output tensor. - */ -class CLWidthConcatenate2TensorsKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLWidthConcatenate2TensorsKernel() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenate2TensorsKernel(const CLWidthConcatenate2TensorsKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenate2TensorsKernel &operator=(const CLWidthConcatenate2TensorsKernel &) = delete; - /** Allow instances of this class to be moved */ - CLWidthConcatenate2TensorsKernel(CLWidthConcatenate2TensorsKernel &&) = default; - /** Allow instances of this class to be moved */ - CLWidthConcatenate2TensorsKernel &operator=(CLWidthConcatenate2TensorsKernel &&) = default; - /** Default destructor */ - ~CLWidthConcatenate2TensorsKernel() = default; - /** Initialise the kernel's input1s and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 First input tensor. Data types supported: All. - * @param[in] input2 Second input tensor. 
Data types supported: same as @p input1 - * @param[out] output Output tensor. Data types supported: Same as @p input1. - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate2TensorsKernel - * - * @param[in] input1 First tensor info. Data types supported: All. - * @param[in] input2 Second tensor info. Data types supported: same as @p input1 - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H */ diff --git a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h deleted file mode 100644 index 0caf87114d..0000000000 --- a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H -#define ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -/** Interface for the width concatenate kernel of 4 tensors. - * All input tensors will be concatenated into the output tensor. - */ -class CLWidthConcatenate4TensorsKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLWidthConcatenate4TensorsKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenate4TensorsKernel(const CLWidthConcatenate4TensorsKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenate4TensorsKernel &operator=(const CLWidthConcatenate4TensorsKernel &) = delete; - /** Allow instances of this class to be moved */ - CLWidthConcatenate4TensorsKernel(CLWidthConcatenate4TensorsKernel &&) = default; - /** Allow instances of this class to be moved */ - CLWidthConcatenate4TensorsKernel &operator=(CLWidthConcatenate4TensorsKernel &&) = default; - /** Default destructor */ - ~CLWidthConcatenate4TensorsKernel() = default; - /** Initialise the kernel's input1s and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 First input tensor. Data types supported: All. - * @param[in] input2 Second input tensor. 
Data types supported: same as @p input1 - * @param[in] input3 Third input tensor. Data types supported: same as @p input1 - * @param[in] input4 Fourth input tensor. Data types supported: same as @p input1 - * @param[out] output Output tensor. Data types supported: Same as @p input1. - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *input3, ITensorInfo *input4, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate4TensorsKernel - * - * @param[in] input1 First tensor info. Data types supported: All. - * @param[in] input2 Second tensor info. Data types supported: same as @p input1 - * @param[in] input3 Third tensor info. Data types supported: same as @p input1 - * @param[in] input4 Fourth tensor info. Data types supported: same as @p input1 - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *input3, const ITensorInfo *input4, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H */ diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.h b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.h deleted file mode 100644 index 09c3f4455d..0000000000 --- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H -#define ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -/** Interface for the width concatenate kernel. - * The input tensor will be concatenated into the output tensor. 
- */ -class CLWidthConcatenateLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLWidthConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenateLayerKernel(const CLWidthConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenateLayerKernel &operator=(const CLWidthConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLWidthConcatenateLayerKernel(CLWidthConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLWidthConcatenateLayerKernel &operator=(CLWidthConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~CLWidthConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: All. - * @param[in] width_offset The offset on the X axis. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. - * - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int width_offset, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: All. - * @param[in] width_offset The offset on the X axis. - * @param[in] output Output tensor info. Data types supported: Same as @p input. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H */ diff --git a/src/core/gpu/cl/ClCompileContext.h b/src/core/gpu/cl/ClCompileContext.h new file mode 100644 index 0000000000..e69cc0200f --- /dev/null +++ b/src/core/gpu/cl/ClCompileContext.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CL_COMPILE_CONTEXT_H +#define ARM_COMPUTE_CL_COMPILE_CONTEXT_H + +#include "arm_compute/core/CL/CLCompileContext.h" + +namespace arm_compute +{ +namespace opencl +{ +using ClCompileContext = arm_compute::CLCompileContext; +} // namespace opencl +} // namespace arm_compute +#endif /* ARM_COMPUTE_CL_COMPILE_CONTEXT_H */ diff --git a/src/core/gpu/cl/IClKernel.h b/src/core/gpu/cl/IClKernel.h new file mode 100644 index 0000000000..52ea3c9183 --- /dev/null +++ b/src/core/gpu/cl/IClKernel.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_ICL_KERNEL_H +#define ARM_COMPUTE_ICL_KERNEL_H + +#include "arm_compute/core/ITensorInfo.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +namespace opencl +{ +using IClKernel = arm_compute::ICLKernel; +} // namespace opencl +} // namespace arm_compute +#endif /* ARM_COMPUTE_ICL_KERNEL_H */ diff --git a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp b/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp index ccd6a5a0fc..c16ff1f028 100644 --- a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp +++ b/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "src/core/CL/kernels/CLBatchConcatenateLayerKernel.h" +#include "src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" @@ -36,50 +36,54 @@ namespace arm_compute { +namespace opencl +{ +namespace kernels +{ namespace { -Status validate_arguments(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output) +Status validate_arguments(const ITensorInfo *src, unsigned int batch_offset, const ITensorInfo *dst) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src); + ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimX) != output->dimension(Window::DimX)); - 
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimY) != output->dimension(Window::DimY)); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimZ) != output->dimension(Window::DimZ)); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(3) + batch_offset > output->dimension(3)); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(4, input, output); + ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimX) != dst->dimension(Window::DimX)); + ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimY) != dst->dimension(Window::DimY)); + ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimZ) != dst->dimension(Window::DimZ)); + ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(3) + batch_offset > dst->dimension(3)); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(4, src, dst); return Status{}; } } // namespace -CLBatchConcatenateLayerKernel::CLBatchConcatenateLayerKernel() +ClBatchConcatenateKernel::ClBatchConcatenateKernel() : _batch_offset(0) { } -void CLBatchConcatenateLayerKernel::configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output) +void ClBatchConcatenateKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int batch_offset, ITensorInfo *dst) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, batch_offset, output)); + ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, batch_offset, dst)); - auto padding_info = get_padding_info({ input, output }); + auto padding_info = get_padding_info({ src, dst }); _batch_offset = batch_offset; - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16 / input->element_size(), input->dimension(0)); + const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16 / src->element_size(), src->dimension(0)); // Add build options CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + 
get_cl_type_from_data_type(input->data_type())); + build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src->data_type())); build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); - build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->dimension(0) % num_elems_processed_per_iteration)); - if(is_data_type_quantized_asymmetric(input->data_type()) && input->quantization_info() != output->quantization_info()) + build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(src->dimension(0) % num_elems_processed_per_iteration)); + if(is_data_type_quantized_asymmetric(src->data_type()) && src->quantization_info() != dst->quantization_info()) { - const UniformQuantizationInfo iq_info = input->quantization_info().uniform(); - const UniformQuantizationInfo oq_info = output->quantization_info().uniform(); + const UniformQuantizationInfo iq_info = src->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = dst->quantization_info().uniform(); build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset)); build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); @@ -91,12 +95,12 @@ void CLBatchConcatenateLayerKernel::configure(const CLCompileContext &compile_co _kernel = create_kernel(compile_context, "concatenate", build_opts.options()); // Configure kernel window - auto win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); - win.set(3, Window::Dimension(0, input->tensor_shape()[3], 1)); + auto win = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration)); + win.set(3, Window::Dimension(0, src->tensor_shape()[3], 1)); ICLKernel::configure_internal(win); - // Set output valid region - output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); + // Set dst valid region + dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape())); // 
Set config_id for enabling LWS tuning _config_id = "concatenate_"; @@ -104,26 +108,26 @@ void CLBatchConcatenateLayerKernel::configure(const CLCompileContext &compile_co _config_id += "_"; _config_id += support::cpp11::to_string(batch_offset); _config_id += "_"; - _config_id += support::cpp11::to_string(input->dimension(0)); + _config_id += support::cpp11::to_string(src->dimension(0)); _config_id += "_"; - _config_id += support::cpp11::to_string(input->dimension(1)); + _config_id += support::cpp11::to_string(src->dimension(1)); _config_id += "_"; - _config_id += support::cpp11::to_string(input->dimension(2)); + _config_id += support::cpp11::to_string(src->dimension(2)); _config_id += "_"; - _config_id += support::cpp11::to_string(input->dimension(3)); + _config_id += support::cpp11::to_string(src->dimension(3)); ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } -Status CLBatchConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *input, - unsigned int batch_offset, - const arm_compute::ITensorInfo *output) +Status ClBatchConcatenateKernel::validate(const arm_compute::ITensorInfo *src, + unsigned int batch_offset, + const arm_compute::ITensorInfo *dst) { - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, batch_offset, output)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, batch_offset, dst)); return Status{}; } -void CLBatchConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) +void ClBatchConcatenateKernel::run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); @@ -135,7 +139,7 @@ void CLBatchConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &w const int offset_to_first_elements_in_bytes = _batch_offset * dst->info()->strides_in_bytes()[3]; - unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the input and output 
parameters + unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the src and dst parameters _kernel.setArg<cl_int>(idx, offset_to_first_elements_in_bytes); do @@ -147,4 +151,6 @@ void CLBatchConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &w } while(window.slide_window_slice_3D(slice)); } +} // namespace opencl +} // namespace kernels } // namespace arm_compute diff --git a/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h b/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h new file mode 100644 index 0000000000..378a08aa4f --- /dev/null +++ b/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CL_BATCH_CONCATENATE_KERNEL_H +#define ARM_COMPUTE_CL_BATCH_CONCATENATE_KERNEL_H + +#include "src/core/common/Macros.h" +#include "src/core/gpu/cl/ClCompileContext.h" +#include "src/core/gpu/cl/IClKernel.h" + +namespace arm_compute +{ +namespace opencl +{ +namespace kernels +{ +/** Interface for the batch concatenate kernel. + * The src tensor will be concatenated into the destination tensor. + */ +class ClBatchConcatenateKernel : public IClKernel +{ +public: + /** Default constructor */ + ClBatchConcatenateKernel(); + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClBatchConcatenateKernel); + /** Initialise the kernel's source and destination + * + * @param[in] compile_context The compile context to be used. + * @param[in] src Source tensor. Data types supported: All. + * @param[in] batch_offset The offset on axis # 3. + * @param[in,out] dst Destination tensor. Data types supported: Same as @p src. + * + * @note: The dst tensor's low two dimensions can't be smaller than the src one's. + * @note: The gaps between the two lowest dimensions of src and dst need to be divisible by 2. + * + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int batch_offset, ITensorInfo *dst); + /** Static function to check if given info will lead to a valid configuration of @ref ClBatchConcatenateKernel + * + * @param[in] src Input tensor info. Data types supported: All. + * @param[in] batch_offset The offset on axis # 3. + * @param[in] dst Destination tensor info. Data types supported: Same as @p src. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *src, unsigned int batch_offset, const ITensorInfo *dst); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override; + +private: + unsigned int _batch_offset; +}; +} // namespace kernels +} // namespace opencl +} // namespace arm_compute +#endif /* ARM_COMPUTE_CL_BATCH_CONCATENATE_KERNEL_H */ diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp b/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp index eb5bfc2d86..e8893d76d2 100644 --- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp +++ b/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "src/core/CL/kernels/CLDepthConcatenateLayerKernel.h" +#include "src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" @@ -36,49 +36,53 @@ namespace arm_compute { +namespace opencl +{ +namespace kernels +{ namespace { -Status validate_arguments(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output) +Status validate_arguments(const ITensorInfo *src, unsigned int depth_offset, const ITensorInfo *dst) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, 
DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimX) != output->dimension(Window::DimX)); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimY) != output->dimension(Window::DimY)); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(2) + depth_offset > output->dimension(2)); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(3, input, output); + ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimX) != dst->dimension(Window::DimX)); + ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimY) != dst->dimension(Window::DimY)); + ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(2) + depth_offset > dst->dimension(2)); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(3, src, dst); return Status{}; } } // namespace -CLDepthConcatenateLayerKernel::CLDepthConcatenateLayerKernel() +ClDepthConcatenateKernel::ClDepthConcatenateKernel() : _depth_offset(0) { } -void CLDepthConcatenateLayerKernel::configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output) +void ClDepthConcatenateKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int depth_offset, ITensorInfo *dst) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, depth_offset, output)); + ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, depth_offset, dst)); - auto padding_info = get_padding_info({ input, output }); + auto padding_info = get_padding_info({ src, dst }); _depth_offset = depth_offset; - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16 / input->element_size(), input->dimension(0)); + const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16 / src->element_size(), src->dimension(0)); // Add build options CLBuildOptions build_opts; - 
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->data_type())); + build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src->data_type())); build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); - build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->dimension(0) % num_elems_processed_per_iteration)); - if(is_data_type_quantized_asymmetric(input->data_type()) && input->quantization_info() != output->quantization_info()) + build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(src->dimension(0) % num_elems_processed_per_iteration)); + if(is_data_type_quantized_asymmetric(src->data_type()) && src->quantization_info() != dst->quantization_info()) { - const UniformQuantizationInfo iq_info = input->quantization_info().uniform(); - const UniformQuantizationInfo oq_info = output->quantization_info().uniform(); + const UniformQuantizationInfo iq_info = src->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = dst->quantization_info().uniform(); build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset)); build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); @@ -90,25 +94,25 @@ void CLDepthConcatenateLayerKernel::configure(const CLCompileContext &compile_co _kernel = create_kernel(compile_context, "concatenate", build_opts.options()); // Configure kernel window - auto win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); - win.set(Window::DimZ, Window::Dimension(0, input->tensor_shape().z(), 1)); + auto win = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration)); + win.set(Window::DimZ, Window::Dimension(0, src->tensor_shape().z(), 1)); ICLKernel::configure_internal(win); - // Set output valid region - output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); + // Set dst valid region + 
dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape())); ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } -Status CLDepthConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *input, - unsigned int depth_offset, - const arm_compute::ITensorInfo *output) +Status ClDepthConcatenateKernel::validate(const arm_compute::ITensorInfo *src, + unsigned int depth_offset, + const arm_compute::ITensorInfo *dst) { - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, depth_offset, output)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, depth_offset, dst)); return Status{}; } -void CLDepthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) +void ClDepthConcatenateKernel::run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); @@ -120,7 +124,7 @@ void CLDepthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &w const int offset_to_first_elements_in_bytes = _depth_offset * dst->info()->strides_in_bytes()[2]; - unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the input and output parameters + unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the src and dst parameters _kernel.setArg<cl_int>(idx, offset_to_first_elements_in_bytes); do @@ -132,4 +136,6 @@ void CLDepthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &w } while(window.slide_window_slice_3D(slice)); } +} // namespace kernels +} // namespace opencl } // namespace arm_compute diff --git a/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h b/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h new file mode 100644 index 0000000000..144d7d48f2 --- /dev/null +++ b/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2017-2021 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CL_DEPTH_CONCATENATE_KERNEL_H +#define ARM_COMPUTE_CL_DEPTH_CONCATENATE_KERNEL_H + +#include "src/core/common/Macros.h" +#include "src/core/gpu/cl/ClCompileContext.h" +#include "src/core/gpu/cl/IClKernel.h" + +namespace arm_compute +{ +namespace opencl +{ +namespace kernels +{ +/** Interface for the depth concatenate kernel. + * The src tensor will be concatenated into the dst tensor. + */ +class ClDepthConcatenateKernel : public ICLKernel +{ +public: + /** Default constructor */ + ClDepthConcatenateKernel(); + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClDepthConcatenateKernel); + /** Initialise the kernel's source and destination + * + * @param[in] compile_context The compile context to be used. + * @param[in] src Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] depth_offset The offset on the Z axis. 
+ * @param[in,out] dst Destination tensor. Data types supported: Same as @p src. + * + * @note: The dst tensor's low two dimensions can't be smaller than the src one's. + * @note: The gaps between the two lowest dimensions of src and dst need to be divisible by 2. + * + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int depth_offset, ITensorInfo *dst); + /** Static function to check if given info will lead to a valid configuration of @ref ClDepthConcatenateKernel + * + * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[in] depth_offset The offset on the Z axis. + * @param[in] dst Destination tensor info. Data types supported: Same as @p src. + * + * @return a status + */ + static Status validate(const ITensorInfo *src, unsigned int depth_offset, const ITensorInfo *dst); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override; + +private: + unsigned int _depth_offset; +}; +} // namespace kernels +} // namespace opencl +} // namespace arm_compute +#endif /* ARM_COMPUTE_CL_DEPTH_CONCATENATE_KERNEL_H */ diff --git a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp b/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp index 8aa7366d50..83e976e10f 100644 --- a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp +++ b/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "src/core/CL/kernels/CLHeightConcatenateLayerKernel.h" +#include "src/core/gpu/cl/kernels/ClHeightConcatenateKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" @@ -37,60 +37,64 @@ namespace arm_compute { +namespace opencl +{ +namespace kernels +{ namespace { -Status validate_arguments(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output) +Status validate_arguments(const ITensorInfo *src, unsigned int height_offset, const ITensorInfo *dst) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimY) + height_offset > output->dimension(Window::DimY)); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst); + ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimY) + height_offset > dst->dimension(Window::DimY)); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != output->dimension(0)); + ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(0) != dst->dimension(0)); for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i) { - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(i) != output->dimension(i)); + ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(i) != dst->dimension(i)); } - ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); + ARM_COMPUTE_RETURN_ERROR_ON(src->num_dimensions() > 4); return Status{}; } } // namespace -CLHeightConcatenateLayerKernel::CLHeightConcatenateLayerKernel() +ClHeightConcatenateKernel::ClHeightConcatenateKernel() : _height_offset(0) { } -Status CLHeightConcatenateLayerKernel::validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output) +Status ClHeightConcatenateKernel::validate(const ITensorInfo *src, unsigned 
int height_offset, const ITensorInfo *dst) { - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, height_offset, output)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, height_offset, dst)); return Status{}; } -void CLHeightConcatenateLayerKernel::configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int height_offset, ITensorInfo *output) +void ClHeightConcatenateKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int height_offset, ITensorInfo *dst) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, height_offset, output)); + ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, height_offset, dst)); - auto padding_info = get_padding_info({ input, output }); + auto padding_info = get_padding_info({ src, dst }); _height_offset = height_offset; // Add build options - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(4, input->dimension(0)); + const unsigned int num_elems_processed_per_iteration = adjust_vec_size(4, src->dimension(0)); CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(input->element_size())); + build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(src->element_size())); build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); build_opts.add_option("-DHEIGHT_OFFSET=" + support::cpp11::to_string(_height_offset)); - build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input->dimension(2))); - build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->dimension(0) % num_elems_processed_per_iteration)); + build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(src->dimension(2))); + build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(src->dimension(0) % num_elems_processed_per_iteration)); - 
if(is_data_type_quantized_asymmetric(input->data_type()) && input->quantization_info() != output->quantization_info()) + if(is_data_type_quantized_asymmetric(src->data_type()) && src->quantization_info() != dst->quantization_info()) { - const UniformQuantizationInfo iq_info = input->quantization_info().uniform(); - const UniformQuantizationInfo oq_info = output->quantization_info().uniform(); + const UniformQuantizationInfo iq_info = src->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = dst->quantization_info().uniform(); build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset)); build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); @@ -102,17 +106,17 @@ void CLHeightConcatenateLayerKernel::configure(const CLCompileContext &compile_c _kernel = create_kernel(compile_context, "concatenate_height", build_opts.options()); // Configure kernel window - // The window needs to be based on input as we copy all the heights of input - Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); + // The window needs to be based on src as we copy all the heights of src + Window win = calculate_max_window(*src, Steps(num_elems_processed_per_iteration)); ICLKernel::configure_internal(win.collapse(win, Window::DimZ)); - // Set output valid region - output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); + // Set dst valid region + dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape())); ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } -void CLHeightConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) +void ClHeightConcatenateKernel::run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); @@ -125,4 +129,6 @@ void 
CLHeightConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window & add_4D_tensor_argument(idx, dst, window); enqueue(queue, *this, window, lws_hint()); } +} // namespace kernels +} // namespace opencl } // namespace arm_compute diff --git a/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.h b/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.h new file mode 100644 index 0000000000..88cd4c4d17 --- /dev/null +++ b/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_CL_HEIGHT_CONCATENATE_LAYER_KERNEL_H +#define ARM_COMPUTE_CL_HEIGHT_CONCATENATE_LAYER_KERNEL_H + +#include "src/core/common/Macros.h" +#include "src/core/gpu/cl/ClCompileContext.h" +#include "src/core/gpu/cl/IClKernel.h" + +namespace arm_compute +{ +namespace opencl +{ +namespace kernels +{ +/** Interface for the height concatenate kernel. + * The source tensor will be concatenated into the destination tensor. + */ +class ClHeightConcatenateKernel : public IClKernel +{ +public: + /** Default constructor */ + ClHeightConcatenateKernel(); + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClHeightConcatenateKernel); + /** Initialise the kernel's source and destination + * + * @param[in] compile_context The compile context to be used. + * @param[in] src Source tensor. Data types supported: All. + * @param[in] height_offset The starting offset on the Y axis for the dst tensor. + * @param[out] dst Destination tensor. Data types supported: same as @p src. + * + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int height_offset, ITensorInfo *dst); + /** Static function to check if given info will lead to a valid configuration of @ref ClHeightConcatenateKernel + * + * @param[in] src Source tensor info. Data types supported: All. + * @param[in] height_offset The starting offset on the Y axis for the dst tensor. + * @param[in] dst Destination tensor info. Data types supported: same as @p src. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *src, unsigned int height_offset, const ITensorInfo *dst); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override; + +private: + unsigned int _height_offset; +}; +} // namespace kernels +} // namespace opencl +} // namespace arm_compute +#endif /* ARM_COMPUTE_CL_HEIGHT_CONCATENATE_LAYER_KERNEL_H */ diff --git a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp b/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp index d6697ba46b..6a2ab3b50f 100644 --- a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp +++ b/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h" +#include "src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" @@ -37,62 +37,66 @@ namespace arm_compute { +namespace opencl +{ +namespace kernels +{ namespace { -Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output) +Status validate_arguments(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output); - ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input1); - ARM_COMPUTE_RETURN_ERROR_ON(input1->data_type() == DataType::UNKNOWN); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2, output); - ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(0) + input2->dimension(0) > output->dimension(0)); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src1, src2, dst); + 
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src1); + ARM_COMPUTE_RETURN_ERROR_ON(src1->data_type() == DataType::UNKNOWN); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src1, src2, dst); + ARM_COMPUTE_RETURN_ERROR_ON(src1->dimension(0) + src2->dimension(0) > dst->dimension(0)); for(size_t i = 1; i < Coordinates::num_max_dimensions; ++i) { - ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(i) != output->dimension(i)); - ARM_COMPUTE_RETURN_ERROR_ON(input2->dimension(i) != output->dimension(i)); + ARM_COMPUTE_RETURN_ERROR_ON(src1->dimension(i) != dst->dimension(i)); + ARM_COMPUTE_RETURN_ERROR_ON(src2->dimension(i) != dst->dimension(i)); } - ARM_COMPUTE_RETURN_ERROR_ON(input1->num_dimensions() > 4); + ARM_COMPUTE_RETURN_ERROR_ON(src1->num_dimensions() > 4); return Status{}; } } // namespace -Status CLWidthConcatenate2TensorsKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output) +Status ClWidthConcatenate2TensorsKernel::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst) { - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, output)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src1, src2, dst)); return Status{}; } -void CLWidthConcatenate2TensorsKernel::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output) +void ClWidthConcatenate2TensorsKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1, input2, output)); + ARM_COMPUTE_ERROR_ON_NULLPTR(src1, src2, dst); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src1, src2, dst)); - auto padding_info = get_padding_info({ input1, input2, output }); + auto padding_info = get_padding_info({ src1, src2, dst }); - const unsigned int min_dimension = std::min(input1->dimension(0), input2->dimension(0)); + const 
unsigned int min_dimension = std::min(src1->dimension(0), src2->dimension(0)); const unsigned int num_elems_processed_per_iteration = adjust_vec_size(8, min_dimension); - const unsigned int vec_size_leftover = output->dimension(0) % num_elems_processed_per_iteration; + const unsigned int vec_size_leftover = dst->dimension(0) % num_elems_processed_per_iteration; // Add build options CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input1->data_type())); + build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src1->data_type())); build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(vec_size_leftover)); - build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input1->dimension(2))); - build_opts.add_option("-DINPUT1_WIDTH=" + support::cpp11::to_string(input1->dimension(0))); - build_opts.add_option("-DINPUT2_WIDTH=" + support::cpp11::to_string(input2->dimension(0))); - build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(input1->element_size())); - build_opts.add_option("-DINPUT1_ROTATE_N=" + support::cpp11::to_string((input1->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration)); + build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(src1->dimension(2))); + build_opts.add_option("-DINPUT1_WIDTH=" + support::cpp11::to_string(src1->dimension(0))); + build_opts.add_option("-DINPUT2_WIDTH=" + support::cpp11::to_string(src2->dimension(0))); + build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(src1->element_size())); + build_opts.add_option("-DINPUT1_ROTATE_N=" + support::cpp11::to_string((src1->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration)); // If input have different quantization info set quantization parameters needed for the re-quantization process - const bool have_different_qinfo = 
helpers::tensor_info::tensors_have_different_quantization_info(output, input1, input2); - if(is_data_type_quantized_asymmetric(input1->data_type()) && have_different_qinfo) + const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(dst, src1, src2); + if(is_data_type_quantized_asymmetric(src1->data_type()) && have_different_qinfo) { - const UniformQuantizationInfo iq1_info = input1->quantization_info().uniform(); - const UniformQuantizationInfo iq2_info = input2->quantization_info().uniform(); - const UniformQuantizationInfo oq_info = output->quantization_info().uniform(); + const UniformQuantizationInfo iq1_info = src1->quantization_info().uniform(); + const UniformQuantizationInfo iq2_info = src2->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = dst->quantization_info().uniform(); build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq1_info.offset)); build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale)); @@ -106,27 +110,27 @@ void CLWidthConcatenate2TensorsKernel::configure(const CLCompileContext &compile _kernel = create_kernel(compile_context, "concatenate_width_x2", build_opts.options()); // Configure kernel window - Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); + Window win = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration)); ICLKernel::configure_internal(win.collapse(win, Window::DimZ)); - // Set output valid region - output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); + // Set dst valid region + dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape())); ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); // Set config_id for enabling LWS tuning _config_id = "concatenate_width_x2_"; - _config_id += lower_string(string_from_data_type(input1->data_type())); + _config_id += lower_string(string_from_data_type(src1->data_type())); 
_config_id += "_"; - _config_id += support::cpp11::to_string(input1->dimension(0)); + _config_id += support::cpp11::to_string(src1->dimension(0)); _config_id += "_"; - _config_id += support::cpp11::to_string(input1->dimension(1)); + _config_id += support::cpp11::to_string(src1->dimension(1)); _config_id += "_"; - _config_id += support::cpp11::to_string(input2->dimension(0)); + _config_id += support::cpp11::to_string(src2->dimension(0)); _config_id += "_"; - _config_id += support::cpp11::to_string(input2->dimension(1)); + _config_id += support::cpp11::to_string(src2->dimension(1)); } -void CLWidthConcatenate2TensorsKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) +void ClWidthConcatenate2TensorsKernel::run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); @@ -147,4 +151,6 @@ void CLWidthConcatenate2TensorsKernel::run_op(ITensorPack &tensors, const Window } while(window.slide_window_slice_4D(slice)); } +} // namespace kernels +} // namespace opencl } // namespace arm_compute diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h b/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h new file mode 100644 index 0000000000..92715008cf --- /dev/null +++ b/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2021 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CL_WIDTHCONCATENATE_2TENSORS_KERNEL_H +#define ARM_COMPUTE_CL_WIDTHCONCATENATE_2TENSORS_KERNEL_H + +#include "src/core/common/Macros.h" +#include "src/core/gpu/cl/ClCompileContext.h" +#include "src/core/gpu/cl/IClKernel.h" + +namespace arm_compute +{ +namespace opencl +{ +namespace kernels +{ +/** Interface for the width concatenate kernel of 2 tensors. + * The src1 and src2 tensors will be concatenated into the dst tensor. + */ +class ClWidthConcatenate2TensorsKernel : public IClKernel +{ +public: + /** Default constructor */ + ClWidthConcatenate2TensorsKernel() = default; + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClWidthConcatenate2TensorsKernel); + /** Initialise the kernel's sources and destination + * + * @param[in] compile_context The compile context to be used. + * @param[in] src1 First source tensor. Data types supported: All. 
+ * @param[in] src2 Second source tensor. Data types supported: same as @p src1 + * @param[out] dst Destination tensor. Data types supported: Same as @p src1. + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst); + /** Static function to check if given info will lead to a valid configuration of @ref ClWidthConcatenate2TensorsKernel + * + * @param[in] src1 First tensor info. Data types supported: All. + * @param[in] src2 Second tensor info. Data types supported: same as @p src1 + * @param[in] dst Destination tensor info. Data types supported: Same as @p src1. + * + * @return a status + */ + static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override; +}; +} // namespace kernels +} // namespace opencl +} // namespace arm_compute +#endif /* ARM_COMPUTE_CL_WIDTH_CONCATENATE_2TENSORS_KERNEL_H */ diff --git a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp b/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp index 7ecdd30224..4b49652a73 100644 --- a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp +++ b/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h" +#include "src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" @@ -37,76 +37,80 @@ namespace arm_compute { +namespace opencl +{ +namespace kernels +{ namespace { -Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *input3, const ITensorInfo *input4, const ITensorInfo *output) +Status validate_arguments(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *src3, const ITensorInfo *src4, const ITensorInfo *dst) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, input3, input4, output); - ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input1); - ARM_COMPUTE_RETURN_ERROR_ON(input1->data_type() == DataType::UNKNOWN); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2, input3, input4, output); - ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(0) + input2->dimension(0) + input3->dimension(0) + input4->dimension(0) > output->dimension(0)); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src1, src2, src3, src4, dst); + ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src1); + ARM_COMPUTE_RETURN_ERROR_ON(src1->data_type() == DataType::UNKNOWN); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src1, src2, src3, src4, dst); + ARM_COMPUTE_RETURN_ERROR_ON(src1->dimension(0) + src2->dimension(0) + src3->dimension(0) + src4->dimension(0) > dst->dimension(0)); for(size_t i = 1; i < Coordinates::num_max_dimensions; ++i) { - ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(i) != output->dimension(i)); - ARM_COMPUTE_RETURN_ERROR_ON(input2->dimension(i) != output->dimension(i)); - ARM_COMPUTE_RETURN_ERROR_ON(input3->dimension(i) != output->dimension(i)); - ARM_COMPUTE_RETURN_ERROR_ON(input4->dimension(i) != output->dimension(i)); + ARM_COMPUTE_RETURN_ERROR_ON(src1->dimension(i) != dst->dimension(i)); + ARM_COMPUTE_RETURN_ERROR_ON(src2->dimension(i) != 
dst->dimension(i)); + ARM_COMPUTE_RETURN_ERROR_ON(src3->dimension(i) != dst->dimension(i)); + ARM_COMPUTE_RETURN_ERROR_ON(src4->dimension(i) != dst->dimension(i)); } - ARM_COMPUTE_RETURN_ERROR_ON(input1->num_dimensions() > 4); + ARM_COMPUTE_RETURN_ERROR_ON(src1->num_dimensions() > 4); return Status{}; } } // namespace -CLWidthConcatenate4TensorsKernel::CLWidthConcatenate4TensorsKernel() +ClWidthConcatenate4TensorsKernel::ClWidthConcatenate4TensorsKernel() { } -Status CLWidthConcatenate4TensorsKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *input3, const ITensorInfo *input4, const ITensorInfo *output) +Status ClWidthConcatenate4TensorsKernel::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *src3, const ITensorInfo *src4, const ITensorInfo *dst) { - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, input3, input4, output)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src1, src2, src3, src4, dst)); return Status{}; } -void CLWidthConcatenate4TensorsKernel::configure(const CLCompileContext &compile_context, - ITensorInfo *input1, ITensorInfo *input2, - ITensorInfo *input3, ITensorInfo *input4, - ITensorInfo *output) +void ClWidthConcatenate4TensorsKernel::configure(const CLCompileContext &compile_context, + ITensorInfo *src1, ITensorInfo *src2, + ITensorInfo *src3, ITensorInfo *src4, + ITensorInfo *dst) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, input3, input4, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1, input2, input3, input4, output)); + ARM_COMPUTE_ERROR_ON_NULLPTR(src1, src2, src3, src4, dst); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src1, src2, src3, src4, dst)); - auto padding_info = get_padding_info({ input1, input2, input3, input4, output }); - const unsigned int min_dimension = std::min(std::min(input1->dimension(0), input2->dimension(0)), std::min(input3->dimension(0), input4->dimension(0))); + auto padding_info = get_padding_info({ src1, 
src2, src3, src4, dst }); + const unsigned int min_dimension = std::min(std::min(src1->dimension(0), src2->dimension(0)), std::min(src3->dimension(0), src4->dimension(0))); const unsigned int num_elems_processed_per_iteration = adjust_vec_size(8, min_dimension); - const unsigned int vec_size_leftover = output->dimension(0) % num_elems_processed_per_iteration; + const unsigned int vec_size_leftover = dst->dimension(0) % num_elems_processed_per_iteration; // Add build options CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input1->data_type())); + build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src1->data_type())); build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(vec_size_leftover)); - build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input1->dimension(2))); - build_opts.add_option("-DINPUT1_WIDTH=" + support::cpp11::to_string(input1->dimension(0))); - build_opts.add_option("-DINPUT2_WIDTH=" + support::cpp11::to_string(input2->dimension(0))); - build_opts.add_option("-DINPUT3_WIDTH=" + support::cpp11::to_string(input3->dimension(0))); - build_opts.add_option("-DINPUT4_WIDTH=" + support::cpp11::to_string(input4->dimension(0))); - build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(input1->element_size())); - build_opts.add_option("-DINPUT1_ROTATE_N=" + support::cpp11::to_string((input1->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration)); - build_opts.add_option("-DINPUT2_ROTATE_N=" + support::cpp11::to_string((input1->dimension(0) + input2->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration)); - build_opts.add_option("-DINPUT3_ROTATE_N=" + support::cpp11::to_string((input1->dimension(0) + input2->dimension(0) + input3->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration)); - - // If input have 
different quantization info set quantization parameters needed for the re-quantization process - const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(output, input1, input2, input3, input4); - if(is_data_type_quantized_asymmetric(input1->data_type()) && have_different_qinfo) + build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(src1->dimension(2))); + build_opts.add_option("-DINPUT1_WIDTH=" + support::cpp11::to_string(src1->dimension(0))); + build_opts.add_option("-DINPUT2_WIDTH=" + support::cpp11::to_string(src2->dimension(0))); + build_opts.add_option("-DINPUT3_WIDTH=" + support::cpp11::to_string(src3->dimension(0))); + build_opts.add_option("-DINPUT4_WIDTH=" + support::cpp11::to_string(src4->dimension(0))); + build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(src1->element_size())); + build_opts.add_option("-DINPUT1_ROTATE_N=" + support::cpp11::to_string((src1->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration)); + build_opts.add_option("-DINPUT2_ROTATE_N=" + support::cpp11::to_string((src1->dimension(0) + src2->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration)); + build_opts.add_option("-DINPUT3_ROTATE_N=" + support::cpp11::to_string((src1->dimension(0) + src2->dimension(0) + src3->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration)); + + // If soources have different quantization info set quantization parameters needed for the re-quantization process + const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(dst, src1, src2, src3, src4); + if(is_data_type_quantized_asymmetric(src1->data_type()) && have_different_qinfo) { - const UniformQuantizationInfo iq1_info = input1->quantization_info().uniform(); - const UniformQuantizationInfo iq2_info = input2->quantization_info().uniform(); - const UniformQuantizationInfo iq3_info = input3->quantization_info().uniform(); - const UniformQuantizationInfo 
iq4_info = input4->quantization_info().uniform(); - const UniformQuantizationInfo oq_info = output->quantization_info().uniform(); + const UniformQuantizationInfo iq1_info = src1->quantization_info().uniform(); + const UniformQuantizationInfo iq2_info = src2->quantization_info().uniform(); + const UniformQuantizationInfo iq3_info = src3->quantization_info().uniform(); + const UniformQuantizationInfo iq4_info = src4->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = dst->quantization_info().uniform(); build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq1_info.offset)); build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale)); @@ -124,35 +128,35 @@ void CLWidthConcatenate4TensorsKernel::configure(const CLCompileContext &compile _kernel = create_kernel(compile_context, "concatenate_width_x4", build_opts.options()); // Configure kernel window - Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); + Window win = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration)); ICLKernel::configure_internal(win.collapse(win, Window::DimZ)); - // Set output valid region - output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); + // Set dst valid region + dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape())); ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); // Set config_id for enabling LWS tuning _config_id = "concatenate_width_x4_"; - _config_id += lower_string(string_from_data_type(input1->data_type())); + _config_id += lower_string(string_from_data_type(src1->data_type())); _config_id += "_"; - _config_id += support::cpp11::to_string(input1->dimension(0)); + _config_id += support::cpp11::to_string(src1->dimension(0)); _config_id += "_"; - _config_id += support::cpp11::to_string(input1->dimension(1)); + _config_id += support::cpp11::to_string(src1->dimension(1)); _config_id += "_"; - _config_id += 
support::cpp11::to_string(input2->dimension(0)); + _config_id += support::cpp11::to_string(src2->dimension(0)); _config_id += "_"; - _config_id += support::cpp11::to_string(input2->dimension(1)); + _config_id += support::cpp11::to_string(src2->dimension(1)); _config_id += "_"; - _config_id += support::cpp11::to_string(input3->dimension(0)); + _config_id += support::cpp11::to_string(src3->dimension(0)); _config_id += "_"; - _config_id += support::cpp11::to_string(input3->dimension(1)); + _config_id += support::cpp11::to_string(src3->dimension(1)); _config_id += "_"; - _config_id += support::cpp11::to_string(input4->dimension(0)); + _config_id += support::cpp11::to_string(src4->dimension(0)); _config_id += "_"; - _config_id += support::cpp11::to_string(input4->dimension(1)); + _config_id += support::cpp11::to_string(src4->dimension(1)); } -void CLWidthConcatenate4TensorsKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) +void ClWidthConcatenate4TensorsKernel::run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); @@ -177,4 +181,6 @@ void CLWidthConcatenate4TensorsKernel::run_op(ITensorPack &tensors, const Window } while(window.slide_window_slice_4D(slice)); } +} // namespace kernels +} // namespace opencl } // namespace arm_compute diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h b/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h new file mode 100644 index 0000000000..06d6c0399a --- /dev/null +++ b/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2021 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ARM_COMPUTE_CL_WIDTH_CONCATENATE_4TENSORS_KERNEL_H +#define ARM_COMPUTE_CL_WIDTH_CONCATENATE_4TENSORS_KERNEL_H + +#include "src/core/common/Macros.h" +#include "src/core/gpu/cl/ClCompileContext.h" +#include "src/core/gpu/cl/IClKernel.h" + +namespace arm_compute +{ +namespace opencl +{ +namespace kernels +{ +/** Interface for the width concatenate kernel of 4 tensors. + * All source tensors will be concatenated into the destination tensor. + */ +class ClWidthConcatenate4TensorsKernel : public IClKernel +{ +public: + /** Default constructor */ + ClWidthConcatenate4TensorsKernel(); + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClWidthConcatenate4TensorsKernel); + /** Initialise the kernel's sources and destination + * + * @param[in] compile_context The compile context to be used. + * @param[in] src1 First source tensor. Data types supported: All. 
+ * @param[in] src2 Second source tensor. Data types supported: same as @p src1 + * @param[in] src3 Third source tensor. Data types supported: same as @p src1 + * @param[in] src4 Fourth source tensor. Data types supported: same as @p src1 + * @param[out] dst Destination tensor. Data types supported: same as @p src1. + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *src3, ITensorInfo *src4, ITensorInfo *dst); + /** Static function to check if given info will lead to a valid configuration of @ref ClWidthConcatenate4TensorsKernel + * + * @param[in] src1 First tensor info. Data types supported: All. + * @param[in] src2 Second tensor info. Data types supported: same as @p src1 + * @param[in] src3 Third tensor info. Data types supported: same as @p src1 + * @param[in] src4 Fourth tensor info. Data types supported: same as @p src1 + * @param[in] dst Destination tensor info. Data types supported: same as @p src1. + * + * @return a status + */ + static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *src3, const ITensorInfo *src4, const ITensorInfo *dst); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override; +}; +} // namespace kernels +} // namespace opencl +} // namespace arm_compute +#endif /* ARM_COMPUTE_CL_WIDTH_CONCATENATE_4TENSORS_KERNEL_H */ diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp index 30d0a481bd..8cbbc27444 100644 --- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp +++ b/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "src/core/CL/kernels/CLWidthConcatenateLayerKernel.h" +#include "src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" @@ -36,58 +36,62 @@ namespace arm_compute { +namespace opencl +{ +namespace kernels +{ namespace { -Status validate_arguments(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output) +Status validate_arguments(const ITensorInfo *src, unsigned int width_offset, const ITensorInfo *dst) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src); + ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) + width_offset > output->dimension(0)); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst); + ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(0) + width_offset > dst->dimension(0)); for(size_t i = 1; i < Coordinates::num_max_dimensions; ++i) { - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(i) != output->dimension(i)); + ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(i) != dst->dimension(i)); } - ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); + ARM_COMPUTE_RETURN_ERROR_ON(src->num_dimensions() > 4); return Status{}; } } // namespace -CLWidthConcatenateLayerKernel::CLWidthConcatenateLayerKernel() +ClWidthConcatenateKernel::ClWidthConcatenateKernel() { } -Status CLWidthConcatenateLayerKernel::validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output) +Status ClWidthConcatenateKernel::validate(const ITensorInfo *src, unsigned int width_offset, const ITensorInfo *dst) { - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, width_offset, 
output)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, width_offset, dst)); return Status{}; } -void CLWidthConcatenateLayerKernel::configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int width_offset, ITensorInfo *output) +void ClWidthConcatenateKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int width_offset, ITensorInfo *dst) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, width_offset, output)); + ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, width_offset, dst)); - auto padding_info = get_padding_info({ input, output }); + auto padding_info = get_padding_info({ src, dst }); - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16, input->dimension(0)); + const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16, src->dimension(0)); // Add build options CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->data_type())); + build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src->data_type())); build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); - build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->dimension(0) % num_elems_processed_per_iteration)); + build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(src->dimension(0) % num_elems_processed_per_iteration)); build_opts.add_option("-DWIDTH_OFFSET=" + support::cpp11::to_string(width_offset)); - build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input->dimension(2))); + build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(src->dimension(2))); - if(is_data_type_quantized_asymmetric(input->data_type()) && input->quantization_info() != output->quantization_info()) + if(is_data_type_quantized_asymmetric(src->data_type()) && 
src->quantization_info() != dst->quantization_info()) { - const UniformQuantizationInfo iqinfo = input->quantization_info().uniform(); - const UniformQuantizationInfo oqinfo = output->quantization_info().uniform(); + const UniformQuantizationInfo iqinfo = src->quantization_info().uniform(); + const UniformQuantizationInfo oqinfo = dst->quantization_info().uniform(); build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iqinfo.offset)); build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oqinfo.offset)); @@ -98,16 +102,16 @@ void CLWidthConcatenateLayerKernel::configure(const CLCompileContext &compile_co // Create kernel _kernel = create_kernel(compile_context, "concatenate_width", build_opts.options()); // Configure kernel window - Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); + Window win = calculate_max_window(*src, Steps(num_elems_processed_per_iteration)); ICLKernel::configure_internal(win.collapse(win, Window::DimZ)); - // Set output valid region - output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); + // Set dst valid region + dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape())); ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } -void CLWidthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) +void ClWidthConcatenateKernel::run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); @@ -120,4 +124,6 @@ void CLWidthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &w add_4D_tensor_argument(idx, dst, window); enqueue(queue, *this, window, lws_hint()); } +} // namespace kernels +} // namespace opencl } // namespace arm_compute diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h 
b/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h new file mode 100644 index 0000000000..3bffe52700 --- /dev/null +++ b/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CL_WIDTH_CONCATENATE_LAYER_KERNEL_H +#define ARM_COMPUTE_CL_WIDTH_CONCATENATE_LAYER_KERNEL_H + +#include "src/core/common/Macros.h" +#include "src/core/gpu/cl/ClCompileContext.h" +#include "src/core/gpu/cl/IClKernel.h" + +namespace arm_compute +{ +namespace opencl +{ +namespace kernels +{ +/** Interface for the width concatenate kernel. + * The source tensor will be concatenated into the destination tensor. 
+ */ +class ClWidthConcatenateKernel : public IClKernel +{ +public: + /** Default constructor */ + ClWidthConcatenateKernel(); + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClWidthConcatenateKernel); + /** Initialise the kernel's source and destination + * + * @param[in] compile_context The compile context to be used. + * @param[in] src Source tensor. Data types supported: All. + * @param[in] width_offset The offset on the X axis. + * @param[in,out] dst Destination tensor. Data types supported: same as @p src. + * + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int width_offset, ITensorInfo *dst); + /** Static function to check if given info will lead to a valid configuration of @ref ClWidthConcatenateKernel + * + * @param[in] src Source tensor info. Data types supported: All. + * @param[in] width_offset The offset on the X axis. + * @param[in] dst Destination tensor info. Data types supported: same as @p src. + * + * @return a status + */ + static Status validate(const ITensorInfo *src, unsigned int width_offset, const ITensorInfo *dst); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override; +}; +} // namespace kernels +} // namespace opencl +} // namespace arm_compute +#endif /* ARM_COMPUTE_CL_WIDTH_CONCATENATE_LAYER_KERNEL_H */ diff --git a/src/runtime/CL/functions/CLConcatenateLayer.cpp b/src/runtime/CL/functions/CLConcatenateLayer.cpp index 0c473a79c8..ea96e45bf8 100644 --- a/src/runtime/CL/functions/CLConcatenateLayer.cpp +++ b/src/runtime/CL/functions/CLConcatenateLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -23,242 +23,19 @@ */ #include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "src/core/CL/kernels/CLDepthConcatenateLayerKernel.h" -#include "src/core/CL/kernels/CLHeightConcatenateLayerKernel.h" -#include "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h" -#include "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h" -#include "src/core/CL/kernels/CLWidthConcatenateLayerKernel.h" - #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "src/core/CL/kernels/CLBatchConcatenateLayerKernel.h" -#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/CL/ICLKernel.h" +#include "src/runtime/gpu/cl/operators/ClConcatenate.h" namespace arm_compute { -namespace experimental -{ -CLConcatenation::CLConcatenation() - : _concat_kernels(), - _num_inputs(0), - _axis(Window::DimX) -{ -} - -void CLConcatenation::configure(const CLCompileContext &compile_context, const std::vector<ITensorInfo *> &inputs_vector, ITensorInfo *output, size_t axis) -{ - ARM_COMPUTE_ERROR_ON(output == nullptr); - _axis = axis; - _num_inputs = inputs_vector.size(); - - TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, _axis); - std::vector<const ITensorInfo *> const_inputs_vector(inputs_vector.size()); - std::transform(inputs_vector.begin(), inputs_vector.end(), const_inputs_vector.begin(), [](ITensorInfo * t) - { - ARM_COMPUTE_ERROR_ON_NULLPTR(t); - return t; - }); - - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output, output_shape, 1, inputs_vector[0]->data_type()); - ARM_COMPUTE_ERROR_THROW_ON(CLConcatenateLayer::validate(const_inputs_vector, output, axis)); - - unsigned int offset = 0; - switch(_axis) - { - case 
Window::DimX: - { - switch(_num_inputs) - { - case 2: - { - // Configure WidthConcatenate2Tensors kernel - auto kernel = std::make_unique<CLWidthConcatenate2TensorsKernel>(); - kernel->configure(compile_context, inputs_vector.at(0), inputs_vector.at(1), output); - _concat_kernels.emplace_back(std::move(kernel)); - break; - } - case 4: - { - // Configure WidthConcatenate4Tensors kernel - auto kernel = std::make_unique<CLWidthConcatenate4TensorsKernel>(); - kernel->configure(compile_context, inputs_vector.at(0), inputs_vector.at(1), inputs_vector.at(2), inputs_vector.at(3), output); - _concat_kernels.emplace_back(std::move(kernel)); - break; - } - default: - { - // Configure generic case WidthConcatenate kernels - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto kernel = std::make_unique<CLWidthConcatenateLayerKernel>(); - kernel->configure(compile_context, inputs_vector.at(i), offset, output); - offset += inputs_vector.at(i)->dimension(_axis); - _concat_kernels.emplace_back(std::move(kernel)); - } - break; - } - } - break; - } - case Window::DimY: - { - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto kernel = std::make_unique<CLHeightConcatenateLayerKernel>(); - kernel->configure(compile_context, inputs_vector.at(i), offset, output); - offset += inputs_vector.at(i)->dimension(_axis); - _concat_kernels.emplace_back(std::move(kernel)); - } - break; - } - case Window::DimZ: - { - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto kernel = std::make_unique<CLDepthConcatenateLayerKernel>(); - kernel->configure(compile_context, inputs_vector.at(i), offset, output); - offset += inputs_vector.at(i)->dimension(_axis); - _concat_kernels.emplace_back(std::move(kernel)); - } - break; - } - case 3: - { - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto kernel = std::make_unique<CLBatchConcatenateLayerKernel>(); - kernel->configure(compile_context, inputs_vector.at(i), offset, output); - offset += inputs_vector.at(i)->dimension(_axis); - 
_concat_kernels.emplace_back(std::move(kernel)); - } - break; - } - default: - ARM_COMPUTE_ERROR("Axis not supported"); - } -} - -Status CLConcatenation::validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis) -{ - ARM_COMPUTE_RETURN_ERROR_ON(output == nullptr); - const unsigned int num_inputs = inputs_vector.size(); - - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); - ARM_COMPUTE_RETURN_ERROR_ON(num_inputs < 2); - - unsigned int offset = 0; - switch(axis) - { - case Window::DimX: - { - switch(num_inputs) - { - case 2: - // Validate WidthConcatenate2Tensors kernels if there are 2 inputs - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(inputs_vector[0], inputs_vector[1]); - ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenate2TensorsKernel::validate(inputs_vector[0], inputs_vector[1], output)); - break; - case 4: - // Validate WidthConcatenate4Tensors kernels if there are 4 inputs - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(inputs_vector[0], inputs_vector[1], inputs_vector[2], inputs_vector[3]); - ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenate4TensorsKernel::validate(inputs_vector[0], inputs_vector[1], inputs_vector[2], inputs_vector[3], output)); - break; - default: - // Validate generic case of WidthConcatenate kernel - for(const auto &input : inputs_vector) - { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); - ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenateLayerKernel::validate(input, offset, output)); - offset += input->dimension(axis); - } - break; - } - break; - } - case Window::DimY: - { - for(const auto &input : inputs_vector) - { - ARM_COMPUTE_RETURN_ON_ERROR(CLHeightConcatenateLayerKernel::validate(input, offset, output)); - offset += input->dimension(axis); - } - break; - } - case Window::DimZ: - { - for(const auto &input : inputs_vector) - { - ARM_COMPUTE_RETURN_ON_ERROR(CLDepthConcatenateLayerKernel::validate(input, offset, output)); - offset += input->dimension(axis); - } - break; - } - case 3: - { - for(const auto &input : 
inputs_vector) - { - ARM_COMPUTE_RETURN_ON_ERROR(CLBatchConcatenateLayerKernel::validate(input, offset, output)); - offset += input->dimension(axis); - } - break; - } - default: - ARM_COMPUTE_ERROR("Axis not supported"); - } - - if(output->total_size() != 0) - { - TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, axis); - ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size()); - } - - return Status{}; -} - -void CLConcatenation::run(ITensorPack &tensors) -{ - if(tensors.empty()) - { - ARM_COMPUTE_ERROR("No inputs provided"); - } - - if(static_cast<int>(tensors.size()) - 1 != static_cast<int>(_num_inputs)) - { - ARM_COMPUTE_ERROR("Configured with different number of inputs"); - } - - if(_axis == Window::DimX && (_num_inputs == 2 || _num_inputs == 4)) - { - ARM_COMPUTE_ERROR_ON(_concat_kernels.empty()); - CLScheduler::get().enqueue_op(*_concat_kernels.at(0), tensors, true); - } - else - { - int i = 0; - for(auto &k : _concat_kernels) - { - ITensorPack pack; - pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(ACL_SRC_VEC + i)); - pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(ACL_DST)); - CLScheduler::get().enqueue_op(*k, pack, true); - ++i; - } - } -} -} // namespace experimental - struct CLConcatenateLayer::Impl { - std::vector<const ICLTensor *> srcs{}; - ICLTensor *dst{ nullptr }; - unsigned int num_inputs{ 0 }; - unsigned int axis{ 0 }; - std::unique_ptr<experimental::CLConcatenation> op{ nullptr }; + std::vector<const ICLTensor *> srcs{}; + ICLTensor *dst{ nullptr }; + unsigned int num_inputs{ 0 }; + unsigned int axis{ 0 }; + std::unique_ptr<opencl::ClConcatenate> op{ nullptr }; }; CLConcatenateLayer::CLConcatenateLayer() @@ -285,7 +62,7 @@ void CLConcatenateLayer::configure(const CLCompileContext &compile_context, std: _impl->dst = output; _impl->axis = axis; _impl->num_inputs = inputs_vector.size(); - _impl->op = 
std::make_unique<experimental::CLConcatenation>(); + _impl->op = std::make_unique<opencl::ClConcatenate>(); std::vector<ITensorInfo *> inputs_vector_info; for(unsigned int i = 0; i < inputs_vector.size(); ++i) @@ -298,7 +75,7 @@ void CLConcatenateLayer::configure(const CLCompileContext &compile_context, std: Status CLConcatenateLayer::validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis) { - return experimental::CLConcatenation::validate(inputs_vector, output, axis); + return opencl::ClConcatenate::validate(inputs_vector, output, axis); } void CLConcatenateLayer::run() diff --git a/src/runtime/gpu/cl/IClOperator.h b/src/runtime/gpu/cl/IClOperator.h new file mode 100644 index 0000000000..049bf05dc1 --- /dev/null +++ b/src/runtime/gpu/cl/IClOperator.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_ICL_OPERATOR_H +#define ARM_COMPUTE_ICL_OPERATOR_H + +#include "arm_compute/core/ITensorInfo.h" +#include "arm_compute/runtime/CL/ICLOperator.h" + +namespace arm_compute +{ +namespace opencl +{ +using IClOperator = experimental::ICLOperator; +} // namespace opencl +} // namespace arm_compute +#endif /* ARM_COMPUTE_ICL_OPERATOR_H */ diff --git a/src/runtime/gpu/cl/operators/ClConcatenate.cpp b/src/runtime/gpu/cl/operators/ClConcatenate.cpp new file mode 100644 index 0000000000..4385fcfaed --- /dev/null +++ b/src/runtime/gpu/cl/operators/ClConcatenate.cpp @@ -0,0 +1,254 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "src/runtime/gpu/cl/operators/ClConcatenate.h" + +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +#include "src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h" +#include "src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h" +#include "src/core/gpu/cl/kernels/ClHeightConcatenateKernel.h" +#include "src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h" +#include "src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h" +#include "src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "src/core/helpers/AutoConfiguration.h" + +namespace arm_compute +{ +namespace opencl +{ +ClConcatenate::ClConcatenate() + : _concat_kernels(), + _num_inputs(0), + _axis(Window::DimX) +{ +} + +void ClConcatenate::configure(const CLCompileContext &compile_context, const std::vector<ITensorInfo *> &src_vector, ITensorInfo *dst, size_t axis) +{ + ARM_COMPUTE_ERROR_ON(dst == nullptr); + _axis = axis; + _num_inputs = src_vector.size(); + + TensorShape dst_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(src_vector, _axis); + std::vector<const ITensorInfo *> const_src_vector(src_vector.size()); + std::transform(src_vector.begin(), src_vector.end(), const_src_vector.begin(), [](ITensorInfo * t) + { + ARM_COMPUTE_ERROR_ON_NULLPTR(t); + return t; + }); + + // dst auto inizialitation if not yet initialized + auto_init_if_empty(*dst, dst_shape, 1, src_vector[0]->data_type()); + ARM_COMPUTE_ERROR_THROW_ON(ClConcatenate::validate(const_src_vector, dst, axis)); + + unsigned int offset = 0; + switch(_axis) + { + case Window::DimX: + { + switch(_num_inputs) + { + case 2: + { + // Configure WidthConcatenate2Tensors kernel + auto kernel = std::make_unique<kernels::ClWidthConcatenate2TensorsKernel>(); + kernel->configure(compile_context, src_vector.at(0), src_vector.at(1), dst); 
+ _concat_kernels.emplace_back(std::move(kernel)); + break; + } + case 4: + { + // Configure WidthConcatenate4Tensors kernel + auto kernel = std::make_unique<kernels::ClWidthConcatenate4TensorsKernel>(); + kernel->configure(compile_context, src_vector.at(0), src_vector.at(1), src_vector.at(2), src_vector.at(3), dst); + _concat_kernels.emplace_back(std::move(kernel)); + break; + } + default: + { + // Configure generic case WidthConcatenate kernels + for(unsigned int i = 0; i < _num_inputs; ++i) + { + auto kernel = std::make_unique<kernels::ClWidthConcatenateKernel>(); + kernel->configure(compile_context, src_vector.at(i), offset, dst); + offset += src_vector.at(i)->dimension(_axis); + _concat_kernels.emplace_back(std::move(kernel)); + } + break; + } + } + break; + } + case Window::DimY: + { + for(unsigned int i = 0; i < _num_inputs; ++i) + { + auto kernel = std::make_unique<kernels::ClHeightConcatenateKernel>(); + kernel->configure(compile_context, src_vector.at(i), offset, dst); + offset += src_vector.at(i)->dimension(_axis); + _concat_kernels.emplace_back(std::move(kernel)); + } + break; + } + case Window::DimZ: + { + for(unsigned int i = 0; i < _num_inputs; ++i) + { + auto kernel = std::make_unique<kernels::ClDepthConcatenateKernel>(); + kernel->configure(compile_context, src_vector.at(i), offset, dst); + offset += src_vector.at(i)->dimension(_axis); + _concat_kernels.emplace_back(std::move(kernel)); + } + break; + } + case 3: + { + for(unsigned int i = 0; i < _num_inputs; ++i) + { + auto kernel = std::make_unique<kernels::ClBatchConcatenateKernel>(); + kernel->configure(compile_context, src_vector.at(i), offset, dst); + offset += src_vector.at(i)->dimension(_axis); + _concat_kernels.emplace_back(std::move(kernel)); + } + break; + } + default: + ARM_COMPUTE_ERROR("Axis not supported"); + } +} + +Status ClConcatenate::validate(const std::vector<const ITensorInfo *> &src_vector, const ITensorInfo *dst, size_t axis) +{ + ARM_COMPUTE_RETURN_ERROR_ON(dst == nullptr); 
+ const unsigned int num_inputs = src_vector.size(); + + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(dst); + ARM_COMPUTE_RETURN_ERROR_ON(num_inputs < 2); + + unsigned int offset = 0; + switch(axis) + { + case Window::DimX: + { + switch(num_inputs) + { + case 2: + // Validate WidthConcatenate2Tensors kernels if there are 2 inputs + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src_vector[0], src_vector[1]); + ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenate2TensorsKernel::validate(src_vector[0], src_vector[1], dst)); + break; + case 4: + // Validate WidthConcatenate4Tensors kernels if there are 4 inputs + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src_vector[0], src_vector[1], src_vector[2], src_vector[3]); + ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenate4TensorsKernel::validate(src_vector[0], src_vector[1], src_vector[2], src_vector[3], dst)); + break; + default: + // Validate generic case of WidthConcatenate kernel + for(const auto &src : src_vector) + { + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); + ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenateKernel::validate(src, offset, dst)); + offset += src->dimension(axis); + } + break; + } + break; + } + case Window::DimY: + { + for(const auto &src : src_vector) + { + ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClHeightConcatenateKernel::validate(src, offset, dst)); + offset += src->dimension(axis); + } + break; + } + case Window::DimZ: + { + for(const auto &src : src_vector) + { + ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClDepthConcatenateKernel::validate(src, offset, dst)); + offset += src->dimension(axis); + } + break; + } + case 3: + { + for(const auto &src : src_vector) + { + ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClBatchConcatenateKernel::validate(src, offset, dst)); + offset += src->dimension(axis); + } + break; + } + default: + ARM_COMPUTE_ERROR("Axis not supported"); + } + + if(dst->total_size() != 0) + { + TensorShape dst_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(src_vector, axis); + 
ARM_COMPUTE_RETURN_ERROR_ON(dst_shape.total_size() != dst->tensor_shape().total_size()); + } + + return Status{}; +} + +void ClConcatenate::run(ITensorPack &tensors) +{ + if(tensors.empty()) + { + ARM_COMPUTE_ERROR("No inputs provided"); + } + + if(static_cast<int>(tensors.size()) - 1 != static_cast<int>(_num_inputs)) + { + ARM_COMPUTE_ERROR("Configured with different number of inputs"); + } + + if(_axis == Window::DimX && (_num_inputs == 2 || _num_inputs == 4)) + { + ARM_COMPUTE_ERROR_ON(_concat_kernels.empty()); + CLScheduler::get().enqueue_op(*_concat_kernels.at(0), tensors, true); + } + else + { + int i = 0; + for(auto &k : _concat_kernels) + { + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(ACL_SRC_VEC + i)); + pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(ACL_DST)); + CLScheduler::get().enqueue_op(*k, pack, true); + ++i; + } + } +} +} // namespace opencl +} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClConcatenate.h b/src/runtime/gpu/cl/operators/ClConcatenate.h new file mode 100644 index 0000000000..112e2ac6b7 --- /dev/null +++ b/src/runtime/gpu/cl/operators/ClConcatenate.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLCONCATENATE_H +#define ARM_COMPUTE_CLCONCATENATE_H + +#include "src/core/gpu/cl/ClCompileContext.h" +#include "src/core/gpu/cl/IClKernel.h" +#include "src/runtime/gpu/cl/IClOperator.h" + +#include <vector> + +namespace arm_compute +{ +namespace opencl +{ +/** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels: + * + * -# @ref kernels::ClWidthConcatenateKernel (if underlying concatenation axis is 0). + * -# @ref kernels::ClHeightConcatenateKernel (if underlying concatenation axis is 1). + * -# @ref kernels::ClDepthConcatenateKernel (if underlying concatenation axis is 2). + * -# @ref kernels::ClBatchConcatenateKernel (if underlying concatenation axis is 3). + */ +class ClConcatenate : public IClOperator +{ +public: + /** Default constructor */ + ClConcatenate(); + /** Initialise the kernel's inputs vector and dst. + * + * @note Input and dst tensor dimensions preconditions differ depending on the concatenation axis. + * @note Preconditions can be found respectively at @ref kernels::ClWidthConcatenateKernel, + * @ref kernels::ClHeightConcatenateKernel and @ref kernels::ClDepthConcatenateKernel. + * + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] src_vector The vectors containing all the tensors to concatenate. Data types supported: All + * @param[out] dst Destination tensor. Data types supported: same as @p src_vector. + * @param[in] axis Concatenation axis.
Supported underlying concatenation axis are 0, 1, 2 and 3. + */ + void configure(const ClCompileContext &compile_context, const std::vector<ITensorInfo *> &src_vector, ITensorInfo *dst, size_t axis); + /** Static function to check if given info will lead to a valid configuration of @ref ClConcatenate + * + * @note Input and dst tensor dimensions preconditions differ depending on the concatenation axis. + * @note Preconditions can be found respectively at @ref kernels::ClWidthConcatenateKernel, + * @ref kernels::ClHeightConcatenateKernel and @ref kernels::ClDepthConcatenateKernel. + * + * @param[in] src_vector The vectors containing all the tensors info to concatenate. Data types supported: All + * @param[in] dst Destination tensor info. Data types supported: same as @p src_vector. + * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. + * + * @return a status + */ + static Status validate(const std::vector<const ITensorInfo *> &src_vector, const ITensorInfo *dst, size_t axis); + + // Inherited methods overridden: + void run(ITensorPack &tensors) override; + +private: + std::vector<std::unique_ptr<IClKernel>> _concat_kernels; + unsigned int _num_inputs; + unsigned int _axis; +}; +} // namespace opencl +} // namespace arm_compute +#endif /* ARM_COMPUTE_CLCONCATENATE_H */ |