From 7d61ff041826782d14e67b7f5b7a2864905ff38b Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Mon, 18 Jan 2021 21:15:59 +0000 Subject: Make all CL Concatenate kernels and functions state-less Resolves COMPMID-3995 Change-Id: I84172bed20924f1d9ae3b4d14d7b321e9494296e Signed-off-by: Michele Di Giorgio Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4887 Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas --- Android.bp | 13 +- SConscript | 4 + .../runtime/CL/functions/CLConcatenateLayer.h | 69 +----- docs/00_introduction.dox | 20 +- src/core/CL/CLKernels.h | 8 +- .../CL/kernels/CLBatchConcatenateLayerKernel.cpp | 150 ------------ .../CL/kernels/CLBatchConcatenateLayerKernel.h | 82 ------- .../CL/kernels/CLDepthConcatenateLayerKernel.cpp | 135 ----------- .../CL/kernels/CLDepthConcatenateLayerKernel.h | 80 ------- .../CL/kernels/CLHeightConcatenateLayerKernel.cpp | 128 ----------- .../CL/kernels/CLHeightConcatenateLayerKernel.h | 77 ------- .../kernels/CLWidthConcatenate2TensorsKernel.cpp | 150 ------------ .../CL/kernels/CLWidthConcatenate2TensorsKernel.h | 73 ------ .../kernels/CLWidthConcatenate4TensorsKernel.cpp | 180 --------------- .../CL/kernels/CLWidthConcatenate4TensorsKernel.h | 77 ------- .../CL/kernels/CLWidthConcatenateLayerKernel.cpp | 123 ---------- .../CL/kernels/CLWidthConcatenateLayerKernel.h | 74 ------ src/core/gpu/cl/ClCompileContext.h | 36 +++ src/core/gpu/cl/IClKernel.h | 37 +++ .../gpu/cl/kernels/ClBatchConcatenateKernel.cpp | 156 +++++++++++++ src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h | 77 +++++++ .../gpu/cl/kernels/ClDepthConcatenateKernel.cpp | 141 ++++++++++++ src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h | 77 +++++++ .../gpu/cl/kernels/ClHeightConcatenateKernel.cpp | 134 +++++++++++ .../gpu/cl/kernels/ClHeightConcatenateKernel.h | 74 ++++++ .../kernels/ClWidthConcatenate2TensorsKernel.cpp | 156 +++++++++++++ .../cl/kernels/ClWidthConcatenate2TensorsKernel.h | 70 ++++++ .../kernels/ClWidthConcatenate4TensorsKernel.cpp | 186 +++++++++++++++ .../cl/kernels/ClWidthConcatenate4TensorsKernel.h | 75 ++++++ .../gpu/cl/kernels/ClWidthConcatenateKernel.cpp | 129 +++++++++++ src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h | 71 ++++++ src/runtime/CL/functions/CLConcatenateLayer.cpp | 243 +------------------- src/runtime/gpu/cl/IClOperator.h | 37 +++ src/runtime/gpu/cl/operators/ClConcatenate.cpp | 254 +++++++++++++++++++++ src/runtime/gpu/cl/operators/ClConcatenate.h | 86 +++++++ 35 files changed, 1839 insertions(+), 1643 deletions(-) delete mode 100644 src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp delete mode 100644 src/core/CL/kernels/CLBatchConcatenateLayerKernel.h delete mode 100644 src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp delete mode 100644 src/core/CL/kernels/CLDepthConcatenateLayerKernel.h delete mode 100644 src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp delete mode 100644 src/core/CL/kernels/CLHeightConcatenateLayerKernel.h delete mode 100644 src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp delete mode 100644 src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h delete mode 100644 src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp delete mode 100644 src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h delete mode 100644 src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp delete mode 100644 src/core/CL/kernels/CLWidthConcatenateLayerKernel.h create mode 100644 src/core/gpu/cl/ClCompileContext.h create mode 100644 src/core/gpu/cl/IClKernel.h create mode 100644 
src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp create mode 100644 src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h create mode 100644 src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp create mode 100644 src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h create mode 100644 src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp create mode 100644 src/core/gpu/cl/kernels/ClHeightConcatenateKernel.h create mode 100644 src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp create mode 100644 src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h create mode 100644 src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp create mode 100644 src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h create mode 100644 src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp create mode 100644 src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h create mode 100644 src/runtime/gpu/cl/IClOperator.h create mode 100644 src/runtime/gpu/cl/operators/ClConcatenate.cpp create mode 100644 src/runtime/gpu/cl/operators/ClConcatenate.h diff --git a/Android.bp b/Android.bp index 41ed188e6d..4427bd4fee 100644 --- a/Android.bp +++ b/Android.bp @@ -82,7 +82,6 @@ cc_library_static { "src/core/CL/kernels/CLAccumulateKernel.cpp", "src/core/CL/kernels/CLActivationLayerKernel.cpp", "src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp", - "src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp", "src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp", "src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp", "src/core/CL/kernels/CLBitwiseKernel.cpp", @@ -101,7 +100,6 @@ cc_library_static { "src/core/CL/kernels/CLCropKernel.cpp", "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp", "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp", - "src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp", "src/core/CL/kernels/CLDepthConvertLayerKernel.cpp", "src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp", "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp", @@ -145,7 +143,6 @@ cc_library_static { "src/core/CL/kernels/CLHOGDescriptorKernel.cpp", "src/core/CL/kernels/CLHOGDetectorKernel.cpp", "src/core/CL/kernels/CLHarrisCornersKernel.cpp", - "src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp", "src/core/CL/kernels/CLHistogramKernel.cpp", "src/core/CL/kernels/CLIm2ColKernel.cpp", "src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp", @@ -197,9 +194,6 @@ cc_library_static { "src/core/CL/kernels/CLWarpAffineKernel.cpp", "src/core/CL/kernels/CLWarpPerspectiveKernel.cpp", "src/core/CL/kernels/CLWeightsReshapeKernel.cpp", - "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp", - "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp", - "src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp", "src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp", "src/core/CL/kernels/CLWinogradInputTransformKernel.cpp", "src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp", @@ -437,6 +431,12 @@ cc_library_static { "src/core/cpu/kernels/add/sve/qsymm16.cpp", "src/core/cpu/kernels/floor/NEON/fp16.cpp", "src/core/cpu/kernels/floor/NEON/fp32.cpp", + "src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp", + "src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp", + "src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp", + "src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp", + "src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp", + "src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp", "src/core/helpers/SoftmaxHelpers.cpp", "src/core/helpers/WindowHelpers.cpp", 
"src/core/utils/ScaleUtils.cpp", @@ -781,6 +781,7 @@ cc_library_static { "src/runtime/cpu/operators/CpuFloor.cpp", "src/runtime/cpu/operators/CpuPermute.cpp", "src/runtime/cpu/operators/CpuReshape.cpp", + "src/runtime/gpu/cl/operators/ClConcatenate.cpp", "utils/CommonGraphOptions.cpp", "utils/GraphUtils.cpp", "utils/Utils.cpp", diff --git a/SConscript b/SConscript index 8b8e504832..121cf3220a 100644 --- a/SConscript +++ b/SConscript @@ -212,11 +212,15 @@ if env['opencl']: core_files += Glob('src/core/CL/gemm/native/*.cpp') core_files += Glob('src/core/CL/gemm/reshaped/*.cpp') core_files += Glob('src/core/CL/gemm/reshaped_only_rhs/*.cpp') + core_files += Glob('src/core/gpu/cl/*.cpp') + core_files += Glob('src/core/gpu/cl/kernels/*.cpp') runtime_files += Glob('src/runtime/CL/*.cpp') runtime_files += Glob('src/runtime/CL/functions/*.cpp') runtime_files += Glob('src/runtime/CL/gemm/*.cpp') runtime_files += Glob('src/runtime/CL/tuners/*.cpp') + runtime_files += Glob('src/runtime/gpu/cl/*.cpp') + runtime_files += Glob('src/runtime/gpu/cl/operators/*.cpp') graph_files += Glob('src/graph/backends/CL/*.cpp') diff --git a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h index 5e7003a112..bfc8a39ac9 100644 --- a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h +++ b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_CLCONCATENATELAYER_H #define ARM_COMPUTE_CLCONCATENATELAYER_H -#include "arm_compute/runtime/CL/ICLOperator.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/core/Types.h" @@ -43,10 +42,10 @@ class Status; /** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels: * - * -# @ref CLWidthConcatenateLayerKernel (if underlying concatenation axis is 0). - * -# @ref CLHeightConcatenateLayerKernel (if underlying concatenation axis is 1). - * -# @ref CLDepthConcatenateLayerKernel (if underlying concatenation axis is 2). - * -# @ref CLBatchConcatenateLayerKernel (if underlying concatenation axis is 3). + * -# @ref opencl::kernels::ClWidthConcatenateKernel (if underlying concatenation axis is 0). + * -# @ref opencl::kernels::ClHeightConcatenateKernel (if underlying concatenation axis is 1). + * -# @ref opencl::kernels::ClDepthConcatenateKernel (if underlying concatenation axis is 2). + * -# @ref opencl::kernels::ClBatchConcatenateKernel (if underlying concatenation axis is 3). */ class CLConcatenateLayer : public IFunction { @@ -66,7 +65,8 @@ public: /** Initialise the kernel's inputs vector and output. * * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. - * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel. + * @note Preconditions can be found respectively at @ref opencl::kernels::ClWidthConcatenateKernel, + * @ref opencl::kernels::ClHeightConcatenateKernel and @ref opencl::kernels::ClDepthConcatenateKernel. * * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: All * @param[out] output Output tensor. Data types supported: Same as @p input. @@ -76,7 +76,8 @@ public: /** Initialise the kernel's inputs vector and output. 
* * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. - * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel. + * @note Preconditions can be found respectively at @ref opencl::kernels::ClWidthConcatenateKernel, + * @ref opencl::kernels::ClHeightConcatenateKernel and @ref opencl::kernels::ClDepthConcatenateKernel. * * @param[in] compile_context The compile context to be used. * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: All @@ -87,7 +88,8 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref CLConcatenateLayer * * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. - * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel. + * @note Preconditions can be found respectively at @ref opencl::kernels::ClWidthConcatenateKernel, + * @ref opencl::kernels::ClHeightConcatenateKernel and @ref opencl::kernels::ClDepthConcatenateKernel. * * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: All. * @param[in] output Output tensor info. Data types supported: Same as @p input. @@ -104,54 +106,5 @@ private: struct Impl; std::unique_ptr _impl; }; - -namespace experimental -{ -/** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels: - * - * -# @ref CLWidthConcatenateLayerKernel (if underlying concatenation axis is 0). - * -# @ref CLHeightConcatenateLayerKernel (if underlying concatenation axis is 1). - * -# @ref CLDepthConcatenateLayerKernel (if underlying concatenation axis is 2). - * -# @ref CLBatchConcatenateLayerKernel (if underlying concatenation axis is 3). - */ -class CLConcatenation : public ICLOperator -{ -public: - /** Default constructor */ - CLConcatenation(); - /** Initialise the kernel's inputs vector and output. - * - * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. - * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel. - * - * - * @param[in] compile_context The compile context to be used. - * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: All - * @param[out] output Output tensor. Data types supported: Same as @p input. - * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. - */ - void configure(const CLCompileContext &compile_context, const std::vector &inputs_vector, ITensorInfo *output, size_t axis); - /** Static function to check if given info will lead to a valid configuration of @ref NEConcatenateLayer - * - * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. - * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel. - * - * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: All - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * @param[in] axis Concatenation axis. 
Supported underlying concatenation axis are 0, 1, 2 and 3. - * - * @return a status - */ - static Status validate(const std::vector &inputs_vector, const ITensorInfo *output, size_t axis); - - // Inherited methods overridden: - void run(ITensorPack &tensors) override; - -private: - std::vector> _concat_kernels; - unsigned int _num_inputs; - unsigned int _axis; -}; -} // namespace experimental } // namespace arm_compute #endif /* ARM_COMPUTE_CLCONCATENATELAYER_H */ diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox index 9c0020da66..af78a70abc 100644 --- a/docs/00_introduction.dox +++ b/docs/00_introduction.dox @@ -165,7 +165,7 @@ v20.11 Public major release - @ref NEGEMMLowpMatrixAReductionKernel - @ref NEGEMMLowpMatrixBReductionKernel - Removed padding from OpenCL kernels: - - @ref CLBatchConcatenateLayerKernel + - CLBatchConcatenateLayerKernel - @ref CLElementwiseOperationKernel - @ref CLBatchNormalizationLayerKernel - @ref CLPoolingLayerKernel @@ -184,17 +184,17 @@ v20.11 Public major release - @ref CLDepthwiseConvolutionLayer3x3NHWCKernel - @ref CLActivationLayerKernel - @ref CLWinogradFilterTransformKernel - - @ref CLWidthConcatenateLayerKernel - - @ref CLWidthConcatenate4TensorsKernel - - @ref CLWidthConcatenate2TensorsKernel + - CLWidthConcatenateLayerKernel + - CLWidthConcatenate4TensorsKernel + - CLWidthConcatenate2TensorsKernel - @ref CLLogits1DMaxShiftExpSumKernel - @ref CLLogits1DNormKernel - - @ref CLHeightConcatenateLayerKernel + - CLHeightConcatenateLayerKernel - @ref CLGEMMMatrixMultiplyKernel - @ref CLGEMMLowpQuantizeDownInt32ScaleKernel - @ref CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel - @ref CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel - - @ref CLDepthConcatenateLayerKernel + - CLDepthConcatenateLayerKernel - @ref CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel - Removed OpenCL kernels / functions: - CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel @@ -721,7 +721,7 @@ v19.08 Public major release - @ref CLNegLayer - @ref CLPReluLayer - @ref CLSinLayer - - @ref CLBatchConcatenateLayerKernel + - CLBatchConcatenateLayerKernel - @ref CLDepthToSpaceLayerKernel / @ref CLDepthToSpaceLayer - @ref CLGEMMLowpMatrixMultiplyNativeKernel - CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel @@ -773,7 +773,7 @@ v19.05 Public major release - @ref CLFFTScaleKernel - @ref CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel - @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel - - @ref CLHeightConcatenateLayerKernel + - CLHeightConcatenateLayerKernel - @ref CLDirectDeconvolutionLayer - @ref CLFFT1D - @ref CLFFT2D @@ -1011,7 +1011,7 @@ v18.05 Public major release - @ref CLCopy / @ref CLCopyKernel - @ref CLLSTMLayer - @ref CLRNNLayer - - CLWidthConcatenateLayer / @ref CLWidthConcatenateLayerKernel + - CLWidthConcatenateLayer / CLWidthConcatenateLayerKernel - @ref CLWinogradFilterTransformKernel / @ref CLWinogradInputTransformKernel / @ref CLWinogradConvolutionLayer - @ref CLWinogradInputTransformKernel / @ref CLWinogradInputTransform - New Neon kernels / functions: @@ -1220,7 +1220,7 @@ v17.06 Public major release - User can specify his own scheduler by implementing the @ref IScheduler interface. 
- New OpenCL kernels / functions: - @ref CLBatchNormalizationLayerKernel / @ref CLBatchNormalizationLayer - - @ref CLDepthConcatenateLayerKernel / CLDepthConcatenateLayer + - CLDepthConcatenateLayerKernel / CLDepthConcatenateLayer - @ref CLHOGOrientationBinningKernel @ref CLHOGBlockNormalizationKernel, @ref CLHOGDetectorKernel / @ref CLHOGDescriptor @ref CLHOGDetector @ref CLHOGGradient @ref CLHOGMultiDetection - CLLocallyConnectedMatrixMultiplyKernel / CLLocallyConnectedLayer - @ref CLWeightsReshapeKernel / @ref CLConvolutionLayerReshapeWeights diff --git a/src/core/CL/CLKernels.h b/src/core/CL/CLKernels.h index f23871d4db..11f1d2d7cf 100644 --- a/src/core/CL/CLKernels.h +++ b/src/core/CL/CLKernels.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,7 +29,6 @@ #include "src/core/CL/kernels/CLAccumulateKernel.h" #include "src/core/CL/kernels/CLActivationLayerKernel.h" #include "src/core/CL/kernels/CLArgMinMaxLayerKernel.h" -#include "src/core/CL/kernels/CLBatchConcatenateLayerKernel.h" #include "src/core/CL/kernels/CLBatchNormalizationLayerKernel.h" #include "src/core/CL/kernels/CLBatchToSpaceLayerKernel.h" #include "src/core/CL/kernels/CLBitwiseKernel.h" @@ -48,7 +47,6 @@ #include "src/core/CL/kernels/CLCropKernel.h" #include "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h" #include "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h" -#include "src/core/CL/kernels/CLDepthConcatenateLayerKernel.h" #include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" #include "src/core/CL/kernels/CLDepthToSpaceLayerKernel.h" #include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h" @@ -92,7 +90,6 @@ #include "src/core/CL/kernels/CLHOGDescriptorKernel.h" #include "src/core/CL/kernels/CLHOGDetectorKernel.h" #include "src/core/CL/kernels/CLHarrisCornersKernel.h" -#include "src/core/CL/kernels/CLHeightConcatenateLayerKernel.h" #include "src/core/CL/kernels/CLHistogramKernel.h" #include "src/core/CL/kernels/CLIm2ColKernel.h" #include "src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h" @@ -144,9 +141,6 @@ #include "src/core/CL/kernels/CLWarpAffineKernel.h" #include "src/core/CL/kernels/CLWarpPerspectiveKernel.h" #include "src/core/CL/kernels/CLWeightsReshapeKernel.h" -#include "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h" -#include "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h" -#include "src/core/CL/kernels/CLWidthConcatenateLayerKernel.h" #include "src/core/CL/kernels/CLWinogradFilterTransformKernel.h" #include "src/core/CL/kernels/CLWinogradInputTransformKernel.h" #include "src/core/CL/kernels/CLWinogradOutputTransformKernel.h" diff --git a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp deleted file mode 100644 index ccd6a5a0fc..0000000000 --- a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLBatchConcatenateLayerKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Utils.h" -#include "src/core/CL/CLValidate.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/Cast.h" - -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace -{ -Status validate_arguments(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimX) != output->dimension(Window::DimX)); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimY) != output->dimension(Window::DimY)); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimZ) != output->dimension(Window::DimZ)); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(3) + batch_offset > output->dimension(3)); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(4, input, output); - - return Status{}; -} -} // namespace - -CLBatchConcatenateLayerKernel::CLBatchConcatenateLayerKernel() - : _batch_offset(0) -{ -} - -void CLBatchConcatenateLayerKernel::configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, batch_offset, output)); - - auto padding_info = get_padding_info({ input, output }); - - _batch_offset = batch_offset; - - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16 / input->element_size(), input->dimension(0)); - - // Add build options - CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->data_type())); - build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); - build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->dimension(0) % num_elems_processed_per_iteration)); - if(is_data_type_quantized_asymmetric(input->data_type()) && input->quantization_info() != 
output->quantization_info()) - { - const UniformQuantizationInfo iq_info = input->quantization_info().uniform(); - const UniformQuantizationInfo oq_info = output->quantization_info().uniform(); - - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset)); - build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); - } - - // Create kernel - _kernel = create_kernel(compile_context, "concatenate", build_opts.options()); - - // Configure kernel window - auto win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); - win.set(3, Window::Dimension(0, input->tensor_shape()[3], 1)); - ICLKernel::configure_internal(win); - - // Set output valid region - output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); - - // Set config_id for enabling LWS tuning - _config_id = "concatenate_"; - _config_id += support::cpp11::to_string(3); - _config_id += "_"; - _config_id += support::cpp11::to_string(batch_offset); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->dimension(2)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->dimension(3)); - - ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); -} - -Status CLBatchConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *input, - unsigned int batch_offset, - const arm_compute::ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, batch_offset, output)); - return Status{}; -} - -void CLBatchConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - const auto src = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC)); - auto dst = utils::cast::polymorphic_downcast(tensors.get_tensor(TensorType::ACL_DST)); - - Window slice = window.first_slice_window_3D(); - - const int offset_to_first_elements_in_bytes = _batch_offset * dst->info()->strides_in_bytes()[3]; - - unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the input and output parameters - _kernel.setArg(idx, offset_to_first_elements_in_bytes); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, src, slice); - add_3D_tensor_argument(idx, dst, slice); - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_3D(slice)); -} -} // namespace arm_compute diff --git a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.h b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.h deleted file mode 100644 index 54a89eb243..0000000000 --- a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H -#define ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the batch concatenate kernel. - * The input tensor will be concatenated into the output tensor. - */ -class CLBatchConcatenateLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLBatchConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBatchConcatenateLayerKernel(const CLBatchConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBatchConcatenateLayerKernel &operator=(const CLBatchConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLBatchConcatenateLayerKernel(CLBatchConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLBatchConcatenateLayerKernel &operator=(CLBatchConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~CLBatchConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: All. - * @param[in] batch_offset The offset on axis # 3. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. - * - * @note: The output tensor's low two dimensions can't be smaller than the input one's. - * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. - * - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLBatchConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: All. - * @param[in] batch_offset The offset on axis # 3. - * @param[in] output Output tensor info. Data types supported: Same as @p input. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; - -private: - unsigned int _batch_offset; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H */ diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp deleted file mode 100644 index eb5bfc2d86..0000000000 --- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "src/core/CL/kernels/CLDepthConcatenateLayerKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Utils.h" -#include "src/core/CL/CLValidate.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/Cast.h" - -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace -{ -Status validate_arguments(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimX) != output->dimension(Window::DimX)); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimY) != output->dimension(Window::DimY)); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(2) + depth_offset > output->dimension(2)); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(3, input, output); - - return Status{}; -} -} // namespace - -CLDepthConcatenateLayerKernel::CLDepthConcatenateLayerKernel() - : _depth_offset(0) -{ -} - -void CLDepthConcatenateLayerKernel::configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, depth_offset, output)); - - auto padding_info = get_padding_info({ input, output }); - - _depth_offset = depth_offset; - - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16 / input->element_size(), input->dimension(0)); - - // Add build options - CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->data_type())); - build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); - build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->dimension(0) % num_elems_processed_per_iteration)); - if(is_data_type_quantized_asymmetric(input->data_type()) && input->quantization_info() != output->quantization_info()) - { - const UniformQuantizationInfo iq_info = input->quantization_info().uniform(); - const UniformQuantizationInfo oq_info = output->quantization_info().uniform(); - - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset)); - build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); - } - - // Create kernel - _kernel = create_kernel(compile_context, "concatenate", build_opts.options()); - - // Configure kernel window - auto win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); - win.set(Window::DimZ, Window::Dimension(0, input->tensor_shape().z(), 1)); - ICLKernel::configure_internal(win); - - // Set output valid region - output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); - - ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); -} - -Status CLDepthConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *input, - 
unsigned int depth_offset, - const arm_compute::ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, depth_offset, output)); - return Status{}; -} - -void CLDepthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - const auto src = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC)); - auto dst = utils::cast::polymorphic_downcast(tensors.get_tensor(TensorType::ACL_DST)); - - Window slice = window.first_slice_window_3D(); - - const int offset_to_first_elements_in_bytes = _depth_offset * dst->info()->strides_in_bytes()[2]; - - unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the input and output parameters - _kernel.setArg(idx, offset_to_first_elements_in_bytes); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, src, slice); - add_3D_tensor_argument(idx, dst, slice); - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_3D(slice)); -} -} // namespace arm_compute diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.h b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.h deleted file mode 100644 index 6c73bd4bf4..0000000000 --- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H -#define ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -/** Interface for the depth concatenate kernel. - * The input tensor will be concatenated into the output tensor. 
- */ -class CLDepthConcatenateLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLDepthConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthConcatenateLayerKernel(const CLDepthConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthConcatenateLayerKernel &operator=(const CLDepthConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLDepthConcatenateLayerKernel(CLDepthConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLDepthConcatenateLayerKernel &operator=(CLDepthConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~CLDepthConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] depth_offset The offset on the Z axis. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. - * - * @note: The output tensor's low two dimensions can't be smaller than the input one's. - * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. - * - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[in] depth_offset The offset on the Z axis. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; - -private: - unsigned int _depth_offset; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H */ diff --git a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp deleted file mode 100644 index 8aa7366d50..0000000000 --- a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLHeightConcatenateLayerKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "src/core/CL/CLValidate.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/Cast.h" - -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace -{ -Status validate_arguments(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimY) + height_offset > output->dimension(Window::DimY)); - - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != output->dimension(0)); - for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i) - { - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(i) != output->dimension(i)); - } - ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); - - return Status{}; -} -} // namespace - -CLHeightConcatenateLayerKernel::CLHeightConcatenateLayerKernel() - : _height_offset(0) -{ -} - -Status CLHeightConcatenateLayerKernel::validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, height_offset, output)); - return Status{}; -} - -void CLHeightConcatenateLayerKernel::configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int height_offset, ITensorInfo *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, height_offset, output)); - - auto padding_info = get_padding_info({ input, output }); - - _height_offset = height_offset; - - // Add build options - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(4, input->dimension(0)); - - CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(input->element_size())); - build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); - build_opts.add_option("-DHEIGHT_OFFSET=" + support::cpp11::to_string(_height_offset)); - build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input->dimension(2))); - build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->dimension(0) % num_elems_processed_per_iteration)); - - if(is_data_type_quantized_asymmetric(input->data_type()) && input->quantization_info() != output->quantization_info()) - { - const UniformQuantizationInfo iq_info = input->quantization_info().uniform(); - const UniformQuantizationInfo oq_info = output->quantization_info().uniform(); - - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset)); - build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale)); - 
build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); - } - - // Create kernel - _kernel = create_kernel(compile_context, "concatenate_height", build_opts.options()); - // Configure kernel window - - // The window needs to be based on input as we copy all the heights of input - Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); - ICLKernel::configure_internal(win.collapse(win, Window::DimZ)); - - // Set output valid region - output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); - - ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); -} - -void CLHeightConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - const auto src = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC)); - auto dst = utils::cast::polymorphic_downcast(tensors.get_tensor(TensorType::ACL_DST)); - - unsigned int idx = 0; - add_4D_tensor_argument(idx, src, window); - add_4D_tensor_argument(idx, dst, window); - enqueue(queue, *this, window, lws_hint()); -} -} // namespace arm_compute diff --git a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.h b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.h deleted file mode 100644 index f4cb627052..0000000000 --- a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H -#define ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -/** Interface for the height concatenate kernel. - * The input tensor will be concatenated into the output tensor. 
- */ -class CLHeightConcatenateLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLHeightConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHeightConcatenateLayerKernel(const CLHeightConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHeightConcatenateLayerKernel &operator=(const CLHeightConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLHeightConcatenateLayerKernel(CLHeightConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLHeightConcatenateLayerKernel &operator=(CLHeightConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~CLHeightConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: All. - * @param[in] height_offset The starting offset on the Y axis for the output tensor. - * @param[out] output Output tensor. Data types supported: Same as @p input. - * - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int height_offset, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLHeightConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: All. - * @param[in] height_offset The starting offset on the Y axis for the output tensor. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; - -private: - unsigned int _height_offset; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp deleted file mode 100644 index d6697ba46b..0000000000 --- a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "src/core/CL/CLValidate.h" -#include "src/core/helpers/WindowHelpers.h" -#include "src/core/utils/helpers/tensor_info.h" -#include "support/Cast.h" - -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace -{ -Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output); - ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input1); - ARM_COMPUTE_RETURN_ERROR_ON(input1->data_type() == DataType::UNKNOWN); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2, output); - ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(0) + input2->dimension(0) > output->dimension(0)); - - for(size_t i = 1; i < Coordinates::num_max_dimensions; ++i) - { - ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(i) != output->dimension(i)); - ARM_COMPUTE_RETURN_ERROR_ON(input2->dimension(i) != output->dimension(i)); - } - ARM_COMPUTE_RETURN_ERROR_ON(input1->num_dimensions() > 4); - - return Status{}; -} -} // namespace - -Status CLWidthConcatenate2TensorsKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, output)); - return Status{}; -} - -void CLWidthConcatenate2TensorsKernel::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1, input2, output)); - - auto padding_info = get_padding_info({ input1, input2, output }); - - const unsigned int min_dimension = std::min(input1->dimension(0), input2->dimension(0)); - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(8, min_dimension); - const unsigned int vec_size_leftover = output->dimension(0) % num_elems_processed_per_iteration; - - // Add build options - CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input1->data_type())); - build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); - build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(vec_size_leftover)); - build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input1->dimension(2))); - build_opts.add_option("-DINPUT1_WIDTH=" + support::cpp11::to_string(input1->dimension(0))); - build_opts.add_option("-DINPUT2_WIDTH=" + support::cpp11::to_string(input2->dimension(0))); - build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(input1->element_size())); - build_opts.add_option("-DINPUT1_ROTATE_N=" + support::cpp11::to_string((input1->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration)); - - // If input have different quantization info set quantization parameters needed for the re-quantization process - const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(output, input1, input2); - if(is_data_type_quantized_asymmetric(input1->data_type()) && have_different_qinfo) - { - const UniformQuantizationInfo iq1_info = input1->quantization_info().uniform(); - const UniformQuantizationInfo iq2_info = 
input2->quantization_info().uniform(); - const UniformQuantizationInfo oq_info = output->quantization_info().uniform(); - - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq1_info.offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale)); - build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(iq2_info.offset)); - build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2_info.scale)); - build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); - } - - // Create kernel - _kernel = create_kernel(compile_context, "concatenate_width_x2", build_opts.options()); - - // Configure kernel window - Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); - ICLKernel::configure_internal(win.collapse(win, Window::DimZ)); - - // Set output valid region - output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); - ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); - - // Set config_id for enabling LWS tuning - _config_id = "concatenate_width_x2_"; - _config_id += lower_string(string_from_data_type(input1->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input1->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input1->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input2->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input2->dimension(1)); -} - -void CLWidthConcatenate2TensorsKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window slice = window.first_slice_window_4D(); - - const auto src0 = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC_VEC)); - const auto src1 = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 1)); - auto dst = utils::cast::polymorphic_downcast(tensors.get_tensor(TensorType::ACL_DST)); - - do - { - unsigned int idx = 0; - add_4D_tensor_argument(idx, src0, slice); - add_4D_tensor_argument(idx, src1, slice); - add_4D_tensor_argument(idx, dst, slice); - enqueue(queue, *this, window, lws_hint()); - } - while(window.slide_window_slice_4D(slice)); -} -} // namespace arm_compute diff --git a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h deleted file mode 100644 index 2af89e12eb..0000000000 --- a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H -#define ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -/** Interface for the width concatenate kernel of 2 tensors. - * The input1 and input2 tensors will be concatenated into the output tensor. - */ -class CLWidthConcatenate2TensorsKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLWidthConcatenate2TensorsKernel() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenate2TensorsKernel(const CLWidthConcatenate2TensorsKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenate2TensorsKernel &operator=(const CLWidthConcatenate2TensorsKernel &) = delete; - /** Allow instances of this class to be moved */ - CLWidthConcatenate2TensorsKernel(CLWidthConcatenate2TensorsKernel &&) = default; - /** Allow instances of this class to be moved */ - CLWidthConcatenate2TensorsKernel &operator=(CLWidthConcatenate2TensorsKernel &&) = default; - /** Default destructor */ - ~CLWidthConcatenate2TensorsKernel() = default; - /** Initialise the kernel's input1s and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 First input tensor. Data types supported: All. - * @param[in] input2 Second input tensor. Data types supported: same as @p input1 - * @param[out] output Output tensor. Data types supported: Same as @p input1. - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate2TensorsKernel - * - * @param[in] input1 First tensor info. Data types supported: All. - * @param[in] input2 Second tensor info. Data types supported: same as @p input1 - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H */ diff --git a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp deleted file mode 100644 index 7ecdd30224..0000000000 --- a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Utils.h" -#include "src/core/CL/CLValidate.h" -#include "src/core/helpers/WindowHelpers.h" -#include "src/core/utils/helpers/tensor_info.h" -#include "support/Cast.h" - -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace -{ -Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *input3, const ITensorInfo *input4, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, input3, input4, output); - ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input1); - ARM_COMPUTE_RETURN_ERROR_ON(input1->data_type() == DataType::UNKNOWN); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2, input3, input4, output); - ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(0) + input2->dimension(0) + input3->dimension(0) + input4->dimension(0) > output->dimension(0)); - - for(size_t i = 1; i < Coordinates::num_max_dimensions; ++i) - { - ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(i) != output->dimension(i)); - ARM_COMPUTE_RETURN_ERROR_ON(input2->dimension(i) != output->dimension(i)); - ARM_COMPUTE_RETURN_ERROR_ON(input3->dimension(i) != output->dimension(i)); - ARM_COMPUTE_RETURN_ERROR_ON(input4->dimension(i) != output->dimension(i)); - } - ARM_COMPUTE_RETURN_ERROR_ON(input1->num_dimensions() > 4); - - return Status{}; -} -} // namespace - -CLWidthConcatenate4TensorsKernel::CLWidthConcatenate4TensorsKernel() -{ -} - -Status CLWidthConcatenate4TensorsKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *input3, const ITensorInfo *input4, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, input3, input4, output)); - return Status{}; -} - -void CLWidthConcatenate4TensorsKernel::configure(const CLCompileContext &compile_context, - ITensorInfo *input1, ITensorInfo *input2, - ITensorInfo *input3, ITensorInfo *input4, - ITensorInfo *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, input3, input4, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1, input2, input3, input4, output)); - - auto padding_info = get_padding_info({ input1, 
input2, input3, input4, output }); - const unsigned int min_dimension = std::min(std::min(input1->dimension(0), input2->dimension(0)), std::min(input3->dimension(0), input4->dimension(0))); - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(8, min_dimension); - const unsigned int vec_size_leftover = output->dimension(0) % num_elems_processed_per_iteration; - - // Add build options - CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input1->data_type())); - build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); - build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(vec_size_leftover)); - build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input1->dimension(2))); - build_opts.add_option("-DINPUT1_WIDTH=" + support::cpp11::to_string(input1->dimension(0))); - build_opts.add_option("-DINPUT2_WIDTH=" + support::cpp11::to_string(input2->dimension(0))); - build_opts.add_option("-DINPUT3_WIDTH=" + support::cpp11::to_string(input3->dimension(0))); - build_opts.add_option("-DINPUT4_WIDTH=" + support::cpp11::to_string(input4->dimension(0))); - build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(input1->element_size())); - build_opts.add_option("-DINPUT1_ROTATE_N=" + support::cpp11::to_string((input1->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration)); - build_opts.add_option("-DINPUT2_ROTATE_N=" + support::cpp11::to_string((input1->dimension(0) + input2->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration)); - build_opts.add_option("-DINPUT3_ROTATE_N=" + support::cpp11::to_string((input1->dimension(0) + input2->dimension(0) + input3->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration)); - - // If input have different quantization info set quantization parameters needed for the re-quantization process - const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(output, input1, input2, input3, input4); - if(is_data_type_quantized_asymmetric(input1->data_type()) && have_different_qinfo) - { - const UniformQuantizationInfo iq1_info = input1->quantization_info().uniform(); - const UniformQuantizationInfo iq2_info = input2->quantization_info().uniform(); - const UniformQuantizationInfo iq3_info = input3->quantization_info().uniform(); - const UniformQuantizationInfo iq4_info = input4->quantization_info().uniform(); - const UniformQuantizationInfo oq_info = output->quantization_info().uniform(); - - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq1_info.offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale)); - build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(iq2_info.offset)); - build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2_info.scale)); - build_opts.add_option("-DOFFSET_IN3=" + float_to_string_with_full_precision(iq3_info.offset)); - build_opts.add_option("-DSCALE_IN3=" + float_to_string_with_full_precision(iq3_info.scale)); - build_opts.add_option("-DOFFSET_IN4=" + float_to_string_with_full_precision(iq4_info.offset)); - build_opts.add_option("-DSCALE_IN4=" + float_to_string_with_full_precision(iq4_info.scale)); - build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); - } - - // Create 
kernel - _kernel = create_kernel(compile_context, "concatenate_width_x4", build_opts.options()); - - // Configure kernel window - Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); - ICLKernel::configure_internal(win.collapse(win, Window::DimZ)); - - // Set output valid region - output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); - ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); - - // Set config_id for enabling LWS tuning - _config_id = "concatenate_width_x4_"; - _config_id += lower_string(string_from_data_type(input1->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input1->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input1->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input2->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input2->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input3->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input3->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input4->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input4->dimension(1)); -} - -void CLWidthConcatenate4TensorsKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - const auto src0 = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC_VEC)); - const auto src1 = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 1)); - const auto src2 = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 2)); - const auto src3 = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 3)); - auto dst = utils::cast::polymorphic_downcast(tensors.get_tensor(TensorType::ACL_DST)); - - Window slice = window.first_slice_window_4D(); - - do - { - unsigned int idx = 0; - add_4D_tensor_argument(idx, src0, slice); - add_4D_tensor_argument(idx, src1, slice); - add_4D_tensor_argument(idx, src2, slice); - add_4D_tensor_argument(idx, src3, slice); - add_4D_tensor_argument(idx, dst, slice); - enqueue(queue, *this, window, lws_hint()); - } - while(window.slide_window_slice_4D(slice)); -} -} // namespace arm_compute diff --git a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h deleted file mode 100644 index 0caf87114d..0000000000 --- a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H -#define ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -/** Interface for the width concatenate kernel of 4 tensors. - * All input tensors will be concatenated into the output tensor. - */ -class CLWidthConcatenate4TensorsKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLWidthConcatenate4TensorsKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenate4TensorsKernel(const CLWidthConcatenate4TensorsKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenate4TensorsKernel &operator=(const CLWidthConcatenate4TensorsKernel &) = delete; - /** Allow instances of this class to be moved */ - CLWidthConcatenate4TensorsKernel(CLWidthConcatenate4TensorsKernel &&) = default; - /** Allow instances of this class to be moved */ - CLWidthConcatenate4TensorsKernel &operator=(CLWidthConcatenate4TensorsKernel &&) = default; - /** Default destructor */ - ~CLWidthConcatenate4TensorsKernel() = default; - /** Initialise the kernel's input1s and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 First input tensor. Data types supported: All. - * @param[in] input2 Second input tensor. Data types supported: same as @p input1 - * @param[in] input3 Third input tensor. Data types supported: same as @p input1 - * @param[in] input4 Fourth input tensor. Data types supported: same as @p input1 - * @param[out] output Output tensor. Data types supported: Same as @p input1. - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *input3, ITensorInfo *input4, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate4TensorsKernel - * - * @param[in] input1 First tensor info. Data types supported: All. - * @param[in] input2 Second tensor info. Data types supported: same as @p input1 - * @param[in] input3 Third tensor info. Data types supported: same as @p input1 - * @param[in] input4 Fourth tensor info. Data types supported: same as @p input1 - * @param[in] output Output tensor info. Data types supported: Same as @p input1. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *input3, const ITensorInfo *input4, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H */ diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp deleted file mode 100644 index 30d0a481bd..0000000000 --- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "src/core/CL/kernels/CLWidthConcatenateLayerKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Utils.h" -#include "src/core/CL/CLValidate.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/Cast.h" - -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace -{ -Status validate_arguments(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); - - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) + width_offset > output->dimension(0)); - - for(size_t i = 1; i < Coordinates::num_max_dimensions; ++i) - { - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(i) != output->dimension(i)); - } - ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); - - return Status{}; -} -} // namespace - -CLWidthConcatenateLayerKernel::CLWidthConcatenateLayerKernel() -{ -} - -Status CLWidthConcatenateLayerKernel::validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, width_offset, output)); - return Status{}; -} - -void CLWidthConcatenateLayerKernel::configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int width_offset, ITensorInfo *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, width_offset, output)); - - auto padding_info = get_padding_info({ input, output }); - - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16, input->dimension(0)); - - // Add build options - CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->data_type())); - build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); - build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->dimension(0) % num_elems_processed_per_iteration)); - build_opts.add_option("-DWIDTH_OFFSET=" + support::cpp11::to_string(width_offset)); - build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input->dimension(2))); - - if(is_data_type_quantized_asymmetric(input->data_type()) && input->quantization_info() != output->quantization_info()) - { - const UniformQuantizationInfo iqinfo = input->quantization_info().uniform(); - const UniformQuantizationInfo oqinfo = output->quantization_info().uniform(); - - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iqinfo.offset)); - build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oqinfo.offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iqinfo.scale)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oqinfo.scale)); - } - - // Create kernel - _kernel = create_kernel(compile_context, "concatenate_width", build_opts.options()); - // Configure kernel window - Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); - ICLKernel::configure_internal(win.collapse(win, Window::DimZ)); - - // Set output valid region - output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); - - 
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); -} - -void CLWidthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - const auto src = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC)); - auto dst = utils::cast::polymorphic_downcast(tensors.get_tensor(TensorType::ACL_DST)); - - unsigned int idx = 0; - add_4D_tensor_argument(idx, src, window); - add_4D_tensor_argument(idx, dst, window); - enqueue(queue, *this, window, lws_hint()); -} -} // namespace arm_compute diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.h b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.h deleted file mode 100644 index 09c3f4455d..0000000000 --- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H -#define ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -/** Interface for the width concatenate kernel. - * The input tensor will be concatenated into the output tensor. - */ -class CLWidthConcatenateLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLWidthConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenateLayerKernel(const CLWidthConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenateLayerKernel &operator=(const CLWidthConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLWidthConcatenateLayerKernel(CLWidthConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLWidthConcatenateLayerKernel &operator=(CLWidthConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~CLWidthConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: All. - * @param[in] width_offset The offset on the X axis. 
- * @param[in,out] output Output tensor. Data types supported: Same as @p input. - * - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int width_offset, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: All. - * @param[in] width_offset The offset on the X axis. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H */ diff --git a/src/core/gpu/cl/ClCompileContext.h b/src/core/gpu/cl/ClCompileContext.h new file mode 100644 index 0000000000..e69cc0200f --- /dev/null +++ b/src/core/gpu/cl/ClCompileContext.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CL_COMPILE_CONTEXT_H +#define ARM_COMPUTE_CL_COMPILE_CONTEXT_H + +#include "arm_compute/core/CL/CLCompileContext.h" + +namespace arm_compute +{ +namespace opencl +{ +using ClCompileContext = arm_compute::CLCompileContext; +} // namespace opencl +} // namespace arm_compute +#endif /* ARM_COMPUTE_CL_COMPILE_CONTEXT_H */ diff --git a/src/core/gpu/cl/IClKernel.h b/src/core/gpu/cl/IClKernel.h new file mode 100644 index 0000000000..52ea3c9183 --- /dev/null +++ b/src/core/gpu/cl/IClKernel.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_ICL_KERNEL_H +#define ARM_COMPUTE_ICL_KERNEL_H + +#include "arm_compute/core/ITensorInfo.h" +#include "src/core/CL/ICLKernel.h" + +namespace arm_compute +{ +namespace opencl +{ +using IClKernel = arm_compute::ICLKernel; +} // namespace opencl +} // namespace arm_compute +#endif /* ARM_COMPUTE_ICL_KERNEL_H */ diff --git a/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp b/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp new file mode 100644 index 0000000000..c16ff1f028 --- /dev/null +++ b/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2019-2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h" + +#include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Utils.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/WindowHelpers.h" +#include "support/Cast.h" + +#include "support/StringSupport.h" + +namespace arm_compute +{ +namespace opencl +{ +namespace kernels +{ +namespace +{ +Status validate_arguments(const ITensorInfo *src, unsigned int batch_offset, const ITensorInfo *dst) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src); + ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst); + + ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimX) != dst->dimension(Window::DimX)); + ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimY) != dst->dimension(Window::DimY)); + ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimZ) != dst->dimension(Window::DimZ)); + ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(3) + batch_offset > dst->dimension(3)); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(4, src, dst); + + return Status{}; +} +} // namespace + +ClBatchConcatenateKernel::ClBatchConcatenateKernel() + : _batch_offset(0) +{ +} + +void ClBatchConcatenateKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int batch_offset, ITensorInfo *dst) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, batch_offset, dst)); + + auto padding_info = get_padding_info({ src, dst }); + + _batch_offset = batch_offset; + + const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16 / src->element_size(), src->dimension(0)); + + // Add build options + CLBuildOptions build_opts; + build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src->data_type())); + build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); + build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(src->dimension(0) % num_elems_processed_per_iteration)); + if(is_data_type_quantized_asymmetric(src->data_type()) && src->quantization_info() != dst->quantization_info()) + { + const UniformQuantizationInfo iq_info = src->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = dst->quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset)); + build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); + build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); + } + + // Create kernel + _kernel = create_kernel(compile_context, "concatenate", build_opts.options()); + + // Configure kernel window + auto win = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration)); + win.set(3, Window::Dimension(0, src->tensor_shape()[3], 1)); + ICLKernel::configure_internal(win); + + // Set dst valid region + dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape())); + + // Set config_id for enabling LWS tuning + _config_id = "concatenate_"; + _config_id += support::cpp11::to_string(3); + _config_id += "_"; + _config_id += support::cpp11::to_string(batch_offset); + _config_id 
+= "_"; + _config_id += support::cpp11::to_string(src->dimension(0)); + _config_id += "_"; + _config_id += support::cpp11::to_string(src->dimension(1)); + _config_id += "_"; + _config_id += support::cpp11::to_string(src->dimension(2)); + _config_id += "_"; + _config_id += support::cpp11::to_string(src->dimension(3)); + + ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); +} + +Status ClBatchConcatenateKernel::validate(const arm_compute::ITensorInfo *src, + unsigned int batch_offset, + const arm_compute::ITensorInfo *dst) +{ + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, batch_offset, dst)); + return Status{}; +} + +void ClBatchConcatenateKernel::run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); + + const auto src = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast(tensors.get_tensor(TensorType::ACL_DST)); + + Window slice = window.first_slice_window_3D(); + + const int offset_to_first_elements_in_bytes = _batch_offset * dst->info()->strides_in_bytes()[3]; + + unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the src and dst parameters + _kernel.setArg(idx, offset_to_first_elements_in_bytes); + + do + { + unsigned int idx = 0; + add_3D_tensor_argument(idx, src, slice); + add_3D_tensor_argument(idx, dst, slice); + enqueue(queue, *this, slice, lws_hint()); + } + while(window.slide_window_slice_3D(slice)); +} +} // namespace opencl +} // namespace kernels +} // namespace arm_compute diff --git a/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h b/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h new file mode 100644 index 0000000000..378a08aa4f --- /dev/null +++ b/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CL_BATCH_CONCATENATE_KERNEL_H +#define ARM_COMPUTE_CL_BATCH_CONCATENATE_KERNEL_H + +#include "src/core/common/Macros.h" +#include "src/core/gpu/cl/ClCompileContext.h" +#include "src/core/gpu/cl/IClKernel.h" + +namespace arm_compute +{ +namespace opencl +{ +namespace kernels +{ +/** Interface for the batch concatenate kernel. + * The src tensor will be concatenated into the destination tensor. 
+ */ +class ClBatchConcatenateKernel : public IClKernel +{ +public: + /** Default constructor */ + ClBatchConcatenateKernel(); + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClBatchConcatenateKernel); + /** Initialise the kernel's source and destination + * + * @param[in] compile_context The compile context to be used. + * @param[in] src Source tensor. Data types supported: All. + * @param[in] batch_offset The offset on axis # 3. + * @param[in,out] dst Destination tensor. Data types supported: Same as @p src. + * + * @note: The dst tensor's low two dimensions can't be smaller than the src one's. + * @note: The gaps between the two lowest dimensions of src and dst need to be divisible by 2. + * + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int batch_offset, ITensorInfo *dst); + /** Static function to check if given info will lead to a valid configuration of @ref ClBatchConcatenateKernel + * + * @param[in] src Input tensor info. Data types supported: All. + * @param[in] batch_offset The offset on axis # 3. + * @param[in] dst Destination tensor info. Data types supported: Same as @p src. + * + * @return a status + */ + static Status validate(const ITensorInfo *src, unsigned int batch_offset, const ITensorInfo *dst); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override; + +private: + unsigned int _batch_offset; +}; +} // namespace kernels +} // namespace opencl +} // namespace arm_compute +#endif /* ARM_COMPUTE_CL_BATCH_CONCATENATE_KERNEL_H */ diff --git a/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp b/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp new file mode 100644 index 0000000000..e8893d76d2 --- /dev/null +++ b/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2017-2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
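For readers new to the state-less kernel API introduced by this patch, a minimal sketch of how ClBatchConcatenateKernel is meant to be driven: configure() only receives ITensorInfo metadata, and the actual tensors are bound at run time through an ITensorPack. The shapes, the helper function name and the use of CLKernelLibrary::get().get_compile_context() / CLScheduler::get().queue() are illustrative assumptions, not part of this patch.

    // Illustrative sketch only: drive the state-less batch concatenate kernel directly.
    #include "arm_compute/core/CL/CLKernelLibrary.h"
    #include "arm_compute/core/ITensorPack.h"
    #include "arm_compute/core/experimental/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h"

    using namespace arm_compute;

    void example_batch_concat()
    {
        CLScheduler::get().default_init();

        // Copy an 8x8x3x2 source into batches [2,3] of an 8x8x3x4 destination.
        CLTensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 3U, 2U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(8U, 8U, 3U, 4U), 1, DataType::F32));
        src.allocator()->allocate();
        dst.allocator()->allocate();

        // configure() only sees metadata (ITensorInfo), no tensor memory.
        opencl::kernels::ClBatchConcatenateKernel kernel;
        kernel.configure(CLKernelLibrary::get().get_compile_context(), src.info(), 2 /* batch_offset */, dst.info());

        // Tensors are bound at run time through an ITensorPack, which is what
        // makes the kernel state-less and reusable across different tensors.
        ITensorPack pack;
        pack.add_const_tensor(TensorType::ACL_SRC, &src);
        pack.add_tensor(TensorType::ACL_DST, &dst);
        kernel.run_op(pack, kernel.window(), CLScheduler::get().queue());
    }
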
+ */ +#include "src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h" + +#include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Utils.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/WindowHelpers.h" +#include "support/Cast.h" + +#include "support/StringSupport.h" + +namespace arm_compute +{ +namespace opencl +{ +namespace kernels +{ +namespace +{ +Status validate_arguments(const ITensorInfo *src, unsigned int depth_offset, const ITensorInfo *dst) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst); + + ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimX) != dst->dimension(Window::DimX)); + ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimY) != dst->dimension(Window::DimY)); + ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(2) + depth_offset > dst->dimension(2)); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(3, src, dst); + + return Status{}; +} +} // namespace + +ClDepthConcatenateKernel::ClDepthConcatenateKernel() + : _depth_offset(0) +{ +} + +void ClDepthConcatenateKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int depth_offset, ITensorInfo *dst) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, depth_offset, dst)); + + auto padding_info = get_padding_info({ src, dst }); + + _depth_offset = depth_offset; + + const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16 / src->element_size(), src->dimension(0)); + + // Add build options + CLBuildOptions build_opts; + build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src->data_type())); + build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); + build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(src->dimension(0) % num_elems_processed_per_iteration)); + if(is_data_type_quantized_asymmetric(src->data_type()) && src->quantization_info() != dst->quantization_info()) + { + const UniformQuantizationInfo iq_info = src->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = dst->quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset)); + build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); + build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); + } + + // Create kernel + _kernel = create_kernel(compile_context, "concatenate", build_opts.options()); + + // Configure kernel window + auto win = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration)); + win.set(Window::DimZ, Window::Dimension(0, src->tensor_shape().z(), 1)); + ICLKernel::configure_internal(win); + + // Set dst valid region + dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape())); + + ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); +} + +Status ClDepthConcatenateKernel::validate(const arm_compute::ITensorInfo *src, + unsigned int depth_offset, + const arm_compute::ITensorInfo *dst) +{ + 
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, depth_offset, dst)); + return Status{}; +} + +void ClDepthConcatenateKernel::run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); + + const auto src = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast(tensors.get_tensor(TensorType::ACL_DST)); + + Window slice = window.first_slice_window_3D(); + + const int offset_to_first_elements_in_bytes = _depth_offset * dst->info()->strides_in_bytes()[2]; + + unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the src and dst parameters + _kernel.setArg(idx, offset_to_first_elements_in_bytes); + + do + { + unsigned int idx = 0; + add_3D_tensor_argument(idx, src, slice); + add_3D_tensor_argument(idx, dst, slice); + enqueue(queue, *this, slice, lws_hint()); + } + while(window.slide_window_slice_3D(slice)); +} +} // namespace kernels +} // namespace opencl +} // namespace arm_compute diff --git a/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h b/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h new file mode 100644 index 0000000000..144d7d48f2 --- /dev/null +++ b/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2017-2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CL_DEPTH_CONCATENATE_KERNEL_H +#define ARM_COMPUTE_CL_DEPTH_CONCATENATE_KERNEL_H + +#include "src/core/common/Macros.h" +#include "src/core/gpu/cl/ClCompileContext.h" +#include "src/core/gpu/cl/IClKernel.h" + +namespace arm_compute +{ +namespace opencl +{ +namespace kernels +{ +/** Interface for the depth concatenate kernel. + * The src tensor will be concatenated into the dst tensor. + */ +class ClDepthConcatenateKernel : public ICLKernel +{ +public: + /** Default constructor */ + ClDepthConcatenateKernel(); + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClDepthConcatenateKernel); + /** Initialise the kernel's source and destination + * + * @param[in] compile_context The compile context to be used. + * @param[in] src Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] depth_offset The offset on the Z axis. + * @param[in,out] dst Destination tensor. Data types supported: Same as @p src. 
+ * + * @note: The dst tensor's low two dimensions can't be smaller than the src one's. + * @note: The gaps between the two lowest dimensions of src and dst need to be divisible by 2. + * + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int depth_offset, ITensorInfo *dst); + /** Static function to check if given info will lead to a valid configuration of @ref ClDepthConcatenateKernel + * + * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[in] depth_offset The offset on the Z axis. + * @param[in] dst Destination tensor info. Data types supported: Same as @p src. + * + * @return a status + */ + static Status validate(const ITensorInfo *src, unsigned int depth_offset, const ITensorInfo *dst); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override; + +private: + unsigned int _depth_offset; +}; +} // namespace kernels +} // namespace opencl +} // namespace arm_compute +#endif /* ARM_COMPUTE_CL_DEPTH_CONCATENATE_KERNEL_H */ diff --git a/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp b/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp new file mode 100644 index 0000000000..83e976e10f --- /dev/null +++ b/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2019-2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
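The static validate() entry points mirror the argument checks performed by configure(), so a caller can reject an unsupported configuration before any kernel is built. A small sketch for the depth kernel; the shapes and the function name are made up for illustration.

    #include <iostream>

    #include "arm_compute/core/Error.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h"

    using namespace arm_compute;

    void example_validate_depth_concat()
    {
        // Illustrative shapes: a 16x16x4 slice written at depth offset 4 of a 16x16x12 destination.
        const TensorInfo   src_info(TensorShape(16U, 16U, 4U), 1, DataType::F32);
        const TensorInfo   dst_info(TensorShape(16U, 16U, 12U), 1, DataType::F32);
        const unsigned int depth_offset = 4;

        // validate() runs the same checks as configure(), without building anything.
        const Status status = opencl::kernels::ClDepthConcatenateKernel::validate(&src_info, depth_offset, &dst_info);
        if(status.error_code() != ErrorCode::OK)
        {
            std::cerr << status.error_description() << std::endl;
        }
    }
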
+ */ +#include "src/core/gpu/cl/kernels/ClHeightConcatenateKernel.h" + +#include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/WindowHelpers.h" +#include "support/Cast.h" + +#include "support/StringSupport.h" + +namespace arm_compute +{ +namespace opencl +{ +namespace kernels +{ +namespace +{ +Status validate_arguments(const ITensorInfo *src, unsigned int height_offset, const ITensorInfo *dst) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst); + ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimY) + height_offset > dst->dimension(Window::DimY)); + + ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(0) != dst->dimension(0)); + for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i) + { + ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(i) != dst->dimension(i)); + } + ARM_COMPUTE_RETURN_ERROR_ON(src->num_dimensions() > 4); + + return Status{}; +} +} // namespace + +ClHeightConcatenateKernel::ClHeightConcatenateKernel() + : _height_offset(0) +{ +} + +Status ClHeightConcatenateKernel::validate(const ITensorInfo *src, unsigned int height_offset, const ITensorInfo *dst) +{ + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, height_offset, dst)); + return Status{}; +} + +void ClHeightConcatenateKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int height_offset, ITensorInfo *dst) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, height_offset, dst)); + + auto padding_info = get_padding_info({ src, dst }); + + _height_offset = height_offset; + + // Add build options + const unsigned int num_elems_processed_per_iteration = adjust_vec_size(4, src->dimension(0)); + + CLBuildOptions build_opts; + build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(src->element_size())); + build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); + build_opts.add_option("-DHEIGHT_OFFSET=" + support::cpp11::to_string(_height_offset)); + build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(src->dimension(2))); + build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(src->dimension(0) % num_elems_processed_per_iteration)); + + if(is_data_type_quantized_asymmetric(src->data_type()) && src->quantization_info() != dst->quantization_info()) + { + const UniformQuantizationInfo iq_info = src->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = dst->quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset)); + build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); + build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); + } + + // Create kernel + _kernel = create_kernel(compile_context, "concatenate_height", build_opts.options()); + // Configure kernel window + + // The window needs to be based on src as we copy all the heights of src + Window win = calculate_max_window(*src, 
Steps(num_elems_processed_per_iteration)); + ICLKernel::configure_internal(win.collapse(win, Window::DimZ)); + + // Set dst valid region + dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape())); + + ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); +} + +void ClHeightConcatenateKernel::run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); + + const auto src = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast(tensors.get_tensor(TensorType::ACL_DST)); + + unsigned int idx = 0; + add_4D_tensor_argument(idx, src, window); + add_4D_tensor_argument(idx, dst, window); + enqueue(queue, *this, window, lws_hint()); +} +} // namespace kernels +} // namespace opencl +} // namespace arm_compute diff --git a/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.h b/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.h new file mode 100644 index 0000000000..88cd4c4d17 --- /dev/null +++ b/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CL_HEIGHT_CONCATENATE_LAYER_KERNEL_H +#define ARM_COMPUTE_CL_HEIGHT_CONCATENATE_LAYER_KERNEL_H + +#include "src/core/common/Macros.h" +#include "src/core/gpu/cl/ClCompileContext.h" +#include "src/core/gpu/cl/IClKernel.h" + +namespace arm_compute +{ +namespace opencl +{ +namespace kernels +{ +/** Interface for the height concatenate kernel. + * The source tensor will be concatenated into the destination tensor. + */ +class ClHeightConcatenateKernel : public IClKernel +{ +public: + /** Default constructor */ + ClHeightConcatenateKernel(); + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClHeightConcatenateKernel); + /** Initialise the kernel's source and destination + * + * @param[in] compile_context The compile context to be used. + * @param[in] src Source tensor. Data types supported: All. + * @param[in] height_offset The starting offset on the Y axis for the dst tensor. + * @param[out] dst Destination tensor. Data types supported: same as @p src. 
+ * + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int height_offset, ITensorInfo *dst); + /** Static function to check if given info will lead to a valid configuration of @ref ClHeightConcatenateKernel + * + * @param[in] src Source tensor info. Data types supported: All. + * @param[in] height_offset The starting offset on the Y axis for the dst tensor. + * @param[in] dst Destination tensor info. Data types supported: same as @p src. + * + * @return a status + */ + static Status validate(const ITensorInfo *src, unsigned int height_offset, const ITensorInfo *dst); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override; + +private: + unsigned int _height_offset; +}; +} // namespace kernels +} // namespace opencl +} // namespace arm_compute +#endif /* ARM_COMPUTE_CL_HEIGHT_CONCATENATE_LAYER_KERNEL_H */ diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp b/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp new file mode 100644 index 0000000000..6a2ab3b50f --- /dev/null +++ b/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2018-2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h" + +#include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/WindowHelpers.h" +#include "src/core/utils/helpers/tensor_info.h" +#include "support/Cast.h" + +#include "support/StringSupport.h" + +namespace arm_compute +{ +namespace opencl +{ +namespace kernels +{ +namespace +{ +Status validate_arguments(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src1, src2, dst); + ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src1); + ARM_COMPUTE_RETURN_ERROR_ON(src1->data_type() == DataType::UNKNOWN); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src1, src2, dst); + ARM_COMPUTE_RETURN_ERROR_ON(src1->dimension(0) + src2->dimension(0) > dst->dimension(0)); + + for(size_t i = 1; i < Coordinates::num_max_dimensions; ++i) + { + ARM_COMPUTE_RETURN_ERROR_ON(src1->dimension(i) != dst->dimension(i)); + ARM_COMPUTE_RETURN_ERROR_ON(src2->dimension(i) != dst->dimension(i)); + } + ARM_COMPUTE_RETURN_ERROR_ON(src1->num_dimensions() > 4); + + return Status{}; +} +} // namespace + +Status ClWidthConcatenate2TensorsKernel::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst) +{ + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src1, src2, dst)); + return Status{}; +} + +void ClWidthConcatenate2TensorsKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(src1, src2, dst); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src1, src2, dst)); + + auto padding_info = get_padding_info({ src1, src2, dst }); + + const unsigned int min_dimension = std::min(src1->dimension(0), src2->dimension(0)); + const unsigned int num_elems_processed_per_iteration = adjust_vec_size(8, min_dimension); + const unsigned int vec_size_leftover = dst->dimension(0) % num_elems_processed_per_iteration; + + // Add build options + CLBuildOptions build_opts; + build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src1->data_type())); + build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); + build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(vec_size_leftover)); + build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(src1->dimension(2))); + build_opts.add_option("-DINPUT1_WIDTH=" + support::cpp11::to_string(src1->dimension(0))); + build_opts.add_option("-DINPUT2_WIDTH=" + support::cpp11::to_string(src2->dimension(0))); + build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(src1->element_size())); + build_opts.add_option("-DINPUT1_ROTATE_N=" + support::cpp11::to_string((src1->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration)); + + // If input have different quantization info set quantization parameters needed for the re-quantization process + const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(dst, src1, src2); + if(is_data_type_quantized_asymmetric(src1->data_type()) && have_different_qinfo) + { + const UniformQuantizationInfo iq1_info = src1->quantization_info().uniform(); + const UniformQuantizationInfo iq2_info = src2->quantization_info().uniform(); + const UniformQuantizationInfo 
oq_info = dst->quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq1_info.offset)); + build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale)); + build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(iq2_info.offset)); + build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2_info.scale)); + build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); + } + + // Create kernel + _kernel = create_kernel(compile_context, "concatenate_width_x2", build_opts.options()); + + // Configure kernel window + Window win = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration)); + ICLKernel::configure_internal(win.collapse(win, Window::DimZ)); + + // Set dst valid region + dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape())); + ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); + + // Set config_id for enabling LWS tuning + _config_id = "concatenate_width_x2_"; + _config_id += lower_string(string_from_data_type(src1->data_type())); + _config_id += "_"; + _config_id += support::cpp11::to_string(src1->dimension(0)); + _config_id += "_"; + _config_id += support::cpp11::to_string(src1->dimension(1)); + _config_id += "_"; + _config_id += support::cpp11::to_string(src2->dimension(0)); + _config_id += "_"; + _config_id += support::cpp11::to_string(src2->dimension(1)); +} + +void ClWidthConcatenate2TensorsKernel::run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); + + Window slice = window.first_slice_window_4D(); + + const auto src0 = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC_VEC)); + const auto src1 = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 1)); + auto dst = utils::cast::polymorphic_downcast(tensors.get_tensor(TensorType::ACL_DST)); + + do + { + unsigned int idx = 0; + add_4D_tensor_argument(idx, src0, slice); + add_4D_tensor_argument(idx, src1, slice); + add_4D_tensor_argument(idx, dst, slice); + enqueue(queue, *this, window, lws_hint()); + } + while(window.slide_window_slice_4D(slice)); +} +} // namespace kernels +} // namespace opencl +} // namespace arm_compute diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h b/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h new file mode 100644 index 0000000000..92715008cf --- /dev/null +++ b/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CL_WIDTHCONCATENATE_2TENSORS_KERNEL_H +#define ARM_COMPUTE_CL_WIDTHCONCATENATE_2TENSORS_KERNEL_H + +#include "src/core/common/Macros.h" +#include "src/core/gpu/cl/ClCompileContext.h" +#include "src/core/gpu/cl/IClKernel.h" + +namespace arm_compute +{ +namespace opencl +{ +namespace kernels +{ +/** Interface for the width concatenate kernel of 2 tensors. + * The src1 and src2 tensors will be concatenated into the dst tensor. + */ +class ClWidthConcatenate2TensorsKernel : public IClKernel +{ +public: + /** Default constructor */ + ClWidthConcatenate2TensorsKernel() = default; + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClWidthConcatenate2TensorsKernel); + /** Initialise the kernel's sources and destination + * + * @param[in] compile_context The compile context to be used. + * @param[in] src1 First source tensor. Data types supported: All. + * @param[in] src2 Second source tensor. Data types supported: same as @p src1 + * @param[out] dst Destination tensor. Data types supported: Same as @p src1. + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst); + /** Static function to check if given info will lead to a valid configuration of @ref ClWidthConcatenate2TensorsKernel + * + * @param[in] src1 First tensor info. Data types supported: All. + * @param[in] src2 Second tensor info. Data types supported: same as @p src1 + * @param[in] dst Destination tensor info. Data types supported: Same as @p src1. + * + * @return a status + */ + static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override; +}; +} // namespace kernels +} // namespace opencl +} // namespace arm_compute +#endif /* ARM_COMPUTE_CL_WIDTH_CONCATENATE_2TENSORS_KERNEL_H */ diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp b/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp new file mode 100644 index 0000000000..4b49652a73 --- /dev/null +++ b/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2018-2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h" + +#include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Utils.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/WindowHelpers.h" +#include "src/core/utils/helpers/tensor_info.h" +#include "support/Cast.h" + +#include "support/StringSupport.h" + +namespace arm_compute +{ +namespace opencl +{ +namespace kernels +{ +namespace +{ +Status validate_arguments(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *src3, const ITensorInfo *src4, const ITensorInfo *dst) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src1, src2, src3, src4, dst); + ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src1); + ARM_COMPUTE_RETURN_ERROR_ON(src1->data_type() == DataType::UNKNOWN); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src1, src2, src3, src4, dst); + ARM_COMPUTE_RETURN_ERROR_ON(src1->dimension(0) + src2->dimension(0) + src3->dimension(0) + src4->dimension(0) > dst->dimension(0)); + + for(size_t i = 1; i < Coordinates::num_max_dimensions; ++i) + { + ARM_COMPUTE_RETURN_ERROR_ON(src1->dimension(i) != dst->dimension(i)); + ARM_COMPUTE_RETURN_ERROR_ON(src2->dimension(i) != dst->dimension(i)); + ARM_COMPUTE_RETURN_ERROR_ON(src3->dimension(i) != dst->dimension(i)); + ARM_COMPUTE_RETURN_ERROR_ON(src4->dimension(i) != dst->dimension(i)); + } + ARM_COMPUTE_RETURN_ERROR_ON(src1->num_dimensions() > 4); + + return Status{}; +} +} // namespace + +ClWidthConcatenate4TensorsKernel::ClWidthConcatenate4TensorsKernel() +{ +} + +Status ClWidthConcatenate4TensorsKernel::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *src3, const ITensorInfo *src4, const ITensorInfo *dst) +{ + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src1, src2, src3, src4, dst)); + return Status{}; +} + +void ClWidthConcatenate4TensorsKernel::configure(const CLCompileContext &compile_context, + ITensorInfo *src1, ITensorInfo *src2, + ITensorInfo *src3, ITensorInfo *src4, + ITensorInfo *dst) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(src1, src2, src3, src4, dst); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src1, src2, src3, src4, dst)); + + auto padding_info = get_padding_info({ src1, src2, src3, src4, dst }); + const unsigned int min_dimension = std::min(std::min(src1->dimension(0), src2->dimension(0)), std::min(src3->dimension(0), src4->dimension(0))); + const unsigned int num_elems_processed_per_iteration = adjust_vec_size(8, min_dimension); + const unsigned int vec_size_leftover = dst->dimension(0) % num_elems_processed_per_iteration; + + // Add build options + CLBuildOptions build_opts; + build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src1->data_type())); + build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); + build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(vec_size_leftover)); + 
build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(src1->dimension(2))); + build_opts.add_option("-DINPUT1_WIDTH=" + support::cpp11::to_string(src1->dimension(0))); + build_opts.add_option("-DINPUT2_WIDTH=" + support::cpp11::to_string(src2->dimension(0))); + build_opts.add_option("-DINPUT3_WIDTH=" + support::cpp11::to_string(src3->dimension(0))); + build_opts.add_option("-DINPUT4_WIDTH=" + support::cpp11::to_string(src4->dimension(0))); + build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(src1->element_size())); + build_opts.add_option("-DINPUT1_ROTATE_N=" + support::cpp11::to_string((src1->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration)); + build_opts.add_option("-DINPUT2_ROTATE_N=" + support::cpp11::to_string((src1->dimension(0) + src2->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration)); + build_opts.add_option("-DINPUT3_ROTATE_N=" + support::cpp11::to_string((src1->dimension(0) + src2->dimension(0) + src3->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration)); + + // If soources have different quantization info set quantization parameters needed for the re-quantization process + const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(dst, src1, src2, src3, src4); + if(is_data_type_quantized_asymmetric(src1->data_type()) && have_different_qinfo) + { + const UniformQuantizationInfo iq1_info = src1->quantization_info().uniform(); + const UniformQuantizationInfo iq2_info = src2->quantization_info().uniform(); + const UniformQuantizationInfo iq3_info = src3->quantization_info().uniform(); + const UniformQuantizationInfo iq4_info = src4->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = dst->quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq1_info.offset)); + build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale)); + build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(iq2_info.offset)); + build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2_info.scale)); + build_opts.add_option("-DOFFSET_IN3=" + float_to_string_with_full_precision(iq3_info.offset)); + build_opts.add_option("-DSCALE_IN3=" + float_to_string_with_full_precision(iq3_info.scale)); + build_opts.add_option("-DOFFSET_IN4=" + float_to_string_with_full_precision(iq4_info.offset)); + build_opts.add_option("-DSCALE_IN4=" + float_to_string_with_full_precision(iq4_info.scale)); + build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); + } + + // Create kernel + _kernel = create_kernel(compile_context, "concatenate_width_x4", build_opts.options()); + + // Configure kernel window + Window win = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration)); + ICLKernel::configure_internal(win.collapse(win, Window::DimZ)); + + // Set dst valid region + dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape())); + ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); + + // Set config_id for enabling LWS tuning + _config_id = "concatenate_width_x4_"; + _config_id += lower_string(string_from_data_type(src1->data_type())); + _config_id += "_"; + _config_id += support::cpp11::to_string(src1->dimension(0)); + _config_id += "_"; + _config_id += support::cpp11::to_string(src1->dimension(1)); + 
_config_id += "_"; + _config_id += support::cpp11::to_string(src2->dimension(0)); + _config_id += "_"; + _config_id += support::cpp11::to_string(src2->dimension(1)); + _config_id += "_"; + _config_id += support::cpp11::to_string(src3->dimension(0)); + _config_id += "_"; + _config_id += support::cpp11::to_string(src3->dimension(1)); + _config_id += "_"; + _config_id += support::cpp11::to_string(src4->dimension(0)); + _config_id += "_"; + _config_id += support::cpp11::to_string(src4->dimension(1)); +} + +void ClWidthConcatenate4TensorsKernel::run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); + + const auto src0 = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC_VEC)); + const auto src1 = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 1)); + const auto src2 = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 2)); + const auto src3 = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 3)); + auto dst = utils::cast::polymorphic_downcast(tensors.get_tensor(TensorType::ACL_DST)); + + Window slice = window.first_slice_window_4D(); + + do + { + unsigned int idx = 0; + add_4D_tensor_argument(idx, src0, slice); + add_4D_tensor_argument(idx, src1, slice); + add_4D_tensor_argument(idx, src2, slice); + add_4D_tensor_argument(idx, src3, slice); + add_4D_tensor_argument(idx, dst, slice); + enqueue(queue, *this, window, lws_hint()); + } + while(window.slide_window_slice_4D(slice)); +} +} // namespace kernels +} // namespace opencl +} // namespace arm_compute diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h b/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h new file mode 100644 index 0000000000..06d6c0399a --- /dev/null +++ b/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef ARM_COMPUTE_CL_WIDTH_CONCATENATE_4TENSORS_KERNEL_H +#define ARM_COMPUTE_CL_WIDTH_CONCATENATE_4TENSORS_KERNEL_H + +#include "src/core/common/Macros.h" +#include "src/core/gpu/cl/ClCompileContext.h" +#include "src/core/gpu/cl/IClKernel.h" + +namespace arm_compute +{ +namespace opencl +{ +namespace kernels +{ +/** Interface for the width concatenate kernel of 4 tensors. + * All source tensors will be concatenated into the destination tensor. + */ +class ClWidthConcatenate4TensorsKernel : public IClKernel +{ +public: + /** Default constructor */ + ClWidthConcatenate4TensorsKernel(); + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClWidthConcatenate4TensorsKernel); + /** Initialise the kernel's sources and destination + * + * @param[in] compile_context The compile context to be used. + * @param[in] src1 First source tensor. Data types supported: All. + * @param[in] src2 Second source tensor. Data types supported: same as @p src1 + * @param[in] src3 Third source tensor. Data types supported: same as @p src1 + * @param[in] src4 Fourth source tensor. Data types supported: same as @p src1 + * @param[out] dst Destination tensor. Data types supported: same as @p src1. + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *src3, ITensorInfo *src4, ITensorInfo *dst); + /** Static function to check if given info will lead to a valid configuration of @ref ClWidthConcatenate4TensorsKernel + * + * @param[in] src1 First tensor info. Data types supported: All. + * @param[in] src2 Second tensor info. Data types supported: same as @p src1 + * @param[in] src3 Third tensor info. Data types supported: same as @p src1 + * @param[in] src4 Fourth tensor info. Data types supported: same as @p src1 + * @param[in] dst Destination tensor info. Data types supported: same as @p src1. + * + * @return a status + */ + static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *src3, const ITensorInfo *src4, const ITensorInfo *dst); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override; +}; +} // namespace kernels +} // namespace opencl +} // namespace arm_compute +#endif /* ARM_COMPUTE_CL_WIDTH_CONCATENATE_4TENSORS_KERNEL_H */ diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp b/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp new file mode 100644 index 0000000000..8cbbc27444 --- /dev/null +++ b/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2018-2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h" + +#include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Utils.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/WindowHelpers.h" +#include "support/Cast.h" + +#include "support/StringSupport.h" + +namespace arm_compute +{ +namespace opencl +{ +namespace kernels +{ +namespace +{ +Status validate_arguments(const ITensorInfo *src, unsigned int width_offset, const ITensorInfo *dst) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src); + ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN); + + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst); + ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(0) + width_offset > dst->dimension(0)); + + for(size_t i = 1; i < Coordinates::num_max_dimensions; ++i) + { + ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(i) != dst->dimension(i)); + } + ARM_COMPUTE_RETURN_ERROR_ON(src->num_dimensions() > 4); + + return Status{}; +} +} // namespace + +ClWidthConcatenateKernel::ClWidthConcatenateKernel() +{ +} + +Status ClWidthConcatenateKernel::validate(const ITensorInfo *src, unsigned int width_offset, const ITensorInfo *dst) +{ + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, width_offset, dst)); + return Status{}; +} + +void ClWidthConcatenateKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int width_offset, ITensorInfo *dst) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, width_offset, dst)); + + auto padding_info = get_padding_info({ src, dst }); + + const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16, src->dimension(0)); + + // Add build options + CLBuildOptions build_opts; + build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src->data_type())); + build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); + build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(src->dimension(0) % num_elems_processed_per_iteration)); + build_opts.add_option("-DWIDTH_OFFSET=" + support::cpp11::to_string(width_offset)); + build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(src->dimension(2))); + + if(is_data_type_quantized_asymmetric(src->data_type()) && src->quantization_info() != dst->quantization_info()) + { + const UniformQuantizationInfo iqinfo = src->quantization_info().uniform(); + const UniformQuantizationInfo oqinfo = dst->quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iqinfo.offset)); + build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oqinfo.offset)); + build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iqinfo.scale)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oqinfo.scale)); + } + + // Create kernel + _kernel = create_kernel(compile_context, "concatenate_width", build_opts.options()); + // Configure kernel window + Window win = calculate_max_window(*src, 
Steps(num_elems_processed_per_iteration)); + ICLKernel::configure_internal(win.collapse(win, Window::DimZ)); + + // Set dst valid region + dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape())); + + ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); +} + +void ClWidthConcatenateKernel::run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); + + const auto src = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast(tensors.get_tensor(TensorType::ACL_DST)); + + unsigned int idx = 0; + add_4D_tensor_argument(idx, src, window); + add_4D_tensor_argument(idx, dst, window); + enqueue(queue, *this, window, lws_hint()); +} +} // namespace kernels +} // namespace opencl +} // namespace arm_compute diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h b/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h new file mode 100644 index 0000000000..3bffe52700 --- /dev/null +++ b/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CL_WIDTH_CONCATENATE_LAYER_KERNEL_H +#define ARM_COMPUTE_CL_WIDTH_CONCATENATE_LAYER_KERNEL_H + +#include "src/core/common/Macros.h" +#include "src/core/gpu/cl/ClCompileContext.h" +#include "src/core/gpu/cl/IClKernel.h" + +namespace arm_compute +{ +namespace opencl +{ +namespace kernels +{ +/** Interface for the width concatenate kernel. + * The source tensor will be concatenated into the destination tensor. + */ +class ClWidthConcatenateKernel : public IClKernel +{ +public: + /** Default constructor */ + ClWidthConcatenateKernel(); + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClWidthConcatenateKernel); + /** Initialise the kernel's source and destination + * + * @param[in] compile_context The compile context to be used. + * @param[in] src Source tensor. Data types supported: All. + * @param[in] width_offset The offset on the X axis. + * @param[in,out] dst Destination tensor. Data types supported: same as @p src. 
+ * + */ + void configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int width_offset, ITensorInfo *dst); + /** Static function to check if given info will lead to a valid configuration of @ref ClWidthConcatenateKernel + * + * @param[in] src Source tensor info. Data types supported: All. + * @param[in] width_offset The offset on the X axis. + * @param[in] dst Destination tensor info. Data types supported: same as @p src. + * + * @return a status + */ + static Status validate(const ITensorInfo *src, unsigned int width_offset, const ITensorInfo *dst); + + // Inherited methods overridden: + void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override; +}; +} // namespace kernels +} // namespace opencl +} // namespace arm_compute +#endif /* ARM_COMPUTE_CL_WIDTH_CONCATENATE_LAYER_KERNEL_H */ diff --git a/src/runtime/CL/functions/CLConcatenateLayer.cpp b/src/runtime/CL/functions/CLConcatenateLayer.cpp index 0c473a79c8..ea96e45bf8 100644 --- a/src/runtime/CL/functions/CLConcatenateLayer.cpp +++ b/src/runtime/CL/functions/CLConcatenateLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,242 +23,19 @@ */ #include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "src/core/CL/kernels/CLDepthConcatenateLayerKernel.h" -#include "src/core/CL/kernels/CLHeightConcatenateLayerKernel.h" -#include "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h" -#include "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h" -#include "src/core/CL/kernels/CLWidthConcatenateLayerKernel.h" - #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "src/core/CL/kernels/CLBatchConcatenateLayerKernel.h" -#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/CL/ICLKernel.h" +#include "src/runtime/gpu/cl/operators/ClConcatenate.h" namespace arm_compute { -namespace experimental -{ -CLConcatenation::CLConcatenation() - : _concat_kernels(), - _num_inputs(0), - _axis(Window::DimX) -{ -} - -void CLConcatenation::configure(const CLCompileContext &compile_context, const std::vector &inputs_vector, ITensorInfo *output, size_t axis) -{ - ARM_COMPUTE_ERROR_ON(output == nullptr); - _axis = axis; - _num_inputs = inputs_vector.size(); - - TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, _axis); - std::vector const_inputs_vector(inputs_vector.size()); - std::transform(inputs_vector.begin(), inputs_vector.end(), const_inputs_vector.begin(), [](ITensorInfo * t) - { - ARM_COMPUTE_ERROR_ON_NULLPTR(t); - return t; - }); - - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output, output_shape, 1, inputs_vector[0]->data_type()); - ARM_COMPUTE_ERROR_THROW_ON(CLConcatenateLayer::validate(const_inputs_vector, output, axis)); - - unsigned int offset = 0; - switch(_axis) - { - case Window::DimX: - { - switch(_num_inputs) - { - case 2: - { - // Configure WidthConcatenate2Tensors kernel - auto kernel = std::make_unique(); - kernel->configure(compile_context, inputs_vector.at(0), inputs_vector.at(1), output); - _concat_kernels.emplace_back(std::move(kernel)); - break; - } - case 4: - { - // Configure WidthConcatenate4Tensors kernel - auto kernel = std::make_unique(); 
- kernel->configure(compile_context, inputs_vector.at(0), inputs_vector.at(1), inputs_vector.at(2), inputs_vector.at(3), output); - _concat_kernels.emplace_back(std::move(kernel)); - break; - } - default: - { - // Configure generic case WidthConcatenate kernels - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto kernel = std::make_unique(); - kernel->configure(compile_context, inputs_vector.at(i), offset, output); - offset += inputs_vector.at(i)->dimension(_axis); - _concat_kernels.emplace_back(std::move(kernel)); - } - break; - } - } - break; - } - case Window::DimY: - { - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto kernel = std::make_unique(); - kernel->configure(compile_context, inputs_vector.at(i), offset, output); - offset += inputs_vector.at(i)->dimension(_axis); - _concat_kernels.emplace_back(std::move(kernel)); - } - break; - } - case Window::DimZ: - { - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto kernel = std::make_unique(); - kernel->configure(compile_context, inputs_vector.at(i), offset, output); - offset += inputs_vector.at(i)->dimension(_axis); - _concat_kernels.emplace_back(std::move(kernel)); - } - break; - } - case 3: - { - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto kernel = std::make_unique(); - kernel->configure(compile_context, inputs_vector.at(i), offset, output); - offset += inputs_vector.at(i)->dimension(_axis); - _concat_kernels.emplace_back(std::move(kernel)); - } - break; - } - default: - ARM_COMPUTE_ERROR("Axis not supported"); - } -} - -Status CLConcatenation::validate(const std::vector &inputs_vector, const ITensorInfo *output, size_t axis) -{ - ARM_COMPUTE_RETURN_ERROR_ON(output == nullptr); - const unsigned int num_inputs = inputs_vector.size(); - - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); - ARM_COMPUTE_RETURN_ERROR_ON(num_inputs < 2); - - unsigned int offset = 0; - switch(axis) - { - case Window::DimX: - { - switch(num_inputs) - { - case 2: - // Validate WidthConcatenate2Tensors kernels if there are 2 inputs - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(inputs_vector[0], inputs_vector[1]); - ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenate2TensorsKernel::validate(inputs_vector[0], inputs_vector[1], output)); - break; - case 4: - // Validate WidthConcatenate4Tensors kernels if there are 4 inputs - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(inputs_vector[0], inputs_vector[1], inputs_vector[2], inputs_vector[3]); - ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenate4TensorsKernel::validate(inputs_vector[0], inputs_vector[1], inputs_vector[2], inputs_vector[3], output)); - break; - default: - // Validate generic case of WidthConcatenate kernel - for(const auto &input : inputs_vector) - { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); - ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenateLayerKernel::validate(input, offset, output)); - offset += input->dimension(axis); - } - break; - } - break; - } - case Window::DimY: - { - for(const auto &input : inputs_vector) - { - ARM_COMPUTE_RETURN_ON_ERROR(CLHeightConcatenateLayerKernel::validate(input, offset, output)); - offset += input->dimension(axis); - } - break; - } - case Window::DimZ: - { - for(const auto &input : inputs_vector) - { - ARM_COMPUTE_RETURN_ON_ERROR(CLDepthConcatenateLayerKernel::validate(input, offset, output)); - offset += input->dimension(axis); - } - break; - } - case 3: - { - for(const auto &input : inputs_vector) - { - ARM_COMPUTE_RETURN_ON_ERROR(CLBatchConcatenateLayerKernel::validate(input, offset, output)); - offset += input->dimension(axis); - } - break; - } - default: - 
ARM_COMPUTE_ERROR("Axis not supported"); - } - - if(output->total_size() != 0) - { - TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, axis); - ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size()); - } - - return Status{}; -} - -void CLConcatenation::run(ITensorPack &tensors) -{ - if(tensors.empty()) - { - ARM_COMPUTE_ERROR("No inputs provided"); - } - - if(static_cast(tensors.size()) - 1 != static_cast(_num_inputs)) - { - ARM_COMPUTE_ERROR("Configured with different number of inputs"); - } - - if(_axis == Window::DimX && (_num_inputs == 2 || _num_inputs == 4)) - { - ARM_COMPUTE_ERROR_ON(_concat_kernels.empty()); - CLScheduler::get().enqueue_op(*_concat_kernels.at(0), tensors, true); - } - else - { - int i = 0; - for(auto &k : _concat_kernels) - { - ITensorPack pack; - pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(ACL_SRC_VEC + i)); - pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(ACL_DST)); - CLScheduler::get().enqueue_op(*k, pack, true); - ++i; - } - } -} -} // namespace experimental - struct CLConcatenateLayer::Impl { - std::vector srcs{}; - ICLTensor *dst{ nullptr }; - unsigned int num_inputs{ 0 }; - unsigned int axis{ 0 }; - std::unique_ptr op{ nullptr }; + std::vector srcs{}; + ICLTensor *dst{ nullptr }; + unsigned int num_inputs{ 0 }; + unsigned int axis{ 0 }; + std::unique_ptr op{ nullptr }; }; CLConcatenateLayer::CLConcatenateLayer() @@ -285,7 +62,7 @@ void CLConcatenateLayer::configure(const CLCompileContext &compile_context, std: _impl->dst = output; _impl->axis = axis; _impl->num_inputs = inputs_vector.size(); - _impl->op = std::make_unique(); + _impl->op = std::make_unique(); std::vector inputs_vector_info; for(unsigned int i = 0; i < inputs_vector.size(); ++i) @@ -298,7 +75,7 @@ void CLConcatenateLayer::configure(const CLCompileContext &compile_context, std: Status CLConcatenateLayer::validate(const std::vector &inputs_vector, const ITensorInfo *output, size_t axis) { - return experimental::CLConcatenation::validate(inputs_vector, output, axis); + return opencl::ClConcatenate::validate(inputs_vector, output, axis); } void CLConcatenateLayer::run() diff --git a/src/runtime/gpu/cl/IClOperator.h b/src/runtime/gpu/cl/IClOperator.h new file mode 100644 index 0000000000..049bf05dc1 --- /dev/null +++ b/src/runtime/gpu/cl/IClOperator.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_ICL_OPERATOR_H +#define ARM_COMPUTE_ICL_OPERATOR_H + +#include "arm_compute/core/ITensorInfo.h" +#include "arm_compute/runtime/CL/ICLOperator.h" + +namespace arm_compute +{ +namespace opencl +{ +using IClOperator = experimental::ICLOperator; +} // namespace opencl +} // namespace arm_compute +#endif /* ARM_COMPUTE_ICL_OPERATOR_H */ diff --git a/src/runtime/gpu/cl/operators/ClConcatenate.cpp b/src/runtime/gpu/cl/operators/ClConcatenate.cpp new file mode 100644 index 0000000000..4385fcfaed --- /dev/null +++ b/src/runtime/gpu/cl/operators/ClConcatenate.cpp @@ -0,0 +1,254 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "src/runtime/gpu/cl/operators/ClConcatenate.h" + +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +#include "src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h" +#include "src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h" +#include "src/core/gpu/cl/kernels/ClHeightConcatenateKernel.h" +#include "src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h" +#include "src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h" +#include "src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "src/core/helpers/AutoConfiguration.h" + +namespace arm_compute +{ +namespace opencl +{ +ClConcatenate::ClConcatenate() + : _concat_kernels(), + _num_inputs(0), + _axis(Window::DimX) +{ +} + +void ClConcatenate::configure(const CLCompileContext &compile_context, const std::vector &src_vector, ITensorInfo *dst, size_t axis) +{ + ARM_COMPUTE_ERROR_ON(dst == nullptr); + _axis = axis; + _num_inputs = src_vector.size(); + + TensorShape dst_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(src_vector, _axis); + std::vector const_src_vector(src_vector.size()); + std::transform(src_vector.begin(), src_vector.end(), const_src_vector.begin(), [](ITensorInfo * t) + { + ARM_COMPUTE_ERROR_ON_NULLPTR(t); + return t; + }); + + // dst auto inizialitation if not yet initialized + auto_init_if_empty(*dst, dst_shape, 1, src_vector[0]->data_type()); + ARM_COMPUTE_ERROR_THROW_ON(ClConcatenate::validate(const_src_vector, dst, axis)); + + unsigned int offset = 0; + switch(_axis) + { + case Window::DimX: + { + switch(_num_inputs) + { + case 2: + { + // Configure WidthConcatenate2Tensors kernel + auto kernel = std::make_unique(); + kernel->configure(compile_context, src_vector.at(0), src_vector.at(1), dst); + _concat_kernels.emplace_back(std::move(kernel)); + break; + } + case 4: + { + // Configure WidthConcatenate4Tensors kernel + auto kernel = std::make_unique(); + kernel->configure(compile_context, src_vector.at(0), src_vector.at(1), src_vector.at(2), src_vector.at(3), dst); + _concat_kernels.emplace_back(std::move(kernel)); + break; + } + default: + { + // Configure generic case WidthConcatenate kernels + for(unsigned int i = 0; i < _num_inputs; ++i) + { + auto kernel = std::make_unique(); + kernel->configure(compile_context, src_vector.at(i), offset, dst); + offset += src_vector.at(i)->dimension(_axis); + _concat_kernels.emplace_back(std::move(kernel)); + } + break; + } + } + break; + } + case Window::DimY: + { + for(unsigned int i = 0; i < _num_inputs; ++i) + { + auto kernel = std::make_unique(); + kernel->configure(compile_context, src_vector.at(i), offset, dst); + offset += src_vector.at(i)->dimension(_axis); + _concat_kernels.emplace_back(std::move(kernel)); + } + break; + } + case Window::DimZ: + { + for(unsigned int i = 0; i < _num_inputs; ++i) + { + auto kernel = std::make_unique(); + kernel->configure(compile_context, src_vector.at(i), offset, dst); + offset += src_vector.at(i)->dimension(_axis); + _concat_kernels.emplace_back(std::move(kernel)); + } + break; + } + case 3: + { + for(unsigned int i = 0; i < _num_inputs; ++i) + { + auto kernel = std::make_unique(); + kernel->configure(compile_context, src_vector.at(i), offset, dst); + offset += src_vector.at(i)->dimension(_axis); + _concat_kernels.emplace_back(std::move(kernel)); + } + break; + } + default: + ARM_COMPUTE_ERROR("Axis not 
supported"); + } +} + +Status ClConcatenate::validate(const std::vector &src_vector, const ITensorInfo *dst, size_t axis) +{ + ARM_COMPUTE_RETURN_ERROR_ON(dst == nullptr); + const unsigned int num_inputs = src_vector.size(); + + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(dst); + ARM_COMPUTE_RETURN_ERROR_ON(num_inputs < 2); + + unsigned int offset = 0; + switch(axis) + { + case Window::DimX: + { + switch(num_inputs) + { + case 2: + // Validate WidthConcatenate2Tensors kernels if there are 2 inputs + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src_vector[0], src_vector[1]); + ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenate2TensorsKernel::validate(src_vector[0], src_vector[1], dst)); + break; + case 4: + // Validate WidthConcatenate4Tensors kernels if there are 4 inputs + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src_vector[0], src_vector[1], src_vector[2], src_vector[3]); + ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenate4TensorsKernel::validate(src_vector[0], src_vector[1], src_vector[2], src_vector[3], dst)); + break; + default: + // Validate generic case of WidthConcatenate kernel + for(const auto &src : src_vector) + { + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); + ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenateKernel::validate(src, offset, dst)); + offset += src->dimension(axis); + } + break; + } + break; + } + case Window::DimY: + { + for(const auto &src : src_vector) + { + ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClHeightConcatenateKernel::validate(src, offset, dst)); + offset += src->dimension(axis); + } + break; + } + case Window::DimZ: + { + for(const auto &src : src_vector) + { + ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClDepthConcatenateKernel::validate(src, offset, dst)); + offset += src->dimension(axis); + } + break; + } + case 3: + { + for(const auto &src : src_vector) + { + ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClBatchConcatenateKernel::validate(src, offset, dst)); + offset += src->dimension(axis); + } + break; + } + default: + ARM_COMPUTE_ERROR("Axis not supported"); + } + + if(dst->total_size() != 0) + { + TensorShape dst_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(src_vector, axis); + ARM_COMPUTE_RETURN_ERROR_ON(dst_shape.total_size() != dst->tensor_shape().total_size()); + } + + return Status{}; +} + +void ClConcatenate::run(ITensorPack &tensors) +{ + if(tensors.empty()) + { + ARM_COMPUTE_ERROR("No inputs provided"); + } + + if(static_cast(tensors.size()) - 1 != static_cast(_num_inputs)) + { + ARM_COMPUTE_ERROR("Configured with different number of inputs"); + } + + if(_axis == Window::DimX && (_num_inputs == 2 || _num_inputs == 4)) + { + ARM_COMPUTE_ERROR_ON(_concat_kernels.empty()); + CLScheduler::get().enqueue_op(*_concat_kernels.at(0), tensors, true); + } + else + { + int i = 0; + for(auto &k : _concat_kernels) + { + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(ACL_SRC_VEC + i)); + pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(ACL_DST)); + CLScheduler::get().enqueue_op(*k, pack, true); + ++i; + } + } +} +} // namespace opencl +} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClConcatenate.h b/src/runtime/gpu/cl/operators/ClConcatenate.h new file mode 100644 index 0000000000..112e2ac6b7 --- /dev/null +++ b/src/runtime/gpu/cl/operators/ClConcatenate.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2021 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CLCONCATENATE_H +#define ARM_COMPUTE_CLCONCATENATE_H + +#include "src/core/gpu/cl/ClCompileContext.h" +#include "src/core/gpu/cl/IClKernel.h" +#include "src/runtime/gpu/cl/IClOperator.h" + +#include + +namespace arm_compute +{ +namespace opencl +{ +/** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels: + * + * -# @ref kernels::ClWidthConcatenateKernel (if underlying concatenation axis is 0). + * -# @ref kernels::ClHeightConcatenateKernel (if underlying concatenation axis is 1). + * -# @ref kernels::ClDepthConcatenateKernel (if underlying concatenation axis is 2). + * -# @ref kernels::ClBatchConcatenateKernel (if underlying concatenation axis is 3). + */ +class ClConcatenate : public IClOperator +{ +public: + /** Default constructor */ + ClConcatenate(); + /** Initialise the kernel's inputs vector and dst. + * + * @note Input and dst tensor dimensions preconditions defer depending on the concatenation axis. + * @note Preconditions can be found respectively at @ref kernels::ClWidthConcatenateKernel, + * @ref kernels::ClHeightConcatenateKernel and @ref kernels::ClDepthConcatenateKernel. + * + * + * @param[in] compile_context The compile context to be used. + * @param[in,out] src_vector The vectors containing all the tensors to concatenate. Data types supported: All + * @param[out] dst Destination tensor. Data types supported: same as @p src_vector. + * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. + */ + void configure(const ClCompileContext &compile_context, const std::vector &src_vector, ITensorInfo *dst, size_t axis); + /** Static function to check if given info will lead to a valid configuration of @ref ClConcatenate + * + * @note Input and dst tensor dimensions preconditions defer depending on the concatenation axis. + * @note Preconditions can be found respectively at @ref kernels::ClWidthConcatenateKernel, + * @ref kernels::ClHeightConcatenateKernel and @ref kernels::ClDepthConcatenateKernel. + * + * @param[in] src_vector The vectors containing all the tensors info to concatenate. Data types supported: All + * @param[in] dst Destination tensor info. Data types supported: same as @p src_vector. + * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. 
+     *
+     * @return a status
+     */
+    static Status validate(const std::vector<const ITensorInfo *> &src_vector, const ITensorInfo *dst, size_t axis);
+
+    // Inherited methods overridden:
+    void run(ITensorPack &tensors) override;
+
+private:
+    std::vector<std::unique_ptr<IClKernel>> _concat_kernels;
+    unsigned int _num_inputs;
+    unsigned int _axis;
+};
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLCONCATENATE_H */
-- cgit v1.2.1
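
For readers migrating from the removed experimental::CLConcatenation interface, the minimal sketch below illustrates how the new state-less opencl::ClConcatenate operator introduced above is expected to be driven: configuration works on ITensorInfo metadata only, and the actual tensors are bound at run time through an ITensorPack, mirroring what CLConcatenateLayer::configure()/run() now do internally. The helper function name, the tensor objects and the explicit CLKernelLibrary/CLScheduler setup are illustrative assumptions rather than part of this patch; application code would normally keep using the public CLConcatenateLayer facade.

#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "src/runtime/gpu/cl/operators/ClConcatenate.h" // internal header, not part of the public API

using namespace arm_compute;

// Illustrative sketch: concatenate two already-allocated CL tensors along the X (width) axis.
// Assumes CLScheduler::get().default_init() has been called and that dst has been
// initialised and allocated with the concatenated shape.
void concatenate_along_x(CLTensor &src0, CLTensor &src1, CLTensor &dst)
{
    // 1) Configure on tensor metadata only: the operator keeps no tensor pointers.
    opencl::ClConcatenate concat;
    concat.configure(CLKernelLibrary::get().get_compile_context(),
                     { src0.info(), src1.info() }, dst.info(), /* axis */ 0);

    // 2) Bind the actual tensors at run time through an ITensorPack, using the
    //    same ACL_SRC_VEC / ACL_DST slots that ClConcatenate::run() reads back.
    ITensorPack pack;
    pack.add_tensor(TensorType::ACL_SRC_VEC, &src0);
    pack.add_tensor(TensorType::ACL_SRC_VEC + 1, &src1);
    pack.add_tensor(TensorType::ACL_DST, &dst);
    concat.run(pack);

    // Wait for the enqueued kernels before reading dst back.
    CLScheduler::get().sync();
}

The point of removing tensor state from the kernels is visible in this split: once configured, the same operator can be re-run with different tensor bindings, since the tensors are only supplied at enqueue time through the pack.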