author     Michele Di Giorgio <michele.digiorgio@arm.com>    2021-01-18 21:15:59 +0000
committer  Georgios Pinitas <georgios.pinitas@arm.com>       2021-01-20 16:28:27 +0000
commit     7d61ff041826782d14e67b7f5b7a2864905ff38b (patch)
tree       2e69c8a5fdabc6717b0691acdbbe7374d856902f
parent     da6a6eb3bc06ce8869ae3290853970d4c0ce412e (diff)
download   ComputeLibrary-7d61ff041826782d14e67b7f5b7a2864905ff38b.tar.gz
Make all CL Concatenate kernels and functions state-less
Resolves COMPMID-3995

Change-Id: I84172bed20924f1d9ae3b4d14d7b321e9494296e
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4887
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
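State-less here means the operator no longer owns any tensors: configure() and validate() work purely on ITensorInfo metadata, and the concrete tensors are handed over at run time in an ITensorPack. The following is a minimal sketch of how the new opencl::ClConcatenate operator could be driven; the signatures are assumed to match the removed experimental::CLConcatenation interface visible further down in this patch, and the pack slot ids are illustrative rather than taken from the patch.

```cpp
// Sketch only: assumes ClConcatenate keeps the configure/validate/run interface
// of the removed experimental::CLConcatenation, and that source tensors are
// packed under ACL_SRC_VEC + index as in other concatenation functions.
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/experimental/Types.h"
#include "src/runtime/gpu/cl/operators/ClConcatenate.h"

using namespace arm_compute;

void concat_width(const CLCompileContext &ctx, ICLTensor *in0, ICLTensor *in1, ICLTensor *out)
{
    // Configure on metadata only: no tensor memory is captured by the operator.
    opencl::ClConcatenate concat;
    const std::vector<ITensorInfo *> infos{ in0->info(), in1->info() };
    concat.configure(ctx, infos, out->info(), 0U /* width axis */);

    // Bind the real tensors at run time; the configured operator itself stays
    // free of tensor state and can be reused across workloads.
    ITensorPack pack;
    pack.add_tensor(TensorType::ACL_SRC_VEC + 0, in0);
    pack.add_tensor(TensorType::ACL_SRC_VEC + 1, in1);
    pack.add_tensor(TensorType::ACL_DST, out);
    concat.run(pack);
}
```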
-rw-r--r--  Android.bp | 13
-rw-r--r--  SConscript | 4
-rw-r--r--  arm_compute/runtime/CL/functions/CLConcatenateLayer.h | 69
-rw-r--r--  docs/00_introduction.dox | 20
-rw-r--r--  src/core/CL/CLKernels.h | 8
-rw-r--r--  src/core/CL/kernels/CLBatchConcatenateLayerKernel.h | 82
-rw-r--r--  src/core/CL/kernels/CLDepthConcatenateLayerKernel.h | 80
-rw-r--r--  src/core/CL/kernels/CLHeightConcatenateLayerKernel.h | 77
-rw-r--r--  src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h | 73
-rw-r--r--  src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h | 77
-rw-r--r--  src/core/CL/kernels/CLWidthConcatenateLayerKernel.h | 74
-rw-r--r--  src/core/gpu/cl/ClCompileContext.h | 36
-rw-r--r--  src/core/gpu/cl/IClKernel.h | 37
-rw-r--r--  src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp (renamed from src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp) | 80
-rw-r--r--  src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h | 77
-rw-r--r--  src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp (renamed from src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp) | 70
-rw-r--r--  src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h | 77
-rw-r--r--  src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp (renamed from src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp) | 64
-rw-r--r--  src/core/gpu/cl/kernels/ClHeightConcatenateKernel.h | 74
-rw-r--r--  src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp (renamed from src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp) | 84
-rw-r--r--  src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h | 70
-rw-r--r--  src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp (renamed from src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp) | 120
-rw-r--r--  src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h | 75
-rw-r--r--  src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp (renamed from src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp) | 62
-rw-r--r--  src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h | 71
-rw-r--r--  src/runtime/CL/functions/CLConcatenateLayer.cpp | 243
-rw-r--r--  src/runtime/gpu/cl/IClOperator.h | 37
-rw-r--r--  src/runtime/gpu/cl/operators/ClConcatenate.cpp | 254
-rw-r--r--  src/runtime/gpu/cl/operators/ClConcatenate.h | 86
29 files changed, 1195 insertions, 999 deletions
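Nothing changes for users of the public runtime function: CLConcatenateLayer keeps its ICLTensor-based interface and does the tensor packing internally through its Impl. A short usage sketch follows; tensor shapes and setup are illustrative only, not taken from the patch.

```cpp
// Illustrative use of the unchanged public interface; internally the function
// now forwards to the state-less operator introduced by this patch.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    CLTensor in0, in1, out;
    in0.allocator()->init(TensorInfo(TensorShape(8U, 4U, 2U), 1, DataType::F32));
    in1.allocator()->init(TensorInfo(TensorShape(8U, 4U, 2U), 1, DataType::F32));

    CLConcatenateLayer concat;
    std::vector<const ICLTensor *> inputs{ &in0, &in1 };
    concat.configure(inputs, &out, 0U); // concatenate along the width (axis 0)

    in0.allocator()->allocate();
    in1.allocator()->allocate();
    out.allocator()->allocate(); // output info is expected to be auto-initialised during configure()

    concat.run();
    return 0;
}
```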
diff --git a/Android.bp b/Android.bp
index 41ed188e6d..4427bd4fee 100644
--- a/Android.bp
+++ b/Android.bp
@@ -82,7 +82,6 @@ cc_library_static {
"src/core/CL/kernels/CLAccumulateKernel.cpp",
"src/core/CL/kernels/CLActivationLayerKernel.cpp",
"src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp",
- "src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp",
"src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp",
"src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp",
"src/core/CL/kernels/CLBitwiseKernel.cpp",
@@ -101,7 +100,6 @@ cc_library_static {
"src/core/CL/kernels/CLCropKernel.cpp",
"src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp",
"src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp",
- "src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp",
"src/core/CL/kernels/CLDepthConvertLayerKernel.cpp",
"src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp",
"src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp",
@@ -145,7 +143,6 @@ cc_library_static {
"src/core/CL/kernels/CLHOGDescriptorKernel.cpp",
"src/core/CL/kernels/CLHOGDetectorKernel.cpp",
"src/core/CL/kernels/CLHarrisCornersKernel.cpp",
- "src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp",
"src/core/CL/kernels/CLHistogramKernel.cpp",
"src/core/CL/kernels/CLIm2ColKernel.cpp",
"src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp",
@@ -197,9 +194,6 @@ cc_library_static {
"src/core/CL/kernels/CLWarpAffineKernel.cpp",
"src/core/CL/kernels/CLWarpPerspectiveKernel.cpp",
"src/core/CL/kernels/CLWeightsReshapeKernel.cpp",
- "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp",
- "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp",
- "src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp",
"src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp",
"src/core/CL/kernels/CLWinogradInputTransformKernel.cpp",
"src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp",
@@ -437,6 +431,12 @@ cc_library_static {
"src/core/cpu/kernels/add/sve/qsymm16.cpp",
"src/core/cpu/kernels/floor/NEON/fp16.cpp",
"src/core/cpu/kernels/floor/NEON/fp32.cpp",
+ "src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp",
+ "src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp",
+ "src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp",
+ "src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp",
+ "src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp",
+ "src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp",
"src/core/helpers/SoftmaxHelpers.cpp",
"src/core/helpers/WindowHelpers.cpp",
"src/core/utils/ScaleUtils.cpp",
@@ -781,6 +781,7 @@ cc_library_static {
"src/runtime/cpu/operators/CpuFloor.cpp",
"src/runtime/cpu/operators/CpuPermute.cpp",
"src/runtime/cpu/operators/CpuReshape.cpp",
+ "src/runtime/gpu/cl/operators/ClConcatenate.cpp",
"utils/CommonGraphOptions.cpp",
"utils/GraphUtils.cpp",
"utils/Utils.cpp",
diff --git a/SConscript b/SConscript
index 8b8e504832..121cf3220a 100644
--- a/SConscript
+++ b/SConscript
@@ -212,11 +212,15 @@ if env['opencl']:
core_files += Glob('src/core/CL/gemm/native/*.cpp')
core_files += Glob('src/core/CL/gemm/reshaped/*.cpp')
core_files += Glob('src/core/CL/gemm/reshaped_only_rhs/*.cpp')
+ core_files += Glob('src/core/gpu/cl/*.cpp')
+ core_files += Glob('src/core/gpu/cl/kernels/*.cpp')
runtime_files += Glob('src/runtime/CL/*.cpp')
runtime_files += Glob('src/runtime/CL/functions/*.cpp')
runtime_files += Glob('src/runtime/CL/gemm/*.cpp')
runtime_files += Glob('src/runtime/CL/tuners/*.cpp')
+ runtime_files += Glob('src/runtime/gpu/cl/*.cpp')
+ runtime_files += Glob('src/runtime/gpu/cl/operators/*.cpp')
graph_files += Glob('src/graph/backends/CL/*.cpp')
diff --git a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h
index 5e7003a112..bfc8a39ac9 100644
--- a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,7 +24,6 @@
#ifndef ARM_COMPUTE_CLCONCATENATELAYER_H
#define ARM_COMPUTE_CLCONCATENATELAYER_H
-#include "arm_compute/runtime/CL/ICLOperator.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/core/Types.h"
@@ -43,10 +42,10 @@ class Status;
/** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels:
*
- * -# @ref CLWidthConcatenateLayerKernel (if underlying concatenation axis is 0).
- * -# @ref CLHeightConcatenateLayerKernel (if underlying concatenation axis is 1).
- * -# @ref CLDepthConcatenateLayerKernel (if underlying concatenation axis is 2).
- * -# @ref CLBatchConcatenateLayerKernel (if underlying concatenation axis is 3).
+ * -# @ref opencl::kernels::ClWidthConcatenateKernel (if underlying concatenation axis is 0).
+ * -# @ref opencl::kernels::ClHeightConcatenateKernel (if underlying concatenation axis is 1).
+ * -# @ref opencl::kernels::ClDepthConcatenateKernel (if underlying concatenation axis is 2).
+ * -# @ref opencl::kernels::ClBatchConcatenateKernel (if underlying concatenation axis is 3).
*/
class CLConcatenateLayer : public IFunction
{
@@ -66,7 +65,8 @@ public:
/** Initialise the kernel's inputs vector and output.
*
* @note Input and output tensor dimensions preconditions defer depending on the concatenation axis.
- * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel.
+ * @note Preconditions can be found respectively at @ref opencl::kernels::ClWidthConcatenateKernel,
+ * @ref opencl::kernels::ClHeightConcatenateKernel and @ref opencl::kernels::ClDepthConcatenateKernel.
*
* @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: All
* @param[out] output Output tensor. Data types supported: Same as @p input.
@@ -76,7 +76,8 @@ public:
/** Initialise the kernel's inputs vector and output.
*
* @note Input and output tensor dimensions preconditions defer depending on the concatenation axis.
- * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel.
+ * @note Preconditions can be found respectively at @ref opencl::kernels::ClWidthConcatenateKernel,
+ * @ref opencl::kernels::ClHeightConcatenateKernel and @ref opencl::kernels::ClDepthConcatenateKernel.
*
* @param[in] compile_context The compile context to be used.
* @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: All
@@ -87,7 +88,8 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLConcatenateLayer
*
* @note Input and output tensor dimensions preconditions defer depending on the concatenation axis.
- * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel.
+ * @note Preconditions can be found respectively at @ref opencl::kernels::ClWidthConcatenateKernel,
+ * @ref opencl::kernels::ClHeightConcatenateKernel and @ref opencl::kernels::ClDepthConcatenateKernel.
*
* @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: All.
* @param[in] output Output tensor info. Data types supported: Same as @p input.
@@ -104,54 +106,5 @@ private:
struct Impl;
std::unique_ptr<Impl> _impl;
};
-
-namespace experimental
-{
-/** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels:
- *
- * -# @ref CLWidthConcatenateLayerKernel (if underlying concatenation axis is 0).
- * -# @ref CLHeightConcatenateLayerKernel (if underlying concatenation axis is 1).
- * -# @ref CLDepthConcatenateLayerKernel (if underlying concatenation axis is 2).
- * -# @ref CLBatchConcatenateLayerKernel (if underlying concatenation axis is 3).
- */
-class CLConcatenation : public ICLOperator
-{
-public:
- /** Default constructor */
- CLConcatenation();
- /** Initialise the kernel's inputs vector and output.
- *
- * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis.
- * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel.
- *
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: All
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
- */
- void configure(const CLCompileContext &compile_context, const std::vector<ITensorInfo *> &inputs_vector, ITensorInfo *output, size_t axis);
- /** Static function to check if given info will lead to a valid configuration of @ref NEConcatenateLayer
- *
- * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis.
- * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel.
- *
- * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: All
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
- *
- * @return a status
- */
- static Status validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis);
-
- // Inherited methods overridden:
- void run(ITensorPack &tensors) override;
-
-private:
- std::vector<std::unique_ptr<ICLKernel>> _concat_kernels;
- unsigned int _num_inputs;
- unsigned int _axis;
-};
-} // namespace experimental
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLCONCATENATELAYER_H */
diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox
index 9c0020da66..af78a70abc 100644
--- a/docs/00_introduction.dox
+++ b/docs/00_introduction.dox
@@ -165,7 +165,7 @@ v20.11 Public major release
- @ref NEGEMMLowpMatrixAReductionKernel
- @ref NEGEMMLowpMatrixBReductionKernel
- Removed padding from OpenCL kernels:
- - @ref CLBatchConcatenateLayerKernel
+ - CLBatchConcatenateLayerKernel
- @ref CLElementwiseOperationKernel
- @ref CLBatchNormalizationLayerKernel
- @ref CLPoolingLayerKernel
@@ -184,17 +184,17 @@ v20.11 Public major release
- @ref CLDepthwiseConvolutionLayer3x3NHWCKernel
- @ref CLActivationLayerKernel
- @ref CLWinogradFilterTransformKernel
- - @ref CLWidthConcatenateLayerKernel
- - @ref CLWidthConcatenate4TensorsKernel
- - @ref CLWidthConcatenate2TensorsKernel
+ - CLWidthConcatenateLayerKernel
+ - CLWidthConcatenate4TensorsKernel
+ - CLWidthConcatenate2TensorsKernel
- @ref CLLogits1DMaxShiftExpSumKernel
- @ref CLLogits1DNormKernel
- - @ref CLHeightConcatenateLayerKernel
+ - CLHeightConcatenateLayerKernel
- @ref CLGEMMMatrixMultiplyKernel
- @ref CLGEMMLowpQuantizeDownInt32ScaleKernel
- @ref CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel
- @ref CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel
- - @ref CLDepthConcatenateLayerKernel
+ - CLDepthConcatenateLayerKernel
- @ref CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel
- Removed OpenCL kernels / functions:
- CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
@@ -721,7 +721,7 @@ v19.08 Public major release
- @ref CLNegLayer
- @ref CLPReluLayer
- @ref CLSinLayer
- - @ref CLBatchConcatenateLayerKernel
+ - CLBatchConcatenateLayerKernel
- @ref CLDepthToSpaceLayerKernel / @ref CLDepthToSpaceLayer
- @ref CLGEMMLowpMatrixMultiplyNativeKernel
- CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
@@ -773,7 +773,7 @@ v19.05 Public major release
- @ref CLFFTScaleKernel
- @ref CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel
- @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel
- - @ref CLHeightConcatenateLayerKernel
+ - CLHeightConcatenateLayerKernel
- @ref CLDirectDeconvolutionLayer
- @ref CLFFT1D
- @ref CLFFT2D
@@ -1011,7 +1011,7 @@ v18.05 Public major release
- @ref CLCopy / @ref CLCopyKernel
- @ref CLLSTMLayer
- @ref CLRNNLayer
- - CLWidthConcatenateLayer / @ref CLWidthConcatenateLayerKernel
+ - CLWidthConcatenateLayer / CLWidthConcatenateLayerKernel
- @ref CLWinogradFilterTransformKernel / @ref CLWinogradInputTransformKernel / @ref CLWinogradConvolutionLayer
- @ref CLWinogradInputTransformKernel / @ref CLWinogradInputTransform
- New Neon kernels / functions:
@@ -1220,7 +1220,7 @@ v17.06 Public major release
- User can specify his own scheduler by implementing the @ref IScheduler interface.
- New OpenCL kernels / functions:
- @ref CLBatchNormalizationLayerKernel / @ref CLBatchNormalizationLayer
- - @ref CLDepthConcatenateLayerKernel / CLDepthConcatenateLayer
+ - CLDepthConcatenateLayerKernel / CLDepthConcatenateLayer
- @ref CLHOGOrientationBinningKernel @ref CLHOGBlockNormalizationKernel, @ref CLHOGDetectorKernel / @ref CLHOGDescriptor @ref CLHOGDetector @ref CLHOGGradient @ref CLHOGMultiDetection
- CLLocallyConnectedMatrixMultiplyKernel / CLLocallyConnectedLayer
- @ref CLWeightsReshapeKernel / @ref CLConvolutionLayerReshapeWeights
diff --git a/src/core/CL/CLKernels.h b/src/core/CL/CLKernels.h
index f23871d4db..11f1d2d7cf 100644
--- a/src/core/CL/CLKernels.h
+++ b/src/core/CL/CLKernels.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,7 +29,6 @@
#include "src/core/CL/kernels/CLAccumulateKernel.h"
#include "src/core/CL/kernels/CLActivationLayerKernel.h"
#include "src/core/CL/kernels/CLArgMinMaxLayerKernel.h"
-#include "src/core/CL/kernels/CLBatchConcatenateLayerKernel.h"
#include "src/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
#include "src/core/CL/kernels/CLBatchToSpaceLayerKernel.h"
#include "src/core/CL/kernels/CLBitwiseKernel.h"
@@ -48,7 +47,6 @@
#include "src/core/CL/kernels/CLCropKernel.h"
#include "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
#include "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h"
-#include "src/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "src/core/CL/kernels/CLDepthToSpaceLayerKernel.h"
#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
@@ -92,7 +90,6 @@
#include "src/core/CL/kernels/CLHOGDescriptorKernel.h"
#include "src/core/CL/kernels/CLHOGDetectorKernel.h"
#include "src/core/CL/kernels/CLHarrisCornersKernel.h"
-#include "src/core/CL/kernels/CLHeightConcatenateLayerKernel.h"
#include "src/core/CL/kernels/CLHistogramKernel.h"
#include "src/core/CL/kernels/CLIm2ColKernel.h"
#include "src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h"
@@ -144,9 +141,6 @@
#include "src/core/CL/kernels/CLWarpAffineKernel.h"
#include "src/core/CL/kernels/CLWarpPerspectiveKernel.h"
#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
-#include "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
-#include "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h"
-#include "src/core/CL/kernels/CLWidthConcatenateLayerKernel.h"
#include "src/core/CL/kernels/CLWinogradFilterTransformKernel.h"
#include "src/core/CL/kernels/CLWinogradInputTransformKernel.h"
#include "src/core/CL/kernels/CLWinogradOutputTransformKernel.h"
diff --git a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.h b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.h
deleted file mode 100644
index 54a89eb243..0000000000
--- a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H
-#define ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H
-
-#include "arm_compute/core/Types.h"
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the batch concatenate kernel.
- * The input tensor will be concatenated into the output tensor.
- */
-class CLBatchConcatenateLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLBatchConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBatchConcatenateLayerKernel(const CLBatchConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBatchConcatenateLayerKernel &operator=(const CLBatchConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLBatchConcatenateLayerKernel(CLBatchConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLBatchConcatenateLayerKernel &operator=(CLBatchConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~CLBatchConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: All.
- * @param[in] batch_offset The offset on axis # 3.
- * @param[in,out] output Output tensor. Data types supported: Same as @p input.
- *
- * @note: The output tensor's low two dimensions can't be smaller than the input one's.
- * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
- *
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLBatchConcatenateLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: All.
- * @param[in] batch_offset The offset on axis # 3.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
-
-private:
- unsigned int _batch_offset;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H */
diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.h b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.h
deleted file mode 100644
index 6c73bd4bf4..0000000000
--- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H
-#define ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H
-
-#include "arm_compute/core/Types.h"
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-/** Interface for the depth concatenate kernel.
- * The input tensor will be concatenated into the output tensor.
- */
-class CLDepthConcatenateLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLDepthConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthConcatenateLayerKernel(const CLDepthConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthConcatenateLayerKernel &operator=(const CLDepthConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLDepthConcatenateLayerKernel(CLDepthConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLDepthConcatenateLayerKernel &operator=(CLDepthConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~CLDepthConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] depth_offset The offset on the Z axis.
- * @param[in,out] output Output tensor. Data types supported: Same as @p input.
- *
- * @note: The output tensor's low two dimensions can't be smaller than the input one's.
- * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
- *
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConcatenateLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[in] depth_offset The offset on the Z axis.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
-
-private:
- unsigned int _depth_offset;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H */
diff --git a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.h b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.h
deleted file mode 100644
index f4cb627052..0000000000
--- a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H
-#define ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H
-
-#include "arm_compute/core/Types.h"
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-/** Interface for the height concatenate kernel.
- * The input tensor will be concatenated into the output tensor.
- */
-class CLHeightConcatenateLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLHeightConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHeightConcatenateLayerKernel(const CLHeightConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHeightConcatenateLayerKernel &operator=(const CLHeightConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLHeightConcatenateLayerKernel(CLHeightConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLHeightConcatenateLayerKernel &operator=(CLHeightConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~CLHeightConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: All.
- * @param[in] height_offset The starting offset on the Y axis for the output tensor.
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- *
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int height_offset, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLHeightConcatenateLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: All.
- * @param[in] height_offset The starting offset on the Y axis for the output tensor.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
-
-private:
- unsigned int _height_offset;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H */
diff --git a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h
deleted file mode 100644
index 2af89e12eb..0000000000
--- a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H
-#define ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H
-
-#include "arm_compute/core/Types.h"
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-/** Interface for the width concatenate kernel of 2 tensors.
- * The input1 and input2 tensors will be concatenated into the output tensor.
- */
-class CLWidthConcatenate2TensorsKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLWidthConcatenate2TensorsKernel() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWidthConcatenate2TensorsKernel(const CLWidthConcatenate2TensorsKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWidthConcatenate2TensorsKernel &operator=(const CLWidthConcatenate2TensorsKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLWidthConcatenate2TensorsKernel(CLWidthConcatenate2TensorsKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLWidthConcatenate2TensorsKernel &operator=(CLWidthConcatenate2TensorsKernel &&) = default;
- /** Default destructor */
- ~CLWidthConcatenate2TensorsKernel() = default;
- /** Initialise the kernel's input1s and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 First input tensor. Data types supported: All.
- * @param[in] input2 Second input tensor. Data types supported: same as @p input1
- * @param[out] output Output tensor. Data types supported: Same as @p input1.
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate2TensorsKernel
- *
- * @param[in] input1 First tensor info. Data types supported: All.
- * @param[in] input2 Second tensor info. Data types supported: same as @p input1
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H */
diff --git a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h
deleted file mode 100644
index 0caf87114d..0000000000
--- a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H
-#define ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H
-
-#include "arm_compute/core/Types.h"
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-/** Interface for the width concatenate kernel of 4 tensors.
- * All input tensors will be concatenated into the output tensor.
- */
-class CLWidthConcatenate4TensorsKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLWidthConcatenate4TensorsKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWidthConcatenate4TensorsKernel(const CLWidthConcatenate4TensorsKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWidthConcatenate4TensorsKernel &operator=(const CLWidthConcatenate4TensorsKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLWidthConcatenate4TensorsKernel(CLWidthConcatenate4TensorsKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLWidthConcatenate4TensorsKernel &operator=(CLWidthConcatenate4TensorsKernel &&) = default;
- /** Default destructor */
- ~CLWidthConcatenate4TensorsKernel() = default;
- /** Initialise the kernel's input1s and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 First input tensor. Data types supported: All.
- * @param[in] input2 Second input tensor. Data types supported: same as @p input1
- * @param[in] input3 Third input tensor. Data types supported: same as @p input1
- * @param[in] input4 Fourth input tensor. Data types supported: same as @p input1
- * @param[out] output Output tensor. Data types supported: Same as @p input1.
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *input3, ITensorInfo *input4, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate4TensorsKernel
- *
- * @param[in] input1 First tensor info. Data types supported: All.
- * @param[in] input2 Second tensor info. Data types supported: same as @p input1
- * @param[in] input3 Third tensor info. Data types supported: same as @p input1
- * @param[in] input4 Fourth tensor info. Data types supported: same as @p input1
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *input3, const ITensorInfo *input4, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H */
diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.h b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.h
deleted file mode 100644
index 09c3f4455d..0000000000
--- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H
-#define ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H
-
-#include "arm_compute/core/Types.h"
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-/** Interface for the width concatenate kernel.
- * The input tensor will be concatenated into the output tensor.
- */
-class CLWidthConcatenateLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLWidthConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWidthConcatenateLayerKernel(const CLWidthConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWidthConcatenateLayerKernel &operator=(const CLWidthConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLWidthConcatenateLayerKernel(CLWidthConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLWidthConcatenateLayerKernel &operator=(CLWidthConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~CLWidthConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: All.
- * @param[in] width_offset The offset on the X axis.
- * @param[in,out] output Output tensor. Data types supported: Same as @p input.
- *
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int width_offset, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenateLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: All.
- * @param[in] width_offset The offset on the X axis.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H */
diff --git a/src/core/gpu/cl/ClCompileContext.h b/src/core/gpu/cl/ClCompileContext.h
new file mode 100644
index 0000000000..e69cc0200f
--- /dev/null
+++ b/src/core/gpu/cl/ClCompileContext.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_COMPILE_CONTEXT_H
+#define ARM_COMPUTE_CL_COMPILE_CONTEXT_H
+
+#include "arm_compute/core/CL/CLCompileContext.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+using ClCompileContext = arm_compute::CLCompileContext;
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_COMPILE_CONTEXT_H */
diff --git a/src/core/gpu/cl/IClKernel.h b/src/core/gpu/cl/IClKernel.h
new file mode 100644
index 0000000000..52ea3c9183
--- /dev/null
+++ b/src/core/gpu/cl/IClKernel.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_ICL_KERNEL_H
+#define ARM_COMPUTE_ICL_KERNEL_H
+
+#include "arm_compute/core/ITensorInfo.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+using IClKernel = arm_compute::ICLKernel;
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_ICL_KERNEL_H */
diff --git a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp b/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp
index ccd6a5a0fc..c16ff1f028 100644
--- a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/core/CL/kernels/CLBatchConcatenateLayerKernel.h"
+#include "src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
@@ -36,50 +36,54 @@
namespace arm_compute
{
+namespace opencl
+{
+namespace kernels
+{
namespace
{
-Status validate_arguments(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output)
+Status validate_arguments(const ITensorInfo *src, unsigned int batch_offset, const ITensorInfo *dst)
{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
+ ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimX) != output->dimension(Window::DimX));
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimY) != output->dimension(Window::DimY));
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimZ) != output->dimension(Window::DimZ));
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(3) + batch_offset > output->dimension(3));
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(4, input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimX) != dst->dimension(Window::DimX));
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimY) != dst->dimension(Window::DimY));
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimZ) != dst->dimension(Window::DimZ));
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(3) + batch_offset > dst->dimension(3));
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(4, src, dst);
return Status{};
}
} // namespace
-CLBatchConcatenateLayerKernel::CLBatchConcatenateLayerKernel()
+ClBatchConcatenateKernel::ClBatchConcatenateKernel()
: _batch_offset(0)
{
}
-void CLBatchConcatenateLayerKernel::configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output)
+void ClBatchConcatenateKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int batch_offset, ITensorInfo *dst)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, batch_offset, output));
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, batch_offset, dst));
- auto padding_info = get_padding_info({ input, output });
+ auto padding_info = get_padding_info({ src, dst });
_batch_offset = batch_offset;
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16 / input->element_size(), input->dimension(0));
+ const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16 / src->element_size(), src->dimension(0));
// Add build options
CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->data_type()));
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src->data_type()));
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
- build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->dimension(0) % num_elems_processed_per_iteration));
- if(is_data_type_quantized_asymmetric(input->data_type()) && input->quantization_info() != output->quantization_info())
+ build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(src->dimension(0) % num_elems_processed_per_iteration));
+ if(is_data_type_quantized_asymmetric(src->data_type()) && src->quantization_info() != dst->quantization_info())
{
- const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
- const UniformQuantizationInfo oq_info = output->quantization_info().uniform();
+ const UniformQuantizationInfo iq_info = src->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = dst->quantization_info().uniform();
build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset));
build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset));
@@ -91,12 +95,12 @@ void CLBatchConcatenateLayerKernel::configure(const CLCompileContext &compile_co
_kernel = create_kernel(compile_context, "concatenate", build_opts.options());
// Configure kernel window
- auto win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
- win.set(3, Window::Dimension(0, input->tensor_shape()[3], 1));
+ auto win = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration));
+ win.set(3, Window::Dimension(0, src->tensor_shape()[3], 1));
ICLKernel::configure_internal(win);
- // Set output valid region
- output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape()));
+ // Set dst valid region
+ dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
// Set config_id for enabling LWS tuning
_config_id = "concatenate_";
@@ -104,26 +108,26 @@ void CLBatchConcatenateLayerKernel::configure(const CLCompileContext &compile_co
_config_id += "_";
_config_id += support::cpp11::to_string(batch_offset);
_config_id += "_";
- _config_id += support::cpp11::to_string(input->dimension(0));
+ _config_id += support::cpp11::to_string(src->dimension(0));
_config_id += "_";
- _config_id += support::cpp11::to_string(input->dimension(1));
+ _config_id += support::cpp11::to_string(src->dimension(1));
_config_id += "_";
- _config_id += support::cpp11::to_string(input->dimension(2));
+ _config_id += support::cpp11::to_string(src->dimension(2));
_config_id += "_";
- _config_id += support::cpp11::to_string(input->dimension(3));
+ _config_id += support::cpp11::to_string(src->dimension(3));
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-Status CLBatchConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *input,
- unsigned int batch_offset,
- const arm_compute::ITensorInfo *output)
+Status ClBatchConcatenateKernel::validate(const arm_compute::ITensorInfo *src,
+ unsigned int batch_offset,
+ const arm_compute::ITensorInfo *dst)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, batch_offset, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, batch_offset, dst));
return Status{};
}
-void CLBatchConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
+void ClBatchConcatenateKernel::run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
@@ -135,7 +139,7 @@ void CLBatchConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &w
const int offset_to_first_elements_in_bytes = _batch_offset * dst->info()->strides_in_bytes()[3];
- unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the input and output parameters
+ unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the src and dst parameters
_kernel.setArg<cl_int>(idx, offset_to_first_elements_in_bytes);
do
@@ -147,4 +151,6 @@ void CLBatchConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &w
}
while(window.slide_window_slice_3D(slice));
}
+} // namespace opencl
+} // namespace kernels
} // namespace arm_compute
diff --git a/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h b/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h
new file mode 100644
index 0000000000..378a08aa4f
--- /dev/null
+++ b/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_BATCH_CONCATENATE_KERNEL_H
+#define ARM_COMPUTE_CL_BATCH_CONCATENATE_KERNEL_H
+
+#include "src/core/common/Macros.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+/** Interface for the batch concatenate kernel.
+ * The src tensor will be concatenated into the destination tensor.
+ */
+class ClBatchConcatenateKernel : public IClKernel
+{
+public:
+ /** Default constructor */
+ ClBatchConcatenateKernel();
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClBatchConcatenateKernel);
+ /** Initialise the kernel's source and destination
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] src Source tensor. Data types supported: All.
+ * @param[in] batch_offset The offset on axis # 3.
+ * @param[in,out] dst Destination tensor. Data types supported: Same as @p src.
+ *
+ * @note: The dst tensor's low two dimensions can't be smaller than the src one's.
+ * @note: The gaps between the two lowest dimensions of src and dst need to be divisible by 2.
+ *
+ */
+ void configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int batch_offset, ITensorInfo *dst);
+ /** Static function to check if given info will lead to a valid configuration of @ref ClBatchConcatenateKernel
+ *
+ * @param[in] src Input tensor info. Data types supported: All.
+ * @param[in] batch_offset The offset on axis # 3.
+ * @param[in] dst Destination tensor info. Data types supported: Same as @p src.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *src, unsigned int batch_offset, const ITensorInfo *dst);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override;
+
+private:
+ unsigned int _batch_offset;
+};
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_BATCH_CONCATENATE_KERNEL_H */
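The header above only declares a state-less interface: configure() and validate() work on ITensorInfo, and the actual tensors are bound at run time through an ITensorPack. A minimal sketch of that pattern follows; the configure/validate/run_op signatures are the ones declared above, but the ACL_SRC/ACL_DST slot ids and the CLKernelLibrary/CLScheduler plumbing are assumptions for illustration, and since the kernel lives under src/ such code would only build inside the library tree (applications normally go through CLConcatenateLayer instead).

    // Hedged sketch: driving a state-less ClBatchConcatenateKernel directly.
    #include "arm_compute/core/CL/CLKernelLibrary.h"
    #include "arm_compute/core/ITensorPack.h"
    #include "arm_compute/core/experimental/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h"

    using namespace arm_compute;

    void batch_concat_sketch(CLTensor &src, CLTensor &dst, unsigned int batch_offset)
    {
        opencl::kernels::ClBatchConcatenateKernel kernel;
        // Configuration only touches metadata; no tensor backing is captured by the kernel.
        kernel.configure(CLKernelLibrary::get().get_compile_context(), src.info(), batch_offset, dst.info());

        // Tensors are supplied per run through an ITensorPack (slot ids assumed).
        ITensorPack pack;
        pack.add_const_tensor(TensorType::ACL_SRC, &src);
        pack.add_tensor(TensorType::ACL_DST, &dst);
        kernel.run_op(pack, kernel.window(), CLScheduler::get().queue());
    }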
diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp b/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp
index eb5bfc2d86..e8893d76d2 100644
--- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
+#include "src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
@@ -36,49 +36,53 @@
namespace arm_compute
{
+namespace opencl
+{
+namespace kernels
+{
namespace
{
-Status validate_arguments(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output)
+Status validate_arguments(const ITensorInfo *src, unsigned int depth_offset, const ITensorInfo *dst)
{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimX) != output->dimension(Window::DimX));
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimY) != output->dimension(Window::DimY));
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(2) + depth_offset > output->dimension(2));
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(3, input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimX) != dst->dimension(Window::DimX));
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimY) != dst->dimension(Window::DimY));
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(2) + depth_offset > dst->dimension(2));
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(3, src, dst);
return Status{};
}
} // namespace
-CLDepthConcatenateLayerKernel::CLDepthConcatenateLayerKernel()
+ClDepthConcatenateKernel::ClDepthConcatenateKernel()
: _depth_offset(0)
{
}
-void CLDepthConcatenateLayerKernel::configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output)
+void ClDepthConcatenateKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int depth_offset, ITensorInfo *dst)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, depth_offset, output));
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, depth_offset, dst));
- auto padding_info = get_padding_info({ input, output });
+ auto padding_info = get_padding_info({ src, dst });
_depth_offset = depth_offset;
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16 / input->element_size(), input->dimension(0));
+ const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16 / src->element_size(), src->dimension(0));
// Add build options
CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->data_type()));
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src->data_type()));
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
- build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->dimension(0) % num_elems_processed_per_iteration));
- if(is_data_type_quantized_asymmetric(input->data_type()) && input->quantization_info() != output->quantization_info())
+ build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(src->dimension(0) % num_elems_processed_per_iteration));
+ if(is_data_type_quantized_asymmetric(src->data_type()) && src->quantization_info() != dst->quantization_info())
{
- const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
- const UniformQuantizationInfo oq_info = output->quantization_info().uniform();
+ const UniformQuantizationInfo iq_info = src->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = dst->quantization_info().uniform();
build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset));
build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset));
@@ -90,25 +94,25 @@ void CLDepthConcatenateLayerKernel::configure(const CLCompileContext &compile_co
_kernel = create_kernel(compile_context, "concatenate", build_opts.options());
// Configure kernel window
- auto win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
- win.set(Window::DimZ, Window::Dimension(0, input->tensor_shape().z(), 1));
+ auto win = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration));
+ win.set(Window::DimZ, Window::Dimension(0, src->tensor_shape().z(), 1));
ICLKernel::configure_internal(win);
- // Set output valid region
- output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape()));
+ // Set dst valid region
+ dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-Status CLDepthConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *input,
- unsigned int depth_offset,
- const arm_compute::ITensorInfo *output)
+Status ClDepthConcatenateKernel::validate(const arm_compute::ITensorInfo *src,
+ unsigned int depth_offset,
+ const arm_compute::ITensorInfo *dst)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, depth_offset, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, depth_offset, dst));
return Status{};
}
-void CLDepthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
+void ClDepthConcatenateKernel::run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
@@ -120,7 +124,7 @@ void CLDepthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &w
const int offset_to_first_elements_in_bytes = _depth_offset * dst->info()->strides_in_bytes()[2];
- unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the input and output parameters
+ unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the src and dst parameters
_kernel.setArg<cl_int>(idx, offset_to_first_elements_in_bytes);
do
@@ -132,4 +136,6 @@ void CLDepthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &w
}
while(window.slide_window_slice_3D(slice));
}
+} // namespace kernels
+} // namespace opencl
} // namespace arm_compute
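The configure() above derives the per-iteration vector width from the element size (16 / element_size) clamped by adjust_vec_size to the innermost dimension, and passes the remainder as VEC_SIZE_LEFTOVER so the kernel can handle a partial tail vector. A self-contained sketch of that arithmetic, modelling adjust_vec_size as a plain clamp (an assumption about the helper's exact behaviour):

    // Vectorization arithmetic behind -DVEC_SIZE / -DVEC_SIZE_LEFTOVER (hedged sketch).
    #include <algorithm>
    #include <cstdio>

    int main()
    {
        const unsigned int element_size = 4;                           // e.g. F32
        const unsigned int width        = 9;                           // src->dimension(0)
        const unsigned int requested    = 16 / element_size;           // 4 elements per access
        const unsigned int vec_size     = std::min(requested, width);  // assumed clamp behaviour
        const unsigned int leftover     = width % vec_size;            // 9 % 4 = 1

        // The kernel would be compiled with -DVEC_SIZE=4 -DVEC_SIZE_LEFTOVER=1,
        // so the last element of each row is processed as a 1-element tail.
        std::printf("VEC_SIZE=%u VEC_SIZE_LEFTOVER=%u\n", vec_size, leftover);
        return 0;
    }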
diff --git a/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h b/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h
new file mode 100644
index 0000000000..144d7d48f2
--- /dev/null
+++ b/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2017-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_DEPTH_CONCATENATE_KERNEL_H
+#define ARM_COMPUTE_CL_DEPTH_CONCATENATE_KERNEL_H
+
+#include "src/core/common/Macros.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+/** Interface for the depth concatenate kernel.
+ * The src tensor will be concatenated into the dst tensor.
+ */
+class ClDepthConcatenateKernel : public IClKernel
+{
+public:
+ /** Default constructor */
+ ClDepthConcatenateKernel();
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClDepthConcatenateKernel);
+ /** Initialise the kernel's source and destination
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] src Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] depth_offset The offset on the Z axis.
+ * @param[in,out] dst Destination tensor. Data types supported: Same as @p src.
+ *
+ * @note: The lowest two dimensions of the dst tensor can't be smaller than those of the src tensor.
+ * @note: The differences between the lowest two dimensions of src and dst must be divisible by 2.
+ *
+ */
+ void configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int depth_offset, ITensorInfo *dst);
+ /** Static function to check if given info will lead to a valid configuration of @ref ClDepthConcatenateKernel
+ *
+ * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+ * @param[in] depth_offset The offset on the Z axis.
+ * @param[in] dst Destination tensor info. Data types supported: Same as @p src.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *src, unsigned int depth_offset, const ITensorInfo *dst);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override;
+
+private:
+ unsigned int _depth_offset;
+};
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_DEPTH_CONCATENATE_KERNEL_H */
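When the source and destination carry different quantization info, configure() forwards OFFSET_IN1/SCALE_IN1 and OFFSET_OUT/SCALE_OUT so the OpenCL kernel can re-quantize values as it copies them. The mapping those constants imply is the usual dequantize-then-requantize step; the rounding mode and QASYMM8 clamp below are assumptions, since the OpenCL-side arithmetic is not part of this hunk.

    // Hedged sketch of the re-quantization implied by the OFFSET_*/SCALE_* build options.
    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    uint8_t requantize(uint8_t q_in, float scale_in, int offset_in, float scale_out, int offset_out)
    {
        const float real = scale_in * (static_cast<int>(q_in) - offset_in);              // dequantize with src params
        const int   q    = static_cast<int>(std::lround(real / scale_out)) + offset_out; // requantize with dst params
        return static_cast<uint8_t>(std::min(255, std::max(0, q)));                      // clamp to QASYMM8 range
    }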
diff --git a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp b/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp
index 8aa7366d50..83e976e10f 100644
--- a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/core/CL/kernels/CLHeightConcatenateLayerKernel.h"
+#include "src/core/gpu/cl/kernels/ClHeightConcatenateKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
@@ -37,60 +37,64 @@
namespace arm_compute
{
+namespace opencl
+{
+namespace kernels
+{
namespace
{
-Status validate_arguments(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output)
+Status validate_arguments(const ITensorInfo *src, unsigned int height_offset, const ITensorInfo *dst)
{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimY) + height_offset > output->dimension(Window::DimY));
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimY) + height_offset > dst->dimension(Window::DimY));
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != output->dimension(0));
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(0) != dst->dimension(0));
for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i)
{
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(i) != output->dimension(i));
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(i) != dst->dimension(i));
}
- ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4);
+ ARM_COMPUTE_RETURN_ERROR_ON(src->num_dimensions() > 4);
return Status{};
}
} // namespace
-CLHeightConcatenateLayerKernel::CLHeightConcatenateLayerKernel()
+ClHeightConcatenateKernel::ClHeightConcatenateKernel()
: _height_offset(0)
{
}
-Status CLHeightConcatenateLayerKernel::validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output)
+Status ClHeightConcatenateKernel::validate(const ITensorInfo *src, unsigned int height_offset, const ITensorInfo *dst)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, height_offset, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, height_offset, dst));
return Status{};
}
-void CLHeightConcatenateLayerKernel::configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int height_offset, ITensorInfo *output)
+void ClHeightConcatenateKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int height_offset, ITensorInfo *dst)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, height_offset, output));
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, height_offset, dst));
- auto padding_info = get_padding_info({ input, output });
+ auto padding_info = get_padding_info({ src, dst });
_height_offset = height_offset;
// Add build options
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(4, input->dimension(0));
+ const unsigned int num_elems_processed_per_iteration = adjust_vec_size(4, src->dimension(0));
CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(input->element_size()));
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(src->element_size()));
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
build_opts.add_option("-DHEIGHT_OFFSET=" + support::cpp11::to_string(_height_offset));
- build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input->dimension(2)));
- build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->dimension(0) % num_elems_processed_per_iteration));
+ build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(src->dimension(2)));
+ build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(src->dimension(0) % num_elems_processed_per_iteration));
- if(is_data_type_quantized_asymmetric(input->data_type()) && input->quantization_info() != output->quantization_info())
+ if(is_data_type_quantized_asymmetric(src->data_type()) && src->quantization_info() != dst->quantization_info())
{
- const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
- const UniformQuantizationInfo oq_info = output->quantization_info().uniform();
+ const UniformQuantizationInfo iq_info = src->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = dst->quantization_info().uniform();
build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset));
build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset));
@@ -102,17 +106,17 @@ void CLHeightConcatenateLayerKernel::configure(const CLCompileContext &compile_c
_kernel = create_kernel(compile_context, "concatenate_height", build_opts.options());
// Configure kernel window
- // The window needs to be based on input as we copy all the heights of input
- Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
+ // The window needs to be based on src as we copy all the heights of src
+ Window win = calculate_max_window(*src, Steps(num_elems_processed_per_iteration));
ICLKernel::configure_internal(win.collapse(win, Window::DimZ));
- // Set output valid region
- output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape()));
+ // Set dst valid region
+ dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-void CLHeightConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
+void ClHeightConcatenateKernel::run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
@@ -125,4 +129,6 @@ void CLHeightConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &
add_4D_tensor_argument(idx, dst, window);
enqueue(queue, *this, window, lws_hint());
}
+} // namespace kernels
+} // namespace opencl
} // namespace arm_compute
diff --git a/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.h b/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.h
new file mode 100644
index 0000000000..88cd4c4d17
--- /dev/null
+++ b/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_HEIGHT_CONCATENATE_LAYER_KERNEL_H
+#define ARM_COMPUTE_CL_HEIGHT_CONCATENATE_LAYER_KERNEL_H
+
+#include "src/core/common/Macros.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+/** Interface for the height concatenate kernel.
+ * The source tensor will be concatenated into the destination tensor.
+ */
+class ClHeightConcatenateKernel : public IClKernel
+{
+public:
+ /** Default constructor */
+ ClHeightConcatenateKernel();
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClHeightConcatenateKernel);
+ /** Initialise the kernel's source and destination
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] src Source tensor. Data types supported: All.
+ * @param[in] height_offset The starting offset on the Y axis for the dst tensor.
+ * @param[out] dst Destination tensor. Data types supported: same as @p src.
+ *
+ */
+ void configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int height_offset, ITensorInfo *dst);
+ /** Static function to check if given info will lead to a valid configuration of @ref ClHeightConcatenateKernel
+ *
+ * @param[in] src Source tensor info. Data types supported: All.
+ * @param[in] height_offset The starting offset on the Y axis for the dst tensor.
+ * @param[in] dst Destination tensor info. Data types supported: same as @p src.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *src, unsigned int height_offset, const ITensorInfo *dst);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override;
+
+private:
+ unsigned int _height_offset;
+};
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_HEIGHT_CONCATENATE_LAYER_KERNEL_H */
diff --git a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp b/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp
index d6697ba46b..6a2ab3b50f 100644
--- a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
+#include "src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
@@ -37,62 +37,66 @@
namespace arm_compute
{
+namespace opencl
+{
+namespace kernels
+{
namespace
{
-Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
+Status validate_arguments(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst)
{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output);
- ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input1);
- ARM_COMPUTE_RETURN_ERROR_ON(input1->data_type() == DataType::UNKNOWN);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2, output);
- ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(0) + input2->dimension(0) > output->dimension(0));
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src1, src2, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src1);
+ ARM_COMPUTE_RETURN_ERROR_ON(src1->data_type() == DataType::UNKNOWN);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src1, src2, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON(src1->dimension(0) + src2->dimension(0) > dst->dimension(0));
for(size_t i = 1; i < Coordinates::num_max_dimensions; ++i)
{
- ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(i) != output->dimension(i));
- ARM_COMPUTE_RETURN_ERROR_ON(input2->dimension(i) != output->dimension(i));
+ ARM_COMPUTE_RETURN_ERROR_ON(src1->dimension(i) != dst->dimension(i));
+ ARM_COMPUTE_RETURN_ERROR_ON(src2->dimension(i) != dst->dimension(i));
}
- ARM_COMPUTE_RETURN_ERROR_ON(input1->num_dimensions() > 4);
+ ARM_COMPUTE_RETURN_ERROR_ON(src1->num_dimensions() > 4);
return Status{};
}
} // namespace
-Status CLWidthConcatenate2TensorsKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
+Status ClWidthConcatenate2TensorsKernel::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src1, src2, dst));
return Status{};
}
-void CLWidthConcatenate2TensorsKernel::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output)
+void ClWidthConcatenate2TensorsKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1, input2, output));
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src1, src2, dst);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src1, src2, dst));
- auto padding_info = get_padding_info({ input1, input2, output });
+ auto padding_info = get_padding_info({ src1, src2, dst });
- const unsigned int min_dimension = std::min(input1->dimension(0), input2->dimension(0));
+ const unsigned int min_dimension = std::min(src1->dimension(0), src2->dimension(0));
const unsigned int num_elems_processed_per_iteration = adjust_vec_size(8, min_dimension);
- const unsigned int vec_size_leftover = output->dimension(0) % num_elems_processed_per_iteration;
+ const unsigned int vec_size_leftover = dst->dimension(0) % num_elems_processed_per_iteration;
// Add build options
CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input1->data_type()));
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src1->data_type()));
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(vec_size_leftover));
- build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input1->dimension(2)));
- build_opts.add_option("-DINPUT1_WIDTH=" + support::cpp11::to_string(input1->dimension(0)));
- build_opts.add_option("-DINPUT2_WIDTH=" + support::cpp11::to_string(input2->dimension(0)));
- build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(input1->element_size()));
- build_opts.add_option("-DINPUT1_ROTATE_N=" + support::cpp11::to_string((input1->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration));
+ build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(src1->dimension(2)));
+ build_opts.add_option("-DINPUT1_WIDTH=" + support::cpp11::to_string(src1->dimension(0)));
+ build_opts.add_option("-DINPUT2_WIDTH=" + support::cpp11::to_string(src2->dimension(0)));
+ build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(src1->element_size()));
+ build_opts.add_option("-DINPUT1_ROTATE_N=" + support::cpp11::to_string((src1->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration));
// If input have different quantization info set quantization parameters needed for the re-quantization process
- const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(output, input1, input2);
- if(is_data_type_quantized_asymmetric(input1->data_type()) && have_different_qinfo)
+ const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(dst, src1, src2);
+ if(is_data_type_quantized_asymmetric(src1->data_type()) && have_different_qinfo)
{
- const UniformQuantizationInfo iq1_info = input1->quantization_info().uniform();
- const UniformQuantizationInfo iq2_info = input2->quantization_info().uniform();
- const UniformQuantizationInfo oq_info = output->quantization_info().uniform();
+ const UniformQuantizationInfo iq1_info = src1->quantization_info().uniform();
+ const UniformQuantizationInfo iq2_info = src2->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = dst->quantization_info().uniform();
build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq1_info.offset));
build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale));
@@ -106,27 +110,27 @@ void CLWidthConcatenate2TensorsKernel::configure(const CLCompileContext &compile
_kernel = create_kernel(compile_context, "concatenate_width_x2", build_opts.options());
// Configure kernel window
- Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
+ Window win = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration));
ICLKernel::configure_internal(win.collapse(win, Window::DimZ));
- // Set output valid region
- output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape()));
+ // Set dst valid region
+ dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
// Set config_id for enabling LWS tuning
_config_id = "concatenate_width_x2_";
- _config_id += lower_string(string_from_data_type(input1->data_type()));
+ _config_id += lower_string(string_from_data_type(src1->data_type()));
_config_id += "_";
- _config_id += support::cpp11::to_string(input1->dimension(0));
+ _config_id += support::cpp11::to_string(src1->dimension(0));
_config_id += "_";
- _config_id += support::cpp11::to_string(input1->dimension(1));
+ _config_id += support::cpp11::to_string(src1->dimension(1));
_config_id += "_";
- _config_id += support::cpp11::to_string(input2->dimension(0));
+ _config_id += support::cpp11::to_string(src2->dimension(0));
_config_id += "_";
- _config_id += support::cpp11::to_string(input2->dimension(1));
+ _config_id += support::cpp11::to_string(src2->dimension(1));
}
-void CLWidthConcatenate2TensorsKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
+void ClWidthConcatenate2TensorsKernel::run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
@@ -147,4 +151,6 @@ void CLWidthConcatenate2TensorsKernel::run_op(ITensorPack &tensors, const Window
}
while(window.slide_window_slice_4D(slice));
}
+} // namespace kernels
+} // namespace opencl
} // namespace arm_compute
diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h b/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h
new file mode 100644
index 0000000000..92715008cf
--- /dev/null
+++ b/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_WIDTH_CONCATENATE_2TENSORS_KERNEL_H
+#define ARM_COMPUTE_CL_WIDTH_CONCATENATE_2TENSORS_KERNEL_H
+
+#include "src/core/common/Macros.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+/** Interface for the width concatenate kernel of 2 tensors.
+ * The src1 and src2 tensors will be concatenated into the dst tensor.
+ */
+class ClWidthConcatenate2TensorsKernel : public IClKernel
+{
+public:
+ /** Default constructor */
+ ClWidthConcatenate2TensorsKernel() = default;
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClWidthConcatenate2TensorsKernel);
+ /** Initialise the kernel's sources and destination
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] src1 First source tensor. Data types supported: All.
+ * @param[in] src2 Second source tensor. Data types supported: same as @p src1
+ * @param[out] dst Destination tensor. Data types supported: Same as @p src1.
+ */
+ void configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst);
+ /** Static function to check if given info will lead to a valid configuration of @ref ClWidthConcatenate2TensorsKernel
+ *
+ * @param[in] src1 First tensor info. Data types supported: All.
+ * @param[in] src2 Second tensor info. Data types supported: same as @p src1
+ * @param[in] dst Destination tensor info. Data types supported: Same as @p src1.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override;
+};
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_WIDTH_CONCATENATE_2TENSORS_KERNEL_H */
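Like the single-source kernels, the two-tensor variant is validated and configured purely on ITensorInfo. A short sketch of the pre-flight check plus configure call, using only the signatures declared above (the wrapper function and its error handling are illustrative):

    // Hedged sketch: validate-then-configure for the two-tensor width concatenation.
    #include "arm_compute/core/Error.h"
    #include "src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h"

    using namespace arm_compute;

    Status try_configure_width_x2(opencl::kernels::ClWidthConcatenate2TensorsKernel &kernel,
                                  const CLCompileContext &ctx,
                                  ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst)
    {
        // Static pre-flight check: among other things, dst width must cover src1 + src2 widths.
        const Status status = opencl::kernels::ClWidthConcatenate2TensorsKernel::validate(src1, src2, dst);
        if(!bool(status))
        {
            return status;
        }
        kernel.configure(ctx, src1, src2, dst); // state-less: only the ITensorInfo metadata is used
        return status;
    }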
diff --git a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp b/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp
index 7ecdd30224..4b49652a73 100644
--- a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h"
+#include "src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
@@ -37,76 +37,80 @@
namespace arm_compute
{
+namespace opencl
+{
+namespace kernels
+{
namespace
{
-Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *input3, const ITensorInfo *input4, const ITensorInfo *output)
+Status validate_arguments(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *src3, const ITensorInfo *src4, const ITensorInfo *dst)
{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, input3, input4, output);
- ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input1);
- ARM_COMPUTE_RETURN_ERROR_ON(input1->data_type() == DataType::UNKNOWN);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2, input3, input4, output);
- ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(0) + input2->dimension(0) + input3->dimension(0) + input4->dimension(0) > output->dimension(0));
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src1, src2, src3, src4, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src1);
+ ARM_COMPUTE_RETURN_ERROR_ON(src1->data_type() == DataType::UNKNOWN);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src1, src2, src3, src4, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON(src1->dimension(0) + src2->dimension(0) + src3->dimension(0) + src4->dimension(0) > dst->dimension(0));
for(size_t i = 1; i < Coordinates::num_max_dimensions; ++i)
{
- ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(i) != output->dimension(i));
- ARM_COMPUTE_RETURN_ERROR_ON(input2->dimension(i) != output->dimension(i));
- ARM_COMPUTE_RETURN_ERROR_ON(input3->dimension(i) != output->dimension(i));
- ARM_COMPUTE_RETURN_ERROR_ON(input4->dimension(i) != output->dimension(i));
+ ARM_COMPUTE_RETURN_ERROR_ON(src1->dimension(i) != dst->dimension(i));
+ ARM_COMPUTE_RETURN_ERROR_ON(src2->dimension(i) != dst->dimension(i));
+ ARM_COMPUTE_RETURN_ERROR_ON(src3->dimension(i) != dst->dimension(i));
+ ARM_COMPUTE_RETURN_ERROR_ON(src4->dimension(i) != dst->dimension(i));
}
- ARM_COMPUTE_RETURN_ERROR_ON(input1->num_dimensions() > 4);
+ ARM_COMPUTE_RETURN_ERROR_ON(src1->num_dimensions() > 4);
return Status{};
}
} // namespace
-CLWidthConcatenate4TensorsKernel::CLWidthConcatenate4TensorsKernel()
+ClWidthConcatenate4TensorsKernel::ClWidthConcatenate4TensorsKernel()
{
}
-Status CLWidthConcatenate4TensorsKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *input3, const ITensorInfo *input4, const ITensorInfo *output)
+Status ClWidthConcatenate4TensorsKernel::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *src3, const ITensorInfo *src4, const ITensorInfo *dst)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, input3, input4, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src1, src2, src3, src4, dst));
return Status{};
}
-void CLWidthConcatenate4TensorsKernel::configure(const CLCompileContext &compile_context,
- ITensorInfo *input1, ITensorInfo *input2,
- ITensorInfo *input3, ITensorInfo *input4,
- ITensorInfo *output)
+void ClWidthConcatenate4TensorsKernel::configure(const CLCompileContext &compile_context,
+ ITensorInfo *src1, ITensorInfo *src2,
+ ITensorInfo *src3, ITensorInfo *src4,
+ ITensorInfo *dst)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, input3, input4, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1, input2, input3, input4, output));
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src1, src2, src3, src4, dst);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src1, src2, src3, src4, dst));
- auto padding_info = get_padding_info({ input1, input2, input3, input4, output });
- const unsigned int min_dimension = std::min(std::min(input1->dimension(0), input2->dimension(0)), std::min(input3->dimension(0), input4->dimension(0)));
+ auto padding_info = get_padding_info({ src1, src2, src3, src4, dst });
+ const unsigned int min_dimension = std::min(std::min(src1->dimension(0), src2->dimension(0)), std::min(src3->dimension(0), src4->dimension(0)));
const unsigned int num_elems_processed_per_iteration = adjust_vec_size(8, min_dimension);
- const unsigned int vec_size_leftover = output->dimension(0) % num_elems_processed_per_iteration;
+ const unsigned int vec_size_leftover = dst->dimension(0) % num_elems_processed_per_iteration;
// Add build options
CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input1->data_type()));
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src1->data_type()));
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(vec_size_leftover));
- build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input1->dimension(2)));
- build_opts.add_option("-DINPUT1_WIDTH=" + support::cpp11::to_string(input1->dimension(0)));
- build_opts.add_option("-DINPUT2_WIDTH=" + support::cpp11::to_string(input2->dimension(0)));
- build_opts.add_option("-DINPUT3_WIDTH=" + support::cpp11::to_string(input3->dimension(0)));
- build_opts.add_option("-DINPUT4_WIDTH=" + support::cpp11::to_string(input4->dimension(0)));
- build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(input1->element_size()));
- build_opts.add_option("-DINPUT1_ROTATE_N=" + support::cpp11::to_string((input1->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration));
- build_opts.add_option("-DINPUT2_ROTATE_N=" + support::cpp11::to_string((input1->dimension(0) + input2->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration));
- build_opts.add_option("-DINPUT3_ROTATE_N=" + support::cpp11::to_string((input1->dimension(0) + input2->dimension(0) + input3->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration));
-
- // If input have different quantization info set quantization parameters needed for the re-quantization process
- const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(output, input1, input2, input3, input4);
- if(is_data_type_quantized_asymmetric(input1->data_type()) && have_different_qinfo)
+ build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(src1->dimension(2)));
+ build_opts.add_option("-DINPUT1_WIDTH=" + support::cpp11::to_string(src1->dimension(0)));
+ build_opts.add_option("-DINPUT2_WIDTH=" + support::cpp11::to_string(src2->dimension(0)));
+ build_opts.add_option("-DINPUT3_WIDTH=" + support::cpp11::to_string(src3->dimension(0)));
+ build_opts.add_option("-DINPUT4_WIDTH=" + support::cpp11::to_string(src4->dimension(0)));
+ build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(src1->element_size()));
+ build_opts.add_option("-DINPUT1_ROTATE_N=" + support::cpp11::to_string((src1->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration));
+ build_opts.add_option("-DINPUT2_ROTATE_N=" + support::cpp11::to_string((src1->dimension(0) + src2->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration));
+ build_opts.add_option("-DINPUT3_ROTATE_N=" + support::cpp11::to_string((src1->dimension(0) + src2->dimension(0) + src3->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration));
+
+ // If sources have different quantization info, set the quantization parameters needed for the re-quantization process
+ const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(dst, src1, src2, src3, src4);
+ if(is_data_type_quantized_asymmetric(src1->data_type()) && have_different_qinfo)
{
- const UniformQuantizationInfo iq1_info = input1->quantization_info().uniform();
- const UniformQuantizationInfo iq2_info = input2->quantization_info().uniform();
- const UniformQuantizationInfo iq3_info = input3->quantization_info().uniform();
- const UniformQuantizationInfo iq4_info = input4->quantization_info().uniform();
- const UniformQuantizationInfo oq_info = output->quantization_info().uniform();
+ const UniformQuantizationInfo iq1_info = src1->quantization_info().uniform();
+ const UniformQuantizationInfo iq2_info = src2->quantization_info().uniform();
+ const UniformQuantizationInfo iq3_info = src3->quantization_info().uniform();
+ const UniformQuantizationInfo iq4_info = src4->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = dst->quantization_info().uniform();
build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq1_info.offset));
build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale));
@@ -124,35 +128,35 @@ void CLWidthConcatenate4TensorsKernel::configure(const CLCompileContext &compile
_kernel = create_kernel(compile_context, "concatenate_width_x4", build_opts.options());
// Configure kernel window
- Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
+ Window win = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration));
ICLKernel::configure_internal(win.collapse(win, Window::DimZ));
- // Set output valid region
- output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape()));
+ // Set dst valid region
+ dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
// Set config_id for enabling LWS tuning
_config_id = "concatenate_width_x4_";
- _config_id += lower_string(string_from_data_type(input1->data_type()));
+ _config_id += lower_string(string_from_data_type(src1->data_type()));
_config_id += "_";
- _config_id += support::cpp11::to_string(input1->dimension(0));
+ _config_id += support::cpp11::to_string(src1->dimension(0));
_config_id += "_";
- _config_id += support::cpp11::to_string(input1->dimension(1));
+ _config_id += support::cpp11::to_string(src1->dimension(1));
_config_id += "_";
- _config_id += support::cpp11::to_string(input2->dimension(0));
+ _config_id += support::cpp11::to_string(src2->dimension(0));
_config_id += "_";
- _config_id += support::cpp11::to_string(input2->dimension(1));
+ _config_id += support::cpp11::to_string(src2->dimension(1));
_config_id += "_";
- _config_id += support::cpp11::to_string(input3->dimension(0));
+ _config_id += support::cpp11::to_string(src3->dimension(0));
_config_id += "_";
- _config_id += support::cpp11::to_string(input3->dimension(1));
+ _config_id += support::cpp11::to_string(src3->dimension(1));
_config_id += "_";
- _config_id += support::cpp11::to_string(input4->dimension(0));
+ _config_id += support::cpp11::to_string(src4->dimension(0));
_config_id += "_";
- _config_id += support::cpp11::to_string(input4->dimension(1));
+ _config_id += support::cpp11::to_string(src4->dimension(1));
}
-void CLWidthConcatenate4TensorsKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
+void ClWidthConcatenate4TensorsKernel::run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
@@ -177,4 +181,6 @@ void CLWidthConcatenate4TensorsKernel::run_op(ITensorPack &tensors, const Window
}
while(window.slide_window_slice_4D(slice));
}
+} // namespace kernels
+} // namespace opencl
} // namespace arm_compute
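The INPUTn_ROTATE_N defines computed above encode, per source, how far its data ends up shifted within a destination vector once the leftover lanes are accounted for; that reading of the constants is an interpretation, but the formulas in the sketch are the ones from configure(). A worked example with made-up widths:

    // Worked example of the -DINPUTn_ROTATE_N build options (widths are invented).
    #include <cstdio>

    int main()
    {
        const unsigned int vec      = 8;                      // num_elems_processed_per_iteration
        const unsigned int w1 = 10, w2 = 6, w3 = 12, w4 = 9;  // source widths
        const unsigned int dst_w    = w1 + w2 + w3 + w4;      // 37
        const unsigned int leftover = dst_w % vec;            // VEC_SIZE_LEFTOVER = 5

        const unsigned int rot1 = (w1 - leftover) % vec;           // (10 - 5) % 8 = 5
        const unsigned int rot2 = (w1 + w2 - leftover) % vec;      // (16 - 5) % 8 = 3
        const unsigned int rot3 = (w1 + w2 + w3 - leftover) % vec; // (28 - 5) % 8 = 7

        std::printf("LEFTOVER=%u ROTATE_N={%u,%u,%u}\n", leftover, rot1, rot2, rot3);
        return 0;
    }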
diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h b/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h
new file mode 100644
index 0000000000..06d6c0399a
--- /dev/null
+++ b/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ARM_COMPUTE_CL_WIDTH_CONCATENATE_4TENSORS_KERNEL_H
+#define ARM_COMPUTE_CL_WIDTH_CONCATENATE_4TENSORS_KERNEL_H
+
+#include "src/core/common/Macros.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+/** Interface for the width concatenate kernel of 4 tensors.
+ * All source tensors will be concatenated into the destination tensor.
+ */
+class ClWidthConcatenate4TensorsKernel : public IClKernel
+{
+public:
+ /** Default constructor */
+ ClWidthConcatenate4TensorsKernel();
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClWidthConcatenate4TensorsKernel);
+ /** Initialise the kernel's sources and destination
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] src1 First source tensor. Data types supported: All.
+ * @param[in] src2 Second source tensor. Data types supported: same as @p src1
+ * @param[in] src3 Third source tensor. Data types supported: same as @p src1
+ * @param[in] src4 Fourth source tensor. Data types supported: same as @p src1
+ * @param[out] dst Destination tensor. Data types supported: same as @p src1.
+ */
+ void configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *src3, ITensorInfo *src4, ITensorInfo *dst);
+ /** Static function to check if given info will lead to a valid configuration of @ref ClWidthConcatenate4TensorsKernel
+ *
+ * @param[in] src1 First tensor info. Data types supported: All.
+ * @param[in] src2 Second tensor info. Data types supported: same as @p src1
+ * @param[in] src3 Third tensor info. Data types supported: same as @p src1
+ * @param[in] src4 Fourth tensor info. Data types supported: same as @p src1
+ * @param[in] dst Destination tensor info. Data types supported: same as @p src1.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *src3, const ITensorInfo *src4, const ITensorInfo *dst);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override;
+};
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_WIDTH_CONCATENATE_4TENSORS_KERNEL_H */
diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp
index 30d0a481bd..8cbbc27444 100644
--- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/core/CL/kernels/CLWidthConcatenateLayerKernel.h"
+#include "src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
@@ -36,58 +36,62 @@
namespace arm_compute
{
+namespace opencl
+{
+namespace kernels
+{
namespace
{
-Status validate_arguments(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output)
+Status validate_arguments(const ITensorInfo *src, unsigned int width_offset, const ITensorInfo *dst)
{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
+ ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) + width_offset > output->dimension(0));
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(0) + width_offset > dst->dimension(0));
for(size_t i = 1; i < Coordinates::num_max_dimensions; ++i)
{
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(i) != output->dimension(i));
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(i) != dst->dimension(i));
}
- ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4);
+ ARM_COMPUTE_RETURN_ERROR_ON(src->num_dimensions() > 4);
return Status{};
}
} // namespace
-CLWidthConcatenateLayerKernel::CLWidthConcatenateLayerKernel()
+ClWidthConcatenateKernel::ClWidthConcatenateKernel()
{
}
-Status CLWidthConcatenateLayerKernel::validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output)
+Status ClWidthConcatenateKernel::validate(const ITensorInfo *src, unsigned int width_offset, const ITensorInfo *dst)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, width_offset, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, width_offset, dst));
return Status{};
}
-void CLWidthConcatenateLayerKernel::configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int width_offset, ITensorInfo *output)
+void ClWidthConcatenateKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int width_offset, ITensorInfo *dst)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, width_offset, output));
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, width_offset, dst));
- auto padding_info = get_padding_info({ input, output });
+ auto padding_info = get_padding_info({ src, dst });
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16, input->dimension(0));
+ const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16, src->dimension(0));
// Add build options
CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->data_type()));
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src->data_type()));
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
- build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->dimension(0) % num_elems_processed_per_iteration));
+ build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(src->dimension(0) % num_elems_processed_per_iteration));
build_opts.add_option("-DWIDTH_OFFSET=" + support::cpp11::to_string(width_offset));
- build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input->dimension(2)));
+ build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(src->dimension(2)));
- if(is_data_type_quantized_asymmetric(input->data_type()) && input->quantization_info() != output->quantization_info())
+ if(is_data_type_quantized_asymmetric(src->data_type()) && src->quantization_info() != dst->quantization_info())
{
- const UniformQuantizationInfo iqinfo = input->quantization_info().uniform();
- const UniformQuantizationInfo oqinfo = output->quantization_info().uniform();
+ const UniformQuantizationInfo iqinfo = src->quantization_info().uniform();
+ const UniformQuantizationInfo oqinfo = dst->quantization_info().uniform();
build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iqinfo.offset));
build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oqinfo.offset));
@@ -98,16 +102,16 @@ void CLWidthConcatenateLayerKernel::configure(const CLCompileContext &compile_co
// Create kernel
_kernel = create_kernel(compile_context, "concatenate_width", build_opts.options());
// Configure kernel window
- Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
+ Window win = calculate_max_window(*src, Steps(num_elems_processed_per_iteration));
ICLKernel::configure_internal(win.collapse(win, Window::DimZ));
- // Set output valid region
- output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape()));
+ // Set dst valid region
+ dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-void CLWidthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
+void ClWidthConcatenateKernel::run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
@@ -120,4 +124,6 @@ void CLWidthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &w
add_4D_tensor_argument(idx, dst, window);
enqueue(queue, *this, window, lws_hint());
}
+} // namespace kernels
+} // namespace opencl
} // namespace arm_compute
diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h b/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h
new file mode 100644
index 0000000000..3bffe52700
--- /dev/null
+++ b/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_WIDTH_CONCATENATE_LAYER_KERNEL_H
+#define ARM_COMPUTE_CL_WIDTH_CONCATENATE_LAYER_KERNEL_H
+
+#include "src/core/common/Macros.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+/** Interface for the width concatenate kernel.
+ * The source tensor will be concatenated into the destination tensor.
+ */
+class ClWidthConcatenateKernel : public IClKernel
+{
+public:
+ /** Default constructor */
+ ClWidthConcatenateKernel();
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClWidthConcatenateKernel);
+ /** Initialise the kernel's source and destination
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] src Source tensor. Data types supported: All.
+ * @param[in] width_offset The offset on the X axis.
+ * @param[in,out] dst Destination tensor. Data types supported: same as @p src.
+ *
+ */
+ void configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int width_offset, ITensorInfo *dst);
+ /** Static function to check if given info will lead to a valid configuration of @ref ClWidthConcatenateKernel
+ *
+ * @param[in] src Source tensor info. Data types supported: All.
+ * @param[in] width_offset The offset on the X axis.
+ * @param[in] dst Destination tensor info. Data types supported: same as @p src.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *src, unsigned int width_offset, const ITensorInfo *dst);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override;
+};
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_WIDTH_CONCATENATE_LAYER_KERNEL_H */
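For illustration only, and not part of the patch: a minimal sketch of how the state-less kernel above is driven. The kernel is configured against ITensorInfo objects and keeps no tensor pointers; the actual tensors are bound at execution time through an ITensorPack, mirroring the ACL_SRC/ACL_DST slots read by run_op(). The tensor arguments and width offset are hypothetical, the CL scheduler is assumed to be initialised, obtaining the compile context from CLKernelLibrary is an assumption, and the kernel header is internal to the library.

    #include "arm_compute/core/CL/CLKernelLibrary.h"
    #include "arm_compute/core/CL/ICLTensor.h"
    #include "arm_compute/core/ITensorPack.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h"

    using namespace arm_compute;

    void run_width_concat_sketch(ICLTensor *src, ICLTensor *dst, unsigned int width_offset)
    {
        // Configure with metadata only; no tensor pointers are stored in the kernel.
        opencl::kernels::ClWidthConcatenateKernel kernel;
        kernel.configure(CLKernelLibrary::get().get_compile_context(), src->info(), width_offset, dst->info());

        // Bind the tensors at run time and enqueue on the CL scheduler.
        ITensorPack pack;
        pack.add_tensor(TensorType::ACL_SRC, src);
        pack.add_tensor(TensorType::ACL_DST, dst);
        CLScheduler::get().enqueue_op(kernel, pack, true);
    }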
diff --git a/src/runtime/CL/functions/CLConcatenateLayer.cpp b/src/runtime/CL/functions/CLConcatenateLayer.cpp
index 0c473a79c8..ea96e45bf8 100644
--- a/src/runtime/CL/functions/CLConcatenateLayer.cpp
+++ b/src/runtime/CL/functions/CLConcatenateLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,242 +23,19 @@
*/
#include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
-#include "src/core/CL/kernels/CLHeightConcatenateLayerKernel.h"
-#include "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
-#include "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h"
-#include "src/core/CL/kernels/CLWidthConcatenateLayerKernel.h"
-
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "src/core/CL/kernels/CLBatchConcatenateLayerKernel.h"
-#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/CL/ICLKernel.h"
+#include "src/runtime/gpu/cl/operators/ClConcatenate.h"
namespace arm_compute
{
-namespace experimental
-{
-CLConcatenation::CLConcatenation()
- : _concat_kernels(),
- _num_inputs(0),
- _axis(Window::DimX)
-{
-}
-
-void CLConcatenation::configure(const CLCompileContext &compile_context, const std::vector<ITensorInfo *> &inputs_vector, ITensorInfo *output, size_t axis)
-{
- ARM_COMPUTE_ERROR_ON(output == nullptr);
- _axis = axis;
- _num_inputs = inputs_vector.size();
-
- TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, _axis);
- std::vector<const ITensorInfo *> const_inputs_vector(inputs_vector.size());
- std::transform(inputs_vector.begin(), inputs_vector.end(), const_inputs_vector.begin(), [](ITensorInfo * t)
- {
- ARM_COMPUTE_ERROR_ON_NULLPTR(t);
- return t;
- });
-
- // Output auto inizialitation if not yet initialized
- auto_init_if_empty(*output, output_shape, 1, inputs_vector[0]->data_type());
- ARM_COMPUTE_ERROR_THROW_ON(CLConcatenateLayer::validate(const_inputs_vector, output, axis));
-
- unsigned int offset = 0;
- switch(_axis)
- {
- case Window::DimX:
- {
- switch(_num_inputs)
- {
- case 2:
- {
- // Configure WidthConcatenate2Tensors kernel
- auto kernel = std::make_unique<CLWidthConcatenate2TensorsKernel>();
- kernel->configure(compile_context, inputs_vector.at(0), inputs_vector.at(1), output);
- _concat_kernels.emplace_back(std::move(kernel));
- break;
- }
- case 4:
- {
- // Configure WidthConcatenate4Tensors kernel
- auto kernel = std::make_unique<CLWidthConcatenate4TensorsKernel>();
- kernel->configure(compile_context, inputs_vector.at(0), inputs_vector.at(1), inputs_vector.at(2), inputs_vector.at(3), output);
- _concat_kernels.emplace_back(std::move(kernel));
- break;
- }
- default:
- {
- // Configure generic case WidthConcatenate kernels
- for(unsigned int i = 0; i < _num_inputs; ++i)
- {
- auto kernel = std::make_unique<CLWidthConcatenateLayerKernel>();
- kernel->configure(compile_context, inputs_vector.at(i), offset, output);
- offset += inputs_vector.at(i)->dimension(_axis);
- _concat_kernels.emplace_back(std::move(kernel));
- }
- break;
- }
- }
- break;
- }
- case Window::DimY:
- {
- for(unsigned int i = 0; i < _num_inputs; ++i)
- {
- auto kernel = std::make_unique<CLHeightConcatenateLayerKernel>();
- kernel->configure(compile_context, inputs_vector.at(i), offset, output);
- offset += inputs_vector.at(i)->dimension(_axis);
- _concat_kernels.emplace_back(std::move(kernel));
- }
- break;
- }
- case Window::DimZ:
- {
- for(unsigned int i = 0; i < _num_inputs; ++i)
- {
- auto kernel = std::make_unique<CLDepthConcatenateLayerKernel>();
- kernel->configure(compile_context, inputs_vector.at(i), offset, output);
- offset += inputs_vector.at(i)->dimension(_axis);
- _concat_kernels.emplace_back(std::move(kernel));
- }
- break;
- }
- case 3:
- {
- for(unsigned int i = 0; i < _num_inputs; ++i)
- {
- auto kernel = std::make_unique<CLBatchConcatenateLayerKernel>();
- kernel->configure(compile_context, inputs_vector.at(i), offset, output);
- offset += inputs_vector.at(i)->dimension(_axis);
- _concat_kernels.emplace_back(std::move(kernel));
- }
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Axis not supported");
- }
-}
-
-Status CLConcatenation::validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis)
-{
- ARM_COMPUTE_RETURN_ERROR_ON(output == nullptr);
- const unsigned int num_inputs = inputs_vector.size();
-
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
- ARM_COMPUTE_RETURN_ERROR_ON(num_inputs < 2);
-
- unsigned int offset = 0;
- switch(axis)
- {
- case Window::DimX:
- {
- switch(num_inputs)
- {
- case 2:
- // Validate WidthConcatenate2Tensors kernels if there are 2 inputs
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(inputs_vector[0], inputs_vector[1]);
- ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenate2TensorsKernel::validate(inputs_vector[0], inputs_vector[1], output));
- break;
- case 4:
- // Validate WidthConcatenate4Tensors kernels if there are 4 inputs
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(inputs_vector[0], inputs_vector[1], inputs_vector[2], inputs_vector[3]);
- ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenate4TensorsKernel::validate(inputs_vector[0], inputs_vector[1], inputs_vector[2], inputs_vector[3], output));
- break;
- default:
- // Validate generic case of WidthConcatenate kernel
- for(const auto &input : inputs_vector)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
- ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenateLayerKernel::validate(input, offset, output));
- offset += input->dimension(axis);
- }
- break;
- }
- break;
- }
- case Window::DimY:
- {
- for(const auto &input : inputs_vector)
- {
- ARM_COMPUTE_RETURN_ON_ERROR(CLHeightConcatenateLayerKernel::validate(input, offset, output));
- offset += input->dimension(axis);
- }
- break;
- }
- case Window::DimZ:
- {
- for(const auto &input : inputs_vector)
- {
- ARM_COMPUTE_RETURN_ON_ERROR(CLDepthConcatenateLayerKernel::validate(input, offset, output));
- offset += input->dimension(axis);
- }
- break;
- }
- case 3:
- {
- for(const auto &input : inputs_vector)
- {
- ARM_COMPUTE_RETURN_ON_ERROR(CLBatchConcatenateLayerKernel::validate(input, offset, output));
- offset += input->dimension(axis);
- }
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Axis not supported");
- }
-
- if(output->total_size() != 0)
- {
- TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, axis);
- ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size());
- }
-
- return Status{};
-}
-
-void CLConcatenation::run(ITensorPack &tensors)
-{
- if(tensors.empty())
- {
- ARM_COMPUTE_ERROR("No inputs provided");
- }
-
- if(static_cast<int>(tensors.size()) - 1 != static_cast<int>(_num_inputs))
- {
- ARM_COMPUTE_ERROR("Configured with different number of inputs");
- }
-
- if(_axis == Window::DimX && (_num_inputs == 2 || _num_inputs == 4))
- {
- ARM_COMPUTE_ERROR_ON(_concat_kernels.empty());
- CLScheduler::get().enqueue_op(*_concat_kernels.at(0), tensors, true);
- }
- else
- {
- int i = 0;
- for(auto &k : _concat_kernels)
- {
- ITensorPack pack;
- pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(ACL_SRC_VEC + i));
- pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(ACL_DST));
- CLScheduler::get().enqueue_op(*k, pack, true);
- ++i;
- }
- }
-}
-} // namespace experimental
-
struct CLConcatenateLayer::Impl
{
- std::vector<const ICLTensor *> srcs{};
- ICLTensor *dst{ nullptr };
- unsigned int num_inputs{ 0 };
- unsigned int axis{ 0 };
- std::unique_ptr<experimental::CLConcatenation> op{ nullptr };
+ std::vector<const ICLTensor *> srcs{};
+ ICLTensor *dst{ nullptr };
+ unsigned int num_inputs{ 0 };
+ unsigned int axis{ 0 };
+ std::unique_ptr<opencl::ClConcatenate> op{ nullptr };
};
CLConcatenateLayer::CLConcatenateLayer()
@@ -285,7 +62,7 @@ void CLConcatenateLayer::configure(const CLCompileContext &compile_context, std:
_impl->dst = output;
_impl->axis = axis;
_impl->num_inputs = inputs_vector.size();
- _impl->op = std::make_unique<experimental::CLConcatenation>();
+ _impl->op = std::make_unique<opencl::ClConcatenate>();
std::vector<ITensorInfo *> inputs_vector_info;
for(unsigned int i = 0; i < inputs_vector.size(); ++i)
@@ -298,7 +75,7 @@ void CLConcatenateLayer::configure(const CLCompileContext &compile_context, std:
Status CLConcatenateLayer::validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis)
{
- return experimental::CLConcatenation::validate(inputs_vector, output, axis);
+ return opencl::ClConcatenate::validate(inputs_vector, output, axis);
}
void CLConcatenateLayer::run()
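CLConcatenateLayer::run() lies outside the hunks shown above; a hedged sketch of how it can forward the tensors stored in Impl to the state-less operator, consistent with the ACL_SRC_VEC + i and ACL_DST slots that ClConcatenate::run() reads back, is:

    void CLConcatenateLayer::run()
    {
        // Rebuild the tensor pack on every invocation; the operator itself stays state-less.
        ITensorPack pack;
        for(unsigned int i = 0; i < _impl->num_inputs; ++i)
        {
            pack.add_const_tensor(ACL_SRC_VEC + i, _impl->srcs.at(i));
        }
        pack.add_tensor(TensorType::ACL_DST, _impl->dst);
        _impl->op->run(pack);
    }

Rebuilding the pack per call keeps all mutable state on the caller side, which is the point of making the operator state-less.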
diff --git a/src/runtime/gpu/cl/IClOperator.h b/src/runtime/gpu/cl/IClOperator.h
new file mode 100644
index 0000000000..049bf05dc1
--- /dev/null
+++ b/src/runtime/gpu/cl/IClOperator.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_ICL_OPERATOR_H
+#define ARM_COMPUTE_ICL_OPERATOR_H
+
+#include "arm_compute/core/ITensorInfo.h"
+#include "arm_compute/runtime/CL/ICLOperator.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+using IClOperator = experimental::ICLOperator;
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_ICL_OPERATOR_H */
diff --git a/src/runtime/gpu/cl/operators/ClConcatenate.cpp b/src/runtime/gpu/cl/operators/ClConcatenate.cpp
new file mode 100644
index 0000000000..4385fcfaed
--- /dev/null
+++ b/src/runtime/gpu/cl/operators/ClConcatenate.cpp
@@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/runtime/gpu/cl/operators/ClConcatenate.h"
+
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+#include "src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h"
+#include "src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h"
+#include "src/core/gpu/cl/kernels/ClHeightConcatenateKernel.h"
+#include "src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h"
+#include "src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h"
+#include "src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+ClConcatenate::ClConcatenate()
+ : _concat_kernels(),
+ _num_inputs(0),
+ _axis(Window::DimX)
+{
+}
+
+void ClConcatenate::configure(const CLCompileContext &compile_context, const std::vector<ITensorInfo *> &src_vector, ITensorInfo *dst, size_t axis)
+{
+ ARM_COMPUTE_ERROR_ON(dst == nullptr);
+ _axis = axis;
+ _num_inputs = src_vector.size();
+
+ TensorShape dst_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(src_vector, _axis);
+ std::vector<const ITensorInfo *> const_src_vector(src_vector.size());
+ std::transform(src_vector.begin(), src_vector.end(), const_src_vector.begin(), [](ITensorInfo * t)
+ {
+ ARM_COMPUTE_ERROR_ON_NULLPTR(t);
+ return t;
+ });
+
+ // dst auto initialization if not yet initialized
+ auto_init_if_empty(*dst, dst_shape, 1, src_vector[0]->data_type());
+ ARM_COMPUTE_ERROR_THROW_ON(ClConcatenate::validate(const_src_vector, dst, axis));
+
+ unsigned int offset = 0;
+ switch(_axis)
+ {
+ case Window::DimX:
+ {
+ switch(_num_inputs)
+ {
+ case 2:
+ {
+ // Configure WidthConcatenate2Tensors kernel
+ auto kernel = std::make_unique<kernels::ClWidthConcatenate2TensorsKernel>();
+ kernel->configure(compile_context, src_vector.at(0), src_vector.at(1), dst);
+ _concat_kernels.emplace_back(std::move(kernel));
+ break;
+ }
+ case 4:
+ {
+ // Configure WidthConcatenate4Tensors kernel
+ auto kernel = std::make_unique<kernels::ClWidthConcatenate4TensorsKernel>();
+ kernel->configure(compile_context, src_vector.at(0), src_vector.at(1), src_vector.at(2), src_vector.at(3), dst);
+ _concat_kernels.emplace_back(std::move(kernel));
+ break;
+ }
+ default:
+ {
+ // Configure generic case WidthConcatenate kernels
+ for(unsigned int i = 0; i < _num_inputs; ++i)
+ {
+ auto kernel = std::make_unique<kernels::ClWidthConcatenateKernel>();
+ kernel->configure(compile_context, src_vector.at(i), offset, dst);
+ offset += src_vector.at(i)->dimension(_axis);
+ _concat_kernels.emplace_back(std::move(kernel));
+ }
+ break;
+ }
+ }
+ break;
+ }
+ case Window::DimY:
+ {
+ for(unsigned int i = 0; i < _num_inputs; ++i)
+ {
+ auto kernel = std::make_unique<kernels::ClHeightConcatenateKernel>();
+ kernel->configure(compile_context, src_vector.at(i), offset, dst);
+ offset += src_vector.at(i)->dimension(_axis);
+ _concat_kernels.emplace_back(std::move(kernel));
+ }
+ break;
+ }
+ case Window::DimZ:
+ {
+ for(unsigned int i = 0; i < _num_inputs; ++i)
+ {
+ auto kernel = std::make_unique<kernels::ClDepthConcatenateKernel>();
+ kernel->configure(compile_context, src_vector.at(i), offset, dst);
+ offset += src_vector.at(i)->dimension(_axis);
+ _concat_kernels.emplace_back(std::move(kernel));
+ }
+ break;
+ }
+ case 3:
+ {
+ for(unsigned int i = 0; i < _num_inputs; ++i)
+ {
+ auto kernel = std::make_unique<kernels::ClBatchConcatenateKernel>();
+ kernel->configure(compile_context, src_vector.at(i), offset, dst);
+ offset += src_vector.at(i)->dimension(_axis);
+ _concat_kernels.emplace_back(std::move(kernel));
+ }
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Axis not supported");
+ }
+}
+
+Status ClConcatenate::validate(const std::vector<const ITensorInfo *> &src_vector, const ITensorInfo *dst, size_t axis)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON(dst == nullptr);
+ const unsigned int num_inputs = src_vector.size();
+
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(dst);
+ ARM_COMPUTE_RETURN_ERROR_ON(num_inputs < 2);
+
+ unsigned int offset = 0;
+ switch(axis)
+ {
+ case Window::DimX:
+ {
+ switch(num_inputs)
+ {
+ case 2:
+ // Validate WidthConcatenate2Tensors kernels if there are 2 inputs
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src_vector[0], src_vector[1]);
+ ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenate2TensorsKernel::validate(src_vector[0], src_vector[1], dst));
+ break;
+ case 4:
+ // Validate WidthConcatenate4Tensors kernels if there are 4 inputs
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src_vector[0], src_vector[1], src_vector[2], src_vector[3]);
+ ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenate4TensorsKernel::validate(src_vector[0], src_vector[1], src_vector[2], src_vector[3], dst));
+ break;
+ default:
+ // Validate generic case of WidthConcatenate kernel
+ for(const auto &src : src_vector)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
+ ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenateKernel::validate(src, offset, dst));
+ offset += src->dimension(axis);
+ }
+ break;
+ }
+ break;
+ }
+ case Window::DimY:
+ {
+ for(const auto &src : src_vector)
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClHeightConcatenateKernel::validate(src, offset, dst));
+ offset += src->dimension(axis);
+ }
+ break;
+ }
+ case Window::DimZ:
+ {
+ for(const auto &src : src_vector)
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClDepthConcatenateKernel::validate(src, offset, dst));
+ offset += src->dimension(axis);
+ }
+ break;
+ }
+ case 3:
+ {
+ for(const auto &src : src_vector)
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClBatchConcatenateKernel::validate(src, offset, dst));
+ offset += src->dimension(axis);
+ }
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Axis not supported");
+ }
+
+ if(dst->total_size() != 0)
+ {
+ TensorShape dst_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(src_vector, axis);
+ ARM_COMPUTE_RETURN_ERROR_ON(dst_shape.total_size() != dst->tensor_shape().total_size());
+ }
+
+ return Status{};
+}
+
+void ClConcatenate::run(ITensorPack &tensors)
+{
+ if(tensors.empty())
+ {
+ ARM_COMPUTE_ERROR("No inputs provided");
+ }
+
+ if(static_cast<int>(tensors.size()) - 1 != static_cast<int>(_num_inputs))
+ {
+ ARM_COMPUTE_ERROR("Configured with different number of inputs");
+ }
+
+ if(_axis == Window::DimX && (_num_inputs == 2 || _num_inputs == 4))
+ {
+ ARM_COMPUTE_ERROR_ON(_concat_kernels.empty());
+ CLScheduler::get().enqueue_op(*_concat_kernels.at(0), tensors, true);
+ }
+ else
+ {
+ int i = 0;
+ for(auto &k : _concat_kernels)
+ {
+ ITensorPack pack;
+ pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(ACL_SRC_VEC + i));
+ pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(ACL_DST));
+ CLScheduler::get().enqueue_op(*k, pack, true);
+ ++i;
+ }
+ }
+}
+} // namespace opencl
+} // namespace arm_compute
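A usage sketch of the static validation path added above: checking, before allocating anything, whether a width (axis 0) concatenation of two F32 tensors is supported. The shapes are invented for illustration; with exactly two sources on the X axis this exercises the ClWidthConcatenate2TensorsKernel::validate() branch.

    #include "arm_compute/core/TensorInfo.h"
    #include "src/runtime/gpu/cl/operators/ClConcatenate.h"

    #include <vector>

    using namespace arm_compute;

    bool width_concat_is_supported()
    {
        // Two sources of 32x16 and 64x16 concatenated along the width into a 96x16 destination.
        const TensorInfo src0(TensorShape(32U, 16U), 1, DataType::F32);
        const TensorInfo src1(TensorShape(64U, 16U), 1, DataType::F32);
        const TensorInfo dst(TensorShape(96U, 16U), 1, DataType::F32);

        const std::vector<const ITensorInfo *> srcs = { &src0, &src1 };
        return bool(opencl::ClConcatenate::validate(srcs, &dst, 0));
    }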
diff --git a/src/runtime/gpu/cl/operators/ClConcatenate.h b/src/runtime/gpu/cl/operators/ClConcatenate.h
new file mode 100644
index 0000000000..112e2ac6b7
--- /dev/null
+++ b/src/runtime/gpu/cl/operators/ClConcatenate.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLCONCATENATE_H
+#define ARM_COMPUTE_CLCONCATENATE_H
+
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
+#include "src/runtime/gpu/cl/IClOperator.h"
+
+#include <vector>
+
+namespace arm_compute
+{
+namespace opencl
+{
+/** Basic function to concatenate tensors along a given axis. This function calls the following kernels:
+ *
+ * -# @ref kernels::ClWidthConcatenateKernel (if underlying concatenation axis is 0).
+ * -# @ref kernels::ClHeightConcatenateKernel (if underlying concatenation axis is 1).
+ * -# @ref kernels::ClDepthConcatenateKernel (if underlying concatenation axis is 2).
+ * -# @ref kernels::ClBatchConcatenateKernel (if underlying concatenation axis is 3).
+ */
+class ClConcatenate : public IClOperator
+{
+public:
+ /** Default constructor */
+ ClConcatenate();
+ /** Initialise the operator's sources vector and dst.
+ *
+ * @note Preconditions on the input and dst tensor dimensions differ depending on the concatenation axis.
+ * @note Preconditions can be found respectively at @ref kernels::ClWidthConcatenateKernel,
+ * @ref kernels::ClHeightConcatenateKernel and @ref kernels::ClDepthConcatenateKernel.
+ *
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in,out] src_vector The vector containing all the tensors to concatenate. Data types supported: All
+ * @param[out] dst Destination tensor. Data types supported: same as @p src_vector.
+ * @param[in] axis Concatenation axis. Supported underlying concatenation axes are 0, 1, 2 and 3.
+ */
+ void configure(const ClCompileContext &compile_context, const std::vector<ITensorInfo *> &src_vector, ITensorInfo *dst, size_t axis);
+ /** Static function to check if given info will lead to a valid configuration of @ref ClConcatenate
+ *
+ * @note Preconditions on the input and dst tensor dimensions differ depending on the concatenation axis.
+ * @note Preconditions can be found respectively at @ref kernels::ClWidthConcatenateKernel,
+ * @ref kernels::ClHeightConcatenateKernel and @ref kernels::ClDepthConcatenateKernel.
+ *
+ * @param[in] src_vector The vector containing all the tensor infos to concatenate. Data types supported: All
+ * @param[in] dst Destination tensor info. Data types supported: same as @p src_vector.
+ * @param[in] axis Concatenation axis. Supported underlying concatenation axes are 0, 1, 2 and 3.
+ *
+ * @return a status
+ */
+ static Status validate(const std::vector<const ITensorInfo *> &src_vector, const ITensorInfo *dst, size_t axis);
+
+ // Inherited methods overridden:
+ void run(ITensorPack &tensors) override;
+
+private:
+ std::vector<std::unique_ptr<IClKernel>> _concat_kernels;
+ unsigned int _num_inputs;
+ unsigned int _axis;
+};
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLCONCATENATE_H */
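Finally, a sketch of the unchanged user-facing path: the public CLConcatenateLayer keeps its tensor-based API and, after this patch, defers internally to the state-less opencl::ClConcatenate shown above. The shapes, axis and scheduler initialisation below are illustrative assumptions.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h"

    #include <vector>

    using namespace arm_compute;

    void depth_concat_example()
    {
        CLScheduler::get().default_init();

        // Two 8x8 sources with 4 and 6 channels concatenated into an 8x8x10 destination.
        CLTensor a, b, dst;
        a.allocator()->init(TensorInfo(TensorShape(8U, 8U, 4U), 1, DataType::F32));
        b.allocator()->init(TensorInfo(TensorShape(8U, 8U, 6U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(8U, 8U, 10U), 1, DataType::F32));

        std::vector<const ICLTensor *> srcs = { &a, &b };
        CLConcatenateLayer concat;
        concat.configure(srcs, &dst, 2); // axis 2 maps to one ClDepthConcatenateKernel per source

        a.allocator()->allocate();
        b.allocator()->allocate();
        dst.allocator()->allocate();

        concat.run();
    }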