aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/gpu
diff options
context:
space:
mode:
Diffstat (limited to 'src/runtime/gpu')
-rw-r--r--src/runtime/gpu/cl/IClOperator.h37
-rw-r--r--src/runtime/gpu/cl/operators/ClActivation.cpp45
-rw-r--r--src/runtime/gpu/cl/operators/ClActivation.h60
-rw-r--r--src/runtime/gpu/cl/operators/ClAdd.cpp47
-rw-r--r--src/runtime/gpu/cl/operators/ClAdd.h100
-rw-r--r--src/runtime/gpu/cl/operators/ClCast.cpp45
-rw-r--r--src/runtime/gpu/cl/operators/ClCast.h74
-rw-r--r--src/runtime/gpu/cl/operators/ClConcatenate.cpp254
-rw-r--r--src/runtime/gpu/cl/operators/ClConcatenate.h86
-rw-r--r--src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.cpp45
-rw-r--r--src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.h62
-rw-r--r--src/runtime/gpu/cl/operators/ClCopy.cpp45
-rw-r--r--src/runtime/gpu/cl/operators/ClCopy.h62
-rw-r--r--src/runtime/gpu/cl/operators/ClCrop.cpp46
-rw-r--r--src/runtime/gpu/cl/operators/ClCrop.h74
-rw-r--r--src/runtime/gpu/cl/operators/ClDequantize.cpp53
-rw-r--r--src/runtime/gpu/cl/operators/ClDequantize.h60
-rw-r--r--src/runtime/gpu/cl/operators/ClDirectConv2d.cpp106
-rw-r--r--src/runtime/gpu/cl/operators/ClDirectConv2d.h83
-rw-r--r--src/runtime/gpu/cl/operators/ClElementwiseOperations.cpp92
-rw-r--r--src/runtime/gpu/cl/operators/ClElementwiseOperations.h190
-rw-r--r--src/runtime/gpu/cl/operators/ClElementwiseUnary.cpp116
-rw-r--r--src/runtime/gpu/cl/operators/ClElementwiseUnary.h192
-rw-r--r--src/runtime/gpu/cl/operators/ClFill.cpp45
-rw-r--r--src/runtime/gpu/cl/operators/ClFill.h61
-rw-r--r--src/runtime/gpu/cl/operators/ClFlatten.cpp45
-rw-r--r--src/runtime/gpu/cl/operators/ClFlatten.h68
-rw-r--r--src/runtime/gpu/cl/operators/ClFloor.cpp45
-rw-r--r--src/runtime/gpu/cl/operators/ClFloor.h58
-rw-r--r--src/runtime/gpu/cl/operators/ClGemm.cpp760
-rw-r--r--src/runtime/gpu/cl/operators/ClGemm.h136
-rw-r--r--src/runtime/gpu/cl/operators/ClLogicalNot.cpp45
-rw-r--r--src/runtime/gpu/cl/operators/ClLogicalNot.h58
-rw-r--r--src/runtime/gpu/cl/operators/ClMul.cpp60
-rw-r--r--src/runtime/gpu/cl/operators/ClMul.h107
-rw-r--r--src/runtime/gpu/cl/operators/ClPRelu.cpp57
-rw-r--r--src/runtime/gpu/cl/operators/ClPRelu.h68
-rw-r--r--src/runtime/gpu/cl/operators/ClPermute.cpp45
-rw-r--r--src/runtime/gpu/cl/operators/ClPermute.h64
-rw-r--r--src/runtime/gpu/cl/operators/ClPool2d.cpp101
-rw-r--r--src/runtime/gpu/cl/operators/ClPool2d.h72
-rw-r--r--src/runtime/gpu/cl/operators/ClQuantize.cpp53
-rw-r--r--src/runtime/gpu/cl/operators/ClQuantize.h62
-rw-r--r--src/runtime/gpu/cl/operators/ClReshape.cpp45
-rw-r--r--src/runtime/gpu/cl/operators/ClReshape.h59
-rw-r--r--src/runtime/gpu/cl/operators/ClScale.cpp69
-rw-r--r--src/runtime/gpu/cl/operators/ClScale.h74
-rw-r--r--src/runtime/gpu/cl/operators/ClSoftmax.cpp186
-rw-r--r--src/runtime/gpu/cl/operators/ClSoftmax.h97
-rw-r--r--src/runtime/gpu/cl/operators/ClSub.cpp47
-rw-r--r--src/runtime/gpu/cl/operators/ClSub.h100
-rw-r--r--src/runtime/gpu/cl/operators/ClTranspose.cpp45
-rw-r--r--src/runtime/gpu/cl/operators/ClTranspose.h58
-rw-r--r--src/runtime/gpu/cl/operators/ClWinogradConv2d.cpp299
-rw-r--r--src/runtime/gpu/cl/operators/ClWinogradConv2d.h126
-rw-r--r--src/runtime/gpu/cl/utils/ClAuxTensorHandler.h101
56 files changed, 0 insertions, 5290 deletions
diff --git a/src/runtime/gpu/cl/IClOperator.h b/src/runtime/gpu/cl/IClOperator.h
deleted file mode 100644
index 049bf05dc1..0000000000
--- a/src/runtime/gpu/cl/IClOperator.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ICL_OPERATOR_H
-#define ARM_COMPUTE_ICL_OPERATOR_H
-
-#include "arm_compute/core/ITensorInfo.h"
-#include "arm_compute/runtime/CL/ICLOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-using IClOperator = experimental::ICLOperator;
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_ICL_OPERATOR_H */
diff --git a/src/runtime/gpu/cl/operators/ClActivation.cpp b/src/runtime/gpu/cl/operators/ClActivation.cpp
deleted file mode 100644
index 71aa57bdbd..0000000000
--- a/src/runtime/gpu/cl/operators/ClActivation.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2016-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClActivation.h"
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClActivationKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClActivation::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const ActivationLayerInfo &act_info)
-{
- auto k = std::make_unique<kernels::ClActivationKernel>();
- k->configure(compile_context, src, dst, act_info);
- _kernel = std::move(k);
-}
-
-Status ClActivation::validate(const ITensorInfo *src, const ITensorInfo *dst, const ActivationLayerInfo &act_info)
-{
- return kernels::ClActivationKernel::validate(src, dst, act_info);
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClActivation.h b/src/runtime/gpu/cl/operators/ClActivation.h
deleted file mode 100644
index 235b826b87..0000000000
--- a/src/runtime/gpu/cl/operators/ClActivation.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_ACTIVATION_H
-#define ARM_COMPUTE_CL_ACTIVATION_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref kernels::ClActivationKernel */
-class ClActivation : public IClOperator
-{
-public:
- /** Constructor */
- ClActivation() = default;
- /** Configure operator for a given list of arguments
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
- * @param[out] dst Destination tensor info. Data type supported: same as @p src
- * @param[in] activation_info Activation layer parameters.
- */
- void configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const ActivationLayerInfo &activation_info);
- /** Static function to check if given info will lead to a valid configuration of @ref ClActivation
- *
- * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
- * @param[in] dst Destination tensor info. Data type supported: same as @p src
- * @param[in] act_info Activation layer information.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const ActivationLayerInfo &act_info);
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_ACTIVATION_H */
diff --git a/src/runtime/gpu/cl/operators/ClAdd.cpp b/src/runtime/gpu/cl/operators/ClAdd.cpp
deleted file mode 100644
index 01f550f819..0000000000
--- a/src/runtime/gpu/cl/operators/ClAdd.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClAdd.h"
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClElementwiseKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClAdd::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst,
- ConvertPolicy policy, const ActivationLayerInfo &act_info)
-{
- auto k = std::make_unique<kernels::ClSaturatedArithmeticKernel>();
- k->configure(compile_context, ArithmeticOperation::ADD, src1, src2, dst, policy, act_info);
- _kernel = std::move(k);
-}
-
-Status ClAdd::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst,
- ConvertPolicy policy, const ActivationLayerInfo &act_info)
-{
- return kernels::ClSaturatedArithmeticKernel::validate(ArithmeticOperation::ADD, src1, src2, dst, policy, act_info);
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClAdd.h b/src/runtime/gpu/cl/operators/ClAdd.h
deleted file mode 100644
index f751d8dc83..0000000000
--- a/src/runtime/gpu/cl/operators/ClAdd.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_ADD_H
-#define ARM_COMPUTE_CL_ADD_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run arithmetic addition
- *
- * @note The tensor data type for the inputs must be U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
- * @note The function performs an arithmetic addition between two tensors.
- */
-class ClAdd : public IClOperator
-{
-public:
- /** Default Constructor */
- ClAdd() = default;
- /** Configure function for a given list of arguments.
- *
- * Valid configurations (src1,src2) -> dst :
- *
- * - (U8,U8) -> U8
- * - (U8,U8) -> S16
- * - (S16,U8) -> S16
- * - (U8,S16) -> S16
- * - (S16,S16) -> S16
- * - (S32,S32) -> S32
- * - (F16,F16) -> F16
- * - (F32,F32) -> F32
- * - (QASYMM8,QASYMM8) -> QASYMM8
- * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
- * - (QSYMM16,QSYMM16) -> QSYMM16
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in, out] src1 First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
- * The source tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] src2 Second source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
- * The source tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] dst Destination tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
- * @param[in] policy Policy to use to handle overflow.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, ConvertPolicy policy,
- const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref ClAdd
- *
- * Valid configurations (src1,src2) -> dst :
- *
- * - (U8,U8) -> U8
- * - (U8,U8) -> S16
- * - (S16,U8) -> S16
- * - (U8,S16) -> S16
- * - (S16,S16) -> S16
- * - (S32,S32) -> S32
- * - (F16,F16) -> F16
- * - (F32,F32) -> F32
- * - (QASYMM8,QASYMM8) -> QASYMM8
- * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
- * - (QSYMM16,QSYMM16) -> QSYMM16
- *
- * @param[in] src1 First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
- * @param[in] src2 Second source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
- * @param[in] dst Destination tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
- * @param[in] policy Policy to use to handle overflow.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, ConvertPolicy policy,
- const ActivationLayerInfo &act_info = ActivationLayerInfo());
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_ADD_H */
diff --git a/src/runtime/gpu/cl/operators/ClCast.cpp b/src/runtime/gpu/cl/operators/ClCast.cpp
deleted file mode 100644
index 3f54004aa7..0000000000
--- a/src/runtime/gpu/cl/operators/ClCast.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClCast.h"
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClCastKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClCast::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, ConvertPolicy policy)
-{
- auto k = std::make_unique<kernels::ClCastKernel>();
- k->configure(compile_context, src, dst, policy);
- _kernel = std::move(k);
-}
-
-Status ClCast::validate(const ITensorInfo *src, const ITensorInfo *dst, ConvertPolicy policy)
-{
- return kernels::ClCastKernel::validate(src, dst, policy);
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClCast.h b/src/runtime/gpu/cl/operators/ClCast.h
deleted file mode 100644
index 69e028debd..0000000000
--- a/src/runtime/gpu/cl/operators/ClCast.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_CAST_H
-#define ARM_COMPUTE_CL_CAST_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref kernels::ClCastKernel */
-class ClCast : public IClOperator
-{
-public:
- /** Constructor */
- ClCast() = default;
- /** Configure operator for a given list of arguments
- *
- * @note Input data type must be different than output data type.
- *
- * Valid data layouts:
- * - All
- *
- * Valid data type configurations:
- * |src |dst |
- * |:--------------|:--------------------------------------|
- * |U8 | S8, U16, S16, U32, S32, F16, F32 |
- * |U16 | U8, S8, S16, U32, S32, F16, F32 |
- * |S16 | U8, S8, U16, U32, S32, F16, F32 |
- * |U32 | U8, S8, U16, S16, S32, F16, F32 |
- * |S32 | U8, S8, U16, S16, U32, F16, F32 |
- * |F16 | U8, S8, U16, S16, U32, F32 |
- * |F32 | U8, S8, U16, S16, U32, F16 |
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src The source tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
- * @param[out] dst The destinatio tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
- * @param[in] policy Conversion policy.
- */
- void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, ConvertPolicy policy);
- /** Static function to check if given info will lead to a valid configuration
- *
- * Similar to @ref ClCast::configure()
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src, const ITensorInfo *dst, ConvertPolicy policy);
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_CAST_H */
diff --git a/src/runtime/gpu/cl/operators/ClConcatenate.cpp b/src/runtime/gpu/cl/operators/ClConcatenate.cpp
deleted file mode 100644
index 4385fcfaed..0000000000
--- a/src/runtime/gpu/cl/operators/ClConcatenate.cpp
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClConcatenate.h"
-
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-
-#include "src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h"
-#include "src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h"
-#include "src/core/gpu/cl/kernels/ClHeightConcatenateKernel.h"
-#include "src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h"
-#include "src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h"
-#include "src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "src/core/helpers/AutoConfiguration.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-ClConcatenate::ClConcatenate()
- : _concat_kernels(),
- _num_inputs(0),
- _axis(Window::DimX)
-{
-}
-
-void ClConcatenate::configure(const CLCompileContext &compile_context, const std::vector<ITensorInfo *> &src_vector, ITensorInfo *dst, size_t axis)
-{
- ARM_COMPUTE_ERROR_ON(dst == nullptr);
- _axis = axis;
- _num_inputs = src_vector.size();
-
- TensorShape dst_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(src_vector, _axis);
- std::vector<const ITensorInfo *> const_src_vector(src_vector.size());
- std::transform(src_vector.begin(), src_vector.end(), const_src_vector.begin(), [](ITensorInfo * t)
- {
- ARM_COMPUTE_ERROR_ON_NULLPTR(t);
- return t;
- });
-
- // dst auto inizialitation if not yet initialized
- auto_init_if_empty(*dst, dst_shape, 1, src_vector[0]->data_type());
- ARM_COMPUTE_ERROR_THROW_ON(ClConcatenate::validate(const_src_vector, dst, axis));
-
- unsigned int offset = 0;
- switch(_axis)
- {
- case Window::DimX:
- {
- switch(_num_inputs)
- {
- case 2:
- {
- // Configure WidthConcatenate2Tensors kernel
- auto kernel = std::make_unique<kernels::ClWidthConcatenate2TensorsKernel>();
- kernel->configure(compile_context, src_vector.at(0), src_vector.at(1), dst);
- _concat_kernels.emplace_back(std::move(kernel));
- break;
- }
- case 4:
- {
- // Configure WidthConcatenate4Tensors kernel
- auto kernel = std::make_unique<kernels::ClWidthConcatenate4TensorsKernel>();
- kernel->configure(compile_context, src_vector.at(0), src_vector.at(1), src_vector.at(2), src_vector.at(3), dst);
- _concat_kernels.emplace_back(std::move(kernel));
- break;
- }
- default:
- {
- // Configure generic case WidthConcatenate kernels
- for(unsigned int i = 0; i < _num_inputs; ++i)
- {
- auto kernel = std::make_unique<kernels::ClWidthConcatenateKernel>();
- kernel->configure(compile_context, src_vector.at(i), offset, dst);
- offset += src_vector.at(i)->dimension(_axis);
- _concat_kernels.emplace_back(std::move(kernel));
- }
- break;
- }
- }
- break;
- }
- case Window::DimY:
- {
- for(unsigned int i = 0; i < _num_inputs; ++i)
- {
- auto kernel = std::make_unique<kernels::ClHeightConcatenateKernel>();
- kernel->configure(compile_context, src_vector.at(i), offset, dst);
- offset += src_vector.at(i)->dimension(_axis);
- _concat_kernels.emplace_back(std::move(kernel));
- }
- break;
- }
- case Window::DimZ:
- {
- for(unsigned int i = 0; i < _num_inputs; ++i)
- {
- auto kernel = std::make_unique<kernels::ClDepthConcatenateKernel>();
- kernel->configure(compile_context, src_vector.at(i), offset, dst);
- offset += src_vector.at(i)->dimension(_axis);
- _concat_kernels.emplace_back(std::move(kernel));
- }
- break;
- }
- case 3:
- {
- for(unsigned int i = 0; i < _num_inputs; ++i)
- {
- auto kernel = std::make_unique<kernels::ClBatchConcatenateKernel>();
- kernel->configure(compile_context, src_vector.at(i), offset, dst);
- offset += src_vector.at(i)->dimension(_axis);
- _concat_kernels.emplace_back(std::move(kernel));
- }
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Axis not supported");
- }
-}
-
-Status ClConcatenate::validate(const std::vector<const ITensorInfo *> &src_vector, const ITensorInfo *dst, size_t axis)
-{
- ARM_COMPUTE_RETURN_ERROR_ON(dst == nullptr);
- const unsigned int num_inputs = src_vector.size();
-
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(dst);
- ARM_COMPUTE_RETURN_ERROR_ON(num_inputs < 2);
-
- unsigned int offset = 0;
- switch(axis)
- {
- case Window::DimX:
- {
- switch(num_inputs)
- {
- case 2:
- // Validate WidthConcatenate2Tensors kernels if there are 2 inputs
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src_vector[0], src_vector[1]);
- ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenate2TensorsKernel::validate(src_vector[0], src_vector[1], dst));
- break;
- case 4:
- // Validate WidthConcatenate4Tensors kernels if there are 4 inputs
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src_vector[0], src_vector[1], src_vector[2], src_vector[3]);
- ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenate4TensorsKernel::validate(src_vector[0], src_vector[1], src_vector[2], src_vector[3], dst));
- break;
- default:
- // Validate generic case of WidthConcatenate kernel
- for(const auto &src : src_vector)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
- ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenateKernel::validate(src, offset, dst));
- offset += src->dimension(axis);
- }
- break;
- }
- break;
- }
- case Window::DimY:
- {
- for(const auto &src : src_vector)
- {
- ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClHeightConcatenateKernel::validate(src, offset, dst));
- offset += src->dimension(axis);
- }
- break;
- }
- case Window::DimZ:
- {
- for(const auto &src : src_vector)
- {
- ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClDepthConcatenateKernel::validate(src, offset, dst));
- offset += src->dimension(axis);
- }
- break;
- }
- case 3:
- {
- for(const auto &src : src_vector)
- {
- ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClBatchConcatenateKernel::validate(src, offset, dst));
- offset += src->dimension(axis);
- }
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Axis not supported");
- }
-
- if(dst->total_size() != 0)
- {
- TensorShape dst_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(src_vector, axis);
- ARM_COMPUTE_RETURN_ERROR_ON(dst_shape.total_size() != dst->tensor_shape().total_size());
- }
-
- return Status{};
-}
-
-void ClConcatenate::run(ITensorPack &tensors)
-{
- if(tensors.empty())
- {
- ARM_COMPUTE_ERROR("No inputs provided");
- }
-
- if(static_cast<int>(tensors.size()) - 1 != static_cast<int>(_num_inputs))
- {
- ARM_COMPUTE_ERROR("Configured with different number of inputs");
- }
-
- if(_axis == Window::DimX && (_num_inputs == 2 || _num_inputs == 4))
- {
- ARM_COMPUTE_ERROR_ON(_concat_kernels.empty());
- CLScheduler::get().enqueue_op(*_concat_kernels.at(0), tensors, true);
- }
- else
- {
- int i = 0;
- for(auto &k : _concat_kernels)
- {
- ITensorPack pack;
- pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(ACL_SRC_VEC + i));
- pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(ACL_DST));
- CLScheduler::get().enqueue_op(*k, pack, true);
- ++i;
- }
- }
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClConcatenate.h b/src/runtime/gpu/cl/operators/ClConcatenate.h
deleted file mode 100644
index 0d960a605c..0000000000
--- a/src/runtime/gpu/cl/operators/ClConcatenate.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCONCATENATE_H
-#define ARM_COMPUTE_CLCONCATENATE_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/IClKernel.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-#include <vector>
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to concatenate tensors along a given axis. This function calls the following kernels:
- *
- * -# @ref kernels::ClWidthConcatenateKernel (if underlying concatenation axis is 0).
- * -# @ref kernels::ClHeightConcatenateKernel (if underlying concatenation axis is 1).
- * -# @ref kernels::ClDepthConcatenateKernel (if underlying concatenation axis is 2).
- * -# @ref kernels::ClBatchConcatenateKernel (if underlying concatenation axis is 3).
- */
-class ClConcatenate : public IClOperator
-{
-public:
- /** Default constructor */
- ClConcatenate();
- /** Initialise the kernel's inputs vector and dst.
- *
- * @note Input and dst tensor dimension preconditions differ depending on the concatenation axis.
- * @note Preconditions can be found respectively at @ref kernels::ClWidthConcatenateKernel,
- * @ref kernels::ClHeightConcatenateKernel, @ref kernels::ClDepthConcatenateKernel and @ref kernels::ClBatchConcatenateKernel.
- *
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] src_vector The vectors containing all the tensors info to concatenate. Data types supported: All
- * @param[out] dst Destination tensor info. Data types supported: same as @p src_vector.
- * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
- */
- void configure(const ClCompileContext &compile_context, const std::vector<ITensorInfo *> &src_vector, ITensorInfo *dst, size_t axis);
- /** Static function to check if given info will lead to a valid configuration of @ref ClConcatenate
- *
- * @note Input and dst tensor dimension preconditions differ depending on the concatenation axis.
- * @note Preconditions can be found respectively at @ref kernels::ClWidthConcatenateKernel,
- * @ref kernels::ClHeightConcatenateKernel, @ref kernels::ClDepthConcatenateKernel and @ref kernels::ClBatchConcatenateKernel.
- *
- * @param[in] src_vector The vectors containing all the tensors info to concatenate. Data types supported: All
- * @param[in] dst Destination tensor info. Data types supported: same as @p src_vector.
- * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
- *
- * @return a status
- */
- static Status validate(const std::vector<const ITensorInfo *> &src_vector, const ITensorInfo *dst, size_t axis);
-
- // Inherited methods overridden:
- void run(ITensorPack &tensors) override;
-
-private:
- std::vector<std::unique_ptr<IClKernel>> _concat_kernels;
- unsigned int _num_inputs;
- unsigned int _axis;
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLCONCATENATE_H */
diff --git a/src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.cpp b/src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.cpp
deleted file mode 100644
index 0d2f2925d3..0000000000
--- a/src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.h"
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClConvertFullyConnectedWeights::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const TensorShape &original_src_shape, DataLayout data_layout)
-{
- auto k = std::make_unique<kernels::ClConvertFullyConnectedWeightsKernel>();
- k->configure(compile_context, src, dst, original_src_shape, data_layout);
- _kernel = std::move(k);
-}
-
-Status ClConvertFullyConnectedWeights::validate(const ITensorInfo *src, const ITensorInfo *dst, const TensorShape &original_src_shape, DataLayout data_layout)
-{
- return kernels::ClConvertFullyConnectedWeightsKernel::validate(src, dst, original_src_shape, data_layout);
-}
-} // namespace opencl
-} // namespace arm_compute \ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.h b/src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.h
deleted file mode 100644
index efedc2fcb7..0000000000
--- a/src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_CONVERTFULLYCONNECTEDWEIGHTS_H
-#define ARM_COMPUTE_CL_CONVERTFULLYCONNECTEDWEIGHTS_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref kernels::ClConvertFullyConnectedWeightsKernel */
-class ClConvertFullyConnectedWeights : public IClOperator
-{
-public:
- /** Constructor */
- ClConvertFullyConnectedWeights() = default;
- /** Initialise the kernel's inputs and outputs
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src The src tensor info. Data types supported: All.
- * @param[out] dst The dst tensor info. Data types supported: Same as @p src
- * @param[in] original_src_shape Shape of the original src tensor (the one entering fully connected layer).
- * @param[in] data_layout The data layout the weights have been trained in.
- */
- void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const TensorShape &original_src_shape, DataLayout data_layout);
- /** Static function to check if given info will lead to a valid configuration of @ref kernels::ClConvertFullyConnectedWeightsKernel.
- *
- * @param[in] src First tensor src info. Data types supported: All.
- * @param[in] dst Output tensor info. Data types supported: same as @p src.
- * @param[in] original_src_shape Shape of the original src tensor (the one entering fully connected layer).
- * @param[in] data_layout The data layout the weights have been trained in.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const TensorShape &original_src_shape, DataLayout data_layout);
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_CONVERTFULLYCONNECTEDWEIGHTS_H */
diff --git a/src/runtime/gpu/cl/operators/ClCopy.cpp b/src/runtime/gpu/cl/operators/ClCopy.cpp
deleted file mode 100644
index 2bdb1f5ba1..0000000000
--- a/src/runtime/gpu/cl/operators/ClCopy.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClCopy.h"
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClCopyKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClCopy::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, Window *dst_window)
-{
- auto k = std::make_unique<kernels::ClCopyKernel>();
- k->configure(compile_context, src, dst, dst_window);
- _kernel = std::move(k);
-}
-
-Status ClCopy::validate(const ITensorInfo *src, const ITensorInfo *dst, Window *dst_window)
-{
- return kernels::ClCopyKernel::validate(src, dst, dst_window);
-}
-} // namespace opencl
-} // namespace arm_compute \ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClCopy.h b/src/runtime/gpu/cl/operators/ClCopy.h
deleted file mode 100644
index 0b99676f65..0000000000
--- a/src/runtime/gpu/cl/operators/ClCopy.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_COPY_H
-#define ARM_COMPUTE_CL_COPY_H
-
-#include "arm_compute/core/Window.h"
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref kernels::ClCopyKernel */
-class ClCopy : public IClOperator
-{
-public:
- /** Constructor */
- ClCopy() = default;
- /** Initialise the function's source and destination.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src Source tensor info. Data types supported: All.
- * @param[out] dst Output tensor info. Data types supported: Same as @p src.
- * @param[in] dst_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
- *
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, Window *dst_window = nullptr);
- /** Static function to check if given info will lead to a valid configuration of @ref kernels::ClCopyKernel
- *
- * @param[in] src Source tensor info. Data types supported: All.
- * @param[in] dst Output tensor info. Data types supported: Same as @p src.
- * @param[in] dst_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src, const ITensorInfo *dst, Window *dst_window = nullptr);
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_COPY_H */
diff --git a/src/runtime/gpu/cl/operators/ClCrop.cpp b/src/runtime/gpu/cl/operators/ClCrop.cpp
deleted file mode 100644
index 17bb11912f..0000000000
--- a/src/runtime/gpu/cl/operators/ClCrop.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClCrop.h"
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClCropKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClCrop::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value,
- Window *dst_window)
-{
- auto k = std::make_unique<kernels::ClCropKernel>();
- k->configure(compile_context, src, dst, start, end, batch_index, extrapolation_value, dst_window);
- _kernel = std::move(k);
-}
-
-Status ClCrop::validate(const ITensorInfo *src, const ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value, Window *dst_window)
-{
- return kernels::ClCropKernel::validate(src, dst, start, end, batch_index, extrapolation_value, dst_window);
-}
-} // namespace opencl
-} // namespace arm_compute \ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClCrop.h b/src/runtime/gpu/cl/operators/ClCrop.h
deleted file mode 100644
index acfbf14742..0000000000
--- a/src/runtime/gpu/cl/operators/ClCrop.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_COPY_H
-#define ARM_COMPUTE_CL_COPY_H
-
-#include "arm_compute/core/Window.h"
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref kernels::ClCropKernel */
-class ClCrop : public IClOperator
-{
-public:
- /** Constructor */
- ClCrop() = default;
- /** Initialise the function's source and destination.
- *
- * @note Supported tensor rank: up to 4
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src Source tensor info. Data type supported: All. Data layouts supported: NHWC.
- * @param[out] dst Destination tensor info. Data type supported: F32
- * @param[in] start Coordinates of where to start cropping the image.
- * @param[in] end Coordinates of where to end cropping the image.
- * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p src.
- * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0.
- * @param[in] dst_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr.
- */
- void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0,
- Window *dst_window = nullptr);
-
- /** Static function to check if given info will lead to a valid configuration of @ref kernels::ClCropKernel
- *
- * @note Supported tensor rank: up to 4
- *
- * @param[in] src Source tensor info. Data type supported: All. Data layouts supported: NHWC.
- * @param[in] dst Destination tensor info. Data type supported: F32
- * @param[in] start Coordinates of where to start cropping the image.
- * @param[in] end Coordinates of where to end cropping the image.
- * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p src.
- * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0.
- * @param[in] dst_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr.
- */
- static Status validate(const ITensorInfo *src, const ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0,
- Window *dst_window = nullptr);
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_COPY_H */
diff --git a/src/runtime/gpu/cl/operators/ClDequantize.cpp b/src/runtime/gpu/cl/operators/ClDequantize.cpp
deleted file mode 100644
index 0c1391bb45..0000000000
--- a/src/runtime/gpu/cl/operators/ClDequantize.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClDequantize.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClDequantizeKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClDequantize::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst)
-{
- auto k = std::make_unique<kernels::ClDequantizeKernel>();
- k->configure(compile_context, src, dst);
- _kernel = std::move(k);
-}
-
-Status ClDequantize::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{
- return kernels::ClDequantizeKernel::validate(src, dst);
-}
-
-void ClDequantize::run(ITensorPack &tensors)
-{
- ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided");
- CLScheduler::get().enqueue_op(*_kernel.get(), tensors);
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClDequantize.h b/src/runtime/gpu/cl/operators/ClDequantize.h
deleted file mode 100644
index 47fad3eeee..0000000000
--- a/src/runtime/gpu/cl/operators/ClDequantize.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_DEQUANTIZE_H
-#define ARM_COMPUTE_CL_DEQUANTIZE_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref kernels::ClDequantizeKernel that dequantizes an input tensor */
-class ClDequantize : public IClOperator
-{
-public:
- /** Constructor */
- ClDequantize() = default;
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
- * @param[out] dst Destination tensor info with the same dimensions of @p src. Data type supported: F16/F32.
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst);
- /** Static function to check if given info will lead to a valid configuration
- *
- * Similar to @ref ClDequantize::configure()
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-
- // Inherited method overridden
- void run(ITensorPack &tensors) override;
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_DEQUANTIZE_H */
diff --git a/src/runtime/gpu/cl/operators/ClDirectConv2d.cpp b/src/runtime/gpu/cl/operators/ClDirectConv2d.cpp
deleted file mode 100644
index 13ef42a640..0000000000
--- a/src/runtime/gpu/cl/operators/ClDirectConv2d.cpp
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClDirectConv2d.h"
-
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClActivationKernel.h"
-#include "src/core/gpu/cl/kernels/ClDirectConv2dKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-namespace
-{
-ITensorPack select_activation_src_dst(ITensorPack &tensors)
-{
- ITensorPack pack;
- pack.add_tensor(TensorType::ACL_SRC, tensors.get_tensor(TensorType::ACL_DST));
- pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(TensorType::ACL_DST));
- return pack;
-}
-} // namespace
-
-void ClDirectConv2d::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst,
- const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(src);
-
- // Configure direct convolution kernel
- const ActivationLayerInfo conv2d_act_info = (src->data_layout() == DataLayout::NHWC && is_data_type_float(src->data_type())) ? act_info : ActivationLayerInfo();
- auto k = std::make_unique<kernels::ClDirectConv2dKernel>();
- k->set_target(CLScheduler::get().target());
- k->configure(compile_context, src, weights, biases, dst, conv_info, conv2d_act_info);
- _direct_conv_kernel = std::move(k);
-
- // Configure border handler
- PixelValue zero_value(0.f);
- if(is_data_type_quantized_asymmetric(src->data_type()))
- {
- zero_value = PixelValue(0, src->data_type(), src->quantization_info());
- }
- auto b = std::make_unique<CLFillBorderKernel>();
- b->configure(compile_context, src, _direct_conv_kernel->border_size(), BorderMode::CONSTANT, zero_value);
- _src_border_handler = std::move(b);
-
- // Fused activation is currently supported for NHWC and floating point types
- if(act_info.enabled() && !conv2d_act_info.enabled())
- {
- auto a = std::make_unique<kernels::ClActivationKernel>();
- a->configure(compile_context, dst, dst, act_info);
- _activation_kernel = std::move(a);
- }
-
- // Tune kernels
- CLScheduler::get().tune_kernel_static(*_direct_conv_kernel);
-}
-
-Status ClDirectConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
- const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClDirectConv2dKernel::validate(src, weights, biases, dst, conv_info, ActivationLayerInfo(), CLScheduler::get().target()));
- if(act_info.enabled())
- {
- ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClActivationKernel::validate(dst, dst, act_info));
- }
- return Status{};
-}
-
-void ClDirectConv2d::run(ITensorPack &tensors)
-{
- // Run border handler
- CLScheduler::get().enqueue_op(*_src_border_handler.get(), tensors, false);
- // Run direct convolution
- CLScheduler::get().enqueue_op(*_direct_conv_kernel.get(), tensors, false);
- // Run activation kernel
- if(_activation_kernel)
- {
- auto act_pack = select_activation_src_dst(tensors);
- CLScheduler::get().enqueue_op(*_activation_kernel.get(), act_pack, false);
- }
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClDirectConv2d.h b/src/runtime/gpu/cl/operators/ClDirectConv2d.h
deleted file mode 100644
index e069733fab..0000000000
--- a/src/runtime/gpu/cl/operators/ClDirectConv2d.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_DIRECT_CONV2D_H
-#define ARM_COMPUTE_CL_DIRECT_CONV2D_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/IClKernel.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to simulate a direct convolution layer. This function calls the following OpenCL kernels:
- *
- * -# @ref CLFillBorderKernel (executed if padding size is different from zero)
- * -# @ref opencl::kernels::ClDirectConv2dKernel
- */
-class ClDirectConv2d : public IClOperator
-{
-public:
- /** Constructor */
- ClDirectConv2d() = default;
- /** Set the src and dst tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src Source tensor. 3 lower dimensions represent a single src [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of srcs.
- * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p src.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p src data type, except for src of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type.
- * @param[out] dst Destination tensor. 3 lower dimensions represent a single dst [width, height, OFM], while the rest represent batch of dsts.
- * Data types supported: Same as @p src.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- *
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const PadStrideInfo &conv_info,
- const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration
- *
- * Similar to ClDirectConv2d::configure()
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info,
- const ActivationLayerInfo &act_info = ActivationLayerInfo());
-
- // Inherited method overridden
- void run(ITensorPack &tensors) override;
-
-private:
- std::unique_ptr<IClKernel> _direct_conv_kernel{ nullptr };
- std::unique_ptr<IClKernel> _src_border_handler{ nullptr };
- std::unique_ptr<IClKernel> _activation_kernel{ nullptr };
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_DIRECT_CONV2D_H */
\ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClElementwiseOperations.cpp b/src/runtime/gpu/cl/operators/ClElementwiseOperations.cpp
deleted file mode 100644
index e5b836a0d8..0000000000
--- a/src/runtime/gpu/cl/operators/ClElementwiseOperations.cpp
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClElementwiseOperations.h"
-
-#include "src/core/gpu/cl/kernels/ClElementwiseKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClElementwiseDivision::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info)
-{
- auto k = std::make_unique<kernels::ClArithmeticKernel>();
- k->configure(compile_context, ArithmeticOperation::DIV, src1, src2, dst, act_info);
- _kernel = std::move(k);
-}
-
-Status ClElementwiseDivision::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info)
-{
- return kernels::ClArithmeticKernel::validate(ArithmeticOperation::DIV, src1, src2, dst, act_info);
-}
-
-void ClElementwiseMax::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info)
-{
- auto k = std::make_unique<kernels::ClArithmeticKernel>();
- k->configure(compile_context, ArithmeticOperation::MAX, src1, src2, dst, act_info);
- _kernel = std::move(k);
-}
-
-Status ClElementwiseMax::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info)
-{
- return kernels::ClArithmeticKernel::validate(ArithmeticOperation::MAX, src1, src2, dst, act_info);
-}
-
-void ClElementwiseMin::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info)
-{
- auto k = std::make_unique<kernels::ClArithmeticKernel>();
- k->configure(compile_context, ArithmeticOperation::MIN, src1, src2, dst, act_info);
- _kernel = std::move(k);
-}
-
-Status ClElementwiseMin::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info)
-{
- return kernels::ClArithmeticKernel::validate(ArithmeticOperation::MIN, src1, src2, dst, act_info);
-}
-
-void ClElementwiseSquaredDiff::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info)
-{
- auto k = std::make_unique<kernels::ClArithmeticKernel>();
- k->configure(compile_context, ArithmeticOperation::SQUARED_DIFF, src1, src2, dst, act_info);
- _kernel = std::move(k);
-}
-
-Status ClElementwiseSquaredDiff::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info)
-{
- return kernels::ClArithmeticKernel::validate(ArithmeticOperation::SQUARED_DIFF, src1, src2, dst, act_info);
-}
-
-void ClElementwisePower::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info)
-{
- auto k = std::make_unique<kernels::ClArithmeticKernel>();
- k->configure(compile_context, ArithmeticOperation::POWER, src1, src2, dst, act_info);
- _kernel = std::move(k);
-}
-
-Status ClElementwisePower::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info)
-{
- return kernels::ClArithmeticKernel::validate(ArithmeticOperation::POWER, src1, src2, dst, act_info);
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClElementwiseOperations.h b/src/runtime/gpu/cl/operators/ClElementwiseOperations.h
deleted file mode 100644
index b9ab1405c8..0000000000
--- a/src/runtime/gpu/cl/operators/ClElementwiseOperations.h
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_ELEMENTWISE_OPERATIONS_H
-#define ARM_COMPUTE_CL_ELEMENTWISE_OPERATIONS_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref opencl::kernels::ClArithmeticKernel for division
- *
- * @note The tensor data type for the inputs must be F16/F32.
- * @note The function performs an arithmetic division between two tensors.
- */
-class ClElementwiseDivision : public IClOperator
-{
-public:
- /** Default Constructor */
- ClElementwiseDivision() = default;
- /** Configure function for a given list of arguments.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src1 First source tensor info. Data types supported: F16/F32.
- * @param[in] src2 Second source tensor info. Data types supported: same as @p src1.
- * @param[out] dst Destination tensor info. Data types supported: same as @p src1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref ClElementwiseDivision
- *
- * @param[in] src1 First source tensor info. Data types supported: F16/F32.
- * @param[in] src2 Second source tensor info. Data types supported: same as @p src1.
- * @param[in] dst Destination tensor info. Data types supported: same as @p src1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-};
-
-/** Basic function to run @ref opencl::kernels::ClArithmeticKernel for max
- *
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
- * @note The function performs a max operation between two tensors.
- */
-class ClElementwiseMax : public IClOperator
-{
-public:
- /** Default Constructor */
- ClElementwiseMax() = default;
- /** Configure function for a given list of arguments.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src1 First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32.
- * @param[in] src2 Second source tensor info. Data types supported: same as @p src1.
- * @param[out] dst Destination tensor info. Data types supported: same as @p src1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref opencl::kernels::ClArithmeticKernel for max
- *
- * @param[in] src1 First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32.
- * @param[in] src2 Second source tensor info. Data types supported: same as @p src1.
- * @param[in] dst Destination tensor info. Data types supported: same as @p src1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-};
-
-/** Basic function to run @ref opencl::kernels::ClArithmeticKernel for min
- *
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
- * @note The function performs a min operation between two tensors.
- */
-class ClElementwiseMin : public IClOperator
-{
-public:
- /** Default Constructor */
- ClElementwiseMin() = default;
- /** Configure function for a given list of arguments.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src1 First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32.
- * @param[in] src2 Second source tensor info. Data types supported: same as @p src1.
- * @param[out] dst Destination tensor info. Data types supported: same as @p src1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref opencl::kernels::ClArithmeticKernel for min
- *
- * @param[in] src1 First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32.
- * @param[in] src2 Second source tensor info. Data types supported: same as @p src1.
- * @param[in] dst Destination tensor info. Data types supported: same as @p src1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-};
-
-/** Basic function to run @ref opencl::kernels::ClArithmeticKernel for squared difference
- *
- * @note The tensor data type for the inputs must be QASYMM8/U8/S16/QSYMM16/F16/F32.
- * @note The function performs a squared difference operation between two tensors (i.e., out[i] = (in1[i] - in2[i])^2)
- */
-class ClElementwiseSquaredDiff : public IClOperator
-{
-public:
- /** Default Constructor */
- ClElementwiseSquaredDiff() = default;
- /** Configure function for a given list of arguments.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src1 First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
- * @param[in] src2 Second source tensor info. Data types supported: same as @p src1.
- * @param[out] dst Destination tensor info. Data types supported: same as @p src1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref opencl::kernels::ClArithmeticKernel for squared difference
- *
- * @param[in] src1 First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
- * @param[in] src2 Second source tensor info. Data types supported: same as @p src1.
- * @param[in] dst Destination tensor info. Data types supported: same as @p src1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-};
-
-/** Basic function to run @ref opencl::kernels::ClArithmeticKernel for power
- *
- * @note The tensor data type for the inputs must be F16/F32.
- * @note The function performs an elementwise power of in1 to in2 (i.e., out[i] = in1[i] ^ in2[i])
- */
-class ClElementwisePower : public IClOperator
-{
-public:
- /** Default Constructor */
- ClElementwisePower() = default;
- /** Configure function for a given list of arguments.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src1 First source tensor info. Data types supported: F16/F32.
- * @param[in] src2 Second source tensor info. Data types supported: F16/F32.
- * @param[out] dst Destination tensor info. Data types supported: F16/F32.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref opencl::kernels::ClArithmeticKernel for power
- *
- * @param[in] src1 First source tensor info. Data types supported: F16/F32.
- * @param[in] src2 Second source tensor info. Data types supported: F16/F32.
- * @param[in] dst Destination tensor info. Data types supported: F16/F32.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_ELEMENTWISE_OPERATIONS_H */
diff --git a/src/runtime/gpu/cl/operators/ClElementwiseUnary.cpp b/src/runtime/gpu/cl/operators/ClElementwiseUnary.cpp
deleted file mode 100644
index 7b830a077f..0000000000
--- a/src/runtime/gpu/cl/operators/ClElementwiseUnary.cpp
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClElementwiseUnary.h"
-
-#include "src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClRsqrt::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
-{
- auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>();
- k->configure(compile_context, src, dst, ElementWiseUnary::RSQRT);
- _kernel = std::move(k);
-}
-
-Status ClRsqrt::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{
- return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::RSQRT);
-}
-
-void ClExp::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
-{
- auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>();
- k->configure(compile_context, src, dst, ElementWiseUnary::EXP);
- _kernel = std::move(k);
-}
-
-Status ClExp::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{
- return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::EXP);
-}
-
-void ClNeg::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
-{
- auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>();
- k->configure(compile_context, src, dst, ElementWiseUnary::NEG);
- _kernel = std::move(k);
-}
-
-Status ClNeg::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{
- return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::NEG);
-}
-
-void ClSin::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
-{
- auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>();
- k->configure(compile_context, src, dst, ElementWiseUnary::SIN);
- _kernel = std::move(k);
-}
-
-Status ClSin::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{
- return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::SIN);
-}
-
-void ClAbs::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
-{
- auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>();
- k->configure(compile_context, src, dst, ElementWiseUnary::ABS);
- _kernel = std::move(k);
-}
-
-Status ClAbs::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{
- return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::ABS);
-}
-
-void ClLog::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
-{
- auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>();
- k->configure(compile_context, src, dst, ElementWiseUnary::LOG);
- _kernel = std::move(k);
-}
-
-Status ClLog::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{
- return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::LOG);
-}
-
-void ClRound::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
-{
- auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>();
- k->configure(compile_context, src, dst, ElementWiseUnary::ROUND);
- _kernel = std::move(k);
-}
-
-Status ClRound::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{
- return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::ROUND);
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClElementwiseUnary.h b/src/runtime/gpu/cl/operators/ClElementwiseUnary.h
deleted file mode 100644
index b40e3e9a3b..0000000000
--- a/src/runtime/gpu/cl/operators/ClElementwiseUnary.h
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_ELEMENTWISE_UNARY_H
-#define ARM_COMPUTE_CL_ELEMENTWISE_UNARY_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to perform inverse square root on an src tensor. */
-class ClRsqrt : public IClOperator
-{
-public:
- /** Constructor */
- ClRsqrt() = default;
- /** Initialize the function
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src Source tensor info. Data types supported: F16/F32.
- * @param[out] dst Destination tensor info. Data types supported: same as @p src.
- */
- void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst);
- /** Static function to check if given info will lead to a valid configuration of @ref ClRsqrt
- *
- * @param[in] src First source tensor info. Data types supported: F16/F32.
- * @param[in] dst Destination tensor info. Data types supported: same as @p src.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-};
-
-/** Basic function to perform exponential on an src tensor. */
-class ClExp : public IClOperator
-{
-public:
- /** Constructor */
- ClExp() = default;
- /** Initialize the function
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src Source tensor info. Data types supported: F16/F32.
- * @param[out] dst Destination tensor info. Data types supported: same as @p src.
- */
- void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst);
- /** Static function to check if given info will lead to a valid configuration of @ref ClExp
- *
- * @param[in] src First source tensor info. Data types supported: F16/F32.
- * @param[in] dst Destination tensor info. Data types supported: same as @p src.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-};
-
-/** Basic function to negate an src tensor. */
-class ClNeg : public IClOperator
-{
-public:
- /** Constructor */
- ClNeg() = default;
- /** Initialize the function
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src Source tensor info. Data types supported: F16/F32.
- * @param[out] dst Destination tensor info. Data types supported: same as @p src.
- */
- void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst);
- /** Static function to check if given info will lead to a valid configuration of @ref ClNeg
- *
- * @param[in] src First source tensor info. Data types supported: F16/F32.
- * @param[in] dst Destination tensor info. Data types supported: same as @p src.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-};
-
-/** Basic function to calculate sine of an src tensor. */
-class ClSin : public IClOperator
-{
-public:
- /** Constructor */
- ClSin() = default;
- /** Initialize the function
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src Source tensor info. Data types supported: F16/F32.
- * @param[out] dst Destination tensor info. Data types supported: same as @p src.
- */
- void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst);
- /** Static function to check if given info will lead to a valid configuration of @ref ClSin
- *
- * @param[in] src First source tensor info. Data types supported: F16/F32.
- * @param[in] dst Destination tensor info. Data types supported: same as @p src.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-};
-
-/** Basic function to perform elementwise log on an src tensor. */
-class ClLog : public IClOperator
-{
-public:
- /** Constructor */
- ClLog() = default;
- /** Initialize the function
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src Source tensor info. Data types supported: F16/F32.
- * @param[out] dst Destination tensor info. Data types supported: same as @p src.
- */
- void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst);
- /** Static function to check if given info will lead to a valid configuration of @ref ClLog
- *
- * @param[in] src First source tensor info. Data types supported: F16/F32.
- * @param[in] dst Destination tensor info. Data types supported: same as @p src.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-};
-
-/** Basic function to get the absolute value of an src tensor. */
-class ClAbs : public IClOperator
-{
-public:
- /** Initialize the function
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src Source tensor info. Data types supported: F16/F32.
- * @param[out] dst Destination tensor info. Data types supported: same as @p src.
- */
- void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst);
- /** Static function to check if given info will lead to a valid configuration of @ref ClAbs
- *
- * @param[in] src First source tensor info. Data types supported: F16/F32.
- * @param[in] dst Destination tensor info. Data types supported: same as @p src.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-};
-
-/** Basic function to get the round (to the nearest even) value of an src tensor. */
-class ClRound : public IClOperator
-{
-public:
- /** Initialize the function
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src Source tensor info. Data types supported: F16/F32.
- * @param[out] dst Destination tensor info. Data types supported: same as @p src.
- */
- void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst);
- /** Static function to check if given info will lead to a valid configuration of @ref ClRound
- *
- * @param[in] src First source tensor info. Data types supported: F16/F32.
- * @param[in] dst Destination tensor info. Data types supported: same as @p src.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_ELEMENTWISE_UNARY_H */
diff --git a/src/runtime/gpu/cl/operators/ClFill.cpp b/src/runtime/gpu/cl/operators/ClFill.cpp
deleted file mode 100644
index 4d0afaef24..0000000000
--- a/src/runtime/gpu/cl/operators/ClFill.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClFill.h"
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClFillKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Build and configure the fill kernel for this operator.
- *
- * Creates a kernels::ClFillKernel, forwards every argument to its configure() and stores it in _kernel.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] tensor Tensor info forwarded to the kernel.
- * @param[in] constant_value Fill value forwarded to the kernel.
- * @param[in] dst_window Window forwarded to the kernel (declared as @p window in ClFill.h).
- */
-void ClFill::configure(const ClCompileContext &compile_context, ITensorInfo *tensor, const PixelValue &constant_value, Window *dst_window)
-{
- auto k = std::make_unique<kernels::ClFillKernel>();
- k->configure(compile_context, tensor, constant_value, dst_window);
- // Hand ownership of the configured kernel over to the operator
- _kernel = std::move(k);
-}
-
-/** Check a fill configuration by delegating to kernels::ClFillKernel::validate().
- *
- * @param[in] tensor Tensor info forwarded to the kernel validation.
- * @param[in] constant_value Fill value forwarded to the kernel validation.
- * @param[in] dst_window Window forwarded to the kernel validation (declared as @p window in ClFill.h).
- *
- * @return the status returned by the kernel validation
- */
-Status ClFill::validate(const ITensorInfo *tensor, const PixelValue &constant_value, Window *dst_window)
-{
- return kernels::ClFillKernel::validate(tensor, constant_value, dst_window);
-}
-} // namespace opencl
-} // namespace arm_compute \ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClFill.h b/src/runtime/gpu/cl/operators/ClFill.h
deleted file mode 100644
index e632d88546..0000000000
--- a/src/runtime/gpu/cl/operators/ClFill.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_FILL_H
-#define ARM_COMPUTE_CL_FILL_H
-
-#include "arm_compute/core/Window.h"
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref kernels::ClFillKernel
- *
- * This class declares a defaulted constructor, configure() and validate() on top of IClOperator.
- */
-class ClFill : public IClOperator
-{
-public:
- /** Constructor */
- ClFill() = default;
- /** Initialise the kernel's tensor and filling value
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] tensor Source tensor info. Supported data types: All.
- * @param[in] constant_value The value used to fill the planes of the tensor
- * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr.
- *
- * NOTE(review): the context type is spelled CLCompileContext here but ClCompileContext in ClFill.cpp;
- * presumably one is an alias of the other - confirm.
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *tensor, const PixelValue &constant_value, Window *window = nullptr);
- /** Static function to check if given info will lead to a valid configuration of @ref kernels::ClFillKernel
- *
- * @param[in] tensor Source tensor info. Data types supported: All.
- * @param[in] constant_value The value used to fill the planes of the tensor.
- * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *tensor, const PixelValue &constant_value, Window *window = nullptr);
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_FILL_H */
diff --git a/src/runtime/gpu/cl/operators/ClFlatten.cpp b/src/runtime/gpu/cl/operators/ClFlatten.cpp
deleted file mode 100644
index 060b653dee..0000000000
--- a/src/runtime/gpu/cl/operators/ClFlatten.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClFlatten.h"
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClReshapeKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Build and configure the kernel backing the flatten operator.
- *
- * Flatten is implemented with kernels::ClReshapeKernel: the kernel is created, configured with
- * @p src and @p dst, and stored in _kernel.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src Source tensor info.
- * @param[out] dst Destination tensor info.
- */
-void ClFlatten::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
-{
- auto k = std::make_unique<kernels::ClReshapeKernel>();
- k->configure(compile_context, src, dst);
- // Hand ownership of the configured kernel over to the operator
- _kernel = std::move(k);
-}
-
-/** Check a flatten configuration by delegating to kernels::ClReshapeKernel::validate().
- *
- * @param[in] src Source tensor info.
- * @param[in] dst Destination tensor info.
- *
- * @return the status returned by the kernel validation
- */
-Status ClFlatten::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{
- return kernels::ClReshapeKernel::validate(src, dst);
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClFlatten.h b/src/runtime/gpu/cl/operators/ClFlatten.h
deleted file mode 100644
index 20ad06ee57..0000000000
--- a/src/runtime/gpu/cl/operators/ClFlatten.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_FLATTEN_H
-#define ARM_COMPUTE_CL_FLATTEN_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to flatten a given input
- *
- * This class declares a defaulted constructor, configure() and validate() on top of IClOperator.
- */
-class ClFlatten : public IClOperator
-{
-public:
- /** Constructor */
- ClFlatten() = default;
- /** Configure operator for a given list of arguments
- *
- * Valid data layouts:
- * - All
- *
- * Valid data type configurations:
- * |src |dst |
- * |:--------------|:--------------|
- * |All |All |
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src Source tensor to flatten with at least 3 dimensions.
- * The dimensions above the third will be interpreted as batches. Data types supported: All
- * @param[out] dst Destination tensor with shape [w*h*d, input_batches] where:
- * w = width input tensor, h = height input tensor and d = depth input tensor.
- * Data type supported: same as @p src
- */
- void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst);
- /** Static function to check if given info will lead to a valid configuration
- *
- * Similar to @ref ClFlatten::configure()
- *
- * @param[in] src Source tensor info, as described for configure().
- * @param[in] dst Destination tensor info, as described for configure().
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_FLATTEN_H */
diff --git a/src/runtime/gpu/cl/operators/ClFloor.cpp b/src/runtime/gpu/cl/operators/ClFloor.cpp
deleted file mode 100644
index 94e77c0c54..0000000000
--- a/src/runtime/gpu/cl/operators/ClFloor.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2017-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClFloor.h"
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClFloorKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Build and configure the floor kernel for this operator.
- *
- * Creates a kernels::ClFloorKernel, configures it with @p src and @p dst and stores it in _kernel.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src Source tensor info.
- * @param[out] dst Destination tensor info.
- */
-void ClFloor::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
-{
- auto k = std::make_unique<kernels::ClFloorKernel>();
- k->configure(compile_context, src, dst);
- // Hand ownership of the configured kernel over to the operator
- _kernel = std::move(k);
-}
-
-/** Check a floor configuration by delegating to kernels::ClFloorKernel::validate().
- *
- * @param[in] src Source tensor info.
- * @param[in] dst Destination tensor info.
- *
- * @return the status returned by the kernel validation
- */
-Status ClFloor::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{
- return kernels::ClFloorKernel::validate(src, dst);
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClFloor.h b/src/runtime/gpu/cl/operators/ClFloor.h
deleted file mode 100644
index f54eef9140..0000000000
--- a/src/runtime/gpu/cl/operators/ClFloor.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_FLOOR_H
-#define ARM_COMPUTE_CL_FLOOR_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref kernels::ClFloorKernel
- *
- * This class declares a defaulted constructor, configure() and validate() on top of IClOperator.
- */
-class ClFloor : public IClOperator
-{
-public:
- /** Constructor */
- ClFloor() = default;
- /** Configure operator for a given list of arguments
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src Source tensor info. Data types supported: F16/F32.
- * @param[out] dst Destination tensor info. Data type supported: same as @p src
- */
- void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst);
- /** Static function to check if given info will lead to a valid configuration of @ref ClFloor
- *
- * @param[in] src Source tensor info. Data types supported: F16/F32.
- * @param[in] dst Destination tensor info. Data type supported: same as @p src
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_FLOOR_H */
diff --git a/src/runtime/gpu/cl/operators/ClGemm.cpp b/src/runtime/gpu/cl/operators/ClGemm.cpp
deleted file mode 100644
index a80375447d..0000000000
--- a/src/runtime/gpu/cl/operators/ClGemm.cpp
+++ /dev/null
@@ -1,760 +0,0 @@
-/*
- * Copyright (c) 2017-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClGemm.h"
-
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GPUTarget.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/KernelDescriptors.h"
-#include "arm_compute/core/Log.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/ITensorAllocator.h"
-#include "src/core/gpu/cl/IClKernel.h"
-#include "src/core/helpers/AutoConfiguration.h"
-#include "src/core/helpers/MemoryHelpers.h"
-#include "src/core/utils/helpers/float_ops.h"
-#include "src/runtime/CL/gemm/CLGEMMKernelSelection.h"
-#include "src/runtime/CL/gemm_auto_heuristics/CLGEMMAutoHeuristics.h"
-#include "src/runtime/gpu/cl/utils/ClAuxTensorHandler.h"
-
-#include "support/Cast.h"
-#include "utils/TypePrinter.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-using namespace arm_compute::misc::shape_calculator;
-using namespace arm_compute::cl_gemm;
-using namespace arm_compute::experimental;
-using namespace arm_compute::utils::cast;
-using namespace arm_compute::opencl::kernels;
-
-namespace
-{
-/** Check whether a GEMM kernel type is one of the types accepted by this file.
- *
- * @param[in] kernel_type Kernel type to check.
- *
- * @return true for NATIVE_V1, RESHAPED_ONLY_RHS, RESHAPED_V1 and RESHAPED; false for any other value
- */
-inline bool validate_gemm_kernel(CLGEMMKernelType kernel_type)
-{
- switch(kernel_type)
- {
- case CLGEMMKernelType::NATIVE_V1:
- case CLGEMMKernelType::RESHAPED_ONLY_RHS:
- case CLGEMMKernelType::RESHAPED_V1:
- case CLGEMMKernelType::RESHAPED:
- {
- return true;
- }
- default:
- {
- return false;
- }
- }
-}
-/** Automatically select between mlgo (prioritized) and default heuristics for gemm kernel type
- *
- * @param[in] query Query passed to both heuristics.
- * @param[in] reshape_b_only_on_first_run Flag passed to both heuristics.
- * @param[in] constant_weights When false, NATIVE_V1 is returned without consulting any heuristic.
- *
- * @return the selected kernel type
- */
-inline CLGEMMKernelType auto_select_gemm_kernel(auto_heuristics::CommonQuery query, bool reshape_b_only_on_first_run, bool constant_weights)
-{
- if(!constant_weights)
- {
- return CLGEMMKernelType::NATIVE_V1;
- }
-
- // The mlgo result is used only if it converts to true and names a kernel type accepted by validate_gemm_kernel()
- auto gemm_kernel = auto_heuristics::select_mlgo_gemm_kernel(query, reshape_b_only_on_first_run);
- if(bool(gemm_kernel))
- {
- if(validate_gemm_kernel(gemm_kernel.gemm_type))
- {
- ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use gemm kernel from mlgo heuristics: %s.", to_string(gemm_kernel.gemm_type).c_str());
- return gemm_kernel.gemm_type;
- }
- }
- // Otherwise fall back to the default heuristics; this result is returned without further validation
- gemm_kernel = auto_heuristics::select_default_gemm_kernel(query, reshape_b_only_on_first_run);
- ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use gemm kernel from default heuristics: %s.", to_string(gemm_kernel.gemm_type).c_str());
- return gemm_kernel.gemm_type;
-}
-/** Validate lhs_info and rhs_info for reshaped only rhs kernel
- *
- * The RHS reshape kernel is validated first, then the matrix multiply kernel is validated twice,
- * once with has_pad_y = false and once with has_pad_y = true. Both must pass.
- *
- * @param[in] lhs_info LHS matrix info to validate.
- * @param[in] rhs_info RHS matrix info to validate.
- * @param[in] a LHS tensor info.
- * @param[in] b RHS tensor info (before reshaping).
- * @param[in] c Tensor info passed as third operand to the matrix multiply validation.
- * @param[in] output Output tensor info.
- * @param[in] gemm_kernel_info Kernel info; taken by value, so the caller's copy is not modified.
- *
- * @return true if every validation step succeeds, false otherwise
- */
-inline bool validate_lhs_rhs_info_reshaped_only_rhs(const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c,
- const ITensorInfo *output, GEMMKernelInfo gemm_kernel_info)
-{
- // Validate GEMMLHSMatrixInfo and GEMMRHSMatrixInfo for reshaped only rhs kernel
- TensorInfo tmp_b_info{};
- // Validate reshape RHS kernel
- auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
- if(!bool(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info)))
- {
- return false;
- }
- // Validate mm kernel (alpha and beta are fixed to 1.f and 0.f for this check)
- gemm_kernel_info.lhs_info = lhs_info;
- gemm_kernel_info.rhs_info = rhs_info;
- gemm_kernel_info.has_pad_y = false;
- if(!bool(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, 1.f, 0.f, lhs_info, rhs_info, gemm_kernel_info)))
- {
- return false;
- }
- // Repeat the mm kernel validation with y padding enabled
- gemm_kernel_info.has_pad_y = true;
- if(!bool(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, 1.f, 0.f, lhs_info, rhs_info, gemm_kernel_info)))
- {
- return false;
- }
- return true;
-}
-
-/** Automatically select between mlgo (prioritized) and default heuristics for reshaped only rhs kernel configs
- *
- * @param[in] query Query passed to both heuristics.
- * @param[in] kernel_info Kernel info used when validating the mlgo configuration.
- * @param[in] a LHS tensor info.
- * @param[in] b RHS tensor info.
- * @param[in] c Tensor info passed as third operand to the validation.
- * @param[in] output Output tensor info.
- *
- * @return the selected (lhs_info, rhs_info) pair
- */
-inline std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> auto_select_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery query, GEMMKernelInfo kernel_info, const ITensorInfo *a,
- const ITensorInfo *b,
- const ITensorInfo *c, const ITensorInfo *output)
-{
- // The mlgo configuration is used only if it converts to true and passes validate_lhs_rhs_info_reshaped_only_rhs()
- auto config = auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(query);
- if(config)
- {
- if(validate_lhs_rhs_info_reshaped_only_rhs(config.lhs_info, config.rhs_info, a, b, c, output, kernel_info))
- {
- ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped_only_rhs config from mlgo heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
- return { config.lhs_info, config.rhs_info };
- }
- }
- // Otherwise fall back to the default heuristics; this configuration is returned without validation here
- config = auto_heuristics::select_default_gemm_config_reshaped_only_rhs(query);
- ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped_only_rhs config from default heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
- return { config.lhs_info, config.rhs_info };
-}
-
-/** Validate lhs_info and rhs_info for reshaped kernel
- *
- * Validates, in order, the LHS reshape kernel, the RHS reshape kernel and the reshaped matrix
- * multiply kernel (on the two reshaped tensor infos). All three must pass.
- *
- * @param[in] lhs_info LHS matrix info to validate.
- * @param[in] rhs_info RHS matrix info to validate.
- * @param[in] a LHS tensor info (before reshaping).
- * @param[in] b RHS tensor info (before reshaping).
- * @param[in] c Tensor info passed as third operand to the matrix multiply validation.
- * @param[in] output Output tensor info.
- * @param[in] gemm_kernel_info Kernel info; taken by value, so the caller's copy is not modified.
- * @param[in] reinterpret_input_as_3d Flag forwarded to the LHS reshaped-shape computation and LHS reshape validation.
- *
- * @return true if every validation step succeeds, false otherwise
- */
-inline bool validate_lhs_rhs_info_reshaped(const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c,
- const ITensorInfo *output, GEMMKernelInfo gemm_kernel_info, bool reinterpret_input_as_3d)
-{
- // Validate GEMMLHSMatrixInfo and GEMMRHSMatrixInfo for reshaped kernel
- TensorInfo tmp_a_info{};
- TensorInfo tmp_b_info{};
-
- // Validate reshape LHS kernel
- auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(compute_lhs_reshaped_shape(*a, lhs_info, reinterpret_input_as_3d)));
- if(!bool(ClGemmReshapeLhsMatrixKernel::validate(a, &tmp_a_info, lhs_info, reinterpret_input_as_3d)))
- {
- return false;
- }
-
- // Validate reshape RHS kernel
- auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
- if(!bool(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info)))
- {
- return false;
- }
- // Validate mm kernel (alpha and beta are fixed to 1.f and 0.f for this check)
- gemm_kernel_info.lhs_info = lhs_info;
- gemm_kernel_info.rhs_info = rhs_info;
- if(!bool(ClGemmMatrixMultiplyReshapedKernel::validate(&tmp_a_info, &tmp_b_info, c, output, 1.f, 0.f, lhs_info, rhs_info, gemm_kernel_info)))
- {
- return false;
- }
- return true;
-}
-
-/** Automatically select between mlgo (prioritized) and default heuristics for reshaped kernel configs
- *
- * @param[in] query Query passed to both heuristics.
- * @param[in] kernel_info Kernel info used when validating the mlgo configuration.
- * @param[in] a LHS tensor info.
- * @param[in] b RHS tensor info.
- * @param[in] c Tensor info passed as third operand to the validation.
- * @param[in] output Output tensor info.
- * @param[in] reinterpret_input_as_3d Flag forwarded to validate_lhs_rhs_info_reshaped().
- *
- * @return the selected (lhs_info, rhs_info) pair
- */
-inline std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> auto_select_gemm_config_reshaped(auto_heuristics::CommonQuery query, GEMMKernelInfo kernel_info, const ITensorInfo *a, const ITensorInfo *b,
- const ITensorInfo *c, const ITensorInfo *output, bool reinterpret_input_as_3d)
-{
- // The mlgo configuration is used only if it converts to true and passes validate_lhs_rhs_info_reshaped()
- auto config = auto_heuristics::select_mlgo_gemm_config_reshaped(query);
- if(config)
- {
- if(validate_lhs_rhs_info_reshaped(config.lhs_info, config.rhs_info, a, b, c, output, kernel_info, reinterpret_input_as_3d))
- {
- ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped config from mlgo heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
- return { config.lhs_info, config.rhs_info };
- }
- }
- // Otherwise fall back to the default heuristics; this configuration is returned without validation here
- config = auto_heuristics::select_default_gemm_config_reshaped(query);
- ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped config from default heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
- return { config.lhs_info, config.rhs_info };
-}
-} // namespace
-
-/** Default constructor.
- *
- * Every kernel object is created up-front, whichever GEMM variant is configured later.
- * _mm_reshaped_only_rhs_kernel and _mm_reshaped_only_rhs_fallback_kernel are two instances of the
- * same kernel class. The kernel type starts as NATIVE_V1 and _aux_mem is sized with AuxTensorIdx::Count.
- */
-ClGemm::ClGemm()
- : _mm_kernel(std::make_unique<ClGemmMatrixMultiplyKernel>()),
- _reshape_lhs_kernel(std::make_unique<ClGemmReshapeLhsMatrixKernel>()),
- _reshape_rhs_kernel(std::make_unique<ClGemmReshapeRhsMatrixKernel>()),
- _mm_reshaped_kernel(std::make_unique<ClGemmMatrixMultiplyReshapedKernel>()),
- _mm_reshaped_only_rhs_kernel(std::make_unique<ClGemmMatrixMultiplyReshapedOnlyRhsKernel>()),
- _mm_reshaped_only_rhs_fallback_kernel(std::make_unique<ClGemmMatrixMultiplyReshapedOnlyRhsKernel>()),
- _tmp_a(),
- _tmp_b(),
- _reshape_b_only_on_first_run(false),
- _gemm_kernel_type(CLGEMMKernelType::NATIVE_V1),
- _aux_mem(AuxTensorIdx::Count)
-{
-}
-
-/** Configure the native (no reshape) GEMM path, which uses only _mm_kernel.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] a LHS tensor info; k is read from dimension 0 and m from dimension 1 (times dimension 2 when reinterpreted as 3D).
- * @param[in] b RHS tensor info; n is read from dimension 0.
- * @param[in] c Tensor info forwarded as third operand to the kernel.
- * @param[out] output Output tensor info.
- * @param[in] alpha Forwarded to the kernel.
- * @param[in] beta Forwarded to the kernel.
- * @param[in] gemm_info GEMM meta-data (3D reinterpretation, output depth, bias broadcast, mixed precision, activation).
- */
-void ClGemm::configure_native_v1(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta,
- const GEMMInfo &gemm_info)
-{
- const unsigned int m = gemm_info.reinterpret_input_as_3d() ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
- const unsigned int n = b->dimension(0);
- const unsigned int k = a->dimension(0);
- const GPUTarget gpu_target = CLScheduler::get().target();
-
- // Set the target for the kernels
- _mm_kernel->set_target(gpu_target);
-
- // Both reshape multipliers are 1 on this path
- GEMMReshapeInfo reshape_info(m, n, k, 1, 1, gemm_info.depth_output_gemm3d(), gemm_info.reinterpret_input_as_3d(), gemm_info.broadcast_bias());
-
- // Configure and tune matrix multiply kernel
- // (the boolean literal is false here and true in configure_reshaped_v1; presumably it flags reshaped inputs - confirm)
- _mm_kernel->configure(compile_context, a, b, c, output, alpha, beta, false, reshape_info, gemm_info.fp_mixed_precision(), gemm_info.activation_info());
-
- // Tune kernel statically
- CLScheduler::get().tune_kernel_static(*_mm_kernel);
-}
-
-/** Configure the reshaped V1 GEMM path: LHS reshape, RHS reshape, then _mm_kernel on the reshaped tensors.
- *
- * The LHS/RHS matrix infos are hard-coded here (no heuristics are queried). Auxiliary memory is
- * requested for both reshaped tensors (_tmp_a and _tmp_b).
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] a LHS tensor info.
- * @param[in] b RHS tensor info.
- * @param[in] c Tensor info forwarded as third operand to the kernel.
- * @param[out] output Output tensor info.
- * @param[in] alpha Forwarded to the kernel.
- * @param[in] beta Forwarded to the kernel.
- * @param[in] gemm_info GEMM meta-data.
- */
-void ClGemm::configure_reshaped_v1(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta,
- const GEMMInfo &gemm_info)
-{
- bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
- const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
- const unsigned int n = b->dimension(0);
- const unsigned int k = a->dimension(0);
- const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
- const GPUTarget gpu_target = CLScheduler::get().target();
- int mult_transpose1xW_width = 1;
- int mult_interleave4x4_height = 1;
-
- // Set the target for the kernels
- _reshape_lhs_kernel->set_target(gpu_target);
- _mm_kernel->set_target(gpu_target);
-
- // Bifrost targets use larger reshape multipliers; every other architecture keeps 1/1
- if(get_arch_from_target(gpu_target) == GPUTarget::BIFROST)
- {
- mult_transpose1xW_width = 4;
- mult_interleave4x4_height = 2;
- }
-
- GEMMRHSMatrixInfo rhs_info;
- rhs_info.n0 = 16 / b->element_size();
- rhs_info.k0 = 1;
- rhs_info.h0 = mult_transpose1xW_width;
- rhs_info.interleave = false;
- rhs_info.transpose = false;
-
- GEMMLHSMatrixInfo lhs_info;
- lhs_info.m0 = 4;
- lhs_info.k0 = 4;
- lhs_info.v0 = mult_interleave4x4_height;
- lhs_info.interleave = true;
- lhs_info.transpose = true;
-
- // reinterpret_input_as_3d is passed as false here; the flag is given to the LHS reshape kernel below instead
- GEMMReshapeInfo reshape_info(m, n, k, mult_transpose1xW_width, mult_interleave4x4_height, depth_output_gemm3d, false, gemm_info.broadcast_bias());
-
- // Configure interleave kernel
- _reshape_lhs_kernel->configure(compile_context, a, &_tmp_a, lhs_info, reinterpret_input_as_3d);
-
- // Configure transpose kernel
- _reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info);
-
- // Configure and tune matrix multiply kernel
- _mm_kernel->configure(compile_context, &_tmp_a, &_tmp_b, c, output, alpha, beta, true, reshape_info, gemm_info.fp_mixed_precision(), gemm_info.activation_info());
-
- CLScheduler::get().tune_kernel_static(*_mm_kernel);
-
- // Request memory for LHS and RHS reshape matrix
- // (_reshape_b_only_on_first_run is only read here; it is not assigned in this function)
- _aux_mem[LhsReshape] = MemoryInfo(offset_int_vec(LhsReshape), MemoryLifetime::Temporary, _tmp_a.total_size());
- _aux_mem[RhsReshape] = MemoryInfo(offset_int_vec(RhsReshape), _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _tmp_b.total_size());
-}
-
-/** Configure the reshaped GEMM path: LHS reshape, RHS reshape, then _mm_reshaped_kernel on the reshaped tensors.
- *
- * The LHS/RHS matrix infos come from auto_select_gemm_config_reshaped(). Auxiliary memory is
- * requested for both reshaped tensors (_tmp_a and _tmp_b).
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] a LHS tensor info.
- * @param[in] b RHS tensor info.
- * @param[in] c Tensor info forwarded as third operand to the kernel.
- * @param[out] output Output tensor info.
- * @param[in] alpha Forwarded to the kernel.
- * @param[in] beta Forwarded to the kernel.
- * @param[in] gemm_info GEMM meta-data.
- */
-void ClGemm::configure_reshaped_v2(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta,
- const GEMMInfo &gemm_info)
-{
- DataType data_type = a->data_type();
- bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
- const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
- const unsigned int n = b->dimension(0);
- const unsigned int k = a->dimension(0);
- const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
- const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
- const GPUTarget gpu_target = CLScheduler::get().target();
- bool broadcast_bias = gemm_info.broadcast_bias();
-
- // reinterpret_input_as_3d is set to false in the kernel info; the flag is given to the LHS reshape kernel below instead
- GEMMKernelInfo kernel_info;
- kernel_info.m = m;
- kernel_info.n = n;
- kernel_info.k = k;
- kernel_info.depth_output_gemm3d = depth_output_gemm3d;
- kernel_info.reinterpret_input_as_3d = false;
- kernel_info.broadcast_bias = broadcast_bias;
- kernel_info.activation_info = gemm_info.activation_info();
-
- // Set the target for the kernels
- // NOTE(review): the target is set on _mm_kernel, but the multiply kernel configured below is
- // _mm_reshaped_kernel - confirm whether _mm_reshaped_kernel was intended.
- _reshape_lhs_kernel->set_target(gpu_target);
- _mm_kernel->set_target(gpu_target);
-
- GEMMLHSMatrixInfo lhs_info{};
- GEMMRHSMatrixInfo rhs_info{};
-
- // Pick up the GEMM configuration
- std::tie(lhs_info, rhs_info) = auto_select_gemm_config_reshaped(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size }, kernel_info, a, b,
- c, output, gemm_info.reinterpret_input_as_3d());
-
- _reshape_lhs_kernel->configure(compile_context, a, &_tmp_a, lhs_info, gemm_info.reinterpret_input_as_3d());
- _reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info);
-
- // Configure matrix multiply kernel (no tune call is made in this function)
- _mm_reshaped_kernel->configure(compile_context, &_tmp_a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
-
- // Request memory for LHS and RHS reshape matrix
- // (_reshape_b_only_on_first_run is only read here; it is not assigned in this function)
- _aux_mem[LhsReshape] = MemoryInfo(offset_int_vec(LhsReshape), MemoryLifetime::Temporary, _tmp_a.total_size());
- _aux_mem[RhsReshape] = MemoryInfo(offset_int_vec(RhsReshape), _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _tmp_b.total_size());
-}
-
-/** Configure the reshaped-only-RHS GEMM path: RHS reshape, then two variants of the multiply kernel.
- *
- * The LHS/RHS matrix infos come from auto_select_gemm_config_reshaped_only_rhs(). The same
- * configuration is applied to _mm_reshaped_only_rhs_kernel (has_pad_y = false) and to
- * _mm_reshaped_only_rhs_fallback_kernel (has_pad_y = true). Auxiliary memory is requested for
- * the reshaped RHS tensor (_tmp_b) only.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] a LHS tensor info.
- * @param[in] b RHS tensor info.
- * @param[in] c Tensor info forwarded as third operand to the kernels.
- * @param[out] output Output tensor info.
- * @param[in] alpha Forwarded to the kernels.
- * @param[in] beta Forwarded to the kernels.
- * @param[in] gemm_info GEMM meta-data.
- */
-void ClGemm::configure_reshaped_only_rhs(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta,
- const GEMMInfo &gemm_info)
-{
- DataType data_type = a->data_type();
- bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
- const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
- const unsigned int n = b->dimension(0);
- const unsigned int k = a->dimension(0);
- const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
- const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
- const GPUTarget gpu_target = CLScheduler::get().target();
- bool broadcast_bias = gemm_info.broadcast_bias();
-
- GEMMKernelInfo kernel_info;
- kernel_info.m = m;
- kernel_info.n = n;
- kernel_info.k = k;
- kernel_info.depth_output_gemm3d = depth_output_gemm3d;
- kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d;
- kernel_info.broadcast_bias = broadcast_bias;
- kernel_info.activation_info = gemm_info.activation_info();
-
- // Set the target for the kernels
- // NOTE(review): the target is set on _mm_kernel only, but the multiply kernels configured below are
- // _mm_reshaped_only_rhs_kernel and _mm_reshaped_only_rhs_fallback_kernel - confirm whether those were intended.
- _mm_kernel->set_target(gpu_target);
-
- GEMMLHSMatrixInfo lhs_info{};
- GEMMRHSMatrixInfo rhs_info{};
-
- // Pick up the GEMM configuration
- std::tie(lhs_info, rhs_info) = auto_select_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size }, kernel_info, a, b, c, output);
-
- // Transpose matrix
- _reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info);
-
- // Configure two variants of ClGemmMatrixMultiplyReshapedOnlyRhsKernel (has_pad_y = false/true)
- // During the prepare stage we check the padding requirement for the lhs and dst tensors. If they do not have
- // pad y, we dispatch ClGemmMatrixMultiplyReshapedOnlyRhsKernel with has_pad_y = false
-
- // Configure matrix multiply kernel with no y padding support
- kernel_info.has_pad_y = false;
- _mm_reshaped_only_rhs_kernel->configure(compile_context, a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
-
- // Configure matrix multiply kernel with y padding support
- kernel_info.has_pad_y = true;
- _mm_reshaped_only_rhs_fallback_kernel->configure(compile_context, a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
-
- // Request memory for RHS reshape matrix
- // (_reshape_b_only_on_first_run is only read here; it is not assigned in this function)
- _aux_mem[RhsReshape] = MemoryInfo(offset_int_vec(RhsReshape), _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _tmp_b.total_size());
-}
-
-/** Validate the native (no reshape) GEMM path by validating ClGemmMatrixMultiplyKernel.
- *
- * @param[in] a LHS tensor info; k is read from dimension 0 and m from dimension 1 (times dimension 2 when reinterpreted as 3D).
- * @param[in] b RHS tensor info; n is read from dimension 0.
- * @param[in] c Tensor info forwarded as third operand to the kernel validation.
- * @param[in] output Output tensor info.
- * @param[in] alpha Forwarded to the kernel validation.
- * @param[in] beta Forwarded to the kernel validation.
- * @param[in] gemm_info GEMM meta-data.
- *
- * @return the kernel validation error if it fails, otherwise an empty Status
- */
-Status ClGemm::validate_native_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
-{
- // NOTE(review): alpha and output are marked unused here although both are forwarded to the
- // kernel validation below; presumably this guards builds where the macro below compiles out - confirm.
- ARM_COMPUTE_UNUSED(alpha);
- ARM_COMPUTE_UNUSED(output);
-
- // Get the GPU target
- const GPUTarget gpu_target = CLScheduler::get().target();
- bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
- const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
- const unsigned int n = b->dimension(0);
- const unsigned int k = a->dimension(0);
- const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
-
- // Both reshape multipliers are 1 on this path
- const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d, gemm_info.broadcast_bias());
-
- // Validate matrix multiply
- ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyKernel::validate(a, b, c, output, alpha, beta,
- false, reshape_info, gpu_target, gemm_info.fp_mixed_precision(), gemm_info.activation_info()));
-
- return Status{};
-}
-
-Status ClGemm::validate_reshaped_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
-{
- ARM_COMPUTE_UNUSED(alpha);
- ARM_COMPUTE_UNUSED(output);
-
- TensorInfo tmp_a_info{};
- TensorInfo tmp_b_info{};
-
- // Get the GPU target
- const GPUTarget gpu_target = CLScheduler::get().target();
- const unsigned int m = gemm_info.reinterpret_input_as_3d() ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
- const unsigned int n = b->dimension(0);
- const unsigned int k = a->dimension(0);
- int mult_transpose1xW_width = 1;
- int mult_interleave4x4_height = 1;
- const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
-
- if(get_arch_from_target(gpu_target) == GPUTarget::BIFROST)
- {
- mult_transpose1xW_width = 4;
- mult_interleave4x4_height = 2;
- }
-
- GEMMRHSMatrixInfo rhs_info;
- rhs_info.n0 = 16 / b->element_size();
- rhs_info.k0 = 1;
- rhs_info.h0 = mult_transpose1xW_width;
- rhs_info.interleave = false;
- rhs_info.transpose = false;
-
- GEMMLHSMatrixInfo lhs_info;
- lhs_info.m0 = 4;
- lhs_info.k0 = 4;
- lhs_info.v0 = mult_interleave4x4_height;
- lhs_info.interleave = true;
- lhs_info.transpose = true;
-
- const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(m, n, k, mult_transpose1xW_width, mult_interleave4x4_height, depth_output_gemm3d, false, gemm_info.broadcast_bias());
-
- // Validate interleave kernel
- auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(compute_lhs_reshaped_shape(*a, lhs_info, gemm_info.reinterpret_input_as_3d())));
- ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeLhsMatrixKernel::validate(a, &tmp_a_info, lhs_info, gemm_info.reinterpret_input_as_3d()));
-
- // Validate transpose kernel
- auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
- ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info));
-
- // Validate matrix multiply
- ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyKernel::validate(&tmp_a_info, &tmp_b_info, c, output, alpha, beta,
- true, reshape_info, gpu_target, gemm_info.fp_mixed_precision(), gemm_info.activation_info()));
-
- return Status{};
-}
-
-Status ClGemm::validate_reshaped(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
-{
- ARM_COMPUTE_UNUSED(alpha);
- ARM_COMPUTE_UNUSED(output);
-
- TensorInfo tmp_a_info{};
- TensorInfo tmp_b_info{};
-
- // Get the GPU target
- const GPUTarget gpu_target = CLScheduler::get().target();
- DataType data_type = a->data_type();
- bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
- const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
- const unsigned int n = b->dimension(0);
- const unsigned int k = a->dimension(0);
- const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
- const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
- const bool broadcast_bias = gemm_info.broadcast_bias();
-
- GEMMKernelInfo kernel_info;
- kernel_info.m = m;
- kernel_info.n = n;
- kernel_info.k = k;
- kernel_info.depth_output_gemm3d = depth_output_gemm3d;
- kernel_info.reinterpret_input_as_3d = false;
- kernel_info.broadcast_bias = broadcast_bias;
- kernel_info.activation_info = gemm_info.activation_info();
-
- GEMMLHSMatrixInfo lhs_info;
- GEMMRHSMatrixInfo rhs_info;
-
- // Pick up the GEMM configuration
- // NOTE: No need to validate mlgo configurations as they automatically fall back to default heuristics if validation fails
- const auto gemm_config = select_default_gemm_config_reshaped(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size });
- lhs_info = gemm_config.lhs_info;
- rhs_info = gemm_config.rhs_info;
-
- auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(compute_lhs_reshaped_shape(*a, lhs_info, gemm_info.reinterpret_input_as_3d())));
- ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeLhsMatrixKernel::validate(a, &tmp_a_info, lhs_info, gemm_info.reinterpret_input_as_3d()));
-
- auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
- ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info));
-
- // Validate matrix multiply
- ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedKernel::validate(&tmp_a_info, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info));
-
- return Status{};
-}
-
-Status ClGemm::validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
-{
- ARM_COMPUTE_UNUSED(alpha);
- ARM_COMPUTE_UNUSED(output);
-
- TensorInfo tmp_b_info{};
-
- // Get the GPU target
- const GPUTarget gpu_target = CLScheduler::get().target();
- const DataType data_type = a->data_type();
- bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
- const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
- const unsigned int n = b->dimension(0);
- const unsigned int k = a->dimension(0);
- const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
- const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
- const bool broadcast_bias = gemm_info.broadcast_bias();
-
- GEMMKernelInfo kernel_info;
- kernel_info.m = m;
- kernel_info.n = n;
- kernel_info.k = k;
- kernel_info.depth_output_gemm3d = depth_output_gemm3d;
- kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d;
- kernel_info.broadcast_bias = broadcast_bias;
- kernel_info.activation_info = gemm_info.activation_info();
-
- GEMMLHSMatrixInfo lhs_info;
- GEMMRHSMatrixInfo rhs_info;
-
- // Pick up the GEMM configuration
- // NOTE: No need to validate mlgo configurations as they automatically fall back to default heuristics if validation fails
- const auto gemm_config = select_default_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size });
- lhs_info = gemm_config.lhs_info;
- rhs_info = gemm_config.rhs_info;
-
- auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
- ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info));
-
- // Validate matrix multiply
- kernel_info.has_pad_y = false;
- ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info));
-
- kernel_info.has_pad_y = true;
- ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info));
-
- return Status{};
-}
-
-void ClGemm::configure(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);
-
- // Perform validation step
- ARM_COMPUTE_ERROR_THROW_ON(validate(a, b, c, output, alpha, beta, gemm_info));
-
- // Check if we need to reshape the matrix B only on the first run
- _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();
-
- bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
- const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
- const unsigned int n = b->dimension(0);
- const unsigned int k = a->dimension(0);
- const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
-
- // Select GEMMType
- _gemm_kernel_type = auto_select_gemm_kernel(auto_heuristics::CommonQuery{ CLScheduler::get().target(), a->data_type(), m, n, k, batch_size }, _reshape_b_only_on_first_run,
- gemm_info.constant_weights());
-
- const bool fuse_add_c = (!(helpers::float_ops::is_zero(beta)) && c != nullptr);
-
- ITensorInfo *c_to_use = fuse_add_c ? c : nullptr;
-
- switch(_gemm_kernel_type)
- {
- case CLGEMMKernelType::NATIVE_V1:
- {
- configure_native_v1(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info);
- break;
- }
- case CLGEMMKernelType::RESHAPED_V1:
- {
- configure_reshaped_v1(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info);
- break;
- }
- case CLGEMMKernelType::RESHAPED:
- {
- configure_reshaped_v2(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info);
- break;
- }
- case CLGEMMKernelType::RESHAPED_ONLY_RHS:
- {
- configure_reshaped_only_rhs(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info);
- break;
- }
- default:
- {
- ARM_COMPUTE_ERROR("GEMMType not supported");
- }
- }
-}
-
-Status ClGemm::validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
-{
- // Get the GPU target
- bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
- const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
- const unsigned int n = b->dimension(0);
- const unsigned int k = a->dimension(0);
- const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
-
- // Select GEMMType
- CLGEMMKernelType gemm_kernel_type = auto_select_gemm_kernel(auto_heuristics::CommonQuery
- {
- CLScheduler::get().target(), a->data_type(), m, n, k, batch_size,
- },
- gemm_info.reshape_b_only_on_first_run(), gemm_info.constant_weights());
-
- const bool fuse_add_c = (!(helpers::float_ops::is_zero(beta)) && c != nullptr);
-
- const ITensorInfo *c_to_use = fuse_add_c ? c : nullptr;
-
- switch(gemm_kernel_type)
- {
- case CLGEMMKernelType::NATIVE_V1:
- {
- ARM_COMPUTE_RETURN_ON_ERROR(validate_native_v1(a, b, c_to_use, output, alpha, beta, gemm_info));
- break;
- }
- case CLGEMMKernelType::RESHAPED_V1:
- {
- ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped_v1(a, b, c_to_use, output, alpha, beta, gemm_info));
- break;
- }
- case CLGEMMKernelType::RESHAPED:
- {
- ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped(a, b, c_to_use, output, alpha, beta, gemm_info));
- break;
- }
- case CLGEMMKernelType::RESHAPED_ONLY_RHS:
- {
- ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped_only_rhs(a, b, c_to_use, output, alpha, beta, gemm_info));
- break;
- }
- default:
- {
- ARM_COMPUTE_RETURN_ERROR_MSG("GEMMType not supported");
- }
- }
-
- return Status{};
-}
-
-void ClGemm::run(ITensorPack &tensors)
-{
- const ITensor *lhs = tensors.get_const_tensor(ACL_SRC_0);
- const ITensor *rhs = tensors.get_const_tensor(ACL_SRC_1);
- const ITensor *src2 = tensors.get_const_tensor(ACL_SRC_2);
- ITensor *dst = tensors.get_tensor(ACL_DST);
-
- ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, dst);
-
- CLAuxTensorHandler lhs_reshaped(offset_int_vec(LhsReshape), _tmp_a, tensors, true);
- CLAuxTensorHandler rhs_reshaped(offset_int_vec(RhsReshape), _tmp_b, tensors, true);
-
- // Prepare the consts if needed
- prepare(tensors);
-
- // Run matrix multiply kernel
- switch(_gemm_kernel_type)
- {
- case CLGEMMKernelType::NATIVE_V1:
- {
- CLScheduler::get().enqueue_op(*_mm_kernel, tensors, true);
- break;
- }
- case CLGEMMKernelType::RESHAPED_V1:
- case CLGEMMKernelType::RESHAPED:
- {
- // Run interleave kernel
- ITensorPack reshape_lhs_pack{ { ACL_SRC, lhs }, { ACL_DST, lhs_reshaped.get() } };
- CLScheduler::get().enqueue_op(*_reshape_lhs_kernel, reshape_lhs_pack, false);
-
- if(!_reshape_b_only_on_first_run)
- {
- // Run transpose kernel
- ITensorPack reshape_rhs_pack{ { ACL_SRC, rhs }, { ACL_DST, rhs_reshaped.get() } };
- CLScheduler::get().enqueue_op(*_reshape_rhs_kernel, reshape_rhs_pack, false);
- }
-
- ITensorPack gemm_reshaped_pack{ { ACL_SRC_0, lhs_reshaped.get() }, { ACL_SRC_1, rhs_reshaped.get() }, { ACL_SRC_2, src2 }, { ACL_DST, dst } };
- if(_gemm_kernel_type == CLGEMMKernelType::RESHAPED)
- {
- CLScheduler::get().enqueue_op(*_mm_reshaped_kernel, gemm_reshaped_pack, true);
- }
- else
- {
- CLScheduler::get().enqueue_op(*_mm_kernel, gemm_reshaped_pack, true);
- }
- break;
- }
- case CLGEMMKernelType::RESHAPED_ONLY_RHS:
- {
- if(!_reshape_b_only_on_first_run)
- {
- // Run transpose kernel
- ITensorPack reshape_rhs_pack{ { ACL_SRC, rhs }, { ACL_DST, rhs_reshaped.get() } };
- CLScheduler::get().enqueue_op(*_reshape_rhs_kernel, reshape_rhs_pack, false);
- }
- // In case of RESHAPED_ONLY_RHS, we need to check the padding requirement
- // Check if the lhs or dst tensors have padding
- const unsigned int cross_plane_pad_lhs = lhs->info()->padding().top + lhs->info()->padding().bottom;
- const unsigned int cross_plane_pad_dst = dst->info()->padding().top + dst->info()->padding().bottom;
- bool has_pad_y = (cross_plane_pad_lhs != 0) || (cross_plane_pad_dst != 0);
-
- ITensorPack gemm_reshaped_onlyrhs_pack{ { ACL_SRC_0, lhs }, { ACL_SRC_1, rhs_reshaped.get() }, { ACL_SRC_2, src2 }, { ACL_DST, dst } };
- if(has_pad_y)
- {
- CLScheduler::get().enqueue_op(*_mm_reshaped_only_rhs_fallback_kernel, gemm_reshaped_onlyrhs_pack, true);
- }
- else
- {
- CLScheduler::get().enqueue_op(*_mm_reshaped_only_rhs_kernel, gemm_reshaped_onlyrhs_pack, true);
- }
- break;
- }
- default:
- {
- ARM_COMPUTE_ERROR("GEMMType not supported");
- }
- }
-}
-
-void ClGemm::prepare(ITensorPack &constants)
-{
- const ITensor *src1 = constants.get_const_tensor(ACL_SRC_1);
- ICLTensor *rhs_aux = utils::cast::polymorphic_downcast<ICLTensor *>(constants.get_tensor(offset_int_vec(RhsReshape)));
-
- // If memory for RHS is persistent and src1 is provided re-transform else assume that RHS is transformed
- if((_aux_mem[AuxTensorIdx::RhsReshape].lifetime == MemoryLifetime::Persistent) && (src1 != nullptr && rhs_aux != nullptr) && rhs_aux)
- {
- CLAuxTensorHandler rhs_reshaped(_tmp_b, *rhs_aux);
- ARM_COMPUTE_ERROR_ON(rhs_reshaped.get()->cl_buffer().get() == nullptr);
-
- ITensorPack reshape_rhs_pack{ { ACL_SRC, src1 }, { ACL_DST, rhs_reshaped.get() } };
- CLScheduler::get().enqueue_op(*_reshape_rhs_kernel, reshape_rhs_pack, true);
- }
-}
-
-experimental::MemoryRequirements ClGemm::workspace() const
-{
- return _aux_mem;
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClGemm.h b/src/runtime/gpu/cl/operators/ClGemm.h
deleted file mode 100644
index bd9ca17edf..0000000000
--- a/src/runtime/gpu/cl/operators/ClGemm.h
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (c) 2016-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_GEMM_H
-#define ARM_COMPUTE_CL_GEMM_H
-
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTypes.h"
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/IClKernel.h"
-#include "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.h"
-#include "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h"
-#include "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h"
-#include "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h"
-#include "src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h"
-#include "src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to execute GEMM on OpenCL. This function calls the following OpenCL kernels:
- *
- * -# @ref kernels::ClGemmReshapeLhsMatrixKernel (only if the RESHAPED_V1 is selected by the heuristic model)
- * -# @ref kernels::ClGemmReshapeRhsMatrixKernel (only if either the RESHAPED_V1 or RESHAPED_ONLY_RHS is selected by the select_gemm_kernel method())
- * -# @ref kernels::ClGemmMatrixMultiplyKernel (only if either the NATIVE or RESHAPED_V1 is selected by the select_gemm_kernel method())
- * -# @ref kernels::ClGemmMatrixMultiplyReshapedKernel (only if RESHAPED_V1 is selected by the select_gemm_kernel method())
- * -# @ref kernels::ClGemmMatrixMultiplyReshapedOnlyRhsKernel (only if RESHAPED_ONLY_RHS is selected by the select_gemm_kernel method())
- */
-class ClGemm : public IClOperator
-{
-public:
- /** Constructor */
- ClGemm();
- /** Initialise the kernel's inputs and output
- *
- * Valid data layouts:
- * - All
- *
- * Valid data type configurations:
- * |src0 |src1 |src2 |dst |
- * |:------------|:-----------|:---------|:--------------|
- * |F32 |F32 |F32 |F32 |
- * |F16 |F16 |F16 |F16 |
- *
- * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
- *
- * @note All tensors must have the same data type.
- *
- * @note Whilst the first input tensor can be a vector, the second input tensor must be at least a matrix
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] a First input tensor (Matrix or Vector A). Data types supported: F16/F32
- * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a.
- * @param[in] c Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a.
- * @param[out] output Output tensor. Data type supported: same as @p a
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of matrix C
- * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
- * if the reshape of matrix B should happen only for the first run. GEMMInfo also contains information about the reshaping
- * in case matrix A and matrix B have been already transformed.
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
- /** Static function to check if given info will lead to a valid configuration
- *
- * Similar to ClGemm::configure()
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
-
- // Inherited methods overridden:
- void run(ITensorPack &tensors) override;
- void prepare(ITensorPack &constants) override;
- experimental::MemoryRequirements workspace() const override;
-
-private:
- void configure_native_v1(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
- void configure_reshaped_v1(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
- void configure_reshaped_v2(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
- void configure_reshaped_only_rhs(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
-
- static Status validate_native_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
- static Status validate_reshaped_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
- static Status validate_reshaped(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
- static Status validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
-
-private:
- enum AuxTensorIdx
- {
- LhsReshape = 0,
- RhsReshape,
- Count
- };
-
-private:
- std::unique_ptr<kernels::ClGemmMatrixMultiplyKernel> _mm_kernel;
- std::unique_ptr<kernels::ClGemmReshapeLhsMatrixKernel> _reshape_lhs_kernel;
- std::unique_ptr<kernels::ClGemmReshapeRhsMatrixKernel> _reshape_rhs_kernel;
- std::unique_ptr<kernels::ClGemmMatrixMultiplyReshapedKernel> _mm_reshaped_kernel;
- std::unique_ptr<kernels::ClGemmMatrixMultiplyReshapedOnlyRhsKernel> _mm_reshaped_only_rhs_kernel;
- std::unique_ptr<kernels::ClGemmMatrixMultiplyReshapedOnlyRhsKernel> _mm_reshaped_only_rhs_fallback_kernel;
- TensorInfo _tmp_a;
- TensorInfo _tmp_b;
- bool _reshape_b_only_on_first_run;
- CLGEMMKernelType _gemm_kernel_type;
-
- experimental::MemoryRequirements _aux_mem{};
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLGEMM_H */
diff --git a/src/runtime/gpu/cl/operators/ClLogicalNot.cpp b/src/runtime/gpu/cl/operators/ClLogicalNot.cpp
deleted file mode 100644
index 400efe450d..0000000000
--- a/src/runtime/gpu/cl/operators/ClLogicalNot.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2017-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClLogicalNot.h"
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClLogicalNot::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
-{
- auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>();
- k->configure(compile_context, src, dst, ElementWiseUnary::LOGICAL_NOT);
- _kernel = std::move(k);
-}
-
-Status ClLogicalNot::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{
- return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::LOGICAL_NOT);
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClLogicalNot.h b/src/runtime/gpu/cl/operators/ClLogicalNot.h
deleted file mode 100644
index 25ddf564b5..0000000000
--- a/src/runtime/gpu/cl/operators/ClLogicalNot.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_LOGICAL_NOT_H
-#define ARM_COMPUTE_CL_LOGICAL_NOT_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref kernels::ClElementWiseUnaryKernel for NOT operation */
-class ClLogicalNot : public IClOperator
-{
-public:
- /** Constructor */
- ClLogicalNot() = default;
- /** Configure operator for a given list of arguments
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src Source tensor info. Data types supported: U8.
- * @param[out] dst Destination tensor info. Data types supported: same as @p src.
- */
- void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst);
- /** Static function to check if given info will lead to a valid configuration
- *
- * @param[in] src Soure tensor info. Data types supported: U8.
- * @param[in] dst Destination tensor info. Data types supported: same as @p src.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_LOGICAL_NOT_H */
diff --git a/src/runtime/gpu/cl/operators/ClMul.cpp b/src/runtime/gpu/cl/operators/ClMul.cpp
deleted file mode 100644
index d1e2bc806f..0000000000
--- a/src/runtime/gpu/cl/operators/ClMul.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClMul.h"
-
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClMulKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClMul::configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, float scale,
- ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info)
-{
- auto k = std::make_unique<kernels::ClMulKernel>();
- k->configure(compile_context, src1, src2, dst, scale, overflow_policy, rounding_policy, act_info);
- _kernel = std::move(k);
-}
-
-Status ClMul::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, float scale,
- ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info)
-{
- return kernels::ClMulKernel::validate(src1, src2, dst, scale, overflow_policy, rounding_policy, act_info);
-}
-
-void ClComplexMul::configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info)
-{
- auto k = std::make_unique<kernels::ClComplexMulKernel>();
- k->configure(compile_context, src1, src2, dst, act_info);
- _kernel = std::move(k);
-}
-
-Status ClComplexMul::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info)
-{
- return kernels::ClComplexMulKernel::validate(src1, src2, dst, act_info);
-}
-} // namespace opencl
-} // namespace arm_compute \ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClMul.h b/src/runtime/gpu/cl/operators/ClMul.h
deleted file mode 100644
index 4a662b3276..0000000000
--- a/src/runtime/gpu/cl/operators/ClMul.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_MUL_H
-#define ARM_COMPUTE_CL_MUL_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref opencl::kernels::ClMulKernel
- *
- * The operator only declares configure() and validate(); running the kernel is left to @ref IClOperator.
- */
-class ClMul : public IClOperator
-{
-public:
- /** Default Constructor */
- ClMul() = default;
- /** Initialise the kernel's sources, dst and conversion policy.
- *
- * Valid configurations (src1,src2) -> Output :
- *
- * - (U8,U8) -> U8
- * - (U8,U8) -> S16
- * - (U8,S16) -> S16
- * - (S16,U8) -> S16
- * - (S16,S16) -> S16
- * - (F16,F16) -> F16
- * - (F32,F32) -> F32
- * - (QASYMM8,QASYMM8) -> QASYMM8
- * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
- * - (QSYMM16,QSYMM16) -> QSYMM16
- * - (QSYMM16,QSYMM16) -> S32
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in, out] src1 A src tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
- * The src tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] src2 A src tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
- * The src tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] dst The dst tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
- * NOTE(review): the configuration list above also names S32 as an output type, which is missing from this list - confirm against the kernel.
- * @param[in] scale Scale to apply after multiplication.
- * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
- * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
- * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, float scale,
- ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration
- *
- * Similar to @ref ClMul::configure(): takes the same arguments except the compile context, with all tensor infos const.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, float scale,
- ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-};
-
-/** Basic function to run @ref opencl::kernels::ClComplexMulKernel
- *
- * The operator only declares configure() and validate(); running the kernel is left to @ref IClOperator.
- */
-class ClComplexMul : public IClOperator
-{
-public:
- /** Default Constructor */
- ClComplexMul() = default;
- /** Initialise the kernel's sources, dst.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in, out] src1 A src tensor info. Data types supported: F16/F32. Number of channels supported: 2.
- * The src tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] src2 A src tensor info. Data types supported: same as @p src1. Number of channels supported: same as @p src1.
- * The src tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] dst The dst tensor info. Data types supported: same as @p src1. Number of channels supported: same as @p src1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration
- *
- * Similar to @ref ClComplexMul::configure(): takes the same arguments except the compile context, with all tensor infos const.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_MUL_H */ \ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClPRelu.cpp b/src/runtime/gpu/cl/operators/ClPRelu.cpp
deleted file mode 100644
index d1ce14cc87..0000000000
--- a/src/runtime/gpu/cl/operators/ClPRelu.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClPRelu.h"
-#include "src/core/gpu/cl/kernels/ClElementwiseKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-using KernelType = kernels::ClArithmeticKernel; // Kernel class used by ClPRelu::configure() and ClPRelu::validate() below, always with ArithmeticOperation::PRELU.
-void ClPRelu::configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *alpha, ITensorInfo *output)
-{ // Creates the arithmetic kernel, configures it for the PRELU operation and stores it in _kernel.
- auto k = std::make_unique<KernelType>();
- k->configure(compile_context, ArithmeticOperation::PRELU, input, alpha, (output == nullptr ? input : output)); // A null output means in-place: the input info doubles as the destination info.
- _kernel = std::move(k); // Ownership of the configured kernel moves into the operator.
-}
-
-Status ClPRelu::validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output)
-{ // Delegates to the kernel's validate for PRELU, applying the same null-output substitution as configure().
- return KernelType::validate(ArithmeticOperation::PRELU, input, alpha, (output == nullptr ? input : output)); // A null output is validated as if the input info were the destination info.
-}
-
-void ClPRelu::run(ITensorPack &tensors)
-{ // Completes the tensor pack for in-place execution if needed, then defers to IClOperator::run.
- // Output tensor can be given as nullptr for in-place computation.
- // In this case, get the input tensor and use it as the output tensor.
- if(tensors.get_tensor(TensorType::ACL_DST) == nullptr)
- {
- auto src_tensor = const_cast<ITensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_0)); // The source is fetched as const; constness is cast away because it is about to be registered as the destination.
- ARM_COMPUTE_ERROR_ON_MSG(src_tensor == nullptr, "invalid source tensor is given for in-place computation"); // In-place execution requires ACL_SRC_0 to be present in the pack.
- tensors.add_tensor(TensorType::ACL_DST, src_tensor); // Note: this modifies the caller's pack.
- }
- IClOperator::run(tensors);
-}
-} // namespace opencl
-} // namespace arm_compute \ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClPRelu.h b/src/runtime/gpu/cl/operators/ClPRelu.h
deleted file mode 100644
index 70202aeb81..0000000000
--- a/src/runtime/gpu/cl/operators/ClPRelu.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_PRELU_H
-#define ARM_COMPUTE_CL_PRELU_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic operator to run @ref arm_compute::opencl::kernels::ClArithmeticKernel for PRELU
- *
- * @note The operator implements an activation layer with the PRELU activation function.
- */
-class ClPRelu : public IClOperator
-{
-public:
- /** Default constructor */
- ClPRelu() = default;
- /** Set the input and output tensor infos.
- *
- * @note If the output tensor info is a nullptr or is equal to the input, the activation function will be performed in-place
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] alpha PRelu layer parameters. Data types supported: same as @p input.
- * @param[out] output Destination tensor info. Data type supported: same as @p input. May be nullptr, in which case @p input is used as the destination.
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *alpha, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref arm_compute::opencl::kernels::ClArithmeticKernel for PRELU
- *
- * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] alpha PRelu layer parameters. Data types supported: same as @p input.
- * @param[in] output Destination tensor info. Data type supported: same as @p input. May be nullptr, in which case @p input is validated as the destination.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output);
-
- // Inherited methods overridden: run() registers ACL_SRC_0 as ACL_DST when the pack has no destination tensor.
- void run(ITensorPack &tensors) override;
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_PRELU_H */
diff --git a/src/runtime/gpu/cl/operators/ClPermute.cpp b/src/runtime/gpu/cl/operators/ClPermute.cpp
deleted file mode 100644
index 719bb6dac6..0000000000
--- a/src/runtime/gpu/cl/operators/ClPermute.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClPermute.h"
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClPermuteKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClPermute::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const PermutationVector &perm)
-{ // Creates a ClPermuteKernel, configures it with the caller's arguments and stores it in _kernel. NOTE(review): the header declares the first parameter as CLCompileContext - presumably ClCompileContext is an alias; confirm.
- auto k = std::make_unique<kernels::ClPermuteKernel>();
- k->configure(compile_context, src, dst, perm); // Arguments are forwarded unchanged; no checks are performed in this function.
- _kernel = std::move(k); // Ownership of the configured kernel moves into the operator.
-}
-
-Status ClPermute::validate(const ITensorInfo *src, const ITensorInfo *dst, const PermutationVector &perm)
-{ // Pure delegation: the returned Status is whatever ClPermuteKernel::validate reports for the same arguments.
- return kernels::ClPermuteKernel::validate(src, dst, perm);
-}
-} // namespace opencl
-} // namespace arm_compute \ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClPermute.h b/src/runtime/gpu/cl/operators/ClPermute.h
deleted file mode 100644
index 20e7a32428..0000000000
--- a/src/runtime/gpu/cl/operators/ClPermute.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_PERMUTE_H
-#define ARM_COMPUTE_CL_PERMUTE_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref kernels::ClPermuteKernel
- *
- * The operator only declares configure() and validate(); running the kernel is left to @ref IClOperator.
- */
-class ClPermute : public IClOperator
-{
-public:
- /** Constructor */
- ClPermute() = default;
- /** Initialise the kernel's inputs and outputs and permute vector
- *
- * @note Arbitrary permutation vectors are supported with rank not greater than 4
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src The src tensor info. Data types supported: All.
- * @param[out] dst The dst tensor info. Data types supported: Same as @p src
- * @param[in] perm Permutation vector
- */
- void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const PermutationVector &perm);
- /** Static function to check if given info will lead to a valid configuration of @ref kernels::ClPermuteKernel.
- *
- * @note Arbitrary permutation vectors are supported with rank not greater than 4
- *
- * @param[in] src Source tensor info. Data types supported: All.
- * @param[in] dst Output tensor info. Data types supported: same as @p src.
- * @param[in] perm Permutation vector
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const PermutationVector &perm);
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_PERMUTE_H */ \ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClPool2d.cpp b/src/runtime/gpu/cl/operators/ClPool2d.cpp
deleted file mode 100644
index 40c2b0a8ba..0000000000
--- a/src/runtime/gpu/cl/operators/ClPool2d.cpp
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClPool2d.h"
-
-#include "arm_compute/runtime/CL/CLScheduler.h"
-
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClPool2dKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClPool2d::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, ITensorInfo *indices)
-{ // Sets up two kernels: the pooling kernel (_pooling) and a border-fill kernel (_border_handler) whose mode and fill value depend on layout, pooling type and data type.
- ARM_COMPUTE_ERROR_ON_NULLPTR(src);
- // Configure pooling kernel
- auto k = std::make_unique<kernels::ClPool2dKernel>();
- k->set_target(CLScheduler::get().target()); // The target is taken from the scheduler before the kernel is configured.
- k->configure(compile_context, src, dst, info, indices);
- _pooling = std::move(k);
-
- const DataType data_type = src->data_type();
-
- // Configure border depending on operation required (quantize border in case of asymmetric data_type)
- BorderMode border_mode{}; // Value-initialised here; every non-error path of the switch below assigns it.
- PixelValue pixel_value(0.f); // Default border fill value.
- if(is_data_type_quantized_asymmetric(data_type) && !info.exclude_padding)
- {
- pixel_value = PixelValue(0, data_type, src->quantization_info()); // Built from 0 plus the source quantization info - presumably the quantized representation of zero; confirm against PixelValue.
- }
-
- // Data layout
- const auto data_layout = info.data_layout == DataLayout::UNKNOWN ? src->data_layout() : info.data_layout; // The layout in the pooling info wins unless it is UNKNOWN, in which case the source tensor's layout is used.
-
- switch(data_layout)
- {
- case DataLayout::NCHW:
- border_mode = (PoolingType::MAX == info.pool_type) ? BorderMode::REPLICATE : BorderMode::CONSTANT; // MAX pooling replicates the edge; other pooling types use the constant fill value.
- break;
- case DataLayout::NHWC:
- border_mode = BorderMode::CONSTANT;
- if(PoolingType::MAX == info.pool_type)
- {
- if(is_data_type_quantized(data_type))
- {
- std::tie(pixel_value, std::ignore) = get_min_max(data_type); // Keeps only the first element of the returned pair - presumably the type's minimum, going by the function name.
- }
- else
- {
- pixel_value = PixelValue(std::numeric_limits<float>::lowest()); // Lowest finite float - presumably so that border values never win the max; confirm.
- }
- }
- break;
- default:
- ARM_COMPUTE_ERROR("Data layout not supported");
- }
- auto b = std::make_unique<CLFillBorderKernel>();
- b->configure(compile_context, src, _pooling->border_size(), border_mode, pixel_value); // The border size is queried from the already-configured pooling kernel.
- _border_handler = std::move(b);
-
- // Tune kernels
- CLScheduler::get().tune_kernel_static(*_pooling); // Only the pooling kernel is passed to the tuner here.
-}
-
-Status ClPool2d::validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &info, const ITensorInfo *indices)
-{ // Delegates to ClPool2dKernel::validate only; the border-fill kernel set up by configure() is not validated here.
- return kernels::ClPool2dKernel::validate(src, dst, info, indices);
-}
-
-void ClPool2d::run(ITensorPack &tensors)
-{ // Enqueues the border-fill kernel and then the pooling kernel on the same tensor pack.
- ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided");
-
- CLScheduler::get().enqueue_op(*_border_handler.get(), tensors, false); // Border fill goes first. NOTE(review): the trailing false is presumably the flush flag - confirm against CLScheduler::enqueue_op.
- CLScheduler::get().enqueue_op(*_pooling.get(), tensors, false); // Both kernels are dereferenced unconditionally, so configure() must have been called beforehand.
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClPool2d.h b/src/runtime/gpu/cl/operators/ClPool2d.h
deleted file mode 100644
index 8ac386a64b..0000000000
--- a/src/runtime/gpu/cl/operators/ClPool2d.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_POOL2D_H
-#define ARM_COMPUTE_CL_POOL2D_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following OpenCL kernels:
- *
- * -# @ref CLFillBorderKernel (executed if padding size is different from zero)
- * -# @ref opencl::kernels::ClPool2dKernel
- */
-class ClPool2d : public IClOperator
-{
-public:
- /** Constructor */
- ClPool2d() = default;
- /** Configure operator for a given list of arguments
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[out] dst Destination tensor info. Data type supported: same as @p src
- * @param[in] info Pooling layer parameters.
- * @param[out] indices (optional) The indices info of the maximal values. Data type supported: U32.
- */
- void configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, ITensorInfo *indices = nullptr);
- /** Static function to check if given info will lead to a valid configuration
- *
- * Similar to ClPool2d::configure(): takes the same arguments except the compile context, with all tensor infos const.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &info, const ITensorInfo *indices = nullptr);
-
- // Inherited method overridden: run() enqueues the border handler first, then the pooling kernel.
- void run(ITensorPack &tensors) override;
-
-private:
- std::unique_ptr<ICLKernel> _pooling{ nullptr }; // Pooling kernel; set by configure().
- std::unique_ptr<ICLKernel> _border_handler{ nullptr }; // Border-fill kernel; set by configure().
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_POOL2D_H */
diff --git a/src/runtime/gpu/cl/operators/ClQuantize.cpp b/src/runtime/gpu/cl/operators/ClQuantize.cpp
deleted file mode 100644
index 92bbb62ba5..0000000000
--- a/src/runtime/gpu/cl/operators/ClQuantize.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClQuantize.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClQuantizeKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClQuantize::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst)
-{ // Creates a ClQuantizeKernel, configures it with the caller's arguments and stores it in _kernel.
- auto k = std::make_unique<kernels::ClQuantizeKernel>();
- k->configure(compile_context, src, dst); // Arguments are forwarded unchanged; no checks are performed in this function.
- _kernel = std::move(k); // Ownership of the configured kernel moves into the operator.
-}
-
-Status ClQuantize::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{ // Pure delegation: the returned Status is whatever ClQuantizeKernel::validate reports for the same arguments.
- return kernels::ClQuantizeKernel::validate(src, dst);
-}
-
-void ClQuantize::run(ITensorPack &tensors)
-{ // Enqueues the configured kernel on the scheduler with the given tensor pack.
- ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided");
- CLScheduler::get().enqueue_op(*_kernel.get(), tensors); // _kernel is dereferenced unconditionally, so configure() must have been called beforehand.
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClQuantize.h b/src/runtime/gpu/cl/operators/ClQuantize.h
deleted file mode 100644
index 0b6d2c8cbe..0000000000
--- a/src/runtime/gpu/cl/operators/ClQuantize.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_QUANTIZE_H
-#define ARM_COMPUTE_CL_QUANTIZE_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref kernels::ClQuantizeKernel that quantizes an input tensor */
-class ClQuantize : public IClOperator
-{
-public:
- /** Constructor */
- ClQuantize() = default;
- /** Set the input and output tensor infos.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src Source tensor info. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[out] dst Destination tensor info with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
- *
- * @note Output auto initialization is not supported by this function
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst);
- /** Static function to check if given info will lead to a valid configuration
- *
- * Similar to @ref ClQuantize::configure(): takes the same arguments except the compile context, with all tensor infos const.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-
- // Inherited method overridden: run() enqueues the configured kernel on the scheduler.
- void run(ITensorPack &tensors) override;
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_QUANTIZE_H */
diff --git a/src/runtime/gpu/cl/operators/ClReshape.cpp b/src/runtime/gpu/cl/operators/ClReshape.cpp
deleted file mode 100644
index d3fa9f10ab..0000000000
--- a/src/runtime/gpu/cl/operators/ClReshape.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClReshape.h"
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClReshapeKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClReshape::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
-{ // Creates a ClReshapeKernel, configures it with the caller's arguments and stores it in _kernel. NOTE(review): the header declares this with CLCompileContext and the parameter names input/output - presumably ClCompileContext is an alias; confirm.
- auto k = std::make_unique<kernels::ClReshapeKernel>();
- k->configure(compile_context, src, dst); // Arguments are forwarded unchanged; no checks are performed in this function.
- _kernel = std::move(k); // Ownership of the configured kernel moves into the operator.
-}
-
-Status ClReshape::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{ // Pure delegation: the returned Status is whatever ClReshapeKernel::validate reports for the same arguments.
- return kernels::ClReshapeKernel::validate(src, dst);
-}
-} // namespace opencl
-} // namespace arm_compute \ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClReshape.h b/src/runtime/gpu/cl/operators/ClReshape.h
deleted file mode 100644
index 8cccc5776c..0000000000
--- a/src/runtime/gpu/cl/operators/ClReshape.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_RESHAPE_H
-#define ARM_COMPUTE_CL_RESHAPE_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref kernels::ClReshapeKernel */
-class ClReshape : public IClOperator
-{
-public:
- /** Constructor */
- ClReshape() = default;
- /** Initialise the kernel's inputs and outputs
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor info. Data type supported: All
- * @param[out] output Output info. Data type supported: Same as @p input
- */
- void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output);
-
- /** Static function to check if given info will lead to a valid configuration of @ref kernels::ClReshapeKernel
- *
- * @param[in] input Input tensor info. Data type supported: All
- * @param[in] output Output tensor info. Data type supported: Same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_RESHAPE_H */ \ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClScale.cpp b/src/runtime/gpu/cl/operators/ClScale.cpp
deleted file mode 100644
index 4730c8a16e..0000000000
--- a/src/runtime/gpu/cl/operators/ClScale.cpp
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClScale.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClScaleKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClScale::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const ScaleKernelInfo &info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(src);
- // Configure Scale kernel
- auto k = std::make_unique<kernels::ClScaleKernel>();
- k->set_target(CLScheduler::get().target());
- k->configure(compile_context, src, dst, info);
- _kernel = std::move(k);
- if(!_kernel->border_size().empty())
- {
- auto b = std::make_unique<CLFillBorderKernel>();
- b->configure(compile_context, src, _kernel->border_size(), info.border_mode, info.constant_border_value);
- _border_handler = std::move(b);
- }
- // Tune kernel
- CLScheduler::get().tune_kernel_static(*_kernel);
-}
-
-Status ClScale::validate(const ITensorInfo *src, const ITensorInfo *dst, const ScaleKernelInfo &info)
-{
- return kernels::ClScaleKernel::validate(src, dst, info);
-}
-
-void ClScale::run(ITensorPack &tensors)
-{
- ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided");
- if(!_kernel->border_size().empty())
- {
- CLScheduler::get().enqueue_op(*_border_handler.get(), tensors, false);
- }
- CLScheduler::get().enqueue_op(*_kernel.get(), tensors);
-}
-} // namespace opencl
-} // namespace arm_compute \ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClScale.h b/src/runtime/gpu/cl/operators/ClScale.h
deleted file mode 100644
index 6eccb59be8..0000000000
--- a/src/runtime/gpu/cl/operators/ClScale.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_SCALE_H
-#define ARM_COMPUTE_CL_SCALE_H
-
-#include "arm_compute/core/KernelDescriptors.h"
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to simulate a scale layer. This function calls the following OpenCL kernels:
- *
- * -# @ref CLFillBorderKernel (executed if padding size is different from zero)
- * -# @ref kernels::ClScaleKernel
- */
-class ClScale : public IClOperator
-{
-public:
- /** Constructor */
- ClScale() = default;
- /** Initialize the function's source, destination, interpolation type and border_mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] src Source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] dst Destination tensor info. Data types supported: Same as @p src
- * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- * @param[in] info @ref ScaleKernelInfo descriptor to be used to configure
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const ScaleKernelInfo &info);
-
- /** Static function to check if given info will lead to a valid configuration of @ref ClScale
- *
- * @param[in] src Source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32.
- * @param[in] dst Output tensor info. Data type supported: Same as @p src
- * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- * @param[in] info @ref ScaleKernelInfo descriptor to be used to validate
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const ScaleKernelInfo &info);
-
- // Inherited method overridden
- void run(ITensorPack &tensors) override;
-
-protected:
- std::unique_ptr<ICLKernel> _border_handler{ nullptr };
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLSCALE_H */
diff --git a/src/runtime/gpu/cl/operators/ClSoftmax.cpp b/src/runtime/gpu/cl/operators/ClSoftmax.cpp
deleted file mode 100644
index 975bb0b932..0000000000
--- a/src/runtime/gpu/cl/operators/ClSoftmax.cpp
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClSoftmax.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "src/core/gpu/cl/kernels/ClSoftmaxKernel.h"
-#include "src/core/helpers/MemoryHelpers.h"
-#include "src/core/helpers/SoftmaxHelpers.h"
-#include "src/runtime/gpu/cl/operators/ClPermute.h"
-#include "src/runtime/gpu/cl/utils/ClAuxTensorHandler.h"
-#include "support/Cast.h"
-
-using namespace arm_compute::experimental;
-
-namespace arm_compute
-{
-namespace opencl
-{
-ClSoftmax::ClSoftmax()
- : _permute_input(std::make_unique<ClPermute>()),
- _permute_output(std::make_unique<ClPermute>()),
- _max_shift_exp_sum_kernel(std::make_unique<kernels::ClLogits1DMaxShiftExpSumKernel>()),
- _norm_kernel(std::make_unique<kernels::ClLogits1DNormKernel>()),
- _max_info(),
- _sum_info(),
- _tmp_info(),
- _permuted_src_info(),
- _permuted_dst_info(),
- _aux_mem(InternalTensorIdx::COUNT)
-{
-}
-
-void ClSoftmax::configure(const CLCompileContext &compile_context, const ITensorInfo &src, ITensorInfo &dst, const SoftmaxKernelInfo &info)
-{
- ARM_COMPUTE_ERROR_THROW_ON(validate(src, dst, info));
-
- const size_t actual_axis = static_cast<size_t>(wrap_around(info.axis, static_cast<int32_t>(src.num_dimensions())));
-
- _needs_permute = actual_axis != 0;
-
- const ITensorInfo &tmp_input_info = _needs_permute ? _permuted_src_info : src;
- ITensorInfo &tmp_output_info = _needs_permute ? _permuted_dst_info : dst;
-
- if(_needs_permute)
- {
- const auto perm_info = softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis);
- _permute_input->configure(compile_context, &src, &_permuted_src_info, perm_info);
- }
-
- DataType tmp_data_type = is_data_type_quantized_asymmetric(tmp_input_info.data_type()) ? DataType::S32 : tmp_input_info.data_type();
- _tmp_info = tmp_input_info.clone()->set_data_type(tmp_data_type);
-
- TensorShape max_sum_shape = tmp_input_info.tensor_shape();
- _max_info = tmp_input_info.clone()->set_tensor_shape(max_sum_shape);
- _sum_info = tmp_input_info.clone()->set_tensor_shape(max_sum_shape).set_data_type(tmp_data_type);
-
- // Set GPU target to kernels
- _max_shift_exp_sum_kernel->set_target(CLScheduler::get().target());
-
- _max_shift_exp_sum_kernel->configure(compile_context, tmp_input_info, _max_info, _tmp_info, _sum_info, info);
- _norm_kernel->configure(compile_context, _tmp_info, _sum_info, tmp_output_info, info);
-
- if(_needs_permute)
- {
- const auto perm_info = softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis);
- _permute_output->configure(compile_context, &_permuted_dst_info, &dst, perm_info);
- }
-
- _aux_mem[InternalTensorIdx::SUM] = MemoryInfo(offset_int_vec(InternalTensorIdx::SUM), MemoryLifetime::Temporary, _sum_info.total_size());
- _aux_mem[InternalTensorIdx::TMP] = MemoryInfo(offset_int_vec(InternalTensorIdx::TMP), MemoryLifetime::Temporary, _tmp_info.total_size());
- _aux_mem[InternalTensorIdx::MAX] = MemoryInfo(offset_int_vec(InternalTensorIdx::MAX), MemoryLifetime::Temporary, _max_info.total_size());
-
- _aux_mem[InternalTensorIdx::PERMUTED_SRC] = MemoryInfo(offset_int_vec(InternalTensorIdx::PERMUTED_SRC), MemoryLifetime::Temporary, _permuted_src_info.total_size());
- _aux_mem[InternalTensorIdx::PERMUTED_DST] = MemoryInfo(offset_int_vec(InternalTensorIdx::PERMUTED_DST), MemoryLifetime::Temporary, _permuted_dst_info.total_size());
-}
-
-Status ClSoftmax::validate(const ITensorInfo &src, const ITensorInfo &dst, const SoftmaxKernelInfo &info)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(src.num_dimensions() > 4, "Only up to 4 dimensions are supported");
- ARM_COMPUTE_UNUSED(info.beta);
- ARM_COMPUTE_RETURN_ERROR_ON(info.axis < static_cast<int32_t>(-src.num_dimensions()) || static_cast<int32_t>(src.num_dimensions()) <= info.axis);
-
- const size_t actual_axis = static_cast<size_t>(wrap_around(info.axis, static_cast<int32_t>(src.num_dimensions())));
- const bool needs_permute = actual_axis != 0;
- if(needs_permute)
- {
- const PermutationVector permutation_vector = softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis);
- const TensorShape permuted_shape = misc::shape_calculator::compute_permutation_output_shape(src, permutation_vector);
- TensorInfo input_permuted(src.clone()->set_tensor_shape(permuted_shape));
- ARM_COMPUTE_RETURN_ON_ERROR(ClPermute::validate(&src, &input_permuted, permutation_vector));
- TensorInfo output_permuted(dst.clone()->set_tensor_shape(permuted_shape));
- ARM_COMPUTE_RETURN_ON_ERROR(ClPermute::validate(&output_permuted, &dst, permutation_vector));
- }
-
- // Create intermediate tensor info
- DataType tmp_data_type = is_data_type_quantized_asymmetric(src.data_type()) ? DataType::S32 : src.data_type();
- TensorInfo tensor_info_tmp(src.clone()->set_data_type(tmp_data_type).set_is_resizable(true));
-
- TensorShape max_sum_shape = src.tensor_shape();
- max_sum_shape.set(0, 1);
- TensorInfo tensor_info_max(src.clone()->set_tensor_shape(max_sum_shape).set_is_resizable(true));
- TensorInfo tensor_info_sum(src.clone()->set_tensor_shape(max_sum_shape).set_data_type(tmp_data_type).set_quantization_info(QuantizationInfo()).set_is_resizable(true));
-
- ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClLogits1DMaxShiftExpSumKernel::validate(src, tensor_info_max, tensor_info_tmp, tensor_info_sum));
- ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClLogits1DNormKernel::validate(tensor_info_tmp, tensor_info_sum, dst, info));
-
- return Status{};
-}
-
-void ClSoftmax::run(ITensorPack &tensors)
-{
- auto src = tensors.get_const_tensor(TensorType::ACL_SRC);
- auto dst = tensors.get_tensor(TensorType::ACL_DST);
-
- CLAuxTensorHandler sum(offset_int_vec(InternalTensorIdx::SUM), _sum_info, tensors, false);
- CLAuxTensorHandler tmp(offset_int_vec(InternalTensorIdx::TMP), _tmp_info, tensors, false);
- CLAuxTensorHandler max(offset_int_vec(InternalTensorIdx::MAX), _max_info, tensors, false);
-
- CLAuxTensorHandler permuted_src(offset_int_vec(InternalTensorIdx::PERMUTED_SRC), _permuted_src_info, tensors, false);
- CLAuxTensorHandler permuted_dst(offset_int_vec(InternalTensorIdx::PERMUTED_DST), _permuted_dst_info, tensors, false);
-
- if(_needs_permute)
- {
- ITensorPack pack;
- pack.add_const_tensor(TensorType::ACL_SRC, src);
- pack.add_tensor(TensorType::ACL_DST, permuted_src.get());
- _permute_input.get()->run(pack);
- }
-
- ITensorPack sum_pack;
- ITensorPack norm_pack;
- if(_needs_permute)
- {
- sum_pack.add_const_tensor(TensorType::ACL_SRC, permuted_src.get());
- norm_pack.add_tensor(TensorType::ACL_DST, permuted_dst.get());
- }
- else
- {
- sum_pack.add_const_tensor(TensorType::ACL_SRC, src);
- norm_pack.add_tensor(TensorType::ACL_DST, dst);
- }
- sum_pack.add_tensor(TensorType::ACL_DST, tmp.get());
- sum_pack.add_tensor(TensorType::ACL_INT_0, max.get());
- sum_pack.add_tensor(TensorType::ACL_INT_1, sum.get());
-
- norm_pack.add_const_tensor(TensorType::ACL_SRC, tmp.get());
- norm_pack.add_tensor(TensorType::ACL_INT_0, sum.get());
-
- CLScheduler::get().enqueue_op(*_max_shift_exp_sum_kernel.get(), sum_pack, false);
- CLScheduler::get().enqueue_op(*_norm_kernel.get(), norm_pack, false);
-
- if(_needs_permute)
- {
- ITensorPack pack;
- pack.add_const_tensor(TensorType::ACL_SRC, permuted_dst.get());
- pack.add_tensor(TensorType::ACL_DST, dst);
- _permute_output.get()->run(pack);
- }
-}
-
-experimental::MemoryRequirements ClSoftmax::workspace() const
-{
- return _aux_mem;
-}
-} // namespace opencl
-} // namespace arm_compute \ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClSoftmax.h b/src/runtime/gpu/cl/operators/ClSoftmax.h
deleted file mode 100644
index f19a51fc5e..0000000000
--- a/src/runtime/gpu/cl/operators/ClSoftmax.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_SOFTMAX_H
-#define ARM_COMPUTE_CL_SOFTMAX_H
-
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-struct SoftmaxKernelInfo;
-
-namespace opencl
-{
-class ClPermute;
-namespace kernels
-{
-class ClLogits1DMaxShiftExpSumKernel;
-class ClLogits1DNormKernel;
-} // namespace kernels
-class ClSoftmax : public IClOperator
-{
-public:
- /** Constructor */
- ClSoftmax();
- /** Configure the operator
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax
- * @param[out] dst Destination tensor info. Data types supported: same as @p src
- * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
- *
- */
- void configure(const CLCompileContext &compile_context, const ITensorInfo &src, ITensorInfo &dst, const SoftmaxKernelInfo &info);
- /** Static function to check if the given info will lead to a valid configuration
- *
- * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax
- * @param[out] dst Destination tensor info. Data types supported: same as @p src
- * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
- *
- */
- static Status validate(const ITensorInfo &src, const ITensorInfo &dst, const SoftmaxKernelInfo &info);
- // Inherited methods overridden:
- void run(ITensorPack &tensors) override;
- experimental::MemoryRequirements workspace() const override;
-
-private:
- enum InternalTensorIdx
- {
- MAX = 0,
- SUM,
- TMP,
- PERMUTED_SRC,
- PERMUTED_DST,
- COUNT
- };
-
- std::unique_ptr<ClPermute> _permute_input;
- std::unique_ptr<ClPermute> _permute_output;
- std::unique_ptr<kernels::ClLogits1DMaxShiftExpSumKernel> _max_shift_exp_sum_kernel;
- std::unique_ptr<kernels::ClLogits1DNormKernel> _norm_kernel;
- bool _needs_permute{ false };
-
- TensorInfo _max_info;
- TensorInfo _sum_info;
- TensorInfo _tmp_info;
- TensorInfo _permuted_src_info;
- TensorInfo _permuted_dst_info;
-
- experimental::MemoryRequirements _aux_mem{};
-};
-
-} // opencl
-} // arm_compute
-#endif /* ARM_COMPUTE_CL_SOFTMAX_H */ \ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClSub.cpp b/src/runtime/gpu/cl/operators/ClSub.cpp
deleted file mode 100644
index 429f23a837..0000000000
--- a/src/runtime/gpu/cl/operators/ClSub.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClSub.h"
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClElementwiseKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClSub::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst,
- ConvertPolicy policy, const ActivationLayerInfo &act_info)
-{
- auto k = std::make_unique<kernels::ClSaturatedArithmeticKernel>();
- k->configure(compile_context, ArithmeticOperation::SUB, src1, src2, dst, policy, act_info);
- _kernel = std::move(k);
-}
-
-Status ClSub::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst,
- ConvertPolicy policy, const ActivationLayerInfo &act_info)
-{
- return kernels::ClSaturatedArithmeticKernel::validate(ArithmeticOperation::SUB, src1, src2, dst, policy, act_info);
-}
-} // namespace opencl
-} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClSub.h b/src/runtime/gpu/cl/operators/ClSub.h
deleted file mode 100644
index bcad84d583..0000000000
--- a/src/runtime/gpu/cl/operators/ClSub.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_SUB_H
-#define ARM_COMPUTE_CL_SUB_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run arithmetic subtraction
- *
- * @note The tensor data type for the inputs must be U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
- * @note The function performs an arithmetic subtraction between two tensors.
- */
-class ClSub : public IClOperator
-{
-public:
- /** Default Constructor */
- ClSub() = default;
- /** Configure function for a given list of arguments.
- *
- * Valid configurations (src1,src2) -> dst :
- *
- * - (U8,U8) -> U8
- * - (U8,U8) -> S16
- * - (S16,U8) -> S16
- * - (U8,S16) -> S16
- * - (S16,S16) -> S16
- * - (S32,S32) -> S32
- * - (F16,F16) -> F16
- * - (F32,F32) -> F32
- * - (QASYMM8,QASYMM8) -> QASYMM8
- * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
- * - (QSYMM16,QSYMM16) -> QSYMM16
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in, out] src1 First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
- * The source tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] src2 Second source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
- * The source tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] dst Destination tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
- * @param[in] policy Policy to use to handle overflow.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, ConvertPolicy policy,
- const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref ClSub
- *
- * Valid configurations (src1,src2) -> dst :
- *
- * - (U8,U8) -> U8
- * - (U8,U8) -> S16
- * - (S16,U8) -> S16
- * - (U8,S16) -> S16
- * - (S16,S16) -> S16
- * - (S32,S32) -> S32
- * - (F16,F16) -> F16
- * - (F32,F32) -> F32
- * - (QASYMM8,QASYMM8) -> QASYMM8
- * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
- * - (QSYMM16,QSYMM16) -> QSYMM16
- *
- * @param[in] src1 First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
- * @param[in] src2 Second source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
- * @param[in] dst Destination tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
- * @param[in] policy Policy to use to handle overflow.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, ConvertPolicy policy,
- const ActivationLayerInfo &act_info = ActivationLayerInfo());
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_SUB_H */
diff --git a/src/runtime/gpu/cl/operators/ClTranspose.cpp b/src/runtime/gpu/cl/operators/ClTranspose.cpp
deleted file mode 100644
index 48f44282e8..0000000000
--- a/src/runtime/gpu/cl/operators/ClTranspose.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClTranspose.h"
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClTransposeKernel.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-void ClTranspose::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
-{
- auto k = std::make_unique<kernels::ClTransposeKernel>();
- k->configure(compile_context, src, dst);
- _kernel = std::move(k);
-}
-
-Status ClTranspose::validate(const ITensorInfo *src, const ITensorInfo *dst)
-{
- return kernels::ClTransposeKernel::validate(src, dst);
-}
-} // namespace opencl
-} // namespace arm_compute \ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClTranspose.h b/src/runtime/gpu/cl/operators/ClTranspose.h
deleted file mode 100644
index d898f677ca..0000000000
--- a/src/runtime/gpu/cl/operators/ClTranspose.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_TRANSPOSE_H
-#define ARM_COMPUTE_CL_TRANSPOSE_H
-
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/** Basic function to run @ref kernels::ClTransposeKernel */
-class ClTranspose : public IClOperator
-{
-public:
- /** Constructor */
- ClTranspose() = default;
- /** Initialise the kernel's inputs and outputs
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src The src tensor info. Data types supported: All.
- * @param[out] dst The dst tensor info. Data types supported: Same as @p src
- */
- void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst);
- /** Static function to check if given info will lead to a valid configuration of @ref kernels::ClTransposeKernel.
- *
- * @param[in] src First tensor src info. Data types supported: All.
- * @param[in] dst Output tensor info. Data types supported: same as @p src.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_TRANSPOSE_H */
diff --git a/src/runtime/gpu/cl/operators/ClWinogradConv2d.cpp b/src/runtime/gpu/cl/operators/ClWinogradConv2d.cpp
deleted file mode 100644
index c8db697778..0000000000
--- a/src/runtime/gpu/cl/operators/ClWinogradConv2d.cpp
+++ /dev/null
@@ -1,299 +0,0 @@
-/*
- * Copyright (c) 2018-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/runtime/gpu/cl/operators/ClWinogradConv2d.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/experimental/Types.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/gpu/cl/kernels/ClWinogradFilterTransformKernel.h"
-#include "src/core/gpu/cl/kernels/ClWinogradInputTransformKernel.h"
-#include "src/core/gpu/cl/kernels/ClWinogradOutputTransformKernel.h"
-#include "src/core/helpers/MemoryHelpers.h"
-#include "src/runtime/gpu/cl/utils/ClAuxTensorHandler.h"
-#include "support/Cast.h"
-
-using namespace arm_compute::experimental;
-
-namespace arm_compute
-{
-namespace opencl
-{
-namespace
-{
-Size2D winograd_output_tile(const Size2D &input_dims, const Size2D &kernel_dims, DataLayout data_layout)
-{
- Size2D output_tile = Size2D{};
-
- const unsigned int kernel_max_dim = std::max(kernel_dims.width, kernel_dims.height);
-
- // Check if both input spatial dimensions are at most 4 (only relevant for the NCHW data layout)
- const bool is_input_lt4_nchw = (input_dims.width <= 4 && input_dims.height <= 4) && (data_layout == DataLayout::NCHW);
-
- if(kernel_max_dim == 3U)
- {
- if(kernel_dims == Size2D(3U, 3U))
- {
- output_tile = is_input_lt4_nchw ? Size2D(2U, 2U) : Size2D(4U, 4U);
- }
- else if(kernel_dims == Size2D(3U, 1U))
- {
- output_tile = is_input_lt4_nchw ? Size2D(2U, 1U) : Size2D(4U, 1U);
- }
- else
- {
- output_tile = is_input_lt4_nchw ? Size2D(1U, 2U) : Size2D(1U, 4U);
- }
- }
- else if(kernel_max_dim == 5U)
- {
- output_tile = Size2D(kernel_dims.width == 1 ? 1U : 4U,
- kernel_dims.height == 1 ? 1U : 4U);
- }
- else if(kernel_max_dim == 7U)
- {
- output_tile = Size2D(kernel_dims.width == 1 ? 1U : 2U,
- kernel_dims.height == 1 ? 1U : 2U);
- }
-
- return output_tile;
-}
-
-bool check_support_fast_math(const Size2D &output_tile, const Size2D &kernel_size)
-{
- // Check if we want to configure a Winograd configuration which requires fast math
- using WinogradConfiguration = std::pair<std::pair<int, int>, std::pair<int, int>>;
-
- std::vector<WinogradConfiguration> fast_math_winograd =
- {
- WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5)),
- WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(7, 7))
- };
-
- auto p = std::make_pair(std::pair<int, int>(output_tile.width, output_tile.height),
- std::pair<int, int>(kernel_size.width, kernel_size.height));
-
- return std::find(fast_math_winograd.begin(), fast_math_winograd.end(), p) != fast_math_winograd.end();
-}
-
-Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info,
- const ActivationLayerInfo &act_info, bool enable_fast_math)
-{
- // Get indices for the width and height
- const size_t idx_width = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::WIDTH);
- const size_t idx_height = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::HEIGHT);
-
- // Input shape, kernel size and output tile
- const Size2D input_dims = Size2D(src->tensor_shape()[idx_width], src->tensor_shape()[idx_height]);
- const Size2D kernel_size = Size2D(weights->tensor_shape()[idx_width], weights->tensor_shape()[idx_height]);
- const Size2D output_tile = winograd_output_tile(input_dims, kernel_size, src->data_layout());
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(((conv_info.pad_left() > (kernel_size.x() / 2u)) || (conv_info.pad_right() > (kernel_size.x() / 2u))), "Winograd only supports padding up to half kernel size");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(((conv_info.pad_top() > (kernel_size.y() / 2u)) || (conv_info.pad_bottom() > (kernel_size.y() / 2u))), "Winograd only supports padding up to half kernel size");
-
- // Check if the Winograd configuration requires fast math
- if(!enable_fast_math)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F32); //disable winograd for fp16 if fast math is false.
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(check_support_fast_math(output_tile, kernel_size), "This Winograd configuration requires enable_fast_math=true");
- }
-
- const WinogradInfo winograd_info = WinogradInfo(output_tile,
- kernel_size,
- input_dims,
- conv_info,
- src->data_layout());
-
- // Validate input transform
- const TensorShape input0_shape = misc::shape_calculator::compute_winograd_input_transform_shape(*src, winograd_info);
- const TensorInfo input0 = src->clone()->set_tensor_shape(input0_shape);
- ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWinogradInputTransformKernel::validate(src, &input0, winograd_info));
-
- // Validate filter transform
- const TensorShape input1_shape = misc::shape_calculator::compute_winograd_filter_transform_shape(*weights, winograd_info);
- const TensorInfo input1 = weights->clone()->set_tensor_shape(input1_shape);
- ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWinogradFilterTransformKernel::validate(weights, &input1, winograd_info));
-
- // Validate batched matrix multiply
- TensorShape batched_mm_output_shape = input0.tensor_shape();
- batched_mm_output_shape[0] = input1.tensor_shape()[0];
- const TensorInfo batched_mm_output = input0.clone()->set_tensor_shape(batched_mm_output_shape);
- ARM_COMPUTE_RETURN_ON_ERROR(ClGemm::validate(&input0, &input1, nullptr, &batched_mm_output, 1.0f, 0.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run*/, 0, false, false,
- GEMMLowpOutputStageInfo(), (src->data_type() == DataType::F16))));
-
- // Validate output transform
- ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWinogradOutputTransformKernel::validate(&batched_mm_output, biases, dst, winograd_info, act_info));
- return Status{};
-}
-
-} // namespace
-
-ClWinogradConv2d::ClWinogradConv2d()
- : _batched_mm(),
- _input_transform(std::make_unique<kernels::ClWinogradInputTransformKernel>()),
- _filter_transform(std::make_unique<kernels::ClWinogradFilterTransformKernel>()),
- _output_transform(std::make_unique<kernels::ClWinogradOutputTransformKernel>()),
- _border_handler(),
- _input0(),
- _input1(),
- _batched_mm_output(),
- _is_prepared(false),
- _aux_mem()
-{
-}
-
-ClWinogradConv2d::~ClWinogradConv2d() = default;
-
-void ClWinogradConv2d::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst,
- const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info, bool enable_fast_math)
-{
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, weights, biases, dst, conv_info, act_info, enable_fast_math));
- // Get indices for the width and height
- const size_t idx_width = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::WIDTH);
- const size_t idx_height = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::HEIGHT);
-
- // Input shape, kernel size and output tile
- const Size2D input_dims = Size2D(src->tensor_shape()[idx_width], src->tensor_shape()[idx_height]);
- const Size2D kernel_size = Size2D(weights->tensor_shape()[idx_width], weights->tensor_shape()[idx_height]);
- const Size2D output_tile = winograd_output_tile(input_dims, kernel_size, src->data_layout());
-
- // Check if the Winograd configuration requires fast math
- if(!enable_fast_math)
- {
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F32); //disable winograd for fp16 if fast math is false.
- ARM_COMPUTE_ERROR_ON_MSG(check_support_fast_math(output_tile, kernel_size), "This Winograd configuration requires enable_fast_math=true");
- }
- const WinogradInfo winograd_info = WinogradInfo(output_tile,
- kernel_size,
- input_dims,
- conv_info,
- src->data_layout());
-
- _is_prepared = false;
-
- // Configure input transform
- _input_transform->configure(compile_context, src, &_input0, winograd_info);
- _border_handler.configure(compile_context, src, _input_transform->border_size(), BorderMode::CONSTANT, PixelValue());
-
- // Configure filter transform
- _filter_transform->configure(compile_context, weights, &_input1, winograd_info);
-
- // Configure batched matrix multiply
- _batched_mm.configure(compile_context, &_input0, &_input1, nullptr, &_batched_mm_output, 1.0f, 0.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run*/, 0,
- false, false,
- GEMMLowpOutputStageInfo(),
- (src->data_type() == DataType::F16)));
-
- // Configure output transform
- _output_transform->configure(compile_context, &_batched_mm_output, biases, dst, winograd_info, act_info);
-
- _aux_mem = _batched_mm.workspace();
- _aux_mem.push_back(MemoryInfo(offset_int_vec(2), MemoryLifetime::Temporary, _input0.total_size()));
- _aux_mem.push_back(MemoryInfo(offset_int_vec(3), MemoryLifetime::Persistent, _input1.total_size()));
- _aux_mem.push_back(MemoryInfo(offset_int_vec(4), MemoryLifetime::Temporary, _batched_mm_output.total_size()));
-}
-
-Status ClWinogradConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info,
- const ActivationLayerInfo &act_info, bool enable_fast_math)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, weights, biases, dst, conv_info, act_info, enable_fast_math));
- return Status{};
-}
-
-void ClWinogradConv2d::run(ITensorPack &tensors)
-{
- prepare(tensors);
-
- // Run input transform
- auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_0));
- auto biases = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_2));
- auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
-
- CLAuxTensorHandler input0(offset_int_vec(2), _input0, tensors, true);
- CLAuxTensorHandler input1(offset_int_vec(3), _input1, tensors, true);
- CLAuxTensorHandler batched_mm_output(offset_int_vec(4), _batched_mm_output, tensors, true);
-
- ITensorPack pack_it
- {
- { TensorType::ACL_SRC, src },
- { TensorType::ACL_DST, input0.get() },
- };
- CLScheduler::get().enqueue_op(_border_handler, pack_it);
- CLScheduler::get().enqueue_op(*_input_transform, pack_it);
-
- // Run batched matrix multiplication
- ITensorPack pack_mm
- {
- { TensorType::ACL_SRC_0, input0.get() },
- { TensorType::ACL_SRC_1, input1.get() },
- { TensorType::ACL_DST, batched_mm_output.get() },
- };
- _batched_mm.run(pack_mm);
-
- // Run output transform
- ITensorPack pack_ot
- {
- { TensorType::ACL_SRC_0, batched_mm_output.get() },
- { TensorType::ACL_SRC_1, biases },
- { TensorType::ACL_DST, dst },
- };
- CLScheduler::get().enqueue_op(*_output_transform, pack_ot);
-}
-
-void ClWinogradConv2d::prepare(ITensorPack &tensors)
-{
- if(!_is_prepared)
- {
- auto weights = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_1));
- ICLTensor *in1_aux = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(offset_int_vec(3)));
-
- CLAuxTensorHandler input1(_input1, *in1_aux);
- ITensorPack pack_ft
- {
- { TensorType::ACL_SRC, weights },
- { TensorType::ACL_DST, input1.get() },
- };
- // Run filter transform and mark original weights as unused
- CLScheduler::get().enqueue_op(*_filter_transform, pack_ft, false);
- weights->mark_as_unused();
-
- tensors.add_tensor(ACL_SRC_1, input1.get());
- // Prepare GEMM and release reshaped weights if marked unused by ClGemm
- _batched_mm.prepare(tensors);
-
- CLScheduler::get().queue().finish();
- _is_prepared = true;
- }
-}
-
-experimental::MemoryRequirements ClWinogradConv2d::workspace() const
-{
- return _aux_mem;
-}
-} // namespace opencl
-} // namespace arm_compute \ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClWinogradConv2d.h b/src/runtime/gpu/cl/operators/ClWinogradConv2d.h
deleted file mode 100644
index 83b31f1c99..0000000000
--- a/src/runtime/gpu/cl/operators/ClWinogradConv2d.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright (c) 2018-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_WINOGRADCONV2D_H
-#define ARM_COMPUTE_CL_WINOGRADCONV2D_H
-
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/runtime/gpu/cl/IClOperator.h"
-#include "src/runtime/gpu/cl/operators/ClGemm.h"
-
-namespace arm_compute
-{
-class CLCompileContext;
-class ITensorInfo;
-namespace opencl
-{
-namespace kernels
-{
-class ClWinogradInputTransformKernel;
-class ClWinogradFilterTransformKernel;
-class ClWinogradOutputTransformKernel;
-} // kernels
-/** Basic function to execute Winograd-based convolution on OpenCL. This function calls the following OpenCL functions/kernels:
- *
- * -# @ref kernels::ClWinogradInputTransformKernel
- * -# @ref kernels::ClWinogradFilterTransformKernel (only once)
- * -# @ref ClGemm
- * -# @ref kernels::ClWinogradOutputTransformKernel
- *
- */
-class ClWinogradConv2d : public IClOperator
-{
-public:
- /** Default constructor */
- ClWinogradConv2d();
- /** Default destructor */
- ~ClWinogradConv2d();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- ClWinogradConv2d(const ClWinogradConv2d &) = delete;
- /** Default move constructor */
- ClWinogradConv2d(ClWinogradConv2d &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- ClWinogradConv2d &operator=(const ClWinogradConv2d &) = delete;
- /** Default move assignment operator */
- ClWinogradConv2d &operator=(ClWinogradConv2d &&) = default;
- /** Set the input and output tensors.
- *
- * Valid data layouts:
- * - NHWC
- * - NCHW
- *
- * Valid data type configurations:
- * |src0 |src1 |src2 |dst |
- * |:--------------|:--------------|:------|:--------------|
- * |F16 |F16 |F16 |F16 |
- * |F32 |F32 |F32 |F32 |
- *
- * @note: This function only works with 3x3,3x1,1x3,5x5,5x1,1x5,7x1 and 1x7 kernels along with unit strides for both NCHW and NHWC data layout
- * @note Some Winograd configurations (i.e. F(4x4, 5x5)) are supported only with enable_fast_math = true
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] src Source tensor info. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: F16/F32.
- * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p src.
- * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p src
- * @param[out] dst Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p src.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
- * available which may introduce a drop of accuracy as well. Default is false
- */
- void configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const PadStrideInfo &conv_info,
- const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
- /** Static function to check if given info will lead to a valid configuration
- *
- * Similar to ClWinogradConv2d::configure()
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info,
- const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
-
- // Inherited method overridden
- void run(ITensorPack &tensors) override;
- void prepare(ITensorPack &tensors) override;
- experimental::MemoryRequirements workspace() const override;
-
-private:
- ClGemm _batched_mm;
- std::unique_ptr<kernels::ClWinogradInputTransformKernel> _input_transform;
- std::unique_ptr<kernels::ClWinogradFilterTransformKernel> _filter_transform;
- std::unique_ptr<kernels::ClWinogradOutputTransformKernel> _output_transform;
- CLFillBorderKernel _border_handler;
- TensorInfo _input0;
- TensorInfo _input1;
- TensorInfo _batched_mm_output;
- bool _is_prepared;
- experimental::MemoryRequirements _aux_mem{};
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_WINOGRADCONV2D_H */
diff --git a/src/runtime/gpu/cl/utils/ClAuxTensorHandler.h b/src/runtime/gpu/cl/utils/ClAuxTensorHandler.h
deleted file mode 100644
index 152e3c6c04..0000000000
--- a/src/runtime/gpu/cl/utils/ClAuxTensorHandler.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_UTILS_CL_AUX_TENSOR_HANDLER_H
-#define ARM_COMPUTE_CL_UTILS_CL_AUX_TENSOR_HANDLER_H
-
-#include "arm_compute/core/ITensorPack.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-
-#include "support/Cast.h"
-
-namespace arm_compute
-{
-namespace opencl
-{
-/* Tensor handler to wrap and handle tensor allocations on workspace buffers */
-class CLAuxTensorHandler
-{
-public:
- CLAuxTensorHandler(int slot_id, TensorInfo &info, ITensorPack &pack, bool pack_inject = false)
- : _tensor()
- {
- _tensor.allocator()->soft_init(info);
-
- ICLTensor *packed_tensor = utils::cast::polymorphic_downcast<ICLTensor *>(pack.get_tensor(slot_id));
- if((packed_tensor == nullptr) || (info.total_size() > packed_tensor->info()->total_size()))
- {
- _tensor.allocator()->allocate();
- if(pack_inject)
- {
- pack.add_tensor(slot_id, &_tensor);
- _injected_tensor_pack = &pack;
- _injected_slot_id = slot_id;
- }
- }
- else
- {
- _tensor.allocator()->import_memory(packed_tensor->cl_buffer());
- }
- }
-
- CLAuxTensorHandler(TensorInfo &info, ICLTensor &tensor)
- : _tensor()
- {
- _tensor.allocator()->soft_init(info);
- if(info.total_size() <= tensor.info()->total_size())
- {
- _tensor.allocator()->import_memory(tensor.cl_buffer());
- }
- }
-
- CLAuxTensorHandler(const CLAuxTensorHandler &) = delete;
- CLAuxTensorHandler &operator=(const CLAuxTensorHandler) = delete;
-
- ~CLAuxTensorHandler()
- {
- if(_injected_tensor_pack)
- {
- _injected_tensor_pack->remove_tensor(_injected_slot_id);
- }
- }
-
- ICLTensor *get()
- {
- return &_tensor;
- }
-
- ICLTensor *operator()()
- {
- return &_tensor;
- }
-
-private:
- CLTensor _tensor{};
- ITensorPack *_injected_tensor_pack{ nullptr };
- int _injected_slot_id{ TensorType::ACL_UNKNOWN };
-};
-} // namespace opencl
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_UTILS_CL_AUX_TENSOR_HANDLER_H */ \ No newline at end of file