diff options
Diffstat (limited to 'src/runtime/gpu')
56 files changed, 0 insertions, 5290 deletions
diff --git a/src/runtime/gpu/cl/IClOperator.h b/src/runtime/gpu/cl/IClOperator.h deleted file mode 100644 index 049bf05dc1..0000000000 --- a/src/runtime/gpu/cl/IClOperator.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_ICL_OPERATOR_H -#define ARM_COMPUTE_ICL_OPERATOR_H - -#include "arm_compute/core/ITensorInfo.h" -#include "arm_compute/runtime/CL/ICLOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -using IClOperator = experimental::ICLOperator; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_ICL_OPERATOR_H */ diff --git a/src/runtime/gpu/cl/operators/ClActivation.cpp b/src/runtime/gpu/cl/operators/ClActivation.cpp deleted file mode 100644 index 71aa57bdbd..0000000000 --- a/src/runtime/gpu/cl/operators/ClActivation.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "src/runtime/gpu/cl/operators/ClActivation.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClActivationKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClActivation::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const ActivationLayerInfo &act_info) -{ - auto k = std::make_unique<kernels::ClActivationKernel>(); - k->configure(compile_context, src, dst, act_info); - _kernel = std::move(k); -} - -Status ClActivation::validate(const ITensorInfo *src, const ITensorInfo *dst, const ActivationLayerInfo &act_info) -{ - return kernels::ClActivationKernel::validate(src, dst, act_info); -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClActivation.h b/src/runtime/gpu/cl/operators/ClActivation.h deleted file mode 100644 index 235b826b87..0000000000 --- a/src/runtime/gpu/cl/operators/ClActivation.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_ACTIVATION_H -#define ARM_COMPUTE_CL_ACTIVATION_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref kernels::ClActivationKernel */ -class ClActivation : public IClOperator -{ -public: - /** Constructor */ - ClActivation() = default; - /** Configure operator for a given list of arguments - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. - * @param[out] dst Destination tensor info. Data type supported: same as @p src - * @param[in] activation_info Activation layer parameters. - */ - void configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const ActivationLayerInfo &activation_info); - /** Static function to check if given info will lead to a valid configuration of @ref ClActivation - * - * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. - * @param[in] dst Destination tensor info. Data type supported: same as @p src - * @param[in] act_info Activation layer information. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const ActivationLayerInfo &act_info); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_ACTIVATION_H */ diff --git a/src/runtime/gpu/cl/operators/ClAdd.cpp b/src/runtime/gpu/cl/operators/ClAdd.cpp deleted file mode 100644 index 01f550f819..0000000000 --- a/src/runtime/gpu/cl/operators/ClAdd.cpp +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "src/runtime/gpu/cl/operators/ClAdd.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClElementwiseKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClAdd::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, - ConvertPolicy policy, const ActivationLayerInfo &act_info) -{ - auto k = std::make_unique<kernels::ClSaturatedArithmeticKernel>(); - k->configure(compile_context, ArithmeticOperation::ADD, src1, src2, dst, policy, act_info); - _kernel = std::move(k); -} - -Status ClAdd::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, - ConvertPolicy policy, const ActivationLayerInfo &act_info) -{ - return kernels::ClSaturatedArithmeticKernel::validate(ArithmeticOperation::ADD, src1, src2, dst, policy, act_info); -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClAdd.h b/src/runtime/gpu/cl/operators/ClAdd.h deleted file mode 100644 index f751d8dc83..0000000000 --- a/src/runtime/gpu/cl/operators/ClAdd.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_ADD_H -#define ARM_COMPUTE_CL_ADD_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run arithmetic addition - * - * @note The tensor data type for the inputs must be U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. - * @note The function performs an arithmetic addition between two tensors. - */ -class ClAdd : public IClOperator -{ -public: - /** Default Constructor */ - ClAdd() = default; - /** Configure function for a given list of arguments. - * - * Valid configurations (src1,src2) -> dst : - * - * - (U8,U8) -> U8 - * - (U8,U8) -> S16 - * - (S16,U8) -> S16 - * - (U8,S16) -> S16 - * - (S16,S16) -> S16 - * - (S32,S32) -> S32 - * - (F16,F16) -> F16 - * - (F32,F32) -> F32 - * - (QASYMM8,QASYMM8) -> QASYMM8 - * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED - * - (QSYMM16,QSYMM16) -> QSYMM16 - * - * @param[in] compile_context The compile context to be used. - * @param[in, out] src1 First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. - * The source tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] src2 Second source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. 
- * The source tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] dst Destination tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. - * @param[in] policy Policy to use to handle overflow. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, ConvertPolicy policy, - const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref ClAdd - * - * Valid configurations (src1,src2) -> dst : - * - * - (U8,U8) -> U8 - * - (U8,U8) -> S16 - * - (S16,U8) -> S16 - * - (U8,S16) -> S16 - * - (S16,S16) -> S16 - * - (S32,S32) -> S32 - * - (F16,F16) -> F16 - * - (F32,F32) -> F32 - * - (QASYMM8,QASYMM8) -> QASYMM8 - * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED - * - (QSYMM16,QSYMM16) -> QSYMM16 - * - * @param[in] src1 First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. - * @param[in] src2 Second source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. - * @param[in] dst Destination tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. - * @param[in] policy Policy to use to handle overflow. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, ConvertPolicy policy, - const ActivationLayerInfo &act_info = ActivationLayerInfo()); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_ADD_H */ diff --git a/src/runtime/gpu/cl/operators/ClCast.cpp b/src/runtime/gpu/cl/operators/ClCast.cpp deleted file mode 100644 index 3f54004aa7..0000000000 --- a/src/runtime/gpu/cl/operators/ClCast.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "src/runtime/gpu/cl/operators/ClCast.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClCastKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClCast::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, ConvertPolicy policy) -{ - auto k = std::make_unique<kernels::ClCastKernel>(); - k->configure(compile_context, src, dst, policy); - _kernel = std::move(k); -} - -Status ClCast::validate(const ITensorInfo *src, const ITensorInfo *dst, ConvertPolicy policy) -{ - return kernels::ClCastKernel::validate(src, dst, policy); -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClCast.h b/src/runtime/gpu/cl/operators/ClCast.h deleted file mode 100644 index 69e028debd..0000000000 --- a/src/runtime/gpu/cl/operators/ClCast.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_CAST_H -#define ARM_COMPUTE_CL_CAST_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref kernels::ClCastKernel */ -class ClCast : public IClOperator -{ -public: - /** Constructor */ - ClCast() = default; - /** Configure operator for a given list of arguments - * - * @note Input data type must be different than output data type. - * - * Valid data layouts: - * - All - * - * Valid data type configurations: - * |src |dst | - * |:--------------|:--------------------------------------| - * |U8 | S8, U16, S16, U32, S32, F16, F32 | - * |U16 | U8, S8, S16, U32, S32, F16, F32 | - * |S16 | U8, S8, U16, U32, S32, F16, F32 | - * |U32 | U8, S8, U16, S16, S32, F16, F32 | - * |S32 | U8, S8, U16, S16, U32, F16, F32 | - * |F16 | U8, S8, U16, S16, U32, F32 | - * |F32 | U8, S8, U16, S16, U32, F16 | - * - * @param[in] compile_context The compile context to be used. - * @param[in] src The source tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. - * @param[out] dst The destination tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. - * @param[in] policy Conversion policy. 
- */ - void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, ConvertPolicy policy); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClCast::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst, ConvertPolicy policy); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_CAST_H */ diff --git a/src/runtime/gpu/cl/operators/ClConcatenate.cpp b/src/runtime/gpu/cl/operators/ClConcatenate.cpp deleted file mode 100644 index 4385fcfaed..0000000000 --- a/src/runtime/gpu/cl/operators/ClConcatenate.cpp +++ /dev/null @@ -1,254 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "src/runtime/gpu/cl/operators/ClConcatenate.h" - -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLScheduler.h" - -#include "src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h" -#include "src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h" -#include "src/core/gpu/cl/kernels/ClHeightConcatenateKernel.h" -#include "src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h" -#include "src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h" -#include "src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "src/core/helpers/AutoConfiguration.h" - -namespace arm_compute -{ -namespace opencl -{ -ClConcatenate::ClConcatenate() - : _concat_kernels(), - _num_inputs(0), - _axis(Window::DimX) -{ -} - -void ClConcatenate::configure(const CLCompileContext &compile_context, const std::vector<ITensorInfo *> &src_vector, ITensorInfo *dst, size_t axis) -{ - ARM_COMPUTE_ERROR_ON(dst == nullptr); - _axis = axis; - _num_inputs = src_vector.size(); - - TensorShape dst_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(src_vector, _axis); - std::vector<const ITensorInfo *> const_src_vector(src_vector.size()); - std::transform(src_vector.begin(), src_vector.end(), const_src_vector.begin(), [](ITensorInfo * t) - { - ARM_COMPUTE_ERROR_ON_NULLPTR(t); - return t; - }); - - // dst auto initialization if not yet initialized - auto_init_if_empty(*dst, dst_shape, 1, src_vector[0]->data_type()); - ARM_COMPUTE_ERROR_THROW_ON(ClConcatenate::validate(const_src_vector, dst, axis)); - - unsigned int offset = 0; - switch(_axis) - { - case Window::DimX: - { - switch(_num_inputs) - { - case 2: - { - // Configure WidthConcatenate2Tensors kernel - auto kernel = std::make_unique<kernels::ClWidthConcatenate2TensorsKernel>(); - kernel->configure(compile_context, src_vector.at(0), src_vector.at(1), dst); 
- _concat_kernels.emplace_back(std::move(kernel)); - break; - } - case 4: - { - // Configure WidthConcatenate4Tensors kernel - auto kernel = std::make_unique<kernels::ClWidthConcatenate4TensorsKernel>(); - kernel->configure(compile_context, src_vector.at(0), src_vector.at(1), src_vector.at(2), src_vector.at(3), dst); - _concat_kernels.emplace_back(std::move(kernel)); - break; - } - default: - { - // Configure generic case WidthConcatenate kernels - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto kernel = std::make_unique<kernels::ClWidthConcatenateKernel>(); - kernel->configure(compile_context, src_vector.at(i), offset, dst); - offset += src_vector.at(i)->dimension(_axis); - _concat_kernels.emplace_back(std::move(kernel)); - } - break; - } - } - break; - } - case Window::DimY: - { - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto kernel = std::make_unique<kernels::ClHeightConcatenateKernel>(); - kernel->configure(compile_context, src_vector.at(i), offset, dst); - offset += src_vector.at(i)->dimension(_axis); - _concat_kernels.emplace_back(std::move(kernel)); - } - break; - } - case Window::DimZ: - { - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto kernel = std::make_unique<kernels::ClDepthConcatenateKernel>(); - kernel->configure(compile_context, src_vector.at(i), offset, dst); - offset += src_vector.at(i)->dimension(_axis); - _concat_kernels.emplace_back(std::move(kernel)); - } - break; - } - case 3: - { - for(unsigned int i = 0; i < _num_inputs; ++i) - { - auto kernel = std::make_unique<kernels::ClBatchConcatenateKernel>(); - kernel->configure(compile_context, src_vector.at(i), offset, dst); - offset += src_vector.at(i)->dimension(_axis); - _concat_kernels.emplace_back(std::move(kernel)); - } - break; - } - default: - ARM_COMPUTE_ERROR("Axis not supported"); - } -} - -Status ClConcatenate::validate(const std::vector<const ITensorInfo *> &src_vector, const ITensorInfo *dst, size_t axis) -{ - ARM_COMPUTE_RETURN_ERROR_ON(dst == nullptr); 
- const unsigned int num_inputs = src_vector.size(); - - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(dst); - ARM_COMPUTE_RETURN_ERROR_ON(num_inputs < 2); - - unsigned int offset = 0; - switch(axis) - { - case Window::DimX: - { - switch(num_inputs) - { - case 2: - // Validate WidthConcatenate2Tensors kernels if there are 2 inputs - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src_vector[0], src_vector[1]); - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenate2TensorsKernel::validate(src_vector[0], src_vector[1], dst)); - break; - case 4: - // Validate WidthConcatenate4Tensors kernels if there are 4 inputs - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src_vector[0], src_vector[1], src_vector[2], src_vector[3]); - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenate4TensorsKernel::validate(src_vector[0], src_vector[1], src_vector[2], src_vector[3], dst)); - break; - default: - // Validate generic case of WidthConcatenate kernel - for(const auto &src : src_vector) - { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenateKernel::validate(src, offset, dst)); - offset += src->dimension(axis); - } - break; - } - break; - } - case Window::DimY: - { - for(const auto &src : src_vector) - { - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClHeightConcatenateKernel::validate(src, offset, dst)); - offset += src->dimension(axis); - } - break; - } - case Window::DimZ: - { - for(const auto &src : src_vector) - { - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClDepthConcatenateKernel::validate(src, offset, dst)); - offset += src->dimension(axis); - } - break; - } - case 3: - { - for(const auto &src : src_vector) - { - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClBatchConcatenateKernel::validate(src, offset, dst)); - offset += src->dimension(axis); - } - break; - } - default: - ARM_COMPUTE_ERROR("Axis not supported"); - } - - if(dst->total_size() != 0) - { - TensorShape dst_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(src_vector, axis); - 
ARM_COMPUTE_RETURN_ERROR_ON(dst_shape.total_size() != dst->tensor_shape().total_size()); - } - - return Status{}; -} - -void ClConcatenate::run(ITensorPack &tensors) -{ - if(tensors.empty()) - { - ARM_COMPUTE_ERROR("No inputs provided"); - } - - if(static_cast<int>(tensors.size()) - 1 != static_cast<int>(_num_inputs)) - { - ARM_COMPUTE_ERROR("Configured with different number of inputs"); - } - - if(_axis == Window::DimX && (_num_inputs == 2 || _num_inputs == 4)) - { - ARM_COMPUTE_ERROR_ON(_concat_kernels.empty()); - CLScheduler::get().enqueue_op(*_concat_kernels.at(0), tensors, true); - } - else - { - int i = 0; - for(auto &k : _concat_kernels) - { - ITensorPack pack; - pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(ACL_SRC_VEC + i)); - pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(ACL_DST)); - CLScheduler::get().enqueue_op(*k, pack, true); - ++i; - } - } -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClConcatenate.h b/src/runtime/gpu/cl/operators/ClConcatenate.h deleted file mode 100644 index 0d960a605c..0000000000 --- a/src/runtime/gpu/cl/operators/ClConcatenate.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLCONCATENATE_H -#define ARM_COMPUTE_CLCONCATENATE_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/IClKernel.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -#include <vector> - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels: - * - * -# @ref kernels::ClWidthConcatenateKernel (if underlying concatenation axis is 0). - * -# @ref kernels::ClHeightConcatenateKernel (if underlying concatenation axis is 1). - * -# @ref kernels::ClDepthConcatenateKernel (if underlying concatenation axis is 2). - * -# @ref kernels::ClBatchConcatenateKernel (if underlying concatenation axis is 3). - */ -class ClConcatenate : public IClOperator -{ -public: - /** Default constructor */ - ClConcatenate(); - /** Initialise the kernel's inputs vector and dst. - * - * @note Input and dst tensor dimensions preconditions differ depending on the concatenation axis. - * @note Preconditions can be found respectively at @ref kernels::ClWidthConcatenateKernel, - * @ref kernels::ClHeightConcatenateKernel and @ref kernels::ClDepthConcatenateKernel. - * - * - * @param[in] compile_context The compile context to be used. - * @param[in,out] src_vector The vectors containing all the tensors info to concatenate. Data types supported: All - * @param[out] dst Destination tensor info. Data types supported: same as @p src_vector. 
- * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. - */ - void configure(const ClCompileContext &compile_context, const std::vector<ITensorInfo *> &src_vector, ITensorInfo *dst, size_t axis); - /** Static function to check if given info will lead to a valid configuration of @ref ClConcatenate - * - * @note Input and dst tensor dimensions preconditions differ depending on the concatenation axis. - * @note Preconditions can be found respectively at @ref kernels::ClWidthConcatenateKernel, - * @ref kernels::ClHeightConcatenateKernel and @ref kernels::ClDepthConcatenateKernel. - * - * @param[in] src_vector The vectors containing all the tensors info to concatenate. Data types supported: All - * @param[in] dst Destination tensor info. Data types supported: same as @p src_vector. - * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. - * - * @return a status - */ - static Status validate(const std::vector<const ITensorInfo *> &src_vector, const ITensorInfo *dst, size_t axis); - - // Inherited methods overridden: - void run(ITensorPack &tensors) override; - -private: - std::vector<std::unique_ptr<IClKernel>> _concat_kernels; - unsigned int _num_inputs; - unsigned int _axis; -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLCONCATENATE_H */ diff --git a/src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.cpp b/src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.cpp deleted file mode 100644 index 0d2f2925d3..0000000000 --- a/src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClConvertFullyConnectedWeights::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const TensorShape &original_src_shape, DataLayout data_layout) -{ - auto k = std::make_unique<kernels::ClConvertFullyConnectedWeightsKernel>(); - k->configure(compile_context, src, dst, original_src_shape, data_layout); - _kernel = std::move(k); -} - -Status ClConvertFullyConnectedWeights::validate(const ITensorInfo *src, const ITensorInfo *dst, const TensorShape &original_src_shape, DataLayout data_layout) -{ - return kernels::ClConvertFullyConnectedWeightsKernel::validate(src, dst, original_src_shape, data_layout); -} -} // namespace opencl -} // namespace arm_compute
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.h b/src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.h deleted file mode 100644 index efedc2fcb7..0000000000 --- a/src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CL_CONVERTFULLYCONNECTEDWEIGHTS_H -#define ARM_COMPUTE_CL_CONVERTFULLYCONNECTEDWEIGHTS_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref kernels::ClConvertFullyConnectedWeightsKernel */ -class ClConvertFullyConnectedWeights : public IClOperator -{ -public: - /** Constructor */ - ClConvertFullyConnectedWeights() = default; - /** Initialise the kernel's inputs and outputs - * - * @param[in] compile_context The compile context to be used. - * @param[in] src The src tensor info. Data types supported: All. - * @param[in] dst The dst tensor info. Data types supported: Same as @p src - * @param[in] original_src_shape Shape of the original src tensor (the one entering fully connected layer). - * @param[in] data_layout The data layout the weights have been trained in. - */ - void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const TensorShape &original_src_shape, DataLayout data_layout); - /** Static function to check if given info will lead to a valid configuration of @ref kernels::ClConvertFullyConnectedWeightsKernel. - * - * @param[in] src First tensor src info. Data types supported: All. - * @param[in] dst Output tensor info. Data types supported: same as @p src. - * @param[in] original_src_shape Shape of the original src tensor (the one entering fully connected layer). - * @param[in] data_layout The data layout the weights have been trained in. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const TensorShape &original_src_shape, DataLayout data_layout); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_CONVERTFULLYCONNECTEDWEIGHTS_H */ diff --git a/src/runtime/gpu/cl/operators/ClCopy.cpp b/src/runtime/gpu/cl/operators/ClCopy.cpp deleted file mode 100644 index 2bdb1f5ba1..0000000000 --- a/src/runtime/gpu/cl/operators/ClCopy.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "src/runtime/gpu/cl/operators/ClCopy.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClCopyKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClCopy::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, Window *dst_window) -{ - auto k = std::make_unique<kernels::ClCopyKernel>(); - k->configure(compile_context, src, dst, dst_window); - _kernel = std::move(k); -} - -Status ClCopy::validate(const ITensorInfo *src, const ITensorInfo *dst, Window *dst_window) -{ - return kernels::ClCopyKernel::validate(src, dst, dst_window); -} -} // namespace opencl -} // namespace arm_compute
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClCopy.h b/src/runtime/gpu/cl/operators/ClCopy.h deleted file mode 100644 index 0b99676f65..0000000000 --- a/src/runtime/gpu/cl/operators/ClCopy.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_COPY_H -#define ARM_COMPUTE_CL_COPY_H - -#include "arm_compute/core/Window.h" -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref kernels::ClCopyKernel */ -class ClCopy : public IClOperator -{ -public: - /** Constructor */ - ClCopy() = default; - /** Initialise the function's source and destination. - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. 
Data types supported: All. - * @param[out] dst Output tensor info. Data types supported: Same as @p src. - * @param[in] dst_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr. - * - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, Window *dst_window = nullptr); - /** Static function to check if given info will lead to a valid configuration of @ref kernels::ClCopyKernel - * - * @param[in] src Source tensor info. Data types supported: All. - * @param[in] dst Output tensor info. Data types supported: Same as @p src. - * @param[in] dst_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr. - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst, Window *dst_window = nullptr); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_COPY_H */ diff --git a/src/runtime/gpu/cl/operators/ClCrop.cpp b/src/runtime/gpu/cl/operators/ClCrop.cpp deleted file mode 100644 index 17bb11912f..0000000000 --- a/src/runtime/gpu/cl/operators/ClCrop.cpp +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClCrop.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClCropKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClCrop::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value, - Window *dst_window) -{ - auto k = std::make_unique<kernels::ClCropKernel>(); - k->configure(compile_context, src, dst, start, end, batch_index, extrapolation_value, dst_window); - _kernel = std::move(k); -} - -Status ClCrop::validate(const ITensorInfo *src, const ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value, Window *dst_window) -{ - return kernels::ClCropKernel::validate(src, dst, start, end, batch_index, extrapolation_value, dst_window); -} -} // namespace opencl -} // namespace arm_compute
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClCrop.h b/src/runtime/gpu/cl/operators/ClCrop.h deleted file mode 100644 index acfbf14742..0000000000 --- a/src/runtime/gpu/cl/operators/ClCrop.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_COPY_H -#define ARM_COMPUTE_CL_COPY_H - -#include "arm_compute/core/Window.h" -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref kernels::ClCropKernel */ -class ClCrop : public IClOperator -{ -public: - /** Constructor */ - ClCrop() = default; - /** Initialise the function's source and destination. - * - * @note Supported tensor rank: up to 4 - * - * @param[in] compile_context The compile context to be used. 
- * @param[in] src Source tensor info. Data type supported: All. Data layouts supported: NHWC. - * @param[out] dst Destination tensor info. Data type supported: F32 - * @param[in] start Coordinates of where to start cropping the image. - * @param[in] end Coordinates of where to end cropping the image. - * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p src. - * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0. - * @param[in] dst_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr. - */ - void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, - Window *dst_window = nullptr); - - /** Static function to check if given info will lead to a valid configuration of @ref kernels::ClCropKernel - * - * @note Supported tensor rank: up to 4 - * - * @param[in] src Source tensor info. Data type supported: All. Data layouts supported: NHWC. - * @param[in] dst Destination tensor info. Data type supported: F32 - * @param[in] start Coordinates of where to start cropping the image. - * @param[in] end Coordinates of where to end cropping the image. - * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p src. - * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0. - * @param[in] dst_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr. 
- */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, - Window *dst_window = nullptr); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_COPY_H */ diff --git a/src/runtime/gpu/cl/operators/ClDequantize.cpp b/src/runtime/gpu/cl/operators/ClDequantize.cpp deleted file mode 100644 index 0c1391bb45..0000000000 --- a/src/runtime/gpu/cl/operators/ClDequantize.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "src/runtime/gpu/cl/operators/ClDequantize.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClDequantizeKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClDequantize::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst) -{ - auto k = std::make_unique<kernels::ClDequantizeKernel>(); - k->configure(compile_context, src, dst); - _kernel = std::move(k); -} - -Status ClDequantize::validate(const ITensorInfo *src, const ITensorInfo *dst) -{ - return kernels::ClDequantizeKernel::validate(src, dst); -} - -void ClDequantize::run(ITensorPack &tensors) -{ - ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided"); - CLScheduler::get().enqueue_op(*_kernel.get(), tensors); -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClDequantize.h b/src/runtime/gpu/cl/operators/ClDequantize.h deleted file mode 100644 index 47fad3eeee..0000000000 --- a/src/runtime/gpu/cl/operators/ClDequantize.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_DEQUANTIZE_H -#define ARM_COMPUTE_CL_DEQUANTIZE_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref kernels::ClDequantizeKernel that dequantizes an input tensor */ -class ClDequantize : public IClOperator -{ -public: - /** Constructor */ - ClDequantize() = default; - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. - * @param[out] dst Destination tensor info with the same dimensions of @p src. Data type supported: F16/F32. 
- */ - void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClDequantize::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst); - - // Inherited method overridden - void run(ITensorPack &tensors) override; -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_DEQUANTIZE_H */ diff --git a/src/runtime/gpu/cl/operators/ClDirectConv2d.cpp b/src/runtime/gpu/cl/operators/ClDirectConv2d.cpp deleted file mode 100644 index 13ef42a640..0000000000 --- a/src/runtime/gpu/cl/operators/ClDirectConv2d.cpp +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "src/runtime/gpu/cl/operators/ClDirectConv2d.h" - -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "src/core/CL/kernels/CLFillBorderKernel.h" -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClActivationKernel.h" -#include "src/core/gpu/cl/kernels/ClDirectConv2dKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -namespace -{ -ITensorPack select_activation_src_dst(ITensorPack &tensors) -{ - ITensorPack pack; - pack.add_tensor(TensorType::ACL_SRC, tensors.get_tensor(TensorType::ACL_DST)); - pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(TensorType::ACL_DST)); - return pack; -} -} // namespace - -void ClDirectConv2d::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, - const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(src); - - // Configure direct convolution kernel - const ActivationLayerInfo conv2d_act_info = (src->data_layout() == DataLayout::NHWC && is_data_type_float(src->data_type())) ? 
act_info : ActivationLayerInfo(); - auto k = std::make_unique<kernels::ClDirectConv2dKernel>(); - k->set_target(CLScheduler::get().target()); - k->configure(compile_context, src, weights, biases, dst, conv_info, conv2d_act_info); - _direct_conv_kernel = std::move(k); - - // Configure border handler - PixelValue zero_value(0.f); - if(is_data_type_quantized_asymmetric(src->data_type())) - { - zero_value = PixelValue(0, src->data_type(), src->quantization_info()); - } - auto b = std::make_unique<CLFillBorderKernel>(); - b->configure(compile_context, src, _direct_conv_kernel->border_size(), BorderMode::CONSTANT, zero_value); - _src_border_handler = std::move(b); - - // Fused activation is currently supported for NHWC and floating point types - if(act_info.enabled() && !conv2d_act_info.enabled()) - { - auto a = std::make_unique<kernels::ClActivationKernel>(); - a->configure(compile_context, dst, dst, act_info); - _activation_kernel = std::move(a); - } - - // Tune kernels - CLScheduler::get().tune_kernel_static(*_direct_conv_kernel); -} - -Status ClDirectConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, - const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info) -{ - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClDirectConv2dKernel::validate(src, weights, biases, dst, conv_info, ActivationLayerInfo(), CLScheduler::get().target())); - if(act_info.enabled()) - { - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClActivationKernel::validate(dst, dst, act_info)); - } - return Status{}; -} - -void ClDirectConv2d::run(ITensorPack &tensors) -{ - // Run border handler - CLScheduler::get().enqueue_op(*_src_border_handler.get(), tensors, false); - // Run direct convolution - CLScheduler::get().enqueue_op(*_direct_conv_kernel.get(), tensors, false); - // Run activation kernel - if(_activation_kernel) - { - auto act_pack = select_activation_src_dst(tensors); - CLScheduler::get().enqueue_op(*_activation_kernel.get(), 
act_pack, false); - } -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClDirectConv2d.h b/src/runtime/gpu/cl/operators/ClDirectConv2d.h deleted file mode 100644 index e069733fab..0000000000 --- a/src/runtime/gpu/cl/operators/ClDirectConv2d.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_DIRECT_CONV2D_H -#define ARM_COMPUTE_CL_DIRECT_CONV2D_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/IClKernel.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -#include <memory> - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to simulate a directly convolution layer. 
This function calls the following OpenCL kernels: - * - * -# @ref CLFillBorderKernel (executed if padding size is different from zero) - * -# @ref opencl::ClDirectConv2d - */ -class ClDirectConv2d : public IClOperator -{ -public: - /** Constructor */ - ClDirectConv2d() = default; - /** Set the src and dst tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor. 3 lower dimensions represent a single src [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of srcs. - * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p src. - * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Should match @p src data type, except for src of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type. - * @param[out] dst Destination tensor. 3 lower dimensions represent a single dst [width, height, OFM], while the rest represent batch of dsts. - * Data types supported: Same as @p src. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 
- * - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const PadStrideInfo &conv_info, - const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to ClDirectConv2d::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info, - const ActivationLayerInfo &act_info = ActivationLayerInfo()); - - // Inherited method overridden - void run(ITensorPack &tensors) override; - -private: - std::unique_ptr<IClKernel> _direct_conv_kernel{ nullptr }; - std::unique_ptr<IClKernel> _src_border_handler{ nullptr }; - std::unique_ptr<IClKernel> _activation_kernel{ nullptr }; -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_DIRECT_CONV2D_H */
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClElementwiseOperations.cpp b/src/runtime/gpu/cl/operators/ClElementwiseOperations.cpp deleted file mode 100644 index e5b836a0d8..0000000000 --- a/src/runtime/gpu/cl/operators/ClElementwiseOperations.cpp +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "src/runtime/gpu/cl/operators/ClElementwiseOperations.h" - -#include "src/core/gpu/cl/kernels/ClElementwiseKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClElementwiseDivision::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info) -{ - auto k = std::make_unique<kernels::ClArithmeticKernel>(); - k->configure(compile_context, ArithmeticOperation::DIV, src1, src2, dst, act_info); - _kernel = std::move(k); -} - -Status ClElementwiseDivision::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info) -{ - return kernels::ClArithmeticKernel::validate(ArithmeticOperation::DIV, src1, src2, dst, act_info); -} - -void ClElementwiseMax::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info) -{ - auto k = std::make_unique<kernels::ClArithmeticKernel>(); - k->configure(compile_context, ArithmeticOperation::MAX, src1, src2, dst, act_info); - _kernel = std::move(k); -} - -Status ClElementwiseMax::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info) -{ - return kernels::ClArithmeticKernel::validate(ArithmeticOperation::MAX, src1, src2, dst, act_info); -} - -void ClElementwiseMin::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info) -{ - auto k = std::make_unique<kernels::ClArithmeticKernel>(); - k->configure(compile_context, ArithmeticOperation::MIN, src1, src2, dst, act_info); - _kernel = std::move(k); -} - -Status ClElementwiseMin::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info) -{ - return kernels::ClArithmeticKernel::validate(ArithmeticOperation::MIN, src1, src2, dst, act_info); -} - 
-void ClElementwiseSquaredDiff::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info) -{ - auto k = std::make_unique<kernels::ClArithmeticKernel>(); - k->configure(compile_context, ArithmeticOperation::SQUARED_DIFF, src1, src2, dst, act_info); - _kernel = std::move(k); -} - -Status ClElementwiseSquaredDiff::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info) -{ - return kernels::ClArithmeticKernel::validate(ArithmeticOperation::SQUARED_DIFF, src1, src2, dst, act_info); -} - -void ClElementwisePower::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info) -{ - auto k = std::make_unique<kernels::ClArithmeticKernel>(); - k->configure(compile_context, ArithmeticOperation::POWER, src1, src2, dst, act_info); - _kernel = std::move(k); -} - -Status ClElementwisePower::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info) -{ - return kernels::ClArithmeticKernel::validate(ArithmeticOperation::POWER, src1, src2, dst, act_info); -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClElementwiseOperations.h b/src/runtime/gpu/cl/operators/ClElementwiseOperations.h deleted file mode 100644 index b9ab1405c8..0000000000 --- a/src/runtime/gpu/cl/operators/ClElementwiseOperations.h +++ /dev/null @@ -1,190 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_ELEMENTWISE_OPERATIONS_H -#define ARM_COMPUTE_CL_ELEMENTWISE_OPERATIONS_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref opencl::kernels::ClArithmeticKernel for division - * - * @note The tensor data type for the inputs must be F16/F32. - * @note The function performs an arithmetic division between two tensors. - */ -class ClElementwiseDivision : public IClOperator -{ -public: - /** Default Constructor */ - ClElementwiseDivision() = default; - /** Configure function for a given list of arguments. - * - * @param[in] compile_context The compile context to be used. - * @param[in] src1 First source tensor info. Data types supported: F16/F32. - * @param[in] src2 Second source tensor info. Data types supported: same as @p src1.
- * @param[out] dst Destination tensor info. Data types supported: same as @p src1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref ClElementwiseDivision - * - * @param[in] src1 First source tensor info. Data types supported: F16/F32. - * @param[in] src2 Second source tensor info. Data types supported: same as @p src1. - * @param[in] dst Destination tensor info. Data types supported: same as @p src1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - * - * @return a status - */ - static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo()); -}; - -/** Basic function to run @ref opencl::kernels::ClArithmeticKernel for max - * - * @note The tensor data type for the inputs must be U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32. - * @note The function performs a max operation between two tensors. - */ -class ClElementwiseMax : public IClOperator -{ -public: - /** Default Constructor */ - ClElementwiseMax() = default; - /** Configure function for a given list of arguments. - * - * @param[in] compile_context The compile context to be used. - * @param[in] src1 First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32. - * @param[in] src2 Second source tensor info. Data types supported: same as @p src1. - * @param[out] dst Destination tensor info. Data types supported: same as @p src1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 
- */ - void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref opencl::kernels::ClArithmeticKernel for max - * - * @param[in] src1 First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32. - * @param[in] src2 Second source tensor info. Data types supported: same as @p src1. - * @param[in] dst Destination tensor info. Data types supported: same as @p src1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - * - * @return a status - */ - static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo()); -}; - -/** Basic function to run @ref opencl::kernels::ClArithmeticKernel for min - * - * @note The tensor data type for the inputs must be U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32. - * @note The function performs a min operation between two tensors. - */ -class ClElementwiseMin : public IClOperator -{ -public: - /** Default Constructor */ - ClElementwiseMin() = default; - /** Configure function for a given list of arguments. - * - * @param[in] compile_context The compile context to be used. - * @param[in] src1 First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32. - * @param[in] src2 Second source tensor info. Data types supported: same as @p src1. - * @param[out] dst Destination tensor info. Data types supported: same as @p src1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */ - void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref opencl::kernels::ClArithmeticKernel for min - * - * @param[in] src1 First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32. - * @param[in] src2 Second source tensor info. Data types supported: same as @p src1. - * @param[in] dst Destination tensor info. Data types supported: same as @p src1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - * - * @return a status - */ - static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo()); -}; - -/** Basic function to run @ref opencl::kernels::ClArithmeticKernel for squared difference - * - * @note The tensor data type for the inputs must be QASYMM8/U8/S16/QSYMM16/F16/F32. - * @note The function performs a squared difference operation between two tensors (i.e., out[i] = (in1[i] - in2[i])^2) - */ -class ClElementwiseSquaredDiff : public IClOperator -{ -public: - /** Default Constructor */ - ClElementwiseSquaredDiff() = default; - /** Configure function for a given list of arguments. - * - * @param[in] compile_context The compile context to be used. - * @param[in] src1 First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. - * @param[in] src2 Second source tensor info. Data types supported: same as @p src1. - * @param[out] dst Destination tensor info. Data types supported: same as @p src1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */ - void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref opencl::kernels::ClArithmeticKernel for squared difference - * - * @param[in] src1 First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. - * @param[in] src2 Second source tensor info. Data types supported: same as @p src1. - * @param[in] dst Destination tensor info. Data types supported: same as @p src1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - * - * @return a status - */ - static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo()); -}; - -/** Basic function to run @ref opencl::kernels::ClArithmeticKernel for power - * - * @note The tensor data type for the inputs must be F16/F32. - * @note The function performs an elementwise power of in1 to in2 (i.e., out[i] = in1[i] ^ in2[i]) - */ -class ClElementwisePower : public IClOperator -{ -public: - /** Default Constructor */ - ClElementwisePower() = default; - /** Configure function for a given list of arguments. - * - * @param[in] compile_context The compile context to be used. - * @param[in] src1 First source tensor info. Data types supported: F16/F32. - * @param[in] src2 Second source tensor info. Data types supported: F16/F32. - * @param[out] dst Destination tensor info. Data types supported: F16/F32. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */ - void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref opencl::kernels::ClArithmeticKernel for power - * - * @param[in] src1 First source tensor info. Data types supported: F16/F32. - * @param[in] src2 Second source tensor info. Data types supported: F16/F32. - * @param[in] dst Destination tensor info. Data types supported: F16/F32. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - * - * @return a status - */ - static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo()); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_ELEMENTWISE_OPERATIONS_H */ diff --git a/src/runtime/gpu/cl/operators/ClElementwiseUnary.cpp b/src/runtime/gpu/cl/operators/ClElementwiseUnary.cpp deleted file mode 100644 index 7b830a077f..0000000000 --- a/src/runtime/gpu/cl/operators/ClElementwiseUnary.cpp +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClElementwiseUnary.h" - -#include "src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClRsqrt::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst) -{ - auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>(); - k->configure(compile_context, src, dst, ElementWiseUnary::RSQRT); - _kernel = std::move(k); -} - -Status ClRsqrt::validate(const ITensorInfo *src, const ITensorInfo *dst) -{ - return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::RSQRT); -} - -void ClExp::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst) -{ - auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>(); - k->configure(compile_context, src, dst, ElementWiseUnary::EXP); - _kernel = std::move(k); -} - -Status ClExp::validate(const ITensorInfo *src, const ITensorInfo *dst) -{ - return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::EXP); -} - -void ClNeg::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst) -{ - auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>(); - k->configure(compile_context, src, dst, ElementWiseUnary::NEG); - _kernel = std::move(k); -} - -Status ClNeg::validate(const ITensorInfo *src, const ITensorInfo *dst) -{ - return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::NEG); -} - 
-void ClSin::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst) -{ - auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>(); - k->configure(compile_context, src, dst, ElementWiseUnary::SIN); - _kernel = std::move(k); -} - -Status ClSin::validate(const ITensorInfo *src, const ITensorInfo *dst) -{ - return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::SIN); -} - -void ClAbs::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst) -{ - auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>(); - k->configure(compile_context, src, dst, ElementWiseUnary::ABS); - _kernel = std::move(k); -} - -Status ClAbs::validate(const ITensorInfo *src, const ITensorInfo *dst) -{ - return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::ABS); -} - -void ClLog::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst) -{ - auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>(); - k->configure(compile_context, src, dst, ElementWiseUnary::LOG); - _kernel = std::move(k); -} - -Status ClLog::validate(const ITensorInfo *src, const ITensorInfo *dst) -{ - return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::LOG); -} - -void ClRound::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst) -{ - auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>(); - k->configure(compile_context, src, dst, ElementWiseUnary::ROUND); - _kernel = std::move(k); -} - -Status ClRound::validate(const ITensorInfo *src, const ITensorInfo *dst) -{ - return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::ROUND); -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClElementwiseUnary.h b/src/runtime/gpu/cl/operators/ClElementwiseUnary.h deleted file mode 100644 index b40e3e9a3b..0000000000 --- 
a/src/runtime/gpu/cl/operators/ClElementwiseUnary.h +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_ELEMENTWISE_UNARY_H -#define ARM_COMPUTE_CL_ELEMENTWISE_UNARY_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to perform inverse square root on an src tensor. */ -class ClRsqrt : public IClOperator -{ -public: - /** Constructor */ - ClRsqrt() = default; - /** Initialize the function - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. Data types supported: F16/F32. - * @param[out] dst Destination tensor info. Data types supported: same as @p src. 
- */ - void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst); - /** Static function to check if given info will lead to a valid configuration of @ref ClRsqrt - * - * @param[in] src First source tensor info. Data types supported: F16/F32. - * @param[in] dst Destination tensor info. Data types supported: same as @p src. - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst); -}; - -/** Basic function to perform exponential on an src tensor. */ -class ClExp : public IClOperator -{ -public: - /** Constructor */ - ClExp() = default; - /** Initialize the function - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. Data types supported: F16/F32. - * @param[out] dst Destination tensor info. Data types supported: same as @p src. - */ - void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst); - /** Static function to check if given info will lead to a valid configuration of @ref ClExp - * - * @param[in] src First source tensor info. Data types supported: F16/F32. - * @param[in] dst Destination tensor info. Data types supported: same as @p src. - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst); -}; - -/** Basic function to negate an src tensor. */ -class ClNeg : public IClOperator -{ -public: - /** Constructor */ - ClNeg() = default; - /** Initialize the function - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. Data types supported: F16/F32. - * @param[out] dst Destination tensor info. Data types supported: same as @p src. - */ - void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst); - /** Static function to check if given info will lead to a valid configuration of @ref ClNeg - * - * @param[in] src First source tensor info. 
Data types supported: F16/F32. - * @param[in] dst Destination tensor info. Data types supported: same as @p src. - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst); -}; - -/** Basic function to calculate sine of an src tensor. */ -class ClSin : public IClOperator -{ -public: - /** Constructor */ - ClSin() = default; - /** Initialize the function - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. Data types supported: F16/F32. - * @param[out] dst Destination tensor info. Data types supported: same as @p src. - */ - void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst); - /** Static function to check if given info will lead to a valid configuration of @ref ClSin - * - * @param[in] src First source tensor info. Data types supported: F16/F32. - * @param[in] dst Destination tensor info. Data types supported: same as @p src. - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst); -}; - -/** Basic function to perform elementwise log on an src tensor. */ -class ClLog : public IClOperator -{ -public: - /** Constructor */ - ClLog() = default; - /** Initialize the function - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. Data types supported: F16/F32. - * @param[out] dst Destination tensor info. Data types supported: same as @p src. - */ - void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst); - /** Static function to check if given info will lead to a valid configuration of @ref ClLog - * - * @param[in] src First source tensor info. Data types supported: F16/F32. - * @param[in] dst Destination tensor info. Data types supported: same as @p src. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst); -}; - -/** Basic function to get the absolute value of an src tensor. */ -class ClAbs : public IClOperator -{ -public: - /** Initialize the function - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. Data types supported: F16/F32. - * @param[out] dst Destination tensor info. Data types supported: same as @p src. - */ - void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst); - /** Static function to check if given info will lead to a valid configuration of @ref ClAbs - * - * @param[in] src First source tensor info. Data types supported: F16/F32. - * @param[in] dst Destination tensor info. Data types supported: same as @p src. - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst); -}; - -/** Basic function to get the round (to the nearest even) value of an src tensor. */ -class ClRound : public IClOperator -{ -public: - /** Initialize the function - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. Data types supported: F16/F32. - * @param[out] dst Destination tensor info. Data types supported: same as @p src. - */ - void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst); - /** Static function to check if given info will lead to a valid configuration of @ref ClRound - * - * @param[in] src First source tensor info. Data types supported: F16/F32. - * @param[in] dst Destination tensor info. Data types supported: same as @p src. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_ELEMENTWISE_UNARY_H */ diff --git a/src/runtime/gpu/cl/operators/ClFill.cpp b/src/runtime/gpu/cl/operators/ClFill.cpp deleted file mode 100644 index 4d0afaef24..0000000000 --- a/src/runtime/gpu/cl/operators/ClFill.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "src/runtime/gpu/cl/operators/ClFill.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClFillKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClFill::configure(const ClCompileContext &compile_context, ITensorInfo *tensor, const PixelValue &constant_value, Window *dst_window) -{ - auto k = std::make_unique<kernels::ClFillKernel>(); - k->configure(compile_context, tensor, constant_value, dst_window); - _kernel = std::move(k); -} - -Status ClFill::validate(const ITensorInfo *tensor, const PixelValue &constant_value, Window *dst_window) -{ - return kernels::ClFillKernel::validate(tensor, constant_value, dst_window); -} -} // namespace opencl -} // namespace arm_compute
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClFill.h b/src/runtime/gpu/cl/operators/ClFill.h deleted file mode 100644 index e632d88546..0000000000 --- a/src/runtime/gpu/cl/operators/ClFill.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_FILL_H -#define ARM_COMPUTE_CL_FILL_H - -#include "arm_compute/core/Window.h" -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref kernels::ClFillKernel */ -class ClFill : public IClOperator -{ -public: - /** Constructor */ - ClFill() = default; - /** Initialise the kernel's tensor and filling value - * - * @param[in] compile_context The compile context to be used. - * @param[in,out] tensor Source tensor info. 
Supported data types: All. - * @param[in] constant_value The value used to fill the planes of the tensor - * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr. - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *tensor, const PixelValue &constant_value, Window *window = nullptr); - /** Static function to check if given info will lead to a valid configuration of @ref kernels::ClFillKernel - * - * @param[in] tensor Source tensor info. Data types supported: All. - * @param[in] constant_value The value used to fill the planes of the tensor. - * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr. - * - * @return a status - */ - static Status validate(const ITensorInfo *tensor, const PixelValue &constant_value, Window *window = nullptr); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_FILL_H */ diff --git a/src/runtime/gpu/cl/operators/ClFlatten.cpp b/src/runtime/gpu/cl/operators/ClFlatten.cpp deleted file mode 100644 index 060b653dee..0000000000 --- a/src/runtime/gpu/cl/operators/ClFlatten.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClFlatten.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClReshapeKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClFlatten::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst) -{ - auto k = std::make_unique<kernels::ClReshapeKernel>(); - k->configure(compile_context, src, dst); - _kernel = std::move(k); -} - -Status ClFlatten::validate(const ITensorInfo *src, const ITensorInfo *dst) -{ - return kernels::ClReshapeKernel::validate(src, dst); -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClFlatten.h b/src/runtime/gpu/cl/operators/ClFlatten.h deleted file mode 100644 index 20ad06ee57..0000000000 --- a/src/runtime/gpu/cl/operators/ClFlatten.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_FLATTEN_H -#define ARM_COMPUTE_CL_FLATTEN_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to flatten a given input */ -class ClFlatten : public IClOperator -{ -public: - /** Constructor */ - ClFlatten() = default; - /** Configure operator for a given list of arguments - * - * Valid data layouts: - * - All - * - * Valid data type configurations: - * |src |dst | - * |:--------------|:--------------| - * |All |All | - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor to flatten with at least 3 dimensions. - * The dimensions above the third will be interpreted as batches. 
Data types supported: All - * @param[in] dst Destination tensor with shape [w*h*d, input_batches] where: - * w = width input tensor, h = height input tensor and d = depth input tensor. - * Data type supported: same as @p src - */ - void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClFlatten::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_FLATTEN_H */ diff --git a/src/runtime/gpu/cl/operators/ClFloor.cpp b/src/runtime/gpu/cl/operators/ClFloor.cpp deleted file mode 100644 index 94e77c0c54..0000000000 --- a/src/runtime/gpu/cl/operators/ClFloor.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2017-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClFloor.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClFloorKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClFloor::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst) -{ - auto k = std::make_unique<kernels::ClFloorKernel>(); - k->configure(compile_context, src, dst); - _kernel = std::move(k); -} - -Status ClFloor::validate(const ITensorInfo *src, const ITensorInfo *dst) -{ - return kernels::ClFloorKernel::validate(src, dst); -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClFloor.h b/src/runtime/gpu/cl/operators/ClFloor.h deleted file mode 100644 index f54eef9140..0000000000 --- a/src/runtime/gpu/cl/operators/ClFloor.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_FLOOR_H -#define ARM_COMPUTE_CL_FLOOR_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref kernels::ClFloorKernel */ -class ClFloor : public IClOperator -{ -public: - /** Constructor */ - ClFloor() = default; - /** Configure operator for a given list of arguments - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. Data types supported: F16/F32. - * @param[in] dst Destination tensor info. Data type supported: same as @p src - */ - void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst); - /** Static function to check if given info will lead to a valid configuration of @ref ClFloor - * - * @param[in] src Source tensor info. Data types supported: F16/F32. - * @param[in] dst Destination tensor info. Data type supported: same as @p src - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_FLOOR_H */ diff --git a/src/runtime/gpu/cl/operators/ClGemm.cpp b/src/runtime/gpu/cl/operators/ClGemm.cpp deleted file mode 100644 index a80375447d..0000000000 --- a/src/runtime/gpu/cl/operators/ClGemm.cpp +++ /dev/null @@ -1,760 +0,0 @@ -/* - * Copyright (c) 2017-2021 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "src/runtime/gpu/cl/operators/ClGemm.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/GPUTarget.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/KernelDescriptors.h" -#include "arm_compute/core/Log.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/ITensorAllocator.h" -#include "src/core/gpu/cl/IClKernel.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/MemoryHelpers.h" -#include "src/core/utils/helpers/float_ops.h" -#include "src/runtime/CL/gemm/CLGEMMKernelSelection.h" -#include "src/runtime/CL/gemm_auto_heuristics/CLGEMMAutoHeuristics.h" -#include "src/runtime/gpu/cl/utils/ClAuxTensorHandler.h" - -#include "support/Cast.h" -#include "utils/TypePrinter.h" - -namespace arm_compute -{ -namespace opencl -{ -using namespace arm_compute::misc::shape_calculator; -using namespace arm_compute::cl_gemm; -using namespace arm_compute::experimental; -using namespace arm_compute::utils::cast; -using namespace arm_compute::opencl::kernels; - -namespace -{ -inline bool validate_gemm_kernel(CLGEMMKernelType kernel_type) -{ - switch(kernel_type) - { - case CLGEMMKernelType::NATIVE_V1: - case CLGEMMKernelType::RESHAPED_ONLY_RHS: - case CLGEMMKernelType::RESHAPED_V1: - case CLGEMMKernelType::RESHAPED: - { - return true; - } - default: - { - return false; - } - } -} -//Automatically select between mlgo (prioritized) and default heuristics for gemm kernel type -inline CLGEMMKernelType auto_select_gemm_kernel(auto_heuristics::CommonQuery query, bool reshape_b_only_on_first_run, bool constant_weights) -{ - if(!constant_weights) - { - return 
CLGEMMKernelType::NATIVE_V1; - } - - auto gemm_kernel = auto_heuristics::select_mlgo_gemm_kernel(query, reshape_b_only_on_first_run); - if(bool(gemm_kernel)) - { - if(validate_gemm_kernel(gemm_kernel.gemm_type)) - { - ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use gemm kernel from mlgo heuristics: %s.", to_string(gemm_kernel.gemm_type).c_str()); - return gemm_kernel.gemm_type; - } - } - gemm_kernel = auto_heuristics::select_default_gemm_kernel(query, reshape_b_only_on_first_run); - ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use gemm kernel from default heuristics: %s.", to_string(gemm_kernel.gemm_type).c_str()); - return gemm_kernel.gemm_type; -} -// Validate lhs_info and rhs_info for reshaped only rhs kernel -inline bool validate_lhs_rhs_info_reshaped_only_rhs(const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, - const ITensorInfo *output, GEMMKernelInfo gemm_kernel_info) -{ - // Validate GEMMLHSMatrixInfo and GEMMRHSMatrixInfo for reshaped only rhs kernel - TensorInfo tmp_b_info{}; - // Validate reshape RHS kernel - auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info))); - if(!bool(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info))) - { - return false; - } - // Validate mm kernel - gemm_kernel_info.lhs_info = lhs_info; - gemm_kernel_info.rhs_info = rhs_info; - gemm_kernel_info.has_pad_y = false; - if(!bool(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, 1.f, 0.f, lhs_info, rhs_info, gemm_kernel_info))) - { - return false; - } - gemm_kernel_info.has_pad_y = true; - if(!bool(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, 1.f, 0.f, lhs_info, rhs_info, gemm_kernel_info))) - { - return false; - } - return true; -} - -//Automatically select between mlgo (prioritized) and default heuristics for reshaped only rhs kernel configs -inline std::pair<GEMMLHSMatrixInfo, 
GEMMRHSMatrixInfo> auto_select_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery query, GEMMKernelInfo kernel_info, const ITensorInfo *a, - const ITensorInfo *b, - const ITensorInfo *c, const ITensorInfo *output) -{ - auto config = auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(query); - if(config) - { - if(validate_lhs_rhs_info_reshaped_only_rhs(config.lhs_info, config.rhs_info, a, b, c, output, kernel_info)) - { - ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped_only_rhs config from mlgo heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str()); - return { config.lhs_info, config.rhs_info }; - } - } - config = auto_heuristics::select_default_gemm_config_reshaped_only_rhs(query); - ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped_only_rhs config from default heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str()); - return { config.lhs_info, config.rhs_info }; -} - -// Validate lhs_info and rhs_info for reshaped kernel -inline bool validate_lhs_rhs_info_reshaped(const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, - const ITensorInfo *output, GEMMKernelInfo gemm_kernel_info, bool reinterpret_input_as_3d) -{ - // Validate GEMMLHSMatrixInfo and GEMMRHSMatrixInfo for reshaped kernel - TensorInfo tmp_a_info{}; - TensorInfo tmp_b_info{}; - - // Validate reshape LHS kernel - auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(compute_lhs_reshaped_shape(*a, lhs_info, reinterpret_input_as_3d))); - if(!bool(ClGemmReshapeLhsMatrixKernel::validate(a, &tmp_a_info, lhs_info, reinterpret_input_as_3d))) - { - return false; - } - - // Validate reshape RHS kernel - auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info))); - if(!bool(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info))) 
- { - return false; - } - // Validate mm kernel - gemm_kernel_info.lhs_info = lhs_info; - gemm_kernel_info.rhs_info = rhs_info; - if(!bool(ClGemmMatrixMultiplyReshapedKernel::validate(&tmp_a_info, &tmp_b_info, c, output, 1.f, 0.f, lhs_info, rhs_info, gemm_kernel_info))) - { - return false; - } - return true; -} - -//Automatically select between mlgo (prioritized) and default heuristics for reshaped kernel configs -inline std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> auto_select_gemm_config_reshaped(auto_heuristics::CommonQuery query, GEMMKernelInfo kernel_info, const ITensorInfo *a, const ITensorInfo *b, - const ITensorInfo *c, const ITensorInfo *output, bool reinterpret_input_as_3d) -{ - auto config = auto_heuristics::select_mlgo_gemm_config_reshaped(query); - if(config) - { - if(validate_lhs_rhs_info_reshaped(config.lhs_info, config.rhs_info, a, b, c, output, kernel_info, reinterpret_input_as_3d)) - { - ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped config from mlgo heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str()); - return { config.lhs_info, config.rhs_info }; - } - } - config = auto_heuristics::select_default_gemm_config_reshaped(query); - ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped config from default heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str()); - return { config.lhs_info, config.rhs_info }; -} -} // namespace - -ClGemm::ClGemm() - : _mm_kernel(std::make_unique<ClGemmMatrixMultiplyKernel>()), - _reshape_lhs_kernel(std::make_unique<ClGemmReshapeLhsMatrixKernel>()), - _reshape_rhs_kernel(std::make_unique<ClGemmReshapeRhsMatrixKernel>()), - _mm_reshaped_kernel(std::make_unique<ClGemmMatrixMultiplyReshapedKernel>()), - _mm_reshaped_only_rhs_kernel(std::make_unique<ClGemmMatrixMultiplyReshapedOnlyRhsKernel>()), - 
_mm_reshaped_only_rhs_fallback_kernel(std::make_unique<ClGemmMatrixMultiplyReshapedOnlyRhsKernel>()), - _tmp_a(), - _tmp_b(), - _reshape_b_only_on_first_run(false), - _gemm_kernel_type(CLGEMMKernelType::NATIVE_V1), - _aux_mem(AuxTensorIdx::Count) -{ -} - -void ClGemm::configure_native_v1(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, - const GEMMInfo &gemm_info) -{ - const unsigned int m = gemm_info.reinterpret_input_as_3d() ? (a->dimension(1) * a->dimension(2)) : a->dimension(1); - const unsigned int n = b->dimension(0); - const unsigned int k = a->dimension(0); - const GPUTarget gpu_target = CLScheduler::get().target(); - - // Set the target for the kernels - _mm_kernel->set_target(gpu_target); - - GEMMReshapeInfo reshape_info(m, n, k, 1, 1, gemm_info.depth_output_gemm3d(), gemm_info.reinterpret_input_as_3d(), gemm_info.broadcast_bias()); - - // Configure and tune matrix multiply kernel - _mm_kernel->configure(compile_context, a, b, c, output, alpha, beta, false, reshape_info, gemm_info.fp_mixed_precision(), gemm_info.activation_info()); - - // Tune kernel statically - CLScheduler::get().tune_kernel_static(*_mm_kernel); -} - -void ClGemm::configure_reshaped_v1(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, - const GEMMInfo &gemm_info) -{ - bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - const unsigned int m = reinterpret_input_as_3d ? 
(a->dimension(1) * a->dimension(2)) : a->dimension(1); - const unsigned int n = b->dimension(0); - const unsigned int k = a->dimension(0); - const int depth_output_gemm3d = gemm_info.depth_output_gemm3d(); - const GPUTarget gpu_target = CLScheduler::get().target(); - int mult_transpose1xW_width = 1; - int mult_interleave4x4_height = 1; - - // Set the target for the kernels - _reshape_lhs_kernel->set_target(gpu_target); - _mm_kernel->set_target(gpu_target); - - if(get_arch_from_target(gpu_target) == GPUTarget::BIFROST) - { - mult_transpose1xW_width = 4; - mult_interleave4x4_height = 2; - } - - GEMMRHSMatrixInfo rhs_info; - rhs_info.n0 = 16 / b->element_size(); - rhs_info.k0 = 1; - rhs_info.h0 = mult_transpose1xW_width; - rhs_info.interleave = false; - rhs_info.transpose = false; - - GEMMLHSMatrixInfo lhs_info; - lhs_info.m0 = 4; - lhs_info.k0 = 4; - lhs_info.v0 = mult_interleave4x4_height; - lhs_info.interleave = true; - lhs_info.transpose = true; - - GEMMReshapeInfo reshape_info(m, n, k, mult_transpose1xW_width, mult_interleave4x4_height, depth_output_gemm3d, false, gemm_info.broadcast_bias()); - - // Configure interleave kernel - _reshape_lhs_kernel->configure(compile_context, a, &_tmp_a, lhs_info, reinterpret_input_as_3d); - - // Configure transpose kernel - _reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info); - - // Configure and tune matrix multiply kernel - _mm_kernel->configure(compile_context, &_tmp_a, &_tmp_b, c, output, alpha, beta, true, reshape_info, gemm_info.fp_mixed_precision(), gemm_info.activation_info()); - - CLScheduler::get().tune_kernel_static(*_mm_kernel); - - // Request memory for LHS and RHS reshape matrix - _aux_mem[LhsReshape] = MemoryInfo(offset_int_vec(LhsReshape), MemoryLifetime::Temporary, _tmp_a.total_size()); - _aux_mem[RhsReshape] = MemoryInfo(offset_int_vec(RhsReshape), _reshape_b_only_on_first_run ? 
MemoryLifetime::Persistent : MemoryLifetime::Temporary, _tmp_b.total_size()); -} - -void ClGemm::configure_reshaped_v2(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, - const GEMMInfo &gemm_info) -{ - DataType data_type = a->data_type(); - bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1); - const unsigned int n = b->dimension(0); - const unsigned int k = a->dimension(0); - const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2); - const int depth_output_gemm3d = gemm_info.depth_output_gemm3d(); - const GPUTarget gpu_target = CLScheduler::get().target(); - bool broadcast_bias = gemm_info.broadcast_bias(); - - GEMMKernelInfo kernel_info; - kernel_info.m = m; - kernel_info.n = n; - kernel_info.k = k; - kernel_info.depth_output_gemm3d = depth_output_gemm3d; - kernel_info.reinterpret_input_as_3d = false; - kernel_info.broadcast_bias = broadcast_bias; - kernel_info.activation_info = gemm_info.activation_info(); - - // Set the target for the kernels - _reshape_lhs_kernel->set_target(gpu_target); - _mm_kernel->set_target(gpu_target); - - GEMMLHSMatrixInfo lhs_info{}; - GEMMRHSMatrixInfo rhs_info{}; - - // Pick up the GEMM configuration - std::tie(lhs_info, rhs_info) = auto_select_gemm_config_reshaped(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size }, kernel_info, a, b, - c, output, gemm_info.reinterpret_input_as_3d()); - - _reshape_lhs_kernel->configure(compile_context, a, &_tmp_a, lhs_info, gemm_info.reinterpret_input_as_3d()); - _reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info); - - // Configure and tune matrix multiply kernel - _mm_reshaped_kernel->configure(compile_context, &_tmp_a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info); - - // Request memory for LHS and 
RHS reshape matrix - _aux_mem[LhsReshape] = MemoryInfo(offset_int_vec(LhsReshape), MemoryLifetime::Temporary, _tmp_a.total_size()); - _aux_mem[RhsReshape] = MemoryInfo(offset_int_vec(RhsReshape), _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _tmp_b.total_size()); -} - -void ClGemm::configure_reshaped_only_rhs(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, - const GEMMInfo &gemm_info) -{ - DataType data_type = a->data_type(); - bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1); - const unsigned int n = b->dimension(0); - const unsigned int k = a->dimension(0); - const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2); - const int depth_output_gemm3d = gemm_info.depth_output_gemm3d(); - const GPUTarget gpu_target = CLScheduler::get().target(); - bool broadcast_bias = gemm_info.broadcast_bias(); - - GEMMKernelInfo kernel_info; - kernel_info.m = m; - kernel_info.n = n; - kernel_info.k = k; - kernel_info.depth_output_gemm3d = depth_output_gemm3d; - kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d; - kernel_info.broadcast_bias = broadcast_bias; - kernel_info.activation_info = gemm_info.activation_info(); - - // Set the target for the kernels - _mm_kernel->set_target(gpu_target); - - GEMMLHSMatrixInfo lhs_info{}; - GEMMRHSMatrixInfo rhs_info{}; - - // Pick up the GEMM configuration - std::tie(lhs_info, rhs_info) = auto_select_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size }, kernel_info, a, b, c, output); - - // Transpose matrix - _reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info); - - // Configure two variants of CLGEMMMatrixMultiplyReshapedOnlyRHSKernel (has_pad_y = false/true) - // During the prepare 
stage we check the padding requirement for the lhs and dst tensors. If they do not have - // pad y, we dispatch CLGEMMMatrixMultiplyReshapedOnlyRHSKernel with has_pad_y = false - - // Configure matrix multiply kernel with no y padding support - kernel_info.has_pad_y = false; - _mm_reshaped_only_rhs_kernel->configure(compile_context, a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info); - - // Configure matrix multiply kernel with y padding support - kernel_info.has_pad_y = true; - _mm_reshaped_only_rhs_fallback_kernel->configure(compile_context, a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info); - - // Request memory for RHS reshape matrix - _aux_mem[RhsReshape] = MemoryInfo(offset_int_vec(RhsReshape), _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _tmp_b.total_size()); -} - -Status ClGemm::validate_native_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info) -{ - ARM_COMPUTE_UNUSED(alpha); - ARM_COMPUTE_UNUSED(output); - - // Get the GPU target - const GPUTarget gpu_target = CLScheduler::get().target(); - bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - const unsigned int m = reinterpret_input_as_3d ? 
(a->dimension(1) * a->dimension(2)) : a->dimension(1); - const unsigned int n = b->dimension(0); - const unsigned int k = a->dimension(0); - const int depth_output_gemm3d = gemm_info.depth_output_gemm3d(); - - const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d, gemm_info.broadcast_bias()); - - // Validate matrix multiply - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyKernel::validate(a, b, c, output, alpha, beta, - false, reshape_info, gpu_target, gemm_info.fp_mixed_precision(), gemm_info.activation_info())); - - return Status{}; -} - -Status ClGemm::validate_reshaped_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info) -{ - ARM_COMPUTE_UNUSED(alpha); - ARM_COMPUTE_UNUSED(output); - - TensorInfo tmp_a_info{}; - TensorInfo tmp_b_info{}; - - // Get the GPU target - const GPUTarget gpu_target = CLScheduler::get().target(); - const unsigned int m = gemm_info.reinterpret_input_as_3d() ? 
(a->dimension(1) * a->dimension(2)) : a->dimension(1); - const unsigned int n = b->dimension(0); - const unsigned int k = a->dimension(0); - int mult_transpose1xW_width = 1; - int mult_interleave4x4_height = 1; - const int depth_output_gemm3d = gemm_info.depth_output_gemm3d(); - - if(get_arch_from_target(gpu_target) == GPUTarget::BIFROST) - { - mult_transpose1xW_width = 4; - mult_interleave4x4_height = 2; - } - - GEMMRHSMatrixInfo rhs_info; - rhs_info.n0 = 16 / b->element_size(); - rhs_info.k0 = 1; - rhs_info.h0 = mult_transpose1xW_width; - rhs_info.interleave = false; - rhs_info.transpose = false; - - GEMMLHSMatrixInfo lhs_info; - lhs_info.m0 = 4; - lhs_info.k0 = 4; - lhs_info.v0 = mult_interleave4x4_height; - lhs_info.interleave = true; - lhs_info.transpose = true; - - const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(m, n, k, mult_transpose1xW_width, mult_interleave4x4_height, depth_output_gemm3d, false, gemm_info.broadcast_bias()); - - // Validate interleave kernel - auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(compute_lhs_reshaped_shape(*a, lhs_info, gemm_info.reinterpret_input_as_3d()))); - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeLhsMatrixKernel::validate(a, &tmp_a_info, lhs_info, gemm_info.reinterpret_input_as_3d())); - - // Validate transpose kernel - auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info))); - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info)); - - // Validate matrix multiply - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyKernel::validate(&tmp_a_info, &tmp_b_info, c, output, alpha, beta, - true, reshape_info, gpu_target, gemm_info.fp_mixed_precision(), gemm_info.activation_info())); - - return Status{}; -} - -Status ClGemm::validate_reshaped(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info) -{ - ARM_COMPUTE_UNUSED(alpha); - 
ARM_COMPUTE_UNUSED(output); - - TensorInfo tmp_a_info{}; - TensorInfo tmp_b_info{}; - - // Get the GPU target - const GPUTarget gpu_target = CLScheduler::get().target(); - DataType data_type = a->data_type(); - bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1); - const unsigned int n = b->dimension(0); - const unsigned int k = a->dimension(0); - const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2); - const int depth_output_gemm3d = gemm_info.depth_output_gemm3d(); - const bool broadcast_bias = gemm_info.broadcast_bias(); - - GEMMKernelInfo kernel_info; - kernel_info.m = m; - kernel_info.n = n; - kernel_info.k = k; - kernel_info.depth_output_gemm3d = depth_output_gemm3d; - kernel_info.reinterpret_input_as_3d = false; - kernel_info.broadcast_bias = broadcast_bias; - kernel_info.activation_info = gemm_info.activation_info(); - - GEMMLHSMatrixInfo lhs_info; - GEMMRHSMatrixInfo rhs_info; - - // Pick up the GEMM configuration - // NOTE: No need to validate mlgo configurations as they automatically fall back to default heuristics if validation fails - const auto gemm_config = select_default_gemm_config_reshaped(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size }); - lhs_info = gemm_config.lhs_info; - rhs_info = gemm_config.rhs_info; - - auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(compute_lhs_reshaped_shape(*a, lhs_info, gemm_info.reinterpret_input_as_3d()))); - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeLhsMatrixKernel::validate(a, &tmp_a_info, lhs_info, gemm_info.reinterpret_input_as_3d())); - - auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info))); - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info)); - - // Validate matrix multiply - 
ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedKernel::validate(&tmp_a_info, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info)); - - return Status{}; -} - -Status ClGemm::validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info) -{ - ARM_COMPUTE_UNUSED(alpha); - ARM_COMPUTE_UNUSED(output); - - TensorInfo tmp_b_info{}; - - // Get the GPU target - const GPUTarget gpu_target = CLScheduler::get().target(); - const DataType data_type = a->data_type(); - bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1); - const unsigned int n = b->dimension(0); - const unsigned int k = a->dimension(0); - const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2); - const int depth_output_gemm3d = gemm_info.depth_output_gemm3d(); - const bool broadcast_bias = gemm_info.broadcast_bias(); - - GEMMKernelInfo kernel_info; - kernel_info.m = m; - kernel_info.n = n; - kernel_info.k = k; - kernel_info.depth_output_gemm3d = depth_output_gemm3d; - kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d; - kernel_info.broadcast_bias = broadcast_bias; - kernel_info.activation_info = gemm_info.activation_info(); - - GEMMLHSMatrixInfo lhs_info; - GEMMRHSMatrixInfo rhs_info; - - // Pick up the GEMM configuration - // NOTE: No need to validate mlgo configurations as they automatically fall back to default heuristics if validation fails - const auto gemm_config = select_default_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size }); - lhs_info = gemm_config.lhs_info; - rhs_info = gemm_config.rhs_info; - - auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info))); - 
ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info)); - - // Validate matrix multiply - kernel_info.has_pad_y = false; - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info)); - - kernel_info.has_pad_y = true; - ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info)); - - return Status{}; -} - -void ClGemm::configure(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output); - - // Perform validation step - ARM_COMPUTE_ERROR_THROW_ON(validate(a, b, c, output, alpha, beta, gemm_info)); - - // Check if we need to reshape the matrix B only on the first run - _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run(); - - bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1); - const unsigned int n = b->dimension(0); - const unsigned int k = a->dimension(0); - const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2); - - // Select GEMMType - _gemm_kernel_type = auto_select_gemm_kernel(auto_heuristics::CommonQuery{ CLScheduler::get().target(), a->data_type(), m, n, k, batch_size }, _reshape_b_only_on_first_run, - gemm_info.constant_weights()); - - const bool fuse_add_c = (!(helpers::float_ops::is_zero(beta)) && c != nullptr); - - ITensorInfo *c_to_use = fuse_add_c ? 
c : nullptr; - - switch(_gemm_kernel_type) - { - case CLGEMMKernelType::NATIVE_V1: - { - configure_native_v1(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info); - break; - } - case CLGEMMKernelType::RESHAPED_V1: - { - configure_reshaped_v1(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info); - break; - } - case CLGEMMKernelType::RESHAPED: - { - configure_reshaped_v2(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info); - break; - } - case CLGEMMKernelType::RESHAPED_ONLY_RHS: - { - configure_reshaped_only_rhs(compile_context, a, b, c_to_use, output, alpha, beta, gemm_info); - break; - } - default: - { - ARM_COMPUTE_ERROR("GEMMType not supported"); - } - } -} - -Status ClGemm::validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info) -{ - // Get the GPU target - bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); - const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1); - const unsigned int n = b->dimension(0); - const unsigned int k = a->dimension(0); - const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2); - - // Select GEMMType - CLGEMMKernelType gemm_kernel_type = auto_select_gemm_kernel(auto_heuristics::CommonQuery - { - CLScheduler::get().target(), a->data_type(), m, n, k, batch_size, - }, - gemm_info.reshape_b_only_on_first_run(), gemm_info.constant_weights()); - - const bool fuse_add_c = (!(helpers::float_ops::is_zero(beta)) && c != nullptr); - - const ITensorInfo *c_to_use = fuse_add_c ? 
c : nullptr; - - switch(gemm_kernel_type) - { - case CLGEMMKernelType::NATIVE_V1: - { - ARM_COMPUTE_RETURN_ON_ERROR(validate_native_v1(a, b, c_to_use, output, alpha, beta, gemm_info)); - break; - } - case CLGEMMKernelType::RESHAPED_V1: - { - ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped_v1(a, b, c_to_use, output, alpha, beta, gemm_info)); - break; - } - case CLGEMMKernelType::RESHAPED: - { - ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped(a, b, c_to_use, output, alpha, beta, gemm_info)); - break; - } - case CLGEMMKernelType::RESHAPED_ONLY_RHS: - { - ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped_only_rhs(a, b, c_to_use, output, alpha, beta, gemm_info)); - break; - } - default: - { - ARM_COMPUTE_RETURN_ERROR_MSG("GEMMType not supported"); - } - } - - return Status{}; -} - -void ClGemm::run(ITensorPack &tensors) -{ - const ITensor *lhs = tensors.get_const_tensor(ACL_SRC_0); - const ITensor *rhs = tensors.get_const_tensor(ACL_SRC_1); - const ITensor *src2 = tensors.get_const_tensor(ACL_SRC_2); - ITensor *dst = tensors.get_tensor(ACL_DST); - - ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, dst); - - CLAuxTensorHandler lhs_reshaped(offset_int_vec(LhsReshape), _tmp_a, tensors, true); - CLAuxTensorHandler rhs_reshaped(offset_int_vec(RhsReshape), _tmp_b, tensors, true); - - // Prepare the consts if needed - prepare(tensors); - - // Run matrix multiply kernel - switch(_gemm_kernel_type) - { - case CLGEMMKernelType::NATIVE_V1: - { - CLScheduler::get().enqueue_op(*_mm_kernel, tensors, true); - break; - } - case CLGEMMKernelType::RESHAPED_V1: - case CLGEMMKernelType::RESHAPED: - { - // Run interleave kernel - ITensorPack reshape_lhs_pack{ { ACL_SRC, lhs }, { ACL_DST, lhs_reshaped.get() } }; - CLScheduler::get().enqueue_op(*_reshape_lhs_kernel, reshape_lhs_pack, false); - - if(!_reshape_b_only_on_first_run) - { - // Run transpose kernel - ITensorPack reshape_rhs_pack{ { ACL_SRC, rhs }, { ACL_DST, rhs_reshaped.get() } }; - CLScheduler::get().enqueue_op(*_reshape_rhs_kernel, reshape_rhs_pack, 
false); - } - - ITensorPack gemm_reshaped_pack{ { ACL_SRC_0, lhs_reshaped.get() }, { ACL_SRC_1, rhs_reshaped.get() }, { ACL_SRC_2, src2 }, { ACL_DST, dst } }; - if(_gemm_kernel_type == CLGEMMKernelType::RESHAPED) - { - CLScheduler::get().enqueue_op(*_mm_reshaped_kernel, gemm_reshaped_pack, true); - } - else - { - CLScheduler::get().enqueue_op(*_mm_kernel, gemm_reshaped_pack, true); - } - break; - } - case CLGEMMKernelType::RESHAPED_ONLY_RHS: - { - if(!_reshape_b_only_on_first_run) - { - // Run transpose kernel - ITensorPack reshape_rhs_pack{ { ACL_SRC, rhs }, { ACL_DST, rhs_reshaped.get() } }; - CLScheduler::get().enqueue_op(*_reshape_rhs_kernel, reshape_rhs_pack, false); - } - // In case of RESHAPED_ONLY_RHS, we need to check the padding requirement - // Check if the lhs or dst tensors have padding - const unsigned int cross_plane_pad_lhs = lhs->info()->padding().top + lhs->info()->padding().bottom; - const unsigned int cross_plane_pad_dst = dst->info()->padding().top + dst->info()->padding().bottom; - bool has_pad_y = (cross_plane_pad_lhs != 0) || (cross_plane_pad_dst != 0); - - ITensorPack gemm_reshaped_onlyrhs_pack{ { ACL_SRC_0, lhs }, { ACL_SRC_1, rhs_reshaped.get() }, { ACL_SRC_2, src2 }, { ACL_DST, dst } }; - if(has_pad_y) - { - CLScheduler::get().enqueue_op(*_mm_reshaped_only_rhs_fallback_kernel, gemm_reshaped_onlyrhs_pack, true); - } - else - { - CLScheduler::get().enqueue_op(*_mm_reshaped_only_rhs_kernel, gemm_reshaped_onlyrhs_pack, true); - } - break; - } - default: - { - ARM_COMPUTE_ERROR("GEMMType not supported"); - } - } -} - -void ClGemm::prepare(ITensorPack &constants) -{ - const ITensor *src1 = constants.get_const_tensor(ACL_SRC_1); - ICLTensor *rhs_aux = utils::cast::polymorphic_downcast<ICLTensor *>(constants.get_tensor(offset_int_vec(RhsReshape))); - - // If memory for RHS is persistent and src1 is provided re-transform else assume that RHS is transformed - if((_aux_mem[AuxTensorIdx::RhsReshape].lifetime == MemoryLifetime::Persistent) && (src1 
!= nullptr && rhs_aux != nullptr) && rhs_aux) - { - CLAuxTensorHandler rhs_reshaped(_tmp_b, *rhs_aux); - ARM_COMPUTE_ERROR_ON(rhs_reshaped.get()->cl_buffer().get() == nullptr); - - ITensorPack reshape_rhs_pack{ { ACL_SRC, src1 }, { ACL_DST, rhs_reshaped.get() } }; - CLScheduler::get().enqueue_op(*_reshape_rhs_kernel, reshape_rhs_pack, true); - } -} - -experimental::MemoryRequirements ClGemm::workspace() const -{ - return _aux_mem; -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClGemm.h b/src/runtime/gpu/cl/operators/ClGemm.h deleted file mode 100644 index bd9ca17edf..0000000000 --- a/src/runtime/gpu/cl/operators/ClGemm.h +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2016-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CL_GEMM_H -#define ARM_COMPUTE_CL_GEMM_H - -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/CLTypes.h" -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/IClKernel.h" -#include "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.h" -#include "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h" -#include "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h" -#include "src/core/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h" -#include "src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h" -#include "src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -#include <memory> - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to execute GEMM on OpenCL. This function calls the following OpenCL kernels: - * - * -# @ref kernels::ClGemmReshapeLhsMatrixKernel (only if either the RESHAPED_V1 or RESHAPED is selected by the heuristic model) - * -# @ref kernels::ClGemmReshapeRhsMatrixKernel (only if the RESHAPED_V1, RESHAPED or RESHAPED_ONLY_RHS is selected by the select_gemm_kernel method()) - * -# @ref kernels::ClGemmMatrixMultiplyKernel (only if either the NATIVE_V1 or RESHAPED_V1 is selected by the select_gemm_kernel method()) - * -# @ref kernels::ClGemmMatrixMultiplyReshapedKernel (only if RESHAPED is selected by the select_gemm_kernel method()) - * -# @ref kernels::ClGemmMatrixMultiplyReshapedOnlyRhsKernel (only if RESHAPED_ONLY_RHS is selected by the select_gemm_kernel method()) - */ -class ClGemm : public IClOperator -{ -public: - /** Constructor */ - ClGemm(); - /** Initialise the kernel's inputs and output - * - * Valid data layouts: - * - All - * - * Valid data type configurations: - * |src0 |src1 |src2 |dst | - * |:------------|:-----------|:---------|:--------------| - * |F32 |F32 |F32 |F32 | - * |F16 |F16 |F16 |F16 | - * - * @note GEMM: General Matrix Multiply - [alpha * A 
* B + beta * C]. - * - * @note All tensors must have the same data type. - * - * @note Whilst the first input tensor can be a vector, the second input tensor must be at least a matrix - * - * @param[in] compile_context The compile context to be used. - * @param[in] a First input tensor (Matrix or Vector A). Data types supported: F16/F32 - * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a. - * @param[in] c Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a. - * @param[out] output Output tensor. Data type supported: same as @p a - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of matrix C - * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and - * if the reshape of matrix B should happen only for the first run. GEMMInfo also contains information about the reshaping - * in case matrix A and matrix B have been already transformed. 
- */ - void configure(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to ClGemm::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); - - // Inherited methods overridden: - void run(ITensorPack &tensors) override; - void prepare(ITensorPack &constants) override; - experimental::MemoryRequirements workspace() const override; - -private: - void configure_native_v1(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); - void configure_reshaped_v1(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); - void configure_reshaped_v2(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); - void configure_reshaped_only_rhs(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); - - static Status validate_native_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); - static Status validate_reshaped_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); - static Status validate_reshaped(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, 
const GEMMInfo &gemm_info); - static Status validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info); - -private: - enum AuxTensorIdx - { - LhsReshape = 0, - RhsReshape, - Count - }; - -private: - std::unique_ptr<kernels::ClGemmMatrixMultiplyKernel> _mm_kernel; - std::unique_ptr<kernels::ClGemmReshapeLhsMatrixKernel> _reshape_lhs_kernel; - std::unique_ptr<kernels::ClGemmReshapeRhsMatrixKernel> _reshape_rhs_kernel; - std::unique_ptr<kernels::ClGemmMatrixMultiplyReshapedKernel> _mm_reshaped_kernel; - std::unique_ptr<kernels::ClGemmMatrixMultiplyReshapedOnlyRhsKernel> _mm_reshaped_only_rhs_kernel; - std::unique_ptr<kernels::ClGemmMatrixMultiplyReshapedOnlyRhsKernel> _mm_reshaped_only_rhs_fallback_kernel; - TensorInfo _tmp_a; - TensorInfo _tmp_b; - bool _reshape_b_only_on_first_run; - CLGEMMKernelType _gemm_kernel_type; - - experimental::MemoryRequirements _aux_mem{}; -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_GEMM_H */ diff --git a/src/runtime/gpu/cl/operators/ClLogicalNot.cpp b/src/runtime/gpu/cl/operators/ClLogicalNot.cpp deleted file mode 100644 index 400efe450d..0000000000 --- a/src/runtime/gpu/cl/operators/ClLogicalNot.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2017-2021 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "src/runtime/gpu/cl/operators/ClLogicalNot.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClLogicalNot::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst) -{ - auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>(); - k->configure(compile_context, src, dst, ElementWiseUnary::LOGICAL_NOT); - _kernel = std::move(k); -} - -Status ClLogicalNot::validate(const ITensorInfo *src, const ITensorInfo *dst) -{ - return kernels::ClElementWiseUnaryKernel::validate(src, dst, ElementWiseUnary::LOGICAL_NOT); -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClLogicalNot.h b/src/runtime/gpu/cl/operators/ClLogicalNot.h deleted file mode 100644 index 25ddf564b5..0000000000 --- a/src/runtime/gpu/cl/operators/ClLogicalNot.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_LOGICAL_NOT_H -#define ARM_COMPUTE_CL_LOGICAL_NOT_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref kernels::ClElementWiseUnaryKernel for NOT operation */ -class ClLogicalNot : public IClOperator -{ -public: - /** Constructor */ - ClLogicalNot() = default; - /** Configure operator for a given list of arguments - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. Data types supported: U8. - * @param[out] dst Destination tensor info. Data types supported: same as @p src. - */ - void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst); - /** Static function to check if given info will lead to a valid configuration - * - * @param[in] src Source tensor info. Data types supported: U8. - * @param[in] dst Destination tensor info. Data types supported: same as @p src. - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_LOGICAL_NOT_H */ diff --git a/src/runtime/gpu/cl/operators/ClMul.cpp b/src/runtime/gpu/cl/operators/ClMul.cpp deleted file mode 100644 index d1e2bc806f..0000000000 --- a/src/runtime/gpu/cl/operators/ClMul.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "src/runtime/gpu/cl/operators/ClMul.h" - -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClMulKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClMul::configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, float scale, - ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info) -{ - auto k = std::make_unique<kernels::ClMulKernel>(); - k->configure(compile_context, src1, src2, dst, scale, overflow_policy, rounding_policy, act_info); - _kernel = std::move(k); -} - -Status ClMul::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, float scale, - ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info) -{ - return kernels::ClMulKernel::validate(src1, src2, dst, scale, overflow_policy, rounding_policy, act_info); -} - -void ClComplexMul::configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info) -{ - auto k = std::make_unique<kernels::ClComplexMulKernel>(); - k->configure(compile_context, src1, src2, dst, act_info); - _kernel = std::move(k); -} - -Status ClComplexMul::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info) -{ - return kernels::ClComplexMulKernel::validate(src1, src2, dst, act_info); -} -} // namespace opencl -} // namespace arm_compute
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClMul.h b/src/runtime/gpu/cl/operators/ClMul.h deleted file mode 100644 index 4a662b3276..0000000000 --- a/src/runtime/gpu/cl/operators/ClMul.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_MUL_H -#define ARM_COMPUTE_CL_MUL_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref opencl::kernels::ClMulKernel */ -class ClMul : public IClOperator -{ -public: - /** Default Constructor */ - ClMul() = default; - /** Initialise the kernel's sources, dst and conversion policy. 
- * - * Valid configurations (src1,src2) -> Output : - * - * - (U8,U8) -> U8 - * - (U8,U8) -> S16 - * - (U8,S16) -> S16 - * - (S16,U8) -> S16 - * - (S16,S16) -> S16 - * - (F16,F16) -> F16 - * - (F32,F32) -> F32 - * - (QASYMM8,QASYMM8) -> QASYMM8 - * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED - * - (QSYMM16,QSYMM16) -> QSYMM16 - * - (QSYMM16,QSYMM16) -> S32 - * - * @param[in] compile_context The compile context to be used. - * @param[in, out] src1 An src tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. - * The src tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] src2 An src tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. - * The src tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] dst The dst tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32. - * @param[in] scale Scale to apply after multiplication. - * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. - * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate - * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 
- */ - void configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, float scale, - ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClMul::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, float scale, - ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo()); -}; - -/** Basic function to run @ref opencl::kernels::ClComplexMulKernel */ -class ClComplexMul : public IClOperator -{ -public: - /** Default Constructor */ - ClComplexMul() = default; - /** Initialise the kernel's sources, dst. - * - * @param[in] compile_context The compile context to be used. - * @param[in, out] src1 An src tensor info. Data types supported: F16/F32. Number of channels supported: 2. - * The src tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] src2 An src tensor info. Data types supported: same as @p src1. Number of channels supported: same as @p src1. - * The src tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] dst The dst tensor info, Data types supported: same as @p src1. Number of channels supported: same as @p src1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 
- */ - void configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClComplexMul::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo()); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_MUL_H */
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClPRelu.cpp b/src/runtime/gpu/cl/operators/ClPRelu.cpp deleted file mode 100644 index d1ce14cc87..0000000000 --- a/src/runtime/gpu/cl/operators/ClPRelu.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClPRelu.h" -#include "src/core/gpu/cl/kernels/ClElementwiseKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -using KernelType = kernels::ClArithmeticKernel; -void ClPRelu::configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *alpha, ITensorInfo *output) -{ - auto k = std::make_unique<KernelType>(); - k->configure(compile_context, ArithmeticOperation::PRELU, input, alpha, (output == nullptr ? 
input : output)); - _kernel = std::move(k); -} - -Status ClPRelu::validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output) -{ - return KernelType::validate(ArithmeticOperation::PRELU, input, alpha, (output == nullptr ? input : output)); -} - -void ClPRelu::run(ITensorPack &tensors) -{ - // Output tensor can be given as nullptr for in-place computation. - // In this case, get the input tensor and use it as the output tensor. - if(tensors.get_tensor(TensorType::ACL_DST) == nullptr) - { - auto src_tensor = const_cast<ITensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_0)); - ARM_COMPUTE_ERROR_ON_MSG(src_tensor == nullptr, "invalid source tensor is given for in-place computation"); - tensors.add_tensor(TensorType::ACL_DST, src_tensor); - } - IClOperator::run(tensors); -} -} // namespace opencl -} // namespace arm_compute
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClPRelu.h b/src/runtime/gpu/cl/operators/ClPRelu.h deleted file mode 100644 index 70202aeb81..0000000000 --- a/src/runtime/gpu/cl/operators/ClPRelu.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_PRELU_H -#define ARM_COMPUTE_CL_PRELU_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic operator to run @ref arm_compute::opencl::kernels::ClArithmeticKernel for PRELU - * - * @note The operator implements an activation layer with the PRELU activation function. - */ -class ClPRelu : public IClOperator -{ -public: - /** Default constructor */ - ClPRelu() = default; - /** Set the input and output tensor. 
- * - * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] alpha PRelu layer parameters. Data types supported: same as @p input. - * @param[out] output Destination tensor. Data type supported: same as @p input - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *alpha, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref arm_compute::opencl::kernels::ClArithmeticKernel for PRELU - * - * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] alpha PRelu layer parameters. Data types supported: same as @p input. - * @param[in] output Destination tensor info. Data type supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output); - - // Inherited methods overridden: - void run(ITensorPack &tensors) override; -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_PRELU_H */ diff --git a/src/runtime/gpu/cl/operators/ClPermute.cpp b/src/runtime/gpu/cl/operators/ClPermute.cpp deleted file mode 100644 index 719bb6dac6..0000000000 --- a/src/runtime/gpu/cl/operators/ClPermute.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClPermute.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClPermuteKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClPermute::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const PermutationVector &perm) -{ - auto k = std::make_unique<kernels::ClPermuteKernel>(); - k->configure(compile_context, src, dst, perm); - _kernel = std::move(k); -} - -Status ClPermute::validate(const ITensorInfo *src, const ITensorInfo *dst, const PermutationVector &perm) -{ - return kernels::ClPermuteKernel::validate(src, dst, perm); -} -} // namespace opencl -} // namespace arm_compute
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClPermute.h b/src/runtime/gpu/cl/operators/ClPermute.h deleted file mode 100644 index 20e7a32428..0000000000 --- a/src/runtime/gpu/cl/operators/ClPermute.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CL_PERMUTE_H -#define ARM_COMPUTE_CL_PERMUTE_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref kernels::ClPermuteKernel */ -class ClPermute : public IClOperator -{ -public: - /** Constructor */ - ClPermute() = default; - /** Initialise the kernel's inputs and outputs and permute vector - * - * @note Arbitrary permutation vectors are supported with rank not greater than 4 - * - * @param[in] compile_context The compile context to be used. - * @param[in] src The src tensor info. Data types supported: All. - * @param[in] dst The dst tensor info. Data types supported: Same as @p src - * @param[in] perm Permutation vector - */ - void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const PermutationVector &perm); - /** Static function to check if given info will lead to a valid configuration of @ref kernels::ClPermuteKernel. - * - * @note Arbitrary permutation vectors are supported with rank not greater than 4 - * - * @param[in] src First tensor src info. Data types supported: All. - * @param[in] dst Output tensor info. Data types supported: same as @p src. - * @param[in] perm Permutation vector - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const PermutationVector &perm); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_PERMUTE_H */
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClPool2d.cpp b/src/runtime/gpu/cl/operators/ClPool2d.cpp deleted file mode 100644 index 40c2b0a8ba..0000000000 --- a/src/runtime/gpu/cl/operators/ClPool2d.cpp +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "src/runtime/gpu/cl/operators/ClPool2d.h" - -#include "arm_compute/runtime/CL/CLScheduler.h" - -#include "src/core/CL/kernels/CLFillBorderKernel.h" -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClPool2dKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClPool2d::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, ITensorInfo *indices) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(src); - // Configure pooling kernel - auto k = std::make_unique<kernels::ClPool2dKernel>(); - k->set_target(CLScheduler::get().target()); - k->configure(compile_context, src, dst, info, indices); - _pooling = std::move(k); - - const DataType data_type = src->data_type(); - - // Configure border depending on operation required (quantize border in case of asymmetric data_type) - BorderMode border_mode{}; - PixelValue pixel_value(0.f); - if(is_data_type_quantized_asymmetric(data_type) && !info.exclude_padding) - { - pixel_value = PixelValue(0, data_type, src->quantization_info()); - } - - // Data layout - const auto data_layout = info.data_layout == DataLayout::UNKNOWN ? src->data_layout() : info.data_layout; - - switch(data_layout) - { - case DataLayout::NCHW: - border_mode = (PoolingType::MAX == info.pool_type) ? 
BorderMode::REPLICATE : BorderMode::CONSTANT; - break; - case DataLayout::NHWC: - border_mode = BorderMode::CONSTANT; - if(PoolingType::MAX == info.pool_type) - { - if(is_data_type_quantized(data_type)) - { - std::tie(pixel_value, std::ignore) = get_min_max(data_type); - } - else - { - pixel_value = PixelValue(std::numeric_limits<float>::lowest()); - } - } - break; - default: - ARM_COMPUTE_ERROR("Data layout not supported"); - } - auto b = std::make_unique<CLFillBorderKernel>(); - b->configure(compile_context, src, _pooling->border_size(), border_mode, pixel_value); - _border_handler = std::move(b); - - // Tune kernels - CLScheduler::get().tune_kernel_static(*_pooling); -} - -Status ClPool2d::validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &info, const ITensorInfo *indices) -{ - return kernels::ClPool2dKernel::validate(src, dst, info, indices); -} - -void ClPool2d::run(ITensorPack &tensors) -{ - ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided"); - - CLScheduler::get().enqueue_op(*_border_handler.get(), tensors, false); - CLScheduler::get().enqueue_op(*_pooling.get(), tensors, false); -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClPool2d.h b/src/runtime/gpu/cl/operators/ClPool2d.h deleted file mode 100644 index 8ac386a64b..0000000000 --- a/src/runtime/gpu/cl/operators/ClPool2d.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_POOL2D_H -#define ARM_COMPUTE_CL_POOL2D_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -#include <memory> - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following OpenCL kernels: - * - * -# @ref CLFillBorderKernel (executed if padding size is different from zero) - * -# @ref opencl::ClPool2d - */ -class ClPool2d : public IClOperator -{ -public: - /** Constructor */ - ClPool2d() = default; - /** Configure operator for a given list of arguments - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[out] dst Destination tensor info. 
Data type supported: same as @p src - * @param[in] info Pooling layer parameters. - * @param[out] indices (optional) The indices info of the maximal values. Data type supported: U32. - */ - void configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, ITensorInfo *indices = nullptr); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to ClPool2d::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &info, const ITensorInfo *indices = nullptr); - - // Inherited method overridden - void run(ITensorPack &tensors) override; - -private: - std::unique_ptr<ICLKernel> _pooling{ nullptr }; - std::unique_ptr<ICLKernel> _border_handler{ nullptr }; -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_POOL2D_H */ diff --git a/src/runtime/gpu/cl/operators/ClQuantize.cpp b/src/runtime/gpu/cl/operators/ClQuantize.cpp deleted file mode 100644 index 92bbb62ba5..0000000000 --- a/src/runtime/gpu/cl/operators/ClQuantize.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClQuantize.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClQuantizeKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClQuantize::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst) -{ - auto k = std::make_unique<kernels::ClQuantizeKernel>(); - k->configure(compile_context, src, dst); - _kernel = std::move(k); -} - -Status ClQuantize::validate(const ITensorInfo *src, const ITensorInfo *dst) -{ - return kernels::ClQuantizeKernel::validate(src, dst); -} - -void ClQuantize::run(ITensorPack &tensors) -{ - ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided"); - CLScheduler::get().enqueue_op(*_kernel.get(), tensors); -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClQuantize.h b/src/runtime/gpu/cl/operators/ClQuantize.h deleted file mode 100644 index 0b6d2c8cbe..0000000000 --- a/src/runtime/gpu/cl/operators/ClQuantize.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_QUANTIZE_H -#define ARM_COMPUTE_CL_QUANTIZE_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref kernels::ClQuantizeKernel that dequantizes an input tensor */ -class ClQuantize : public IClOperator -{ -public: - /** Constructor */ - ClQuantize() = default; - /** Set the input and output tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/32. - * @param[out] dst Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. 
- * - * @note Output auto initialization is not supported by this function - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to @ref ClQuantize::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst); - - // Inherited method overridden - void run(ITensorPack &tensors) override; -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_QUANTIZE_H */ diff --git a/src/runtime/gpu/cl/operators/ClReshape.cpp b/src/runtime/gpu/cl/operators/ClReshape.cpp deleted file mode 100644 index d3fa9f10ab..0000000000 --- a/src/runtime/gpu/cl/operators/ClReshape.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "src/runtime/gpu/cl/operators/ClReshape.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClReshapeKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClReshape::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst) -{ - auto k = std::make_unique<kernels::ClReshapeKernel>(); - k->configure(compile_context, src, dst); - _kernel = std::move(k); -} - -Status ClReshape::validate(const ITensorInfo *src, const ITensorInfo *dst) -{ - return kernels::ClReshapeKernel::validate(src, dst); -} -} // namespace opencl -} // namespace arm_compute
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClReshape.h b/src/runtime/gpu/cl/operators/ClReshape.h deleted file mode 100644 index 8cccc5776c..0000000000 --- a/src/runtime/gpu/cl/operators/ClReshape.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_RESHAPE_H -#define ARM_COMPUTE_CL_RESHAPE_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref kernels::ClReshapeKernel */ -class ClReshape : public IClOperator -{ -public: - /** Constructor */ - ClReshape() = default; - /** Initialise the kernel's inputs and outputs - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor info. 
Data type supported: All - * @param[out] output Output info. Data type supported: Same as @p input - */ - void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output); - - /** Static function to check if given info will lead to a valid configuration of @ref kernels::ClReshapeKernel - * - * @param[in] input Input tensor info. Data type supported: All - * @param[in] output Output tensor info. Data type supported: Same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_RESHAPE_H */
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClScale.cpp b/src/runtime/gpu/cl/operators/ClScale.cpp deleted file mode 100644 index 4730c8a16e..0000000000 --- a/src/runtime/gpu/cl/operators/ClScale.cpp +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "src/runtime/gpu/cl/operators/ClScale.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "src/core/CL/kernels/CLFillBorderKernel.h" -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClScaleKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClScale::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const ScaleKernelInfo &info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(src); - // Configure Scale kernel - auto k = std::make_unique<kernels::ClScaleKernel>(); - k->set_target(CLScheduler::get().target()); - k->configure(compile_context, src, dst, info); - _kernel = std::move(k); - if(!_kernel->border_size().empty()) - { - auto b = std::make_unique<CLFillBorderKernel>(); - b->configure(compile_context, src, _kernel->border_size(), info.border_mode, info.constant_border_value); - _border_handler = std::move(b); - } - // Tune kernel - CLScheduler::get().tune_kernel_static(*_kernel); -} - -Status ClScale::validate(const ITensorInfo *src, const ITensorInfo *dst, const ScaleKernelInfo &info) -{ - return kernels::ClScaleKernel::validate(src, dst, info); -} - -void ClScale::run(ITensorPack &tensors) -{ - ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided"); - if(!_kernel->border_size().empty()) - { - CLScheduler::get().enqueue_op(*_border_handler.get(), tensors, false); - } - CLScheduler::get().enqueue_op(*_kernel.get(), tensors); -} -} // namespace opencl -} // namespace arm_compute
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClScale.h b/src/runtime/gpu/cl/operators/ClScale.h deleted file mode 100644 index 6eccb59be8..0000000000 --- a/src/runtime/gpu/cl/operators/ClScale.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_SCALE_H -#define ARM_COMPUTE_CL_SCALE_H - -#include "arm_compute/core/KernelDescriptors.h" -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to simulate a scale layer. 
This function calls the following OpenCL kernels: - * - * -# @ref CLFillBorderKernel (executed if padding size is different from zero) - * -# @ref kernels::ClScaleKernel - */ -class ClScale : public IClOperator -{ -public: - /** Constructor */ - ClScale() = default; - /** Initialize the function's source, destination, interpolation type and border_mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in,out] src Source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED) - * @param[out] dst Destination tensor info. Data types supported: Same as @p src - * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. - * @param[in] info @ref ScaleKernelInfo descriptor to be used to configure - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const ScaleKernelInfo &info); - - /** Static function to check if given info will lead to a valid configuration of @ref ClScale - * - * @param[in] src Source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32. - * @param[in] dst Output tensor info. Data type supported: Same as @p src - * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. 
- * @param[in] info @ref ScaleKernelInfo descriptor to be used to validate - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const ScaleKernelInfo &info); - - // Inherited method overridden - void run(ITensorPack &tensors) override; - -protected: - std::unique_ptr<ICLKernel> _border_handler{ nullptr }; -}; -} // namespace opencl -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLSCALE_H */ diff --git a/src/runtime/gpu/cl/operators/ClSoftmax.cpp b/src/runtime/gpu/cl/operators/ClSoftmax.cpp deleted file mode 100644 index 975bb0b932..0000000000 --- a/src/runtime/gpu/cl/operators/ClSoftmax.cpp +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "src/runtime/gpu/cl/operators/ClSoftmax.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "src/core/gpu/cl/kernels/ClSoftmaxKernel.h" -#include "src/core/helpers/MemoryHelpers.h" -#include "src/core/helpers/SoftmaxHelpers.h" -#include "src/runtime/gpu/cl/operators/ClPermute.h" -#include "src/runtime/gpu/cl/utils/ClAuxTensorHandler.h" -#include "support/Cast.h" - -using namespace arm_compute::experimental; - -namespace arm_compute -{ -namespace opencl -{ -ClSoftmax::ClSoftmax() - : _permute_input(std::make_unique<ClPermute>()), - _permute_output(std::make_unique<ClPermute>()), - _max_shift_exp_sum_kernel(std::make_unique<kernels::ClLogits1DMaxShiftExpSumKernel>()), - _norm_kernel(std::make_unique<kernels::ClLogits1DNormKernel>()), - _max_info(), - _sum_info(), - _tmp_info(), - _permuted_src_info(), - _permuted_dst_info(), - _aux_mem(InternalTensorIdx::COUNT) -{ -} - -void ClSoftmax::configure(const CLCompileContext &compile_context, const ITensorInfo &src, ITensorInfo &dst, const SoftmaxKernelInfo &info) -{ - ARM_COMPUTE_ERROR_THROW_ON(validate(src, dst, info)); - - const size_t actual_axis = static_cast<size_t>(wrap_around(info.axis, static_cast<int32_t>(src.num_dimensions()))); - - _needs_permute = actual_axis != 0; - - const ITensorInfo &tmp_input_info = _needs_permute ? _permuted_src_info : src; - ITensorInfo &tmp_output_info = _needs_permute ? _permuted_dst_info : dst; - - if(_needs_permute) - { - const auto perm_info = softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis); - _permute_input->configure(compile_context, &src, &_permuted_src_info, perm_info); - } - - DataType tmp_data_type = is_data_type_quantized_asymmetric(tmp_input_info.data_type()) ? 
DataType::S32 : tmp_input_info.data_type(); - _tmp_info = tmp_input_info.clone()->set_data_type(tmp_data_type); - - TensorShape max_sum_shape = tmp_input_info.tensor_shape(); - _max_info = tmp_input_info.clone()->set_tensor_shape(max_sum_shape); - _sum_info = tmp_input_info.clone()->set_tensor_shape(max_sum_shape).set_data_type(tmp_data_type); - - // Set GPU target to kernels - _max_shift_exp_sum_kernel->set_target(CLScheduler::get().target()); - - _max_shift_exp_sum_kernel->configure(compile_context, tmp_input_info, _max_info, _tmp_info, _sum_info, info); - _norm_kernel->configure(compile_context, _tmp_info, _sum_info, tmp_output_info, info); - - if(_needs_permute) - { - const auto perm_info = softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis); - _permute_output->configure(compile_context, &_permuted_dst_info, &dst, perm_info); - } - - _aux_mem[InternalTensorIdx::SUM] = MemoryInfo(offset_int_vec(InternalTensorIdx::SUM), MemoryLifetime::Temporary, _sum_info.total_size()); - _aux_mem[InternalTensorIdx::TMP] = MemoryInfo(offset_int_vec(InternalTensorIdx::TMP), MemoryLifetime::Temporary, _tmp_info.total_size()); - _aux_mem[InternalTensorIdx::MAX] = MemoryInfo(offset_int_vec(InternalTensorIdx::MAX), MemoryLifetime::Temporary, _max_info.total_size()); - - _aux_mem[InternalTensorIdx::PERMUTED_SRC] = MemoryInfo(offset_int_vec(InternalTensorIdx::PERMUTED_SRC), MemoryLifetime::Temporary, _permuted_src_info.total_size()); - _aux_mem[InternalTensorIdx::PERMUTED_DST] = MemoryInfo(offset_int_vec(InternalTensorIdx::PERMUTED_DST), MemoryLifetime::Temporary, _permuted_dst_info.total_size()); -} - -Status ClSoftmax::validate(const ITensorInfo &src, const ITensorInfo &dst, const SoftmaxKernelInfo &info) -{ - ARM_COMPUTE_RETURN_ERROR_ON_MSG(src.num_dimensions() > 4, "Only up to 4 dimensions are supported"); - ARM_COMPUTE_UNUSED(info.beta); - ARM_COMPUTE_RETURN_ERROR_ON(info.axis < static_cast<int32_t>(-src.num_dimensions()) || 
static_cast<int32_t>(src.num_dimensions()) <= info.axis); - - const size_t actual_axis = static_cast<size_t>(wrap_around(info.axis, static_cast<int32_t>(src.num_dimensions()))); - const bool needs_permute = actual_axis != 0; - if(needs_permute) - { - const PermutationVector permutation_vector = softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis); - const TensorShape permuted_shape = misc::shape_calculator::compute_permutation_output_shape(src, permutation_vector); - TensorInfo input_permuted(src.clone()->set_tensor_shape(permuted_shape)); - ARM_COMPUTE_RETURN_ON_ERROR(ClPermute::validate(&src, &input_permuted, permutation_vector)); - TensorInfo output_permuted(dst.clone()->set_tensor_shape(permuted_shape)); - ARM_COMPUTE_RETURN_ON_ERROR(ClPermute::validate(&output_permuted, &dst, permutation_vector)); - } - - // Create intermediate tensor info - DataType tmp_data_type = is_data_type_quantized_asymmetric(src.data_type()) ? DataType::S32 : src.data_type(); - TensorInfo tensor_info_tmp(src.clone()->set_data_type(tmp_data_type).set_is_resizable(true)); - - TensorShape max_sum_shape = src.tensor_shape(); - max_sum_shape.set(0, 1); - TensorInfo tensor_info_max(src.clone()->set_tensor_shape(max_sum_shape).set_is_resizable(true)); - TensorInfo tensor_info_sum(src.clone()->set_tensor_shape(max_sum_shape).set_data_type(tmp_data_type).set_quantization_info(QuantizationInfo()).set_is_resizable(true)); - - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClLogits1DMaxShiftExpSumKernel::validate(src, tensor_info_max, tensor_info_tmp, tensor_info_sum)); - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClLogits1DNormKernel::validate(tensor_info_tmp, tensor_info_sum, dst, info)); - - return Status{}; -} - -void ClSoftmax::run(ITensorPack &tensors) -{ - auto src = tensors.get_const_tensor(TensorType::ACL_SRC); - auto dst = tensors.get_tensor(TensorType::ACL_DST); - - CLAuxTensorHandler sum(offset_int_vec(InternalTensorIdx::SUM), _sum_info, tensors, false); - CLAuxTensorHandler 
tmp(offset_int_vec(InternalTensorIdx::TMP), _tmp_info, tensors, false); - CLAuxTensorHandler max(offset_int_vec(InternalTensorIdx::MAX), _max_info, tensors, false); - - CLAuxTensorHandler permuted_src(offset_int_vec(InternalTensorIdx::PERMUTED_SRC), _permuted_src_info, tensors, false); - CLAuxTensorHandler permuted_dst(offset_int_vec(InternalTensorIdx::PERMUTED_DST), _permuted_dst_info, tensors, false); - - if(_needs_permute) - { - ITensorPack pack; - pack.add_const_tensor(TensorType::ACL_SRC, src); - pack.add_tensor(TensorType::ACL_DST, permuted_src.get()); - _permute_input.get()->run(pack); - } - - ITensorPack sum_pack; - ITensorPack norm_pack; - if(_needs_permute) - { - sum_pack.add_const_tensor(TensorType::ACL_SRC, permuted_src.get()); - norm_pack.add_tensor(TensorType::ACL_DST, permuted_dst.get()); - } - else - { - sum_pack.add_const_tensor(TensorType::ACL_SRC, src); - norm_pack.add_tensor(TensorType::ACL_DST, dst); - } - sum_pack.add_tensor(TensorType::ACL_DST, tmp.get()); - sum_pack.add_tensor(TensorType::ACL_INT_0, max.get()); - sum_pack.add_tensor(TensorType::ACL_INT_1, sum.get()); - - norm_pack.add_const_tensor(TensorType::ACL_SRC, tmp.get()); - norm_pack.add_tensor(TensorType::ACL_INT_0, sum.get()); - - CLScheduler::get().enqueue_op(*_max_shift_exp_sum_kernel.get(), sum_pack, false); - CLScheduler::get().enqueue_op(*_norm_kernel.get(), norm_pack, false); - - if(_needs_permute) - { - ITensorPack pack; - pack.add_const_tensor(TensorType::ACL_SRC, permuted_dst.get()); - pack.add_tensor(TensorType::ACL_DST, dst); - _permute_output.get()->run(pack); - } -} - -experimental::MemoryRequirements ClSoftmax::workspace() const -{ - return _aux_mem; -} -} // namespace opencl -} // namespace arm_compute
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClSoftmax.h b/src/runtime/gpu/cl/operators/ClSoftmax.h deleted file mode 100644 index f19a51fc5e..0000000000 --- a/src/runtime/gpu/cl/operators/ClSoftmax.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CL_SOFTMAX_H -#define ARM_COMPUTE_CL_SOFTMAX_H - -#include "arm_compute/runtime/CL/CLTensor.h" -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -struct SoftmaxKernelInfo; - -namespace opencl -{ -class ClPermute; -namespace kernels -{ -class ClLogits1DMaxShiftExpSumKernel; -class ClLogits1DNormKernel; -} // namespace kernels -class ClSoftmax : public IClOperator -{ -public: - /** Constructor */ - ClSoftmax(); - /** Configure the operator - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax - * @param[out] dst Destination tensor info. Data types supported: same as @p src - * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo. - * - */ - void configure(const CLCompileContext &compile_context, const ITensorInfo &src, ITensorInfo &dst, const SoftmaxKernelInfo &info); - /** Static function to check if the given info will lead to a valid configuration - * - * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax - * @param[out] dst Destination tensor info. Data types supported: same as @p src - * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo. 
- * - */ - static Status validate(const ITensorInfo &src, const ITensorInfo &dst, const SoftmaxKernelInfo &info); - // Inherited methods overridden: - void run(ITensorPack &tensors) override; - experimental::MemoryRequirements workspace() const override; - -private: - enum InternalTensorIdx - { - MAX = 0, - SUM, - TMP, - PERMUTED_SRC, - PERMUTED_DST, - COUNT - }; - - std::unique_ptr<ClPermute> _permute_input; - std::unique_ptr<ClPermute> _permute_output; - std::unique_ptr<kernels::ClLogits1DMaxShiftExpSumKernel> _max_shift_exp_sum_kernel; - std::unique_ptr<kernels::ClLogits1DNormKernel> _norm_kernel; - bool _needs_permute{ false }; - - TensorInfo _max_info; - TensorInfo _sum_info; - TensorInfo _tmp_info; - TensorInfo _permuted_src_info; - TensorInfo _permuted_dst_info; - - experimental::MemoryRequirements _aux_mem{}; -}; - -} // opencl -} // arm_compute -#endif /* ARM_COMPUTE_CL_SOFTMAX_H */
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClSub.cpp b/src/runtime/gpu/cl/operators/ClSub.cpp deleted file mode 100644 index 429f23a837..0000000000 --- a/src/runtime/gpu/cl/operators/ClSub.cpp +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "src/runtime/gpu/cl/operators/ClSub.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClElementwiseKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClSub::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, - ConvertPolicy policy, const ActivationLayerInfo &act_info) -{ - auto k = std::make_unique<kernels::ClSaturatedArithmeticKernel>(); - k->configure(compile_context, ArithmeticOperation::SUB, src1, src2, dst, policy, act_info); - _kernel = std::move(k); -} - -Status ClSub::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, - ConvertPolicy policy, const ActivationLayerInfo &act_info) -{ - return kernels::ClSaturatedArithmeticKernel::validate(ArithmeticOperation::SUB, src1, src2, dst, policy, act_info); -} -} // namespace opencl -} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClSub.h b/src/runtime/gpu/cl/operators/ClSub.h deleted file mode 100644 index bcad84d583..0000000000 --- a/src/runtime/gpu/cl/operators/ClSub.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_SUB_H -#define ARM_COMPUTE_CL_SUB_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run arithmetic subtraction - * - * @note The tensor data type for the inputs must be U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. - * @note The function performs an arithmetic subtraction between two tensors. - */ -class ClSub : public IClOperator -{ -public: - /** Default Constructor */ - ClSub() = default; - /** Configure function for a given list of arguments. - * - * Valid configurations (src1,src2) -> dst : - * - * - (U8,U8) -> U8 - * - (U8,U8) -> S16 - * - (S16,U8) -> S16 - * - (U8,S16) -> S16 - * - (S16,S16) -> S16 - * - (S32,S32) -> S32 - * - (F16,F16) -> F16 - * - (F32,F32) -> F32 - * - (QASYMM8,QASYMM8) -> QASYMM8 - * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED - * - (QSYMM16,QSYMM16) -> QSYMM16 - * - * @param[in] compile_context The compile context to be used. - * @param[in, out] src1 First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. - * The source tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[in, out] src2 Second source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. 
- * The source tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0. - * @param[out] dst Destination tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. - * @param[in] policy Policy to use to handle overflow. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - */ - void configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, ConvertPolicy policy, - const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref ClSub - * - * Valid configurations (src1,src2) -> dst : - * - * - (U8,U8) -> U8 - * - (U8,U8) -> S16 - * - (S16,U8) -> S16 - * - (U8,S16) -> S16 - * - (S16,S16) -> S16 - * - (S32,S32) -> S32 - * - (F16,F16) -> F16 - * - (F32,F32) -> F32 - * - (QASYMM8,QASYMM8) -> QASYMM8 - * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED - * - (QSYMM16,QSYMM16) -> QSYMM16 - * - * @param[in] src1 First source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. - * @param[in] src2 Second source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. - * @param[in] dst Destination tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32. - * @param[in] policy Policy to use to handle overflow. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, ConvertPolicy policy, - const ActivationLayerInfo &act_info = ActivationLayerInfo()); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_SUB_H */ diff --git a/src/runtime/gpu/cl/operators/ClTranspose.cpp b/src/runtime/gpu/cl/operators/ClTranspose.cpp deleted file mode 100644 index 48f44282e8..0000000000 --- a/src/runtime/gpu/cl/operators/ClTranspose.cpp +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "src/runtime/gpu/cl/operators/ClTranspose.h" - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClTransposeKernel.h" - -namespace arm_compute -{ -namespace opencl -{ -void ClTranspose::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst) -{ - auto k = std::make_unique<kernels::ClTransposeKernel>(); - k->configure(compile_context, src, dst); - _kernel = std::move(k); -} - -Status ClTranspose::validate(const ITensorInfo *src, const ITensorInfo *dst) -{ - return kernels::ClTransposeKernel::validate(src, dst); -} -} // namespace opencl -} // namespace arm_compute
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClTranspose.h b/src/runtime/gpu/cl/operators/ClTranspose.h deleted file mode 100644 index d898f677ca..0000000000 --- a/src/runtime/gpu/cl/operators/ClTranspose.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL_TRANSPOSE_H -#define ARM_COMPUTE_CL_TRANSPOSE_H - -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" - -namespace arm_compute -{ -namespace opencl -{ -/** Basic function to run @ref kernels::ClTransposeKernel */ -class ClTranspose : public IClOperator -{ -public: - /** Constructor */ - ClTranspose() = default; - /** Initialise the kernel's inputs and outputs - * - * @param[in] compile_context The compile context to be used. - * @param[in] src The src tensor info. Data types supported: All. 
- * @param[in] dst The dst tensor info. Data types supported: Same as @p src - */ - void configure(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst); - /** Static function to check if given info will lead to a valid configuration of @ref kernels::ClTransposeKernel. - * - * @param[in] src First tensor src info. Data types supported: All. - * @param[in] dst Output tensor info. Data types supported: same as @p src. - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *dst); -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_TRANSPOSE_H */ diff --git a/src/runtime/gpu/cl/operators/ClWinogradConv2d.cpp b/src/runtime/gpu/cl/operators/ClWinogradConv2d.cpp deleted file mode 100644 index c8db697778..0000000000 --- a/src/runtime/gpu/cl/operators/ClWinogradConv2d.cpp +++ /dev/null @@ -1,299 +0,0 @@ -/* - * Copyright (c) 2018-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/runtime/gpu/cl/operators/ClWinogradConv2d.h" - -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/experimental/Types.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "src/core/CL/kernels/CLFillBorderKernel.h" -#include "src/core/CL/kernels/CLFillBorderKernel.h" -#include "src/core/gpu/cl/kernels/ClWinogradFilterTransformKernel.h" -#include "src/core/gpu/cl/kernels/ClWinogradInputTransformKernel.h" -#include "src/core/gpu/cl/kernels/ClWinogradOutputTransformKernel.h" -#include "src/core/helpers/MemoryHelpers.h" -#include "src/runtime/gpu/cl/utils/ClAuxTensorHandler.h" -#include "support/Cast.h" - -using namespace arm_compute::experimental; - -namespace arm_compute -{ -namespace opencl -{ -namespace -{ -Size2D winograd_output_tile(const Size2D &input_dims, const Size2D &kernel_dims, DataLayout data_layout) -{ - Size2D output_tile = Size2D{}; - - const unsigned int kernel_max_dim = std::max(kernel_dims.width, kernel_dims.height); - - // Check if the input spatial dimensions are smaller than 4 - const bool is_input_lt4_nchw = (input_dims.width <= 4 && input_dims.height <= 4) && (data_layout == DataLayout::NCHW); - - if(kernel_max_dim == 3U) - { - if(kernel_dims == Size2D(3U, 3U)) - { - output_tile = is_input_lt4_nchw ? Size2D(2U, 2U) : Size2D(4U, 4U); - } - else if(kernel_dims == Size2D(3U, 1U)) - { - output_tile = is_input_lt4_nchw ? Size2D(2U, 1U) : Size2D(4U, 1U); - } - else - { - output_tile = is_input_lt4_nchw ? 
Size2D(1U, 2U) : Size2D(1U, 4U); - } - } - else if(kernel_max_dim == 5U) - { - output_tile = Size2D(kernel_dims.width == 1 ? 1U : 4U, - kernel_dims.height == 1 ? 1U : 4U); - } - else if(kernel_max_dim == 7U) - { - output_tile = Size2D(kernel_dims.width == 1 ? 1U : 2U, - kernel_dims.height == 1 ? 1U : 2U); - } - - return output_tile; -} - -bool check_support_fast_math(const Size2D &output_tile, const Size2D &kernel_size) -{ - // Check if we want to configure a Winograd configuration which requires fast math - using WinogradConfiguration = std::pair<std::pair<int, int>, std::pair<int, int>>; - - std::vector<WinogradConfiguration> fast_math_winograd = - { - WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5)), - WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(7, 7)) - }; - - auto p = std::make_pair(std::pair<int, int>(output_tile.width, output_tile.height), - std::pair<int, int>(kernel_size.width, kernel_size.height)); - - return std::find(fast_math_winograd.begin(), fast_math_winograd.end(), p) != fast_math_winograd.end(); -} - -Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info, - const ActivationLayerInfo &act_info, bool enable_fast_math) -{ - // Get indeces for the width and height - const size_t idx_width = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::WIDTH); - const size_t idx_height = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::HEIGHT); - - // Input shape, kernel size and output tile - const Size2D input_dims = Size2D(src->tensor_shape()[idx_width], src->tensor_shape()[idx_height]); - const Size2D kernel_size = Size2D(weights->tensor_shape()[idx_width], weights->tensor_shape()[idx_height]); - const Size2D output_tile = winograd_output_tile(input_dims, kernel_size, src->data_layout()); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(((conv_info.pad_left() > 
(kernel_size.x() / 2u)) || (conv_info.pad_right() > (kernel_size.x() / 2u))), "Winograd only supports padding up to half kernel size"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(((conv_info.pad_top() > (kernel_size.y() / 2u)) || (conv_info.pad_bottom() > (kernel_size.y() / 2u))), "Winograd only supports padding up to half kernel size"); - - // Check if the Winograd configuration requires fast math - if(!enable_fast_math) - { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F32); //disable winograd for fp16 if fast math is false. - ARM_COMPUTE_RETURN_ERROR_ON_MSG(check_support_fast_math(output_tile, kernel_size), "This Winograd configuration requires enable_fast_math=true"); - } - - const WinogradInfo winograd_info = WinogradInfo(output_tile, - kernel_size, - input_dims, - conv_info, - src->data_layout()); - - // Validate input transform - const TensorShape input0_shape = misc::shape_calculator::compute_winograd_input_transform_shape(*src, winograd_info); - const TensorInfo input0 = src->clone()->set_tensor_shape(input0_shape); - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWinogradInputTransformKernel::validate(src, &input0, winograd_info)); - - // Validate filter transform - const TensorShape input1_shape = misc::shape_calculator::compute_winograd_filter_transform_shape(*weights, winograd_info); - const TensorInfo input1 = weights->clone()->set_tensor_shape(input1_shape); - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWinogradFilterTransformKernel::validate(weights, &input1, winograd_info)); - - // Validate batched matrix multiply - TensorShape batched_mm_output_shape = input0.tensor_shape(); - batched_mm_output_shape[0] = input1.tensor_shape()[0]; - const TensorInfo batched_mm_output = input0.clone()->set_tensor_shape(batched_mm_output_shape); - ARM_COMPUTE_RETURN_ON_ERROR(ClGemm::validate(&input0, &input1, nullptr, &batched_mm_output, 1.0f, 0.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run*/, 0, false, false, - 
GEMMLowpOutputStageInfo(), (src->data_type() == DataType::F16)))); - - // Configure output transform - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWinogradOutputTransformKernel::validate(&batched_mm_output, biases, dst, winograd_info, act_info)); - return Status{}; -} - -} // namespace - -ClWinogradConv2d::ClWinogradConv2d() - : _batched_mm(), - _input_transform(std::make_unique<kernels::ClWinogradInputTransformKernel>()), - _filter_transform(std::make_unique<kernels::ClWinogradFilterTransformKernel>()), - _output_transform(std::make_unique<kernels::ClWinogradOutputTransformKernel>()), - _border_handler(), - _input0(), - _input1(), - _batched_mm_output(), - _is_prepared(false), - _aux_mem() -{ -} - -ClWinogradConv2d::~ClWinogradConv2d() = default; - -void ClWinogradConv2d::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, - const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info, bool enable_fast_math) -{ - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, weights, biases, dst, conv_info, act_info, enable_fast_math)); - // Get indices for the width and height - const size_t idx_width = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::WIDTH); - const size_t idx_height = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::HEIGHT); - - // Input shape, kernel size and output tile - const Size2D input_dims = Size2D(src->tensor_shape()[idx_width], src->tensor_shape()[idx_height]); - const Size2D kernel_size = Size2D(weights->tensor_shape()[idx_width], weights->tensor_shape()[idx_height]); - const Size2D output_tile = winograd_output_tile(input_dims, kernel_size, src->data_layout()); - - // Check if the Winograd configuration requires fast math - if(!enable_fast_math) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F32); //disable winograd for fp16 if fast math is false. 
- ARM_COMPUTE_ERROR_ON_MSG(check_support_fast_math(output_tile, kernel_size), "This Winograd configuration requires enable_fast_math=true"); - } - const WinogradInfo winograd_info = WinogradInfo(output_tile, - kernel_size, - input_dims, - conv_info, - src->data_layout()); - - _is_prepared = false; - - // Configure input transform - _input_transform->configure(compile_context, src, &_input0, winograd_info); - _border_handler.configure(compile_context, src, _input_transform->border_size(), BorderMode::CONSTANT, PixelValue()); - - // Configure filter transform - _filter_transform->configure(compile_context, weights, &_input1, winograd_info); - - // Configure batched matrix multiply - _batched_mm.configure(compile_context, &_input0, &_input1, nullptr, &_batched_mm_output, 1.0f, 0.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run*/, 0, - false, false, - GEMMLowpOutputStageInfo(), - (src->data_type() == DataType::F16))); - - // Configure output transform - _output_transform->configure(compile_context, &_batched_mm_output, biases, dst, winograd_info, act_info); - - _aux_mem = _batched_mm.workspace(); - _aux_mem.push_back(MemoryInfo(offset_int_vec(2), MemoryLifetime::Temporary, _input0.total_size())); - _aux_mem.push_back(MemoryInfo(offset_int_vec(3), MemoryLifetime::Persistent, _input1.total_size())); - _aux_mem.push_back(MemoryInfo(offset_int_vec(4), MemoryLifetime::Temporary, _batched_mm_output.total_size())); -} - -Status ClWinogradConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info, - const ActivationLayerInfo &act_info, bool enable_fast_math) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, weights, biases, dst, conv_info, act_info, enable_fast_math)); - return Status{}; -} - -void ClWinogradConv2d::run(ITensorPack &tensors) -{ - prepare(tensors); - - // Run input transform - auto src = utils::cast::polymorphic_downcast<const ICLTensor 
*>(tensors.get_const_tensor(TensorType::ACL_SRC_0)); - auto biases = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_2)); - auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); - - CLAuxTensorHandler input0(offset_int_vec(2), _input0, tensors, true); - CLAuxTensorHandler input1(offset_int_vec(3), _input1, tensors, true); - CLAuxTensorHandler batched_mm_output(offset_int_vec(4), _batched_mm_output, tensors, true); - - ITensorPack pack_it - { - { TensorType::ACL_SRC, src }, - { TensorType::ACL_DST, input0.get() }, - }; - CLScheduler::get().enqueue_op(_border_handler, pack_it); - CLScheduler::get().enqueue_op(*_input_transform, pack_it); - - // Run batched matrix multiplication - ITensorPack pack_mm - { - { TensorType::ACL_SRC_0, input0.get() }, - { TensorType::ACL_SRC_1, input1.get() }, - { TensorType::ACL_DST, batched_mm_output.get() }, - }; - _batched_mm.run(pack_mm); - - // Run output transform - ITensorPack pack_ot - { - { TensorType::ACL_SRC_0, batched_mm_output.get() }, - { TensorType::ACL_SRC_1, biases }, - { TensorType::ACL_DST, dst }, - }; - CLScheduler::get().enqueue_op(*_output_transform, pack_ot); -} - -void ClWinogradConv2d::prepare(ITensorPack &tensors) -{ - if(!_is_prepared) - { - auto weights = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_1)); - ICLTensor *in1_aux = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(offset_int_vec(3))); - - CLAuxTensorHandler input1(_input1, *in1_aux); - ITensorPack pack_ft - { - { TensorType::ACL_SRC, weights }, - { TensorType::ACL_DST, input1.get() }, - }; - // Run filter transform and mark original weights as unused - CLScheduler::get().enqueue_op(*_filter_transform, pack_ft, false); - weights->mark_as_unused(); - - tensors.add_tensor(ACL_SRC_1, input1.get()); - // Prepare GEMM and release reshaped weights if marked unused by ClGemm - 
_batched_mm.prepare(tensors); - - CLScheduler::get().queue().finish(); - _is_prepared = true; - } -} - -experimental::MemoryRequirements ClWinogradConv2d::workspace() const -{ - return _aux_mem; -} -} // namespace opencl -} // namespace arm_compute
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClWinogradConv2d.h b/src/runtime/gpu/cl/operators/ClWinogradConv2d.h deleted file mode 100644 index 83b31f1c99..0000000000 --- a/src/runtime/gpu/cl/operators/ClWinogradConv2d.h +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2018-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CL_WINOGRADCONV2D_H -#define ARM_COMPUTE_CL_WINOGRADCONV2D_H - -#include "arm_compute/runtime/CL/CLTensor.h" -#include "src/core/CL/kernels/CLFillBorderKernel.h" -#include "src/core/gpu/cl/ClCompileContext.h" -#include "src/runtime/gpu/cl/IClOperator.h" -#include "src/runtime/gpu/cl/operators/ClGemm.h" - -namespace arm_compute -{ -class CLCompileContext; -class ITensorInfo; -namespace opencl -{ -namespace kernels -{ -class ClWinogradInputTransformKernel; -class ClWinogradFilterTransformKernel; -class ClWinogradOutputTransformKernel; -} // kernels -/** Basic function to execute Winograd-based convolution on OpenCL. This function calls the following OpenCL functions/kernels: - * - * -# @ref kernels::ClWinogradInputTransformKernel - * -# @ref kernels::ClWinogradFilterTransformKernel (only once) - * -# @ref ClGemm - * -# @ref kernels::ClWinogradOutputTransformKernel - * - */ -class ClWinogradConv2d : public IClOperator -{ -public: - /** Default constructor */ - ClWinogradConv2d(); - /** Default destructor */ - ~ClWinogradConv2d(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ClWinogradConv2d(const ClWinogradConv2d &) = delete; - /** Default move constructor */ - ClWinogradConv2d(ClWinogradConv2d &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - ClWinogradConv2d &operator=(const ClWinogradConv2d &) = delete; - /** Default move assignment operator */ - ClWinogradConv2d &operator=(ClWinogradConv2d &&) = default; - /** Set the input and output tensors. 
- * - * Valid data layouts: - * - NHWC - * - NCHW - * - * Valid data type configurations: - * |src0 |src1 |src2 |dst | - * |:--------------|:--------------|:------|:--------------| - * |F16 |F16 |F16 |F16 | - * |F32 |F32 |F32 |F32 | - * - * @note: This function only works with 3x3,3x1,1x3,5x5,5x1,1x5,7x1 and 1x7 kernels along with unit strides for both NCHW and NHWC data layout - * @note Some Winograd configurations (i.e. F(4x4, 5x5)) are supported only with enable_fast_math = true - * - * @param[in] compile_context The compile context to be used. - * @param[in] src Source tensor info. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: F16/F32. - * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p src. - * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].Data type supported: Same as @p src - * @param[out] dst Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. - * Data types supported: Same as @p src. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation - * available which may introduce a drop of accuracy as well. 
Default is false - */ - void configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const PadStrideInfo &conv_info, - const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false); - /** Static function to check if given info will lead to a valid configuration - * - * Similar to ClWinogradConv2d::configure() - * - * @return a status - */ - static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info, - const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false); - - // Inherited method overridden - void run(ITensorPack &tensors) override; - void prepare(ITensorPack &tensors) override; - experimental::MemoryRequirements workspace() const override; - -private: - ClGemm _batched_mm; - std::unique_ptr<kernels::ClWinogradInputTransformKernel> _input_transform; - std::unique_ptr<kernels::ClWinogradFilterTransformKernel> _filter_transform; - std::unique_ptr<kernels::ClWinogradOutputTransformKernel> _output_transform; - CLFillBorderKernel _border_handler; - TensorInfo _input0; - TensorInfo _input1; - TensorInfo _batched_mm_output; - bool _is_prepared; - experimental::MemoryRequirements _aux_mem{}; -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_WINOGRADCONV2D_H */ diff --git a/src/runtime/gpu/cl/utils/ClAuxTensorHandler.h b/src/runtime/gpu/cl/utils/ClAuxTensorHandler.h deleted file mode 100644 index 152e3c6c04..0000000000 --- a/src/runtime/gpu/cl/utils/ClAuxTensorHandler.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CL_UTILS_CL_AUX_TENSOR_HANDLER_H -#define ARM_COMPUTE_CL_UTILS_CL_AUX_TENSOR_HANDLER_H - -#include "arm_compute/core/ITensorPack.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/runtime/CL/CLTensor.h" - -#include "support/Cast.h" - -namespace arm_compute -{ -namespace opencl -{ -/* Tensor handler to wrap and handle tensor allocations on workspace buffers */ -class CLAuxTensorHandler -{ -public: - CLAuxTensorHandler(int slot_id, TensorInfo &info, ITensorPack &pack, bool pack_inject = false) - : _tensor() - { - _tensor.allocator()->soft_init(info); - - ICLTensor *packed_tensor = utils::cast::polymorphic_downcast<ICLTensor *>(pack.get_tensor(slot_id)); - if((packed_tensor == nullptr) || (info.total_size() > packed_tensor->info()->total_size())) - { - _tensor.allocator()->allocate(); - if(pack_inject) - { - pack.add_tensor(slot_id, &_tensor); - _injected_tensor_pack = &pack; - _injected_slot_id = slot_id; - } - } - else - { - _tensor.allocator()->import_memory(packed_tensor->cl_buffer()); - } - } - - CLAuxTensorHandler(TensorInfo &info, ICLTensor &tensor) - : _tensor() - { - _tensor.allocator()->soft_init(info); - if(info.total_size() <= tensor.info()->total_size()) - { - _tensor.allocator()->import_memory(tensor.cl_buffer()); - } - } - - CLAuxTensorHandler(const CLAuxTensorHandler &) = delete; - CLAuxTensorHandler &operator=(const CLAuxTensorHandler) = delete; - - ~CLAuxTensorHandler() - { - if(_injected_tensor_pack) - { - _injected_tensor_pack->remove_tensor(_injected_slot_id); - } - } - - ICLTensor *get() - { - return &_tensor; - } - - ICLTensor *operator()() - { - return &_tensor; - } - -private: - CLTensor _tensor{}; - ITensorPack *_injected_tensor_pack{ nullptr }; - int _injected_slot_id{ TensorType::ACL_UNKNOWN }; -}; -} // namespace opencl -} // namespace arm_compute -#endif /* ARM_COMPUTE_CL_UTILS_CL_AUX_TENSOR_HANDLER_H */
\ No newline at end of file |